labfreed 0.0.7__py2.py3-none-any.whl → 0.0.9__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- labfreed/DisplayNameExtension/DisplayNameExtension.py +1 -1
- labfreed/PAC_ID/data_model.py +60 -28
- labfreed/PAC_ID/parse.py +6 -4
- labfreed/TREX/UneceUnits.json +33730 -0
- labfreed/TREX/data_model.py +869 -0
- labfreed/TREX/parse.py +128 -0
- labfreed/TREX/serialize.py +3 -0
- labfreed/TREX/unece_units.py +90 -0
- labfreed/__init__.py +1 -1
- labfreed/{TREXExtension → conversion_tools}/unit_utilities.py +8 -42
- labfreed/validation.py +115 -39
- {labfreed-0.0.7.dist-info → labfreed-0.0.9.dist-info}/METADATA +1 -1
- labfreed-0.0.9.dist-info/RECORD +22 -0
- labfreed/TREXExtension/data_model.py +0 -239
- labfreed/TREXExtension/parse.py +0 -53
- labfreed-0.0.7.dist-info/RECORD +0 -19
- /labfreed/{TREXExtension → conversion_tools}/uncertainty.py +0 -0
- /labfreed/{DisplayNameExtension → utilities}/base36.py +0 -0
- {labfreed-0.0.7.dist-info → labfreed-0.0.9.dist-info}/WHEEL +0 -0
- {labfreed-0.0.7.dist-info → labfreed-0.0.9.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,869 @@
|
|
|
1
|
+
from datetime import date, datetime, time
|
|
2
|
+
import logging
|
|
3
|
+
import re
|
|
4
|
+
from collections import Counter
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
from pydantic import RootModel, ValidationError, field_validator, model_validator, Field
|
|
8
|
+
from labfreed.TREX.unece_units import unece_unit_codes
|
|
9
|
+
from labfreed.validation import BaseModelWithValidationMessages
|
|
10
|
+
from abc import ABC, abstractmethod
|
|
11
|
+
|
|
12
|
+
from ..PAC_ID.data_model import Extension
|
|
13
|
+
from labfreed.utilities.base36 import to_base36, from_base36
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TREX_Segment(BaseModelWithValidationMessages, ABC):
    """Abstract base class for a segment of a T-REX extension.

    Every segment carries a ``key``. Problems with the key are reported
    through ``add_validation_message``; this validator does not raise.
    """

    key: str

    @model_validator(mode='after')
    def validate_key(self):
        """Report characters in ``key`` other than ``A-Z``, ``0-9``, ``.`` and ``-``."""
        if not_allowed_chars := set(re.sub(r'[A-Z0-9\.-]', '', self.key)):
            self.add_validation_message(
                source=f"TREX segment key {self.key}",
                type="Error",
                msg=f"Segment key contains invalid characters: {','.join(not_allowed_chars)}",
                highlight_pattern=f'{self.key}$',
                highlight_sub=not_allowed_chars
            )
        return self

    @abstractmethod
    def serialize_for_trex(self):
        """Return the T-REX string form of the segment (implemented by subclasses)."""
        # The message previously named a non-existent 'serialize_as_trex()'.
        raise NotImplementedError("Subclasses must implement 'serialize_for_trex()' method")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class ValueMixin(BaseModelWithValidationMessages, ABC):
    """Mixin that stores the raw string ``value`` of a T-REX value."""

    value: str

    def serialize_for_trex(self):
        """Return the stored string exactly as it is."""
        raw = self.value
        return raw
|
|
44
|
+
|
|
45
|
+
# @abstractclassmethod
|
|
46
|
+
# def from_python_type(cls, v):
|
|
47
|
+
# ...
|
|
48
|
+
|
|
49
|
+
# @abstractmethod
|
|
50
|
+
# def to_python_type(self):
|
|
51
|
+
# ...
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class NumericValue(ValueMixin):
    """Value whose string must be a plain decimal number such as ``-12.5``."""

    @model_validator(mode='after')
    def validate(self):
        """Report illegal characters and strings that are not a number."""
        text = self.value
        origin = f"TREX numeric value {text}"

        # First check: anything besides digits, '.' and '-' is reported.
        not_allowed_chars = set(re.sub(r'[0-9\.-]', '', text))
        if not_allowed_chars:
            self.add_validation_message(
                source=origin,
                type="Error",
                msg=f"Characters {','.join(not_allowed_chars)} are not allowed in quantity segment. Base36 encoding only allows A-Z0-9",
                highlight_pattern=f'{text}',
                highlight_sub=not_allowed_chars
            )

        # Second check: the whole string must look like [-]digits[.digits].
        looks_numeric = re.fullmatch(r'-?\d+(\.\d+)?', text) is not None
        if not looks_numeric:
            self.add_validation_message(
                source=origin,
                type="Error",
                msg=f"{text} cannot be converted to number",
                highlight_pattern=f'{text}'
            )
        return self
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class DateValue(ValueMixin):
    """Date, time or datetime value in the compact T-REX notation.

    Accepted forms are ``YYYYMMDD``, ``THHMM``, ``THHMMSS``, ``THHMMSS.SSS``
    and a date directly followed by one of the time forms.
    """

    @model_validator(mode='after')
    def validate(self):
        """Report a value that is malformed or is not a real date/time."""
        pattern: str = r'((?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2}))?(T(?P<hour>\d{2})(?P<minute>\d{2})(?P<second>\d{2})?(\.(?P<millisecond>\d{3}))?)?'
        value = self.value
        # NOTE(review): both halves of the pattern are optional, so the empty
        # string is accepted here -- confirm whether that is intended.
        matches = re.fullmatch(pattern, value)
        if not matches:
            self.add_validation_message(
                source=f"TREX date value {value}",
                type="Error",
                msg=f'{value} is not in a valid format. Valid format for date: YYYYMMDD; Valid for time: THHMM, THHMMSS, THHMMSS.SSS; Datetime any combination of valid date and time',
                highlight_pattern=f'{value}'
            )
            return self

        d = {k: int(v) for k, v in matches.groupdict().items() if v}
        if 'millisecond' in d:
            # datetime/time take microseconds, not milliseconds.
            d['microsecond'] = d.pop('millisecond') * 1000
        try:
            # Test for the key, not its truthiness: year "0000" parses to 0 and
            # would otherwise be passed to time(), which rejects the 'year'
            # keyword with an uncaught TypeError.
            if 'year' in d:  # input contains a date part
                datetime(**d)
            else:  # input is only a time
                time(**d)
        except ValueError:
            self.add_validation_message(
                source=f"TREX date value {value}",
                type="Error",
                msg=f'{value} is no valid date or time.',
                highlight_pattern=f'{value}'
            )

        return self

    def to_python_type(self) -> str:
        """Not implemented yet; currently returns ``None``."""
        ...

    @classmethod
    def from_python_type(cls, v: date | time | datetime):
        """Build a ``DateValue`` from a ``date``, ``time`` or ``datetime``.

        The result uses the notation that ``validate`` accepts (no colons).
        Seconds are written only when non-zero, milliseconds only when the
        microsecond field is non-zero.
        """
        sd = ""
        st = ""
        # datetime is a subclass of date, so a datetime takes both branches.
        if isinstance(v, date):
            # Explicit zero padding; strftime('%Y') is not padded to four
            # digits on every platform.
            sd = f"{v.year:04d}{v.month:02d}{v.day:02d}"
        if isinstance(v, (time, datetime)):
            st = f"T{v.hour:02d}{v.minute:02d}"
            if v.microsecond:
                st += f"{v.second:02d}.{v.microsecond // 1000:03d}"
            elif v.second:
                st += f"{v.second:02d}"

        return DateValue(value=sd + st)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class BoolValue(ValueMixin):
    """Boolean value encoded as ``T`` (true) or ``F`` (false)."""

    @model_validator(mode='after')
    def validate(self):
        """Report any value other than ``T`` or ``F``."""
        if self.value not in ('T', 'F'):
            self.add_validation_message(
                source=f"TREX boolean value {self.value}",
                type="Error",
                msg=f'{self.value} is no valid boolean. Must be T or F',
                highlight_pattern=f'{self.value}',
                highlight_sub=list(self.value)
            )
        return self

    def to_python_type(self) -> bool:
        """Return the Python ``bool`` for this value.

        Raises:
            ValueError: if the stored value is neither ``T`` nor ``F``.
        """
        # Compare the stored string; the model object never equals a str.
        if self.value == 'T':
            return True
        if self.value == 'F':
            return False
        # The exception used to be constructed but never raised.
        raise ValueError(
            f'{self.value} is not valid boolean. That really should not have been possible -- Contact the maintainers of the library'
        )
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class AlphanumericValue(ValueMixin):
    """Alphanumeric value; allowed characters are ``A-Z``, ``0-9``, ``.`` and ``-``."""

    @model_validator(mode='after')
    def validate(self):
        """Report lowercase letters and any other disallowed characters."""
        # re.search, not re.match: a lowercase letter anywhere in the value
        # must be detected, not only one at the first position.
        if re.search(r'[a-z]', self.value):
            self.add_validation_message(
                source=f"TREX value {self.value}",
                type="Error",
                msg="Lower case characters are not allowed.",
                highlight_pattern=self.value
            )

        if not_allowed_chars := set(re.sub(r'[A-Z0-9\.-]', '', self.value)):
            self.add_validation_message(
                source=f"TREX value {self.value}",
                type="Error",
                msg=f"Characters {','.join(not_allowed_chars)} are not allowed in alphanumeric segment",
                highlight_pattern=self.value,
                highlight_sub=not_allowed_chars
            )
        return self

    def to_python_type(self) -> str:
        """Return the stored string."""
        # Previously returned the model object itself, contradicting '-> str'.
        return self.value

    @classmethod
    def from_python_type(cls, v):
        """Not implemented."""
        raise NotImplementedError()
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
class TextValue(ValueMixin):
    """Text value transported as a base36 string (characters ``A-Z0-9``)."""

    @model_validator(mode='after')
    def validate(self):
        """Report characters outside ``A-Z`` and ``0-9``."""
        if not_allowed_chars := set(re.sub(r'[A-Z0-9]', '', self.value)):
            self.add_validation_message(
                source=f"TREX value {self.value}",
                type="Error",
                msg=f"Characters {','.join(not_allowed_chars)} are not allowed in text segment. Base36 encoding only allows A-Z0-9",
                highlight_pattern=self.value,
                highlight_sub=not_allowed_chars
            )
        return self

    def to_python_type(self) -> str:
        """Return the result of ``from_base36`` applied to the stored string."""
        # Decode the stored string, not the model object.
        decoded = from_base36(self.value)
        return decoded
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
class BinaryValue(ValueMixin):
    """Binary value transported as a base36 string (characters ``A-Z0-9``)."""

    @model_validator(mode='after')
    def validate(self):
        """Report characters outside ``A-Z`` and ``0-9``."""
        if not_allowed_chars := set(re.sub(r'[A-Z0-9]', '', self.value)):
            self.add_validation_message(
                source=f"TREX value {self.value}",
                type="Error",
                # Message used to say "text segment" (copied from TextValue).
                msg=f"Characters {','.join(not_allowed_chars)} are not allowed in binary segment. Base36 encoding only allows A-Z0-9",
                highlight_pattern=self.value,
                highlight_sub=not_allowed_chars
            )
        return self

    def to_python_type(self) -> bytes:
        """Return the base36-decoded content as ``bytes``."""
        # Decode the stored string, not the model object.
        # NOTE(review): bytes(...) only works if from_base36 returns a
        # bytes-like object or an iterable of ints -- confirm its return type.
        decoded = bytes(from_base36(self.value))
        return decoded
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
class ErrorValue(ValueMixin):
    """Error value; allowed characters are ``A-Z``, ``0-9``, ``.`` and ``-``."""

    @model_validator(mode='after')
    def validate(self):
        """Report characters outside the allowed set."""
        if not_allowed_chars := set(re.sub(r'[A-Z0-9\.-]', '', self.value)):
            self.add_validation_message(
                source=f"TREX value {self.value}",
                type="Error",
                msg=f"Characters {','.join(not_allowed_chars)} are not allowed in error segment",
                highlight_pattern=self.value,
                highlight_sub=not_allowed_chars
            )
        return self

    def to_python_type(self) -> str:
        """Return the stored string."""
        # Previously returned the model object itself, contradicting '-> str'.
        return self.value
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
class ValueSegment(TREX_Segment, ValueMixin, ABC):
    """Segment made of a key, a type code and a single value."""

    type: str

    @model_validator(mode='after')
    def validate_type(self):
        """Report a type that is neither a special code nor a UNECE unit code."""
        special_codes = ['T.D', 'T.B', 'T.A', 'T.T', 'T.X', 'E']
        allowed = unece_unit_codes() + special_codes
        if self.type in allowed:
            return self
        self.add_validation_message(
            source=f"TREX value segment {self.key}",
            type="Error",
            msg=f"Type {self.type} is invalid. Must be 'T.D', 'T.B', 'T.A', 'T.T', 'T.X', 'E' or a UNECE unit",
            highlight_pattern=self.type
        )
        return self

    @classmethod
    def get_subclass(cls, type: str, key: str, value: str):
        """Instantiate the segment class matching ``type``.

        Any type that is not one of the special codes yields a
        ``NumericSegment``.
        """
        by_code = (
            ('T.D', DateSegment),
            ('T.B', BoolSegment),
            ('T.A', AlphanumericSegment),
            ('T.T', TextSegment),
            ('T.X', BinarySegment),
            ('E', ErrorSegment),
        )
        for code, segment_cls in by_code:
            if type == code:
                return segment_cls(key=key, value=value, type=type)
        return NumericSegment(value=value, key=key, type=type)

    def serialize_for_trex(self) -> str:
        """Return ``<key>$<type>:<value>``."""
        return '{}${}:{}'.format(self.key, self.type, self.value)
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
class NumericSegment(ValueSegment, NumericValue):
    # Value segment validated as a number. ValueSegment.get_subclass creates
    # this class for every type that is not one of the special codes.
    type: str  # no default: the type must always be passed explicitly
|
|
283
|
+
|
|
284
|
+
class DateSegment(ValueSegment, DateValue):
    """Value segment holding a date/time value; its type code is ``T.D``."""

    type: str = Field(default='T.D', frozen=True)
|
|
286
|
+
|
|
287
|
+
class BoolSegment(ValueSegment, BoolValue):
    """Value segment holding a boolean value; its type code is ``T.B``."""

    # The default used to be 'T.A' (the alphanumeric code), so a BoolSegment
    # built without an explicit type serialized with the wrong type code.
    type: str = Field('T.B', frozen=True)
|
|
289
|
+
|
|
290
|
+
class AlphanumericSegment(ValueSegment, AlphanumericValue):
    """Value segment holding an alphanumeric value; its type code is ``T.A``."""

    type: str = Field(default='T.A', frozen=True)
|
|
292
|
+
|
|
293
|
+
class TextSegment(ValueSegment, TextValue):
    """Value segment holding a text value; its type code is ``T.T``."""

    type: str = Field(default='T.T', frozen=True)
|
|
295
|
+
|
|
296
|
+
class BinarySegment(ValueSegment, BinaryValue):
    """Value segment holding a binary value; its type code is ``T.X``."""

    type: str = Field(default='T.X', frozen=True)
|
|
298
|
+
|
|
299
|
+
class ErrorSegment(ValueSegment, ErrorValue):
    """Value segment holding an error value; its type code is ``E``."""

    type: str = Field(default='E', frozen=True)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
class ColumnHeader(BaseModelWithValidationMessages):
    """Header of one table column: the column key and its type code."""

    key: str
    type: str

    @model_validator(mode='after')
    def validate_key(self):
        """Report characters in ``key`` other than ``A-Z``, ``0-9``, ``.`` and ``-``."""
        not_allowed_chars = set(re.sub(r'[A-Z0-9\.-]', '', self.key))
        if not not_allowed_chars:
            return self
        self.add_validation_message(
            source=f"TREX table column {self.key}",
            type="Error",
            msg=f"Column header key contains invalid characters: {','.join(not_allowed_chars)}",
            highlight_pattern=f'{self.key}$',
            highlight_sub=not_allowed_chars
        )
        return self

    @model_validator(mode='after')
    def validate_type(self):
        """Report a type that is neither a special code nor a UNECE unit code."""
        special_codes = ['T.D', 'T.B', 'T.A', 'T.T', 'T.X', 'E']
        allowed = unece_unit_codes() + special_codes
        if self.type in allowed:
            return self
        self.add_validation_message(
            source=f"TREX table column {self.key}",
            type="Error",
            msg=f"Type '{self.type}' is invalid. Must be 'T.D', 'T.B', 'T.A', 'T.T', 'T.X', 'E' or a UNECE unit",
            highlight_pattern=self.type
        )
        return self
|
|
331
|
+
|
|
332
|
+
class TableRow(RootModel[list[ValueMixin]]):
    """One table row: a list of values that supports ``len()`` and iteration."""

    def serialize_for_trex(self):
        """Return the serialized cells joined with ``:``."""
        cells = [cell.serialize_for_trex() for cell in self.root]
        return ':'.join(cells)

    def __len__(self):
        cells = self.root
        return len(cells)

    def __iter__(self):
        cells = self.root
        return iter(cells)
|
|
341
|
+
|
|
342
|
+
class TREX_Table(TREX_Segment):
    """Table segment: a list of column headers plus rows of values.

    Size and type problems are reported through ``add_validation_message``
    by the validators below.
    """

    column_headers: list[ColumnHeader]
    data: list[TableRow]

    @property
    def column_names(self):
        """Keys of all columns, in header order."""
        return [h.key for h in self.column_headers]

    @property
    def column_types(self):
        """Type codes of all columns, in header order."""
        return [h.type for h in self.column_headers]

    @model_validator(mode='after')
    def validate_sizes(self):
        """Report a header or rows whose length differs from the most common length."""
        n_headers = len(self.column_headers)
        sizes = [n_headers, *(len(row) for row in self.data)]
        most_common_len, _ = Counter(sizes).most_common(1)[0]

        if n_headers != most_common_len:
            self.add_validation_message(
                source=f"Table {self.key}",
                type="Error",
                # Used to interpolate the non-existent attribute
                # 'self.col_names', raising AttributeError on this path.
                msg=f"Size mismatch: Table header contains {n_headers} keys, while most rows have {most_common_len}",
                highlight_pattern=self.key
            )

        # Rows are measured against the most common length; when the header is
        # consistent this equals the header length.
        expected_row_len = most_common_len
        for i, row in enumerate(self.data):
            if len(row) != expected_row_len:
                self.add_validation_message(
                    source=f"Table {self.key}",
                    type="Error",
                    msg=f"Size mismatch: Table row {i} contains {len(row)} elements. Expected size is {expected_row_len}",
                    highlight_pattern=row.serialize_for_trex()
                )
        return self

    @model_validator(mode='after')
    def validate_data_types(self):
        """Report cells whose value class does not match the column type.

        Errors already recorded on a cell are also added to the table.
        """
        value_classes = {
            'T.D': DateValue,
            'T.B': BoolValue,
            'T.A': AlphanumericValue,
            'T.T': TextValue,
            'T.X': BinaryValue,
            'E': ErrorValue,
        }
        expected_types = self.column_types
        column_names = self.column_names
        for i, row in enumerate(self.data):
            for e, t_expected, nm in zip(row, expected_types, column_names):
                # Every other type code is treated as a unit, i.e. numeric.
                expected_cls = value_classes.get(t_expected, NumericValue)
                # Explicit check instead of assert, which is stripped under -O.
                if not isinstance(e, expected_cls):
                    self.add_validation_message(
                        source=f"Table {self.key}",
                        type="Error",
                        # The value classes have no 'type_identifier'
                        # attribute; report the class name instead.
                        msg=f"Type mismatch: Table row {i}, column {nm} is of wrong type. Is {type(e).__name__}, should be {t_expected}",
                        highlight_pattern=row.serialize_for_trex(),
                        highlight_sub=list(e.value)
                    )

                if msg := e.get_errors():
                    for m in msg:
                        self.add_validation_message(
                            source=f"Table {self.key}",
                            type="Error",
                            msg=m.problem_msg,
                            highlight_pattern=row.serialize_for_trex(),
                            highlight_sub=list(e.value)
                        )
        # An 'after' model validator must return the model instance.
        return self

    def _get_col_index(self, col: str | int):
        """Translate a column name or index into an index.

        Raises:
            ValueError: if ``col`` is a name that is not a column key.
            TypeError: if ``col`` is neither ``str`` nor ``int``.
        """
        if isinstance(col, str):
            col_index = self.column_names.index(col)
        elif isinstance(col, int):
            col_index = col
        else:
            # Instances have no __name__; take it from the type.
            raise TypeError(f"Column must be specified as string or int: {type(col).__name__}")
        return col_index

    def serialize_for_trex(self):
        """Return ``<key>$$<header>::<row>::<row>...``."""
        header = ':'.join([f'{h.key}${h.type}' for h in self.column_headers])
        data = '::'.join([r.serialize_for_trex() for r in self.data])
        s = f'{self.key}$${header}::{data}'
        return s

    def n_rows(self) -> int:
        """Number of data rows."""
        return len(self.data)

    def n_cols(self) -> int:
        """Number of column headers."""
        return len(self.column_headers)

    def row_data(self, row: int) -> list:
        """Return the row at index ``row``."""
        out = self.data[row]
        return out

    def column_data(self, col: str | int) -> list:
        """Return the cell of column ``col`` from every row."""
        col_index = self._get_col_index(col)
        out = [row[col_index] for row in self.data]
        return out

    def cell_data(self, row: int, col: str | int):
        """Return the cell at ``row``/``col``; log a warning and return ``None`` if missing."""
        try:
            col_index = self._get_col_index(col)
            value = self.data[row][col_index]
        except (ValueError, IndexError):
            # ValueError: unknown column name; IndexError: row or column index
            # out of range. The log message covers both cases.
            logging.warning(f"row {row}, column {col} not found")
            return None
        return value
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
class TREX(Extension, BaseModelWithValidationMessages):
    """T-REX extension: a named collection of segments with unique keys."""

    name_: str
    segments: list[TREX_Segment] = Field(default_factory=list)

    @property
    def name(self) -> str:
        """Name of the extension."""
        return self.name_

    @property
    def type(self) -> str:
        """Extension type; always ``'TREX'``."""
        return 'TREX'

    @property
    def data(self) -> str:
        """All segments serialized and joined with ``+``."""
        serialized = [segment.serialize_for_trex() for segment in self.segments]
        return '+'.join(serialized)

    def get_segment(self, segment_key: str) -> TREX_Segment:
        """Return the first segment whose key equals ``segment_key``, or ``None``."""
        hits = [segment for segment in self.segments if segment.key == segment_key]
        return hits[0] if hits else None

    @field_validator('segments')
    @classmethod
    def validate_segments(cls, segments):
        """Reject segment lists that contain the same key more than once."""
        key_counts = Counter(segment.key for segment in segments)
        duplicates = [key for key, times in key_counts.items() if times > 1]
        if duplicates:
            raise ValueError(f"Duplicate segment keys: {','.join(duplicates)}")
        return segments

    @staticmethod
    def from_spec_fields(name, type, data):
        """Placeholder; not implemented."""
        ...
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
class TREX_Struct(TREX_Segment):
    """Struct is a special interpretation of a T-REX Table with one row"""

    wrapped_table: TREX_Table

    @property
    def segment_name_(self):
        """Key of the wrapped table."""
        return self.wrapped_table.key

    @field_validator('wrapped_table')
    @classmethod
    def validate_table(cls, table):
        """Require the wrapped table to have exactly one row."""
        n_rows = len(table.data)
        if n_rows != 1:
            # pydantic turns a ValueError raised in a validator into a
            # ValidationError; ValidationError cannot be built from a message.
            raise ValueError(f"Struct must have exactly one row, but the table has {n_rows}")
        return table

    def serialize_for_trex(self):
        """Serialize the struct as its wrapped table."""
        # TREX_Segment declares this method abstract; without an override the
        # class has an unimplemented abstract method.
        # NOTE(review): delegating to the wrapped table is an assumption --
        # confirm the intended wire format for structs.
        return self.wrapped_table.serialize_for_trex()

    def get(self, key):
        """Return the cell of column ``key`` in the single row."""
        return self.wrapped_table.cell_data(0, key)

    def keys(self):
        """Return the column names of the wrapped table."""
        # TREX_Table exposes 'column_names'; 'col_names' does not exist.
        return self.wrapped_table.column_names
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
|
|
553
|
+
# class Value(str, ABC):
|
|
554
|
+
# @classmethod
|
|
555
|
+
# def __get_validators__(cls):
|
|
556
|
+
# yield cls.validate
|
|
557
|
+
|
|
558
|
+
# def __new__(cls, value):
|
|
559
|
+
# # validate manually
|
|
560
|
+
# validated = cls.validate(value)
|
|
561
|
+
# return super().__new__(cls, validated)
|
|
562
|
+
|
|
563
|
+
# @abstractclassmethod
|
|
564
|
+
# def validate(cls, value):
|
|
565
|
+
# ...
|
|
566
|
+
|
|
567
|
+
# @abstractclassmethod
|
|
568
|
+
# def from_python_type(v):
|
|
569
|
+
# ...
|
|
570
|
+
|
|
571
|
+
# @abstractmethod
|
|
572
|
+
# def to_python_type():
|
|
573
|
+
# ...
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
# class NumericValue(Value):
|
|
578
|
+
# @classmethod
|
|
579
|
+
# def validate(cls, value):
|
|
580
|
+
# if not_allowed_chars := set(re.sub(r'[0-9\.-]', '', value)):
|
|
581
|
+
# raise ValueError(f"Characters {','.join(not_allowed_chars)} are not allowed in quantity segment. Base36 encoding only allows A-Z0-9")
|
|
582
|
+
# if not re.fullmatch(r'-?\d+(\.\d+)?', value):
|
|
583
|
+
# raise ValueError(f"{value} cannot be converted to number")
|
|
584
|
+
# return value
|
|
585
|
+
|
|
586
|
+
# def to_python_type(self) -> str:
|
|
587
|
+
# ...
|
|
588
|
+
|
|
589
|
+
# class DateValue(Value):
|
|
590
|
+
# pattern = r'((?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2}))?(T(?P<hours>\d{2})(?P<minutes>\d{2})(?P<seconds>\d{2})?(\.(?P<milliseconds>\d{3}))?)?'
|
|
591
|
+
|
|
592
|
+
# @classmethod
|
|
593
|
+
# def validate(cls, value):
|
|
594
|
+
# if not re.fullmatch(cls.pattern, value):
|
|
595
|
+
# raise ValueError(f'{value} is no valid date or time.')
|
|
596
|
+
# return value
|
|
597
|
+
|
|
598
|
+
# def to_python_type(self) -> str:
|
|
599
|
+
# ...
|
|
600
|
+
|
|
601
|
+
# @classmethod
|
|
602
|
+
# def from_python_type(v:date|time|datetime):
|
|
603
|
+
# sd = ""
|
|
604
|
+
# st = ""
|
|
605
|
+
# match v:
|
|
606
|
+
# case date() | datetime():
|
|
607
|
+
# sd = v.strftime('%Y%m%d')
|
|
608
|
+
# case time() | datetime():
|
|
609
|
+
# if v.microsecond:
|
|
610
|
+
# st = v.strftime("T%H:%M:%S.") + f"{v.microsecond // 1000:03d}"
|
|
611
|
+
# elif v.seconds:
|
|
612
|
+
# st = v.strftime("T%H:%M:%S")
|
|
613
|
+
# else:
|
|
614
|
+
# st = v.strftime("T%H:%M")
|
|
615
|
+
|
|
616
|
+
# return DateValue(sd + st)
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
# class BoolValue(Value):
|
|
622
|
+
# @classmethod
|
|
623
|
+
# def validate(cls, value):
|
|
624
|
+
# if not value in ['T', 'F']:
|
|
625
|
+
# raise ValueError(f'{value} is no valid boolean. Must be T or F')
|
|
626
|
+
# return value
|
|
627
|
+
|
|
628
|
+
# def to_python_type(self) -> str:
|
|
629
|
+
# if self == 'T':
|
|
630
|
+
# return True
|
|
631
|
+
# elif self == 'F':
|
|
632
|
+
# return False
|
|
633
|
+
# else:
|
|
634
|
+
# Exception(f'{self} is not valid boolean. That really should not have been possible -- Contact the maintainers of the library')
|
|
635
|
+
|
|
636
|
+
|
|
637
|
+
# class AlphanumericValue(Value):
|
|
638
|
+
# @classmethod
|
|
639
|
+
# def validate(cls, value):
|
|
640
|
+
# if not_allowed_chars := set(re.sub(r'[A-Z0-9\.-]', '', value)):
|
|
641
|
+
# raise ValueError(f"Characters {','.join(not_allowed_chars)} are not allowed in alphanumeric segment")
|
|
642
|
+
# else:
|
|
643
|
+
# return value
|
|
644
|
+
|
|
645
|
+
# @property
|
|
646
|
+
# def trex_type(self):
|
|
647
|
+
# return 'T.A'
|
|
648
|
+
|
|
649
|
+
# def to_python_type(self) -> str:
|
|
650
|
+
# return self
|
|
651
|
+
|
|
652
|
+
|
|
653
|
+
# class TextValue(Value):
|
|
654
|
+
|
|
655
|
+
# @classmethod
|
|
656
|
+
# def validate(cls, value):
|
|
657
|
+
# if not_allowed_chars := set(re.sub(r'[A-Z0-9]', '', value)):
|
|
658
|
+
# raise ValueError(f"Characters {','.join(not_allowed_chars)} are not allowed in text segment. Base36 encoding only allows A-Z0-9")
|
|
659
|
+
# else:
|
|
660
|
+
# return value
|
|
661
|
+
|
|
662
|
+
# @property
|
|
663
|
+
# def trex_type(self):
|
|
664
|
+
# return 'T.A'
|
|
665
|
+
|
|
666
|
+
# def to_python_type(self) -> str:
|
|
667
|
+
# decoded = from_base36(self)
|
|
668
|
+
# return decoded
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
# class BinaryValue(Value):
|
|
672
|
+
# @classmethod
|
|
673
|
+
# def validate(cls, value):
|
|
674
|
+
# if not_allowed_chars := set(re.sub(r'[A-Z0-9]', '', value)):
|
|
675
|
+
# raise ValueError(f"Characters {','.join(not_allowed_chars)} are not allowed in text segment. Base36 encoding only allows A-Z0-9")
|
|
676
|
+
# else:
|
|
677
|
+
# return value
|
|
678
|
+
|
|
679
|
+
# @property
|
|
680
|
+
# def trex_type(self):
|
|
681
|
+
# return 'T.X'
|
|
682
|
+
|
|
683
|
+
# def to_python_type(self) -> bytes:
|
|
684
|
+
# decoded = bytes(from_base36(self))
|
|
685
|
+
# return decoded
|
|
686
|
+
|
|
687
|
+
|
|
688
|
+
# class ErrorValue(Value):
|
|
689
|
+
# @classmethod
|
|
690
|
+
# def validate(cls, value):
|
|
691
|
+
# if not_allowed_chars := set(re.sub(r'[A-Z0-9\.-]', '', value)):
|
|
692
|
+
# raise ValueError(f"Characters {','.join(not_allowed_chars)} are not allowed in error segment")
|
|
693
|
+
# else:
|
|
694
|
+
# return value
|
|
695
|
+
|
|
696
|
+
# @property
|
|
697
|
+
# def trex_type(self):
|
|
698
|
+
# return 'E'
|
|
699
|
+
|
|
700
|
+
# def to_python_type(self) -> str:
|
|
701
|
+
# return self
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
# class TREX_Segment(BaseModelWithValidationMessages, ABC):
|
|
705
|
+
# key: str
|
|
706
|
+
|
|
707
|
+
# @field_validator('key')
|
|
708
|
+
# def validate_name(cls, v):
|
|
709
|
+
# if not_allowed_chars := set(re.sub(r'[A-Z0-9\.-]', '', v)):
|
|
710
|
+
# raise ValueError(f"Segment name contains invalid characters: {','.join(not_allowed_chars)}")
|
|
711
|
+
# return v
|
|
712
|
+
|
|
713
|
+
# @abstractmethod
|
|
714
|
+
# def serialize_for_trex(self):
|
|
715
|
+
# raise NotImplementedError("Subclasses must implement 'serialize_as_trex()' method")
|
|
716
|
+
|
|
717
|
+
|
|
718
|
+
|
|
719
|
+
# class ValueSegment(TREX_Segment):
|
|
720
|
+
# type: str
|
|
721
|
+
# value: str #NumericValue|DateValue|BoolValue|AlphanumericValue|TextValue|BinaryValue|ErrorValue
|
|
722
|
+
|
|
723
|
+
# @model_validator(mode='before')
|
|
724
|
+
# @classmethod
|
|
725
|
+
# def validate(cls, model):
|
|
726
|
+
# t = model.get('type')
|
|
727
|
+
# v = model.get('value')
|
|
728
|
+
# match t:
|
|
729
|
+
# case 'T.D':
|
|
730
|
+
# v = DateValue(v)
|
|
731
|
+
# case 'T.B':
|
|
732
|
+
# v = BoolValue(v)
|
|
733
|
+
# case 'T.A':
|
|
734
|
+
# v = AlphanumericValue(v)
|
|
735
|
+
# case 'T.T':
|
|
736
|
+
# v = TextValue(v)
|
|
737
|
+
# case 'T.X':
|
|
738
|
+
# v = BinaryValue(v)
|
|
739
|
+
# case 'E':
|
|
740
|
+
# v = ErrorValue(v)
|
|
741
|
+
# case _:
|
|
742
|
+
# if not t in unece_unit_codes():
|
|
743
|
+
# raise ValueError(f'Invalid unit code. {t} is not in UNECE list of common codes')
|
|
744
|
+
# v = NumericValue(v)
|
|
745
|
+
|
|
746
|
+
# model['value'] = v
|
|
747
|
+
# return model
|
|
748
|
+
|
|
749
|
+
|
|
750
|
+
# def serialize_for_trex(self) -> str:
|
|
751
|
+
# return f'{self.key}${self.type}:{self.value}'
|
|
752
|
+
|
|
753
|
+
# class UNECEQuantity(BaseModelWithWarnings):
|
|
754
|
+
# value:int|float
|
|
755
|
+
# unece_code:str
|
|
756
|
+
# unit_name: str|None = ""
|
|
757
|
+
# unit_symbol: str|None = ""
|
|
758
|
+
|
|
759
|
+
|
|
760
|
+
# def as_strings(self):
|
|
761
|
+
# unit_symbol = self.unit_symbol
|
|
762
|
+
# if unit_symbol == "dimensionless":
|
|
763
|
+
# unit_symbol = ""
|
|
764
|
+
# s = ''
|
|
765
|
+
|
|
766
|
+
# val_str = self.value
|
|
767
|
+
# return f"{val_str}", f"{unit_symbol}", f"{val_str} {unit_symbol}"
|
|
768
|
+
|
|
769
|
+
# def __str__(self):
|
|
770
|
+
# unit_symbol = self.unit_symbol
|
|
771
|
+
# if unit_symbol == "dimensionless":
|
|
772
|
+
# unit_symbol = ""
|
|
773
|
+
|
|
774
|
+
# s = f"{self.value} {unit_symbol}"
|
|
775
|
+
# return s
|
|
776
|
+
|
|
777
|
+
|
|
778
|
+
|
|
779
|
+
|
|
780
|
+
# class ValueSegment2(TREX_Segment, ValueMixin, ABC):
|
|
781
|
+
# type:str
|
|
782
|
+
|
|
783
|
+
# @model_validator(mode='before')
|
|
784
|
+
# @classmethod
|
|
785
|
+
# def convert_str_value(cls, model):
|
|
786
|
+
# if isinstance(model.get('value'), str):
|
|
787
|
+
# bases = [base for base in cls.__bases__ if base is not ValueSegment2 and issubclass(base, ValueMixin)]
|
|
788
|
+
# base = bases[0]
|
|
789
|
+
# v = base(value = model.get('value'))
|
|
790
|
+
# model['value'] = v
|
|
791
|
+
# return model
|
|
792
|
+
|
|
793
|
+
|
|
794
|
+
# @model_validator(mode='before')
|
|
795
|
+
# @classmethod
|
|
796
|
+
# def cast_to_subclass(cls, model):
|
|
797
|
+
|
|
798
|
+
# # this method should do anything if called by the subclasses
|
|
799
|
+
# if cls is not ValueSegment2:
|
|
800
|
+
# return model
|
|
801
|
+
|
|
802
|
+
# k = model.get('key')
|
|
803
|
+
# t = model.get('type')
|
|
804
|
+
# v = model.get('value')
|
|
805
|
+
# match t:
|
|
806
|
+
# case 'T.D':
|
|
807
|
+
# model = DateSegment(key=k, value=v, type=t)
|
|
808
|
+
# case 'T.B':
|
|
809
|
+
# model = BoolSegment(key=k, value=v, type=t)
|
|
810
|
+
# case 'T.A':
|
|
811
|
+
# model = AlphanumericSegment(key=k, value=v, type=t)
|
|
812
|
+
# case 'T.T':
|
|
813
|
+
# model = TextSegment(key=k, value=v, type=t)
|
|
814
|
+
# case 'T.X':
|
|
815
|
+
# model = BinarySegment(key=k, value=v, type=t)
|
|
816
|
+
# case 'E':
|
|
817
|
+
# model = ErrorSegment(key=k, value=v, type=t)
|
|
818
|
+
# case _:
|
|
819
|
+
# if not t in unece_unit_codes():
|
|
820
|
+
# raise ValueError(f'Invalid unit code. {t} is not in UNECE list of common codes')
|
|
821
|
+
# model = NumericSegment(key=k, value=v, type=t)
|
|
822
|
+
|
|
823
|
+
# return model
|
|
824
|
+
|
|
825
|
+
|
|
826
|
+
# class ValueSegment(TREX_Segment, ValueMixin, ABC):
|
|
827
|
+
# type:str
|
|
828
|
+
|
|
829
|
+
# @model_validator(mode='before')
|
|
830
|
+
# @classmethod
|
|
831
|
+
# def convert_str_value(cls, model):
|
|
832
|
+
# if isinstance(model.get('value'), str):
|
|
833
|
+
# bases = [base for base in cls.__bases__ if base is not ValueSegment2 and issubclass(base, ValueMixin)]
|
|
834
|
+
# base = bases[0]
|
|
835
|
+
# v = base(value = model.get('value'))
|
|
836
|
+
# model['value'] = v
|
|
837
|
+
# return model
|
|
838
|
+
|
|
839
|
+
|
|
840
|
+
# @model_validator(mode='before')
|
|
841
|
+
# @classmethod
|
|
842
|
+
# def cast_to_subclass(cls, model):
|
|
843
|
+
|
|
844
|
+
# # this method should do anything if called by the subclasses
|
|
845
|
+
# if cls is not ValueSegment2:
|
|
846
|
+
# return model
|
|
847
|
+
|
|
848
|
+
# k = model.get('key')
|
|
849
|
+
# t = model.get('type')
|
|
850
|
+
# v = model.get('value')
|
|
851
|
+
# match t:
|
|
852
|
+
# case 'T.D':
|
|
853
|
+
# model = DateSegment(key=k, value=v, type=t)
|
|
854
|
+
# case 'T.B':
|
|
855
|
+
# model = BoolSegment(key=k, value=v, type=t)
|
|
856
|
+
# case 'T.A':
|
|
857
|
+
# model = AlphanumericSegment(key=k, value=v, type=t)
|
|
858
|
+
# case 'T.T':
|
|
859
|
+
# model = TextSegment(key=k, value=v, type=t)
|
|
860
|
+
# case 'T.X':
|
|
861
|
+
# model = BinarySegment(key=k, value=v, type=t)
|
|
862
|
+
# case 'E':
|
|
863
|
+
# model = ErrorSegment(key=k, value=v, type=t)
|
|
864
|
+
# case _:
|
|
865
|
+
# if not t in unece_unit_codes():
|
|
866
|
+
# raise ValueError(f'Invalid unit code. {t} is not in UNECE list of common codes')
|
|
867
|
+
# model = NumericSegment(key=k, value=v, type=t)
|
|
868
|
+
|
|
869
|
+
# return model
|