labfreed 0.0.9__py2.py3-none-any.whl → 0.0.11__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- labfreed/DisplayNameExtension/DisplayNameExtension.py +5 -2
- labfreed/PAC_CAT/data_model copy.py +232 -0
- labfreed/PAC_CAT/data_model.py +319 -59
- labfreed/PAC_ID/data_model.py +42 -112
- labfreed/PAC_ID/extensions.py +55 -0
- labfreed/TREX/data_model.py +316 -396
- labfreed/TREX/parse.py +1 -69
- labfreed/TREX/unece_units.py +17 -1
- labfreed/__init__.py +1 -1
- labfreed/{PAC_ID/parse.py → parse_pac.py} +104 -59
- labfreed/utilities/base36.py +29 -13
- labfreed/utilities/extension_intertpreters.py +4 -0
- labfreed/utilities/utility_types.py +103 -0
- labfreed/{PAC_ID/well_known_segment_keys.py → utilities/well_known_keys.py} +1 -1
- labfreed/validation.py +3 -1
- {labfreed-0.0.9.dist-info → labfreed-0.0.11.dist-info}/METADATA +1 -1
- labfreed-0.0.11.dist-info/RECORD +22 -0
- labfreed/PAC_ID/serialize.py +0 -60
- labfreed/TREX/serialize.py +0 -3
- labfreed/conversion_tools/uncertainty.py +0 -32
- labfreed/conversion_tools/unit_utilities.py +0 -109
- labfreed-0.0.9.dist-info/RECORD +0 -22
- {labfreed-0.0.9.dist-info → labfreed-0.0.11.dist-info}/WHEEL +0 -0
- {labfreed-0.0.9.dist-info → labfreed-0.0.11.dist-info}/licenses/LICENSE +0 -0
labfreed/TREX/data_model.py
CHANGED
|
@@ -2,15 +2,17 @@ from datetime import date, datetime, time
|
|
|
2
2
|
import logging
|
|
3
3
|
import re
|
|
4
4
|
from collections import Counter
|
|
5
|
+
from typing import Annotated, Literal
|
|
5
6
|
|
|
6
7
|
|
|
7
|
-
from pydantic import RootModel, ValidationError, field_validator, model_validator, Field
|
|
8
|
-
from labfreed.TREX.unece_units import unece_unit_codes
|
|
8
|
+
from pydantic import PrivateAttr, RootModel, ValidationError, field_validator, model_validator, Field
|
|
9
|
+
from labfreed.TREX.unece_units import unece_unit, unece_unit_codes, unece_units, unit_name, unit_symbol
|
|
10
|
+
from labfreed.utilities.utility_types import DataTable, Quantity, Unit, unece_unit_code_from_quantity
|
|
9
11
|
from labfreed.validation import BaseModelWithValidationMessages
|
|
10
12
|
from abc import ABC, abstractmethod
|
|
11
13
|
|
|
12
|
-
from
|
|
13
|
-
from labfreed.utilities.base36 import to_base36, from_base36
|
|
14
|
+
from labfreed.PAC_ID.extensions import Extension
|
|
15
|
+
from labfreed.utilities.base36 import base36, to_base36, from_base36
|
|
14
16
|
|
|
15
17
|
|
|
16
18
|
|
|
@@ -33,6 +35,14 @@ class TREX_Segment(BaseModelWithValidationMessages, ABC):
|
|
|
33
35
|
def serialize_for_trex(self):
|
|
34
36
|
raise NotImplementedError("Subclasses must implement 'serialize_as_trex()' method")
|
|
35
37
|
|
|
38
|
+
# @abstractmethod
|
|
39
|
+
# def to_python_type(self):
|
|
40
|
+
# raise NotImplementedError("Subclasses must implement 'to_python_type()' method")
|
|
41
|
+
|
|
42
|
+
# @abstractmethod
|
|
43
|
+
# def from_python_type(self):
|
|
44
|
+
# raise NotImplementedError("Subclasses must implement 'from_python_type()' method")
|
|
45
|
+
|
|
36
46
|
|
|
37
47
|
|
|
38
48
|
|
|
@@ -46,16 +56,23 @@ class ValueMixin(BaseModelWithValidationMessages, ABC):
|
|
|
46
56
|
# def from_python_type(cls, v):
|
|
47
57
|
# ...
|
|
48
58
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
59
|
+
@abstractmethod
|
|
60
|
+
def value_to_python_type(self):
|
|
61
|
+
...
|
|
52
62
|
|
|
53
63
|
|
|
54
64
|
class NumericValue(ValueMixin):
|
|
65
|
+
@field_validator('value', mode='before')
|
|
66
|
+
@classmethod
|
|
67
|
+
def from_python_type(cls, v:str| int|float):
|
|
68
|
+
if isinstance(v, str):
|
|
69
|
+
return v
|
|
70
|
+
return str(v)
|
|
71
|
+
|
|
55
72
|
@model_validator(mode='after')
|
|
56
73
|
def validate(self):
|
|
57
74
|
value = self.value
|
|
58
|
-
if not_allowed_chars := set(re.sub(r'[0-9
|
|
75
|
+
if not_allowed_chars := set(re.sub(r'[0-9\.\-E]', '', value)):
|
|
59
76
|
self.add_validation_message(
|
|
60
77
|
source=f"TREX numeric value {value}",
|
|
61
78
|
type="Error",
|
|
@@ -63,7 +80,7 @@ class NumericValue(ValueMixin):
|
|
|
63
80
|
highlight_pattern = f'{value}',
|
|
64
81
|
highlight_sub=not_allowed_chars
|
|
65
82
|
)
|
|
66
|
-
if not re.fullmatch(r'-?\d+(\.\d+)?', value):
|
|
83
|
+
if not re.fullmatch(r'-?\d+(\.\d+)?(E-?\d+)?', value):
|
|
67
84
|
self.add_validation_message(
|
|
68
85
|
source=f"TREX numeric value {value}",
|
|
69
86
|
type="Error",
|
|
@@ -72,9 +89,36 @@ class NumericValue(ValueMixin):
|
|
|
72
89
|
)
|
|
73
90
|
return self
|
|
74
91
|
|
|
92
|
+
def value_to_python_type(self) -> str:
|
|
93
|
+
v = float(self.value)
|
|
94
|
+
if not '.' in self.value and not 'E' in self.value:
|
|
95
|
+
return int(v)
|
|
96
|
+
else:
|
|
97
|
+
return v
|
|
75
98
|
|
|
76
99
|
|
|
77
100
|
class DateValue(ValueMixin):
|
|
101
|
+
_date_time_dict:dict|None = PrivateAttr(default=None)
|
|
102
|
+
@field_validator('value', mode='before')
|
|
103
|
+
@classmethod
|
|
104
|
+
def from_python_type(cls, v:str| date|time|datetime):
|
|
105
|
+
if isinstance(v, str):
|
|
106
|
+
return v
|
|
107
|
+
|
|
108
|
+
sd = ""
|
|
109
|
+
st = ""
|
|
110
|
+
if isinstance(v, date) or isinstance(v, datetime):
|
|
111
|
+
sd = v.strftime('%Y%m%d')
|
|
112
|
+
if isinstance(v, time) or isinstance(v, datetime):
|
|
113
|
+
if v.microsecond:
|
|
114
|
+
st = v.strftime("T%H%M%S.") + f"{v.microsecond // 1000:03d}"
|
|
115
|
+
elif v.second:
|
|
116
|
+
st = v.strftime("T%H%M%S")
|
|
117
|
+
else:
|
|
118
|
+
st = v.strftime("T%H%M")
|
|
119
|
+
|
|
120
|
+
return sd + st
|
|
121
|
+
|
|
78
122
|
@model_validator(mode='after')
|
|
79
123
|
def validate(self):
|
|
80
124
|
pattern:str = r'((?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2}))?(T(?P<hour>\d{2})(?P<minute>\d{2})(?P<second>\d{2})?(\.(?P<millisecond>\d{3}))?)?'
|
|
@@ -89,7 +133,7 @@ class DateValue(ValueMixin):
|
|
|
89
133
|
return self
|
|
90
134
|
|
|
91
135
|
matches = re.match(pattern, value)
|
|
92
|
-
d = matches.groupdict()
|
|
136
|
+
d = matches.groupdict()
|
|
93
137
|
d = {k: int(v) for k,v in d.items() if v }
|
|
94
138
|
if 'millisecond' in d.keys():
|
|
95
139
|
ms = d.pop('millisecond')
|
|
@@ -106,31 +150,31 @@ class DateValue(ValueMixin):
|
|
|
106
150
|
msg=f'{value} is no valid date or time.',
|
|
107
151
|
highlight_pattern = f'{value}'
|
|
108
152
|
)
|
|
109
|
-
|
|
153
|
+
|
|
154
|
+
self._date_time_dict = d
|
|
110
155
|
return self
|
|
111
156
|
|
|
112
|
-
def
|
|
113
|
-
|
|
157
|
+
def value_to_python_type(self) -> str:
|
|
158
|
+
d = self._date_time_dict
|
|
159
|
+
if d.get('year') and d.get('hour'): # input is only a time
|
|
160
|
+
return datetime(**d)
|
|
161
|
+
elif d.get('year'):
|
|
162
|
+
return date(**d)
|
|
163
|
+
else:
|
|
164
|
+
return time(**d)
|
|
114
165
|
|
|
115
|
-
|
|
116
|
-
def from_python_type(v:date|time|datetime):
|
|
117
|
-
sd = ""
|
|
118
|
-
st = ""
|
|
119
|
-
match v:
|
|
120
|
-
case date() | datetime():
|
|
121
|
-
sd = v.strftime('%Y%m%d')
|
|
122
|
-
case time() | datetime():
|
|
123
|
-
if v.microsecond:
|
|
124
|
-
st = v.strftime("T%H:%M:%S.") + f"{v.microsecond // 1000:03d}"
|
|
125
|
-
elif v.seconds:
|
|
126
|
-
st = v.strftime("T%H:%M:%S")
|
|
127
|
-
else:
|
|
128
|
-
st = v.strftime("T%H:%M")
|
|
129
|
-
|
|
130
|
-
return DateValue(value= sd + st)
|
|
166
|
+
|
|
131
167
|
|
|
132
168
|
|
|
133
169
|
class BoolValue(ValueMixin):
|
|
170
|
+
@field_validator('value', mode='before')
|
|
171
|
+
@classmethod
|
|
172
|
+
def from_python_type(cls, v:str| bool):
|
|
173
|
+
if isinstance(v, str):
|
|
174
|
+
return v
|
|
175
|
+
|
|
176
|
+
return 'T' if v else 'F'
|
|
177
|
+
|
|
134
178
|
@model_validator(mode='after')
|
|
135
179
|
def validate(self):
|
|
136
180
|
if not self.value in ['T', 'F']:
|
|
@@ -143,16 +187,21 @@ class BoolValue(ValueMixin):
|
|
|
143
187
|
)
|
|
144
188
|
return self
|
|
145
189
|
|
|
146
|
-
def
|
|
147
|
-
if self == 'T':
|
|
190
|
+
def value_to_python_type(self) -> str:
|
|
191
|
+
if self.value == 'T':
|
|
148
192
|
return True
|
|
149
|
-
elif self == 'F':
|
|
193
|
+
elif self.value == 'F':
|
|
150
194
|
return False
|
|
151
195
|
else:
|
|
152
196
|
Exception(f'{self} is not valid boolean. That really should not have been possible -- Contact the maintainers of the library')
|
|
153
197
|
|
|
154
198
|
|
|
155
199
|
class AlphanumericValue(ValueMixin):
|
|
200
|
+
@field_validator('value', mode='before')
|
|
201
|
+
@classmethod
|
|
202
|
+
def from_python_type(cls, v:str):
|
|
203
|
+
return v
|
|
204
|
+
|
|
156
205
|
@model_validator(mode='after')
|
|
157
206
|
def validate(self):
|
|
158
207
|
if re.match(r'[a-z]', self.value):
|
|
@@ -173,15 +222,21 @@ class AlphanumericValue(ValueMixin):
|
|
|
173
222
|
)
|
|
174
223
|
return self
|
|
175
224
|
|
|
176
|
-
def
|
|
177
|
-
return self
|
|
225
|
+
def value_to_python_type(self) -> str:
|
|
226
|
+
return self.value
|
|
178
227
|
|
|
179
|
-
@classmethod
|
|
180
|
-
def from_python_type(cls, v):
|
|
181
|
-
raise NotImplementedError()
|
|
182
228
|
|
|
183
229
|
|
|
184
230
|
class TextValue(ValueMixin):
|
|
231
|
+
@field_validator('value', mode='before')
|
|
232
|
+
@classmethod
|
|
233
|
+
def from_python_type(cls, v:base36|str):
|
|
234
|
+
if isinstance(v, str):
|
|
235
|
+
logging.info('Got str for text value > converting to base36')
|
|
236
|
+
return to_base36(v).root
|
|
237
|
+
else:
|
|
238
|
+
return v.root
|
|
239
|
+
|
|
185
240
|
@model_validator(mode='after')
|
|
186
241
|
def validate(self):
|
|
187
242
|
if not_allowed_chars := set(re.sub(r'[A-Z0-9]', '', self.value)):
|
|
@@ -194,12 +249,20 @@ class TextValue(ValueMixin):
|
|
|
194
249
|
)
|
|
195
250
|
return self
|
|
196
251
|
|
|
197
|
-
def
|
|
198
|
-
decoded = from_base36(self)
|
|
252
|
+
def value_to_python_type(self) -> str:
|
|
253
|
+
decoded = from_base36(self.value)
|
|
199
254
|
return decoded
|
|
200
255
|
|
|
201
256
|
|
|
202
257
|
class BinaryValue(ValueMixin):
|
|
258
|
+
@field_validator('value', mode='before')
|
|
259
|
+
@classmethod
|
|
260
|
+
def from_python_type(cls, v:base36|str):
|
|
261
|
+
if isinstance(v, str):
|
|
262
|
+
return v
|
|
263
|
+
else:
|
|
264
|
+
return v.root
|
|
265
|
+
|
|
203
266
|
@model_validator(mode='after')
|
|
204
267
|
def validate(self):
|
|
205
268
|
if not_allowed_chars := set(re.sub(r'[A-Z0-9]', '', self.value)):
|
|
@@ -212,7 +275,7 @@ class BinaryValue(ValueMixin):
|
|
|
212
275
|
)
|
|
213
276
|
return self
|
|
214
277
|
|
|
215
|
-
def
|
|
278
|
+
def value_to_python_type(self) -> bytes:
|
|
216
279
|
decoded = bytes(from_base36(self))
|
|
217
280
|
return decoded
|
|
218
281
|
|
|
@@ -231,8 +294,8 @@ class ErrorValue(ValueMixin):
|
|
|
231
294
|
return self
|
|
232
295
|
|
|
233
296
|
|
|
234
|
-
def
|
|
235
|
-
return self
|
|
297
|
+
def value_to_python_type(self) -> str:
|
|
298
|
+
return self.value
|
|
236
299
|
|
|
237
300
|
|
|
238
301
|
|
|
@@ -252,52 +315,64 @@ class ValueSegment(TREX_Segment, ValueMixin, ABC):
|
|
|
252
315
|
)
|
|
253
316
|
return self
|
|
254
317
|
|
|
255
|
-
@classmethod
|
|
256
|
-
def get_subclass(cls, type:str, key:str, value:str):
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
318
|
+
# @classmethod
|
|
319
|
+
# def get_subclass(cls, type:str, key:str, value:str):
|
|
320
|
+
# match type:
|
|
321
|
+
# case 'T.D':
|
|
322
|
+
# model = DateSegment(key=key, value=value, type=type)
|
|
323
|
+
# case 'T.B':
|
|
324
|
+
# model = BoolSegment(key=key, value=value, type=type)
|
|
325
|
+
# case 'T.A':
|
|
326
|
+
# model = AlphanumericSegment(key=key, value=value, type=type)
|
|
327
|
+
# case 'T.T':
|
|
328
|
+
# model = TextSegment(key=key, value=value, type=type)
|
|
329
|
+
# case 'T.X':
|
|
330
|
+
# model = BinarySegment(key=key, value=value, type=type)
|
|
331
|
+
# case 'E':
|
|
332
|
+
# model = ErrorSegment(key=key, value=value, type=type)
|
|
333
|
+
# case _:
|
|
334
|
+
# model = NumericSegment(value=value, key=key, type=type)
|
|
272
335
|
|
|
273
|
-
|
|
336
|
+
# return model
|
|
274
337
|
|
|
275
338
|
|
|
276
339
|
def serialize_for_trex(self) -> str:
|
|
277
340
|
return f'{self.key}${self.type}:{self.value}'
|
|
278
341
|
|
|
342
|
+
def to_python_type(self):
|
|
343
|
+
return self.value_to_python_type()
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
|
|
279
349
|
|
|
280
350
|
|
|
281
351
|
class NumericSegment(ValueSegment, NumericValue):
|
|
282
352
|
type: str
|
|
283
353
|
|
|
284
|
-
|
|
285
|
-
|
|
354
|
+
def to_python_type(self):
|
|
355
|
+
unit = unece_unit(self.type)
|
|
356
|
+
out = Quantity(value=self.value_to_python_type(), unit=Unit(name=unit_name(unit), symbol=unit_symbol(unit)))
|
|
357
|
+
return out
|
|
286
358
|
|
|
359
|
+
class DateSegment(ValueSegment, DateValue):
|
|
360
|
+
type: Literal['T.D'] = Field('T.D', frozen=True)
|
|
361
|
+
|
|
287
362
|
class BoolSegment(ValueSegment, BoolValue):
|
|
288
|
-
type:
|
|
363
|
+
type: Literal['T.B'] = Field('T.B', frozen=True)
|
|
289
364
|
|
|
290
365
|
class AlphanumericSegment(ValueSegment, AlphanumericValue):
|
|
291
|
-
type:
|
|
366
|
+
type: Literal['T.A'] = Field('T.A', frozen=True)
|
|
292
367
|
|
|
293
368
|
class TextSegment(ValueSegment, TextValue):
|
|
294
|
-
type:
|
|
369
|
+
type: Literal['T.T'] = Field('T.T', frozen=True)
|
|
295
370
|
|
|
296
371
|
class BinarySegment(ValueSegment, BinaryValue):
|
|
297
|
-
type:
|
|
372
|
+
type: Literal['T.X'] = Field('T.X', frozen=True)
|
|
298
373
|
|
|
299
374
|
class ErrorSegment(ValueSegment, ErrorValue):
|
|
300
|
-
type:
|
|
375
|
+
type: Literal['E'] = Field('E', frozen=True)
|
|
301
376
|
|
|
302
377
|
|
|
303
378
|
|
|
@@ -406,7 +481,7 @@ class TREX_Table(TREX_Segment):
|
|
|
406
481
|
self.add_validation_message(
|
|
407
482
|
source=f"Table {self.key}",
|
|
408
483
|
type="Error",
|
|
409
|
-
msg=f"
|
|
484
|
+
msg=f"Type mismatch: Table row {i}, column {nm} is of wrong type. According to the header it should be {t_expected}",
|
|
410
485
|
highlight_pattern = row.serialize_for_trex(),
|
|
411
486
|
highlight_sub=[c for c in e.value]
|
|
412
487
|
)
|
|
@@ -421,8 +496,7 @@ class TREX_Table(TREX_Segment):
|
|
|
421
496
|
highlight_sub=[c for c in e.value]
|
|
422
497
|
)
|
|
423
498
|
i += 1
|
|
424
|
-
|
|
425
|
-
|
|
499
|
+
|
|
426
500
|
|
|
427
501
|
def _get_col_index(self, col:str|int):
|
|
428
502
|
if isinstance(col, str):
|
|
@@ -440,9 +514,21 @@ class TREX_Table(TREX_Segment):
|
|
|
440
514
|
data = '::'.join([r.serialize_for_trex() for r in self.data])
|
|
441
515
|
s = f'{self.key}$${header}::{data}'
|
|
442
516
|
return s
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
def to_python_type(self):
|
|
520
|
+
table = DataTable([ch.key for ch in self.column_headers])
|
|
521
|
+
for row in self.data:
|
|
522
|
+
r = []
|
|
523
|
+
for e, h in zip(row, self.column_headers):
|
|
524
|
+
if isinstance(e, NumericValue):
|
|
525
|
+
u = unece_unit(h.type)
|
|
526
|
+
unit = Unit(name=u.get('name'), symbol=u.get('symbol'))
|
|
527
|
+
r.append(Quantity(value=e.value, unit=unit))
|
|
528
|
+
else:
|
|
529
|
+
r.append(e.value_to_python_type())
|
|
530
|
+
table.append(r)
|
|
531
|
+
return table
|
|
446
532
|
|
|
447
533
|
|
|
448
534
|
|
|
@@ -505,6 +591,76 @@ class TREX(Extension, BaseModelWithValidationMessages):
|
|
|
505
591
|
return None
|
|
506
592
|
|
|
507
593
|
|
|
594
|
+
def update(self, segments: dict[str, Quantity|datetime|time|date|bool|str|base36|DataTable] ):
|
|
595
|
+
for k, v in segments.items():
|
|
596
|
+
if isinstance(v, bool):
|
|
597
|
+
self.segments.append(BoolSegment(key=k, value=v))
|
|
598
|
+
elif isinstance(v, Quantity):
|
|
599
|
+
unece_code = unece_unit_code_from_quantity(v)
|
|
600
|
+
self.segments.append(NumericSegment(key=k, value=v.value, type=unece_code))
|
|
601
|
+
elif isinstance(v, (int, float)):
|
|
602
|
+
self.segments.append(NumericSegment(key=k, value=v, type='C63')) # unitless
|
|
603
|
+
elif isinstance(v, (datetime, time, date)):
|
|
604
|
+
self.segments.append(DateSegment(key=k, value=v))
|
|
605
|
+
elif isinstance(v, str):
|
|
606
|
+
if re.fullmatch(r'[A-Z0-9\-\.]*', v):
|
|
607
|
+
self.segments.append(AlphanumericSegment(key=k, value=v))
|
|
608
|
+
else:
|
|
609
|
+
v = to_base36(v)
|
|
610
|
+
self.segments.append(TextSegment(key=k, value=v))
|
|
611
|
+
elif isinstance(v, base36):
|
|
612
|
+
self.segments.append(TextSegment(key=k, value=v))
|
|
613
|
+
elif isinstance(v, DataTable):
|
|
614
|
+
v:DataTable = v
|
|
615
|
+
headers = list()
|
|
616
|
+
for nm, rt in zip(v.col_names, v.row_template):
|
|
617
|
+
if isinstance(rt, bool): # must come first otherwise int matches the bool
|
|
618
|
+
t = 'T.B'
|
|
619
|
+
elif isinstance(rt, Quantity):
|
|
620
|
+
unece_code = unece_unit_code_from_quantity(rt)
|
|
621
|
+
t = unece_code
|
|
622
|
+
elif isinstance(rt, (datetime, time, date)):
|
|
623
|
+
t = 'T.D'
|
|
624
|
+
elif isinstance(rt, str):
|
|
625
|
+
if re.fullmatch(r'[A-Z0-9\-\.]*', rt):
|
|
626
|
+
t = 'T.A'
|
|
627
|
+
else:
|
|
628
|
+
v = to_base36(rt)
|
|
629
|
+
t = 'T.X'
|
|
630
|
+
elif isinstance(rt, base36):
|
|
631
|
+
t = 'T.X'
|
|
632
|
+
|
|
633
|
+
headers.append(ColumnHeader(key=nm, type=t))
|
|
634
|
+
data = []
|
|
635
|
+
for row in v:
|
|
636
|
+
r = []
|
|
637
|
+
for e in row:
|
|
638
|
+
if isinstance(e, bool): # must come first otherwise int matches the bool
|
|
639
|
+
r.append(BoolValue(value=e))
|
|
640
|
+
elif isinstance(e, Quantity):
|
|
641
|
+
r.append(NumericValue(value=e.value))
|
|
642
|
+
elif isinstance(e, (int, float)):
|
|
643
|
+
r.append(NumericValue(value=e))
|
|
644
|
+
elif isinstance(e, (datetime, time, date)):
|
|
645
|
+
r.append(DateValue(value=e))
|
|
646
|
+
elif isinstance(e, str):
|
|
647
|
+
if re.fullmatch(r'[A-Z0-9\-\.]*', e):
|
|
648
|
+
r.append(AlphanumericValue(value=e))
|
|
649
|
+
else:
|
|
650
|
+
e = to_base36(e)
|
|
651
|
+
r.append(TextValue(value=e))
|
|
652
|
+
elif isinstance(e, base36):
|
|
653
|
+
r.append(TextValue(value=e))
|
|
654
|
+
data.append(r)
|
|
655
|
+
|
|
656
|
+
self.segments.append(TREX_Table(key=k, column_headers=headers, data=data))
|
|
657
|
+
|
|
658
|
+
|
|
659
|
+
def dict(self):
|
|
660
|
+
return {s.key: s.to_python_type() for s in self.segments}
|
|
661
|
+
|
|
662
|
+
|
|
663
|
+
|
|
508
664
|
@field_validator('segments')
|
|
509
665
|
@classmethod
|
|
510
666
|
def validate_segments(cls, segments):
|
|
@@ -517,8 +673,93 @@ class TREX(Extension, BaseModelWithValidationMessages):
|
|
|
517
673
|
|
|
518
674
|
|
|
519
675
|
@staticmethod
|
|
520
|
-
def from_spec_fields(name,
|
|
521
|
-
|
|
676
|
+
def from_spec_fields(*, name, data, type='TREX'):
|
|
677
|
+
segment_strings = data.split('+')
|
|
678
|
+
out_segments = list()
|
|
679
|
+
for s in segment_strings:
|
|
680
|
+
# there are only two valid options. The segment is a scalar or a table.
|
|
681
|
+
# Constructors do the parsing anyways and raise exceptions if invalid data
|
|
682
|
+
# try both options and then let it fail
|
|
683
|
+
segment = _deserialize_table_segment_from_trex_segment_str(s)
|
|
684
|
+
if not segment:
|
|
685
|
+
segment = _deserialize_value_segment_from_trex_segment_str(s)
|
|
686
|
+
if not segment:
|
|
687
|
+
raise ValueError('TREX contains neither valid value segment nor table')
|
|
688
|
+
|
|
689
|
+
out_segments.append(segment)
|
|
690
|
+
trex = TREX(name_= name, segments=out_segments)
|
|
691
|
+
|
|
692
|
+
return trex
|
|
693
|
+
|
|
694
|
+
|
|
695
|
+
def _deserialize_value_segment_from_trex_segment_str(trex_segment_str) -> ValueSegment:
|
|
696
|
+
#re_scalar_pattern = re.compile(f"(?P<name>[\w\.-]*?)\$(?P<unit>[\w\.]*?):(?P<value>.*)")
|
|
697
|
+
re_scalar_pattern = re.compile(f"(?P<name>.+?)\$(?P<unit>.+?):(?P<value>.+)")
|
|
698
|
+
matches = re_scalar_pattern.match(trex_segment_str)
|
|
699
|
+
if not matches:
|
|
700
|
+
return None
|
|
701
|
+
|
|
702
|
+
key, type_, value = matches.groups()
|
|
703
|
+
|
|
704
|
+
match type_:
|
|
705
|
+
case 'T.D':
|
|
706
|
+
out = DateSegment(key=key, value=value, type=type_)
|
|
707
|
+
case 'T.B':
|
|
708
|
+
out = BoolSegment(key=key, value=value, type=type_)
|
|
709
|
+
case 'T.A':
|
|
710
|
+
out = AlphanumericSegment(key=key, value=value, type=type_)
|
|
711
|
+
case 'T.T':
|
|
712
|
+
out = TextSegment(key=key, value=base36(value), type=type_) # prevent repeated conversion from str to base36 and make explict that when parsing we assume the string tpo be base36 already
|
|
713
|
+
case 'T.X':
|
|
714
|
+
out = BinarySegment(key=key, value=base36(value), type=type_) # prevent repeated conversion from str to base36 and make explict that when parsing we assume the string tpo be base36 already
|
|
715
|
+
case 'E':
|
|
716
|
+
out = ErrorSegment(key=key, value=value, type=type_)
|
|
717
|
+
case _:
|
|
718
|
+
out = NumericSegment(value=value, key=key, type=type_)
|
|
719
|
+
|
|
720
|
+
return out
|
|
721
|
+
|
|
722
|
+
|
|
723
|
+
|
|
724
|
+
def _deserialize_table_segment_from_trex_segment_str(trex_segment_str) -> TREX_Table:
|
|
725
|
+
# re_table_pattern = re.compile(f"(?P<tablename>[\w\.-]*?)\$\$(?P<header>[\w\.,\$:]*?)::(?P<body>.*)")
|
|
726
|
+
# re_col_head_pattern = re.compile(f"(?P<name>[\w\.-]*?)\$(?P<unit>[\w\.]*)")
|
|
727
|
+
re_table_pattern = re.compile(r"(?P<tablename>.+?)\$\$(?P<header>.+?)::(?P<body>.+)")
|
|
728
|
+
|
|
729
|
+
matches = re_table_pattern.match(trex_segment_str)
|
|
730
|
+
if not matches:
|
|
731
|
+
return None
|
|
732
|
+
name, header, body = matches.groups()
|
|
733
|
+
|
|
734
|
+
column_headers_str = header.split(':')
|
|
735
|
+
|
|
736
|
+
headers = []
|
|
737
|
+
for colum_header in column_headers_str:
|
|
738
|
+
ch = colum_header.split('$')
|
|
739
|
+
col_key = ch[0]
|
|
740
|
+
col_type = ch[1] if len(ch) > 1 else ''
|
|
741
|
+
headers.append(ColumnHeader(key=col_key, type=col_type))
|
|
742
|
+
|
|
743
|
+
data = [row.split(':') for row in body.split('::') ]
|
|
744
|
+
col_types = [h.type for h in headers]
|
|
745
|
+
# convert to correct value types
|
|
746
|
+
data_with_types = [[str_to_value_type(c,t) for c, t in zip(r, col_types)] for r in data]
|
|
747
|
+
data = [ TableRow(r) for r in data_with_types]
|
|
748
|
+
|
|
749
|
+
out = TREX_Table(column_headers=headers, data=data_with_types, key=name)
|
|
750
|
+
return out
|
|
751
|
+
|
|
752
|
+
|
|
753
|
+
def str_to_value_type(s:str, t:str):
|
|
754
|
+
match t:
|
|
755
|
+
case 'T.D': v = DateValue(value=s)
|
|
756
|
+
case 'T.B': v = BoolValue(value=s)
|
|
757
|
+
case 'T.A': v = AlphanumericValue(value=s)
|
|
758
|
+
case 'T.T': v = TextValue(value=base36(s))
|
|
759
|
+
case 'T.X': v = BinaryValue(value=s)
|
|
760
|
+
case 'E' : v = ErrorValue(value=s)
|
|
761
|
+
case _ : v = NumericValue(value=s)
|
|
762
|
+
return v
|
|
522
763
|
|
|
523
764
|
|
|
524
765
|
|
|
@@ -546,324 +787,3 @@ class TREX_Struct(TREX_Segment):
|
|
|
546
787
|
|
|
547
788
|
|
|
548
789
|
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
# class Value(str, ABC):
|
|
554
|
-
# @classmethod
|
|
555
|
-
# def __get_validators__(cls):
|
|
556
|
-
# yield cls.validate
|
|
557
|
-
|
|
558
|
-
# def __new__(cls, value):
|
|
559
|
-
# # validate manually
|
|
560
|
-
# validated = cls.validate(value)
|
|
561
|
-
# return super().__new__(cls, validated)
|
|
562
|
-
|
|
563
|
-
# @abstractclassmethod
|
|
564
|
-
# def validate(cls, value):
|
|
565
|
-
# ...
|
|
566
|
-
|
|
567
|
-
# @abstractclassmethod
|
|
568
|
-
# def from_python_type(v):
|
|
569
|
-
# ...
|
|
570
|
-
|
|
571
|
-
# @abstractmethod
|
|
572
|
-
# def to_python_type():
|
|
573
|
-
# ...
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
# class NumericValue(Value):
|
|
578
|
-
# @classmethod
|
|
579
|
-
# def validate(cls, value):
|
|
580
|
-
# if not_allowed_chars := set(re.sub(r'[0-9\.-]', '', value)):
|
|
581
|
-
# raise ValueError(f"Characters {','.join(not_allowed_chars)} are not allowed in quantity segment. Base36 encoding only allows A-Z0-9")
|
|
582
|
-
# if not re.fullmatch(r'-?\d+(\.\d+)?', value):
|
|
583
|
-
# raise ValueError(f"{value} cannot be converted to number")
|
|
584
|
-
# return value
|
|
585
|
-
|
|
586
|
-
# def to_python_type(self) -> str:
|
|
587
|
-
# ...
|
|
588
|
-
|
|
589
|
-
# class DateValue(Value):
|
|
590
|
-
# pattern = r'((?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2}))?(T(?P<hours>\d{2})(?P<minutes>\d{2})(?P<seconds>\d{2})?(\.(?P<milliseconds>\d{3}))?)?'
|
|
591
|
-
|
|
592
|
-
# @classmethod
|
|
593
|
-
# def validate(cls, value):
|
|
594
|
-
# if not re.fullmatch(cls.pattern, value):
|
|
595
|
-
# raise ValueError(f'{value} is no valid date or time.')
|
|
596
|
-
# return value
|
|
597
|
-
|
|
598
|
-
# def to_python_type(self) -> str:
|
|
599
|
-
# ...
|
|
600
|
-
|
|
601
|
-
# @classmethod
|
|
602
|
-
# def from_python_type(v:date|time|datetime):
|
|
603
|
-
# sd = ""
|
|
604
|
-
# st = ""
|
|
605
|
-
# match v:
|
|
606
|
-
# case date() | datetime():
|
|
607
|
-
# sd = v.strftime('%Y%m%d')
|
|
608
|
-
# case time() | datetime():
|
|
609
|
-
# if v.microsecond:
|
|
610
|
-
# st = v.strftime("T%H:%M:%S.") + f"{v.microsecond // 1000:03d}"
|
|
611
|
-
# elif v.seconds:
|
|
612
|
-
# st = v.strftime("T%H:%M:%S")
|
|
613
|
-
# else:
|
|
614
|
-
# st = v.strftime("T%H:%M")
|
|
615
|
-
|
|
616
|
-
# return DateValue(sd + st)
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
# class BoolValue(Value):
|
|
622
|
-
# @classmethod
|
|
623
|
-
# def validate(cls, value):
|
|
624
|
-
# if not value in ['T', 'F']:
|
|
625
|
-
# raise ValueError(f'{value} is no valid boolean. Must be T or F')
|
|
626
|
-
# return value
|
|
627
|
-
|
|
628
|
-
# def to_python_type(self) -> str:
|
|
629
|
-
# if self == 'T':
|
|
630
|
-
# return True
|
|
631
|
-
# elif self == 'F':
|
|
632
|
-
# return False
|
|
633
|
-
# else:
|
|
634
|
-
# Exception(f'{self} is not valid boolean. That really should not have been possible -- Contact the maintainers of the library')
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
# class AlphanumericValue(Value):
|
|
638
|
-
# @classmethod
|
|
639
|
-
# def validate(cls, value):
|
|
640
|
-
# if not_allowed_chars := set(re.sub(r'[A-Z0-9\.-]', '', value)):
|
|
641
|
-
# raise ValueError(f"Characters {','.join(not_allowed_chars)} are not allowed in alphanumeric segment")
|
|
642
|
-
# else:
|
|
643
|
-
# return value
|
|
644
|
-
|
|
645
|
-
# @property
|
|
646
|
-
# def trex_type(self):
|
|
647
|
-
# return 'T.A'
|
|
648
|
-
|
|
649
|
-
# def to_python_type(self) -> str:
|
|
650
|
-
# return self
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
# class TextValue(Value):
|
|
654
|
-
|
|
655
|
-
# @classmethod
|
|
656
|
-
# def validate(cls, value):
|
|
657
|
-
# if not_allowed_chars := set(re.sub(r'[A-Z0-9]', '', value)):
|
|
658
|
-
# raise ValueError(f"Characters {','.join(not_allowed_chars)} are not allowed in text segment. Base36 encoding only allows A-Z0-9")
|
|
659
|
-
# else:
|
|
660
|
-
# return value
|
|
661
|
-
|
|
662
|
-
# @property
|
|
663
|
-
# def trex_type(self):
|
|
664
|
-
# return 'T.A'
|
|
665
|
-
|
|
666
|
-
# def to_python_type(self) -> str:
|
|
667
|
-
# decoded = from_base36(self)
|
|
668
|
-
# return decoded
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
# class BinaryValue(Value):
|
|
672
|
-
# @classmethod
|
|
673
|
-
# def validate(cls, value):
|
|
674
|
-
# if not_allowed_chars := set(re.sub(r'[A-Z0-9]', '', value)):
|
|
675
|
-
# raise ValueError(f"Characters {','.join(not_allowed_chars)} are not allowed in text segment. Base36 encoding only allows A-Z0-9")
|
|
676
|
-
# else:
|
|
677
|
-
# return value
|
|
678
|
-
|
|
679
|
-
# @property
|
|
680
|
-
# def trex_type(self):
|
|
681
|
-
# return 'T.X'
|
|
682
|
-
|
|
683
|
-
# def to_python_type(self) -> bytes:
|
|
684
|
-
# decoded = bytes(from_base36(self))
|
|
685
|
-
# return decoded
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
# class ErrorValue(Value):
|
|
689
|
-
# @classmethod
|
|
690
|
-
# def validate(cls, value):
|
|
691
|
-
# if not_allowed_chars := set(re.sub(r'[A-Z0-9\.-]', '', value)):
|
|
692
|
-
# raise ValueError(f"Characters {','.join(not_allowed_chars)} are not allowed in error segment")
|
|
693
|
-
# else:
|
|
694
|
-
# return value
|
|
695
|
-
|
|
696
|
-
# @property
|
|
697
|
-
# def trex_type(self):
|
|
698
|
-
# return 'E'
|
|
699
|
-
|
|
700
|
-
# def to_python_type(self) -> str:
|
|
701
|
-
# return self
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
# class TREX_Segment(BaseModelWithValidationMessages, ABC):
|
|
705
|
-
# key: str
|
|
706
|
-
|
|
707
|
-
# @field_validator('key')
|
|
708
|
-
# def validate_name(cls, v):
|
|
709
|
-
# if not_allowed_chars := set(re.sub(r'[A-Z0-9\.-]', '', v)):
|
|
710
|
-
# raise ValueError(f"Segment name contains invalid characters: {','.join(not_allowed_chars)}")
|
|
711
|
-
# return v
|
|
712
|
-
|
|
713
|
-
# @abstractmethod
|
|
714
|
-
# def serialize_for_trex(self):
|
|
715
|
-
# raise NotImplementedError("Subclasses must implement 'serialize_as_trex()' method")
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
# class ValueSegment(TREX_Segment):
|
|
720
|
-
# type: str
|
|
721
|
-
# value: str #NumericValue|DateValue|BoolValue|AlphanumericValue|TextValue|BinaryValue|ErrorValue
|
|
722
|
-
|
|
723
|
-
# @model_validator(mode='before')
|
|
724
|
-
# @classmethod
|
|
725
|
-
# def validate(cls, model):
|
|
726
|
-
# t = model.get('type')
|
|
727
|
-
# v = model.get('value')
|
|
728
|
-
# match t:
|
|
729
|
-
# case 'T.D':
|
|
730
|
-
# v = DateValue(v)
|
|
731
|
-
# case 'T.B':
|
|
732
|
-
# v = BoolValue(v)
|
|
733
|
-
# case 'T.A':
|
|
734
|
-
# v = AlphanumericValue(v)
|
|
735
|
-
# case 'T.T':
|
|
736
|
-
# v = TextValue(v)
|
|
737
|
-
# case 'T.X':
|
|
738
|
-
# v = BinaryValue(v)
|
|
739
|
-
# case 'E':
|
|
740
|
-
# v = ErrorValue(v)
|
|
741
|
-
# case _:
|
|
742
|
-
# if not t in unece_unit_codes():
|
|
743
|
-
# raise ValueError(f'Invalid unit code. {t} is not in UNECE list of common codes')
|
|
744
|
-
# v = NumericValue(v)
|
|
745
|
-
|
|
746
|
-
# model['value'] = v
|
|
747
|
-
# return model
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
# def serialize_for_trex(self) -> str:
|
|
751
|
-
# return f'{self.key}${self.type}:{self.value}'
|
|
752
|
-
|
|
753
|
-
# class UNECEQuantity(BaseModelWithWarnings):
|
|
754
|
-
# value:int|float
|
|
755
|
-
# unece_code:str
|
|
756
|
-
# unit_name: str|None = ""
|
|
757
|
-
# unit_symbol: str|None = ""
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
# def as_strings(self):
|
|
761
|
-
# unit_symbol = self.unit_symbol
|
|
762
|
-
# if unit_symbol == "dimensionless":
|
|
763
|
-
# unit_symbol = ""
|
|
764
|
-
# s = ''
|
|
765
|
-
|
|
766
|
-
# val_str = self.value
|
|
767
|
-
# return f"{val_str}", f"{unit_symbol}", f"{val_str} {unit_symbol}"
|
|
768
|
-
|
|
769
|
-
# def __str__(self):
|
|
770
|
-
# unit_symbol = self.unit_symbol
|
|
771
|
-
# if unit_symbol == "dimensionless":
|
|
772
|
-
# unit_symbol = ""
|
|
773
|
-
|
|
774
|
-
# s = f"{self.value} {unit_symbol}"
|
|
775
|
-
# return s
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
# class ValueSegment2(TREX_Segment, ValueMixin, ABC):
|
|
781
|
-
# type:str
|
|
782
|
-
|
|
783
|
-
# @model_validator(mode='before')
|
|
784
|
-
# @classmethod
|
|
785
|
-
# def convert_str_value(cls, model):
|
|
786
|
-
# if isinstance(model.get('value'), str):
|
|
787
|
-
# bases = [base for base in cls.__bases__ if base is not ValueSegment2 and issubclass(base, ValueMixin)]
|
|
788
|
-
# base = bases[0]
|
|
789
|
-
# v = base(value = model.get('value'))
|
|
790
|
-
# model['value'] = v
|
|
791
|
-
# return model
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
# @model_validator(mode='before')
|
|
795
|
-
# @classmethod
|
|
796
|
-
# def cast_to_subclass(cls, model):
|
|
797
|
-
|
|
798
|
-
# # this method should do anything if called by the subclasses
|
|
799
|
-
# if cls is not ValueSegment2:
|
|
800
|
-
# return model
|
|
801
|
-
|
|
802
|
-
# k = model.get('key')
|
|
803
|
-
# t = model.get('type')
|
|
804
|
-
# v = model.get('value')
|
|
805
|
-
# match t:
|
|
806
|
-
# case 'T.D':
|
|
807
|
-
# model = DateSegment(key=k, value=v, type=t)
|
|
808
|
-
# case 'T.B':
|
|
809
|
-
# model = BoolSegment(key=k, value=v, type=t)
|
|
810
|
-
# case 'T.A':
|
|
811
|
-
# model = AlphanumericSegment(key=k, value=v, type=t)
|
|
812
|
-
# case 'T.T':
|
|
813
|
-
# model = TextSegment(key=k, value=v, type=t)
|
|
814
|
-
# case 'T.X':
|
|
815
|
-
# model = BinarySegment(key=k, value=v, type=t)
|
|
816
|
-
# case 'E':
|
|
817
|
-
# model = ErrorSegment(key=k, value=v, type=t)
|
|
818
|
-
# case _:
|
|
819
|
-
# if not t in unece_unit_codes():
|
|
820
|
-
# raise ValueError(f'Invalid unit code. {t} is not in UNECE list of common codes')
|
|
821
|
-
# model = NumericSegment(key=k, value=v, type=t)
|
|
822
|
-
|
|
823
|
-
# return model
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
# class ValueSegment(TREX_Segment, ValueMixin, ABC):
|
|
827
|
-
# type:str
|
|
828
|
-
|
|
829
|
-
# @model_validator(mode='before')
|
|
830
|
-
# @classmethod
|
|
831
|
-
# def convert_str_value(cls, model):
|
|
832
|
-
# if isinstance(model.get('value'), str):
|
|
833
|
-
# bases = [base for base in cls.__bases__ if base is not ValueSegment2 and issubclass(base, ValueMixin)]
|
|
834
|
-
# base = bases[0]
|
|
835
|
-
# v = base(value = model.get('value'))
|
|
836
|
-
# model['value'] = v
|
|
837
|
-
# return model
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
# @model_validator(mode='before')
|
|
841
|
-
# @classmethod
|
|
842
|
-
# def cast_to_subclass(cls, model):
|
|
843
|
-
|
|
844
|
-
# # this method should do anything if called by the subclasses
|
|
845
|
-
# if cls is not ValueSegment2:
|
|
846
|
-
# return model
|
|
847
|
-
|
|
848
|
-
# k = model.get('key')
|
|
849
|
-
# t = model.get('type')
|
|
850
|
-
# v = model.get('value')
|
|
851
|
-
# match t:
|
|
852
|
-
# case 'T.D':
|
|
853
|
-
# model = DateSegment(key=k, value=v, type=t)
|
|
854
|
-
# case 'T.B':
|
|
855
|
-
# model = BoolSegment(key=k, value=v, type=t)
|
|
856
|
-
# case 'T.A':
|
|
857
|
-
# model = AlphanumericSegment(key=k, value=v, type=t)
|
|
858
|
-
# case 'T.T':
|
|
859
|
-
# model = TextSegment(key=k, value=v, type=t)
|
|
860
|
-
# case 'T.X':
|
|
861
|
-
# model = BinarySegment(key=k, value=v, type=t)
|
|
862
|
-
# case 'E':
|
|
863
|
-
# model = ErrorSegment(key=k, value=v, type=t)
|
|
864
|
-
# case _:
|
|
865
|
-
# if not t in unece_unit_codes():
|
|
866
|
-
# raise ValueError(f'Invalid unit code. {t} is not in UNECE list of common codes')
|
|
867
|
-
# model = NumericSegment(key=k, value=v, type=t)
|
|
868
|
-
|
|
869
|
-
# return model
|