labfreed 0.0.4__py3-none-any.whl → 0.2.0b0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in the public registry they were published to, and is provided for informational purposes only.
- labfreed/PAC_CAT/__init__.py +16 -0
- labfreed/PAC_CAT/category_base.py +51 -0
- labfreed/PAC_CAT/pac_cat.py +159 -0
- labfreed/PAC_CAT/predefined_categories.py +190 -0
- labfreed/PAC_ID/__init__.py +19 -0
- labfreed/PAC_ID/extension.py +48 -0
- labfreed/PAC_ID/id_segment.py +90 -0
- labfreed/PAC_ID/pac_id.py +140 -0
- labfreed/PAC_ID/url_parser.py +154 -0
- labfreed/PAC_ID/url_serializer.py +80 -0
- labfreed/PAC_ID_Resolver/__init__.py +2 -0
- labfreed/PAC_ID_Resolver/cit_v1.py +149 -0
- labfreed/PAC_ID_Resolver/cit_v2.py +303 -0
- labfreed/PAC_ID_Resolver/resolver.py +81 -0
- labfreed/PAC_ID_Resolver/services.py +80 -0
- labfreed/__init__.py +4 -1
- labfreed/labfreed_infrastructure.py +276 -0
- labfreed/qr/__init__.py +1 -0
- labfreed/qr/generate_qr.py +422 -0
- labfreed/trex/__init__.py +16 -0
- labfreed/trex/python_convenience/__init__.py +3 -0
- labfreed/trex/python_convenience/data_table.py +45 -0
- labfreed/trex/python_convenience/pyTREX.py +242 -0
- labfreed/trex/python_convenience/quantity.py +46 -0
- labfreed/trex/table_segment.py +227 -0
- labfreed/trex/trex.py +69 -0
- labfreed/trex/trex_base_models.py +336 -0
- labfreed/trex/value_segments.py +111 -0
- labfreed/{DisplayNameExtension → utilities}/base36.py +29 -13
- labfreed/well_known_extensions/__init__.py +5 -0
- labfreed/well_known_extensions/default_extension_interpreters.py +7 -0
- labfreed/well_known_extensions/display_name_extension.py +40 -0
- labfreed/well_known_extensions/trex_extension.py +31 -0
- labfreed/well_known_keys/gs1/__init__.py +6 -0
- labfreed/well_known_keys/gs1/gs1.py +4 -0
- labfreed/well_known_keys/gs1/gs1_ai_enum_sorted.py +57 -0
- labfreed/well_known_keys/labfreed/well_known_keys.py +16 -0
- labfreed/well_known_keys/unece/UneceUnits.json +33730 -0
- labfreed/well_known_keys/unece/__init__.py +4 -0
- labfreed/well_known_keys/unece/unece_units.py +68 -0
- labfreed-0.2.0b0.dist-info/METADATA +329 -0
- labfreed-0.2.0b0.dist-info/RECORD +44 -0
- {labfreed-0.0.4.dist-info → labfreed-0.2.0b0.dist-info}/WHEEL +1 -1
- labfreed/DisplayNameExtension/DisplayNameExtension.py +0 -34
- labfreed/PAC_CAT/data_model.py +0 -109
- labfreed/PAC_ID/data_model.py +0 -114
- labfreed/PAC_ID/parse.py +0 -133
- labfreed/PAC_ID/serialize.py +0 -57
- labfreed/TREXExtension/data_model.py +0 -239
- labfreed/TREXExtension/parse.py +0 -46
- labfreed/TREXExtension/uncertainty.py +0 -32
- labfreed/TREXExtension/unit_utilities.py +0 -134
- labfreed-0.0.4.dist-info/METADATA +0 -15
- labfreed-0.0.4.dist-info/RECORD +0 -17
- {labfreed-0.0.4.dist-info → labfreed-0.2.0b0.dist-info}/licenses/LICENSE +0 -0
labfreed/trex/__init__.py ADDED

```diff
@@ -0,0 +1,16 @@
+from .trex import TREX
+from .value_segments import NumericSegment, DateSegment, BoolSegment, AlphanumericSegment, TextSegment, ErrorSegment
+from .table_segment import TableSegment, ColumnHeader, TableRow
+
+__all__ = [
+    "TREX",
+    "NumericSegment",
+    "DateSegment",
+    "BoolSegment",
+    "AlphanumericSegment",
+    "TextSegment",
+    "ErrorSegment",
+    "TableSegment",
+    "ColumnHeader",
+    "TableRow"
+]
```
labfreed/trex/python_convenience/data_table.py ADDED

```diff
@@ -0,0 +1,45 @@
+
+
+from datetime import date, datetime, time
+from pydantic import BaseModel, Field, PrivateAttr
+
+from labfreed.utilities.base36 import base36
+from labfreed.trex.python_convenience.quantity import Quantity
+
+
+class DataTable(BaseModel):
+    _row_template: list[str, Quantity | datetime | time | date | bool | str | base36] = PrivateAttr(default_factory=list)
+    col_names: list[str] = Field(default_factory=list)
+    data: list[str, Quantity | datetime | time | date | bool | str | base36] = Field(default_factory=list)
+
+    @property
+    def row_template(self):
+        return self._row_template
+
+    def append(self, row: list):
+        if not isinstance(row, list):
+            raise ValueError('row must be a list of values')
+        if not self._row_template:
+            self._row_template = row.copy()
+        if not len(row) == len(self._row_template):
+            raise ValueError('row is not of same length as the row template.')
+        if not self.col_names:
+            self.col_names = [f"Col{i}" for i in range(len(self._row_template))]
+
+        # make sure int and float get a unit, if the row template has one
+        for i, e in enumerate(row):
+            if isinstance(e, float | int) and isinstance(self._row_template[i], Quantity):
+                unit = self._row_template[i].unit
+                row[i] = Quantity(value=e, unit=unit)
+        self.data.append(row)
+
+
+    def extend(self, iterable):
+        for item in iterable:
+            if not len(item) == len(self._row_template):
+                raise ValueError('row is not of same length as the row template.')
+            self.data.append(item)
```
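For context, a minimal usage sketch of the `DataTable` added above (not part of the diff; names and values are invented): the first appended row is stored as the row template, and bare numbers appended later are wrapped in a `Quantity` with the unit of the corresponding template column.

```python
# Illustrative sketch only, assuming labfreed 0.2.0b0 as shown in this hunk.
from datetime import date

from labfreed.trex.python_convenience.data_table import DataTable
from labfreed.trex.python_convenience.quantity import Quantity

table = DataTable(col_names=["SAMPLE", "TEMPERATURE", "MEASURED"])
# The first appended row is copied as the row template.
table.append(["S-001", Quantity(value=25.3, unit="K"), date(2024, 1, 2)])
# A bare float in a Quantity column is wrapped with the template's unit.
table.append(["S-002", 24.8, date(2024, 1, 3)])
```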
labfreed/trex/python_convenience/pyTREX.py ADDED

```diff
@@ -0,0 +1,242 @@
+
+from datetime import date, datetime, time
+import logging
+import re
+
+from pydantic import RootModel
+from labfreed.well_known_keys.unece.unece_units import unece_unit
+from labfreed.trex.python_convenience.data_table import DataTable
+from labfreed.utilities.base36 import from_base36, base36, to_base36
+
+from labfreed.trex.python_convenience.quantity import Quantity, unece_unit_code_from_quantity
+from labfreed.trex.table_segment import ColumnHeader, TableSegment
+from labfreed.trex.trex import TREX
+from labfreed.trex.trex_base_models import AlphanumericValue, BinaryValue, BoolValue, DateValue, ErrorValue, NumericValue, TextValue
+from labfreed.trex.value_segments import BoolSegment, TextSegment, NumericSegment, AlphanumericSegment, DateSegment, ValueSegment
+
+
+class pyTREX(RootModel[dict[str, Quantity | datetime | time | date | bool | str | base36 | DataTable]]):
+    ''' A wrapper around dict, which knows how to convert to and from TREX.
+    It restricts the types allowed as values. Keys must be str.
+    '''
+    model_config = {'arbitrary_types_allowed': True}  # needed to allow Quantity and DataTable w/o implementing the pydantic schema
+    '''@private'''
+
+
+    @classmethod
+    def from_trex(cls, trex: TREX):
+        '''Creates a pyTREX from a TREX'''
+        return {seg.key: _trex_segment_to_python_type(seg) for seg in trex.segments}
+
+
+    def to_trex(self):
+        '''Creates a TREX'''
+        segments = list()
+        for k, v in self.root.items():
+            if isinstance(v, bool):
+                value = _bool_value_from_python_type(v)
+                segments.append(BoolSegment(key=k, value=value.value))
+            elif isinstance(v, Quantity):
+                unece_code = unece_unit_code_from_quantity(v)
+                value = _numeric_value_from_python_type(v.value)
+                segments.append(NumericSegment(key=k, value=value.value, type=unece_code))
+            elif isinstance(v, (int, float)):
+                value = _numeric_value_from_python_type(v)
+                segments.append(NumericSegment(key=k, value=value.value, type='C63'))  # unitless
+            elif isinstance(v, (datetime, time, date)):
+                value = _date_value_from_python_type(v)
+                segments.append(DateSegment(key=k, value=value.value))
+            elif isinstance(v, str):
+                if re.fullmatch(r'[A-Z0-9\-\.]*', v):
+                    value = _alphanumeric_value_from_python_type(v)
+                    segments.append(AlphanumericSegment(key=k, value=value.value))
+                else:
+                    v = to_base36(v)
+                    value = _text_value_from_python_type(v)
+                    segments.append(TextSegment(key=k, value=value.value))
+            elif isinstance(v, base36):
+                value = _text_value_from_python_type(v)
+                segments.append(TextSegment(key=k, value=value.value))
+
+            elif isinstance(v, DataTable):
+                v: DataTable = v
+                headers = list()
+                for nm, rt in zip(v.col_names, v.row_template):
+                    if isinstance(rt, bool):  # must come first, otherwise bool matches the int branch
+                        t = 'T.B'
+                    elif isinstance(rt, Quantity):
+                        unece_code = unece_unit_code_from_quantity(rt)
+                        t = unece_code
+                    elif isinstance(rt, (datetime, time, date)):
+                        t = 'T.D'
+                    elif isinstance(rt, str):
+                        if re.fullmatch(r'[A-Z0-9\-\.]*', rt):
+                            t = 'T.A'
+                        else:
+                            rt = to_base36(rt)
+                            t = 'T.X'
+                    elif isinstance(rt, base36):
+                        t = 'T.X'
+
+                    headers.append(ColumnHeader(key=nm, type=t))
+                data = []
+                for row in v.data:
+                    r = []
+                    for e in row:
+                        if isinstance(e, bool):  # must come first, otherwise bool matches the int branch
+                            r.append(_bool_value_from_python_type(e))
+                        elif isinstance(e, Quantity):
+                            r.append(_numeric_value_from_python_type(e.value))
+                        elif isinstance(e, (int, float)):
+                            r.append(_numeric_value_from_python_type(e))
+                        elif isinstance(e, (datetime, time, date)):
+                            r.append(_date_value_from_python_type(e))
+                        elif isinstance(e, str):
+                            if re.fullmatch(r'[A-Z0-9\-\.]*', e):
+                                r.append(_alphanumeric_value_from_python_type(e))
+                            else:
+                                e = to_base36(e)
+                                r.append(_text_value_from_python_type(e))
+                        elif isinstance(e, base36):
+                            r.append(_text_value_from_python_type(e))
+                    data.append(r)
+                segments.append(TableSegment(key=k, column_headers=headers, data=data))
+        return TREX(segments=segments)
+
+    # make the usual dict methods available, for convenience
+    def __getitem__(self, key): return self.root[key]
+    def __setitem__(self, key, value): self.root[key] = value
+    def update(self, *args, **kwargs):
+        return self.root.update(*args, **kwargs)
+    def keys(self): return self.root.keys()
+    def values(self): return self.root.values()
+    def items(self): return self.root.items()
+    def __contains__(self, key): return key in self.root
+    def __iter__(self): return iter(self.root)
+    def __len__(self): return len(self.root)
+
+
+
+# Helper functions to convert python types to TREX types
+
+def _numeric_value_from_python_type(v: int | float):
+    return NumericValue(value=str(v))
+
+
+def _date_value_from_python_type(v: date | time | datetime):
+    sd = ""
+    st = ""
+    if isinstance(v, date) or isinstance(v, datetime):
+        sd = v.strftime('%Y%m%d')
+    if isinstance(v, time) or isinstance(v, datetime):
+        if v.microsecond:
+            st = v.strftime("T%H%M%S.") + f"{v.microsecond // 1000:03d}"
+        elif v.second:
+            st = v.strftime("T%H%M%S")
+        else:
+            st = v.strftime("T%H%M")
+
+    return DateValue(value=sd + st)
+
+
+def _bool_value_from_python_type(v: bool):
+    return BoolValue(value='T' if v else 'F')
+
+
+def _alphanumeric_value_from_python_type(v: str):
+    return AlphanumericValue(value=v)
+
+
+def _text_value_from_python_type(v: base36 | str):
+    if isinstance(v, str):
+        logging.info('Got str for text value > converting to base36')
+        out = to_base36(v).root
+    else:
+        out = v.root
+    return TextValue(value=out)
+
+
+def _binary_value_from_python_type(v: base36 | str):
+    if isinstance(v, str):
+        out = v
+    else:
+        out = v.root
+    return BinaryValue(value=out)
+
+
+def _error_value_from_python_type(v: str):
+    return ErrorValue(value=v)
+
+
+
+# Helper functions to convert from TREX types to python types
+def _trex_segment_to_python_type(v):
+    '''Converts a TREX segment to a python value. Note: the segment key must be handled outside.'''
+    if isinstance(v, NumericSegment):
+        num_val = _trex_value_to_python_type(v)
+        u = unece_unit(v.type)
+        unit = u.get('symbol')
+        return Quantity(value=num_val, unit=unit)
+
+    # value segments are derived from their respective value type
+    elif isinstance(v, ValueSegment):
+        return _trex_value_to_python_type(v)
+
+    elif isinstance(v, TableSegment):
+        table = DataTable(col_names=[ch.key for ch in v.column_headers])
+        for row in v.data:
+            r = []
+            for e, h in zip(row, v.column_headers):
+                if isinstance(e, NumericValue):
+                    u = unece_unit(h.type)
+                    unit = u.get('symbol')
+                    r.append(Quantity(value=e.value, unit=unit))
+                else:
+                    r.append(_trex_value_to_python_type(e))
+            table.append(r)
+        return table
+
+
+
+def _trex_value_to_python_type(v):
+    '''Converts a TREX value to the corresponding python type'''
+    if isinstance(v, NumericValue):
+        if '.' not in v.value and 'E' not in v.value:
+            return int(v.value)
+        else:
+            return float(v.value)
+
+    elif isinstance(v, DateValue):
+        d = v._date_time_dict
+        if d.get('year') and d.get('hour'):  # input has both date and time
+            return datetime(**d)
+        elif d.get('year'):  # input is only a date
+            return date(**d)
+        else:  # input is only a time
+            return time(**d)
+
+    elif isinstance(v, BoolValue):
+        if v.value == 'T':
+            return True
+        elif v.value == 'F':
+            return False
+        else:
+            raise Exception(f'{v} is not a valid boolean. That really should not have been possible -- contact the maintainers of the library')
+
+    elif isinstance(v, AlphanumericValue):
+        return v.value
+
+    elif isinstance(v, TextValue):
+        decoded = from_base36(v.value)
+        return decoded
+
+    elif isinstance(v, BinaryValue):
+        decoded = bytes(from_base36(v.value))
+        return decoded
+
+    elif isinstance(v, ErrorValue):
+        return v.value
+
+    else:
+        raise TypeError(f'Invalid type {type(v)} of segment')
```
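As a rough usage sketch (not part of the diff), plain Python values can be wrapped in a `pyTREX` and turned into a `TREX`. The keys and values below are invented; note that a `Quantity`'s unit has to resolve to exactly one UNECE common code for `unece_unit_code_from_quantity` to succeed.

```python
# Illustrative sketch only; keys and values are invented.
from datetime import datetime

from labfreed.trex.python_convenience.pyTREX import pyTREX
from labfreed.trex.python_convenience.quantity import Quantity

data = pyTREX({
    "TEMP": Quantity(value=25.3, unit="K"),  # Quantity -> NumericSegment with a UNECE type code
    "START": datetime(2024, 1, 2, 13, 30),   # datetime/date/time -> DateSegment
    "OK": True,                              # bool -> BoolSegment
    "BATCH": "A-1.2",                        # only A-Z, 0-9, '-', '.' -> AlphanumericSegment
})
trex = data.to_trex()
print(trex.serialize())
```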
labfreed/trex/python_convenience/quantity.py ADDED

```diff
@@ -0,0 +1,46 @@
+from pydantic import BaseModel, model_validator
+from labfreed.well_known_keys.unece.unece_units import unece_units
+
+
+class Quantity(BaseModel):
+    ''' Represents a quantity'''
+    value: float | int
+    unit: str
+    significant_digits: int | None = None
+
+    @model_validator(mode='after')
+    def significant_digits_for_int(self):
+        if isinstance(self.value, int):
+            self.significant_digits = 0
+        return self
+
+    @property
+    def float(self) -> float:
+        ''' for clarity returns the value'''
+        return self.value
+
+    def __str__(self):
+        unit_symbol = self.unit
+        if self.unit == "dimensionless" or not self.unit:
+            unit_symbol = ""
+        if self.significant_digits is not None:
+            val = f"{self.value:.{self.significant_digits}f}"
+        else:
+            val = str(self.value)
+        return f"{val} {unit_symbol}"
+
+    def __repr__(self):
+        return f'Quantity: {str(self)}'
+
+
+
+
+def unece_unit_code_from_quantity(q: Quantity):
+    by_name = [u['commonCode'] for u in unece_units() if u.get('name', '') == q.unit]
+    by_symbol = [u['commonCode'] for u in unece_units() if u.get('symbol', '') == q.unit]
+    code = list(set(by_name) | set(by_symbol))
+    if len(code) != 1:
+        raise ValueError(f'No unique UNECE unit code found for Quantity {str(q)}')
+    return code[0]
```
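A small sketch of how `Quantity` renders and how the UNECE lookup behaves (not part of the diff; the lookup only succeeds if the unit matches exactly one name or symbol in the UNECE table):

```python
# Illustrative sketch only.
from labfreed.trex.python_convenience.quantity import Quantity, unece_unit_code_from_quantity

q = Quantity(value=101.3, unit="kPa", significant_digits=2)
print(q)        # "101.30 kPa" -- significant_digits sets the number of decimals in __str__
print(repr(q))  # "Quantity: 101.30 kPa"

code = unece_unit_code_from_quantity(q)  # the UNECE common code (e.g. 'KPA'), if 'kPa' matches exactly one entry
```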
labfreed/trex/table_segment.py ADDED

```diff
@@ -0,0 +1,227 @@
+
+
+from collections import Counter
+import logging
+import re
+
+from pydantic import RootModel, model_validator
+from labfreed.trex.trex_base_models import Value
+from labfreed.well_known_keys.unece.unece_units import unece_unit_codes
+from labfreed.labfreed_infrastructure import LabFREED_BaseModel, ValidationMsgLevel, _quote_texts
+from labfreed.trex.trex_base_models import AlphanumericValue, BinaryValue, BoolValue, DateValue, ErrorValue, NumericValue, TREX_Segment, TextValue, str_to_value_type
+
+
+class ColumnHeader(LabFREED_BaseModel):
+    '''Header of a table column'''
+    key: str
+    type: str
+
+    @model_validator(mode='after')
+    def _validate_key(self):
+        if not_allowed_chars := set(re.sub(r'[A-Z0-9\.-]', '', self.key)):
+            self._add_validation_message(
+                source=f"TREX table column {self.key}",
+                level=ValidationMsgLevel.ERROR,
+                msg=f"Column header key contains invalid characters: {_quote_texts(not_allowed_chars)}",
+                highlight_pattern=f'{self.key}$',
+                highlight_sub=not_allowed_chars
+            )
+        return self
+
+    @model_validator(mode='after')
+    def _validate_type(self):
+        valid_types = unece_unit_codes() + ['T.D', 'T.B', 'T.A', 'T.T', 'T.X', 'E']
+        if self.type not in valid_types:
+            self._add_validation_message(
+                source=f"TREX table column {self.key}",
+                level=ValidationMsgLevel.ERROR,
+                msg=f"Type '{self.type}' is invalid. Must be 'T.D', 'T.B', 'T.A', 'T.T', 'T.X', 'E' or a UNECE unit",
+                highlight_pattern=self.type
+            )
+        return self
+
+
+class TableRow(RootModel[list[Value]]):
+    """
+    Represents a row in a table.
+
+    This class is a Pydantic RootModel that wraps a `list[Value]`.
+    Each element in the list corresponds to a cell in the row.
+
+    All common list operations (indexing, iteration, append, pop, etc.) are supported.
+    Internally, it wraps a list in the `.root` attribute.
+    """
+    def serialize(self):
+        return ':'.join([e.serialize() for e in self.root])
+
+    def __len__(self):
+        return len(self.root)
+
+    def __iter__(self):
+        return iter(self.root)
+
+    def __repr__(self):
+        return f"TableRow({self.root!r}) # wraps list[{Value.__name__}]"
+
+
+class TableSegment(TREX_Segment):
+    '''TREX Segment which represents tabular data'''
+    key: str
+    column_headers: list[ColumnHeader]
+    data: list[TableRow]
+
+    @property
+    def column_names(self):
+        return [h.key for h in self.column_headers]
+
+    @property
+    def column_types(self):
+        return [h.type for h in self.column_headers]
+
+    @model_validator(mode='after')
+    def _validate_sizes(self):
+        sizes = [len(self.column_headers)]
+        sizes.extend([len(row) for row in self.data])
+        most_common_len, count = Counter(sizes).most_common(1)[0]
+
+        if len(self.column_headers) != most_common_len:
+            self._add_validation_message(
+                source=f"Table {self.key}",
+                level=ValidationMsgLevel.ERROR,
+                msg=f"Size mismatch: Table header contains {len(self.column_headers)} keys, while most rows have {most_common_len}",
+                highlight_pattern=self.key
+            )
+            expected_row_len = most_common_len
+        else:
+            expected_row_len = len(self.column_headers)
+
+        for i, row in enumerate(self.data):
+            if len(row) != expected_row_len:
+                self._add_validation_message(
+                    source=f"Table {self.key}",
+                    level=ValidationMsgLevel.ERROR,
+                    msg=f"Size mismatch: Table row {i} contains {len(row)} elements. Expected size is {expected_row_len}",
+                    highlight_pattern=row.serialize()
+                )
+        return self
+
+    @model_validator(mode='after')
+    def _validate_data_types(self):
+        expected_types = self.column_types
+        i = 0
+        for row in self.data:
+            for e, t_expected, nm in zip(row, expected_types, self.column_names):
+                try:
+                    match t_expected:
+                        case 'T.D':
+                            assert isinstance(e, DateValue)
+                        case 'T.B':
+                            assert isinstance(e, BoolValue)
+                        case 'T.A':
+                            assert isinstance(e, AlphanumericValue)
+                        case 'T.T':
+                            assert isinstance(e, TextValue)
+                        case 'T.X':
+                            assert isinstance(e, BinaryValue)
+                        case 'E':
+                            assert isinstance(e, ErrorValue)
+                        case _:
+                            assert isinstance(e, NumericValue)
+                except AssertionError:
+                    self._add_validation_message(
+                        source=f"Table {self.key}",
+                        level=ValidationMsgLevel.ERROR,
+                        msg=f"Type mismatch: Table row {i}, column {nm} is of wrong type. According to the header it should be {t_expected}",
+                        highlight_pattern=row.serialize(),
+                        highlight_sub=[c for c in e.value]
+                    )
+
+                if msg := e.errors():
+                    for m in msg:
+                        self._add_validation_message(
+                            source=f"Table {self.key}",
+                            level=ValidationMsgLevel.ERROR,
+                            msg=m.msg,
+                            highlight_pattern=row.serialize(),
+                            highlight_sub=[c for c in e.value]
+                        )
+            i += 1
+        return self
+
+
+    def _get_col_index(self, col: str | int):
+        if isinstance(col, str):
+            col_index = self.column_names.index(col)
+        elif isinstance(col, int):
+            col_index = col
+        else:
+            raise TypeError(f"Column must be specified as string or int: {type(col).__name__}")
+        return col_index
+
+
+    def serialize(self):
+        header = ':'.join([f'{h.key}${h.type}' for h in self.column_headers])
+        data = '::'.join([r.serialize() for r in self.data])
+        s = f'{self.key}$${header}::{data}'
+        return s
+
+
+    def n_rows(self) -> int:
+        return len(self.data)
+
+    def n_cols(self) -> int:
+        return len(self.column_headers)
+
+    def row_data(self, row: int) -> list:
+        out = self.data[row]
+        return out
+
+
+    def column_data(self, col: str | int) -> list:
+        col_index = self._get_col_index(col)
+        out = [row[col_index] for row in self.data]
+        return out
+
+
+    def cell_data(self, row: int, col: str | int):
+        try:
+            col_index = self._get_col_index(col)
+            value = self.data[row][col_index]
+        except ValueError:
+            logging.warning(f"row {row}, column {col} not found")
+            return None
+        return value
+
+
+
+
+def _deserialize_table_segment_from_trex_segment_str(trex_segment_str) -> TableSegment:
+    # re_table_pattern = re.compile(f"(?P<tablename>[\w\.-]*?)\$\$(?P<header>[\w\.,\$:]*?)::(?P<body>.*)")
+    # re_col_head_pattern = re.compile(f"(?P<name>[\w\.-]*?)\$(?P<unit>[\w\.]*)")
+    re_table_pattern = re.compile(r"(?P<tablename>.+?)\$\$(?P<header>.+?)::(?P<body>.+)")
+
+    matches = re_table_pattern.match(trex_segment_str)
+    if not matches:
+        return None
+    name, header, body = matches.groups()
+
+    column_headers_str = header.split(':')
+
+    headers = []
+    for column_header in column_headers_str:
+        ch = column_header.split('$')
+        col_key = ch[0]
+        col_type = ch[1] if len(ch) > 1 else ''
+        headers.append(ColumnHeader(key=col_key, type=col_type))
+
+    data = [row.split(':') for row in body.split('::')]
+    col_types = [h.type for h in headers]
+    # convert to correct value types
+    data_with_types = [[str_to_value_type(c, t) for c, t in zip(r, col_types)] for r in data]
+    data = [TableRow(r) for r in data_with_types]
+
+    out = TableSegment(column_headers=headers, data=data, key=name)
+    return out
```
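For orientation, the string shape that `TableSegment.serialize()` produces and `_deserialize_table_segment_from_trex_segment_str()` parses: `$$` separates the table key from the header, `$` separates a column key from its type, `:` separates cells, and `::` separates rows. The concrete keys and cells below are invented:

```python
# Illustrative only -- KEY$$<col>$<type>:<col>$<type>::<cell>:<cell>::<cell>:<cell>
example_table_segment = "DATA$$SAMPLE$T.A:TEMP$KEL:OK$T.B::S-001:25.3:T::S-002:24.8:F"
```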
labfreed/trex/trex.py ADDED

```diff
@@ -0,0 +1,69 @@
+from collections import Counter
+from typing import Self
+from pydantic import Field, field_validator
+
+from labfreed.labfreed_infrastructure import LabFREED_BaseModel
+from labfreed.trex.table_segment import _deserialize_table_segment_from_trex_segment_str
+from labfreed.trex.trex_base_models import TREX_Segment
+from labfreed.trex.value_segments import _deserialize_value_segment_from_trex_segment_str
+
+
+class TREX(LabFREED_BaseModel):
+    '''Represents a T-REX extension'''
+    segments: list[TREX_Segment] = Field(default_factory=list)
+
+    @classmethod
+    def deserialize(cls, data) -> Self:
+        segment_strings = data.split('+')
+        segments = list()
+        for s in segment_strings:
+            # there are only two valid options: the segment is a scalar or a table.
+            # the constructors do the parsing anyway and raise exceptions on invalid data,
+            # so try both options and then let it fail
+            segment = _deserialize_table_segment_from_trex_segment_str(s)
+            if not segment:
+                segment = _deserialize_value_segment_from_trex_segment_str(s)
+            if not segment:
+                raise ValueError('TREX contains neither valid value segment nor table')
+
+            segments.append(segment)
+        trex = TREX(segments=segments)
+        return trex
+
+
+    def serialize(self):
+        seg_strings = list()
+        for s in self.segments:
+            seg_strings.append(s.serialize())
+        s_out = '+'.join(seg_strings)
+        return s_out
+
+
+    def get_segment(self, segment_key: str) -> TREX_Segment:
+        '''Get a segment by key'''
+        s = [s for s in self.segments if s.key == segment_key]
+        if s:
+            return s[0]
+        else:
+            return None
+
+
+    def __str__(self):
+        s = self.serialize().replace('+', '\n+').replace('::', '::\n ')
+        return s
+
+
+    @field_validator('segments')
+    @classmethod
+    def _validate_segments(cls, segments):
+        segment_keys = [s.key for s in segments]
+        duplicates = [item for item, count in Counter(segment_keys).items() if count > 1]
+        if duplicates:
+            raise ValueError(f"Duplicate segment keys: {','.join(duplicates)}")
+        return segments
```
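A short round-trip sketch of the new `TREX` model (not part of the diff; the input string is invented and assumes `str_to_value_type` maps `T.A` and UNECE codes to the value types shown in table_segment.py). Segments are joined with `+`, and tables use the `KEY$$header::rows` shape shown above.

```python
# Illustrative sketch only; the TREX string is invented.
from labfreed.trex.trex import TREX

trex = TREX.deserialize("DATA$$SAMPLE$T.A:TEMP$KEL::S-001:25.3::S-002:24.8")
print(trex.get_segment("DATA").column_names)  # ['SAMPLE', 'TEMP']
print(trex.serialize())                       # re-joins the serialized segments with '+'
```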