labfreed 0.0.8__py2.py3-none-any.whl → 0.0.10__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,789 @@
1
+ from datetime import date, datetime, time
2
+ import logging
3
+ import re
4
+ from collections import Counter
5
+ from typing import Annotated, Literal
6
+
7
+
8
+ from pydantic import PrivateAttr, RootModel, ValidationError, field_validator, model_validator, Field
9
+ from labfreed.TREX.unece_units import unece_unit, unece_unit_codes, unece_units, unit_name, unit_symbol
10
+ from labfreed.utilities.utility_types import DataTable, Quantity, Unit, unece_unit_code_from_quantity
11
+ from labfreed.validation import BaseModelWithValidationMessages
12
+ from abc import ABC, abstractmethod
13
+
14
+ from labfreed.PAC_ID.extensions import Extension
15
+ from labfreed.utilities.base36 import base36, to_base36, from_base36
16
+
17
+
18
+
19
class TREX_Segment(BaseModelWithValidationMessages, ABC):
    """Abstract base class for one keyed segment of a T-REX.

    Subclasses must implement ``serialize_for_trex``.
    """

    key: str

    @model_validator(mode='after')
    def validate_key(self):
        """Record a validation error if the key contains characters outside ``A-Z0-9.-``."""
        if not_allowed_chars := set(re.sub(r'[A-Z0-9\.-]', '', self.key)):
            self.add_validation_message(
                source=f"TREX segment key {self.key}",
                type="Error",
                msg=f"Segment key contains invalid characters: {','.join(not_allowed_chars)}",
                highlight_pattern=f'{self.key}$',
                highlight_sub=not_allowed_chars
            )
        return self

    @abstractmethod
    def serialize_for_trex(self):
        """Return the T-REX string representation of this segment."""
        # The message previously named a non-existent 'serialize_as_trex()' method.
        raise NotImplementedError("Subclasses must implement 'serialize_for_trex()' method")
37
+
38
+ # @abstractmethod
39
+ # def to_python_type(self):
40
+ # raise NotImplementedError("Subclasses must implement 'to_python_type()' method")
41
+
42
+ # @abstractmethod
43
+ # def from_python_type(self):
44
+ # raise NotImplementedError("Subclasses must implement 'from_python_type()' method")
45
+
46
+
47
+
48
+
49
class ValueMixin(BaseModelWithValidationMessages, ABC):
    """Abstract holder of a single T-REX value kept in its serialized string form."""

    value: str

    @abstractmethod
    def value_to_python_type(self):
        """Convert the stored string into a Python value (implemented by subclasses)."""

    def serialize_for_trex(self):
        """Return the stored string unchanged."""
        return self.value
62
+
63
+
64
class NumericValue(ValueMixin):
    """Numeric value stored as a string: optional sign, digits, optional decimals and ``E`` exponent."""

    @field_validator('value', mode='before')
    @classmethod
    def from_python_type(cls, v: str | int | float):
        """Turn a Python number into its string form; strings pass through unchanged."""
        if isinstance(v, str):
            return v
        # str() renders small/large floats as e.g. '1e-05' or '1e+20'. The
        # validator below only accepts an upper-case 'E' and no '+', so
        # normalize to '1E-05' / '1E20' (both still parse with float()).
        return str(v).upper().replace('E+', 'E')

    @model_validator(mode='after')
    def validate(self):
        """Record validation errors for illegal characters or a malformed number."""
        value = self.value
        if not_allowed_chars := set(re.sub(r'[0-9\.\-E]', '', value)):
            self.add_validation_message(
                source=f"TREX numeric value {value}",
                type="Error",
                # The previous text was copy-pasted from the base36 validators.
                msg=f"Characters {','.join(not_allowed_chars)} are not allowed in numeric value. Only 0-9, '.', '-' and 'E' are allowed",
                highlight_pattern=f'{value}',
                highlight_sub=not_allowed_chars
            )
        if not re.fullmatch(r'-?\d+(\.\d+)?(E-?\d+)?', value):
            self.add_validation_message(
                source=f"TREX numeric value {value}",
                type="Error",
                msg=f"{value} cannot be converted to number",
                highlight_pattern=f'{value}'
            )
        return self

    def value_to_python_type(self) -> int | float:
        """Return an ``int`` when the string has no '.' or 'E', otherwise a ``float``.

        Raises:
            ValueError: if the stored string is not a parseable number.
        """
        if '.' not in self.value and 'E' not in self.value:
            # Parse integers directly; a detour through float() silently
            # corrupts integers that exceed float precision.
            return int(self.value)
        return float(self.value)
98
+
99
+
100
class DateValue(ValueMixin):
    """Date, time or datetime value stored as ``YYYYMMDD``, ``THHMM[SS[.mmm]]`` or both concatenated."""

    # Parsed components of a successfully validated value
    # (keyword arguments for date/time/datetime); None until validation succeeds.
    _date_time_dict: dict | None = PrivateAttr(default=None)

    @field_validator('value', mode='before')
    @classmethod
    def from_python_type(cls, v: str | date | time | datetime):
        """Serialize a date/time/datetime object; strings pass through unchanged."""
        if isinstance(v, str):
            return v

        sd = ""
        st = ""
        # datetime is a subclass of date, so this single check covers both.
        if isinstance(v, date):
            sd = v.strftime('%Y%m%d')
        if isinstance(v, (time, datetime)):
            # Emit the shortest form that keeps millisecond precision.
            if v.microsecond:
                st = v.strftime("T%H%M%S.") + f"{v.microsecond // 1000:03d}"
            elif v.second:
                st = v.strftime("T%H%M%S")
            else:
                st = v.strftime("T%H%M")

        return sd + st

    @model_validator(mode='after')
    def validate(self):
        """Check format and calendar validity; remember the parsed parts on success."""
        pattern: str = r'((?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2}))?(T(?P<hour>\d{2})(?P<minute>\d{2})(?P<second>\d{2})?(\.(?P<millisecond>\d{3}))?)?'
        value = self.value
        # Every part of the pattern is optional, so an empty string would
        # match; reject it explicitly.
        matches = re.fullmatch(pattern, value) if value else None
        if not matches:
            self.add_validation_message(
                source=f"TREX date value {value}",
                type="Error",
                msg=f'{value} is not in a valid format. Valid format for date: YYYYMMDD; Valid for time: THHMM, THHMMSS, THHMMSS.SSS; Datetime any combination of valid date and time',
                highlight_pattern=f'{value}'
            )
            return self

        d = {k: int(v) for k, v in matches.groupdict().items() if v}
        if 'millisecond' in d:
            d['microsecond'] = d.pop('millisecond') * 1000
        try:
            # Test key presence, not truthiness: year 0000 parses to 0 and must
            # still go through datetime() so that it is reported as invalid.
            if 'year' in d:
                datetime(**d)
            else:  # input is only a time
                time(**d)
        except ValueError:
            self.add_validation_message(
                source=f"TREX date value {value}",
                type="Error",
                msg=f'{value} is no valid date or time.',
                highlight_pattern=f'{value}'
            )
            return self

        self._date_time_dict = d
        return self

    def value_to_python_type(self) -> date | time | datetime:
        """Return a ``datetime``, ``date`` or ``time`` depending on which parts are present.

        Raises:
            ValueError: if the stored value did not pass validation.
        """
        d = self._date_time_dict
        if d is None:
            raise ValueError(f'{self.value} is no valid date or time.')
        has_date = 'year' in d
        # Key presence, not truthiness: hour 0 (midnight) is a valid time part.
        has_time = 'hour' in d
        if has_date and has_time:
            return datetime(**d)
        if has_date:
            return date(**d)
        return time(**d)
165
+
166
+
167
+
168
+
169
class BoolValue(ValueMixin):
    """Boolean value serialized as ``'T'`` or ``'F'``."""

    @field_validator('value', mode='before')
    @classmethod
    def from_python_type(cls, v: str | bool):
        """Map a truthy/falsy Python value to 'T'/'F'; strings pass through unchanged."""
        if isinstance(v, str):
            return v

        return 'T' if v else 'F'

    @model_validator(mode='after')
    def validate(self):
        """Record a validation error unless the value is exactly 'T' or 'F'."""
        if self.value not in ['T', 'F']:
            self.add_validation_message(
                source=f"TREX boolean value {self.value}",
                type="Error",
                msg=f'{self.value} is no valid boolean. Must be T or F',
                highlight_pattern=f'{self.value}',
                highlight_sub=list(self.value)
            )
        return self

    def value_to_python_type(self) -> bool:
        """Return ``True`` for 'T' and ``False`` for 'F'.

        Raises:
            ValueError: if the stored value is neither 'T' nor 'F'.
        """
        if self.value == 'T':
            return True
        if self.value == 'F':
            return False
        # The exception used to be constructed but never raised, so invalid
        # values silently produced None.
        raise ValueError(f'{self} is not valid boolean. That really should not have been possible -- Contact the maintainers of the library')
197
+
198
+
199
class AlphanumericValue(ValueMixin):
    """Plain value restricted to ``A-Z``, ``0-9``, ``.`` and ``-``."""

    @field_validator('value', mode='before')
    @classmethod
    def from_python_type(cls, v: str):
        """Return the input unchanged."""
        return v

    @model_validator(mode='after')
    def validate(self):
        """Record validation errors for lower-case letters and other illegal characters."""
        # re.search scans the whole string; re.match only looked at the first
        # character and so missed lower-case letters further in.
        if re.search(r'[a-z]', self.value):
            self.add_validation_message(
                source=f"TREX value {self.value}",
                type="Error",
                msg="Lower case characters are not allowed.",
                highlight_pattern=self.value
            )

        if not_allowed_chars := set(re.sub(r'[A-Z0-9\.-]', '', self.value)):
            self.add_validation_message(
                source=f"TREX value {self.value}",
                type="Error",
                msg=f"Characters {','.join(not_allowed_chars)} are not allowed in alphanumeric segment",
                highlight_pattern=self.value,
                highlight_sub=not_allowed_chars
            )
        return self

    def value_to_python_type(self) -> str:
        """Return the stored string unchanged."""
        return self.value
227
+
228
+
229
+
230
class TextValue(ValueMixin):
    """Text value stored base36-encoded."""

    @field_validator('value', mode='before')
    @classmethod
    def from_python_type(cls, v: base36 | str):
        """Return the base36 string: unwrap a ``base36`` object, encode a plain ``str``."""
        if not isinstance(v, str):
            return v.root
        logging.info('Got str for text value > converting to base36')
        encoded = to_base36(v)
        return encoded.root

    @model_validator(mode='after')
    def validate(self):
        """Add an error message when the value has characters outside ``A-Z0-9``."""
        leftovers = re.sub(r'[A-Z0-9]', '', self.value)
        illegal = set(leftovers)
        if illegal:
            self.add_validation_message(
                source=f"TREX value {self.value}",
                type="Error",
                msg=f"Characters {','.join(illegal)} are not allowed in text segment. Base36 encoding only allows A-Z0-9",
                highlight_pattern=self.value,
                highlight_sub=illegal,
            )
        return self

    def value_to_python_type(self) -> str:
        """Decode the stored base36 string with ``from_base36``."""
        return from_base36(self.value)
255
+
256
+
257
class BinaryValue(ValueMixin):
    """Binary value stored base36-encoded."""

    @field_validator('value', mode='before')
    @classmethod
    def from_python_type(cls, v: base36 | str):
        """Return the base36 string; a plain ``str`` is taken as already encoded."""
        if isinstance(v, str):
            return v
        return v.root

    @model_validator(mode='after')
    def validate(self):
        """Record a validation error when the value has characters outside ``A-Z0-9``."""
        if not_allowed_chars := set(re.sub(r'[A-Z0-9]', '', self.value)):
            self.add_validation_message(
                source=f"TREX value {self.value}",
                type="Error",
                msg=f"Characters {','.join(not_allowed_chars)} are not allowed in binary segment. Base36 encoding only allows A-Z0-9",
                highlight_pattern=self.value,
                highlight_sub=not_allowed_chars
            )
        return self

    def value_to_python_type(self) -> bytes:
        """Decode the stored base36 string and return it as ``bytes``."""
        # Decode the stored string, not the model instance itself.
        decoded = from_base36(self.value)
        if isinstance(decoded, str):
            # bytes(str) without an encoding always raises TypeError.
            # NOTE(review): assumes from_base36 yields UTF-8 text here -- confirm
            # against labfreed.utilities.base36.
            return decoded.encode('utf-8')
        return bytes(decoded)
281
+
282
+
283
class ErrorValue(ValueMixin):
    """Error value; only ``A-Z``, ``0-9``, ``.`` and ``-`` pass validation."""

    @model_validator(mode='after')
    def validate(self):
        """Add an error message listing any characters outside the allowed set."""
        leftovers = re.sub(r'[A-Z0-9\.-]', '', self.value)
        illegal = set(leftovers)
        if illegal:
            self.add_validation_message(
                source=f"TREX value {self.value}",
                type="Error",
                msg=f"Characters {','.join(illegal)} are not allowed in error segment",
                highlight_pattern=self.value,
                highlight_sub=illegal,
            )
        return self

    def value_to_python_type(self) -> str:
        """Return the stored string as is."""
        return self.value
299
+
300
+
301
+
302
+
303
class ValueSegment(TREX_Segment, ValueMixin, ABC):
    """Scalar segment: a key, a type code and one value."""

    type: str

    @model_validator(mode='after')
    def validate_type(self):
        """Add an error message unless the type is a UNECE unit code or a known T-REX type."""
        allowed = unece_unit_codes() + ['T.D', 'T.B', 'T.A', 'T.T', 'T.X', 'E']
        if self.type in allowed:
            return self
        self.add_validation_message(
            source=f"TREX value segment {self.key}",
            type="Error",
            msg=f"Type {self.type} is invalid. Must be 'T.D', 'T.B', 'T.A', 'T.T', 'T.X', 'E' or a UNECE unit",
            highlight_pattern=self.type,
        )
        return self

    def serialize_for_trex(self) -> str:
        """Render the segment as ``<key>$<type>:<value>``."""
        return '{}${}:{}'.format(self.key, self.type, self.value)

    def to_python_type(self):
        """Delegate to the value class' ``value_to_python_type``."""
        converted = self.value_to_python_type()
        return converted
344
+
345
+
346
+
347
+
348
+
349
+
350
+
351
class NumericSegment(ValueSegment, NumericValue):
    """Numeric scalar segment whose type is a unit code."""

    type: str

    def to_python_type(self):
        """Build a ``Quantity`` from the numeric value and the unit looked up via ``unece_unit``."""
        unece = unece_unit(self.type)
        return Quantity(
            value=self.value_to_python_type(),
            unit=Unit(name=unit_name(unece), symbol=unit_symbol(unece)),
        )
358
+
359
class DateSegment(ValueSegment, DateValue):
    """Scalar segment carrying a ``DateValue``; type code fixed to ``T.D``."""

    type: Literal['T.D'] = Field(default='T.D', frozen=True)
361
+
362
class BoolSegment(ValueSegment, BoolValue):
    """Scalar segment carrying a ``BoolValue``; type code fixed to ``T.B``."""

    type: Literal['T.B'] = Field(default='T.B', frozen=True)
364
+
365
class AlphanumericSegment(ValueSegment, AlphanumericValue):
    """Scalar segment carrying an ``AlphanumericValue``; type code fixed to ``T.A``."""

    type: Literal['T.A'] = Field(default='T.A', frozen=True)
367
+
368
class TextSegment(ValueSegment, TextValue):
    """Scalar segment carrying a ``TextValue``; type code fixed to ``T.T``."""

    type: Literal['T.T'] = Field(default='T.T', frozen=True)
370
+
371
class BinarySegment(ValueSegment, BinaryValue):
    """Scalar segment carrying a ``BinaryValue``; type code fixed to ``T.X``."""

    type: Literal['T.X'] = Field(default='T.X', frozen=True)
373
+
374
class ErrorSegment(ValueSegment, ErrorValue):
    """Scalar segment carrying an ``ErrorValue``; type code fixed to ``E``."""

    type: Literal['E'] = Field(default='E', frozen=True)
376
+
377
+
378
+
379
class ColumnHeader(BaseModelWithValidationMessages):
    """Header of one table column: its key and its type code."""

    key: str
    type: str

    @model_validator(mode='after')
    def validate_key(self):
        """Add an error message when the key has characters outside ``A-Z0-9.-``."""
        leftovers = re.sub(r'[A-Z0-9\.-]', '', self.key)
        illegal = set(leftovers)
        if illegal:
            self.add_validation_message(
                source=f"TREX table column {self.key}",
                type="Error",
                msg=f"Column header key contains invalid characters: {','.join(illegal)}",
                highlight_pattern=f'{self.key}$',
                highlight_sub=illegal,
            )
        return self

    @model_validator(mode='after')
    def validate_type(self):
        """Add an error message unless the type is a UNECE unit code or a known T-REX type."""
        allowed = unece_unit_codes() + ['T.D', 'T.B', 'T.A', 'T.T', 'T.X', 'E']
        if self.type in allowed:
            return self
        self.add_validation_message(
            source=f"TREX table column {self.key}",
            type="Error",
            msg=f"Type '{self.type}' is invalid. Must be 'T.D', 'T.B', 'T.A', 'T.T', 'T.X', 'E' or a UNECE unit",
            highlight_pattern=self.type,
        )
        return self
406
+
407
class TableRow(RootModel[list[ValueMixin]]):
    """One table row: a list of values that supports ``len()`` and iteration."""

    def __iter__(self):
        """Iterate over the cells of the row."""
        return iter(self.root)

    def __len__(self):
        """Number of cells in the row."""
        return len(self.root)

    def serialize_for_trex(self):
        """Join the serialized cells with ':'."""
        cells = [cell.serialize_for_trex() for cell in self.root]
        return ':'.join(cells)
416
+
417
class TREX_Table(TREX_Segment):
    """Table segment: a list of column headers plus rows of values."""

    column_headers: list[ColumnHeader]
    data: list[TableRow]

    @property
    def column_names(self):
        """Keys of all columns, in header order."""
        return [h.key for h in self.column_headers]

    @property
    def column_types(self):
        """Type codes of all columns, in header order."""
        return [h.type for h in self.column_headers]

    @model_validator(mode='after')
    def validate_sizes(self):
        """Record errors when header and row lengths disagree.

        The expected row length is the most common length among the header
        and all rows.
        """
        n_header = len(self.column_headers)
        sizes = [n_header]
        sizes.extend(len(row) for row in self.data)
        expected_row_len, _ = Counter(sizes).most_common(1)[0]

        if n_header != expected_row_len:
            self.add_validation_message(
                source=f"Table {self.key}",
                type="Error",
                # Previously referenced the non-existent attribute 'col_names',
                # which raised AttributeError instead of reporting the mismatch.
                msg=f"Size mismatch: Table header contains {n_header} keys, while most rows have {expected_row_len}",
                highlight_pattern=self.key
            )

        for i, row in enumerate(self.data):
            if len(row) != expected_row_len:
                self.add_validation_message(
                    source=f"Table {self.key}",
                    type="Error",
                    msg=f"Size mismatch: Table row {i} contains {len(row)} elements. Expected size is {expected_row_len}",
                    highlight_pattern=row.serialize_for_trex()
                )
        return self

    @model_validator(mode='after')
    def validate_data_types(self):
        """Record errors for cells whose value class does not match the column type.

        Validation messages already attached to a cell are copied onto the table.
        """
        # Any type code not listed here is treated as a unit code, i.e. numeric.
        value_class_by_type = {
            'T.D': DateValue,
            'T.B': BoolValue,
            'T.A': AlphanumericValue,
            'T.T': TextValue,
            'T.X': BinaryValue,
            'E': ErrorValue,
        }
        expected_types = self.column_types
        names = self.column_names
        for i, row in enumerate(self.data):
            for e, t_expected, nm in zip(row, expected_types, names):
                expected_cls = value_class_by_type.get(t_expected, NumericValue)
                # Explicit isinstance check: 'assert' is stripped under -O.
                if not isinstance(e, expected_cls):
                    self.add_validation_message(
                        source=f"Table {self.key}",
                        type="Error",
                        msg=f"Type mismatch: Table row {i}, column {nm} is of wrong type. According to the header it should be {t_expected}",
                        highlight_pattern=row.serialize_for_trex(),
                        highlight_sub=list(e.value)
                    )

                if msg := e.get_errors():
                    for m in msg:
                        self.add_validation_message(
                            source=f"Table {self.key}",
                            type="Error",
                            msg=m.problem_msg,
                            highlight_pattern=row.serialize_for_trex(),
                            highlight_sub=list(e.value)
                        )
        # An 'after' model validator must return the validated instance.
        return self

    def _get_col_index(self, col: str | int):
        """Resolve a column given by name or index to its index.

        Raises:
            ValueError: if a column name is not present.
            TypeError: if ``col`` is neither ``str`` nor ``int``.
        """
        if isinstance(col, str):
            return self.column_names.index(col)
        if isinstance(col, int):
            return col
        # Instances have no __name__; report the type of the offending argument.
        raise TypeError(f"Column must be specified as string or int: {type(col).__name__}")

    def serialize_for_trex(self):
        """Render as ``<key>$$<header>::<row>::<row>...``."""
        header = ':'.join([f'{h.key}${h.type}' for h in self.column_headers])
        data = '::'.join([r.serialize_for_trex() for r in self.data])
        return f'{self.key}$${header}::{data}'

    def to_python_type(self):
        """Convert to a ``DataTable``; numeric cells become ``Quantity`` objects."""
        table = DataTable([ch.key for ch in self.column_headers])
        for row in self.data:
            r = []
            for e, h in zip(row, self.column_headers):
                if isinstance(e, NumericValue):
                    u = unece_unit(h.type)
                    unit = Unit(name=u.get('name'), symbol=u.get('symbol'))
                    # Pass the parsed number (as NumericSegment.to_python_type
                    # does), not the raw serialized string.
                    r.append(Quantity(value=e.value_to_python_type(), unit=unit))
                else:
                    r.append(e.value_to_python_type())
            table.append(r)
        return table

    def n_rows(self) -> int:
        """Number of data rows."""
        return len(self.data)

    def n_cols(self) -> int:
        """Number of columns according to the header."""
        return len(self.column_headers)

    def row_data(self, row: int) -> list:
        """Return the row at index ``row``."""
        return self.data[row]

    def column_data(self, col: str | int) -> list:
        """Return the cells of one column, addressed by name or index."""
        col_index = self._get_col_index(col)
        # Kept from the original: indexing the header fails fast (IndexError)
        # for an out-of-range column even when the table has no rows.
        self.column_headers[col_index]
        return [row[col_index] for row in self.data]

    def cell_data(self, row: int, col: str | int):
        """Return one cell, or ``None`` (with a logged warning) if it does not exist."""
        try:
            col_index = self._get_col_index(col)
            value = self.data[row][col_index]
        except (ValueError, IndexError):
            # ValueError: unknown column name; IndexError: row or column index
            # out of range. The warning text covers both cases.
            logging.warning(f"row {row}, column {col} not found")
            return None
        return value
561
+
562
+
563
+
564
+
565
class TREX(Extension, BaseModelWithValidationMessages):
    """T-REX extension: a named collection of segments with unique keys."""

    name_: str
    segments: list[TREX_Segment] = Field(default_factory=list)

    @property
    def name(self) -> str:
        """Name of the extension."""
        return self.name_

    @property
    def type(self) -> str:
        """Extension type; always ``'TREX'``."""
        return 'TREX'

    @property
    def data(self) -> str:
        """All segments serialized and joined with '+'."""
        return '+'.join(s.serialize_for_trex() for s in self.segments)

    def get_segment(self, segment_key: str) -> TREX_Segment | None:
        """Return the first segment with the given key, or ``None`` if absent."""
        for s in self.segments:
            if s.key == segment_key:
                return s
        return None

    def update(self, segments: dict[str, Quantity|datetime|time|date|bool|str|base36|DataTable]):
        """Append one segment per item of ``segments``, choosing the segment class by value type.

        Values of unsupported types are skipped.

        Raises:
            TypeError: if a ``DataTable`` row template has a cell of unsupported type.
        """
        for k, v in segments.items():
            if isinstance(v, bool):  # must come first otherwise int matches the bool
                self.segments.append(BoolSegment(key=k, value=v))
            elif isinstance(v, Quantity):
                unece_code = unece_unit_code_from_quantity(v)
                self.segments.append(NumericSegment(key=k, value=v.value, type=unece_code))
            elif isinstance(v, (int, float)):
                self.segments.append(NumericSegment(key=k, value=v, type='C63'))  # unitless
            elif isinstance(v, (datetime, time, date)):
                self.segments.append(DateSegment(key=k, value=v))
            elif isinstance(v, str):
                if re.fullmatch(r'[A-Z0-9\-\.]*', v):
                    self.segments.append(AlphanumericSegment(key=k, value=v))
                else:
                    self.segments.append(TextSegment(key=k, value=to_base36(v)))
            elif isinstance(v, base36):
                self.segments.append(TextSegment(key=k, value=v))
            elif isinstance(v, DataTable):
                self.segments.append(self._table_from_data_table(k, v))

    @staticmethod
    def _column_type_for(template) -> str:
        """Return the T-REX type code matching one cell of a row template."""
        if isinstance(template, bool):  # must come first otherwise int matches the bool
            return 'T.B'
        if isinstance(template, Quantity):
            return unece_unit_code_from_quantity(template)
        if isinstance(template, (int, float)):
            # Plain numbers become NumericValue cells; same unitless code as
            # used for scalar numbers in update().
            return 'C63'
        if isinstance(template, (datetime, time, date)):
            return 'T.D'
        if isinstance(template, str):
            if re.fullmatch(r'[A-Z0-9\-\.]*', template):
                return 'T.A'
            # Such cells are stored as TextValue, so the column must be 'T.T'.
            return 'T.T'
        if isinstance(template, base36):
            return 'T.T'
        raise TypeError(f"Unsupported table cell type: {type(template).__name__}")

    @staticmethod
    def _cell_value_for(e):
        """Wrap one table cell in its value class; ``None`` for unsupported types."""
        if isinstance(e, bool):  # must come first otherwise int matches the bool
            return BoolValue(value=e)
        if isinstance(e, Quantity):
            return NumericValue(value=e.value)
        if isinstance(e, (int, float)):
            return NumericValue(value=e)
        if isinstance(e, (datetime, time, date)):
            return DateValue(value=e)
        if isinstance(e, str):
            if re.fullmatch(r'[A-Z0-9\-\.]*', e):
                return AlphanumericValue(value=e)
            return TextValue(value=to_base36(e))
        if isinstance(e, base36):
            return TextValue(value=e)
        return None

    @staticmethod
    def _table_from_data_table(key: str, table: DataTable) -> TREX_Table:
        """Build a ``TREX_Table`` from a ``DataTable``.

        Column types are derived from the table's row template.
        """
        # The original loop reassigned the variable holding the DataTable
        # while computing headers, so the rows were read from the wrong object.
        headers = [
            ColumnHeader(key=nm, type=TREX._column_type_for(rt))
            for nm, rt in zip(table.col_names, table.row_template)
        ]
        data = []
        for row in table:
            cells = []
            for e in row:
                cell = TREX._cell_value_for(e)
                if cell is None:
                    # Unsupported cell types are skipped as before; the row
                    # then triggers the table's size validation.
                    logging.warning(f"Table {key}: skipping cell of unsupported type {type(e).__name__}")
                    continue
                cells.append(cell)
            data.append(cells)
        return TREX_Table(key=key, column_headers=headers, data=data)

    def dict(self):
        """Return ``{segment key: Python value}`` for all segments."""
        return {s.key: s.to_python_type() for s in self.segments}

    @field_validator('segments')
    @classmethod
    def validate_segments(cls, segments):
        """Reject segment lists that contain duplicate keys.

        Raises:
            ValueError: if any key occurs more than once.
        """
        segment_keys = [s.key for s in segments]
        duplicates = [item for item, count in Counter(segment_keys).items() if count > 1]
        if duplicates:
            raise ValueError(f"Duplicate segment keys: {','.join(duplicates)}")
        return segments

    @staticmethod
    def from_spec_fields(*, name, data, type='TREX'):
        """Parse a '+'-separated T-REX data string into a ``TREX`` object.

        ``type`` is accepted but not used.

        Raises:
            ValueError: if a segment is neither a valid table nor a valid value segment.
        """
        out_segments = []
        for s in data.split('+'):
            # There are only two valid options: the segment is a table or a
            # scalar. Try the table first, then the scalar, then give up.
            segment = _deserialize_table_segment_from_trex_segment_str(s)
            if segment is None:
                segment = _deserialize_value_segment_from_trex_segment_str(s)
            if segment is None:
                raise ValueError('TREX contains neither valid value segment nor table')

            out_segments.append(segment)
        return TREX(name_=name, segments=out_segments)
693
+
694
+
695
+ def _deserialize_value_segment_from_trex_segment_str(trex_segment_str) -> ValueSegment:
696
+ #re_scalar_pattern = re.compile(f"(?P<name>[\w\.-]*?)\$(?P<unit>[\w\.]*?):(?P<value>.*)")
697
+ re_scalar_pattern = re.compile(f"(?P<name>.+?)\$(?P<unit>.+?):(?P<value>.+)")
698
+ matches = re_scalar_pattern.match(trex_segment_str)
699
+ if not matches:
700
+ return None
701
+
702
+ key, type_, value = matches.groups()
703
+
704
+ match type_:
705
+ case 'T.D':
706
+ out = DateSegment(key=key, value=value, type=type_)
707
+ case 'T.B':
708
+ out = BoolSegment(key=key, value=value, type=type_)
709
+ case 'T.A':
710
+ out = AlphanumericSegment(key=key, value=value, type=type_)
711
+ case 'T.T':
712
+ out = TextSegment(key=key, value=base36(value), type=type_) # prevent repeated conversion from str to base36 and make explict that when parsing we assume the string tpo be base36 already
713
+ case 'T.X':
714
+ out = BinarySegment(key=key, value=base36(value), type=type_) # prevent repeated conversion from str to base36 and make explict that when parsing we assume the string tpo be base36 already
715
+ case 'E':
716
+ out = ErrorSegment(key=key, value=value, type=type_)
717
+ case _:
718
+ out = NumericSegment(value=value, key=key, type=type_)
719
+
720
+ return out
721
+
722
+
723
+
724
def _deserialize_table_segment_from_trex_segment_str(trex_segment_str) -> TREX_Table | None:
    """Parse ``<key>$$<header>::<row>::<row>...`` into a ``TREX_Table``.

    Returns ``None`` when the string does not have that shape. Header entries
    are ``<key>$<type>`` separated by ':'; an entry without '$' gets an empty
    type.
    """
    re_table_pattern = re.compile(r"(?P<tablename>.+?)\$\$(?P<header>.+?)::(?P<body>.+)")

    matches = re_table_pattern.match(trex_segment_str)
    if not matches:
        return None
    name, header, body = matches.groups()

    headers = []
    for column_header in header.split(':'):
        ch = column_header.split('$')
        col_key = ch[0]
        col_type = ch[1] if len(ch) > 1 else ''
        headers.append(ColumnHeader(key=col_key, type=col_type))

    col_types = [h.type for h in headers]
    # Convert every cell to the value class given by its column type.
    # NOTE: zip() stops at the shorter sequence, so cells beyond the last
    # header column are dropped here.
    rows = [
        TableRow([str_to_value_type(c, t) for c, t in zip(r.split(':'), col_types)])
        for r in body.split('::')
    ]

    # Pass the wrapped rows; they used to be built and then discarded in
    # favour of the raw nested lists.
    return TREX_Table(column_headers=headers, data=rows, key=name)
751
+
752
+
753
def str_to_value_type(s: str, t: str):
    """Wrap the serialized cell ``s`` in the value class selected by type code ``t``.

    Any code other than 'T.D', 'T.B', 'T.A', 'T.T', 'T.X' or 'E' yields a
    ``NumericValue``.
    """
    if t == 'T.D':
        return DateValue(value=s)
    if t == 'T.B':
        return BoolValue(value=s)
    if t == 'T.A':
        return AlphanumericValue(value=s)
    if t == 'T.T':
        return TextValue(value=base36(s))
    if t == 'T.X':
        return BinaryValue(value=s)
    if t == 'E':
        return ErrorValue(value=s)
    return NumericValue(value=s)
763
+
764
+
765
+
766
class TREX_Struct(TREX_Segment):
    """Struct is a special interpretation of a T-REX Table with one row"""

    wrapped_table: TREX_Table

    @property
    def segment_name_(self):
        """Key of the wrapped table."""
        return self.wrapped_table.key

    @field_validator('wrapped_table')
    @classmethod
    def validate_table(cls, table):
        """Require the wrapped table to have exactly one row.

        Raises:
            ValueError: if the table has zero or more than one row.
        """
        n_rows = len(table.data)
        if n_rows != 1:
            # Validators must raise ValueError; pydantic's ValidationError
            # cannot be constructed from a plain message.
            raise ValueError(f"Struct must have exactly one row, but the table has {n_rows}")
        return table

    def serialize_for_trex(self):
        """Serialize as the wrapped table.

        Without this override the class inherits an abstract method and
        cannot be instantiated.
        NOTE(review): assumes a struct serializes exactly like its table -- confirm.
        """
        return self.wrapped_table.serialize_for_trex()

    def get(self, key):
        """Return the cell of the single row in column ``key``."""
        return self.wrapped_table.cell_data(0, key)

    def keys(self):
        """Return the column names of the wrapped table."""
        # TREX_Table exposes 'column_names'; 'col_names' does not exist on it.
        return self.wrapped_table.column_names
785
+
786
+
787
+
788
+
789
+