labfreed 0.0.8__py2.py3-none-any.whl → 0.0.10__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,24 @@
1
1
  import logging
2
2
  import re
3
3
 
4
- from .data_model import TREX, T_REX_Segment_ParseError, TREX_SimpleSegment, TREX_Table
4
+ from .data_model import *
5
+ from labfreed.validation import LabFREEDValidationError
5
6
 
7
class TREX_Parser():
    """Parse T-REX strings into ``TREX`` objects and enforce their validity.

    Args:
        suppress_errors: when truthy, an invalid T-REX is returned to the
            caller instead of raising ``LabFREEDValidationError``.
    """

    def __init__(self, suppress_errors=False):
        self._suppress_errors = suppress_errors

    def parse_trex_str(self, trex_str, name=None) -> TREX:
        """Parse ``trex_str`` and return the resulting ``TREX``.

        ``print_validation_messages`` is always called on the parsed object,
        whether or not it turns out to be valid.

        Raises:
            LabFREEDValidationError: the parsed T-REX is not valid and errors
                are not suppressed.
        """
        parsed = _from_trex_string(trex_str, name=name)
        parsed.print_validation_messages(trex_str)

        # Validity is checked first; the suppress flag only matters on failure.
        if parsed.is_valid() or self._suppress_errors:
            return parsed

        raise LabFREEDValidationError(validation_msgs = parsed.get_nested_validation_messages())
6
19
 
7
- def from_trex_string(trex_str, name=None, enforce_type=True) -> TREX:
20
+
21
+ def _from_trex_string(trex_str, name=None, enforce_type=True) -> TREX:
8
22
  if not trex_str:
9
23
  raise ValueError(f'T-REX must be a string of non zero length')
10
24
 
@@ -37,17 +51,10 @@ def from_trex_string(trex_str, name=None, enforce_type=True) -> TREX:
37
51
 
38
52
  data = d.get('data')
39
53
 
40
- segment_strings = data.split('+')
41
- out_segments = dict()
42
- for s in segment_strings:
43
- # there are only two valid options. The segment is a scalar or a table.
44
- # Constructors do the parsing anyways and raise exceptions if invalid data
45
- # try both options and then let it fail
46
- try:
47
- segment = TREX_SimpleSegment.from_trex_segmentstring(s)
48
- except T_REX_Segment_ParseError:
49
- segment = TREX_Table.from_trex_segmentstring(s)
50
- out_segments[segment.segment_name] = segment
51
- trex = TREX(name_= name, segments=out_segments)
52
- trex._trex_str = trex_str
53
- return trex
54
+ trex = TREX.from_spec_fields(name=name, data=data)
55
+
56
+ return trex
57
+
58
+
59
+
60
+
@@ -0,0 +1,106 @@
1
+ from functools import cache
2
+ import json
3
+ from pathlib import Path
4
+
5
+
6
@cache
def unece_units() -> list[dict]:
    """Load the UNECE unit table from ``UneceUnits.json`` next to this module.

    The result is cached, so the file is read at most once per process and
    every caller receives the same list object.

    Returns:
        The decoded JSON content of the file.
    """
    path = Path(__file__).parent / 'UneceUnits.json'
    # Decode explicitly as UTF-8: without it ``open`` falls back to the locale
    # encoding, which can mis-read or reject non-ASCII text on some platforms.
    with open(path, encoding='utf-8') as f:
        return json.load(f)
12
+
13
@cache
def unece_unit_codes():
    """Return the ``commonCode`` of every unit whose ``state`` is ``'ACTIVE'``.

    The list is cached after the first call.
    """
    active_codes = []
    for entry in unece_units():
        if entry.get('state') == 'ACTIVE':
            active_codes.append(entry.get('commonCode'))
    return active_codes
17
+
18
+
19
def unece_unit(unit_code):
    """Look up a unit entry by its ``commonCode``.

    Returns:
        The first entry whose ``commonCode`` equals ``unit_code``, or ``None``
        when no entry matches.
    """
    matches = [entry for entry in unece_units() if entry['commonCode'] == unit_code]
    return matches[0] if matches else None
25
+
26
def unit_symbol(unit:dict) ->str:
    """Return the value stored under ``'symbol'`` in ``unit`` (``None`` if absent)."""
    symbol = unit.get('symbol')
    return symbol
28
+
29
def unit_name(unit:dict) ->str:
    """Return the value stored under ``'name'`` in ``unit`` (``None`` if absent)."""
    name = unit.get('name')
    return name
31
+
32
+
33
+
34
+
35
+
36
+ # def quantity_from_UN_CEFACT(value:str, unit_UN_CEFACT) -> UnitQuantity:
37
+ # """
38
+ # Maps units from https://unece.org/trade/documents/revision-17-annexes-i-iii
39
+ # to an object of the quantities library https://python-quantities.readthedocs.io/en/latest/index.html
40
+ # """
41
+ # # cast to numeric type. try int first, which will fail if string has no decimals.
42
+ # # nothing to worry about yet: try float next. If that fails, the input was not a str representation of a number
43
+ # try:
44
+ # value_out = int(value)
45
+ # except ValueError:
46
+ # try:
47
+ # value_out = float(value)
48
+ # except ValueError as e:
49
+ # raise Exception(f'Input {value} is not a str representation of a number') from e
50
+
51
+ # d = {um[0]: um[1] for um in unit_map}
52
+
53
+ # unit = d.get(unit_UN_CEFACT)
54
+ # if not unit:
55
+ # raise NotImplementedError(f"lookup for unit {unit} not implemented")
56
+ # out = UnitQuantity(data=value_out, unit_name=unit.name, unit_symbol=unit.symbol)
57
+
58
+ # return out
59
+
60
+
61
+
62
+ # def quantity_to_UN_CEFACT(value:UnitQuantity ) -> Tuple[int|float, str]:
63
+ # d = {um[1].symbol: um[0] for um in unit_map}
64
+
65
+ # unit_un_cefact = d.get(value.unit_symbol)
66
+ # if not unit_un_cefact:
67
+ # raise NotImplementedError(f"lookup for unit {value.unit_symbol} not implemented")
68
+ # return value.data, unit_un_cefact
69
+
70
+
71
+
72
+
73
+
74
def check_compatibility_unece_quantities():
    """Print which ACTIVE UNECE units have a symbol accepted by ``validate_unit``.

    Units whose ``state`` is not ``'ACTIVE'`` are ignored. An active unit is
    counted as failed when it has no symbol or when ``validate_unit`` raises
    ``AssertionError`` for its symbol. The function only prints its report and
    returns nothing.
    """
    # The original called get_unece_units(), which this module never defines;
    # the loader here is unece_units().
    unece = unece_units()
    print(f'Number of units in file: {len(unece)}')

    failed = list()
    succeeded = list()
    for u in unece:
        if u.get('state') != 'ACTIVE':
            continue

        symbol = u.get('symbol')
        if not symbol:
            # No symbol to validate: record the failure directly rather than
            # via ``assert``, which is removed when Python runs with -O.
            failed.append(u)
            continue

        try:
            # NOTE(review): validate_unit is neither defined nor imported in this
            # module; it must be brought into scope before this function can run.
            validate_unit(symbol)
        except AssertionError:
            failed.append(u)
        else:
            succeeded.append(u)

    print('[blue] FAILED [/blue]')
    for u in failed:
        # Double-quoted f-string: reusing the outer quote inside the braces is a
        # SyntaxError before Python 3.12.
        print(f"{u.get('commonCode')}: {u.get('name')}")

    print('[yellow] SUCCESSFUL [/yellow]')
    for u in succeeded:
        print(u)

    print(f'{len(failed)} / {len(unece)} failed to convert')
104
+
105
+
106
+
labfreed/__init__.py CHANGED
@@ -2,4 +2,4 @@
2
2
  Python implementation of LabFREED building blocks
3
3
  '''
4
4
 
5
- __version__ = "0.0.8"
5
+ __version__ = "0.0.10"
labfreed/parse_pac.py ADDED
@@ -0,0 +1,189 @@
1
+
2
+
3
+ import re
4
+ from types import MappingProxyType
5
+
6
+ from labfreed.DisplayNameExtension.DisplayNameExtension import DisplayNames
7
+ from labfreed.PAC_CAT.data_model import PAC_CAT
8
+ from labfreed.PAC_ID.extensions import Extension, UnknownExtension
9
+ from labfreed.TREX.data_model import TREX
10
+
11
+
12
+ from .PAC_ID.data_model import *
13
+
14
+ from .validation import ValidationMessage, LabFREEDValidationError
15
+
16
+
17
+
18
+
19
+
20
+
21
class PACID_With_Extensions(BaseModelWithValidationMessages):
    """A PAC-ID together with the list of extensions attached to it."""

    pac_id: PACID
    extensions: list[Extension] = Field(default_factory=list)

    def __str__(self):
        """Return ``str(pac_id)`` followed by each extension's ``str``, joined by ``*``.

        The original built this string but never returned it (so ``str(obj)``
        raised ``TypeError``) and left out the ``*`` between the id and the
        first extension.
        """
        return '*'.join([str(self.pac_id), *(str(e) for e in self.extensions)])

    def get_extension_of_type(self, type:str) -> list[Extension]:
        """Return all extensions whose ``type`` attribute equals ``type``."""
        return [e for e in self.extensions if e.type == type]

    def get_extension(self, name:str) -> Extension|None:
        """Return the first extension named ``name``, or ``None`` if there is none."""
        return next((e for e in self.extensions if e.name == name), None)

    def serialize(self, use_short_notation_for_extensions=False, uppercase_only=False):
        """Serialize the PAC-ID and its extensions into one string.

        Args:
            use_short_notation_for_extensions: forwarded to
                ``_serialize_extensions``.
            uppercase_only: when truthy, the whole result is upper-cased.
        """
        extensions_str = self._serialize_extensions(self.extensions, use_short_notation_for_extensions)
        out = self.pac_id.serialize() + extensions_str
        if uppercase_only:
            out = out.upper()
        return out

    def to_url(self, use_short_notation_for_extensions=False, uppercase_only=False) -> str:
        """Alias of ``serialize`` with the same arguments."""
        return self.serialize(use_short_notation_for_extensions, uppercase_only)

    @classmethod
    def deserialize(cls, url, extension_interpreters ):
        """Parse ``url`` with a ``PAC_Parser`` built from ``extension_interpreters``."""
        parser = PAC_Parser(extension_interpreters)
        return parser.parse_pac_with_extensions(url)

    def _serialize_extensions(self, extensions:list[Extension], use_short_notation_for_extensions):
        """Render ``extensions`` as a string in which each one is prefixed by ``*``.

        Full form is ``*{name}${type}/{data}``. With short notation, the first
        extension is written as ``*{data}`` if its name is ``'N'``, and the
        second likewise if its name is ``'SUM'``. Short notation stops at the
        first extension that does not match its expected name.
        """
        # Names that may be omitted, indexed by extension position.
        short_names = ('N', 'SUM')
        short_notation = use_short_notation_for_extensions
        parts = []
        for i, e in enumerate(extensions):
            if short_notation and i < len(short_names) and e.name == short_names[i]:
                parts.append(f'*{e.data}')
                continue
            # After the first extension written in full, all later ones are too.
            short_notation = False
            parts.append(f'*{e.name}${e.type}/{e.data}')
        return ''.join(parts)
77
+
78
+
79
+
80
+
81
+
82
+
83
+
84
+
85
class PAC_Parser():
    """Parse PAC-ID URLs, optionally followed by ``*``-separated extensions.

    Args:
        extension_interpreters: mapping from extension type to the class whose
            ``from_spec_fields`` builds that extension. A falsy value (``None``
            or an empty dict) selects ``{'TREX': TREX, 'N': DisplayNames}``.
    """

    def __init__(self, extension_interpreters:dict[str, Extension]=None):
        self.extension_interpreters = extension_interpreters or {'TREX': TREX, 'N': DisplayNames}

    def parse_pac_with_extensions(self, pac_url:str) -> PACID_With_Extensions:
        """Split ``pac_url`` at the first ``*`` and parse the id and extensions.

        Raises:
            LabFREEDValidationError: the assembled object is not valid.
            ValueError: the id or an extension cannot be parsed.
        """
        # Everything after the first '*' belongs to the extensions; with no '*'
        # the extension part is the empty string.
        id_str, _, ext_str = pac_url.partition('*')

        pac_id = self.parse_pac_id(id_str)
        extensions = self.parse_extensions(ext_str)

        pac_with_extension = PACID_With_Extensions(pac_id=pac_id, extensions=extensions)
        if not pac_with_extension.is_valid():
            raise LabFREEDValidationError(validation_msgs = pac_with_extension.get_nested_validation_messages())

        return pac_with_extension

    def parse_pac_id(self,id_str:str) -> PACID:
        """Parse the id part of a PAC URL into a ``PACID`` (or ``PAC_CAT``).

        Raises:
            ValueError: ``id_str`` does not have the ``issuer/identifier`` shape.
        """
        # Raw string: the pattern contains '\.' which is an invalid escape in a
        # normal literal. The pattern itself is unchanged.
        # NOTE(review): the '.' in '(PAC.)?' is unescaped and matches any
        # character - confirm whether a literal dot was intended.
        m = re.match(r'(HTTPS://)?(PAC.)?(?P<issuer>.+?\..+?)/(?P<identifier>.*)', id_str)
        if m is None:
            # Previously this surfaced as AttributeError on None.
            raise ValueError(f'invalid PAC-ID: {id_str}')

        id_segments = self._parse_id_segments(m.group('identifier'))
        pac = PACID(issuer=m.group('issuer'), identifier=id_segments)

        # If any segment value contains '-', the PAC is interpreted as a category.
        # NOTE(review): the original comment said "starts with '-'", but the
        # check has always been "contains"; confirm which is intended.
        if any('-' in s.value for s in pac.identifier):
            pac = PAC_CAT.from_pac_id(pac)

        return pac

    def _parse_id_segments(self, identifier:str):
        """Split ``identifier`` on ``/`` into ``IDSegment`` objects.

        Each part is either ``value`` or ``key:value``. One leading ``/`` is
        ignored. A falsy identifier gives an empty list.

        Raises:
            ValueError: a part contains more than one ``:``.
        """
        if not identifier:
            return []

        if identifier[0] == '/':
            identifier = identifier[1:]

        id_segments = list()
        for s in identifier.split('/'):
            tmp = s.split(':')

            if len(tmp) == 1:
                segment = IDSegment(value=tmp[0])
            elif len(tmp) == 2:
                segment = IDSegment(key=tmp[0], value=tmp[1])
            else:
                raise ValueError(f'invalid segment: {s}')

            id_segments.append(segment)
        return id_segments

    def parse_extensions(self, extensions_str:str|None) -> list[Extension]:
        """Parse a ``*``-separated extension string into extension objects.

        Each extension is ``name$type/data`` or just ``data``. A bare ``data``
        at position 0 gets name/type ``N``/``N``, and at position 1 gets
        ``SUM``/``TREX``. Once any extension carries an explicit name, later
        ones must carry one as well. Types without an entry in
        ``extension_interpreters`` are built as ``UnknownExtension``.

        Raises:
            ValueError: an extension has no name and no default applies.
        """
        extensions = list()

        if not extensions_str:
            return extensions

        defaults = MappingProxyType(
            {
                0: { 'name': 'N', 'type': 'N'},
                1: { 'name': 'SUM', 'type': 'TREX'}
            }
        )
        use_defaults = True
        for i, ext_str in enumerate(extensions_str.split('*')):
            if ext_str == '':  # this will happen if first extension starts with *
                continue

            m = re.match(r'((?P<name>.+)\$(?P<type>.+)/)?(?P<data>.+)', ext_str)
            if m is None:
                raise ValueError(f'invalid extension: {ext_str}')

            name = m.group('name')
            ext_type = m.group('type')
            data = m.group('data')

            if name:
                use_defaults = False  # once a name was specified no longer assign defaults
            else:
                # defaults.get(i) is None from the third position on; that used
                # to raise AttributeError instead of the intended ValueError.
                default = defaults.get(i) if use_defaults else None
                if default is None:
                    raise ValueError(f'extension number {i}, must have name and type')
                name = default['name']
                ext_type = default['type']

            # convert to the registered subtype if there is one for this type
            subtype = self.extension_interpreters.get(ext_type) or UnknownExtension
            extensions.append(subtype.from_spec_fields(name=name, type=ext_type, data=data))

        return extensions
188
+
189
+
@@ -1,31 +1,37 @@
1
+ import re
1
2
  import string
2
3
 
3
- def alphabet(base):
4
- """ returns an alphabet, which corresponds to what pythons int(s:str, base:int=10) function used.
5
- """
6
- if base < 2 or base > 36:
7
- ValueError('base can only be between 2 and 36')
8
- alphabet = (string.digits + string.ascii_uppercase)[0:base]
9
- return alphabet
4
+ from pydantic import field_validator, RootModel
5
+
6
class base36(RootModel[str]):
    """String root model restricted to the characters ``A-Z`` and ``0-9``.

    The empty string is accepted.
    """

    @field_validator('root')
    @classmethod
    def validate_format(cls, v: str) -> str:
        """Return ``v`` unchanged if every character is an uppercase letter or digit."""
        if re.fullmatch(r'[A-Z0-9]*', v):
            return v
        raise ValueError("Value must only contain uppercase letters and digits (A-Z, 0-9)")
13
+
10
14
 
11
- def to_base36(s:str):
15
def to_base36(s:str) -> base36:
    """Takes a string, encodes it in UTF-8 and then as base36 string.

    The UTF-8 bytes are read as one big-endian unsigned integer, which is then
    written in base 36.

    Returns:
        A ``base36`` instance on every path. The zero case (for example the
        empty string) used to return a plain ``str``.
    """
    utf8_encoded = s.encode('utf-8')
    num = int.from_bytes(utf8_encoded, byteorder='big', signed=False)

    # note: this cannot be arbitrarily chosen. The choice here corresponds to
    # what python's int(s:str, base:int=10) function uses.
    base36_chars = _alphabet(base=36)
    if num == 0:
        return base36(base36_chars[0])

    digits = []
    while num:
        num, i = divmod(num, 36)
        digits.append(base36_chars[i])  # least significant digit first
    return base36(''.join(reversed(digits)))
26
32
 
27
33
 
28
- def from_base36(s36:str):
34
+ def from_base36(s36:base36) -> str:
29
35
  """inverse of to_base36"""
30
36
  # this built-in function interprets each character as a digit in a base represented by the standard alphabet [0-9 (A-Z|a-z)][0:base]. It is case-INsensitive.
31
37
  num = int(s36, 36)
@@ -34,6 +40,16 @@ def from_base36(s36:str):
34
40
  s = _bytes.decode('utf-8')
35
41
  return s
36
42
 
43
+
44
def _alphabet(base):
    """Return the digit alphabet that python's ``int(s, base)`` uses for ``base``.

    The alphabet is the first ``base`` characters of ``0-9`` followed by ``A-Z``.

    Raises:
        ValueError: ``base`` is outside 2..36. The original built this
            exception but never raised it.
    """
    if base < 2 or base > 36:
        raise ValueError('base can only be between 2 and 36')
    return (string.digits + string.ascii_uppercase)[0:base]
51
+
52
+
37
53
  if __name__ == "__main__":
38
54
  ss = ["A",
39
55
  "B-500 B",
@@ -0,0 +1,4 @@
1
+ from labfreed.DisplayNameExtension.DisplayNameExtension import DisplayNames
2
+ from labfreed.TREX.data_model import TREX
3
+
4
+
@@ -0,0 +1,103 @@
1
+ from functools import cache
2
+ import json
3
+ from pathlib import Path
4
+
5
+ from rich import print
6
+
7
+ from typing import Any, Tuple
8
+ from typing_extensions import Annotated
9
+ from pydantic import BaseModel, AfterValidator
10
+ import quantities as pq
11
+ from quantities import units
12
+
13
+ from labfreed.TREX.unece_units import unece_units
14
+
15
def validate_unit(unit_name:str) -> str :
    """
    Pydantic validator function for the unit.
    Checks if the unit is a valid unit.

    A unit counts as valid when the ``quantities`` module (``pq``) has an
    attribute with that name.
    NOTE(review): ``hasattr`` is true for any module attribute, not only
    units - confirm this is strict enough.

    Args:
        unit_name (str): unit symbol, e.g. 'kg'

    Returns:
        str: the input unit.

    Errors:
        raises an AssertionError if validation fails
    """
    if not hasattr(pq, unit_name):
        # Raised explicitly: a bare ``assert False`` is removed under
        # ``python -O``, which made invalid units return None silently.
        raise AssertionError(f'Unknown unit: {unit_name!r}')
    return unit_name
34
+
35
+
36
class Unit(BaseModel):
    """A unit described by a name and a symbol."""

    # Both fields are plain strings; this model adds no further validation.
    name: str
    symbol: str
39
+
40
+
41
class Quantity(BaseModel):
    """A numeric value paired with a ``Unit``."""

    value:int|float
    unit: Unit

    def __str__(self):
        """Render as ``"<value> <symbol>"``; the symbol ``"dimensionless"`` is shown as empty."""
        shown_symbol = "" if self.unit.symbol == "dimensionless" else self.unit.symbol
        return f"{str(self.value)} {shown_symbol}"
52
+
53
+
54
def unece_unit_code_from_quantity(q:Quantity):
    """Return the UNECE ``commonCode`` that matches the unit of ``q``.

    An entry matches when its ``name`` equals ``q.unit.name`` or its ``symbol``
    equals ``q.unit.symbol``. Exactly one distinct code must match.

    Raises:
        ValueError: no code matches, or more than one distinct code matches.
    """
    # One pass over the table; the set removes entries matched by both fields.
    codes = {
        u['commonCode']
        for u in unece_units()
        if u.get('name', '') == q.unit.name or u.get('symbol', '') == q.unit.symbol
    }
    if not codes:
        raise ValueError(f'No UNECE unit code found for Quantity {str(q)}' )
    if len(codes) > 1:
        # This case used to be reported with the misleading "No ... found" text.
        raise ValueError(
            f'Ambiguous UNECE unit code for Quantity {str(q)}: {sorted(map(str, codes))}'
        )
    return next(iter(codes))
61
+
62
+
63
+ # class DataTable(list):
64
+ # def __init__(self, headers:tuple[str, Any]):
65
+ # for h in headers:
66
+ # if len(h) != 2:
67
+ # raise ValueError(f'Headers must be tuples of length two. With a column name and type.')
68
+ # if not isinstance(h[0], str):
69
+ # raise ValueError(f'Invalid type of header name {h[0]}. Must be str')
70
+ # if not (h[1]):
71
+ # raise ValueError(f'Header type cannot be None')
72
+ # self.headers = headers
73
+ # super().__init__()
74
+
75
+ # def append(self, row:list):
76
+ # if len(row) != len(self.headers):
77
+ # raise ValueError(f'Row has different length than headers')
78
+ # super().append(row)
79
+
80
class DataTable(list):
    """A list of rows that remembers its column names and its first row.

    Attributes set in ``__init__``:
        col_names: the column names passed to the constructor (may be ``None``).
        row_template: a copy of the first row added through ``append`` or
            ``extend``; ``None`` until then.

    Only ``append`` and ``extend`` maintain ``row_template``; other list
    mutators such as ``insert`` or ``+=`` are not overridden here.
    """

    def __init__(self, col_names:list[str]=None):
        self.col_names = col_names
        self.row_template = None
        super().__init__()

    def append(self, row:list):
        """Append ``row``, recording a copy of it as the template if it is the first."""
        # Test against None, not truthiness: an empty first row is still the
        # first row and must not be replaced by a later one.
        if self.row_template is None:
            self.row_template = row.copy()
        super().append(row)

    def extend(self, iterable):
        """Append every item of ``iterable`` through ``append``."""
        for item in iterable:
            self.append(item)
94
+
95
+
96
+
97
+
98
+ if __name__ == "__main__":
99
+ pass
100
+
101
+
102
+
103
+
@@ -1,7 +1,7 @@
1
1
  from enum import Enum
2
2
 
3
3
 
4
- class WellKnownSegmentKeys(Enum):
4
+ class WellKnownKeys(Enum):
5
5
  GTIN = '01'
6
6
  BATCH = '10'
7
7
  SERIAL = '21'