labfreed 0.0.3__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
labfreed/TREX/parse.py ADDED
@@ -0,0 +1,60 @@
1
+ import logging
2
+ import re
3
+
4
+ from .data_model import *
5
+ from labfreed.validation import LabFREEDValidationError
6
+
7
class TREX_Parser():
    """Front end that turns a serialized T-REX string into a ``TREX`` object.

    After parsing, the validation messages of the result are printed. An
    invalid result raises ``LabFREEDValidationError`` unless the parser was
    created with ``suppress_errors=True``.
    """

    def __init__(self, suppress_errors=False):
        # When true, an invalid T-REX is handed back to the caller instead of raising.
        self._suppress_errors = suppress_errors

    def parse_trex_str(self, trex_str, name=None) -> TREX:
        """Parse ``trex_str`` and return the resulting ``TREX``.

        Args:
            trex_str: The serialized T-REX.
            name: Optional extension name, forwarded to ``_from_trex_string``.

        Returns:
            The parsed ``TREX``.

        Raises:
            LabFREEDValidationError: If the result is not valid and errors
                are not suppressed.
        """
        parsed = _from_trex_string(trex_str, name=name)

        # Messages are always printed, whether or not errors are suppressed.
        parsed.print_validation_messages(trex_str)

        if parsed.is_valid() or self._suppress_errors:
            return parsed

        raise LabFREEDValidationError(validation_msgs=parsed.get_nested_validation_messages())
19
+
20
+
21
def _from_trex_string(trex_str, name=None, enforce_type=True) -> "TREX":
    """Parse a serialized T-REX extension string into a ``TREX`` object.

    The accepted layout is ``[<name>$<type>/]<data>``, optionally preceded by
    the ``*`` extension indicator. Line breaks (``\\n``) are removed before
    parsing, because T-REX strings are often edited with line breaks in them.

    Args:
        trex_str: The serialized T-REX. Must not be empty.
        name: Extension name. If the string also contains a name, this
            parameter wins and a warning is logged.
        enforce_type: When true, an extension type other than ``TREX`` is
            rejected. When false, only a warning is logged and parsing
            continues.

    Returns:
        The result of ``TREX.from_spec_fields(name=..., data=...)``.

    Raises:
        ValueError: If the string is empty or does not match the layout, if
            the type is not ``TREX`` while ``enforce_type`` is true, or if no
            name is available from either the string or the parameter.
    """
    if not trex_str:
        raise ValueError('T-REX must be a string of non zero length')

    # Remove the extension indicator as a precaution, in case the caller has
    # not done so. Only the leading '*' is dropped; the last character is data.
    if trex_str[0] == "*":
        trex_str = trex_str[1:]
    trex_str = trex_str.replace('\n', '')

    match = re.match(r'((?P<name>.+)\$(?P<type>.+)/)?(?P<data>.+)', trex_str)
    if match is None:
        # Happens e.g. when nothing is left after stripping '*' and line breaks.
        raise ValueError('TREX is invalid.')
    fields = match.groupdict()

    ext_type = fields.get('type')
    if not ext_type:
        logging.warning('No type given. Assume its trex')
    elif ext_type != 'TREX':
        if enforce_type:
            logging.error(f'Extension type {ext_type} is not TREX. Aborting')
            raise ValueError(f'Extension type {ext_type} is not TREX.')
        logging.warning(f'Extension type {ext_type} is not TREX. Try anyways')

    s_name = fields.get('name')
    if name and s_name:
        logging.warning(f'conflicting names given. The string contained {s_name}, method parameter was {name}. Method parameter wins.')
    elif not name and not s_name:
        raise ValueError('No extension name was given')
    elif s_name:
        name = s_name

    return TREX.from_spec_fields(name=name, data=fields.get('data'))
57
+
58
+
59
+
60
+
@@ -0,0 +1,106 @@
1
+ from functools import cache
2
+ import json
3
+ from pathlib import Path
4
+
5
+
6
@cache
def unece_units() -> list[dict]:
    """Load the unit list from ``UneceUnits.json`` next to this module.

    The result is cached, so the file is read once per process and every
    caller receives the same list object. Callers must not mutate it.

    Returns:
        The parsed JSON content of the file.
    """
    json_path = Path(__file__).parent / 'UneceUnits.json'
    # Decode explicitly instead of relying on the platform default encoding.
    # NOTE(review): assumes the bundled file is UTF-8 encoded - confirm.
    return json.loads(json_path.read_text(encoding='utf-8'))
12
+
13
@cache
def unece_unit_codes():
    """Return the ``commonCode`` of every unit whose ``state`` is ``'ACTIVE'``.

    The result is cached; all callers share the same list object.
    """
    active_codes = []
    for entry in unece_units():
        if entry.get('state') != 'ACTIVE':
            continue
        active_codes.append(entry.get('commonCode'))
    return active_codes
17
+
18
+
19
def unece_unit(unit_code):
    """Look up a unit entry by its ``commonCode``.

    Returns:
        The first entry whose ``commonCode`` equals ``unit_code``, or ``None``
        if there is no such entry.
    """
    matches = []
    # The whole list is scanned; an entry without 'commonCode' raises KeyError.
    for entry in unece_units():
        if entry['commonCode'] == unit_code:
            matches.append(entry)
    return matches[0] if matches else None
25
+
26
def unit_symbol(unit:dict) ->str:
    """Return the ``'symbol'`` entry of a unit dict (``None`` if it is absent)."""
    symbol = unit.get('symbol')
    return symbol
28
+
29
def unit_name(unit:dict) ->str:
    """Return the ``'name'`` entry of a unit dict (``None`` if it is absent)."""
    name = unit.get('name')
    return name
31
+
32
+
33
+
34
+
35
+
36
+ # def quantity_from_UN_CEFACT(value:str, unit_UN_CEFACT) -> UnitQuantity:
37
+ # """
38
+ # Maps units from https://unece.org/trade/documents/revision-17-annexes-i-iii
39
+ # to an object of the quantities library https://python-quantities.readthedocs.io/en/latest/index.html
40
+ # """
41
+ # # cast to numeric type. try int first, which will fail if string has no decimals.
42
+ # # nothing to worry about yet: try float next. If that fails, the input was not a str representation of a number
43
+ # try:
44
+ # value_out = int(value)
45
+ # except ValueError:
46
+ # try:
47
+ # value_out = float(value)
48
+ # except ValueError as e:
49
+ # raise Exception(f'Input {value} is not a str representation of a number') from e
50
+
51
+ # d = {um[0]: um[1] for um in unit_map}
52
+
53
+ # unit = d.get(unit_UN_CEFACT)
54
+ # if not unit:
55
+ # raise NotImplementedError(f"lookup for unit {unit} not implemented")
56
+ # out = UnitQuantity(data=value_out, unit_name=unit.name, unit_symbol=unit.symbol)
57
+
58
+ # return out
59
+
60
+
61
+
62
+ # def quantity_to_UN_CEFACT(value:UnitQuantity ) -> Tuple[int|float, str]:
63
+ # d = {um[1].symbol: um[0] for um in unit_map}
64
+
65
+ # unit_un_cefact = d.get(value.unit_symbol)
66
+ # if not unit_un_cefact:
67
+ # raise NotImplementedError(f"lookup for unit {value.unit_symbol} not implemented")
68
+ # return value.data, unit_un_cefact
69
+
70
+
71
+
72
+
73
+
74
def check_compatibility_unece_quantities():
    """Print which active UNECE units pass ``validate_unit``.

    Every unit whose ``state`` is ``'ACTIVE'`` is checked. A unit fails if it
    has no symbol or if ``validate_unit`` raises ``AssertionError`` for its
    symbol. The failed units, the successful units and a summary line are
    printed.
    """
    unece = unece_units()
    print(f'Number of units in file: {len(unece)}')

    failed = []
    success = []
    for u in unece:
        if u.get('state') != 'ACTIVE':
            continue

        symbol = u.get('symbol')
        if not symbol:
            # A unit without a symbol cannot be validated at all.
            failed.append(u)
            continue

        try:
            # NOTE(review): validate_unit is not defined or imported in the
            # visible part of this module - confirm where it comes from.
            validate_unit(symbol)
        except AssertionError:
            failed.append(u)
        else:
            success.append(u)

    print('[blue] FAILED [/blue]')
    for u in failed:
        # Double quotes outside: nesting the same quote type inside an
        # f-string is a syntax error before Python 3.12.
        print(f"{u.get('commonCode')}: {u.get('name')}")

    print('[yellow] SUCCESSFUL [/yellow]')
    for u in success:
        print(u)

    # The denominator counts every unit in the file, including inactive ones.
    print(f'{len(failed)} / {len(unece)} failed to convert')
104
+
105
+
106
+
labfreed/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ '''
2
+ Python implementation of LabFREED building blocks
3
+ '''
4
+
5
+ __version__ = "0.0.3"
labfreed/parse_pac.py ADDED
@@ -0,0 +1,189 @@
1
+
2
+
3
+ import re
4
+ from types import MappingProxyType
5
+
6
+ from labfreed.DisplayNameExtension.DisplayNameExtension import DisplayNames
7
+ from labfreed.PAC_CAT.data_model import PAC_CAT
8
+ from labfreed.PAC_ID.extensions import Extension, UnknownExtension
9
+ from labfreed.TREX.data_model import TREX
10
+
11
+
12
+ from .PAC_ID.data_model import *
13
+
14
+ from .validation import ValidationMessage, LabFREEDValidationError
15
+
16
+
17
+
18
+
19
+
20
+
21
class PACID_With_Extensions(BaseModelWithValidationMessages):
    """A PAC-ID together with the extensions appended to it."""

    # The identifier part.
    pac_id: PACID
    # Extensions, in the order in which they are serialized.
    extensions: list[Extension] = Field(default_factory=list)

    def __str__(self):
        """Return ``str(pac_id)`` and ``str()`` of each extension, joined by ``*``."""
        return '*'.join([str(self.pac_id), *(str(e) for e in self.extensions)])

    def get_extension_of_type(self, type:str) -> list[Extension]:
        """Return all extensions whose ``type`` equals ``type`` (possibly none)."""
        return [e for e in self.extensions if e.type == type]

    def get_extension(self, name:str) -> Extension|None:
        """Return the first extension called ``name``, or ``None`` if there is none."""
        return next((e for e in self.extensions if e.name == name), None)

    def serialize(self, use_short_notation_for_extensions=False, uppercase_only=False):
        """Serialize the PAC-ID followed by its extensions.

        Args:
            use_short_notation_for_extensions: Passed on to
                ``_serialize_extensions``.
            uppercase_only: When true, the whole result is upper-cased.

        Returns:
            The serialized string.
        """
        extensions_str = self._serialize_extensions(self.extensions, use_short_notation_for_extensions)
        out = self.pac_id.serialize() + extensions_str
        if uppercase_only:
            out = out.upper()
        return out

    def to_url(self, use_short_notation_for_extensions=False, uppercase_only=False) -> str:
        """Alias of ``serialize`` with the same arguments."""
        return self.serialize(use_short_notation_for_extensions, uppercase_only)

    @classmethod
    def deserialize(cls, url, extension_interpreters=None):
        """Parse ``url`` with a ``PAC_Parser``.

        Args:
            url: The string to parse.
            extension_interpreters: Passed to ``PAC_Parser``; ``None`` selects
                the parser's defaults.
        """
        parser = PAC_Parser(extension_interpreters)
        return parser.parse_pac_with_extensions(url)

    def _serialize_extensions(self, extensions:list[Extension], use_short_notation_for_extensions):
        """Serialize ``extensions``, each one prefixed with ``*``.

        The long form of an extension is ``*<name>$<type>/<data>``. With short
        notation, the first extension is written as ``*<data>`` if it is
        named ``N``, and the second one likewise if it is named ``SUM``. As
        soon as one of these positions does not carry the expected name, all
        remaining extensions use the long form.
        """
        # Position -> name that may be omitted in short notation.
        short_names = {0: 'N', 1: 'SUM'}

        parts = []
        short_notation = use_short_notation_for_extensions
        for i, e in enumerate(extensions):
            if short_notation and i in short_names:
                if e.name == short_names[i]:
                    parts.append(f'*{e.data}')
                    continue
                # Expected name is missing: short notation ends here for good.
                short_notation = False

            parts.append(f'*{e.name}${e.type}/{e.data}')
        return ''.join(parts)
77
+
78
+
79
+
80
+
81
+
82
+
83
+
84
+
85
class PAC_Parser():
    """Parser for PAC-ID strings and the extensions appended to them."""

    def __init__(self, extension_interpreters:dict[str, Extension]=None):
        """
        Args:
            extension_interpreters: Maps an extension type to the class used
                to build extensions of that type. If omitted or empty, type
                ``TREX`` is built with ``TREX`` and type ``N`` with
                ``DisplayNames``.
        """
        self.extension_interpreters = extension_interpreters or {'TREX': TREX, 'N': DisplayNames}

    def parse_pac_with_extensions(self, pac_url:str) -> PACID_With_Extensions:
        """Parse a PAC-ID and everything after its first ``*`` as extensions.

        Raises:
            LabFREEDValidationError: If the resulting object is not valid.
            ValueError: If the PAC-ID or an extension cannot be parsed.
        """
        # Without a '*', partition() leaves the extension part empty.
        id_str, _, ext_str = pac_url.partition('*')

        pac_id = self.parse_pac_id(id_str)
        extensions = self.parse_extensions(ext_str)

        pac_with_extension = PACID_With_Extensions(pac_id=pac_id, extensions=extensions)
        if not pac_with_extension.is_valid():
            raise LabFREEDValidationError(validation_msgs = pac_with_extension.get_nested_validation_messages())

        return pac_with_extension

    def parse_pac_id(self,id_str:str) -> PACID:
        """Parse the identifier part: ``[HTTPS://][PAC.]<issuer>/<identifier>``.

        Returns:
            A ``PACID``, or the result of ``PAC_CAT.from_pac_id`` if any
            segment value starts with ``-``.

        Raises:
            ValueError: If ``id_str`` does not match the expected layout or a
                segment is invalid.
        """
        # The dot after PAC is escaped so that only a literal 'PAC.' prefix is
        # stripped (an unescaped dot would also eat e.g. 'PACK' of 'PACKAGE.COM').
        m = re.match(r'(HTTPS://)?(PAC\.)?(?P<issuer>.+?\..+?)/(?P<identifier>.*)', id_str)
        if m is None:
            raise ValueError(f'invalid PAC-ID: {id_str}')

        id_segments = self._parse_id_segments(m.group('identifier'))
        pac = PACID(issuer=m.group('issuer'), identifier=id_segments)

        # if a segment starts with '-' the pac is interpreted as category
        if any(s.value.startswith('-') for s in pac.identifier):
            pac = PAC_CAT.from_pac_id(pac)

        return pac

    def _parse_id_segments(self, identifier:str):
        """Split ``identifier`` at ``/`` into ``IDSegment`` objects.

        A segment is either ``<value>`` or ``<key>:<value>``. One leading
        ``/`` is ignored.

        Raises:
            ValueError: If a segment contains more than one ``:``.
        """
        if not identifier:
            return []

        id_segments = []
        for s in identifier.removeprefix('/').split('/'):
            tmp = s.split(':')

            if len(tmp) == 1:
                segment = IDSegment(value=tmp[0])
            elif len(tmp) == 2:
                segment = IDSegment(key=tmp[0], value=tmp[1])
            else:
                raise ValueError(f'invalid segment: {s}')

            id_segments.append(segment)
        return id_segments

    def parse_extensions(self, extensions_str:str|None) -> list[Extension]:
        """Parse a ``*``-separated list of extensions.

        Each extension has the form ``[<name>$<type>/]<data>``. Name and type
        may be omitted for the first two extensions only, and only as long as
        no earlier extension specified a name: position 0 then defaults to
        name ``N`` / type ``N``, position 1 to name ``SUM`` / type ``TREX``.

        Args:
            extensions_str: The extension part, with or without one leading ``*``.

        Returns:
            The parsed extensions; empty if ``extensions_str`` is empty or ``None``.

        Raises:
            ValueError: If an extension does not match the expected form, or
                if name and type are missing where no default applies.
        """
        extensions = []

        if not extensions_str:
            return extensions

        defaults = MappingProxyType(
            {
                0: { 'name': 'N', 'type': 'N'},
                1: { 'name': 'SUM', 'type': 'TREX'}
            }
        )
        # A leading '*' belongs to the first extension. It is removed before
        # splitting so that it does not shift the positions used for defaults.
        for i, e in enumerate(extensions_str.removeprefix('*').split('*')):
            if e == '':
                continue
            m = re.match(r'((?P<name>.+)\$(?P<type>.+)/)?(?P<data>.+)', e)
            if m is None:
                raise ValueError(f'invalid extension: {e!r}')
            name, ext_type, data = m.group('name', 'type', 'data')

            if name:
                defaults = None # once a name was specified no longer assign defaults
            else:
                default = defaults.get(i) if defaults else None
                if default is None:
                    raise ValueError(f'extension number {i}, must have name and type')
                name = default['name']
                ext_type = default['type']

            # Use the registered class for this type, else a generic extension.
            subtype = self.extension_interpreters.get(ext_type) or UnknownExtension
            extensions.append(subtype.from_spec_fields(name=name, type=ext_type, data=data))

        return extensions
188
+
189
+
@@ -0,0 +1,82 @@
1
+ import re
2
+ import string
3
+
4
+ from pydantic import field_validator, RootModel
5
+
6
class base36(RootModel[str]):
    """String root model that only accepts the characters ``A-Z`` and ``0-9``.

    The empty string is accepted as well.
    """

    @field_validator('root')
    @classmethod
    def validate_format(cls, v: str) -> str:
        """Return ``v`` unchanged if it consists of ``A-Z`` / ``0-9`` only."""
        if re.fullmatch(r'[A-Z0-9]*', v) is not None:
            return v
        raise ValueError("Value must only contain uppercase letters and digits (A-Z, 0-9)")
13
+
14
+
15
def to_base36(s:str) -> base36:
    """Encode a string as UTF-8 and represent those bytes as a base36 number.

    The bytes are read as one unsigned big-endian integer. Leading NUL bytes
    therefore do not survive a round trip, and the empty string is encoded as
    ``"0"``.

    Args:
        s: The text to encode.

    Returns:
        The encoded text as a ``base36`` instance, for every input.
    """
    num = int.from_bytes(s.encode('utf-8'), byteorder='big', signed=False)

    # note: the alphabet cannot be chosen arbitrarily. It has to correspond to
    # what Python's int(s, base) uses, because from_base36 relies on int().
    base36_chars = _alphabet(base=36)

    digits = []
    while num:
        num, i = divmod(num, 36)
        digits.append(base36_chars[i])

    # divmod yields the least significant digit first. No digits at all
    # means the number was zero, which is written as the zero digit.
    b36_str = ''.join(reversed(digits)) or base36_chars[0]
    return base36(b36_str)
32
+
33
+
34
def from_base36(s36: "base36 | str") -> str:
    """Inverse of ``to_base36``: decode a base36 string back into text.

    Args:
        s36: A ``base36`` instance or a plain string of base36 digits.

    Returns:
        The decoded text.

    Raises:
        ValueError: If the digits are not valid base36.
        UnicodeDecodeError: If the decoded bytes are not valid UTF-8.
    """
    # int() with an explicit base only accepts str/bytes, so a base36 model
    # has to be unwrapped to its root string first.
    digits = getattr(s36, 'root', s36)

    # int() interprets each character as a digit of the standard alphabet
    # [0-9 (A-Z|a-z)][0:base]. It is case INsensitive.
    num = int(digits, 36)
    num_bytes = (num.bit_length() + 7) // 8
    return num.to_bytes(num_bytes, byteorder='big').decode('utf-8')
42
+
43
+
44
def _alphabet(base):
    """Return the first ``base`` digits of ``0-9A-Z``.

    This is the (upper-case) alphabet that Python's ``int(s, base)`` uses.

    Raises:
        ValueError: If ``base`` is not between 2 and 36.
    """
    if base < 2 or base > 36:
        raise ValueError('base can only be between 2 and 36')
    return (string.digits + string.ascii_uppercase)[0:base]
51
+
52
+
53
+ if __name__ == "__main__":
54
+ ss = ["A",
55
+ "B-500 B",
56
+ "B-500 Ba",
57
+ "B-500 Bal",
58
+ "B-500 Bala",
59
+ "B-500 Balanc",
60
+ "B-500 Balance",
61
+ "B-500 D",
62
+ "Mini Spray Dryer S-300",
63
+ "w3ApashAt!!£NAGDSAF*ç%&/()",
64
+ "HELLOWORLD",
65
+ "Helloworld",
66
+ "$£äö!'?^{]<@#¦&¬|¢)&§°😀你好🌍🏯😇🎵🔥你👻🐉😀你好🌍🏯😇🎵🔥你👻🐉😀你好🌍🏯😇🎵🔥你👻🐉😀你好🌍🏯😇🎵🔥你👻🐉😀你好🌍🏯😇🎵🔥你👻🐉😀你好🌍🏯😇🎵🔥你👻🐉😀你好🌍🏯😇🎵🔥你👻🐉",
67
+ "往跟住!師立甲錯什正再圓身升因月室",
68
+ "Balance BAL500 @☣️Lab",
69
+ "BAL500 @☣️Lab",
70
+ "BAL-CLEAN",
71
+ "Smørrebrød µ-Nutrients",
72
+ "Demo Result from R-300",
73
+ "Rotavapor R-300",
74
+ "Rotavapor R-250",
75
+ "Rotavapor R-220",
76
+ "SyncorePlus"
77
+ ]
78
+ for s in ss:
79
+ s36 = to_base36(s)
80
+ s_back = from_base36(s36)
81
+ identical = (s == s_back)
82
+ print(f'{s} >> {s36} >> {s_back}: match:{identical}')
@@ -0,0 +1,4 @@
1
+ from labfreed.DisplayNameExtension.DisplayNameExtension import DisplayNames
2
+ from labfreed.TREX.data_model import TREX
3
+
4
+
@@ -0,0 +1,103 @@
1
+ from functools import cache
2
+ import json
3
+ from pathlib import Path
4
+
5
+ from rich import print
6
+
7
+ from typing import Any, Tuple
8
+ from typing_extensions import Annotated
9
+ from pydantic import BaseModel, AfterValidator
10
+ import quantities as pq
11
+ from quantities import units
12
+
13
+ from labfreed.TREX.unece_units import unece_units
14
+
15
def validate_unit(unit_name:str) -> str :
    """
    Pydantic validator function for the unit.
    Checks if the unit is a valid unit, i.e. if the ``quantities`` module
    (``pq``) has an attribute of that name.

    Args:
        unit_name (str): unit symbol, e.g. 'kg'

    Returns:
        str: the input unit.

    Errors:
        raises an AssertionError if validation fails
    """
    if not hasattr(pq, unit_name):
        # Raised explicitly: an ``assert`` statement is removed when Python
        # runs with -O, which would let invalid units pass.
        raise AssertionError(f'{unit_name!r} is not a unit known to the quantities package')
    return unit_name
34
+
35
+
36
class Unit(BaseModel):
    """A unit, described by its name and its symbol."""
    # Name of the unit.
    name: str
    # Symbol of the unit. Quantity.__str__ prints nothing for the symbol "dimensionless".
    symbol: str
39
+
40
+
41
class Quantity(BaseModel):
    """A numeric value together with its unit."""
    value:int|float
    unit: Unit

    def __str__(self):
        """Return ``"<value> <symbol>"``.

        For the symbol ``"dimensionless"`` the symbol is left out, but the
        separating space is still written.
        """
        shown_symbol = "" if self.unit.symbol == "dimensionless" else self.unit.symbol
        return f"{self.value!s} {shown_symbol}"
52
+
53
+
54
def unece_unit_code_from_quantity(q:Quantity):
    """Return the UNECE ``commonCode`` that matches the unit of ``q``.

    A unit entry matches if its ``name`` equals ``q.unit.name`` or its
    ``symbol`` equals ``q.unit.symbol``.

    Raises:
        ValueError: If no code matches, or if more than one distinct code
            matches.
    """
    codes = {
        u['commonCode']
        for u in unece_units()
        if u.get('name', '') == q.unit.name or u.get('symbol', '') == q.unit.symbol
    }
    if not codes:
        raise ValueError(f'No UNECE unit code found for Quantity {str(q)}' )
    if len(codes) > 1:
        # Name and symbol (or several entries) point to different codes.
        raise ValueError(f'Ambiguous UNECE unit code for Quantity {str(q)}: {sorted(codes)}')
    return next(iter(codes))
61
+
62
+
63
+ # class DataTable(list):
64
+ # def __init__(self, headers:tuple[str, Any]):
65
+ # for h in headers:
66
+ # if len(h) != 2:
67
+ # raise ValueError(f'Headers must be tuples of length two. With a column name and type.')
68
+ # if not isinstance(h[0], str):
69
+ # raise ValueError(f'Invalid type of header name {h[0]}. Must be str')
70
+ # if not (h[1]):
71
+ # raise ValueError(f'Header type cannot be None')
72
+ # self.headers = headers
73
+ # super().__init__()
74
+
75
+ # def append(self, row:list):
76
+ # if len(row) != len(self.headers):
77
+ # raise ValueError(f'Row has different length than headers')
78
+ # super().append(row)
79
+
80
class DataTable(list):
    """A list of rows with optional column names.

    A copy of the first row added through ``append`` or ``extend`` is kept as
    ``row_template``.
    """

    def __init__(self, col_names:list[str]=None):
        """
        Args:
            col_names: Optional column names, stored as given.
        """
        self.col_names = col_names
        # Copy of the first row; None until a row has been added.
        self.row_template = None
        super().__init__()

    def append(self, row:list):
        """Append ``row``; the first row is also copied to ``row_template``."""
        # Compare against None: an empty first row is falsy, but it is still
        # the first row and must not be replaced by a later one.
        if self.row_template is None:
            self.row_template = list(row)
        super().append(row)

    def extend(self, iterable):
        """Append every item of ``iterable`` through ``append``."""
        for item in iterable:
            self.append(item)
94
+
95
+
96
+
97
+
98
+ if __name__ == "__main__":
99
+ pass
100
+
101
+
102
+
103
+
@@ -0,0 +1,16 @@
1
+ from enum import Enum
2
+
3
+
4
class WellKnownKeys(Enum):
    """Well-known keys and the codes they are written as.

    NOTE(review): the numeric codes look like GS1 application identifiers -
    confirm against the specification.
    """
    GTIN = '01'
    BATCH = '10'
    SERIAL = '21'
    # Misspelled name, kept as the canonical member so existing callers and
    # lookups by value keep returning it.
    ADDITIONAL_IDINTIFIER = '240'
    # Correctly spelled alias of the member above (same value, same member).
    ADDITIONAL_IDENTIFIER = '240'
    RUN_ID_ABSOLUTE = 'RNR'
    SAMPLE_ID = 'SMP'
    EXPERIMENT_ID = 'EXP'
    RESULT_ID = 'RST'
    METHOD_ID = 'MTD'
    REPORT_ID = 'RPT'
    TIMESTAMP = 'TS'
    VERSION = 'V'