PyPI - labfreed - Versions diffs - 0.0.3__py2.py3-none-any.whl - Mend

labfreed 0.0.3__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

labfreed/DisplayNameExtension/DisplayNameExtension.py +37 -0
labfreed/PAC_CAT/__init__.py +1 -0
labfreed/PAC_CAT/data_model copy.py +232 -0
labfreed/PAC_CAT/data_model.py +369 -0
labfreed/PAC_ID/__init__.py +0 -0
labfreed/PAC_ID/data_model.py +177 -0
labfreed/PAC_ID/extensions.py +55 -0
labfreed/TREX/UneceUnits.json +33730 -0
labfreed/TREX/data_model.py +789 -0
labfreed/TREX/parse.py +60 -0
labfreed/TREX/unece_units.py +106 -0
labfreed/__init__.py +5 -0
labfreed/parse_pac.py +189 -0
labfreed/utilities/base36.py +82 -0
labfreed/utilities/extension_intertpreters.py +4 -0
labfreed/utilities/utility_types.py +103 -0
labfreed/utilities/well_known_keys.py +16 -0
labfreed/validation.py +149 -0
labfreed-0.0.3.dist-info/METADATA +7 -0
labfreed-0.0.3.dist-info/RECORD +22 -0
labfreed-0.0.3.dist-info/WHEEL +5 -0
labfreed-0.0.3.dist-info/licenses/LICENSE +21 -0

labfreed/TREX/parse.py ADDED Viewed

@@ -0,0 +1,60 @@
+import logging
+import re
+from .data_model import *
+from labfreed.validation import LabFREEDValidationError
class TREX_Parser():
    """Parses T-REX strings into ``TREX`` objects and checks their validity.

    An invalid result raises ``LabFREEDValidationError`` unless the parser
    was created with ``suppress_errors=True``.
    """

    def __init__(self, suppress_errors=False):
        # True -> parse_trex_str returns invalid results instead of raising.
        self._suppress_errors = suppress_errors

    def parse_trex_str(self, trex_str, name=None) -> TREX:
        """Parse ``trex_str`` and return the resulting ``TREX``.

        ``print_validation_messages`` is called on every result, valid or not.

        Args:
            trex_str: The T-REX string to parse.
            name: Optional extension name, forwarded to ``_from_trex_string``.

        Raises:
            LabFREEDValidationError: The result is invalid and errors are
                not suppressed.
        """
        parsed = _from_trex_string(trex_str, name=name)
        parsed.print_validation_messages(trex_str)
        if parsed.is_valid() or self._suppress_errors:
            return parsed
        raise LabFREEDValidationError(validation_msgs = parsed.get_nested_validation_messages())
+def _from_trex_string(trex_str, name=None, enforce_type=True) -> TREX:
+    if not trex_str:
+        raise ValueError(f'T-REX must be a string of non zero length')
+    # remove extension indicator. Precaution in case it is not done yet
+    if trex_str[0]=="*":
+        trex_str=trex_str[1:-1]
+    # remove line breaks. for editing T-REXes it's more convenient to have them in, so one never knows
+    trex_str = trex_str.replace('\n','')
+    d = re.match('((?P<name>.+)\$(?P<type>.+)/)?(?P<data>.+)', trex_str).groupdict()
+    if not d:
+        raise ValueError('TREX is invalid.')
+    type = d.get('type')
+    if not type:
+        logging.warning('No type given. Assume its trex')
+    elif type != 'TREX' and enforce_type:
+        logging.error(f'Extension type {type} is not TREX. Aborting')
+        raise ValueError(f'Extension type {type} is not TREX.')
+    else:
+        logging.warning('Extension type {type} is not TREX. Try anyways')
+    s_name = d.get('name')
+    if name and s_name:
+        logging.warning(f'conflicting names given. The string contained {s_name}, method parameter was {name}. Method parameter wins.')
+    elif not name and not s_name:
+        raise ValueError('No extension name was given')
+    elif s_name:
+        name = s_name
+    data = d.get('data')
+    trex = TREX.from_spec_fields(name=name, data=data)
+    return trex

labfreed/TREX/unece_units.py ADDED Viewed

@@ -0,0 +1,106 @@
+from functools import cache
+import json
+from pathlib import Path
@cache
def unece_units() -> list[dict]:
    """Load the unit records from ``UneceUnits.json`` next to this module.

    The file is read once; ``functools.cache`` hands the same list object to
    every later caller, so callers must not mutate it.

    Returns:
        The parsed JSON content.
    """
    json_path = Path(__file__).parent / 'UneceUnits.json'
    # JSON is UTF-8 by specification; do not depend on the platform's
    # default encoding.
    with open(json_path, encoding='utf-8') as f:
        return json.load(f)
@cache
def unece_unit_codes():
    """Return the ``commonCode`` of every unit record whose ``state`` is ``'ACTIVE'``.

    The result is cached after the first call.
    """
    active_codes = []
    for record in unece_units():
        if record.get('state') == 'ACTIVE':
            active_codes.append(record.get('commonCode'))
    return active_codes
def unece_unit(unit_code):
    """Return the first unit record whose ``commonCode`` equals ``unit_code``.

    Returns ``None`` when no record matches.
    """
    matches = [record for record in unece_units() if record['commonCode'] == unit_code]
    return matches[0] if matches else None
def unit_symbol(unit:dict) ->str:
    """Return the ``'symbol'`` entry of a unit record, or ``None`` if it has none."""
    symbol = unit.get('symbol')
    return symbol
def unit_name(unit:dict) ->str:
    """Return the ``'name'`` entry of a unit record, or ``None`` if it has none."""
    name = unit.get('name')
    return name
+# def quantity_from_UN_CEFACT(value:str, unit_UN_CEFACT) -> UnitQuantity:
+#     """
+#     Maps units from https://unece.org/trade/documents/revision-17-annexes-i-iii
+#     to an object of the quantities library https://python-quantities.readthedocs.io/en/latest/index.html
+#     """
+#     # cast to numeric type. try int first, which will fail if string has no decimals.
+#     # nothing to worry about yet: try float next. If that fails, the input was not a str representation of a number
+#     try:
+#         value_out = int(value)
+#     except ValueError:
+#         try:
+#             value_out = float(value)
+#         except ValueError as e:
+#             raise Exception(f'Input {value} is not a str representation of a number') from e
+#     d = {um[0]: um[1] for um in unit_map}
+#     unit = d.get(unit_UN_CEFACT)
+#     if not unit:
+#         raise NotImplementedError(f"lookup for unit {unit} not implemented")
+#     out = UnitQuantity(data=value_out, unit_name=unit.name, unit_symbol=unit.symbol)
+#     return out
+# def quantity_to_UN_CEFACT(value:UnitQuantity ) -> Tuple[int|float, str]:
+#     d = {um[1].symbol: um[0] for um in unit_map}
+#     unit_un_cefact = d.get(value.unit_symbol)
+#     if not unit_un_cefact:
+#         raise NotImplementedError(f"lookup for unit {value.unit_symbol} not implemented")
+#     return value.data, unit_un_cefact
def check_compatibility_unece_quantities():
    """Print which ACTIVE UNECE units have a symbol accepted by ``validate_unit``.

    A unit counts as failed when it has no symbol or when ``validate_unit``
    raises ``AssertionError`` for its symbol. Units that are not ACTIVE are
    skipped, but still counted in the total of the final summary line.
    """
    # Imported here because utility_types imports this module at load time;
    # a top-level import would be circular.
    from labfreed.utilities.utility_types import validate_unit

    unece = unece_units()
    print(f'Number of units in file: {len(unece)}')

    failed = []
    success = []
    for u in unece:
        if u.get('state') != 'ACTIVE':
            continue
        symbol = u.get('symbol')
        if not symbol:
            failed.append(u)
            continue
        try:
            validate_unit(symbol)
        except AssertionError:
            failed.append(u)
        else:
            success.append(u)

    print('[blue] FAILED [/blue]')
    for u in failed:
        # Double quotes inside the f-string keep this valid before Python 3.12.
        print(f'{u.get("commonCode")}: {u.get("name")}')

    print('[yellow] SUCCESSFUL [/yellow]')
    for u in success:
        print(u)

    print(f'{len(failed)} / {len(unece)} failed to convert')

labfreed/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+'''
+Python implementation of LabFREED building blocks
+'''
+__version__ = "0.0.3"

labfreed/parse_pac.py ADDED Viewed

@@ -0,0 +1,189 @@
+import re
+from types import MappingProxyType
+from labfreed.DisplayNameExtension.DisplayNameExtension import DisplayNames
+from labfreed.PAC_CAT.data_model import PAC_CAT
+from labfreed.PAC_ID.extensions import Extension, UnknownExtension
+from labfreed.TREX.data_model import TREX
+from .PAC_ID.data_model import *
+from .validation import ValidationMessage, LabFREEDValidationError
class PACID_With_Extensions(BaseModelWithValidationMessages):
    # A PAC-ID together with the extensions attached to it.
    # (Documented with comments rather than a docstring so the model's
    # generated schema description stays unchanged.)

    # The identifier itself.
    pac_id: PACID
    # Attached extensions; serialized in list order.
    extensions: list[Extension] = Field(default_factory=list)

    def __str__(self):
        """Return ``str(pac_id)`` followed by the '*'-joined extension strings."""
        # Fix: the string used to be built but never returned, so str(obj)
        # raised TypeError.
        # NOTE(review): unlike serialize(), no '*' is placed between the
        # PAC-ID and the first extension - confirm that this is intended.
        return str(self.pac_id) + '*'.join(str(e) for e in self.extensions)

    def get_extension_of_type(self, type:str) -> list[Extension]:
        """Return all extensions whose ``type`` attribute equals ``type``."""
        return [e for e in self.extensions if e.type == type]

    def get_extension(self, name:str) -> Extension|None:
        """Return the first extension named ``name``, or ``None`` if there is none."""
        return next((e for e in self.extensions if e.name == name), None)

    def serialize(self, use_short_notation_for_extensions=False, uppercase_only=False):
        """Serialize the PAC-ID followed by its extensions.

        Args:
            use_short_notation_for_extensions: Allow the short notation for
                leading extensions, see ``_serialize_extensions``.
            uppercase_only: Upper-case the complete result.
        """
        extensions_str = self._serialize_extensions(self.extensions, use_short_notation_for_extensions)
        out = self.pac_id.serialize() + extensions_str
        return out.upper() if uppercase_only else out

    def to_url(self, use_short_notation_for_extensions=False, uppercase_only=False) -> str:
        """Return exactly what ``serialize`` returns for the same arguments."""
        return self.serialize(use_short_notation_for_extensions, uppercase_only)

    @classmethod
    def deserialize(cls, url, extension_interpreters ):
        """Parse ``url`` with a ``PAC_Parser`` built from ``extension_interpreters``."""
        parser = PAC_Parser(extension_interpreters)
        return parser.parse_pac_with_extensions(url)

    def _serialize_extensions(self, extensions:list[Extension], use_short_notation_for_extensions):
        """Render ``extensions`` as a string in which each one is prefixed with '*'.

        In short notation the extension at position 0 is written as bare data
        if it is named 'N', and the one at position 1 likewise if it is named
        'SUM'. The first extension that does not fit this scheme switches the
        short notation off for all following extensions.
        """
        short_names = {0: 'N', 1: 'SUM'}
        short_notation = use_short_notation_for_extensions
        parts = []
        for i, e in enumerate(extensions):
            if short_notation and i in short_names:
                if e.name == short_names[i]:
                    parts.append(f'*{e.data}')
                    continue
                short_notation = False
            parts.append(f'*{e.name}${e.type}/{e.data}')
        return ''.join(parts)
class PAC_Parser():
    """Parses PAC-ID URLs and their '*'-separated extensions.

    Extension data is turned into objects by the classes in
    ``extension_interpreters`` (extension type -> class providing
    ``from_spec_fields``); unknown types fall back to ``UnknownExtension``.
    """

    # [HTTPS://][PAC.]<issuer>/<identifier>; the issuer must contain a dot.
    _PAC_ID_PATTERN = re.compile(r'(HTTPS://)?(PAC\.)?(?P<issuer>.+?\..+?)/(?P<identifier>.*)')
    # [<name>$<type>/]<data>
    _EXTENSION_PATTERN = re.compile(r'((?P<name>.+)\$(?P<type>.+)/)?(?P<data>.+)')
    # (name, type) assumed for unnamed extensions, by position.
    _POSITIONAL_DEFAULTS = (('N', 'N'), ('SUM', 'TREX'))

    def __init__(self, extension_interpreters: "dict[str, type[Extension]] | None" = None):
        # A missing or empty mapping selects the built-in interpreters.
        self.extension_interpreters = extension_interpreters or {'TREX': TREX, 'N': DisplayNames}

    def parse_pac_with_extensions(self, pac_url: str) -> "PACID_With_Extensions":
        """Parse a complete PAC URL into a ``PACID_With_Extensions``.

        Everything before the first '*' is the PAC-ID, everything after it
        the extensions.

        Raises:
            LabFREEDValidationError: The parsed object is not valid.
        """
        id_str, _, ext_str = pac_url.partition('*')

        pac_with_extension = PACID_With_Extensions(
            pac_id=self.parse_pac_id(id_str),
            extensions=self.parse_extensions(ext_str),
        )
        if not pac_with_extension.is_valid():
            raise LabFREEDValidationError(validation_msgs = pac_with_extension.get_nested_validation_messages())
        return pac_with_extension

    def parse_pac_id(self, id_str: str) -> "PACID":
        """Parse the PAC-ID part (no extensions) of a URL.

        Returns:
            A ``PACID``, converted with ``PAC_CAT.from_pac_id`` when a
            segment value contains '-'.

        Raises:
            ValueError: ``id_str`` does not have the form
                ``[HTTPS://][PAC.]<issuer>/<identifier>``.
        """
        match = self._PAC_ID_PATTERN.match(id_str)
        if match is None:
            raise ValueError(f'{id_str!r} is not a valid PAC-ID; expected [HTTPS://][PAC.]<issuer>/<identifier>')

        pac = PACID(issuer=match['issuer'],
                    identifier=self._parse_id_segments(match['identifier']))

        # A PAC with a '-' in any segment value is interpreted as category.
        # NOTE(review): the original comment said "starts with '-'", but the
        # check has always been "contains '-'" - confirm which is intended.
        if any('-' in s.value for s in pac.identifier):
            pac = PAC_CAT.from_pac_id(pac)
        return pac

    def _parse_id_segments(self, identifier: str):
        """Split ``identifier`` at '/' into ``IDSegment`` objects.

        Each part is either ``<value>`` or ``<key>:<value>``. One leading '/'
        is ignored.

        Raises:
            ValueError: A part contains more than one ':'.
        """
        if not identifier:
            return []

        id_segments = []
        for part in identifier.removeprefix('/').split('/'):
            fields = part.split(':')
            if len(fields) == 1:
                segment = IDSegment(value=fields[0])
            elif len(fields) == 2:
                segment = IDSegment(key=fields[0], value=fields[1])
            else:
                raise ValueError(f'invalid segment: {part}')
            id_segments.append(segment)
        return id_segments

    def parse_extensions(self, extensions_str: "str | None") -> "list[Extension]":
        """Parse a '*'-separated extension string.

        Extensions without ``<name>$<type>/`` get positional defaults
        (first: N/N, second: SUM/TREX). Once an extension carries a name,
        all following extensions must carry one too.

        Raises:
            ValueError: An extension cannot be parsed, or it has no name and
                no positional default applies.
        """
        extensions = []
        if not extensions_str:
            return extensions

        # Empty parts (e.g. from a leading '*') are dropped before numbering,
        # so they cannot shift the positional defaults.
        segments = [s for s in extensions_str.split('*') if s]

        use_defaults = True
        for i, segment in enumerate(segments):
            match = self._EXTENSION_PATTERN.match(segment)
            if match is None:
                raise ValueError(f'invalid extension: {segment!r}')
            name, ext_type, data = match['name'], match['type'], match['data']

            if name:
                use_defaults = False  # once a name was specified no longer assign defaults
            elif use_defaults and i < len(self._POSITIONAL_DEFAULTS):
                name, ext_type = self._POSITIONAL_DEFAULTS[i]
            else:
                raise ValueError(f'extension number {i}, must have name and type')

            # Convert to the registered subtype, if there is one.
            subtype = self.extension_interpreters.get(ext_type) or UnknownExtension
            extensions.append(subtype.from_spec_fields(name=name, type=ext_type, data=data))
        return extensions

labfreed/utilities/base36.py ADDED Viewed

@@ -0,0 +1,82 @@
+import re
+import string
+from pydantic import field_validator, RootModel
class base36(RootModel[str]):
    # String wrapper whose value may only contain A-Z and 0-9 (may be empty).

    @field_validator('root')
    @classmethod
    def validate_format(cls, v: str) -> str:
        """Return ``v`` unchanged if it consists solely of A-Z and 0-9."""
        if re.fullmatch(r'[A-Z0-9]*', v) is None:
            raise ValueError("Value must only contain uppercase letters and digits (A-Z, 0-9)")
        return v
def to_base36(s:str) -> base36:
    """Encode ``s`` as UTF-8 and write the bytes as one big-endian base36 number.

    The result is always a ``base36`` instance, including the zero case
    (empty string, or a string of NUL characters only), which yields '0'.
    Leading NUL bytes do not change the number and are therefore lost.
    """
    num = int.from_bytes(s.encode('utf-8'), byteorder='big', signed=False)

    # Note: this cannot be arbitrarily chosen. The alphabet corresponds to
    # what Python's int(s, base) accepts, so from_base36 can decode it.
    base36_chars = _alphabet(base=36)

    digits = []
    while num:
        num, i = divmod(num, 36)
        digits.append(base36_chars[i])

    # No digits were produced for zero; represent it by the zero digit.
    return base36(''.join(reversed(digits)) or base36_chars[0])
def from_base36(s36: "base36 | str") -> str:
    """Inverse of ``to_base36``.

    Args:
        s36: A ``base36`` instance or a plain base36 string. Decoding is
            case-insensitive.

    Returns:
        The decoded text.

    Raises:
        ValueError: ``s36`` is not a valid base36 number.
        UnicodeDecodeError: The decoded bytes are not valid UTF-8.
    """
    # int() with an explicit base only accepts str/bytes, so unwrap the
    # model's underlying string first; plain strings pass through.
    raw = getattr(s36, 'root', s36)

    # int() interprets each character as a digit of the standard alphabet
    # [0-9A-Z] (or a-z); it is case INsensitive.
    num = int(raw, 36)
    num_bytes = (num.bit_length() + 7) // 8
    return num.to_bytes(num_bytes, byteorder='big').decode('utf-8')
+def _alphabet(base):
+    """ returns an alphabet, which corresponds to what pythons int(s:str, base:int=10) function used.
+    """
+    if base < 2 or base > 36:
+        ValueError('base can only be between 2 and 36')
+    alphabet = (string.digits + string.ascii_uppercase)[0:base]
+    return alphabet
if __name__ == "__main__":
    # Manual round-trip demo: encode each sample, decode it again and print
    # whether the decoded text equals the input.
    samples = [
        "A",
        "B-500 B",
        "B-500 Ba",
        "B-500 Bal",
        "B-500 Bala",
        "B-500 Balanc",
        "B-500 Balance",
        "B-500 D",
        "Mini Spray Dryer S-300",
        "w3ApashAt!!£NAGDSAF*ç%&/()",
        "HELLOWORLD",
        "Helloworld",
        "$£äö!'?^{]<@#¦&¬|¢)&§°😀你好🌍🏯😇🎵🔥你👻🐉😀你好🌍🏯😇🎵🔥你👻🐉😀你好🌍🏯😇🎵🔥你👻🐉😀你好🌍🏯😇🎵🔥你👻🐉😀你好🌍🏯😇🎵🔥你👻🐉😀你好🌍🏯😇🎵🔥你👻🐉😀你好🌍🏯😇🎵🔥你👻🐉",
        "往跟住！師立甲錯什正再圓身升因月室",
        "Balance BAL500 @☣️Lab",
        "BAL500 @☣️Lab",
        "BAL-CLEAN",
        "Smørrebrød µ-Nutrients",
        "Demo Result from R-300",
        "Rotavapor R-300",
        "Rotavapor R-250",
        "Rotavapor R-220",
        "SyncorePlus",
    ]
    for original in samples:
        encoded = to_base36(original)
        decoded = from_base36(encoded)
        round_trips = (original == decoded)
        print(f'{original} >> {encoded} >> {decoded}: match:{round_trips}')

labfreed/utilities/extension_intertpreters.py ADDED Viewed

@@ -0,0 +1,4 @@
+from labfreed.DisplayNameExtension.DisplayNameExtension import DisplayNames
+from labfreed.TREX.data_model import TREX

labfreed/utilities/utility_types.py ADDED Viewed

@@ -0,0 +1,103 @@
+from functools import cache
+import json
+from pathlib import Path
+from rich import print
+from typing import Any, Tuple
+from typing_extensions import Annotated
+from pydantic import BaseModel, AfterValidator
+import quantities as pq
+from quantities import  units
+from labfreed.TREX.unece_units import unece_units
def validate_unit(unit_name:str) -> str :
    """
    Pydantic validator function for the unit.
    Checks that ``unit_name`` is the name of an attribute of the
    ``quantities`` package.

    Args:
        unit_name (str): unit symbol, e.g. 'kg'

    Returns:
        str: the input unit.

    Errors:
        raises an AssertionError if validation fails
    """
    # Raised explicitly instead of `assert False`: assert statements are
    # removed under `python -O`, which would let invalid units pass silently.
    # NOTE(review): hasattr() accepts any attribute of the package, not only
    # unit objects - confirm whether a stricter check is wanted.
    if not hasattr(pq, unit_name):
        raise AssertionError(f'{unit_name!r} is not a unit known to the quantities package')
    return unit_name
class Unit(BaseModel):
    # A unit described by its name and its symbol.
    # name is compared with the 'name' field of the UNECE records,
    # symbol with their 'symbol' field (see unece_unit_code_from_quantity).
    name: str
    symbol: str
class Quantity(BaseModel):
    # A numeric value together with its unit.
    value:int|float
    unit: Unit

    def __str__(self):
        """Return '<value> <symbol>'; the symbol is left empty for 'dimensionless'."""
        shown_symbol = "" if self.unit.symbol == "dimensionless" else self.unit.symbol
        return f"{self.value!s} {shown_symbol}"
def unece_unit_code_from_quantity(q:Quantity):
    """Return the UNECE common code of the unit of ``q``.

    A unit record matches when its name equals ``q.unit.name`` or its symbol
    equals ``q.unit.symbol``.

    Raises:
        ValueError: No record matches, or the matching records have
            different common codes.
    """
    codes = {
        u['commonCode']
        for u in unece_units()
        if u.get('name', '') == q.unit.name or u.get('symbol', '') == q.unit.symbol
    }
    if not codes:
        raise ValueError(f'No UNECE unit code found for Quantity {str(q)}' )
    if len(codes) > 1:
        # Previously reported as "No UNECE unit code found", which was misleading.
        raise ValueError(f'Ambiguous UNECE unit code for Quantity {str(q)}: candidates are {sorted(codes)}')
    return next(iter(codes))
+# class DataTable(list):
+#     def __init__(self, headers:tuple[str, Any]):
+#         for h in headers:
+#             if len(h) != 2:
+#                 raise ValueError(f'Headers must be tuples of length two. With a column name and type.')
+#             if not isinstance(h[0], str):
+#                 raise ValueError(f'Invalid type of header name {h[0]}. Must be str')
+#             if not (h[1]):
+#                 raise ValueError(f'Header type cannot be None')
+#         self.headers = headers
+#         super().__init__()
+#     def append(self, row:list):
+#         if len(row) != len(self.headers):
+#             raise ValueError(f'Row has different length than headers')
+#         super().append(row)
class DataTable(list):
    """A list of rows with optional column names.

    ``row_template`` holds a copy of the first row that was appended and is
    ``None`` while the table has never received a row.
    """

    def __init__(self, col_names:list[str]=None):
        self.col_names = col_names
        self.row_template = None
        super().__init__()

    def append(self, row:list):
        """Append ``row``; the first appended row is also stored as template."""
        # Compare with None: an empty first row is falsy and would otherwise
        # be replaced as template by the next row.
        if self.row_template is None:
            self.row_template = row.copy()
        super().append(row)

    def extend(self, iterable):
        """Append every row of ``iterable`` via ``append``."""
        for item in iterable:
            self.append(item)
+if __name__ == "__main__":
+    pass

labfreed/utilities/well_known_keys.py ADDED Viewed

@@ -0,0 +1,16 @@
+from enum import Enum
class WellKnownKeys(Enum):
    """Well-known segment keys, mapping a readable name to the key string."""
    GTIN = '01'
    BATCH = '10'
    SERIAL = '21'
    # Misspelled name kept for backward compatibility; it stays the
    # canonical member for value '240'.
    ADDITIONAL_IDINTIFIER = '240'
    # Correctly spelled alias of the member above (same value -> same member).
    ADDITIONAL_IDENTIFIER = '240'
    RUN_ID_ABSOLUTE = 'RNR'
    SAMPLE_ID = 'SMP'
    EXPERIMENT_ID = 'EXP'
    RESULT_ID = 'RST'
    METHOD_ID = 'MTD'
    REPORT_ID = 'RPT'
    TIMESTAMP = 'TS'
    VERSION = 'V'