PyPI - labfreed - Versions diffs - 0.0.8__py2.py3-none-any.whl → 0.0.10__py2.py3-none-any.whl - Mend

labfreed 0.0.8__py2.py3-none-any.whl → 0.0.10__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

labfreed/DisplayNameExtension/DisplayNameExtension.py +6 -3
labfreed/PAC_CAT/data_model copy.py +232 -0
labfreed/PAC_CAT/data_model.py +319 -59
labfreed/PAC_ID/data_model.py +89 -127
labfreed/PAC_ID/extensions.py +55 -0
labfreed/TREX/UneceUnits.json +33730 -0
labfreed/TREX/data_model.py +789 -0
labfreed/{TREXExtension → TREX}/parse.py +23 -16
labfreed/TREX/unece_units.py +106 -0
labfreed/__init__.py +1 -1
labfreed/parse_pac.py +189 -0
labfreed/{DisplayNameExtension → utilities}/base36.py +29 -13
labfreed/utilities/extension_intertpreters.py +4 -0
labfreed/utilities/utility_types.py +103 -0
labfreed/{PAC_ID/well_known_segment_keys.py → utilities/well_known_keys.py} +1 -1
labfreed/validation.py +117 -39
{labfreed-0.0.8.dist-info → labfreed-0.0.10.dist-info}/METADATA +1 -1
labfreed-0.0.10.dist-info/RECORD +22 -0
labfreed/PAC_ID/parse.py +0 -142
labfreed/PAC_ID/serialize.py +0 -60
labfreed/TREXExtension/data_model.py +0 -239
labfreed/TREXExtension/uncertainty.py +0 -32
labfreed/TREXExtension/unit_utilities.py +0 -143
labfreed-0.0.8.dist-info/RECORD +0 -19
{labfreed-0.0.8.dist-info → labfreed-0.0.10.dist-info}/WHEEL +0 -0
{labfreed-0.0.8.dist-info → labfreed-0.0.10.dist-info}/licenses/LICENSE +0 -0

labfreed/{TREXExtension → TREX}/parse.py RENAMED Viewed

@@ -1,10 +1,24 @@
 import logging
 import re
-from .data_model import TREX, T_REX_Segment_ParseError, TREX_SimpleSegment, TREX_Table
+from .data_model import *
+from labfreed.validation import LabFREEDValidationError
+class TREX_Parser():
+    """Parser that turns T-REX strings into ``TREX`` objects and checks their validity."""
+    def __init__(self, suppress_errors=False):
+        # When truthy, parse_trex_str hands back invalid results instead of raising.
+        self._suppress_errors = suppress_errors
+    def parse_trex_str(self, trex_str, name=None) -> TREX:
+        """Parse ``trex_str`` (optionally labelled with ``name``) into a ``TREX``.
+        ``print_validation_messages`` is called on every parse result.
+        Raises:
+            LabFREEDValidationError: the result is not valid and errors are not suppressed.
+        """
+        parsed = _from_trex_string(trex_str, name=name)
+        parsed.print_validation_messages(trex_str)
+        if parsed.is_valid() or self._suppress_errors:
+            return parsed
+        raise LabFREEDValidationError(validation_msgs = parsed.get_nested_validation_messages())
-def from_trex_string(trex_str, name=None, enforce_type=True) -> TREX:
+def _from_trex_string(trex_str, name=None, enforce_type=True) -> TREX:
     if not trex_str:
         raise ValueError(f'T-REX must be a string of non zero length')
@@ -37,17 +51,10 @@ def from_trex_string(trex_str, name=None, enforce_type=True) -> TREX:
     data = d.get('data')
-    segment_strings = data.split('+')
-    out_segments = dict()
-    for s in segment_strings:
-        # there are only two valid options. The segment is a scalar or a table.
-        # Constructors do the parsing anyways and raise exceptions if invalid data
-        # try both options and then let it fail
-        try:
-            segment = TREX_SimpleSegment.from_trex_segmentstring(s)
-        except T_REX_Segment_ParseError:
-            segment = TREX_Table.from_trex_segmentstring(s)
-        out_segments[segment.segment_name] = segment
-    trex = TREX(name_= name, segments=out_segments)
-    trex._trex_str = trex_str
-    return trex
+    trex = TREX.from_spec_fields(name=name, data=data)
+    return trex

labfreed/TREX/unece_units.py ADDED Viewed

@@ -0,0 +1,106 @@
+from functools import cache
+import json
+from pathlib import Path
+@cache
+def unece_units() -> list[dict]:
+    """Load the UNECE unit table from ``UneceUnits.json`` located next to this module.
+    The result is cached, so the file is parsed at most once per process and every
+    call returns the same list object; callers should treat it as read-only.
+    Returns:
+        list[dict]: the parsed JSON content.
+    """
+    json_path = Path(__file__).parent / 'UneceUnits.json'
+    # Explicit UTF-8: without it open() decodes with the platform's locale encoding,
+    # which is not UTF-8 everywhere and would garble or reject non-ASCII content.
+    with open(json_path, encoding='utf-8') as f:
+        return json.load(f)
+@cache
+def unece_unit_codes():
+    """Return the ``commonCode`` of every unit record whose ``state`` is ``'ACTIVE'`` (cached)."""
+    active_codes = []
+    for entry in unece_units():
+        if entry.get('state') == 'ACTIVE':
+            active_codes.append(entry.get('commonCode'))
+    return active_codes
+def unece_unit(unit_code):
+    """Return the first unit record whose ``commonCode`` equals ``unit_code``, or ``None`` if there is none."""
+    matches = [entry for entry in unece_units() if entry['commonCode'] == unit_code]
+    return matches[0] if matches else None
+def unit_symbol(unit:dict) ->str:
+    """Return the ``symbol`` entry of a unit record (``None`` when the key is absent)."""
+    symbol = unit.get('symbol')
+    return symbol
+def unit_name(unit:dict) ->str:
+    """Return the ``name`` entry of a unit record (``None`` when the key is absent)."""
+    name = unit.get('name')
+    return name
+# def quantity_from_UN_CEFACT(value:str, unit_UN_CEFACT) -> UnitQuantity:
+#     """
+#     Maps units from https://unece.org/trade/documents/revision-17-annexes-i-iii
+#     to an object of the quantities library https://python-quantities.readthedocs.io/en/latest/index.html
+#     """
+#     # cast to numeric type. try int first, which will fail if string has no decimals.
+#     # nothing to worry yet: try floast next. if that fails the input was not a str representation of a number
+#     try:
+#         value_out = int(value)
+#     except ValueError:
+#         try:
+#             value_out = float(value)
+#         except ValueError as e:
+#             raise Exception(f'Input {value} is not a str representation of a number') from e
+#     d = {um[0]: um[1] for um in unit_map}
+#     unit = d.get(unit_UN_CEFACT)
+#     if not unit:
+#         raise NotImplementedError(f"lookup for unit {unit} not implemented")
+#     out = UnitQuantity(data=value_out, unit_name=unit.name, unit_symbol=unit.symbol)
+#     return out
+# def quantity_to_UN_CEFACT(value:UnitQuantity ) -> Tuple[int|float, str]:
+#     d = {um[1].symbol: um[0] for um in unit_map}
+#     unit_un_cefact = d.get(value.unit_symbol)
+#     if not unit_un_cefact:
+#         raise NotImplementedError(f"lookup for unit {value.unit_symbol} not implemented")
+#     return value.data, unit_un_cefact
+def check_compatibility_unece_quantities():
+    """Print which ACTIVE UNECE units have a symbol that ``validate_unit`` accepts.
+    Diagnostic helper: every unit whose ``state`` is ``'ACTIVE'`` is checked. A unit counts as
+    failed when it has no symbol or when ``validate_unit`` raises ``AssertionError`` for its
+    symbol. The failed and successful units and a summary line are printed; nothing is returned.
+    """
+    # Imported here rather than at module level: labfreed.utilities.utility_types imports
+    # this module while it is being loaded, so a top-level import would be circular.
+    from labfreed.utilities.utility_types import validate_unit
+    unece = unece_units()
+    print(f'Number of units in file: {len(unece)}')
+    failed = []
+    succeeded = []
+    for u in unece:
+        if u.get('state') != 'ACTIVE':
+            continue
+        symbol = u.get('symbol')
+        if not symbol:
+            # a unit without a symbol cannot be validated at all
+            failed.append(u)
+            continue
+        try:
+            validate_unit(symbol)
+        except AssertionError:
+            failed.append(u)
+        else:
+            succeeded.append(u)
+    print('[blue] FAILED [/blue]')
+    for u in failed:
+        # double quotes outside, single quotes inside: reusing the same quote type inside an
+        # f-string is a SyntaxError before Python 3.12
+        print(f"{u.get('commonCode')}: {u.get('name')}")
+    print('[yellow] SUCCESSFUL [/yellow]')
+    for u in succeeded:
+        print(u)
+    print(f'{len(failed)} / {len(unece)} failed to convert')

labfreed/__init__.py CHANGED Viewed

@@ -2,4 +2,4 @@
 Python implementation of LabFREED building blocks
 '''
-__version__ = "0.0.8"
+__version__ = "0.0.10"

labfreed/parse_pac.py ADDED Viewed

@@ -0,0 +1,189 @@
+import re
+from types import MappingProxyType
+from labfreed.DisplayNameExtension.DisplayNameExtension import DisplayNames
+from labfreed.PAC_CAT.data_model import PAC_CAT
+from labfreed.PAC_ID.extensions import Extension, UnknownExtension
+from labfreed.TREX.data_model import TREX
+from .PAC_ID.data_model import *
+from .validation import ValidationMessage, LabFREEDValidationError
+class PACID_With_Extensions(BaseModelWithValidationMessages):
+    """A PAC-ID together with the extensions attached to it."""
+    # the identifier part
+    pac_id: PACID
+    # extensions, in the order in which they are serialized
+    extensions: list[Extension] = Field(default_factory=list)
+    def __str__(self):
+        """Return ``str(pac_id)`` followed by each extension, every one introduced by ``*``."""
+        # '*' precedes every extension (as in _serialize_extensions) so that the first
+        # extension does not run into the PAC-ID.
+        return str(self.pac_id) + ''.join(f'*{e}' for e in self.extensions)
+    def get_extension_of_type(self, type:str) -> list[Extension]:
+        """Return all extensions whose ``type`` equals ``type`` (possibly an empty list)."""
+        return [e for e in self.extensions if e.type == type]
+    def get_extension(self, name:str) -> Extension|None:
+        """Return the first extension named ``name``, or ``None`` if there is none."""
+        out = [e for e in self.extensions if e.name == name]
+        if not out:
+            return None
+        return out[0]
+    def serialize(self, use_short_notation_for_extensions=False, uppercase_only=False):
+        """Serialize the PAC-ID and its extensions into one string.
+        Args:
+            use_short_notation_for_extensions: write leading 'N' / 'SUM' extensions as data only.
+            uppercase_only: upper-case the complete result.
+        """
+        extensions_str = self._serialize_extensions(self.extensions, use_short_notation_for_extensions)
+        out = self.pac_id.serialize() + extensions_str
+        if uppercase_only:
+            out = out.upper()
+        return out
+    def to_url(self, use_short_notation_for_extensions=False, uppercase_only=False) -> str:
+        """Alias of ``serialize`` with the same arguments."""
+        return self.serialize(use_short_notation_for_extensions, uppercase_only)
+    @classmethod
+    def deserialize(cls, url, extension_interpreters ):
+        """Parse ``url`` with a ``PAC_Parser`` configured with ``extension_interpreters``."""
+        parser = PAC_Parser(extension_interpreters)
+        return parser.parse_pac_with_extensions(url)
+    def _serialize_extensions(self, extensions:list[Extension], use_short_notation_for_extensions):
+        """Render ``extensions`` as a string of ``*name$type/data`` parts.
+        With short notation, the extension at position 0 is written as ``*data`` if it is
+        named 'N', and the one at position 1 likewise if it is named 'SUM'. The first
+        extension that does not match its positional name ends short notation for the rest.
+        """
+        # names that may be omitted, by position
+        short_names = ('N', 'SUM')
+        out = ''
+        short_notation = use_short_notation_for_extensions
+        for i, e in enumerate(extensions):
+            if short_notation and i < len(short_names):
+                if e.name == short_names[i]:
+                    out += f'*{e.data}'
+                    continue
+                short_notation = False
+            out += f'*{e.name}${e.type}/{e.data}'
+        return out
+class PAC_Parser():
+    """Parser for PAC-ID URLs, optionally followed by ``*``-separated extensions."""
+    def __init__(self, extension_interpreters:dict[str, Extension]=None):
+        """
+        Args:
+            extension_interpreters: maps an extension type to the class that interprets it
+                (the class must provide ``from_spec_fields``). When omitted or empty,
+                'TREX' -> TREX and 'N' -> DisplayNames are used.
+        """
+        self.extension_interpreters = extension_interpreters or {'TREX': TREX, 'N': DisplayNames}
+    def parse_pac_with_extensions(self, pac_url:str) -> PACID_With_Extensions:
+        """Parse a PAC-ID and its extensions.
+        Everything before the first ``*`` is the PAC-ID, everything after it the extensions.
+        Raises:
+            LabFREEDValidationError: the parsed object is not valid.
+        """
+        id_str, _, ext_str = pac_url.partition('*')
+        pac_id = self.parse_pac_id(id_str)
+        extensions = self.parse_extensions(ext_str)
+        pac_with_extension = PACID_With_Extensions(pac_id=pac_id, extensions=extensions)
+        if not pac_with_extension.is_valid():
+            raise LabFREEDValidationError(validation_msgs = pac_with_extension.get_nested_validation_messages())
+        return pac_with_extension
+    def parse_pac_id(self,id_str:str) -> PACID:
+        """Parse the identifier part (no extensions) into a ``PACID`` or ``PAC_CAT``.
+        Raises:
+            ValueError: ``id_str`` does not have the form ``[HTTPS://][PAC.]issuer/identifier``.
+        """
+        # Raw string, and the dot after PAC is escaped: with an unescaped dot the optional
+        # prefix could swallow the start of an issuer such as 'PACE.X.COM'.
+        m = re.match(r'(HTTPS://)?(PAC\.)?(?P<issuer>.+?\..+?)/(?P<identifier>.*)', id_str)
+        if m is None:
+            raise ValueError(f'invalid PAC-ID: {id_str}')
+        id_segments = self._parse_id_segments(m.group('identifier'))
+        pac = PACID(issuer=m.group('issuer'), identifier=id_segments)
+        # a '-' in any segment value makes the pac a category
+        # NOTE(review): the original comment said "starts with '-'" while the code tests for
+        # '-' anywhere in the value; confirm which is intended.
+        if any('-' in s.value for s in pac.identifier):
+            pac = PAC_CAT.from_pac_id(pac)
+        return pac
+    def _parse_id_segments(self, identifier:str):
+        """Split ``identifier`` at '/' into ``IDSegment`` objects (``value`` or ``key:value``).
+        Raises:
+            ValueError: a segment contains more than one ':'.
+        """
+        if not identifier:
+            return []
+        # a single leading '/' is tolerated
+        identifier = identifier.removeprefix('/')
+        id_segments = []
+        for s in identifier.split('/'):
+            parts = s.split(':')
+            if len(parts) == 1:
+                segment = IDSegment(value=parts[0])
+            elif len(parts) == 2:
+                segment = IDSegment(key=parts[0], value=parts[1])
+            else:
+                raise ValueError(f'invalid segment: {s}')
+            id_segments.append(segment)
+        return id_segments
+    def parse_extensions(self, extensions_str:str|None) -> list[Extension]:
+        """Parse the ``*``-separated extensions string.
+        Each extension is either ``name$type/data`` or, in short notation, ``data`` only.
+        Short notation is accepted at position 0 (name 'N', type 'N') and position 1
+        (name 'SUM', type 'TREX'), and only as long as no earlier extension carried a name.
+        Raises:
+            ValueError: an extension without name and type appears where no default applies.
+        """
+        extensions = []
+        if not extensions_str:
+            return extensions
+        # positional (name, type) defaults for extensions given in short notation
+        defaults = {0: ('N', 'N'), 1: ('SUM', 'TREX')}
+        pattern = re.compile(r'((?P<name>.+)\$(?P<type>.+)/)?(?P<data>.+)')
+        for i, ext_str in enumerate(extensions_str.split('*')):
+            if ext_str == '': #this will happen if first extension starts with *
+                # NOTE(review): a skipped empty entry still advances the position i used for
+                # the defaults; confirm this is intended.
+                continue
+            m = pattern.match(ext_str)
+            if m is None:
+                raise ValueError(f'invalid extension: {ext_str}')
+            name, ext_type, data = m.group('name', 'type', 'data')
+            if name:
+                defaults = None # once a name was specified no longer assign defaults
+            elif defaults and i in defaults:
+                name, ext_type = defaults[i]
+            else:
+                # also reached for unnamed extensions beyond the positions that have defaults
+                raise ValueError(f'extension number {i}, must have name and type')
+            #convert to subtype if they were given
+            subtype = self.extension_interpreters.get(ext_type) or UnknownExtension
+            extensions.append(subtype.from_spec_fields(name=name, type=ext_type, data=data))
+        return extensions

labfreed/{DisplayNameExtension → utilities}/base36.py RENAMED Viewed

@@ -1,31 +1,37 @@
+import re
 import string
-def alphabet(base):
-    """ returns an alphabet, which corresponds to what pythons int(s:str, base:int=10) function used.
-    """
-    if base < 2 or base > 36:
-        ValueError('base can only be between 2 and 36')
-    alphabet = (string.digits + string.ascii_uppercase)[0:base]
-    return alphabet
+from pydantic import field_validator, RootModel
+class base36(RootModel[str]):
+    """String that may only consist of the characters A-Z and 0-9 (the empty string is accepted)."""
+    @field_validator('root')
+    @classmethod
+    def validate_format(cls, v: str) -> str:
+        """Return ``v`` unchanged if it consists solely of A-Z and 0-9, otherwise raise ``ValueError``."""
+        if re.fullmatch(r'[A-Z0-9]*', v):
+            return v
+        raise ValueError("Value must only contain uppercase letters and digits (A-Z, 0-9)")
-def to_base36(s:str):
+def to_base36(s:str) -> base36:
-    """Takes a string, encodes it in UTF-8 and then as base36 string."""
+    """Takes a string, encodes it in UTF-8 and then as base36 string.
+    The UTF-8 bytes are read as one big-endian unsigned integer, which is then written
+    with the digits 0-9A-Z.
+    Returns:
+        base36: the encoded value; '0' when the integer is zero (e.g. for an empty string).
+    """
     utf8_encoded = s.encode('utf-8')
     num = int.from_bytes(utf8_encoded, byteorder='big', signed=False)
     # note: this cannot be arbitrarily chosen. The choice here corresponds to what pythons int(s:str, base:int=10) function used.
-    base36_chars = alphabet(base=36)
+    base36_chars = _alphabet(base=36)
     if num == 0:
-        return base36_chars[0]
+        # wrap the zero case too, so that every path returns a base36 instance
+        return base36(base36_chars[0])
-    base36 = []
+    base_36 = []
     _num = num
     while _num:
         _num, i = divmod(_num, 36)
-        base36.append(base36_chars[i])
-    return ''.join(reversed(base36))
+        base_36.append(base36_chars[i])
+    b36_str = ''.join(reversed(base_36))
+    b36_str = base36(b36_str)
+    return b36_str
-def from_base36(s36:str):
+def from_base36(s36:base36) -> str:
     """inverse of to_base36"""
     # this built in function interprets each character as number in a base represented by the standartd alphabet [0-9 (A-Z|a-z)][0:base] it is case INsensitive.
     num = int(s36, 36)
@@ -34,6 +40,16 @@ def from_base36(s36:str):
     s = _bytes.decode('utf-8')
     return s
+def _alphabet(base):
+    """Return the first ``base`` characters of ``0-9A-Z``, the digit alphabet that ``int(s, base)`` understands.
+    Raises:
+        ValueError: ``base`` is smaller than 2 or larger than 36.
+    """
+    if base < 2 or base > 36:
+        # the exception has to be raised; merely constructing it lets invalid bases through
+        raise ValueError('base can only be between 2 and 36')
+    return (string.digits + string.ascii_uppercase)[0:base]
 if __name__ == "__main__":
     ss = ["A",
       "B-500 B",

labfreed/utilities/extension_intertpreters.py ADDED Viewed

@@ -0,0 +1,4 @@
+from labfreed.DisplayNameExtension.DisplayNameExtension import DisplayNames
+from labfreed.TREX.data_model import TREX

labfreed/utilities/utility_types.py ADDED Viewed

@@ -0,0 +1,103 @@
+from functools import cache
+import json
+from pathlib import Path
+from rich import print
+from typing import Any, Tuple
+from typing_extensions import Annotated
+from pydantic import BaseModel, AfterValidator
+import quantities as pq
+from quantities import  units
+from labfreed.TREX.unece_units import unece_units
+def validate_unit(unit_name:str) -> str :
+    """
+    Pydantic validator function for the unit.
+    A unit is accepted when the ``quantities`` package (``pq``) has an attribute of that name.
+    Args:
+        unit_name (str): unit symbol, e.g. 'kg'
+    Returns:
+        str: the input unit.
+    Errors:
+        raises an AssertionError if validation fails
+    """
+    if hasattr(pq, unit_name):
+        return unit_name
+    # Raised explicitly instead of `assert False`: assert statements are removed when
+    # Python runs with -O, which would turn a failed validation into a silent `None`.
+    raise AssertionError(f'{unit_name!r} is not an attribute of the quantities package')
+class Unit(BaseModel):
+    """Name and symbol of a unit."""
+    # compared with the 'name' field of UNECE unit records in unece_unit_code_from_quantity
+    name: str
+    # compared with the 'symbol' field of UNECE unit records; the value "dimensionless"
+    # is rendered as an empty string by Quantity.__str__
+    symbol: str
+class Quantity(BaseModel):
+    """A numeric value together with its unit."""
+    value:int|float
+    unit: Unit
+    def __str__(self):
+        """Format as '<value> <symbol>'; the symbol part is empty when it is "dimensionless"."""
+        symbol = "" if self.unit.symbol == "dimensionless" else self.unit.symbol
+        return f"{self.value!s} {symbol}"
+def unece_unit_code_from_quantity(q:Quantity):
+    """Return the UNECE ``commonCode`` for the unit of ``q``.
+    A unit record matches when its ``name`` equals ``q.unit.name`` or its ``symbol`` equals
+    ``q.unit.symbol``. Exactly one distinct code must result.
+    Raises:
+        ValueError: no record matches, or the matching records carry different codes.
+    """
+    codes = {
+        u['commonCode']
+        for u in unece_units()
+        if u.get('name', '') == q.unit.name or u.get('symbol', '') == q.unit.symbol
+    }
+    if not codes:
+        raise ValueError(f'No UNECE unit code found for Quantity {str(q)}' )
+    if len(codes) > 1:
+        # reported separately: "not found" would be misleading when several codes match
+        raise ValueError(f'Ambiguous UNECE unit code for Quantity {str(q)}: candidates are {sorted(codes)}')
+    return next(iter(codes))
+# class DataTable(list):
+#     def __init__(self, headers:tuple[str, Any]):
+#         for h in headers:
+#             if len(h) != 2:
+#                 raise ValueError(f'Headers must be tuples of length two. With a column name and type.')
+#             if not isinstance(h[0], str):
+#                 raise ValueError(f'Invalid type of header name {h[0]}. Must be str')
+#             if not (h[1]):
+#                 raise ValueError(f'Header type cannot be None')
+#         self.headers = headers
+#         super().__init__()
+#     def append(self, row:list):
+#         if len(row) != len(self.headers):
+#             raise ValueError(f'Row has different length than headers')
+#         super().append(row)
+class DataTable(list):
+    """List of rows that also keeps optional column names and a copy of the first appended row.
+    Attributes are documented here because the class body only assigns them in ``__init__``:
+    ``col_names`` holds the names passed to the constructor (or ``None``); ``row_template``
+    is ``None`` until the first row is appended and afterwards a shallow copy of that row.
+    """
+    def __init__(self, col_names:list[str]=None):
+        self.col_names = col_names
+        self.row_template = None
+        super().__init__()
+    def append(self, row:list):
+        """Append ``row``; the first appended row is additionally copied into ``row_template``."""
+        # `is None` rather than a truthiness test: an empty first row is still the first row
+        # and must not be replaced by the next one.
+        if self.row_template is None:
+            self.row_template = row.copy()
+        super().append(row)
+    def extend(self, iterable):
+        """Append every item of ``iterable`` via ``append`` so the template logic applies."""
+        for item in iterable:
+            self.append(item)
+if __name__ == "__main__":
+    pass

labfreed/{PAC_ID/well_known_segment_keys.py → utilities/well_known_keys.py} RENAMED Viewed

@@ -1,7 +1,7 @@
 from enum import Enum
-class WellKnownSegmentKeys(Enum):
+class WellKnownKeys(Enum):
     GTIN = '01'
     BATCH = '10'
     SERIAL = '21'

labfreed 0.0.8__py2.py3-none-any.whl → 0.0.10__py2.py3-none-any.whl

labfreed 0.0.8__py2.py3-none-any.whl → 0.0.10__py2.py3-none-any.whl