PyPI - kodexa - Versions diffs - 7.0.12399293688__py3-none-any.whl → 7.4.5a13228665254__py3-none-any.whl - Mend

kodexa 7.0.12399293688__py3-none-any.whl → 7.4.5a13228665254__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

kodexa/dataclasses/__init__.py +85 -74
kodexa/model/entities/product.py +59 -6
kodexa/model/entities/product_group.py +126 -0
kodexa/model/model.py +7 -4
kodexa/model/objects.py +155 -40
kodexa/model/persistence.py +87 -27
kodexa/pipeline/pipeline.py +6 -4
kodexa/platform/client.py +227 -14
{kodexa-7.0.12399293688.dist-info → kodexa-7.4.5a13228665254.dist-info}/METADATA +1 -1
{kodexa-7.0.12399293688.dist-info → kodexa-7.4.5a13228665254.dist-info}/RECORD +12 -11
{kodexa-7.0.12399293688.dist-info → kodexa-7.4.5a13228665254.dist-info}/LICENSE +0 -0
{kodexa-7.0.12399293688.dist-info → kodexa-7.4.5a13228665254.dist-info}/WHEEL +0 -0

kodexa/dataclasses/__init__.py CHANGED Viewed

@@ -54,21 +54,35 @@ class LLMDataAttribute(BaseModel):
         self.node_uuid_list = source.node_uuid_list
         self.page_number = source.page_number
-    def create_exception(
-            self,
-            exception_type_id: str,
-            exception_type: str,
-            normalized_text: str,
-            message: str,
-            exception_detail: str,
-    ):
-        content_exception = ContentException(
-            exception_type=exception_type,
-            exception_detail=exception_detail,
-            message=message,
-            tag_uuid=self.tag_uuid,
-        )
-        self.exceptions.append(content_exception)
+    def process_exceptions(self, document: "KodexaDocumentLLMWrapper"):
+        # Lets make sure we add all the content exceptions
+        if self.exceptions is not None:
+            for exception in self.exceptions:
+                # We have two types of exception, one in the API and one in the
+                # document
+                from kodexa.model import ContentException as KodexaContentException
+                internal_exception = KodexaContentException(
+                    tag=exception.tag,
+                    exception_type=exception.exception_type,
+                    message=exception.message,
+                    exception_details=exception.exception_details,
+                    severity=exception.severity,
+                    group_uuid=self.group_uuid,
+                    tag_uuid=self.tag_uuid,
+                )
+                document.doc.add_exception(internal_exception)
+    def to_dict(self, taxonomy: Taxonomy) -> dict:
+        """Convert attribute to JSON with normalized value"""
+        target_taxon = taxonomy.get_taxon_by_path(self.taxon_path)
+        if target_taxon is None:
+            return {}
+        taxon_external_name = target_taxon.external_name
+        return {
+            taxon_external_name: self.normalized_text if self.normalized_text else self.value
+        }
 class LLMDataObject(BaseModel):
@@ -99,6 +113,24 @@ class LLMDataObject(BaseModel):
     class Config:
         arbitrary_types_allowed = True
+    def process_exceptions(self, document: "KodexaDocumentLLMWrapper"):
+        # Lets make sure we add all the content exceptions
+        if self.exceptions is not None:
+            for exception in self.exceptions:
+                # We have two types of exception, one in the API and one in the
+                # document
+                from kodexa.model import ContentException as KodexaContentException
+                internal_exception = KodexaContentException(
+                    tag=exception.tag,
+                    exception_type=exception.exception_type,
+                    message=exception.message,
+                    exception_details=exception.exception_details,
+                    severity=exception.severity,
+                    group_uuid=self.group_uuid,
+                    tag_uuid=self.tag_uuid,
+                )
+                document.doc.add_exception(internal_exception)
     def get_all_review_pages(self):
         """
         Returns a list of unique page numbers that would be included in the review.
@@ -106,7 +138,7 @@ class LLMDataObject(BaseModel):
         :return: list of unique page numbers
         """
         pages = set()
-        for field in self.__fields__:
+        for field in self.model_fields:
             pages.update(self._get_field_pages(field))
         return sorted(list(pages))
@@ -154,6 +186,35 @@ class LLMDataObject(BaseModel):
         if 'normalized_text' in field_data:
             attr.normalized_text = field_data['normalized_text']
+    def to_dict(self, taxonomy: Taxonomy) -> dict:
+        """Convert data object to JSON using normalized values and taxon paths"""
+        result = {}
+        for field in self.model_fields:
+            value = getattr(self, field)
+            if isinstance(value, list) and len(value) > 0:
+                if isinstance(value[0], LLMDataObject):
+                    # We need to find the first field of the object that is a LLMDataAttribute
+                    # and use that to derive the taxon path of the LLMDataObject
+                    data_attribute = None
+                    for child_field in value[0].model_fields:
+                        child_attr = getattr(value[0], child_field)
+                        if isinstance(child_attr, LLMDataAttribute):
+                            data_attribute = child_attr
+                            break
+                    if data_attribute is not None:
+                        taxon_path = data_attribute.taxon_path.rsplit('/', 1)[0]
+                        target_taxon = taxonomy.get_taxon_by_path(taxon_path)
+                        if target_taxon is not None:
+                            result[target_taxon.external_name] = [item.to_dict(taxonomy) for item in value if isinstance(item, (LLMDataObject, LLMDataAttribute))]
+            elif isinstance(value, LLMDataAttribute):
+                result.update(value.to_dict(taxonomy))
+            elif isinstance(value, LLMDataObject):
+                target_taxon = taxonomy.get_taxon_by_path(value.taxon_path)
+                if target_taxon is not None:
+                    result[target_taxon.external_name] = value.to_dict(taxonomy)
+        return result
     def to_review(self, page_number=None):
         """
         Build a representation of the data object and its data attributes that is a dict that includes the
@@ -164,7 +225,7 @@ class LLMDataObject(BaseModel):
         :return: dict of this data object and children for the specified page
         """
         review = {}
-        for field in self.__fields__:
+        for field in self.model_fields:
             review_field = self._build_review(field, page_number)
             if review_field:
                 review[field] = review_field
@@ -193,26 +254,6 @@ class LLMDataObject(BaseModel):
         return None
-    def create_exception(
-            self,
-            exception_type_id: str,
-            exception_type: str,
-            message: str,
-            exception_detail: str,
-            severity: str = "ERROR",
-    ):
-        content_exception = ContentException(
-            exception_type=exception_type,
-            exception_details=exception_detail,
-            message=message,
-            group_uuid=self.group_uuid,
-            severity=severity,
-        )
-        if self.exceptions is None:
-            self.exceptions = []
-        self.exceptions.append(content_exception)
     def apply_labels(
             self, document: "KodexaDocumentLLMWrapper", parent_group_uuid: str = None,
             assistant: Optional["Assistant"] = None
@@ -234,24 +275,11 @@ class LLMDataObject(BaseModel):
         """
         # Lets make sure we add all the content exceptions
-        if self.exceptions is not None:
-            for exception in self.exceptions:
-                # We have two types of exception, one in the API and one in the
-                # document
-                from kodexa.model import ContentException as KodexaContentException
-                internal_exception = KodexaContentException(
-                    exception_type=exception.exception_type,
-                    message=exception.message,
-                    exception_details=exception.exception_details,
-                    severity=exception.severity,
-                    group_uuid=exception.group_uuid,
-                    tag_uuid=exception.tag_uuid,
-                )
-                document.doc.add_exception(internal_exception)
+        self.process_exceptions(document)
         # Let's go through this data object and find all the attributes that have a value
         # then we will apply the labels to the document
-        for field in self.__fields__:
+        for field in self.model_fields:
             logger.info(f"Processing field {field}")
             value = getattr(self, field)
@@ -270,8 +298,6 @@ class LLMDataObject(BaseModel):
             # We need to add the label to the document for this attribute
             tag = value.taxon_path
-            # TODO need to work out why we are missing them?
             logger.info(f"Value: {value.normalized_text}, node_uuid_list: {value.node_uuid_list}")
             if value.node_uuid_list is None:
                 value.node_uuid_list = value.line_ids
@@ -320,31 +346,16 @@ class LLMDataObject(BaseModel):
                         current_value.append(new_tag)
                         node.remove_feature("tag", tag)
                         node.add_feature("tag", tag, current_value, single=False)
-                        # try:
-                        #     if value.data_type == 'Derived':
-                        #         logger.info(f"Node already has tag {tag} - Tagging something nearby {node.get_all_content()}")
-                        #         nearby_node = find_nearby_word_to_tag(node, tag)
-                        #         nearby_node.tag(
-                        #             tag_to_apply=tag,
-                        #             value=value.normalized_text,
-                        #             tag_uuid=tag_uuid,
-                        #             cell_index=self.cell_index,
-                        #             selector="//word",
-                        #             confidence=-1,
-                        #             group_uuid=self.group_uuid,
-                        #             parent_group_uuid=parent_group_uuid,
-                        #             owner_uri=f"assistant://{assistant.id}" if assistant else f"model://taxonomy-llm",
-                        #         )
-                        #     else:
-                        #         logger.info(f"Node already has tag {tag} - Skipping.")
-                        # except:
-                        #     logger.error(f"Error tagging nearby node with tag {tag}")
             logger.info(f"Applied label {tag} to {len(nodes_to_label)} nodes")
+            # Lets make sure we add all the content exceptions
+            self.process_exceptions(document)
         if isinstance(value, LLMDataObject):
             # We need to apply the labels to the document for this object
             value.apply_labels(document, parent_group_uuid=self.group_uuid)
-            # logger.info(f"Applied labels to data object {value.group_uuid}")
+            logger.info(f"Applied labels to data object {value.group_uuid}")
 def find_nearby_word_to_tag(node, tag):

kodexa/model/entities/product.py CHANGED Viewed

@@ -1,13 +1,17 @@
-from typing import Optional, List
+from decimal import Decimal
+from typing import Optional, List, Set
 from pydantic import BaseModel, ConfigDict, Field
 from kodexa.model.base import StandardDateTime
 from kodexa.platform.client import EntityEndpoint, PageEndpoint, EntitiesEndpoint
+from .product_group import ProductGroup
+from ..objects import ProjectTemplate
-class Product(BaseModel):
+class ProjectTemplateMetadata(BaseModel):
     """
+    A project template metadata entity
     """
     model_config = ConfigDict(
         populate_by_name=True,
@@ -15,11 +19,41 @@ class Product(BaseModel):
         arbitrary_types_allowed=True,
         protected_namespaces=("model_config",),
     )
+    id: str
+class ProductProjectTemplate(BaseModel):
     """
-    A product
+    A product project template entity representing the relationship between products and project templates
     """
+    model_config = ConfigDict(
+        populate_by_name=True,
+        use_enum_values=True,
+        arbitrary_types_allowed=True,
+        protected_namespaces=("model_config",),
+    )
+    id: Optional[str] = None
+    uuid: Optional[str] = None
+    change_sequence: Optional[int] = Field(None, alias="changeSequence")
+    created_on: Optional[StandardDateTime] = Field(None, alias="createdOn")
+    updated_on: Optional[StandardDateTime] = Field(None, alias="updatedOn")
+    display_order: Optional[int] = Field(None, alias="displayOrder")
+    project_template_metadata: Optional[ProjectTemplateMetadata] = Field(None, alias="projectTemplateMetadata")
+class Product(BaseModel):
+    """
+    A product entity representing a product in the Kodexa platform
+    """
+    model_config = ConfigDict(
+        populate_by_name=True,
+        use_enum_values=True,
+        arbitrary_types_allowed=True,
+        protected_namespaces=("model_config",),
+    )
     id: Optional[str] = None
     uuid: Optional[str] = None
     change_sequence: Optional[int] = Field(None, alias="changeSequence")
@@ -28,6 +62,26 @@ class Product(BaseModel):
     name: str
     description: Optional[str] = None
     overview_markdown: Optional[str] = Field(None, alias="overviewMarkdown")
+    product_group: ProductGroup = Field(..., alias="productGroup")
+    parent: Optional['Product'] = None
+    image_url: Optional[str] = Field(None, alias="imageUrl")
+    price_id: Optional[str] = Field(None, alias="priceId")
+    price: Optional[Decimal] = None
+    number_of_credits: Optional[int] = Field(None, alias="numberOfCredits")
+    price_suffix: Optional[str] = Field(None, alias="priceSuffix")
+    has_quantity: bool = Field(False, alias="hasQuantity")
+    active: bool = True
+    order: Optional[int] = None
+    promoted: Optional[bool] = None
+    project_templates: Optional[Set[ProjectTemplate]] = Field(None, alias="projectTemplates")
+    search_text: Optional[str] = None
+    def update_search_text(self):
+        """Updates the search text for the product"""
+        if self.product_group:
+            self.search_text = f"{self.name.lower()} {self.product_group.name.lower()}"
+        else:
+            self.search_text = self.name.lower()
 class ProductEndpoint(Product, EntityEndpoint):
@@ -53,7 +107,7 @@ class ProductEndpoint(Product, EntityEndpoint):
 class PageProduct(BaseModel):
     """
+    Represents a paginated list of products
     """
     model_config = ConfigDict(
         populate_by_name=True,
@@ -66,7 +120,6 @@ class PageProduct(BaseModel):
     size: Optional[int] = None
     content: Optional[List[Product]] = None
     number: Optional[int] = None
     number_of_elements: Optional[int] = Field(None, alias="numberOfElements")
     first: Optional[bool] = None
     last: Optional[bool] = None

kodexa/model/entities/product_group.py ADDED Viewed

@@ -0,0 +1,126 @@
+from typing import Optional, List
+from pydantic import BaseModel, ConfigDict, Field
+from kodexa.model.base import StandardDateTime
+from kodexa.platform.client import EntityEndpoint, PageEndpoint, EntitiesEndpoint
+class ProductGroup(BaseModel):
+    """
+    """
+    model_config = ConfigDict(
+        populate_by_name=True,
+        use_enum_values=True,
+        arbitrary_types_allowed=True,
+        protected_namespaces=("model_config",),
+    )
+    """
+    A product group
+    """
+    id: Optional[str] = None
+    uuid: Optional[str] = None
+    change_sequence: Optional[int] = Field(None, alias="changeSequence")
+    created_on: Optional[StandardDateTime] = Field(None, alias="createdOn")
+    updated_on: Optional[StandardDateTime] = Field(None, alias="updatedOn")
+    name: str
+    description: Optional[str] = None
+    overview_markdown: Optional[str] = Field(None, alias="overviewMarkdown")
+class ProductGroupEndpoint(ProductGroup, EntityEndpoint):
+    """Handles the endpoint for a product group
+    This class is a combination of DataException and EntityEndpoint. It is used
+    to manage the endpoint for data exceptions.
+    Methods:
+        get_type: Returns the type of the endpoint.
+    """
+    def get_type(self) -> str:
+        """Gets the type of the endpoint.
+        This method returns the type of the endpoint which is "exceptions".
+        Returns:
+            str: The type of the endpoint.
+        """
+        return "product-groups"
+class PageProductGroup(BaseModel):
+    """
+    """
+    model_config = ConfigDict(
+        populate_by_name=True,
+        use_enum_values=True,
+        arbitrary_types_allowed=True,
+        protected_namespaces=("model_config",),
+    )
+    total_pages: Optional[int] = Field(None, alias="totalPages")
+    total_elements: Optional[int] = Field(None, alias="totalElements")
+    size: Optional[int] = None
+    content: Optional[List[ProductGroup]] = None
+    number: Optional[int] = None
+    number_of_elements: Optional[int] = Field(None, alias="numberOfElements")
+    first: Optional[bool] = None
+    last: Optional[bool] = None
+    empty: Optional[bool] = None
+class PageProductGroupEndpoint(PageProductGroup, PageEndpoint):
+    def get_type(self) -> Optional[str]:
+        return "product-group"
+class ProductGroupsEndpoint(EntitiesEndpoint):
+    """Represents the product groups endpoint
+    This class is used to represent the product groups endpoint in the system.
+    Attributes:
+        object_dict: A dictionary containing the object data.
+    """
+    """Represents a assistants endpoint"""
+    def get_type(self) -> str:
+        """Get the type of the endpoint
+        This method is used to get the type of the endpoint.
+        Returns:
+            str: The type of the endpoint.
+        """
+        return "product-groups"
+    def get_instance_class(self, object_dict=None):
+        """Get the instance class of the endpoint
+        This method is used to get the instance class of the endpoint.
+        Args:
+            object_dict (dict, optional): A dictionary containing the object data.
+        Returns:
+            AssistantEndpoint: The instance class of the endpoint.
+        """
+        return ProductGroupEndpoint
+    def get_page_class(self, object_dict=None):
+        """Get the page class of the endpoint
+        This method is used to get the page class of the endpoint.
+        Args:
+            object_dict (dict, optional): A dictionary containing the object data.
+        Returns:
+            PageAssistantEndpoint: The page class of the endpoint.
+        """
+        return PageProductGroupEndpoint

kodexa/model/model.py CHANGED Viewed

@@ -2443,11 +2443,14 @@ class Document(object):
     def get_exceptions(self) -> List[ContentException]:
         return self._persistence_layer.get_exceptions()
-    def get_external_data(self) -> dict:
-        return self._persistence_layer.get_external_data()
+    def get_external_data(self, key="default") -> dict:
+        return self._persistence_layer.get_external_data(key)
-    def set_external_data(self, external_data:dict):
-        return self._persistence_layer.set_external_data(external_data)
+    def get_external_data_keys(self) -> list[str]:
+        return self._persistence_layer.get_external_data_keys()
+    def set_external_data(self, external_data:dict, key="default"):
+        return self._persistence_layer.set_external_data(external_data, key)
     def get_steps(self) -> list[ProcessingStep]:
         return self._persistence_layer.get_steps()

kodexa 7.0.12399293688__py3-none-any.whl → 7.4.5a13228665254__py3-none-any.whl

kodexa 7.0.12399293688__py3-none-any.whl → 7.4.5a13228665254__py3-none-any.whl