PyPI - kodexa - Versions diffs - 6.0.180__tar.gz → 6.0.184__tar.gz - Mend

kodexa 6.0.180 (tar.gz) → 6.0.184 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

{kodexa-6.0.180 → kodexa-6.0.184}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: kodexa
-Version: 6.0.180
+Version: 6.0.184
 Summary: Python SDK for the Kodexa Platform
 Author: Austin Redenbaugh
 Author-email: austin@kodexa.com

{kodexa-6.0.180 → kodexa-6.0.184}/kodexa/model/model.py RENAMED Viewed

@@ -2016,10 +2016,7 @@ class Document(object):
                 'content_node': self.content_node.to_dict() if self.content_node else None,
                 'source': clean_none_values(dataclasses.asdict(self.source)),
                 'mixins': self._mixins,
-                'taxonomies': self.taxonomies,
                 'classes': [content_class.to_dict() for content_class in self.classes],
-                'exceptions': self.exceptions,
-                'log': self.log,
                 'labels': self.labels,
                 'uuid': self.uuid}
@@ -2039,8 +2036,6 @@ class Document(object):
         new_document = Document(DocumentMetadata(doc_dict['metadata']))
         new_document.version = doc_dict['version'] if 'version' in doc_dict and doc_dict[
             'version'] else Document.PREVIOUS_VERSION  # some older docs don't have a version or it's None
-        new_document.log = doc_dict['log'] if 'log' in doc_dict else []
-        new_document.exceptions = doc_dict['exceptions'] if 'exceptions' in doc_dict else []
         new_document.uuid = doc_dict['uuid'] if 'uuid' in doc_dict else str(
             uuid.uuid5(uuid.NAMESPACE_DNS, 'kodexa.com'))
@@ -2051,11 +2046,6 @@ class Document(object):
             new_document.source = SourceMetadata.from_dict(doc_dict['source'])
         if 'labels' in doc_dict and doc_dict['labels']:
             new_document.labels = doc_dict['labels']
-        if 'taxomomies' in doc_dict and doc_dict['taxomomies']:
-            new_document.labels = doc_dict['taxomomies']
-        if 'classes' in doc_dict and doc_dict['classes']:
-            new_document.classes = [ContentClassification.from_dict(content_class) for content_class in
-                                    doc_dict['classes']]
         new_document.get_persistence().update_metadata()
         return new_document

{kodexa-6.0.180 → kodexa-6.0.184}/kodexa/pipeline/pipeline.py RENAMED Viewed

@@ -603,7 +603,6 @@ class PipelineStatistics:
     def __init__(self):
         self.documents_processed = 0
-        self.document_exceptions = 0
     def processed_document(self, document):
         """Update statistics based on this document completing processing
@@ -615,6 +614,3 @@ class PipelineStatistics:
         """
         self.documents_processed += 1
-        if document and document.exceptions:
-            self.document_exceptions += 1

{kodexa-6.0.180 → kodexa-6.0.184}/kodexa/spatial/azure_models.py RENAMED Viewed

@@ -68,6 +68,8 @@ def create_kddb_from_azure(azure_data, keep_azure_lines=True, overlap_percentage
         if issue_found:
             return None
     document.content_node = root_node
     document.add_mixin('spatial')
@@ -231,6 +233,22 @@ def get_azure_next_line(document_lines, ref_line, direction='right', overlap_per
         return sorted_next_up_lines[0]
+    elif direction == 'up_left':
+        # Get all the lines above of the cell, where the x is to the left of the cell
+        up_left_lines = [up_line for up_line in possible_lines if
+                         up_line.get_bbox()[1] >= ref_bbox[3] and
+                         ref_bbox[2] > up_line.get_x() and ref_bbox[0] - up_line.get_bbox()[2] <= 0.75]
+        if not up_left_lines:
+            return None
+        # Sort by y (decreasing since 0 is at the bottom of the page)
+        sorted_next_up_left_lines = [up_left_lines[0]]
+        [sorted_next_up_left_lines.insert(0, up_left_line) for up_left_line in up_left_lines
+         if up_left_line.get_bbox()[1] < sorted_next_up_left_lines[0].get_bbox()[1]]
+        return sorted_next_up_left_lines[0]
     return None

{kodexa-6.0.180 → kodexa-6.0.184}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "kodexa"
-version = "6.0.180"
+version = "6.0.184"
 description = "Python SDK for the Kodexa Platform"
 authors = ["Austin Redenbaugh <austin@kodexa.com>", "Philip Dodds <philip@kodexa.com>", "Romar Cablao <rcablao@kodexa.com>", "Amadea Paula Dodds <amadeapaula@kodexa.com>"]
 readme = "README.md"

{kodexa-6.0.180 → kodexa-6.0.184}/setup.py RENAMED Viewed

@@ -37,7 +37,7 @@ install_requires = \
 setup_kwargs = {
     'name': 'kodexa',
-    'version': '6.0.180',
+    'version': '6.0.184',
     'description': 'Python SDK for the Kodexa Platform',
     'long_description': '# Kodexa\n\n[![Build and Package with Poetry](https://github.com/kodexa-ai/kodexa/actions/workflows/main.yml/badge.svg?branch=main)](https://github.com/kodexa-ai/kodexa/actions/workflows/main.yml)\n\n![img.png](https://docs.kodexa.com/img.png)\n\nKodexa is a platform for building intelligent document processing pipelines. It is a set of tools and services that\nallow you to build a pipeline that can take a document, extract the content, and then process it to extract the\ninformation you need.\n\nIt is built on a set of core principles:\n\n* **Document Centric** - Kodexa is built around the idea of a document. A document is a collection of content\n  nodes that are connected together. This is a powerful model that allows you to build pipelines that can\n  extract content from a wide range of sources.\n\n* **Pipeline Oriented** - Kodexa is built around the idea of a pipeline. A pipeline is a series of steps that\n  can be executed on a document. This allows you to build a pipeline that can extract content from a wide range\n  of sources.\n\n* **Extensible** - Kodexa is built around the idea of a pipeline. A pipeline is a series of steps that can be executed\n  on a document. This allows you to build a pipeline that can extract content from a wide range of sources.\n\n* **Label Driven** - Kodexa focuses on the idea of labels. Labels are a way to identify content within a document\n  and then use that content to drive the processing of the document.\n\n# Python SDK\n\nThis repository contains the Python SDK for Kodexa. The SDK is the primary way to interact with Kodexa. It allows you to\ndefine actions, models, and pipelines that can be executed on Kodexa. 
It also includes a complete SDK client for\nworking with a Kodexa platform instance.\n\n## Documentation & Examples\n\nDocumentation is available at the [Kodexa Documentation Portal](https://docs.kodexa.com)\n\n## Current Development\n\nThe main branch is 6.0 which is a production release.\n\n## Set-up\n\nWe use poetry to manage our dependencies, so you can install them with:\n\n    poetry install\n\nYou can then run the tests with:\n\n    poetry run pytest\n\n# Contributing\n\nWe welcome contributions to the Kodexa platform. Please see our [contributing guide](CONTRIBUTING.md) for more details.\n\n# License\n\nApache 2.0\n',
     'author': 'Austin Redenbaugh',