PyPI - kodexa - Versions diffs - 7.0.11920845564__py3-none-any.whl → 7.0.12200160150__py3-none-any.whl - Mend

kodexa 7.0.11920845564__py3-none-any.whl → 7.0.12200160150__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

kodexa/model/utils.py ADDED Viewed

@@ -0,0 +1,92 @@
+import logging
+from kodexa import ContentNode
+logger = logging.getLogger(__name__)
+def get_pretty_text_from_lines(lines: list[ContentNode], scale, include_line_uuid=False) -> str:
+    """
+    Render lines as text, padding each word with spaces according to its scaled x position
+    :param lines: The line nodes to render; each one is queried for its '//word' nodes
+    :param scale: Multiplier applied to the first bbox value of each word to get a column
+    :param include_line_uuid: Prefix each rendered line with ('<line uuid>')
+    :return: The rendered lines, each terminated by a newline
+    """
+    rendered_lines = []
+    for line in lines:
+        pieces = [f"('{line.uuid}')"] if include_line_uuid else []
+        cursor = 0  # Scaled start of the previous word plus its text length
+        for word in line.select('//word'):
+            column = int(word.get_bbox()[0] * scale)
+            text = word.get_all_content()
+            # Words are always separated by at least one space, even when they would overlap
+            pieces.append(" " * max(1, column - cursor))
+            pieces.append(f"{text}")
+            cursor = column + len(text)
+        rendered_lines.append("".join(pieces) + "\n")
+    return "".join(rendered_lines)
+def get_max_width(lines: list[ContentNode], max_width=None) -> int:
+    """
+    Work out the character width to use when laying out a page
+    :param lines: The line nodes to measure; only consulted when max_width is None
+    :param max_width: An explicit width; when None it is estimated from the lines
+    :return: The width to use, never less than 250
+    """
+    if max_width is None:
+        def text_length(line):
+            return sum(len(word.get_all_content()) for word in line.select('//word'))
+        # Pick the line with the most word characters (not the most words); the default
+        # avoids the ValueError that max() raises when there are no lines at all
+        longest_line = max(lines, key=text_length, default=None)
+        if longest_line is None:
+            max_width = 0
+        else:
+            # Allow four extra columns per word on top of the text itself
+            max_width = text_length(longest_line) + len(longest_line.select('//word')) * 4 - 1
+    if max_width < 250:
+        max_width = 250
+    return max_width
+def get_scale_from_words(words: list[ContentNode], max_width) -> float:
+    """
+    Get the factor that maps the horizontal extent of the words onto max_width
+    :param words: The word nodes whose bboxes define the extent
+    :param max_width: The width the extent should be scaled to
+    :return: max_width divided by the extent, or 1.0 when there is no usable extent
+    """
+    bboxes = [word.get_bbox() for word in words]
+    if not bboxes:
+        return 1.0
+    # Only the horizontal extent matters; bbox indices 0 and 2 are used as its two edges
+    extent = max(bbox[2] for bbox in bboxes) - min(bbox[0] for bbox in bboxes)
+    if extent <= 0:
+        # Degenerate boxes would otherwise divide by zero (or produce a negative scale)
+        return 1.0
+    return max_width / extent
+def get_pretty_page(page: ContentNode, max_width=None, include_line_uuid=False) -> str:
+    """
+    Get a pretty representation of the page
+    :param page: The page to get the pretty representation for
+    :param max_width: The maximum width of the page
+    :param include_line_uuid: Include the line UUID in the pretty representation
+    :return: A pretty representation of the page, or the page's plain content if it has no words
+    """
+    logger.info("Getting pretty page %s", page.index)
+    words = page.select('//word')
+    if not words:
+        # Nothing to lay out, so skip measuring lines and words altogether
+        return page.get_all_content()
+    max_width = get_max_width(page.select('//line'), max_width)
+    logger.info("Max width: %s", max_width)
+    scale = get_scale_from_words(words, max_width)
+    # Content areas are separated by an extra blank line
+    pretty_text = "\n\n".join(
+        get_pretty_text_from_lines(area.select('//line'), scale, include_line_uuid)
+        for area in page.select('//content-area')
+    )
+    logger.debug("Pretty Page: %s: \n%s", page.index, pretty_text)
+    return pretty_text

{kodexa-7.0.11920845564.dist-info → kodexa-7.0.12200160150.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: kodexa
-Version: 7.0.11920845564
+Version: 7.0.12200160150
 Summary: Python SDK for the Kodexa Platform
 Author: Austin Redenbaugh
 Author-email: austin@kodexa.com

{kodexa-7.0.11920845564.dist-info → kodexa-7.0.12200160150.dist-info}/RECORD RENAMED Viewed

@@ -14,6 +14,7 @@ kodexa/model/entities/product_subscription.py,sha256=UcmWR-qgLfdV7VCtJNwzgkanoS8
 kodexa/model/model.py,sha256=wY5HnpsAnKlH_aDEHWNf-ZrhdrBg-DtqGFszjkdZtPU,118340
 kodexa/model/objects.py,sha256=CE76KwQwIT6FdWJuac8aIumX_Ok6-9oq1JXz0K_gdwo,185117
 kodexa/model/persistence.py,sha256=PTh9jmqYCDuWfiuCssLttFaYWiMA_fCiwjgsYDW4AhE,68281
+kodexa/model/utils.py,sha256=6R-3rFiW9irBwj0Mq5yhp7EDXkNUFaeFhr3bWmnlW4g,2961
 kodexa/pipeline/__init__.py,sha256=sA7f5D6qkdMrpp2xTIeefnrUBI6xxEEWostvxfX_1Cs,236
 kodexa/pipeline/pipeline.py,sha256=ZYpJAWcwV4YRK589DUhU0vXGQlkNSj4J2TsGbYqTLjo,25221
 kodexa/platform/__init__.py,sha256=1O3oiWMg292NPL_NacKDnK1T3_R6cMorrPRue_9e-O4,216
@@ -42,7 +43,7 @@ kodexa/testing/test_utils.py,sha256=DrLCkHxdb6AbZ-X3WmTMbQmnVIm55VEBL8MjtUK9POs,
 kodexa/training/__init__.py,sha256=xs2L62YpRkIRfslQwtQZ5Yxjhm7sLzX2TrVX6EuBnZQ,52
 kodexa/training/train_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 kodexa/utils/__init__.py,sha256=Pnim1o9_db5YEnNvDTxpM7HG-qTlL6n8JwFwOafU9wo,5928
-kodexa-7.0.11920845564.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
-kodexa-7.0.11920845564.dist-info/METADATA,sha256=rL71hJphMwIizznP28gRoyhyo9-tviViV3gaTl_zU4A,3527
-kodexa-7.0.11920845564.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
-kodexa-7.0.11920845564.dist-info/RECORD,,
+kodexa-7.0.12200160150.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
+kodexa-7.0.12200160150.dist-info/METADATA,sha256=YeHgvKBNAQbHkYNNR3xHuf4LJNbuJkCClUbWs1C85nk,3527
+kodexa-7.0.12200160150.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
+kodexa-7.0.12200160150.dist-info/RECORD,,

{kodexa-7.0.11920845564.dist-info → kodexa-7.0.12200160150.dist-info}/LICENSE RENAMED Viewed

File without changes

{kodexa-7.0.11920845564.dist-info → kodexa-7.0.12200160150.dist-info}/WHEEL RENAMED Viewed

File without changes

kodexa 7.0.11920845564__py3-none-any.whl → 7.0.12200160150__py3-none-any.whl

kodexa 7.0.11920845564__py3-none-any.whl → 7.0.12200160150__py3-none-any.whl