kodexa 7.0.11915973289__py3-none-any.whl → 7.0.12200160150__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kodexa/model/utils.py ADDED
@@ -0,0 +1,92 @@
1
+ import logging
2
+
3
+ from kodexa import ContentNode
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
def get_pretty_text_from_lines(lines: list[ContentNode], scale, include_line_uuid=False) -> str:
    """
    Render lines as plain text, placing each word at a column derived from its bbox

    Each word is placed at column ``int(bbox[0] * scale)`` of its line, counted
    from the end of the optional UUID prefix. Words are always separated by
    at least one space, even when their scaled positions overlap.

    :param lines: The line nodes to render; each is queried with ``//word``
    :param scale: Factor applied to a word's ``bbox[0]`` to get its column
    :param include_line_uuid: Prefix every rendered line with ``('<line uuid>')``

    :return: The rendered lines, each terminated by a newline
    """
    rendered_lines = []
    for line in lines:
        parts = [f"('{line.uuid}')"] if include_line_uuid else []

        # Number of characters actually emitted so far for this line
        # (excluding the UUID prefix, which is the same width on every line).
        cursor = 0
        for word in line.select('//word'):
            text = f"{word.get_all_content()}"
            target_column = int(word.get_bbox()[0] * scale)
            gap = max(1, target_column - cursor)  # Ensure at least one space
            parts.append(" " * gap)
            parts.append(text)
            # Advance by what was really written. Resetting the cursor to
            # target_column + len(text) would under-count whenever the forced
            # single space pushed this word past its target, and every later
            # word on the line would then drift to the right.
            cursor += gap + len(text)

        rendered_lines.append("".join(parts) + "\n")

    return "".join(rendered_lines)
22
+
23
+
24
def get_max_width(lines: list[ContentNode], max_width=None) -> int:
    """
    Work out the character width to use when rendering the given lines

    If ``max_width`` is supplied it is returned as is. Otherwise the width is
    derived from the line whose words hold the most characters in total (the
    first such line on a tie): that character count, plus four columns per
    word, minus one, and never less than 250. An empty ``lines`` list yields
    the 250 minimum instead of raising.

    :param lines: The line nodes to measure; each is queried with ``//word``
    :param max_width: An explicit width that short-circuits the calculation

    :return: The width to render with
    """
    # NOTE(review): this block was recovered from a diff view that dropped
    # indentation; the 250 floor is assumed to apply only to the derived
    # width, not to a caller-supplied max_width - confirm against upstream.
    if max_width is not None:
        return max_width

    min_width = 250

    # Track the line with the largest total character count. Starting at -1
    # lets a line made only of empty words still be selected, and the strict
    # comparison keeps the first line on a tie.
    most_chars = -1
    word_count = 0
    for line in lines:
        words = line.select('//word')
        chars = sum(len(word.get_all_content()) for word in words)
        if chars > most_chars:
            most_chars, word_count = chars, len(words)

    if most_chars < 0:
        # No lines at all: nothing to measure, fall back to the minimum.
        return min_width

    # Characters of the widest line plus a four-column allowance per word.
    derived_width = most_chars + (word_count * 4) - 1
    return max(derived_width, min_width)
36
+
37
+
38
def get_scale_from_words(words: list[ContentNode], max_width) -> float:
    """
    Get the factor that maps the horizontal extent of the words onto max_width

    The extent is measured from the smallest ``bbox[0]`` to the largest
    ``bbox[2]`` across all words.

    :param words: The word nodes whose bounding boxes define the extent
    :param max_width: The width the extent should be scaled to

    :return: ``max_width`` divided by the extent, or ``1.0`` when there are no
             words or the extent is zero (no meaningful scale can be derived)
    """
    # Get the bboxes
    bboxes = [word.get_bbox() for word in words]
    if not bboxes:
        return 1.0

    # Only the horizontal extent matters for the scale; the vertical bounds
    # were previously computed and swapped but never used.
    min_x = min(bbox[0] for bbox in bboxes)
    max_x = max(bbox[2] for bbox in bboxes)

    extent = max_x - min_x
    if extent == 0:
        # All words share one x extent; avoid dividing by zero.
        return 1.0

    # Calculate scale factor to fit within max_width
    return max_width / extent
55
+
56
+
57
def get_pretty_page(page: ContentNode, max_width=None, include_line_uuid=False) -> str:
    """
    Get a pretty representation of the page

    Each content area of the page is rendered line by line, with words
    positioned horizontally according to their bounding boxes, and areas are
    separated by two extra newlines. A page without any words is returned as
    its raw content instead.

    :param page: The page to get the pretty representation for
    :param max_width: The maximum width of the page; derived from the page's
                      lines when not given
    :param include_line_uuid: Include the line UUID in the pretty representation

    :return: A pretty representation of the page
    """

    logger.info("Getting pretty page %s", page.index)

    # Check for words first: a page with nothing to lay out is returned as
    # its raw content, so the width and scale calculations (which need at
    # least one line/word to work with) are skipped entirely.
    words = page.select('//word')
    if not words:
        return page.get_all_content()

    max_width = get_max_width(page.select('//line'), max_width)
    logger.info("Max width: %s", max_width)

    scale = get_scale_from_words(words, max_width)

    # Render every content area on its own and put an extra blank gap
    # between consecutive areas.
    rendered_areas = [
        get_pretty_text_from_lines(area.select('//line'), scale, include_line_uuid)
        for area in page.select('//content-area')
    ]
    pretty_text = "\n\n".join(rendered_areas)

    # Lazy %-style arguments avoid building the full page text for the log
    # message unless debug logging is actually enabled.
    logger.debug("Pretty Page: %s: \n%s", page.index, pretty_text)

    return pretty_text
kodexa/platform/kodexa.py CHANGED
@@ -927,6 +927,7 @@ class EventHelper:
927
927
  if ce.content_object:
928
928
  logger.info(f"Setting content object for context: {ce.content_object}")
929
929
  context.content_object = ce.content_object
930
+ context.document_family = ce.document_family
930
931
  logger.info(f"Getting document store for content object: {context.content_object.store_ref}")
931
932
  context.document_store = KodexaClient().get_object_by_ref("store", context.content_object.store_ref)
932
933
  logger.info("Returning context")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kodexa
3
- Version: 7.0.11915973289
3
+ Version: 7.0.12200160150
4
4
  Summary: Python SDK for the Kodexa Platform
5
5
  Author: Austin Redenbaugh
6
6
  Author-email: austin@kodexa.com
@@ -14,12 +14,13 @@ kodexa/model/entities/product_subscription.py,sha256=UcmWR-qgLfdV7VCtJNwzgkanoS8
14
14
  kodexa/model/model.py,sha256=wY5HnpsAnKlH_aDEHWNf-ZrhdrBg-DtqGFszjkdZtPU,118340
15
15
  kodexa/model/objects.py,sha256=CE76KwQwIT6FdWJuac8aIumX_Ok6-9oq1JXz0K_gdwo,185117
16
16
  kodexa/model/persistence.py,sha256=PTh9jmqYCDuWfiuCssLttFaYWiMA_fCiwjgsYDW4AhE,68281
17
+ kodexa/model/utils.py,sha256=6R-3rFiW9irBwj0Mq5yhp7EDXkNUFaeFhr3bWmnlW4g,2961
17
18
  kodexa/pipeline/__init__.py,sha256=sA7f5D6qkdMrpp2xTIeefnrUBI6xxEEWostvxfX_1Cs,236
18
19
  kodexa/pipeline/pipeline.py,sha256=ZYpJAWcwV4YRK589DUhU0vXGQlkNSj4J2TsGbYqTLjo,25221
19
20
  kodexa/platform/__init__.py,sha256=1O3oiWMg292NPL_NacKDnK1T3_R6cMorrPRue_9e-O4,216
20
21
  kodexa/platform/client.py,sha256=PWn-Xk3vH993Ne5YXDKqJT9rmj7okwVBivuQv9rt2UQ,226591
21
22
  kodexa/platform/interaction.py,sha256=6zpcwXKNZstUGNS6m4JsoRXAqCZPJHWI-ZN3co8nnF0,1055
22
- kodexa/platform/kodexa.py,sha256=3qRbEtLOw4yl7OV5ISZZ85N9gKeb5DA4XtHzieFIdYc,34796
23
+ kodexa/platform/kodexa.py,sha256=tPXHO500q3S75GhKGDcaxO51Viq2PNlHmAzpBZlahgo,34857
23
24
  kodexa/selectors/__init__.py,sha256=xA9-4vpyaAZWPSk3bh2kvDLkdv6XEmm7PjFbpziiTIk,100
24
25
  kodexa/selectors/ast.py,sha256=gG-1st841IntgBE5V7p3Cq9azaym2jV5lB_AIywQTCI,13269
25
26
  kodexa/selectors/core.py,sha256=kkt02DN20gXeaDGoGubPPeeTV7rCr4sxTyELrI0l1YU,3691
@@ -42,7 +43,7 @@ kodexa/testing/test_utils.py,sha256=DrLCkHxdb6AbZ-X3WmTMbQmnVIm55VEBL8MjtUK9POs,
42
43
  kodexa/training/__init__.py,sha256=xs2L62YpRkIRfslQwtQZ5Yxjhm7sLzX2TrVX6EuBnZQ,52
43
44
  kodexa/training/train_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
45
  kodexa/utils/__init__.py,sha256=Pnim1o9_db5YEnNvDTxpM7HG-qTlL6n8JwFwOafU9wo,5928
45
- kodexa-7.0.11915973289.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
46
- kodexa-7.0.11915973289.dist-info/METADATA,sha256=l2eEBRv0T2-WqQCG8XhtgDp8DXkFCDS6YLbOSI2Biic,3527
47
- kodexa-7.0.11915973289.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
48
- kodexa-7.0.11915973289.dist-info/RECORD,,
46
+ kodexa-7.0.12200160150.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
47
+ kodexa-7.0.12200160150.dist-info/METADATA,sha256=YeHgvKBNAQbHkYNNR3xHuf4LJNbuJkCClUbWs1C85nk,3527
48
+ kodexa-7.0.12200160150.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
49
+ kodexa-7.0.12200160150.dist-info/RECORD,,