kodexa 6.0.180__py3-none-any.whl → 6.0.184__py3-none-any.whl
This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- kodexa/model/model.py +0 -10
- kodexa/pipeline/pipeline.py +0 -4
- kodexa/spatial/azure_models.py +18 -0
- {kodexa-6.0.180.dist-info → kodexa-6.0.184.dist-info}/METADATA +1 -1
- {kodexa-6.0.180.dist-info → kodexa-6.0.184.dist-info}/RECORD +7 -7
- {kodexa-6.0.180.dist-info → kodexa-6.0.184.dist-info}/WHEEL +1 -1
- {kodexa-6.0.180.dist-info → kodexa-6.0.184.dist-info}/LICENSE +0 -0
kodexa/model/model.py
CHANGED
@@ -2016,10 +2016,7 @@ class Document(object):
|
|
2016
2016
|
'content_node': self.content_node.to_dict() if self.content_node else None,
|
2017
2017
|
'source': clean_none_values(dataclasses.asdict(self.source)),
|
2018
2018
|
'mixins': self._mixins,
|
2019
|
-
'taxonomies': self.taxonomies,
|
2020
2019
|
'classes': [content_class.to_dict() for content_class in self.classes],
|
2021
|
-
'exceptions': self.exceptions,
|
2022
|
-
'log': self.log,
|
2023
2020
|
'labels': self.labels,
|
2024
2021
|
'uuid': self.uuid}
|
2025
2022
|
|
@@ -2039,8 +2036,6 @@ class Document(object):
|
|
2039
2036
|
new_document = Document(DocumentMetadata(doc_dict['metadata']))
|
2040
2037
|
new_document.version = doc_dict['version'] if 'version' in doc_dict and doc_dict[
|
2041
2038
|
'version'] else Document.PREVIOUS_VERSION # some older docs don't have a version or it's None
|
2042
|
-
new_document.log = doc_dict['log'] if 'log' in doc_dict else []
|
2043
|
-
new_document.exceptions = doc_dict['exceptions'] if 'exceptions' in doc_dict else []
|
2044
2039
|
new_document.uuid = doc_dict['uuid'] if 'uuid' in doc_dict else str(
|
2045
2040
|
uuid.uuid5(uuid.NAMESPACE_DNS, 'kodexa.com'))
|
2046
2041
|
|
@@ -2051,11 +2046,6 @@ class Document(object):
|
|
2051
2046
|
new_document.source = SourceMetadata.from_dict(doc_dict['source'])
|
2052
2047
|
if 'labels' in doc_dict and doc_dict['labels']:
|
2053
2048
|
new_document.labels = doc_dict['labels']
|
2054
|
-
if 'taxomomies' in doc_dict and doc_dict['taxomomies']:
|
2055
|
-
new_document.labels = doc_dict['taxomomies']
|
2056
|
-
if 'classes' in doc_dict and doc_dict['classes']:
|
2057
|
-
new_document.classes = [ContentClassification.from_dict(content_class) for content_class in
|
2058
|
-
doc_dict['classes']]
|
2059
2049
|
|
2060
2050
|
new_document.get_persistence().update_metadata()
|
2061
2051
|
return new_document
|
kodexa/pipeline/pipeline.py
CHANGED
@@ -603,7 +603,6 @@ class PipelineStatistics:
|
|
603
603
|
|
604
604
|
def __init__(self):
|
605
605
|
self.documents_processed = 0
|
606
|
-
self.document_exceptions = 0
|
607
606
|
|
608
607
|
def processed_document(self, document):
|
609
608
|
"""Update statistics based on this document completing processing
|
@@ -615,6 +614,3 @@ class PipelineStatistics:
|
|
615
614
|
|
616
615
|
"""
|
617
616
|
self.documents_processed += 1
|
618
|
-
|
619
|
-
if document and document.exceptions:
|
620
|
-
self.document_exceptions += 1
|
kodexa/spatial/azure_models.py
CHANGED
@@ -68,6 +68,8 @@ def create_kddb_from_azure(azure_data, keep_azure_lines=True, overlap_percentage
|
|
68
68
|
if issue_found:
|
69
69
|
return None
|
70
70
|
|
71
|
+
|
72
|
+
|
71
73
|
document.content_node = root_node
|
72
74
|
document.add_mixin('spatial')
|
73
75
|
|
@@ -231,6 +233,22 @@ def get_azure_next_line(document_lines, ref_line, direction='right', overlap_per
|
|
231
233
|
|
232
234
|
return sorted_next_up_lines[0]
|
233
235
|
|
236
|
+
elif direction == 'up_left':
|
237
|
+
# Get all the lines above of the cell, where the x is to the left of the cell
|
238
|
+
up_left_lines = [up_line for up_line in possible_lines if
|
239
|
+
up_line.get_bbox()[1] >= ref_bbox[3] and
|
240
|
+
ref_bbox[2] > up_line.get_x() and ref_bbox[0] - up_line.get_bbox()[2] <= 0.75]
|
241
|
+
|
242
|
+
if not up_left_lines:
|
243
|
+
return None
|
244
|
+
|
245
|
+
# Sort by y (decreasing since 0 is at the bottom of the page)
|
246
|
+
sorted_next_up_left_lines = [up_left_lines[0]]
|
247
|
+
[sorted_next_up_left_lines.insert(0, up_left_line) for up_left_line in up_left_lines
|
248
|
+
if up_left_line.get_bbox()[1] < sorted_next_up_left_lines[0].get_bbox()[1]]
|
249
|
+
|
250
|
+
return sorted_next_up_left_lines[0]
|
251
|
+
|
234
252
|
return None
|
235
253
|
|
236
254
|
|
@@ -5,11 +5,11 @@ kodexa/connectors/__init__.py,sha256=WCUEzFGjHcgPAMFIKLaRTXAkGHx3vUCD8APMhOrNNgM
|
|
5
5
|
kodexa/connectors/connectors.py,sha256=25-TffyGDjxHyp9ITug0qgr1nhqMAekmV5NVvbPGs7o,7722
|
6
6
|
kodexa/model/__init__.py,sha256=DyCgkJU7rOfd4SMvPRLaPdklCNlkqCRRWiVPwjYn2GE,720
|
7
7
|
kodexa/model/base.py,sha256=6IraEK3RomjPgFpPYkxjuLUriF958AusgJO21Dcopeg,753
|
8
|
-
kodexa/model/model.py,sha256=
|
8
|
+
kodexa/model/model.py,sha256=A9v5CRLe-Y28tj0C-oe8L1Gvu3m6bYfPvQQK89tCFzw,88524
|
9
9
|
kodexa/model/objects.py,sha256=1cOpzFUriSQjIx3snJ4TNgRRo-8GN54cPanqnsSOmQ4,114246
|
10
10
|
kodexa/model/persistence.py,sha256=rRBY_onLcSTFlZZmitU8_FLffP7elDHhcmF8yYT94HE,37655
|
11
11
|
kodexa/pipeline/__init__.py,sha256=sA7f5D6qkdMrpp2xTIeefnrUBI6xxEEWostvxfX_1Cs,236
|
12
|
-
kodexa/pipeline/pipeline.py,sha256=
|
12
|
+
kodexa/pipeline/pipeline.py,sha256=uzxe7HuSW1CKDZOrnV_LRHj3SHhbs14lvmMGJ_DIVdw,19763
|
13
13
|
kodexa/platform/__init__.py,sha256=1O3oiWMg292NPL_NacKDnK1T3_R6cMorrPRue_9e-O4,216
|
14
14
|
kodexa/platform/client.py,sha256=nv6o3UDzSv_Sr4FbtsuHr5_Nn5xUYy5YVBaImRDyscw,106727
|
15
15
|
kodexa/platform/kodexa.py,sha256=HcwQh1NKkwSzkngKLp_kEsLW0N_JlzORszoknSDFLT0,27604
|
@@ -24,7 +24,7 @@ kodexa/selectors/parserules.pyi,sha256=UQrLMI_bYxdyGjwd4wJDfJevi5lpku8LSbHXGmNpx
|
|
24
24
|
kodexa/selectors/parsetab.py,sha256=JFQMAOjcGu-a5QBJvp77xpQ4Y8J6hQAzDgzl6tMIjYw,21267
|
25
25
|
kodexa/selectors/parsetab.pyi,sha256=UQrLMI_bYxdyGjwd4wJDfJevi5lpku8LSbHXGmNpx_g,60
|
26
26
|
kodexa/spatial/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
27
|
-
kodexa/spatial/azure_models.py,sha256=
|
27
|
+
kodexa/spatial/azure_models.py,sha256=nxpu1QAduZIeym3k9Wf5A1rOHFE3ahheOWEuNepF6gM,18991
|
28
28
|
kodexa/spatial/bbox_common.py,sha256=WArS8zv-swd7w6devmCAzZF7MXhD1bzIOZAAFKLVzdE,2738
|
29
29
|
kodexa/spatial/table_form_common.py,sha256=K015yXdsK3higBBmt3Kkk3sUwOAlh-1i79ei6Sz2ea4,34222
|
30
30
|
kodexa/steps/__init__.py,sha256=crCQCfwjg5QpqRjD8kSNI6QuUvc6O_an6ZKhRgKfShU,160
|
@@ -34,7 +34,7 @@ kodexa/testing/test_components.py,sha256=i_9M6-bfUBdR1uYAzZZzWiW0M1DGKzE5mkNuHq4
|
|
34
34
|
kodexa/testing/test_utils.py,sha256=HXM3S5FDzarzS6R7jkOHps6d6Ox2UtNqymoK6VCw8Zg,13596
|
35
35
|
kodexa/training/__init__.py,sha256=xs2L62YpRkIRfslQwtQZ5Yxjhm7sLzX2TrVX6EuBnZQ,52
|
36
36
|
kodexa/training/train_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
37
|
-
kodexa-6.0.
|
38
|
-
kodexa-6.0.
|
39
|
-
kodexa-6.0.
|
40
|
-
kodexa-6.0.
|
37
|
+
kodexa-6.0.184.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
38
|
+
kodexa-6.0.184.dist-info/WHEEL,sha256=vVCvjcmxuUltf8cYhJ0sJMRDLr1XsPuxEId8YDzbyCY,88
|
39
|
+
kodexa-6.0.184.dist-info/METADATA,sha256=E_9yTsoZ87ZHtIngeCrom1FRjL20vU1am9_0YN-6V-Y,3602
|
40
|
+
kodexa-6.0.184.dist-info/RECORD,,
|
File without changes
|