kodexa 6.0.180__py3-none-any.whl → 6.0.184__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
kodexa/model/model.py CHANGED
@@ -2016,10 +2016,7 @@ class Document(object):
2016
2016
  'content_node': self.content_node.to_dict() if self.content_node else None,
2017
2017
  'source': clean_none_values(dataclasses.asdict(self.source)),
2018
2018
  'mixins': self._mixins,
2019
- 'taxonomies': self.taxonomies,
2020
2019
  'classes': [content_class.to_dict() for content_class in self.classes],
2021
- 'exceptions': self.exceptions,
2022
- 'log': self.log,
2023
2020
  'labels': self.labels,
2024
2021
  'uuid': self.uuid}
2025
2022
 
@@ -2039,8 +2036,6 @@ class Document(object):
2039
2036
  new_document = Document(DocumentMetadata(doc_dict['metadata']))
2040
2037
  new_document.version = doc_dict['version'] if 'version' in doc_dict and doc_dict[
2041
2038
  'version'] else Document.PREVIOUS_VERSION # some older docs don't have a version or it's None
2042
- new_document.log = doc_dict['log'] if 'log' in doc_dict else []
2043
- new_document.exceptions = doc_dict['exceptions'] if 'exceptions' in doc_dict else []
2044
2039
  new_document.uuid = doc_dict['uuid'] if 'uuid' in doc_dict else str(
2045
2040
  uuid.uuid5(uuid.NAMESPACE_DNS, 'kodexa.com'))
2046
2041
 
@@ -2051,11 +2046,6 @@ class Document(object):
2051
2046
  new_document.source = SourceMetadata.from_dict(doc_dict['source'])
2052
2047
  if 'labels' in doc_dict and doc_dict['labels']:
2053
2048
  new_document.labels = doc_dict['labels']
2054
- if 'taxomomies' in doc_dict and doc_dict['taxomomies']:
2055
- new_document.labels = doc_dict['taxomomies']
2056
- if 'classes' in doc_dict and doc_dict['classes']:
2057
- new_document.classes = [ContentClassification.from_dict(content_class) for content_class in
2058
- doc_dict['classes']]
2059
2049
 
2060
2050
  new_document.get_persistence().update_metadata()
2061
2051
  return new_document
@@ -603,7 +603,6 @@ class PipelineStatistics:
603
603
 
604
604
  def __init__(self):
605
605
  self.documents_processed = 0
606
- self.document_exceptions = 0
607
606
 
608
607
  def processed_document(self, document):
609
608
  """Update statistics based on this document completing processing
@@ -615,6 +614,3 @@ class PipelineStatistics:
615
614
 
616
615
  """
617
616
  self.documents_processed += 1
618
-
619
- if document and document.exceptions:
620
- self.document_exceptions += 1
@@ -68,6 +68,8 @@ def create_kddb_from_azure(azure_data, keep_azure_lines=True, overlap_percentage
68
68
  if issue_found:
69
69
  return None
70
70
 
71
+
72
+
71
73
  document.content_node = root_node
72
74
  document.add_mixin('spatial')
73
75
 
@@ -231,6 +233,22 @@ def get_azure_next_line(document_lines, ref_line, direction='right', overlap_per
231
233
 
232
234
  return sorted_next_up_lines[0]
233
235
 
236
+ elif direction == 'up_left':
237
+ # Get all the lines above of the cell, where the x is to the left of the cell
238
+ up_left_lines = [up_line for up_line in possible_lines if
239
+ up_line.get_bbox()[1] >= ref_bbox[3] and
240
+ ref_bbox[2] > up_line.get_x() and ref_bbox[0] - up_line.get_bbox()[2] <= 0.75]
241
+
242
+ if not up_left_lines:
243
+ return None
244
+
245
+ # Sort by y (decreasing since 0 is at the bottom of the page)
246
+ sorted_next_up_left_lines = [up_left_lines[0]]
247
+ [sorted_next_up_left_lines.insert(0, up_left_line) for up_left_line in up_left_lines
248
+ if up_left_line.get_bbox()[1] < sorted_next_up_left_lines[0].get_bbox()[1]]
249
+
250
+ return sorted_next_up_left_lines[0]
251
+
234
252
  return None
235
253
 
236
254
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kodexa
3
- Version: 6.0.180
3
+ Version: 6.0.184
4
4
  Summary: Python SDK for the Kodexa Platform
5
5
  Author: Austin Redenbaugh
6
6
  Author-email: austin@kodexa.com
@@ -5,11 +5,11 @@ kodexa/connectors/__init__.py,sha256=WCUEzFGjHcgPAMFIKLaRTXAkGHx3vUCD8APMhOrNNgM
5
5
  kodexa/connectors/connectors.py,sha256=25-TffyGDjxHyp9ITug0qgr1nhqMAekmV5NVvbPGs7o,7722
6
6
  kodexa/model/__init__.py,sha256=DyCgkJU7rOfd4SMvPRLaPdklCNlkqCRRWiVPwjYn2GE,720
7
7
  kodexa/model/base.py,sha256=6IraEK3RomjPgFpPYkxjuLUriF958AusgJO21Dcopeg,753
8
- kodexa/model/model.py,sha256=Zh1Ug_oy8i_ePL30rA3ag1JAS1YkQf_9t13Xr-B_9H0,89156
8
+ kodexa/model/model.py,sha256=A9v5CRLe-Y28tj0C-oe8L1Gvu3m6bYfPvQQK89tCFzw,88524
9
9
  kodexa/model/objects.py,sha256=1cOpzFUriSQjIx3snJ4TNgRRo-8GN54cPanqnsSOmQ4,114246
10
10
  kodexa/model/persistence.py,sha256=rRBY_onLcSTFlZZmitU8_FLffP7elDHhcmF8yYT94HE,37655
11
11
  kodexa/pipeline/__init__.py,sha256=sA7f5D6qkdMrpp2xTIeefnrUBI6xxEEWostvxfX_1Cs,236
12
- kodexa/pipeline/pipeline.py,sha256=fofj2wpkbDEyWeyPuebKsKROQeRcfpQWEHZG_GSXWkw,19888
12
+ kodexa/pipeline/pipeline.py,sha256=uzxe7HuSW1CKDZOrnV_LRHj3SHhbs14lvmMGJ_DIVdw,19763
13
13
  kodexa/platform/__init__.py,sha256=1O3oiWMg292NPL_NacKDnK1T3_R6cMorrPRue_9e-O4,216
14
14
  kodexa/platform/client.py,sha256=nv6o3UDzSv_Sr4FbtsuHr5_Nn5xUYy5YVBaImRDyscw,106727
15
15
  kodexa/platform/kodexa.py,sha256=HcwQh1NKkwSzkngKLp_kEsLW0N_JlzORszoknSDFLT0,27604
@@ -24,7 +24,7 @@ kodexa/selectors/parserules.pyi,sha256=UQrLMI_bYxdyGjwd4wJDfJevi5lpku8LSbHXGmNpx
24
24
  kodexa/selectors/parsetab.py,sha256=JFQMAOjcGu-a5QBJvp77xpQ4Y8J6hQAzDgzl6tMIjYw,21267
25
25
  kodexa/selectors/parsetab.pyi,sha256=UQrLMI_bYxdyGjwd4wJDfJevi5lpku8LSbHXGmNpx_g,60
26
26
  kodexa/spatial/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
- kodexa/spatial/azure_models.py,sha256=1NeXqPloxfnDb6iz4eHXrxfOm_kJzO1SYmYJhmJYdzE,18229
27
+ kodexa/spatial/azure_models.py,sha256=nxpu1QAduZIeym3k9Wf5A1rOHFE3ahheOWEuNepF6gM,18991
28
28
  kodexa/spatial/bbox_common.py,sha256=WArS8zv-swd7w6devmCAzZF7MXhD1bzIOZAAFKLVzdE,2738
29
29
  kodexa/spatial/table_form_common.py,sha256=K015yXdsK3higBBmt3Kkk3sUwOAlh-1i79ei6Sz2ea4,34222
30
30
  kodexa/steps/__init__.py,sha256=crCQCfwjg5QpqRjD8kSNI6QuUvc6O_an6ZKhRgKfShU,160
@@ -34,7 +34,7 @@ kodexa/testing/test_components.py,sha256=i_9M6-bfUBdR1uYAzZZzWiW0M1DGKzE5mkNuHq4
34
34
  kodexa/testing/test_utils.py,sha256=HXM3S5FDzarzS6R7jkOHps6d6Ox2UtNqymoK6VCw8Zg,13596
35
35
  kodexa/training/__init__.py,sha256=xs2L62YpRkIRfslQwtQZ5Yxjhm7sLzX2TrVX6EuBnZQ,52
36
36
  kodexa/training/train_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
- kodexa-6.0.180.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
38
- kodexa-6.0.180.dist-info/WHEEL,sha256=vxFmldFsRN_Hx10GDvsdv1wroKq8r5Lzvjp6GZ4OO8c,88
39
- kodexa-6.0.180.dist-info/METADATA,sha256=fG-iZkx0ypuzpIaPWy7oE5dZKpEzDrOZVqh81QlvFa0,3602
40
- kodexa-6.0.180.dist-info/RECORD,,
37
+ kodexa-6.0.184.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
38
+ kodexa-6.0.184.dist-info/WHEEL,sha256=vVCvjcmxuUltf8cYhJ0sJMRDLr1XsPuxEId8YDzbyCY,88
39
+ kodexa-6.0.184.dist-info/METADATA,sha256=E_9yTsoZ87ZHtIngeCrom1FRjL20vU1am9_0YN-6V-Y,3602
40
+ kodexa-6.0.184.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.3.2
2
+ Generator: poetry-core 1.4.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any