kodexa 7.4.413466578435__py3-none-any.whl → 7.4.413508355193__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
kodexa/model/model.py CHANGED
@@ -1244,6 +1244,7 @@ class ContentNode(object):
1244
1244
  status=None,
1245
1245
  owner_uri=None,
1246
1246
  is_dirty=None,
1247
+ sort_by_bbox: bool=False,
1247
1248
  ):
1248
1249
  """
1249
1250
  This will tag (see Feature Tagging) the expression groups identified by the regular expression.
@@ -1305,7 +1306,7 @@ class ContentNode(object):
1305
1306
  return str(uuid.uuid4())
1306
1307
 
1307
1308
  def tag_node_position(
1308
- node_to_check, start, end, node_data, tag_uuid, offset=0, value=None
1309
+ node_to_check, start, end, node_data, tag_uuid, offset=0, value=None, sort_by_bbox: bool=False
1309
1310
  ):
1310
1311
  """
1311
1312
  This function tags a node position in a given data structure. It iterates over the content parts of the node to check,
@@ -1420,6 +1421,7 @@ class ContentNode(object):
1420
1421
  tag_uuid,
1421
1422
  offset=offset,
1422
1423
  value=value,
1424
+ sort_by_bbox=sort_by_bbox,
1423
1425
  )
1424
1426
 
1425
1427
  if result < 0 or (end - result) <= 0:
@@ -1434,7 +1436,16 @@ class ContentNode(object):
1434
1436
  raise Exception("Invalid part?")
1435
1437
 
1436
1438
  # We need to determine if we have missing children and add them to the end
1437
- for child_idx, child_node in enumerate(node_to_check.get_children()):
1439
+ node_children = node_to_check.get_children()
1440
+ if node_children and sort_by_bbox:
1441
+ # Sort nodes by x-coordinate if they have bboxes, otherwise use index
1442
+ try:
1443
+ node_children.sort(key=lambda x: x.get_bbox()[0] if hasattr(x, 'get_bbox') else x.index if hasattr(x, 'index') else 0)
1444
+ except (AttributeError, TypeError, IndexError):
1445
+ # If sorting fails, keep original order
1446
+ pass
1447
+
1448
+ for child_idx, child_node in enumerate(node_children):
1438
1449
  if child_node.index not in node_to_check.get_content_parts():
1439
1450
  if content_length > 0:
1440
1451
  end = end - len(separator)
@@ -1452,6 +1463,7 @@ class ContentNode(object):
1452
1463
  tag_uuid,
1453
1464
  offset=offset,
1454
1465
  value=value,
1466
+ sort_by_bbox=sort_by_bbox,
1455
1467
  )
1456
1468
 
1457
1469
  if result < 0 or (end - result) <= 0:
@@ -1487,6 +1499,7 @@ class ContentNode(object):
1487
1499
  get_tag_uuid(tag_uuid),
1488
1500
  0,
1489
1501
  value=value,
1502
+ sort_by_bbox=sort_by_bbox,
1490
1503
  )
1491
1504
 
1492
1505
  else:
@@ -1558,6 +1571,7 @@ class ContentNode(object):
1558
1571
  data,
1559
1572
  get_tag_uuid(tag_uuid),
1560
1573
  value=value,
1574
+ sort_by_bbox=sort_by_bbox,
1561
1575
  )
1562
1576
 
1563
1577
  else:
@@ -1572,6 +1586,7 @@ class ContentNode(object):
1572
1586
  data,
1573
1587
  get_tag_uuid(tag_uuid),
1574
1588
  value=value,
1589
+ sort_by_bbox=sort_by_bbox,
1575
1590
  )
1576
1591
 
1577
1592
  def get_tags(self):
kodexa/model/objects.py CHANGED
@@ -3108,6 +3108,10 @@ class Task(BaseModel):
3108
3108
  protected_namespaces=("model_config",),
3109
3109
  )
3110
3110
 
3111
+ id: Optional[str] = Field(None)
3112
+ uuid: Optional[str] = None
3113
+ created_on: Optional[StandardDateTime] = Field(None, alias="createdOn")
3114
+ updated_on: Optional[StandardDateTime] = Field(None, alias="updatedOn")
3111
3115
  project: Optional['Project'] = None
3112
3116
  title: Optional[str] = None
3113
3117
  description: Optional[str] = None
kodexa/platform/client.py CHANGED
@@ -4634,14 +4634,18 @@ class DocumentFamilyEndpoint(DocumentFamily, ClientEndpoint):
4634
4634
 
4635
4635
  def get_json(
4636
4636
  self,
4637
- project_id: str,
4637
+ project_id: Optional[str] = None,
4638
4638
  friendly_names=False,
4639
+ include_ids=True,
4640
+ include_exceptions=False,
4639
4641
  ) -> str:
4640
4642
  """Get the JSON export for the document family
4641
4643
 
4642
4644
  Args:
4643
4645
  project_id str: The project ID
4644
4646
  friendly_names (bool): Whether to use friendly names. Defaults to False
4647
+ include_ids (bool): Whether to include the IDs. Defaults to True
4648
+ include_exceptions (bool): Whether to include the exceptions. Defaults to False
4645
4649
 
4646
4650
  Returns:
4647
4651
  str: The JSON
@@ -4656,6 +4660,8 @@ class DocumentFamilyEndpoint(DocumentFamily, ClientEndpoint):
4656
4660
  "format": "json",
4657
4661
  "friendlyNames": friendly_names,
4658
4662
  "projectId": project_id,
4663
+ "includeIds": include_ids,
4664
+ "includeExceptions": include_exceptions,
4659
4665
  }
4660
4666
 
4661
4667
  response = self.client.get(url, params=params)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kodexa
3
- Version: 7.4.413466578435
3
+ Version: 7.4.413508355193
4
4
  Summary: Python SDK for the Kodexa Platform
5
5
  Author: Austin Redenbaugh
6
6
  Author-email: austin@kodexa.com
@@ -12,14 +12,14 @@ kodexa/model/entities/check_response.py,sha256=eqBHxO6G2OAziL3p9bHGI-oiPkAG82H6C
12
12
  kodexa/model/entities/product.py,sha256=StUhTEeLXmc05cj6XnZppQfeJsqCPbX1jdhsysHH--Q,5787
13
13
  kodexa/model/entities/product_group.py,sha256=540fRGyUf34h1BzAN1DiWu6rGgvaj3xDFhZ2k-RvSFY,3617
14
14
  kodexa/model/entities/product_subscription.py,sha256=UcmWR-qgLfdV7VCtJNwzgkanoS8nBSL6ngVuxQUK1M8,3810
15
- kodexa/model/model.py,sha256=qh1YUew3UgtjU0t4fAwSXYYuzQjXTOZWZkafyFp_w8M,118801
16
- kodexa/model/objects.py,sha256=ZEKSGugy7qmVbmW9y6haDCF_Zs1uOZdB5bn8eqyw2Zk,189319
15
+ kodexa/model/model.py,sha256=q3zEm6pPOB-xPCKbOxmTMqLALzmQr2Ppam8knApoSEE,119645
16
+ kodexa/model/objects.py,sha256=C1zdBU7DmnhLwioCd8OEKEGEcDDZbCorolJ4bnTKVg0,189538
17
17
  kodexa/model/persistence.py,sha256=jUgQ8xwsAFIoZ_bEynxCDEWhUII42eN0e0Mum0dkQPg,72043
18
18
  kodexa/model/utils.py,sha256=6R-3rFiW9irBwj0Mq5yhp7EDXkNUFaeFhr3bWmnlW4g,2961
19
19
  kodexa/pipeline/__init__.py,sha256=sA7f5D6qkdMrpp2xTIeefnrUBI6xxEEWostvxfX_1Cs,236
20
20
  kodexa/pipeline/pipeline.py,sha256=zyNEpA7KlGhPs_l-vgV6m-OCb16dbxQhl8QezeylugA,25540
21
21
  kodexa/platform/__init__.py,sha256=1O3oiWMg292NPL_NacKDnK1T3_R6cMorrPRue_9e-O4,216
22
- kodexa/platform/client.py,sha256=ydZrRP3tBNM9E2khjkoHhkG2UphEQx2tbteni4ekhkw,231089
22
+ kodexa/platform/client.py,sha256=e-MZV4chLYVQj24EADsOdYXbRJ6dgdJJmZ0fqfujSQw,231445
23
23
  kodexa/platform/interaction.py,sha256=6zpcwXKNZstUGNS6m4JsoRXAqCZPJHWI-ZN3co8nnF0,1055
24
24
  kodexa/platform/kodexa.py,sha256=tPXHO500q3S75GhKGDcaxO51Viq2PNlHmAzpBZlahgo,34857
25
25
  kodexa/selectors/__init__.py,sha256=xA9-4vpyaAZWPSk3bh2kvDLkdv6XEmm7PjFbpziiTIk,100
@@ -44,7 +44,7 @@ kodexa/testing/test_utils.py,sha256=v44p__gE7ia67W7WeHN2HBFCWSCUrCZt7G4xBNCmwf8,
44
44
  kodexa/training/__init__.py,sha256=xs2L62YpRkIRfslQwtQZ5Yxjhm7sLzX2TrVX6EuBnZQ,52
45
45
  kodexa/training/train_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
46
  kodexa/utils/__init__.py,sha256=Pnim1o9_db5YEnNvDTxpM7HG-qTlL6n8JwFwOafU9wo,5928
47
- kodexa-7.4.413466578435.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
48
- kodexa-7.4.413466578435.dist-info/METADATA,sha256=VHhK3bW8tZV7UsLYKVd654E5pFoRaW6ZcmL-o69oC_Q,2813
49
- kodexa-7.4.413466578435.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
50
- kodexa-7.4.413466578435.dist-info/RECORD,,
47
+ kodexa-7.4.413508355193.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
48
+ kodexa-7.4.413508355193.dist-info/METADATA,sha256=YNvguxKfF49HqQ-wHPTPoD3WrWRN9r29FWSURSO1GOk,2813
49
+ kodexa-7.4.413508355193.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
50
+ kodexa-7.4.413508355193.dist-info/RECORD,,