kodexa 6.0.186__tar.gz → 6.0.192a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kodexa-6.0.186 → kodexa-6.0.192a0}/PKG-INFO +1 -1
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/model/model.py +45 -2
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/platform/client.py +95 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/pyproject.toml +1 -1
- {kodexa-6.0.186 → kodexa-6.0.192a0}/setup.py +1 -1
- {kodexa-6.0.186 → kodexa-6.0.192a0}/LICENSE +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/README.md +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/__init__.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/assistant/__init__.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/assistant/assistant.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/connectors/__init__.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/connectors/connectors.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/model/__init__.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/model/base.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/model/objects.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/model/persistence.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/pipeline/__init__.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/pipeline/pipeline.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/platform/__init__.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/platform/kodexa.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/selectors/__init__.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/selectors/ast.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/selectors/core.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/selectors/lexrules.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/selectors/lextab.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/selectors/lextab.pyi +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/selectors/parserules.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/selectors/parserules.pyi +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/selectors/parsetab.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/selectors/parsetab.pyi +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/spatial/__init__.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/spatial/azure_models.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/spatial/bbox_common.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/spatial/table_form_common.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/steps/__init__.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/steps/common.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/testing/__init__.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/testing/test_components.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/testing/test_utils.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/training/__init__.py +0 -0
- {kodexa-6.0.186 → kodexa-6.0.192a0}/kodexa/training/train_utils.py +0 -0
@@ -79,7 +79,7 @@ class Tag(Dict):
|
|
79
79
|
"""A string representing the value that was labelled in the node"""
|
80
80
|
self.data: Optional[Any] = data
|
81
81
|
"""Any data object (JSON serializable) that you wish to associate with the label"""
|
82
|
-
self.uuid: Optional[str] = uuid
|
82
|
+
self.uuid: Optional[str] = uuid or str(uuid.uuid4())
|
83
83
|
"""The UUID for this tag instance, this allows tags that are on different content nodes to be related through the same UUID"""
|
84
84
|
self.confidence: Optional[float] = confidence
|
85
85
|
"""The confidence of the tag in a range of 0-1"""
|
@@ -1536,7 +1536,8 @@ class ContentNode(object):
|
|
1536
1536
|
if not node:
|
1537
1537
|
if (traverse == traverse.ALL or traverse == traverse.PARENT) and self.get_parent().get_parent():
|
1538
1538
|
# can now traverse content-areas.. can add traversal of pages if needed, but don't think the scenario exists.
|
1539
|
-
potential_next_node = self.get_parent().get_parent().get_children()[self.get_parent().index + 1].get_children()[0]
|
1539
|
+
potential_next_node = \
|
1540
|
+
self.get_parent().get_parent().get_children()[self.get_parent().index + 1].get_children()[0]
|
1540
1541
|
if potential_next_node:
|
1541
1542
|
return potential_next_node
|
1542
1543
|
return node
|
@@ -1757,6 +1758,9 @@ class Document(object):
|
|
1757
1758
|
self.classes: List[ContentClassification] = []
|
1758
1759
|
"""A list of the content classifications associated at the document level"""
|
1759
1760
|
|
1761
|
+
self.tag_instances: List[TagInstance] = []
|
1762
|
+
"""A list of tag instances that contains a set of tag that has a set of nodes"""
|
1763
|
+
|
1760
1764
|
# Start persistence layer
|
1761
1765
|
from kodexa.model import PersistenceManager
|
1762
1766
|
|
@@ -1765,6 +1769,36 @@ class Document(object):
|
|
1765
1769
|
delete_on_close=delete_on_close)
|
1766
1770
|
self._persistence_layer.initialize()
|
1767
1771
|
|
1772
|
+
def add_tag_instance(self, tag_to_apply, node_list: List[ContentNode]):
|
1773
|
+
"""
|
1774
|
+
This will create a group of a tag with indexes
|
1775
|
+
:param tag: name of the tag
|
1776
|
+
:param node_indices: contains the list of index of a node
|
1777
|
+
:return:
|
1778
|
+
"""
|
1779
|
+
# For each node in the list create/update a feature
|
1780
|
+
tag = Tag()
|
1781
|
+
for node in node_list:
|
1782
|
+
node.add_feature('tag', tag_to_apply, Tag)
|
1783
|
+
# Tag Object
|
1784
|
+
tag_instance = TagInstance(tag, node_list)
|
1785
|
+
self.tag_instances.append(tag_instance)
|
1786
|
+
|
1787
|
+
def update_tag_instance(self, tag_uuid):
|
1788
|
+
for tag_instance in self.tag_instances:
|
1789
|
+
if tag_instance.tag.uuid == tag_uuid:
|
1790
|
+
# Update attributes of a Tag
|
1791
|
+
for node in tag_instance.nodes:
|
1792
|
+
node.get_tag(tag_instance.tag.value, tag_uuid=tag_instance.tag.uuid)
|
1793
|
+
|
1794
|
+
def get_tag_instance(self, tag):
|
1795
|
+
"""
|
1796
|
+
Get the tag instance based on the tag itself
|
1797
|
+
:param tag: name of the tag
|
1798
|
+
:return: a list of tag instance
|
1799
|
+
"""
|
1800
|
+
return [tag_instance for tag_instance in self.tag_instances if tag_instance.tag == tag]
|
1801
|
+
|
1768
1802
|
def get_persistence(self):
|
1769
1803
|
return self._persistence_layer
|
1770
1804
|
|
@@ -2271,6 +2305,15 @@ class Document(object):
|
|
2271
2305
|
return self.labels
|
2272
2306
|
|
2273
2307
|
|
2308
|
+
class TagInstance:
|
2309
|
+
def __init__(self, tag: Tag, nodes):
|
2310
|
+
self.tag = tag
|
2311
|
+
self.nodes = nodes
|
2312
|
+
|
2313
|
+
def add_node(self, nodes: List[ContentNode]):
|
2314
|
+
self.nodes.extend(nodes)
|
2315
|
+
|
2316
|
+
|
2274
2317
|
class ContentObjectReference:
|
2275
2318
|
""" """
|
2276
2319
|
|
@@ -258,6 +258,35 @@ class ComponentEndpoint(ClientEndpoint, OrganizationOwned):
|
|
258
258
|
return None
|
259
259
|
return component_page.content[0]
|
260
260
|
|
261
|
+
def stream_list(self, query="*", page=1, page_size=10, sort=None, filters: List[str] = None):
|
262
|
+
url = f"/api/{self.get_type()}/{self.organization.slug}"
|
263
|
+
|
264
|
+
params = {"query": requests.utils.quote(query),
|
265
|
+
"page": page,
|
266
|
+
"pageSize": page_size}
|
267
|
+
|
268
|
+
if sort is not None:
|
269
|
+
params["sort"] = sort
|
270
|
+
|
271
|
+
if filters is not None:
|
272
|
+
params["legacyFilter"] = True
|
273
|
+
params["filter"] = filters
|
274
|
+
|
275
|
+
while True:
|
276
|
+
list_response = self.client.get(url, params=params)
|
277
|
+
|
278
|
+
# If there are no more results, exit the loop
|
279
|
+
if not list_response.json()["content"]:
|
280
|
+
break
|
281
|
+
|
282
|
+
# Yield each endpoint in the current page
|
283
|
+
for endpoint in self.get_page_class(list_response.json()).parse_obj(list_response.json()).set_client(
|
284
|
+
self.client).to_endpoints():
|
285
|
+
yield endpoint
|
286
|
+
|
287
|
+
# Move to the next page
|
288
|
+
params["page"] += 1
|
289
|
+
|
261
290
|
def list(self, query="*", page=1, page_size=10, sort=None, filters: List[str] = None):
|
262
291
|
url = f"/api/{self.get_type()}/{self.organization.slug}"
|
263
292
|
|
@@ -987,6 +1016,28 @@ class ProjectsEndpoint(EntitiesEndpoint):
|
|
987
1016
|
return ProjectEndpoint.parse_obj(get_response.json()['content'][0]).set_client(self.client)
|
988
1017
|
return None
|
989
1018
|
|
1019
|
+
def stream_query(self, query: str = "*", sort=None):
|
1020
|
+
"""
|
1021
|
+
Stream the query for the project endpoints
|
1022
|
+
:param query: the query to run
|
1023
|
+
:param sort: sorting order of the query
|
1024
|
+
:return:
|
1025
|
+
A generator of the project endpoints
|
1026
|
+
"""
|
1027
|
+
page_size = 5
|
1028
|
+
page = 1
|
1029
|
+
|
1030
|
+
if not sort:
|
1031
|
+
sort = "id"
|
1032
|
+
|
1033
|
+
while True:
|
1034
|
+
page_response = self.query(query=query, page=page, page_size=page_size, sort=sort)
|
1035
|
+
if not page_response.content:
|
1036
|
+
break
|
1037
|
+
for project_endpoint in page_response.content:
|
1038
|
+
yield project_endpoint
|
1039
|
+
page += 1
|
1040
|
+
|
990
1041
|
def query(self, query: str = "*", page: int = 1, page_size: int = 100, sort=None) -> Optional[PageProjectEndpoint]:
|
991
1042
|
params = {
|
992
1043
|
'page': page,
|
@@ -2004,6 +2055,28 @@ class DocumentStoreEndpoint(StoreEndpoint):
|
|
2004
2055
|
f"/api/stores/{self.ref.replace(':', '/')}/families/{document_family_id}")
|
2005
2056
|
return DocumentFamilyEndpoint.parse_obj(document_family_response.json()).set_client(self.client)
|
2006
2057
|
|
2058
|
+
def stream_query(self, query: str = "*", sort=None):
|
2059
|
+
"""
|
2060
|
+
Stream the query for the document family
|
2061
|
+
:param query: the query to run
|
2062
|
+
:param sort: sorting order of the query
|
2063
|
+
:return:
|
2064
|
+
A generator of the document families
|
2065
|
+
"""
|
2066
|
+
page_size = 5
|
2067
|
+
page = 1
|
2068
|
+
|
2069
|
+
if not sort:
|
2070
|
+
sort = "id"
|
2071
|
+
|
2072
|
+
while True:
|
2073
|
+
page_response = self.query(query=query, page=page, page_size=page_size, sort=sort)
|
2074
|
+
if not page_response.content:
|
2075
|
+
break
|
2076
|
+
for document_family in page_response.content:
|
2077
|
+
yield document_family
|
2078
|
+
page += 1
|
2079
|
+
|
2007
2080
|
def query(self, query: str = "*", page: int = 1, page_size: int = 100, sort=None) -> PageDocumentFamilyEndpoint:
|
2008
2081
|
params = {
|
2009
2082
|
'page': page,
|
@@ -2019,6 +2092,28 @@ class DocumentStoreEndpoint(StoreEndpoint):
|
|
2019
2092
|
|
2020
2093
|
return PageDocumentFamilyEndpoint.parse_obj(get_response.json()).set_client(self.client)
|
2021
2094
|
|
2095
|
+
def stream_filter(self, filter_string: str = "", sort=None):
|
2096
|
+
"""
|
2097
|
+
Stream the filter for the document family
|
2098
|
+
:param query: the query to run
|
2099
|
+
:param sort: sorting order of the query
|
2100
|
+
:return:
|
2101
|
+
A generator of the document families
|
2102
|
+
"""
|
2103
|
+
page_size = 5
|
2104
|
+
page = 1
|
2105
|
+
|
2106
|
+
if not sort:
|
2107
|
+
sort = "id"
|
2108
|
+
|
2109
|
+
while True:
|
2110
|
+
page_response = self.filter(filter_string=filter_string, page=page, page_size=page_size, sort=sort)
|
2111
|
+
if not page_response.content:
|
2112
|
+
break
|
2113
|
+
for document_family in page_response.content:
|
2114
|
+
yield document_family
|
2115
|
+
page += 1
|
2116
|
+
|
2022
2117
|
def filter(self, filter_string: str = "", page: int = 1, page_size: int = 100,
|
2023
2118
|
sort=None) -> PageDocumentFamilyEndpoint:
|
2024
2119
|
params = {
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "kodexa"
|
3
|
-
version = "6.0.186"
|
3
|
+
version = "6.0.192a0"
|
4
4
|
description = "Python SDK for the Kodexa Platform"
|
5
5
|
authors = ["Austin Redenbaugh <austin@kodexa.com>", "Philip Dodds <philip@kodexa.com>", "Romar Cablao <rcablao@kodexa.com>", "Amadea Paula Dodds <amadeapaula@kodexa.com>"]
|
6
6
|
readme = "README.md"
|
@@ -37,7 +37,7 @@ install_requires = \
|
|
37
37
|
|
38
38
|
setup_kwargs = {
|
39
39
|
'name': 'kodexa',
|
40
|
-
'version': '6.0.186',
|
40
|
+
'version': '6.0.192a0',
|
41
41
|
'description': 'Python SDK for the Kodexa Platform',
|
42
42
|
'long_description': '# Kodexa\n\n[](https://github.com/kodexa-ai/kodexa/actions/workflows/main.yml)\n\n\n\nKodexa is a platform for building intelligent document processing pipelines. It is a set of tools and services that\nallow you to build a pipeline that can take a document, extract the content, and then process it to extract the\ninformation you need.\n\nIt is built on a set of core principles:\n\n* **Document Centric** - Kodexa is built around the idea of a document. A document is a collection of content\n nodes that are connected together. This is a powerful model that allows you to build pipelines that can\n extract content from a wide range of sources.\n\n* **Pipeline Oriented** - Kodexa is built around the idea of a pipeline. A pipeline is a series of steps that\n can be executed on a document. This allows you to build a pipeline that can extract content from a wide range\n of sources.\n\n* **Extensible** - Kodexa is built around the idea of a pipeline. A pipeline is a series of steps that can be executed\n on a document. This allows you to build a pipeline that can extract content from a wide range of sources.\n\n* **Label Driven** - Kodexa focuses on the idea of labels. Labels are a way to identify content within a document\n and then use that content to drive the processing of the document.\n\n# Python SDK\n\nThis repository contains the Python SDK for Kodexa. The SDK is the primary way to interact with Kodexa. It allows you to\ndefine actions, models, and pipelines that can be executed on Kodexa. 
It also includes a complete SDK client for\nworking with a Kodexa platform instance.\n\n## Documentation & Examples\n\nDocumentation is available at the [Kodexa Documentation Portal](https://docs.kodexa.com)\n\n## Current Development\n\nThe main branch is 6.0 which is a production release.\n\n## Set-up\n\nWe use poetry to manage our dependencies, so you can install them with:\n\n poetry install\n\nYou can then run the tests with:\n\n poetry run pytest\n\n# Contributing\n\nWe welcome contributions to the Kodexa platform. Please see our [contributing guide](CONTRIBUTING.md) for more details.\n\n# License\n\nApache 2.0\n',
|
43
43
|
'author': 'Austin Redenbaugh',
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|