kodexa 6.0.186__py3-none-any.whl → 6.0.192a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kodexa/model/model.py CHANGED
@@ -79,7 +79,7 @@ class Tag(Dict):
79
79
  """A string representing the value that was labelled in the node"""
80
80
  self.data: Optional[Any] = data
81
81
  """Any data object (JSON serializable) that you wish to associate with the label"""
82
- self.uuid: Optional[str] = uuid
82
+ self.uuid: Optional[str] = uuid or str(uuid.uuid4())
83
83
  """The UUID for this tag instance, this allows tags that are on different content nodes to be related through the same UUID"""
84
84
  self.confidence: Optional[float] = confidence
85
85
  """The confidence of the tag in a range of 0-1"""
@@ -1536,7 +1536,8 @@ class ContentNode(object):
1536
1536
  if not node:
1537
1537
  if (traverse == traverse.ALL or traverse == traverse.PARENT) and self.get_parent().get_parent():
1538
1538
  # can now traverse content-areas.. can add traversal of pages if needed, but don't think the scenario exists.
1539
- potential_next_node = self.get_parent().get_parent().get_children()[self.get_parent().index + 1].get_children()[0]
1539
+ potential_next_node = \
1540
+ self.get_parent().get_parent().get_children()[self.get_parent().index + 1].get_children()[0]
1540
1541
  if potential_next_node:
1541
1542
  return potential_next_node
1542
1543
  return node
@@ -1757,6 +1758,9 @@ class Document(object):
1757
1758
  self.classes: List[ContentClassification] = []
1758
1759
  """A list of the content classifications associated at the document level"""
1759
1760
 
1761
+ self.tag_instances: List[TagInstance] = []
1762
+ """A list of tag instances, each pairing a tag with the list of nodes it was applied to"""
1763
+
1760
1764
  # Start persistence layer
1761
1765
  from kodexa.model import PersistenceManager
1762
1766
 
@@ -1765,6 +1769,36 @@ class Document(object):
1765
1769
  delete_on_close=delete_on_close)
1766
1770
  self._persistence_layer.initialize()
1767
1771
 
1772
+ def add_tag_instance(self, tag_to_apply, node_list: List[ContentNode]):
1773
+ """
1774
+ This will create a tag instance that groups a tag with the nodes it is applied to
1775
+ :param tag_to_apply: name of the tag
1776
+ :param node_list: the list of content nodes to tag
1777
+ :return:
1778
+ """
1779
+ # For each node in the list create/update a feature
1780
+ tag = Tag()
1781
+ for node in node_list:
1782
+ node.add_feature('tag', tag_to_apply, Tag)
1783
+ # Tag Object
1784
+ tag_instance = TagInstance(tag, node_list)
1785
+ self.tag_instances.append(tag_instance)
1786
+
1787
+ def update_tag_instance(self, tag_uuid):
1788
+ for tag_instance in self.tag_instances:
1789
+ if tag_instance.tag.uuid == tag_uuid:
1790
+ # Update attributes of a Tag
1791
+ for node in tag_instance.nodes:
1792
+ node.get_tag(tag_instance.tag.value, tag_uuid=tag_instance.tag.uuid)
1793
+
1794
+ def get_tag_instance(self, tag):
1795
+ """
1796
+ Get the tag instances whose tag matches the given tag
1797
+ :param tag: the tag to compare against each tag instance's tag
1798
+ :return: a list of tag instances
1799
+ """
1800
+ return [tag_instance for tag_instance in self.tag_instances if tag_instance.tag == tag]
1801
+
1768
1802
  def get_persistence(self):
1769
1803
  return self._persistence_layer
1770
1804
 
@@ -2271,6 +2305,15 @@ class Document(object):
2271
2305
  return self.labels
2272
2306
 
2273
2307
 
2308
+ class TagInstance:
2309
+ def __init__(self, tag: Tag, nodes):
2310
+ self.tag = tag
2311
+ self.nodes = nodes
2312
+
2313
+ def add_node(self, nodes: List[ContentNode]):
2314
+ self.nodes.extend(nodes)
2315
+
2316
+
2274
2317
  class ContentObjectReference:
2275
2318
  """ """
2276
2319
 
kodexa/platform/client.py CHANGED
@@ -258,6 +258,35 @@ class ComponentEndpoint(ClientEndpoint, OrganizationOwned):
258
258
  return None
259
259
  return component_page.content[0]
260
260
 
261
+ def stream_list(self, query="*", page=1, page_size=10, sort=None, filters: List[str] = None):
262
+ url = f"/api/{self.get_type()}/{self.organization.slug}"
263
+
264
+ params = {"query": requests.utils.quote(query),
265
+ "page": page,
266
+ "pageSize": page_size}
267
+
268
+ if sort is not None:
269
+ params["sort"] = sort
270
+
271
+ if filters is not None:
272
+ params["legacyFilter"] = True
273
+ params["filter"] = filters
274
+
275
+ while True:
276
+ list_response = self.client.get(url, params=params)
277
+
278
+ # If there are no more results, exit the loop
279
+ if not list_response.json()["content"]:
280
+ break
281
+
282
+ # Yield each endpoint in the current page
283
+ for endpoint in self.get_page_class(list_response.json()).parse_obj(list_response.json()).set_client(
284
+ self.client).to_endpoints():
285
+ yield endpoint
286
+
287
+ # Move to the next page
288
+ params["page"] += 1
289
+
261
290
  def list(self, query="*", page=1, page_size=10, sort=None, filters: List[str] = None):
262
291
  url = f"/api/{self.get_type()}/{self.organization.slug}"
263
292
 
@@ -987,6 +1016,28 @@ class ProjectsEndpoint(EntitiesEndpoint):
987
1016
  return ProjectEndpoint.parse_obj(get_response.json()['content'][0]).set_client(self.client)
988
1017
  return None
989
1018
 
1019
+ def stream_query(self, query: str = "*", sort=None):
1020
+ """
1021
+ Stream the query for the project endpoints
1022
+ :param query: the query to run
1023
+ :param sort: sorting order of the query
1024
+ :return:
1025
+ A generator of the project endpoints
1026
+ """
1027
+ page_size = 5
1028
+ page = 1
1029
+
1030
+ if not sort:
1031
+ sort = "id"
1032
+
1033
+ while True:
1034
+ page_response = self.query(query=query, page=page, page_size=page_size, sort=sort)
1035
+ if not page_response.content:
1036
+ break
1037
+ for project_endpoint in page_response.content:
1038
+ yield project_endpoint
1039
+ page += 1
1040
+
990
1041
  def query(self, query: str = "*", page: int = 1, page_size: int = 100, sort=None) -> Optional[PageProjectEndpoint]:
991
1042
  params = {
992
1043
  'page': page,
@@ -2004,6 +2055,28 @@ class DocumentStoreEndpoint(StoreEndpoint):
2004
2055
  f"/api/stores/{self.ref.replace(':', '/')}/families/{document_family_id}")
2005
2056
  return DocumentFamilyEndpoint.parse_obj(document_family_response.json()).set_client(self.client)
2006
2057
 
2058
+ def stream_query(self, query: str = "*", sort=None):
2059
+ """
2060
+ Stream the query for the document family
2061
+ :param query: the query to run
2062
+ :param sort: sorting order of the query
2063
+ :return:
2064
+ A generator of the document families
2065
+ """
2066
+ page_size = 5
2067
+ page = 1
2068
+
2069
+ if not sort:
2070
+ sort = "id"
2071
+
2072
+ while True:
2073
+ page_response = self.query(query=query, page=page, page_size=page_size, sort=sort)
2074
+ if not page_response.content:
2075
+ break
2076
+ for document_family in page_response.content:
2077
+ yield document_family
2078
+ page += 1
2079
+
2007
2080
  def query(self, query: str = "*", page: int = 1, page_size: int = 100, sort=None) -> PageDocumentFamilyEndpoint:
2008
2081
  params = {
2009
2082
  'page': page,
@@ -2019,6 +2092,28 @@ class DocumentStoreEndpoint(StoreEndpoint):
2019
2092
 
2020
2093
  return PageDocumentFamilyEndpoint.parse_obj(get_response.json()).set_client(self.client)
2021
2094
 
2095
+ def stream_filter(self, filter_string: str = "", sort=None):
2096
+ """
2097
+ Stream the filter for the document family
2098
+ :param filter_string: the filter to apply
2099
+ :param sort: sorting order of the query
2100
+ :return:
2101
+ A generator of the document families
2102
+ """
2103
+ page_size = 5
2104
+ page = 1
2105
+
2106
+ if not sort:
2107
+ sort = "id"
2108
+
2109
+ while True:
2110
+ page_response = self.filter(filter_string=filter_string, page=page, page_size=page_size, sort=sort)
2111
+ if not page_response.content:
2112
+ break
2113
+ for document_family in page_response.content:
2114
+ yield document_family
2115
+ page += 1
2116
+
2022
2117
  def filter(self, filter_string: str = "", page: int = 1, page_size: int = 100,
2023
2118
  sort=None) -> PageDocumentFamilyEndpoint:
2024
2119
  params = {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kodexa
3
- Version: 6.0.186
3
+ Version: 6.0.192a0
4
4
  Summary: Python SDK for the Kodexa Platform
5
5
  Author: Austin Redenbaugh
6
6
  Author-email: austin@kodexa.com
@@ -5,13 +5,13 @@ kodexa/connectors/__init__.py,sha256=WCUEzFGjHcgPAMFIKLaRTXAkGHx3vUCD8APMhOrNNgM
5
5
  kodexa/connectors/connectors.py,sha256=25-TffyGDjxHyp9ITug0qgr1nhqMAekmV5NVvbPGs7o,7722
6
6
  kodexa/model/__init__.py,sha256=DyCgkJU7rOfd4SMvPRLaPdklCNlkqCRRWiVPwjYn2GE,720
7
7
  kodexa/model/base.py,sha256=6IraEK3RomjPgFpPYkxjuLUriF958AusgJO21Dcopeg,753
8
- kodexa/model/model.py,sha256=A9v5CRLe-Y28tj0C-oe8L1Gvu3m6bYfPvQQK89tCFzw,88524
8
+ kodexa/model/model.py,sha256=-UTEHt3hDLGgvVqiJ860m2O907gntTZgOtcwusYHXtY,90090
9
9
  kodexa/model/objects.py,sha256=c58_U0k5wQdha7zcV3oEO_aHlq1Jfh72kzK_6EapZZE,114121
10
10
  kodexa/model/persistence.py,sha256=rRBY_onLcSTFlZZmitU8_FLffP7elDHhcmF8yYT94HE,37655
11
11
  kodexa/pipeline/__init__.py,sha256=sA7f5D6qkdMrpp2xTIeefnrUBI6xxEEWostvxfX_1Cs,236
12
12
  kodexa/pipeline/pipeline.py,sha256=uzxe7HuSW1CKDZOrnV_LRHj3SHhbs14lvmMGJ_DIVdw,19763
13
13
  kodexa/platform/__init__.py,sha256=1O3oiWMg292NPL_NacKDnK1T3_R6cMorrPRue_9e-O4,216
14
- kodexa/platform/client.py,sha256=nv6o3UDzSv_Sr4FbtsuHr5_Nn5xUYy5YVBaImRDyscw,106727
14
+ kodexa/platform/client.py,sha256=v590s5pHtF4ylAiY67HL0uQV-sXqjFcY0woj8GzDy9M,109778
15
15
  kodexa/platform/kodexa.py,sha256=HcwQh1NKkwSzkngKLp_kEsLW0N_JlzORszoknSDFLT0,27604
16
16
  kodexa/selectors/__init__.py,sha256=xA9-4vpyaAZWPSk3bh2kvDLkdv6XEmm7PjFbpziiTIk,100
17
17
  kodexa/selectors/ast.py,sha256=mTAcX_pjDiDy99ELxbMMwAAkzezOqz5uCyui9qz7Q4A,13499
@@ -34,7 +34,7 @@ kodexa/testing/test_components.py,sha256=i_9M6-bfUBdR1uYAzZZzWiW0M1DGKzE5mkNuHq4
34
34
  kodexa/testing/test_utils.py,sha256=HXM3S5FDzarzS6R7jkOHps6d6Ox2UtNqymoK6VCw8Zg,13596
35
35
  kodexa/training/__init__.py,sha256=xs2L62YpRkIRfslQwtQZ5Yxjhm7sLzX2TrVX6EuBnZQ,52
36
36
  kodexa/training/train_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
- kodexa-6.0.186.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
38
- kodexa-6.0.186.dist-info/WHEEL,sha256=vVCvjcmxuUltf8cYhJ0sJMRDLr1XsPuxEId8YDzbyCY,88
39
- kodexa-6.0.186.dist-info/METADATA,sha256=FG9e6VqXiMu6VlyckKaTXTtOpVksyHGDMDyY2BKq8eg,3602
40
- kodexa-6.0.186.dist-info/RECORD,,
37
+ kodexa-6.0.192a0.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
38
+ kodexa-6.0.192a0.dist-info/WHEEL,sha256=vVCvjcmxuUltf8cYhJ0sJMRDLr1XsPuxEId8YDzbyCY,88
39
+ kodexa-6.0.192a0.dist-info/METADATA,sha256=dwQ5QImPCW7E4eUbUxBDTVALmn6QgE0-89uKN28X6WQ,3604
40
+ kodexa-6.0.192a0.dist-info/RECORD,,