kodexa 6.1.2a4891138870__tar.gz → 6.1.2a5059971029__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/PKG-INFO +1 -1
  2. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/model/model.py +119 -5
  3. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/platform/client.py +29 -4
  4. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/platform/kodexa.py +21 -13
  5. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/pyproject.toml +1 -1
  6. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/setup.py +1 -1
  7. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/LICENSE +0 -0
  8. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/README.md +0 -0
  9. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/__init__.py +0 -0
  10. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/assistant/__init__.py +0 -0
  11. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/assistant/assistant.py +0 -0
  12. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/connectors/__init__.py +0 -0
  13. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/connectors/connectors.py +0 -0
  14. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/model/__init__.py +0 -0
  15. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/model/base.py +0 -0
  16. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/model/objects.py +0 -0
  17. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/model/persistence.py +0 -0
  18. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/pipeline/__init__.py +0 -0
  19. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/pipeline/pipeline.py +0 -0
  20. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/platform/__init__.py +0 -0
  21. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/selectors/__init__.py +0 -0
  22. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/selectors/ast.py +0 -0
  23. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/selectors/core.py +0 -0
  24. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/selectors/lexrules.py +0 -0
  25. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/selectors/lextab.py +0 -0
  26. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/selectors/lextab.pyi +0 -0
  27. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/selectors/parserules.py +0 -0
  28. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/selectors/parserules.pyi +0 -0
  29. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/selectors/parsetab.py +0 -0
  30. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/selectors/parsetab.pyi +0 -0
  31. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/spatial/__init__.py +0 -0
  32. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/spatial/azure_models.py +0 -0
  33. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/spatial/bbox_common.py +0 -0
  34. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/spatial/table_form_common.py +0 -0
  35. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/steps/__init__.py +0 -0
  36. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/steps/common.py +0 -0
  37. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/testing/__init__.py +0 -0
  38. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/testing/test_components.py +0 -0
  39. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/testing/test_utils.py +0 -0
  40. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/training/__init__.py +0 -0
  41. {kodexa-6.1.2a4891138870 → kodexa-6.1.2a5059971029}/kodexa/training/train_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kodexa
3
- Version: 6.1.2a4891138870
3
+ Version: 6.1.2a5059971029
4
4
  Summary: Python SDK for the Kodexa Platform
5
5
  Author: Austin Redenbaugh
6
6
  Author-email: austin@kodexa.com
@@ -15,6 +15,7 @@ from addict import Dict
15
15
 
16
16
  from kodexa.model.base import KodexaBaseModel
17
17
  from kodexa.model.objects import ContentObject, FeatureSet
18
+ import deepdiff
18
19
 
19
20
 
20
21
  class Ref:
@@ -1717,13 +1718,124 @@ class FeatureSetDiff:
1717
1718
  """
1718
1719
 
1719
1720
  def __init__(self, first_feature_set: FeatureSet, second_feature_set: FeatureSet):
1720
- self.first_feature_set = first_feature_set
1721
- self.second_feature_set = second_feature_set
1721
+ self.first_feature_map = self.parse_feature_set(first_feature_set)
1722
+ self.second_feature_map = self.parse_feature_set(second_feature_set)
1723
+ self._differences = deepdiff.DeepDiff(self.first_feature_map, self.second_feature_map,
1724
+ exclude_obj_callback=self.exclude_callback).to_dict()
1725
+ self._changed_nodes = self.get_changed_nodes()
1722
1726
 
1723
- def diff(self):
1724
- # TODO Implement a deepdiff
1727
+ def get_differences(self):
1728
+ """
1729
+ :return: Data dictionaries that contains the differences of two feature sets
1730
+ """
1731
+ if 'type_changes' in self._differences:
1732
+ self._differences.pop('type_changes')
1733
+
1734
+ return self._differences
1735
+
1736
+ def get_changed_nodes(self):
1737
+ """
1738
+ :return: Data dictionary of added and removed nodes
1739
+ """
1740
+ return self._changed_nodes
1741
+
1742
+ def get_exclude_paths(self):
1743
+ """
1744
+ :return: List of paths to exclude
1745
+ """
1746
+ return ['shape', 'group_uuid', 'uuid', 'parent_group_uuid', 'single']
1747
+
1748
+ def exclude_callback(self, path, key):
1749
+ """
1750
+ Checks if the key is to be exluceded from the diff
1751
+ :param path: contains the values of that key
1752
+ :param key: The key of the data dictionary to compare
1753
+ :return: boolean
1754
+ """
1755
+ if any(re.search(exclude_key, key) for exclude_key in self.get_exclude_paths()):
1756
+ return True
1757
+ else:
1758
+ return False
1759
+
1760
+ def parse_feature_set(self, feature_set: FeatureSet):
1761
+ """
1762
+ :param feature_set: The feature set to be parsed
1763
+ :return: Dictionary of feature with the key as the nodeUuid
1764
+ """
1765
+ return {feature.get('nodeUuid'): feature for feature in feature_set.node_features}
1725
1766
 
1726
- pass
1767
+ def parsed_values_changed(self):
1768
+ for key, value in self._differences.get('values_changed').items():
1769
+ # Check if the old_value is stil in the second_feature_map. If it is remove the key
1770
+ if key in self.second_feature_map.node_features:
1771
+ self._differences.get('values_changed').remove(key)
1772
+
1773
+ def is_equal(self) -> bool:
1774
+ """
1775
+ Checks if the two feature set is equal to each other
1776
+ :return: This returns a bool
1777
+ """
1778
+ return self._differences == {}
1779
+
1780
+ def get_changed_nodes(self):
1781
+ """
1782
+ :return: A list of nodes that were changed
1783
+ """
1784
+ if self.is_equal():
1785
+ return []
1786
+
1787
+ # Check for new nodes added in the second_feature_map
1788
+ new_added_nodes = []
1789
+
1790
+ # Checked for removed nodes in the first_feature_map
1791
+ removed_nodes = []
1792
+
1793
+ # Checked for modified nodes
1794
+ modified_nodes = []
1795
+ for key, value in self._differences.get('values_changed').items():
1796
+ modified_nodes.append(self.parsed_node_uuid(key))
1797
+
1798
+ # Merge unique nodeUuid of first_feature_map and second_feature_map
1799
+ merged_node_uuids = set(self.first_feature_map.keys()).union(set(self.second_feature_map.keys()))
1800
+ for node_uuid in merged_node_uuids:
1801
+ if node_uuid not in self.first_feature_map:
1802
+ new_added_nodes.append(node_uuid)
1803
+ elif node_uuid not in self.second_feature_map:
1804
+ removed_nodes.append(node_uuid)
1805
+
1806
+ return {
1807
+ 'new_added_nodes': new_added_nodes,
1808
+ 'removed_nodes': removed_nodes,
1809
+ 'existing_modified_nodes': modified_nodes
1810
+ }
1811
+
1812
+ def get_difference_count(self):
1813
+ """
1814
+ :return: The total number of differences between the feature sets
1815
+ """
1816
+ return len(self._differences().keys())
1817
+
1818
+ def parsed_item_added(self):
1819
+ item_added: Dict = self._differences.get('iterable_item_added')
1820
+ if item_added:
1821
+ return {}
1822
+
1823
+ for key, value in item_added.items():
1824
+ node = self.parsed_node_uuid(key)
1825
+ if node in self._changed_nodes['new_added_nodes']:
1826
+ self._differences['iterable_item_added'][key]['details'] = f'Node: {node} was added'
1827
+ continue
1828
+
1829
+ # if node in
1830
+ return self.get_difference_count()
1831
+
1832
+ def parsed_node_uuid(self, key):
1833
+ """
1834
+ :param key: Key of data dictionary
1835
+ :return: node uuid from the key
1836
+ """
1837
+ node = key.split("['")[1].split("']")[0]
1838
+ return node
1727
1839
 
1728
1840
 
1729
1841
  class Document(object):
@@ -2321,6 +2433,8 @@ class Document(object):
2321
2433
  feature_dict['name'] = feature.name
2322
2434
  node_feature['features'].append(feature_dict)
2323
2435
 
2436
+ return feature_set
2437
+
2324
2438
  def get_all_tagged_nodes(self) -> List[ContentNode]:
2325
2439
  """
2326
2440
  Get all the tagged nodes in the document
@@ -411,6 +411,27 @@ class EntitiesEndpoint:
411
411
  self.client: "KodexaClient" = client
412
412
  self.organization: Optional["OrganizationEndpoint"] = organization
413
413
 
414
+ def stream_list(self, query="*", sort=None, filters: List[str] = None):
415
+ """
416
+ Stream the list of resources
417
+ :param query:
418
+ :param sort:
419
+ :param filters:
420
+ :return:
421
+ """
422
+ page_size = 5
423
+ page = 1
424
+ if not sort:
425
+ sort = "id"
426
+
427
+ while True:
428
+ page_response = self.list(query=query, page=page, page_size=page_size, sort=sort, filters=filters)
429
+ if not page_response.content:
430
+ break
431
+ for resource in page_response.content:
432
+ yield resource
433
+ page += 1
434
+
414
435
  def list(self, query="*", page=1, page_size=10, sort=None, filters: List[str] = None):
415
436
  url = f"/api/{self.get_type()}"
416
437
 
@@ -2170,7 +2191,7 @@ class DocumentStoreEndpoint(StoreEndpoint):
2170
2191
 
2171
2192
  return PageDocumentFamilyEndpoint.parse_obj(get_response.json()).set_client(self.client)
2172
2193
 
2173
- def stream_filter(self, filter_string: str = "", sort=None):
2194
+ def stream_filter(self, filter_string: str = "", sort=None, limit=None):
2174
2195
  """
2175
2196
  Stream the filter for the document family
2176
2197
  :param query: the query to run
@@ -2589,10 +2610,10 @@ class ExtractionEngineEndpoint:
2589
2610
 
2590
2611
  class KodexaClient:
2591
2612
 
2592
- def __init__(self, url=None, access_token=None):
2613
+ def __init__(self, url=None, access_token=None, profile=None):
2593
2614
  from kodexa import KodexaPlatform
2594
- self.base_url = url if url is not None else KodexaPlatform.get_url()
2595
- self.access_token = access_token if access_token is not None else KodexaPlatform.get_access_token()
2615
+ self.base_url = url if url is not None else KodexaPlatform.get_url(profile)
2616
+ self.access_token = access_token if access_token is not None else KodexaPlatform.get_access_token(profile)
2596
2617
  self.organizations = OrganizationsEndpoint(self)
2597
2618
  self.projects = ProjectsEndpoint(self)
2598
2619
  self.workspaces = WorkspacesEndpoint(self)
@@ -2669,6 +2690,9 @@ class KodexaClient:
2669
2690
  headers = {"x-access-token": self.access_token}
2670
2691
  if files is None:
2671
2692
  headers["content-type"] = "application/json"
2693
+ else:
2694
+ headers["content-type"] = "multipart/form-data"
2695
+
2672
2696
  response = requests.post(self.get_url(url), json=body, data=data, files=files, params=params,
2673
2697
  headers=headers)
2674
2698
  return process_response(response)
@@ -2679,6 +2703,7 @@ class KodexaClient:
2679
2703
  headers["content-type"] = "application/json"
2680
2704
  else:
2681
2705
  headers["content-type"] = "multipart/form-data"
2706
+
2682
2707
  response = requests.put(self.get_url(url), json=body, data=data, files=files, params=params,
2683
2708
  headers=headers)
2684
2709
  return process_response(response)
@@ -34,7 +34,7 @@ logger = logging.getLogger()
34
34
  dirs = AppDirs("Kodexa", "Kodexa")
35
35
 
36
36
 
37
- def get_config():
37
+ def get_config(profile=None):
38
38
  """Get the kodexa config object we use when you want to store your PAT locally
39
39
 
40
40
  :return: the config as a dict
@@ -47,9 +47,12 @@ def get_config():
47
47
  path = os.path.join(dirs.user_config_dir, '.kodexa.json')
48
48
  if os.path.exists(path):
49
49
  with open(path, 'r') as outfile:
50
- return json.load(outfile)
50
+ kodexa_config = json.load(outfile)
51
+ if profile and profile not in kodexa_config:
52
+ kodexa_config[profile] = {'url': None, 'access_token': None}
53
+ return kodexa_config
51
54
  else:
52
- return {'url': None, 'access_token': None}
55
+ return {'url': None, 'access_token': None} if not profile else {profile: {'url': None, 'access_token': None}}
53
56
 
54
57
 
55
58
  def save_config(config_obj):
@@ -244,7 +247,7 @@ class KodexaPlatform:
244
247
  return KodexaClient(KodexaPlatform.get_url(), KodexaPlatform.get_access_token())
245
248
 
246
249
  @staticmethod
247
- def get_access_token() -> str:
250
+ def get_access_token(profile=None) -> str:
248
251
  """
249
252
  Returns the access token
250
253
 
@@ -253,12 +256,13 @@ class KodexaPlatform:
253
256
  Returns: The access token if it is defined in the user config store, or as an environment variable
254
257
 
255
258
  """
256
- kodexa_config = get_config()
259
+ kodexa_config = get_config(profile)
257
260
  access_token = os.getenv('KODEXA_ACCESS_TOKEN')
258
- return access_token if access_token is not None else kodexa_config['access_token']
261
+ return access_token if access_token is not None else \
262
+ kodexa_config[profile]['access_token'] if profile else kodexa_config['access_token']
259
263
 
260
264
  @staticmethod
261
- def get_url() -> str:
265
+ def get_url(profile=None) -> str:
262
266
  """
263
267
  Returns the URL to use to access a Kodexa Platform
264
268
 
@@ -269,9 +273,9 @@ class KodexaPlatform:
269
273
  Returns: The URL if it is defined in the user config store, or as an environment variable
270
274
 
271
275
  """
272
- kodexa_config = get_config()
276
+ kodexa_config = get_config(profile)
273
277
  env_url = os.getenv('KODEXA_URL', None)
274
- return env_url if env_url is not None else kodexa_config['url']
278
+ return env_url if env_url is not None else kodexa_config[profile]['url'] if profile else kodexa_config['url']
275
279
 
276
280
  @staticmethod
277
281
  def set_access_token(access_token: str):
@@ -336,15 +340,19 @@ class KodexaPlatform:
336
340
  return [org_slug, slug, version]
337
341
 
338
342
  @classmethod
339
- def login(cls, kodexa_url, username, password):
343
+ def login(cls, kodexa_url, username, password, profile=None):
340
344
  from requests.auth import HTTPBasicAuth
341
345
  obj_response = requests.get(f"{kodexa_url}/api/account/me/token",
342
346
  auth=HTTPBasicAuth(username, password),
343
347
  headers={"content-type": "application/json"})
344
348
  if obj_response.status_code == 200:
345
- kodexa_config = get_config()
346
- kodexa_config['url'] = kodexa_url
347
- kodexa_config['access_token'] = obj_response.text
349
+ kodexa_config = get_config(profile)
350
+ if profile and profile in kodexa_config:
351
+ kodexa_config[profile]['url'] = kodexa_url
352
+ kodexa_config[profile]['access_token'] = obj_response.text
353
+ else:
354
+ kodexa_config['url'] = kodexa_url
355
+ kodexa_config['access_token'] = obj_response.text
348
356
  save_config(kodexa_config)
349
357
  print("Logged in")
350
358
  else:
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "kodexa"
3
- version = "6.1.2a4891138870"
3
+ version = "6.1.2a5059971029"
4
4
  description = "Python SDK for the Kodexa Platform"
5
5
  authors = ["Austin Redenbaugh <austin@kodexa.com>", "Philip Dodds <philip@kodexa.com>", "Romar Cablao <rcablao@kodexa.com>", "Amadea Paula Dodds <amadeapaula@kodexa.com>"]
6
6
  readme = "README.md"
@@ -37,7 +37,7 @@ install_requires = \
37
37
 
38
38
  setup_kwargs = {
39
39
  'name': 'kodexa',
40
- 'version': '6.1.2a4891138870',
40
+ 'version': '6.1.2a5059971029',
41
41
  'description': 'Python SDK for the Kodexa Platform',
42
42
  'long_description': '# Kodexa\n\n[![Build and Package with Poetry](https://github.com/kodexa-ai/kodexa/actions/workflows/main.yml/badge.svg?branch=main)](https://github.com/kodexa-ai/kodexa/actions/workflows/main.yml)\n\n![img.png](https://docs.kodexa.com/img.png)\n\nKodexa is a platform for building intelligent document processing pipelines. It is a set of tools and services that\nallow you to build a pipeline that can take a document, extract the content, and then process it to extract the\ninformation you need.\n\nIt is built on a set of core principles:\n\n* **Document Centric** - Kodexa is built around the idea of a document. A document is a collection of content\n nodes that are connected together. This is a powerful model that allows you to build pipelines that can\n extract content from a wide range of sources.\n\n* **Pipeline Oriented** - Kodexa is built around the idea of a pipeline. A pipeline is a series of steps that\n can be executed on a document. This allows you to build a pipeline that can extract content from a wide range\n of sources.\n\n* **Extensible** - Kodexa is built around the idea of a pipeline. A pipeline is a series of steps that can be executed\n on a document. This allows you to build a pipeline that can extract content from a wide range of sources.\n\n* **Label Driven** - Kodexa focuses on the idea of labels. Labels are a way to identify content within a document\n and then use that content to drive the processing of the document.\n\n# Python SDK\n\nThis repository contains the Python SDK for Kodexa. The SDK is the primary way to interact with Kodexa. It allows you to\ndefine actions, models, and pipelines that can be executed on Kodexa. It also includes a complete SDK client for\nworking with a Kodexa platform instance.\n\n## Documentation & Examples\n\nDocumentation is available at the [Kodexa Documentation Portal](https://docs.kodexa.com)\n\n## Current Development\n\n[//]: # (Replace it with the diagrams and descriptions for build releases)\n**BUILD VERSION FLOW**\n![build-version-flow.png](docs%2Fbuild-version-flow.png)\nBuild version will differ based on the branches that are published to pypi.\n\n**GITHUB PROCESS**\n![github-process.png](docs%2Fgithub-process.png)\nChanges that contain bugs, features, and fixes should first be pushed to the test branch. \nOnce these changes are thoroughly tested, they can be submitted as a pull request to the main branch. The pull request should be reviewed and approved by an appropriate person before the changes can be merged.\n\n## Set-up\n\nWe use poetry to manage our dependencies, so you can install them with:\n\n poetry install\n\nYou can then run the tests with:\n\n poetry run pytest\n\n# Contributing\n\nWe welcome contributions to the Kodexa platform. Please see our [contributing guide](CONTRIBUTING.md) for more details.\n\n# License\n\nApache 2.0\n\n',
43
43
  'author': 'Austin Redenbaugh',