kodexa 6.1.2b4615788456__py3-none-any.whl → 6.1.2b5059981385__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kodexa/assistant/assistant.py +110 -1
- kodexa/model/model.py +219 -7
- kodexa/model/objects.py +1 -0
- kodexa/model/persistence.py +14 -0
- kodexa/platform/client.py +53 -24
- kodexa/platform/kodexa.py +21 -13
- {kodexa-6.1.2b4615788456.dist-info → kodexa-6.1.2b5059981385.dist-info}/METADATA +11 -2
- {kodexa-6.1.2b4615788456.dist-info → kodexa-6.1.2b5059981385.dist-info}/RECORD +10 -10
- {kodexa-6.1.2b4615788456.dist-info → kodexa-6.1.2b5059981385.dist-info}/LICENSE +0 -0
- {kodexa-6.1.2b4615788456.dist-info → kodexa-6.1.2b5059981385.dist-info}/WHEEL +0 -0
kodexa/assistant/assistant.py
CHANGED
@@ -7,6 +7,115 @@ from typing import List, Optional
|
|
7
7
|
from kodexa.model import ContentObject, Document
|
8
8
|
from kodexa.model.objects import Store, Taxonomy, BaseEvent
|
9
9
|
from kodexa.platform.client import DocumentStoreEndpoint
|
10
|
+
import logging
|
11
|
+
from kodexa.pipeline import Pipeline
|
12
|
+
|
13
|
+
logger = logging.getLogger()
|
14
|
+
|
15
|
+
|
16
|
+
def _load_store_document(full_url, cache, kodexa_client):
    """
    Resolve a ``<store ref>:<version>/<document family id>`` reference to a document.

    The downloaded document is stored in ``cache`` so that repeated references to the
    same document family are only downloaded once.

    :param full_url: the document reference taken from the option value
    :param cache: a dict-like cache shared across option replacements
    :param kodexa_client: client used to look up the store by reference
    :return: the document, or None if the reference is malformed, undefined or cannot be accessed
    """
    url_parts = full_url.split(':')

    # A reference that contains "/" but no ":<version>/<family>" part cannot be resolved;
    # previously this raised an IndexError outside of the error handling below.
    if len(url_parts) < 2 or '/' not in url_parts[1]:
        logger.warning(f"Unable to parse the document reference {full_url}")
        return None

    version, document_family_id = url_parts[1].split("/")[:2]
    store_ref = f"{url_parts[0]}:{version}"
    logger.info(f"Downloading document parameter {store_ref}/{document_family_id}")

    if store_ref == "" or document_family_id == "undefined":
        logger.info(f"Document reference not available")
        return None

    # The key includes the document family: keying on the store alone made every document
    # option that pointed at the same store resolve to the first document that was cached.
    cache_key = f'store-{store_ref}/{document_family_id}'

    try:
        logger.info(f"Using store {store_ref}")
        if cache_key not in cache:
            store = kodexa_client.get_object_by_ref('store', store_ref)
            document = store.get_family(document_family_id).get_document()

            # We keep the ref of the document so we can use it later
            document.ref = full_url
            cache[cache_key] = document
            logger.info(f"Downloaded document")

        return cache[cache_key]
    except Exception as e:
        # Best effort: an inaccessible document is reported and replaced by None
        logger.warning(f"Unable to access the document ({e})")
        return None


def replace_option(option_type, option_value, event_helper, cache, kodexa_client):
    """
    Replace the raw value of a single option with the object it refers to.

    * ``document``: a reference containing ``/`` is downloaded from its store; any other
      non-empty value is treated as a content object ID and loaded through ``event_helper``
    * a type containing ``store``: the store looked up by reference
    * a type containing ``pipeline``: a ``Pipeline`` built from the ``steps`` of the value
    * ``taxonomy``: the taxonomy looked up by reference (None if it cannot be accessed)
    * anything else: the value is returned unchanged

    :param option_type: the declared type of the option
    :param option_value: the raw value of the option
    :param event_helper: helper used to read content objects by ID
    :param cache: a dict-like cache used to avoid downloading the same document twice
    :param kodexa_client: client used to look up stores and taxonomies by reference
    :return: the resolved object, None when a document/taxonomy is unavailable, or the original value
    """
    logger.info("Replacing option")

    if option_type == 'document':
        # We need to download the document from the store and replace the option with it
        full_url = option_value
        logger.info(f"Breaking down the full url for the document {full_url}")

        if '/' in full_url:
            return _load_store_document(full_url, cache, kodexa_client)

        if len(full_url) > 0:
            # We have an ID to a Content Object
            return Document.from_kddb(event_helper.get_content_object(content_object_id=full_url).read())

    if 'store' in option_type.lower():
        logger.info(f"Getting Store object instance {option_type} as {option_value}")

        # We need to create a remote store
        return kodexa_client.get_object_by_ref('store', option_value)

    if 'pipeline' in option_type.lower():
        logger.info(f"Getting pipeline instance {option_type} as {option_value}")

        # Create a pipeline to represent the option
        pipeline = Pipeline()
        pipeline.steps = []

        if isinstance(option_value['steps'], list):
            for index, step in enumerate(option_value['steps']):
                logger.info(f"Creating step {step}")
                pipeline.add_step(step['ref'], name=f'Step {index}', step_type=step['stepType'],
                                  options=step['options'] if 'options' in step else {})

        return pipeline

    if option_type == 'taxonomy':
        logger.info(f"Getting Taxonomy object instance {option_type} as {option_value}")

        try:
            return kodexa_client.get_object_by_ref('taxo', option_value)
        except Exception as e:
            logger.warning(f"Unable to access the taxonomy ({e})")
            return None

    logger.info("Returning option value")
    return option_value
|
99
|
+
|
100
|
+
|
101
|
+
def replace_options(step, event_helper, cache, kodexa_client):
    """
    Resolve, in place, every option of ``step`` that has a declared option type.

    Options whose declared type starts with ``list:`` are resolved element by element using
    the type that follows the prefix; every other option is resolved as a single value.
    Keys that are declared in ``step.option_types`` but missing from ``step.options`` are skipped.

    :param step: object exposing ``option_types`` and ``options`` mappings
    :param event_helper: passed through to ``replace_option``
    :param cache: passed through to ``replace_option``
    :param kodexa_client: passed through to ``replace_option``
    """
    logger.info(f"Determine is we have any options that need replacement")

    # Walk the declared option types and swap in the resolved objects
    for key in step.option_types.keys():
        logger.info(f"Check option type for key {key}")

        if key not in step.options:
            continue

        declared_type = step.option_types[key]
        logger.info(f"Type is {declared_type}")

        if not declared_type.startswith('list:'):
            step.options[key] = replace_option(declared_type, step.options[key], event_helper,
                                               cache, kodexa_client)
            continue

        logger.info("Is a list type")
        element_type = declared_type.split(':')[1]
        step.options[key] = [
            replace_option(element_type, value, event_helper, cache, kodexa_client)
            for value in step.options[key]
        ]
|
10
119
|
|
11
120
|
|
12
121
|
class AssistantMetadata:
|
@@ -201,4 +310,4 @@ class Assistant:
|
|
201
310
|
Returns:
|
202
311
|
AssistantResponse: the response to the event
|
203
312
|
|
204
|
-
"""
|
313
|
+
"""
|
kodexa/model/model.py
CHANGED
@@ -14,7 +14,8 @@ import msgpack
|
|
14
14
|
from addict import Dict
|
15
15
|
|
16
16
|
from kodexa.model.base import KodexaBaseModel
|
17
|
-
from kodexa.model.objects import ContentObject
|
17
|
+
from kodexa.model.objects import ContentObject, FeatureSet
|
18
|
+
import deepdiff
|
18
19
|
|
19
20
|
|
20
21
|
class Ref:
|
@@ -50,7 +51,8 @@ class ContentException(Dict):
|
|
50
51
|
|
51
52
|
def __init__(self, exception_type: str, message: str, severity: str = 'ERROR', tag: Optional[str] = None,
|
52
53
|
group_uuid: Optional[str] = None, tag_uuid: Optional[str] = None,
|
53
|
-
exception_details: Optional[str] = None, node_uuid: Optional[str] = None,
|
54
|
+
exception_details: Optional[str] = None, node_uuid: Optional[str] = None, value: Optional[str] = None,
|
55
|
+
*args, **kwargs):
|
54
56
|
super().__init__(*args, **kwargs)
|
55
57
|
self.tag = tag
|
56
58
|
self.message = message
|
@@ -60,6 +62,7 @@ class ContentException(Dict):
|
|
60
62
|
self.exception_type = exception_type
|
61
63
|
self.node_uuid = node_uuid
|
62
64
|
self.severity = severity
|
65
|
+
self.value = value
|
63
66
|
|
64
67
|
|
65
68
|
class Tag(Dict):
|
@@ -79,7 +82,7 @@ class Tag(Dict):
|
|
79
82
|
"""A string representing the value that was labelled in the node"""
|
80
83
|
self.data: Optional[Any] = data
|
81
84
|
"""Any data object (JSON serializable) that you wish to associate with the label"""
|
82
|
-
self.uuid: Optional[str] = uuid
|
85
|
+
self.uuid: Optional[str] = uuid or str(uuid.uuid4())
|
83
86
|
"""The UUID for this tag instance, this allows tags that are on different content nodes to be related through the same UUID"""
|
84
87
|
self.confidence: Optional[float] = confidence
|
85
88
|
"""The confidence of the tag in a range of 0-1"""
|
@@ -1535,10 +1538,17 @@ class ContentNode(object):
|
|
1535
1538
|
|
1536
1539
|
if not node:
|
1537
1540
|
if (traverse == traverse.ALL or traverse == traverse.PARENT) and self.get_parent().get_parent():
|
1538
|
-
|
1539
|
-
|
1540
|
-
|
1541
|
-
|
1541
|
+
try:
|
1542
|
+
potential_next_node = \
|
1543
|
+
self.get_parent().get_parent().get_children()[self.get_parent().index + 1].get_children()[0]
|
1544
|
+
if potential_next_node:
|
1545
|
+
return potential_next_node
|
1546
|
+
except:
|
1547
|
+
# traverse additional layer
|
1548
|
+
potential_next_node = self.get_parent().get_parent().get_parent().get_children()[
|
1549
|
+
self.get_parent().get_parent().index + 1].get_children()[0].get_children()[0]
|
1550
|
+
if potential_next_node:
|
1551
|
+
return potential_next_node
|
1542
1552
|
return node
|
1543
1553
|
|
1544
1554
|
if compiled_node_type_re.match(node.node_type) and (not skip_virtual or not node.virtual):
|
@@ -1702,6 +1712,132 @@ class ContentClassification(object):
|
|
1702
1712
|
selector=dict_val.get('selector'), confidence=dict_val.get('confidence'))
|
1703
1713
|
|
1704
1714
|
|
1715
|
+
class FeatureSetDiff:
    """
    A utility class that can be used to diff two feature sets.

    Each feature set is indexed by node UUID and the two maps are compared with
    ``deepdiff.DeepDiff``. The raw result is kept as a dictionary keyed by the kind of
    difference (the code below reads ``values_changed``, ``iterable_item_added`` and
    ``type_changes``).
    """

    def __init__(self, first_feature_set: "FeatureSet", second_feature_set: "FeatureSet"):
        """
        :param first_feature_set: the feature set used as the baseline
        :param second_feature_set: the feature set compared against the baseline
        """
        self.first_feature_map = self.parse_feature_set(first_feature_set)
        self.second_feature_map = self.parse_feature_set(second_feature_set)
        self._differences = deepdiff.DeepDiff(self.first_feature_map, self.second_feature_map,
                                              exclude_obj_callback=self.exclude_callback).to_dict()
        self._changed_nodes = self.get_changed_nodes()

    def get_differences(self):
        """
        Note that this removes any ``type_changes`` entry from the stored differences.

        :return: Data dictionary that contains the differences of the two feature sets
        """
        self._differences.pop('type_changes', None)
        return self._differences

    def get_exclude_paths(self):
        """
        :return: List of patterns; any diff path matching one of them is excluded
        """
        return ['shape', 'group_uuid', 'uuid', 'parent_group_uuid', 'single']

    def exclude_callback(self, path, key):
        """
        Checks if an entry is to be excluded from the diff.

        This is registered as DeepDiff's ``exclude_obj_callback``, which is documented to be
        called with the object first and its path second, so despite the parameter names
        ``path`` receives the object and ``key`` receives the path string that is matched.

        :param path: first callback argument (unused)
        :param key: second callback argument, searched for each of the exclude patterns
        :return: True if any exclude pattern is found in ``key``
        """
        return any(re.search(exclude_key, key) for exclude_key in self.get_exclude_paths())

    def parse_feature_set(self, feature_set: "FeatureSet"):
        """
        :param feature_set: The feature set to be parsed
        :return: Dictionary of node features with the nodeUuid as the key
        """
        return {feature.get('nodeUuid'): feature for feature in feature_set.node_features}

    def _old_value_still_present(self, key, change):
        """Return True if the old value of a change is still found on that node in the second map."""
        if not isinstance(change, dict) or 'old_value' not in change:
            return False

        node_entry = self.second_feature_map.get(self.parsed_node_uuid(key))
        if not isinstance(node_entry, dict):
            return False

        old_value = change['old_value']
        for feature in node_entry.get('features') or []:
            if feature == old_value:
                return True
            if isinstance(feature, dict) and any(value == old_value for value in feature.values()):
                return True
        return False

    def parsed_values_changed(self):
        """
        Drop ``values_changed`` entries whose old value is still present in the second feature set.

        NOTE(review): the previous implementation tested the diff path against
        ``self.second_feature_map.node_features`` and called ``.remove`` on a dictionary while
        iterating it; the map is a plain dict so it always raised. The rule used here follows
        the original comment ("old value still in the second feature map, remove the key") and
        compares against the features of the same node - confirm this is the intended rule.
        """
        values_changed = self._differences.get('values_changed')
        if not values_changed:
            return

        # Collect first, delete afterwards: a dict must not change size while it is iterated
        stale_keys = [key for key, change in values_changed.items()
                      if self._old_value_still_present(key, change)]
        for key in stale_keys:
            del values_changed[key]

        if not values_changed:
            self._differences.pop('values_changed', None)

    def is_equal(self) -> bool:
        """
        Checks if the two feature sets are equal to each other

        :return: True when no differences are recorded
        """
        return self._differences == {}

    def get_changed_nodes(self):
        """
        Work out which nodes were added, removed or modified.

        This class used to define ``get_changed_nodes`` twice; the later definition (this
        computation) was the one in effect, so it is the one that has been kept.

        :return: an empty list when the feature sets are equal, otherwise a dictionary with the
                 keys ``new_added_nodes``, ``removed_nodes`` and ``existing_modified_nodes``
        """
        if self.is_equal():
            return []

        # 'values_changed' is absent when nodes were only added or removed
        modified_nodes = [self.parsed_node_uuid(key)
                          for key in (self._differences.get('values_changed') or {})]

        new_added_nodes = []
        removed_nodes = []

        # Merge unique nodeUuid of first_feature_map and second_feature_map
        merged_node_uuids = set(self.first_feature_map.keys()).union(set(self.second_feature_map.keys()))
        for node_uuid in merged_node_uuids:
            if node_uuid not in self.first_feature_map:
                new_added_nodes.append(node_uuid)
            elif node_uuid not in self.second_feature_map:
                removed_nodes.append(node_uuid)

        return {
            'new_added_nodes': new_added_nodes,
            'removed_nodes': removed_nodes,
            'existing_modified_nodes': modified_nodes
        }

    def get_difference_count(self):
        """
        :return: The number of difference categories recorded between the feature sets
        """
        return len(self._differences)

    def parsed_item_added(self):
        """
        Annotate ``iterable_item_added`` entries that belong to newly added nodes.

        :return: an empty dictionary when there are no added items, otherwise the difference count
        """
        item_added = self._differences.get('iterable_item_added')
        if not item_added:
            return {}

        # _changed_nodes is a list (not a dict) when the feature sets were equal at construction
        changed_nodes = self._changed_nodes if isinstance(self._changed_nodes, dict) else {}
        new_added_nodes = changed_nodes.get('new_added_nodes', [])

        for key in item_added:
            node = self.parsed_node_uuid(key)
            if node in new_added_nodes:
                item_added[key]['details'] = f'Node: {node} was added'

        return self.get_difference_count()

    def parsed_node_uuid(self, key):
        """
        :param key: a diff path such as ``root['<node uuid>']['features'][0]``
        :return: the text between the first ``['`` and the following ``']``
        """
        node = key.split("['")[1].split("']")[0]
        return node
|
1839
|
+
|
1840
|
+
|
1705
1841
|
class Document(object):
|
1706
1842
|
"""A Document is a collection of metadata and a set of content nodes."""
|
1707
1843
|
|
@@ -1757,6 +1893,9 @@ class Document(object):
|
|
1757
1893
|
self.classes: List[ContentClassification] = []
|
1758
1894
|
"""A list of the content classifications associated at the document level"""
|
1759
1895
|
|
1896
|
+
self.tag_instances: List[TagInstance] = []
|
1897
|
+
"""A list of tag instances that contains a set of tag that has a set of nodes"""
|
1898
|
+
|
1760
1899
|
# Start persistence layer
|
1761
1900
|
from kodexa.model import PersistenceManager
|
1762
1901
|
|
@@ -1765,6 +1904,36 @@ class Document(object):
|
|
1765
1904
|
delete_on_close=delete_on_close)
|
1766
1905
|
self._persistence_layer.initialize()
|
1767
1906
|
|
1907
|
+
def add_tag_instance(self, tag_to_apply, node_list: List[ContentNode]):
    """
    Tag every node in the list and record the group as a tag instance on the document.

    :param tag_to_apply: name of the tag feature added to each node
    :param node_list: the content nodes that make up this tag instance
    :return: None
    """
    tag = Tag()

    # For each node in the list create/update a feature. The feature value is the Tag
    # object created above; previously the Tag class itself was passed as the value.
    for node in node_list:
        node.add_feature('tag', tag_to_apply, tag)

    # Keep the tag together with the nodes it was applied to
    tag_instance = TagInstance(tag, node_list)
    self.tag_instances.append(tag_instance)
|
1921
|
+
|
1922
|
+
def update_tag_instance(self, tag_uuid):
    """
    Look up the tag on every node of the tag instances whose tag has the given UUID.

    NOTE(review): the result of ``get_tag`` is discarded, so nothing is updated yet - this
    looks like an unfinished implementation; confirm the intended update.

    :param tag_uuid: UUID of the tag whose instances are visited
    :return: None
    """
    matching_instances = (candidate for candidate in self.tag_instances
                          if candidate.tag.uuid == tag_uuid)
    for matched in matching_instances:
        tag = matched.tag
        for tagged_node in matched.nodes:
            tagged_node.get_tag(tag.value, tag_uuid=tag.uuid)
|
1928
|
+
|
1929
|
+
def get_tag_instance(self, tag):
    """
    Get the tag instances whose ``tag`` attribute equals the given value.

    NOTE(review): instances are created with a Tag object as their ``tag``, so passing a
    tag name here presumably never matches - confirm what callers are expected to pass.

    :param tag: the value compared against each instance's ``tag``
    :return: a list of the matching tag instances, in their stored order
    """
    matches = []
    for candidate in self.tag_instances:
        if candidate.tag == tag:
            matches.append(candidate)
    return matches
|
1936
|
+
|
1768
1937
|
def get_persistence(self):
|
1769
1938
|
return self._persistence_layer
|
1770
1939
|
|
@@ -2273,6 +2442,49 @@ class Document(object):
|
|
2273
2442
|
"""
|
2274
2443
|
return self.labels
|
2275
2444
|
|
2445
|
+
def get_feature_set(self) -> FeatureSet:
    """
    Build a feature set of all the tagged nodes.

    Every tagged node contributes one entry holding its UUID (as a string) and the
    dictionary form of each of its features whose feature type is ``tag``.

    :return: the populated feature set
    """
    result = FeatureSet()
    result.node_features = []

    for node in self.get_all_tagged_nodes():
        entry = {
            'nodeUuid': str(node.uuid),
            'features': []
        }
        result.node_features.append(entry)

        # Only tag features are exported; other feature types are ignored
        tag_features = (candidate for candidate in node.get_features()
                        if candidate.feature_type == 'tag')
        for tag_feature in tag_features:
            as_dict = tag_feature.to_dict()
            as_dict['featureType'] = tag_feature.feature_type
            as_dict['name'] = tag_feature.name
            entry['features'].append(as_dict)

    return result
|
2470
|
+
|
2471
|
+
def get_all_tagged_nodes(self) -> List[ContentNode]:
    """
    Get all the tagged nodes in the document.

    :return: whatever the document's persistence layer reports as the tagged nodes
    """
    persistence = self._persistence_layer
    return persistence.get_all_tagged_nodes()
|
2478
|
+
|
2479
|
+
|
2480
|
+
class TagInstance:
    """A tag together with the list of nodes it is associated with."""

    def __init__(self, tag: Tag, nodes):
        """
        :param tag: the tag shared by the nodes
        :param nodes: the nodes associated with the tag
        """
        self.nodes = nodes
        self.tag = tag

    def add_node(self, nodes: List[ContentNode]):
        """
        Append further nodes to this instance.

        :param nodes: the nodes to add to the end of the existing node list
        """
        additional_nodes = nodes
        self.nodes.extend(additional_nodes)
|
2487
|
+
|
2276
2488
|
|
2277
2489
|
class ContentObjectReference:
|
2278
2490
|
""" """
|
kodexa/model/objects.py
CHANGED
@@ -1350,6 +1350,7 @@ class Option(KodexaBaseModel):
|
|
1350
1350
|
support_article: Optional[str] = Field(None, alias='supportArticle')
|
1351
1351
|
overview_markdown: Optional[str] = Field(None, alias='overviewMarkdown')
|
1352
1352
|
show_if: Optional[str] = Field(None, alias='showIf')
|
1353
|
+
show_on_popup: Optional[bool] = Field(None, alias='showOnPopup')
|
1353
1354
|
possible_values: Optional[List[PossibleValue]] = Field(None, alias='possibleValues')
|
1354
1355
|
properties: Optional[Dict[str, Any]] = None
|
1355
1356
|
|
kodexa/model/persistence.py
CHANGED
@@ -581,6 +581,16 @@ class SqliteDocumentPersistence(object):
|
|
581
581
|
def clear_model_insights(self):
|
582
582
|
self.cursor.execute("delete from model_insights")
|
583
583
|
|
584
|
+
def get_all_tagged_nodes(self):
    """
    Load every content node that carries at least one feature whose type name starts with ``tag:``.

    :return: a list with one content node per tagged node ID
    """
    # DISTINCT matters: the ft table yields one row per feature, so without it a node
    # carrying several tags was loaded and returned once for every tag.
    query = "select distinct cn_id from ft where f_type in (select id from f_type where name like 'tag:%')"
    return [self.get_node(row[0]) for row in self.cursor.execute(query).fetchall()]
|
592
|
+
|
593
|
+
|
584
594
|
|
585
595
|
class SimpleObjectCache(object):
|
586
596
|
"""
|
@@ -669,6 +679,10 @@ class PersistenceManager(object):
|
|
669
679
|
self.flush_cache()
|
670
680
|
return self._underlying_persistence.get_tagged_nodes(tag, tag_uuid)
|
671
681
|
|
682
|
+
def get_all_tagged_nodes(self):
    """
    Flush the cache, then return the tagged nodes reported by the underlying persistence.

    :return: the result of the underlying persistence's ``get_all_tagged_nodes``
    """
    # The flush comes first, presumably so the lookup sees pending cached changes
    self.flush_cache()
    backend = self._underlying_persistence
    return backend.get_all_tagged_nodes()
|
685
|
+
|
672
686
|
def initialize(self):
|
673
687
|
self._underlying_persistence.initialize()
|
674
688
|
|
kodexa/platform/client.py
CHANGED
@@ -192,6 +192,28 @@ class ProjectResourceEndpoint(ClientEndpoint):
|
|
192
192
|
df.drop(columns='client', axis=1)
|
193
193
|
return df
|
194
194
|
|
195
|
+
def stream_list(self, query="*", sort=None, filters: List[str] = None, page_size: int = 5):
    """
    Stream the list of resources, requesting one page at a time from ``list``.

    Paging starts at page 1 and stops at the first page whose ``content`` is empty.

    :param query: the query passed to ``list``
    :param sort: the sort passed to ``list``; ``"id"`` is used when none is given
    :param filters: optional filters passed to ``list``
    :param page_size: number of resources requested per page (defaults to 5, the value
                      that was previously hard-coded)
    :return: a generator yielding each resource of every page
    :raises ValueError: if ``page_size`` is smaller than 1
    """
    if page_size < 1:
        raise ValueError("page_size must be at least 1")

    if not sort:
        sort = "id"

    page = 1
    while True:
        page_response = self.list(query=query, page=page, page_size=page_size, sort=sort, filters=filters)
        if not page_response.content:
            break
        yield from page_response.content
        page += 1
|
216
|
+
|
195
217
|
def list(self, query="*", page=1, page_size=10, sort=None, filters: List[str] = None):
|
196
218
|
|
197
219
|
url = f"/api/projects/{self.project.id}/{self.get_type()}"
|
@@ -389,6 +411,27 @@ class EntitiesEndpoint:
|
|
389
411
|
self.client: "KodexaClient" = client
|
390
412
|
self.organization: Optional["OrganizationEndpoint"] = organization
|
391
413
|
|
414
|
+
def stream_list(self, query="*", sort=None, filters: List[str] = None, page_size: int = 5):
    """
    Stream the list of resources, requesting one page at a time from ``list``.

    Paging starts at page 1 and stops at the first page whose ``content`` is empty.

    :param query: the query passed to ``list``
    :param sort: the sort passed to ``list``; ``"id"`` is used when none is given
    :param filters: optional filters passed to ``list``
    :param page_size: number of resources requested per page (defaults to 5, the value
                      that was previously hard-coded)
    :return: a generator yielding each resource of every page
    :raises ValueError: if ``page_size`` is smaller than 1
    """
    if page_size < 1:
        raise ValueError("page_size must be at least 1")

    if not sort:
        sort = "id"

    page = 1
    while True:
        page_response = self.list(query=query, page=page, page_size=page_size, sort=sort, filters=filters)
        if not page_response.content:
            break
        yield from page_response.content
        page += 1
|
434
|
+
|
392
435
|
def list(self, query="*", page=1, page_size=10, sort=None, filters: List[str] = None):
|
393
436
|
url = f"/api/{self.get_type()}"
|
394
437
|
|
@@ -1605,7 +1648,7 @@ class DocumentFamilyEndpoint(DocumentFamily, ClientEndpoint):
|
|
1605
1648
|
self.client)
|
1606
1649
|
if mixin and mixin in updated_document_family.mixins:
|
1607
1650
|
return updated_document_family
|
1608
|
-
if label and label in updated_document_family.labels:
|
1651
|
+
if label and any(l.name == label for l in updated_document_family.labels):
|
1609
1652
|
return updated_document_family
|
1610
1653
|
|
1611
1654
|
time.sleep(5)
|
@@ -1677,28 +1720,10 @@ class DocumentFamilyEndpoint(DocumentFamily, ClientEndpoint):
|
|
1677
1720
|
|
1678
1721
|
def replace_tags(self, document: Document, content_object: Optional[ContentObject] = None):
    """
    Replace the tags of the document family with the feature set of the given document.

    :param document: the document whose feature set is sent
    :param content_object: the content object to update; the last entry of
                           ``self.content_objects`` is used when omitted
    """
    target = self.content_objects[-1] if content_object is None else content_object
    url = f"/api/stores/{self.store_ref.replace(':', '/')}/families/{self.id}/objects/{target.id}/_replaceTags"
    payload = document.get_feature_set().dict(by_alias=True)
    self.client.put(url, body=payload)
|
1702
1727
|
|
1703
1728
|
|
1704
1729
|
class StoreEndpoint(ComponentInstanceEndpoint, Store):
|
@@ -2166,7 +2191,7 @@ class DocumentStoreEndpoint(StoreEndpoint):
|
|
2166
2191
|
|
2167
2192
|
return PageDocumentFamilyEndpoint.parse_obj(get_response.json()).set_client(self.client)
|
2168
2193
|
|
2169
|
-
def stream_filter(self, filter_string: str = "", sort=None):
|
2194
|
+
def stream_filter(self, filter_string: str = "", sort=None, limit=None):
|
2170
2195
|
"""
|
2171
2196
|
Stream the filter for the document family
|
2172
2197
|
:param query: the query to run
|
@@ -2585,10 +2610,10 @@ class ExtractionEngineEndpoint:
|
|
2585
2610
|
|
2586
2611
|
class KodexaClient:
|
2587
2612
|
|
2588
|
-
def __init__(self, url=None, access_token=None):
|
2613
|
+
def __init__(self, url=None, access_token=None, profile=None):
|
2589
2614
|
from kodexa import KodexaPlatform
|
2590
|
-
self.base_url = url if url is not None else KodexaPlatform.get_url()
|
2591
|
-
self.access_token = access_token if access_token is not None else KodexaPlatform.get_access_token()
|
2615
|
+
self.base_url = url if url is not None else KodexaPlatform.get_url(profile)
|
2616
|
+
self.access_token = access_token if access_token is not None else KodexaPlatform.get_access_token(profile)
|
2592
2617
|
self.organizations = OrganizationsEndpoint(self)
|
2593
2618
|
self.projects = ProjectsEndpoint(self)
|
2594
2619
|
self.workspaces = WorkspacesEndpoint(self)
|
@@ -2665,6 +2690,9 @@ class KodexaClient:
|
|
2665
2690
|
headers = {"x-access-token": self.access_token}
|
2666
2691
|
if files is None:
|
2667
2692
|
headers["content-type"] = "application/json"
|
2693
|
+
else:
|
2694
|
+
headers["content-type"] = "multipart/form-data"
|
2695
|
+
|
2668
2696
|
response = requests.post(self.get_url(url), json=body, data=data, files=files, params=params,
|
2669
2697
|
headers=headers)
|
2670
2698
|
return process_response(response)
|
@@ -2675,6 +2703,7 @@ class KodexaClient:
|
|
2675
2703
|
headers["content-type"] = "application/json"
|
2676
2704
|
else:
|
2677
2705
|
headers["content-type"] = "multipart/form-data"
|
2706
|
+
|
2678
2707
|
response = requests.put(self.get_url(url), json=body, data=data, files=files, params=params,
|
2679
2708
|
headers=headers)
|
2680
2709
|
return process_response(response)
|
kodexa/platform/kodexa.py
CHANGED
@@ -34,7 +34,7 @@ logger = logging.getLogger()
|
|
34
34
|
dirs = AppDirs("Kodexa", "Kodexa")
|
35
35
|
|
36
36
|
|
37
|
-
def get_config():
|
37
|
+
def get_config(profile=None):
|
38
38
|
"""Get the kodexa config object we use when you want to store your PAT locally
|
39
39
|
|
40
40
|
:return: the config as a dict
|
@@ -47,9 +47,12 @@ def get_config():
|
|
47
47
|
path = os.path.join(dirs.user_config_dir, '.kodexa.json')
|
48
48
|
if os.path.exists(path):
|
49
49
|
with open(path, 'r') as outfile:
|
50
|
-
|
50
|
+
kodexa_config = json.load(outfile)
|
51
|
+
if profile and profile not in kodexa_config:
|
52
|
+
kodexa_config[profile] = {'url': None, 'access_token': None}
|
53
|
+
return kodexa_config
|
51
54
|
else:
|
52
|
-
return {'url': None, 'access_token': None}
|
55
|
+
return {'url': None, 'access_token': None} if not profile else {profile: {'url': None, 'access_token': None}}
|
53
56
|
|
54
57
|
|
55
58
|
def save_config(config_obj):
|
@@ -244,7 +247,7 @@ class KodexaPlatform:
|
|
244
247
|
return KodexaClient(KodexaPlatform.get_url(), KodexaPlatform.get_access_token())
|
245
248
|
|
246
249
|
@staticmethod
|
247
|
-
def get_access_token() -> str:
|
250
|
+
def get_access_token(profile=None) -> str:
|
248
251
|
"""
|
249
252
|
Returns the access token
|
250
253
|
|
@@ -253,12 +256,13 @@ class KodexaPlatform:
|
|
253
256
|
Returns: The access token if it is defined in the user config store, or as an environment variable
|
254
257
|
|
255
258
|
"""
|
256
|
-
kodexa_config = get_config()
|
259
|
+
kodexa_config = get_config(profile)
|
257
260
|
access_token = os.getenv('KODEXA_ACCESS_TOKEN')
|
258
|
-
return access_token if access_token is not None else
|
261
|
+
return access_token if access_token is not None else \
|
262
|
+
kodexa_config[profile]['access_token'] if profile else kodexa_config['access_token']
|
259
263
|
|
260
264
|
@staticmethod
|
261
|
-
def get_url() -> str:
|
265
|
+
def get_url(profile=None) -> str:
|
262
266
|
"""
|
263
267
|
Returns the URL to use to access a Kodexa Platform
|
264
268
|
|
@@ -269,9 +273,9 @@ class KodexaPlatform:
|
|
269
273
|
Returns: The URL if it is defined in the user config store, or as an environment variable
|
270
274
|
|
271
275
|
"""
|
272
|
-
kodexa_config = get_config()
|
276
|
+
kodexa_config = get_config(profile)
|
273
277
|
env_url = os.getenv('KODEXA_URL', None)
|
274
|
-
return env_url if env_url is not None else kodexa_config['url']
|
278
|
+
return env_url if env_url is not None else kodexa_config[profile]['url'] if profile else kodexa_config['url']
|
275
279
|
|
276
280
|
@staticmethod
|
277
281
|
def set_access_token(access_token: str):
|
@@ -336,15 +340,19 @@ class KodexaPlatform:
|
|
336
340
|
return [org_slug, slug, version]
|
337
341
|
|
338
342
|
@classmethod
|
339
|
-
def login(cls, kodexa_url, username, password):
|
343
|
+
def login(cls, kodexa_url, username, password, profile=None):
|
340
344
|
from requests.auth import HTTPBasicAuth
|
341
345
|
obj_response = requests.get(f"{kodexa_url}/api/account/me/token",
|
342
346
|
auth=HTTPBasicAuth(username, password),
|
343
347
|
headers={"content-type": "application/json"})
|
344
348
|
if obj_response.status_code == 200:
|
345
|
-
kodexa_config = get_config()
|
346
|
-
|
347
|
-
|
349
|
+
kodexa_config = get_config(profile)
|
350
|
+
if profile and profile in kodexa_config:
|
351
|
+
kodexa_config[profile]['url'] = kodexa_url
|
352
|
+
kodexa_config[profile]['access_token'] = obj_response.text
|
353
|
+
else:
|
354
|
+
kodexa_config['url'] = kodexa_url
|
355
|
+
kodexa_config['access_token'] = obj_response.text
|
348
356
|
save_config(kodexa_config)
|
349
357
|
print("Logged in")
|
350
358
|
else:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: kodexa
|
3
|
-
Version: 6.1.
|
3
|
+
Version: 6.1.2b5059981385
|
4
4
|
Summary: Python SDK for the Kodexa Platform
|
5
5
|
Author: Austin Redenbaugh
|
6
6
|
Author-email: austin@kodexa.com
|
@@ -70,7 +70,15 @@ Documentation is available at the [Kodexa Documentation Portal](https://docs.kod
|
|
70
70
|
|
71
71
|
## Current Development
|
72
72
|
|
73
|
-
|
73
|
+
[//]: # (Replace it with the diagrams and descriptions for build releases)
|
74
|
+
**BUILD VERSION FLOW**
|
75
|
+

|
76
|
+
The build version differs depending on which branch is published to PyPI.
|
77
|
+
|
78
|
+
**GITHUB PROCESS**
|
79
|
+

|
80
|
+
Changes that contain bugs, features, and fixes should first be pushed to the test branch.
|
81
|
+
Once these changes are thoroughly tested, they can be submitted as a pull request to the main branch. The pull request should be reviewed and approved by an appropriate person before the changes can be merged.
|
74
82
|
|
75
83
|
## Set-up
|
76
84
|
|
@@ -90,3 +98,4 @@ We welcome contributions to the Kodexa platform. Please see our [contributing gu
|
|
90
98
|
|
91
99
|
Apache 2.0
|
92
100
|
|
101
|
+
|
@@ -1,18 +1,18 @@
|
|
1
1
|
kodexa/__init__.py,sha256=fpYUxplMg8ucktmxv62x3jW-8TXUe4EgTa5vnS6vg78,847
|
2
2
|
kodexa/assistant/__init__.py,sha256=nlXm_YnV_50hgn0TIT2Fkc2fQ-86OjmctY_j8My9nc4,171
|
3
|
-
kodexa/assistant/assistant.py,sha256=
|
3
|
+
kodexa/assistant/assistant.py,sha256=LknWUtItHnMHlxXbw37Q5uUHelmSFGLSkTJ7q6LqEqc,12530
|
4
4
|
kodexa/connectors/__init__.py,sha256=WCUEzFGjHcgPAMFIKLaRTXAkGHx3vUCD8APMhOrNNgM,297
|
5
5
|
kodexa/connectors/connectors.py,sha256=25-TffyGDjxHyp9ITug0qgr1nhqMAekmV5NVvbPGs7o,7722
|
6
6
|
kodexa/model/__init__.py,sha256=DyCgkJU7rOfd4SMvPRLaPdklCNlkqCRRWiVPwjYn2GE,720
|
7
7
|
kodexa/model/base.py,sha256=6IraEK3RomjPgFpPYkxjuLUriF958AusgJO21Dcopeg,753
|
8
|
-
kodexa/model/model.py,sha256=
|
9
|
-
kodexa/model/objects.py,sha256=
|
10
|
-
kodexa/model/persistence.py,sha256=
|
8
|
+
kodexa/model/model.py,sha256=_fV5srWiX_iBQfdtlI7YjyGLJDTBuktysuF8VlO8Sv0,96183
|
9
|
+
kodexa/model/objects.py,sha256=bTj_COUUgvoergrNjFuYOK4fsMnZxZg7Ff4KN7KS3kg,116484
|
10
|
+
kodexa/model/persistence.py,sha256=ZWESzXS-jkGbp-NlPuQzQFxmZeRup7uNJjrUkGaYIOk,38334
|
11
11
|
kodexa/pipeline/__init__.py,sha256=sA7f5D6qkdMrpp2xTIeefnrUBI6xxEEWostvxfX_1Cs,236
|
12
12
|
kodexa/pipeline/pipeline.py,sha256=uzxe7HuSW1CKDZOrnV_LRHj3SHhbs14lvmMGJ_DIVdw,19763
|
13
13
|
kodexa/platform/__init__.py,sha256=1O3oiWMg292NPL_NacKDnK1T3_R6cMorrPRue_9e-O4,216
|
14
|
-
kodexa/platform/client.py,sha256=
|
15
|
-
kodexa/platform/kodexa.py,sha256=
|
14
|
+
kodexa/platform/client.py,sha256=LCCTb0TxRBDtv1_8g_jPtaJIiRyx4SfmG__WACh9RI0,110197
|
15
|
+
kodexa/platform/kodexa.py,sha256=OZCdYK34ZnFAvktDdwewAerG7DHEJxau858qo116-vA,28246
|
16
16
|
kodexa/selectors/__init__.py,sha256=xA9-4vpyaAZWPSk3bh2kvDLkdv6XEmm7PjFbpziiTIk,100
|
17
17
|
kodexa/selectors/ast.py,sha256=mTAcX_pjDiDy99ELxbMMwAAkzezOqz5uCyui9qz7Q4A,13499
|
18
18
|
kodexa/selectors/core.py,sha256=FxrKEQNqCRBSyC37ymt6kTvv4O-dj6SN71UGLocxWWA,3698
|
@@ -34,7 +34,7 @@ kodexa/testing/test_components.py,sha256=i_9M6-bfUBdR1uYAzZZzWiW0M1DGKzE5mkNuHq4
|
|
34
34
|
kodexa/testing/test_utils.py,sha256=HXM3S5FDzarzS6R7jkOHps6d6Ox2UtNqymoK6VCw8Zg,13596
|
35
35
|
kodexa/training/__init__.py,sha256=xs2L62YpRkIRfslQwtQZ5Yxjhm7sLzX2TrVX6EuBnZQ,52
|
36
36
|
kodexa/training/train_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
37
|
-
kodexa-6.1.
|
38
|
-
kodexa-6.1.
|
39
|
-
kodexa-6.1.
|
40
|
-
kodexa-6.1.
|
37
|
+
kodexa-6.1.2b5059981385.dist-info/WHEEL,sha256=vVCvjcmxuUltf8cYhJ0sJMRDLr1XsPuxEId8YDzbyCY,88
|
38
|
+
kodexa-6.1.2b5059981385.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
39
|
+
kodexa-6.1.2b5059981385.dist-info/METADATA,sha256=mmaA2EaUdk2p2viA0kczg9LJQ98vTrCS4vTcOEPezB0,4159
|
40
|
+
kodexa-6.1.2b5059981385.dist-info/RECORD,,
|
File without changes
|
File without changes
|