kodexa 6.1.15059968382__py3-none-any.whl → 6.1.15059972675__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kodexa/model/model.py +119 -5
- {kodexa-6.1.15059968382.dist-info → kodexa-6.1.15059972675.dist-info}/METADATA +1 -1
- {kodexa-6.1.15059968382.dist-info → kodexa-6.1.15059972675.dist-info}/RECORD +5 -5
- {kodexa-6.1.15059968382.dist-info → kodexa-6.1.15059972675.dist-info}/LICENSE +0 -0
- {kodexa-6.1.15059968382.dist-info → kodexa-6.1.15059972675.dist-info}/WHEEL +0 -0
kodexa/model/model.py
CHANGED
@@ -15,6 +15,7 @@ from addict import Dict
|
|
15
15
|
|
16
16
|
from kodexa.model.base import KodexaBaseModel
|
17
17
|
from kodexa.model.objects import ContentObject, FeatureSet
|
18
|
+
import deepdiff
|
18
19
|
|
19
20
|
|
20
21
|
class Ref:
|
@@ -1717,13 +1718,124 @@ class FeatureSetDiff:
|
|
1717
1718
|
"""
|
1718
1719
|
|
1719
1720
|
def __init__(self, first_feature_set: FeatureSet, second_feature_set: FeatureSet):
|
1720
|
-
self.
|
1721
|
-
self.
|
1721
|
+
self.first_feature_map = self.parse_feature_set(first_feature_set)
|
1722
|
+
self.second_feature_map = self.parse_feature_set(second_feature_set)
|
1723
|
+
self._differences = deepdiff.DeepDiff(self.first_feature_map, self.second_feature_map,
|
1724
|
+
exclude_obj_callback=self.exclude_callback).to_dict()
|
1725
|
+
self._changed_nodes = self.get_changed_nodes()
|
1722
1726
|
|
1723
|
-
def
|
1724
|
-
|
1727
|
+
def get_differences(self):
|
1728
|
+
"""
|
1729
|
+
:return: Data dictionary that contains the differences between the two feature sets
|
1730
|
+
"""
|
1731
|
+
if 'type_changes' in self._differences:
|
1732
|
+
self._differences.pop('type_changes')
|
1733
|
+
|
1734
|
+
return self._differences
|
1735
|
+
|
1736
|
+
def get_changed_nodes(self):
|
1737
|
+
"""
|
1738
|
+
:return: Data dictionary of added and removed nodes
|
1739
|
+
"""
|
1740
|
+
return self._changed_nodes
|
1741
|
+
|
1742
|
+
def get_exclude_paths(self):
|
1743
|
+
"""
|
1744
|
+
:return: List of paths to exclude
|
1745
|
+
"""
|
1746
|
+
return ['shape', 'group_uuid', 'uuid', 'parent_group_uuid', 'single']
|
1747
|
+
|
1748
|
+
def exclude_callback(self, path, key):
|
1749
|
+
"""
|
1750
|
+
Checks if the key is to be excluded from the diff
|
1751
|
+
:param path: contains the values of that key
|
1752
|
+
:param key: The key of the data dictionary to compare
|
1753
|
+
:return: boolean
|
1754
|
+
"""
|
1755
|
+
if any(re.search(exclude_key, key) for exclude_key in self.get_exclude_paths()):
|
1756
|
+
return True
|
1757
|
+
else:
|
1758
|
+
return False
|
1759
|
+
|
1760
|
+
def parse_feature_set(self, feature_set: FeatureSet):
|
1761
|
+
"""
|
1762
|
+
:param feature_set: The feature set to be parsed
|
1763
|
+
:return: Dictionary of features keyed by nodeUuid
|
1764
|
+
"""
|
1765
|
+
return {feature.get('nodeUuid'): feature for feature in feature_set.node_features}
|
1725
1766
|
|
1726
|
-
|
1767
|
+
def parsed_values_changed(self):
|
1768
|
+
for key, value in self._differences.get('values_changed').items():
|
1769
|
+
# Check if the old_value is still in the second_feature_map. If it is, remove the key
|
1770
|
+
if key in self.second_feature_map.node_features:
|
1771
|
+
self._differences.get('values_changed').remove(key)
|
1772
|
+
|
1773
|
+
def is_equal(self) -> bool:
|
1774
|
+
"""
|
1775
|
+
Checks if the two feature sets are equal to each other
|
1776
|
+
:return: This returns a bool
|
1777
|
+
"""
|
1778
|
+
return self._differences == {}
|
1779
|
+
|
1780
|
+
def get_changed_nodes(self):
|
1781
|
+
"""
|
1782
|
+
:return: A list of nodes that were changed
|
1783
|
+
"""
|
1784
|
+
if self.is_equal():
|
1785
|
+
return []
|
1786
|
+
|
1787
|
+
# Check for new nodes added in the second_feature_map
|
1788
|
+
new_added_nodes = []
|
1789
|
+
|
1790
|
+
# Check for removed nodes in the first_feature_map
|
1791
|
+
removed_nodes = []
|
1792
|
+
|
1793
|
+
# Check for modified nodes
|
1794
|
+
modified_nodes = []
|
1795
|
+
for key, value in self._differences.get('values_changed').items():
|
1796
|
+
modified_nodes.append(self.parsed_node_uuid(key))
|
1797
|
+
|
1798
|
+
# Merge unique nodeUuid of first_feature_map and second_feature_map
|
1799
|
+
merged_node_uuids = set(self.first_feature_map.keys()).union(set(self.second_feature_map.keys()))
|
1800
|
+
for node_uuid in merged_node_uuids:
|
1801
|
+
if node_uuid not in self.first_feature_map:
|
1802
|
+
new_added_nodes.append(node_uuid)
|
1803
|
+
elif node_uuid not in self.second_feature_map:
|
1804
|
+
removed_nodes.append(node_uuid)
|
1805
|
+
|
1806
|
+
return {
|
1807
|
+
'new_added_nodes': new_added_nodes,
|
1808
|
+
'removed_nodes': removed_nodes,
|
1809
|
+
'existing_modified_nodes': modified_nodes
|
1810
|
+
}
|
1811
|
+
|
1812
|
+
def get_difference_count(self):
|
1813
|
+
"""
|
1814
|
+
:return: The total number of differences between the feature sets
|
1815
|
+
"""
|
1816
|
+
return len(self._differences().keys())
|
1817
|
+
|
1818
|
+
def parsed_item_added(self):
|
1819
|
+
item_added: Dict = self._differences.get('iterable_item_added')
|
1820
|
+
if item_added:
|
1821
|
+
return {}
|
1822
|
+
|
1823
|
+
for key, value in item_added.items():
|
1824
|
+
node = self.parsed_node_uuid(key)
|
1825
|
+
if node in self._changed_nodes['new_added_nodes']:
|
1826
|
+
self._differences['iterable_item_added'][key]['details'] = f'Node: {node} was added'
|
1827
|
+
continue
|
1828
|
+
|
1829
|
+
# if node in
|
1830
|
+
return self.get_difference_count()
|
1831
|
+
|
1832
|
+
def parsed_node_uuid(self, key):
|
1833
|
+
"""
|
1834
|
+
:param key: Key of data dictionary
|
1835
|
+
:return: node uuid from the key
|
1836
|
+
"""
|
1837
|
+
node = key.split("['")[1].split("']")[0]
|
1838
|
+
return node
|
1727
1839
|
|
1728
1840
|
|
1729
1841
|
class Document(object):
|
@@ -2321,6 +2433,8 @@ class Document(object):
|
|
2321
2433
|
feature_dict['name'] = feature.name
|
2322
2434
|
node_feature['features'].append(feature_dict)
|
2323
2435
|
|
2436
|
+
return feature_set
|
2437
|
+
|
2324
2438
|
def get_all_tagged_nodes(self) -> List[ContentNode]:
|
2325
2439
|
"""
|
2326
2440
|
Get all the tagged nodes in the document
|
@@ -5,7 +5,7 @@ kodexa/connectors/__init__.py,sha256=WCUEzFGjHcgPAMFIKLaRTXAkGHx3vUCD8APMhOrNNgM
|
|
5
5
|
kodexa/connectors/connectors.py,sha256=25-TffyGDjxHyp9ITug0qgr1nhqMAekmV5NVvbPGs7o,7722
|
6
6
|
kodexa/model/__init__.py,sha256=DyCgkJU7rOfd4SMvPRLaPdklCNlkqCRRWiVPwjYn2GE,720
|
7
7
|
kodexa/model/base.py,sha256=6IraEK3RomjPgFpPYkxjuLUriF958AusgJO21Dcopeg,753
|
8
|
-
kodexa/model/model.py,sha256=
|
8
|
+
kodexa/model/model.py,sha256=fjfWjT2I93tFZVlDUky8ovqgjsF36y19ZMEunDG3DAY,94643
|
9
9
|
kodexa/model/objects.py,sha256=bTj_COUUgvoergrNjFuYOK4fsMnZxZg7Ff4KN7KS3kg,116484
|
10
10
|
kodexa/model/persistence.py,sha256=ZWESzXS-jkGbp-NlPuQzQFxmZeRup7uNJjrUkGaYIOk,38334
|
11
11
|
kodexa/pipeline/__init__.py,sha256=sA7f5D6qkdMrpp2xTIeefnrUBI6xxEEWostvxfX_1Cs,236
|
@@ -34,7 +34,7 @@ kodexa/testing/test_components.py,sha256=i_9M6-bfUBdR1uYAzZZzWiW0M1DGKzE5mkNuHq4
|
|
34
34
|
kodexa/testing/test_utils.py,sha256=HXM3S5FDzarzS6R7jkOHps6d6Ox2UtNqymoK6VCw8Zg,13596
|
35
35
|
kodexa/training/__init__.py,sha256=xs2L62YpRkIRfslQwtQZ5Yxjhm7sLzX2TrVX6EuBnZQ,52
|
36
36
|
kodexa/training/train_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
37
|
-
kodexa-6.1.
|
38
|
-
kodexa-6.1.
|
39
|
-
kodexa-6.1.
|
40
|
-
kodexa-6.1.
|
37
|
+
kodexa-6.1.15059972675.dist-info/WHEEL,sha256=vVCvjcmxuUltf8cYhJ0sJMRDLr1XsPuxEId8YDzbyCY,88
|
38
|
+
kodexa-6.1.15059972675.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
|
39
|
+
kodexa-6.1.15059972675.dist-info/METADATA,sha256=-WjEEJKxs5nJfsWZH70YWASe6Niox-Inbnq5w5pSOdE,4158
|
40
|
+
kodexa-6.1.15059972675.dist-info/RECORD,,
|
File without changes
|
File without changes
|