cognite-neat 0.109.3__py3-none-any.whl → 0.110.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_alpha.py +2 -0
- cognite/neat/_client/_api/schema.py +17 -1
- cognite/neat/_client/data_classes/schema.py +3 -3
- cognite/neat/_constants.py +11 -0
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +9 -10
- cognite/neat/_graph/extractors/_iodd.py +3 -3
- cognite/neat/_graph/extractors/_mock_graph_generator.py +9 -7
- cognite/neat/_graph/loaders/_rdf2dms.py +285 -346
- cognite/neat/_graph/queries/_base.py +28 -92
- cognite/neat/_graph/transformers/__init__.py +1 -3
- cognite/neat/_graph/transformers/_rdfpath.py +2 -49
- cognite/neat/_issues/__init__.py +1 -6
- cognite/neat/_issues/_base.py +21 -252
- cognite/neat/_issues/_contextmanagers.py +46 -0
- cognite/neat/_issues/_factory.py +61 -0
- cognite/neat/_issues/errors/__init__.py +18 -4
- cognite/neat/_issues/errors/_wrapper.py +81 -3
- cognite/neat/_issues/formatters.py +4 -4
- cognite/neat/_issues/warnings/__init__.py +3 -2
- cognite/neat/_issues/warnings/_properties.py +8 -0
- cognite/neat/_rules/_constants.py +9 -0
- cognite/neat/_rules/_shared.py +3 -2
- cognite/neat/_rules/analysis/__init__.py +2 -3
- cognite/neat/_rules/analysis/_base.py +450 -258
- cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
- cognite/neat/_rules/exporters/_rules2excel.py +2 -8
- cognite/neat/_rules/exporters/_rules2instance_template.py +2 -2
- cognite/neat/_rules/exporters/_rules2ontology.py +5 -4
- cognite/neat/_rules/importers/_base.py +2 -47
- cognite/neat/_rules/importers/_dms2rules.py +7 -10
- cognite/neat/_rules/importers/_dtdl2rules/dtdl_importer.py +2 -2
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +59 -25
- cognite/neat/_rules/importers/_rdf/_shared.py +1 -1
- cognite/neat/_rules/importers/_spreadsheet2rules.py +12 -9
- cognite/neat/_rules/models/dms/_rules.py +3 -1
- cognite/neat/_rules/models/dms/_rules_input.py +4 -0
- cognite/neat/_rules/models/dms/_validation.py +14 -4
- cognite/neat/_rules/models/entities/_loaders.py +1 -1
- cognite/neat/_rules/models/entities/_multi_value.py +2 -2
- cognite/neat/_rules/models/information/_rules.py +18 -17
- cognite/neat/_rules/models/information/_rules_input.py +2 -1
- cognite/neat/_rules/models/information/_validation.py +3 -1
- cognite/neat/_rules/transformers/__init__.py +8 -2
- cognite/neat/_rules/transformers/_converters.py +242 -43
- cognite/neat/_rules/transformers/_verification.py +5 -10
- cognite/neat/_session/_base.py +4 -4
- cognite/neat/_session/_prepare.py +12 -0
- cognite/neat/_session/_read.py +21 -17
- cognite/neat/_session/_show.py +11 -123
- cognite/neat/_session/_state.py +0 -2
- cognite/neat/_session/_subset.py +64 -0
- cognite/neat/_session/_to.py +63 -12
- cognite/neat/_store/_graph_store.py +5 -246
- cognite/neat/_utils/rdf_.py +2 -2
- cognite/neat/_utils/spreadsheet.py +44 -1
- cognite/neat/_utils/text.py +51 -32
- cognite/neat/_version.py +1 -1
- {cognite_neat-0.109.3.dist-info → cognite_neat-0.110.0.dist-info}/METADATA +1 -1
- {cognite_neat-0.109.3.dist-info → cognite_neat-0.110.0.dist-info}/RECORD +62 -64
- {cognite_neat-0.109.3.dist-info → cognite_neat-0.110.0.dist-info}/WHEEL +1 -1
- cognite/neat/_graph/queries/_construct.py +0 -187
- cognite/neat/_graph/queries/_shared.py +0 -173
- cognite/neat/_rules/analysis/_dms.py +0 -57
- cognite/neat/_rules/analysis/_information.py +0 -249
- cognite/neat/_rules/models/_rdfpath.py +0 -372
- {cognite_neat-0.109.3.dist-info → cognite_neat-0.110.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.109.3.dist-info → cognite_neat-0.110.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
from collections import defaultdict
|
|
2
|
-
|
|
3
|
-
from rdflib import URIRef
|
|
4
|
-
|
|
5
|
-
from cognite.neat._constants import DMS_LISTABLE_PROPERTY_LIMIT
|
|
6
|
-
from cognite.neat._rules.models.dms import DMSProperty, DMSRules, DMSView
|
|
7
|
-
from cognite.neat._rules.models.entities import ViewEntity
|
|
8
|
-
|
|
9
|
-
from ._base import BaseAnalysis
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class DMSAnalysis(BaseAnalysis[DMSRules, DMSView, DMSProperty, ViewEntity, str]):
|
|
13
|
-
"""Assumes analysis over only the complete schema"""
|
|
14
|
-
|
|
15
|
-
def _get_classes(self) -> list[DMSView]:
|
|
16
|
-
return list(self.rules.views)
|
|
17
|
-
|
|
18
|
-
def _get_properties(self) -> list[DMSProperty]:
|
|
19
|
-
return list(self.rules.properties)
|
|
20
|
-
|
|
21
|
-
def _get_cls_entity(self, class_: DMSView | DMSProperty) -> ViewEntity:
|
|
22
|
-
return class_.view
|
|
23
|
-
|
|
24
|
-
def _get_cls_parents(self, class_: DMSView) -> list[ViewEntity] | None:
|
|
25
|
-
return list(class_.implements) if class_.implements else None
|
|
26
|
-
|
|
27
|
-
@classmethod
|
|
28
|
-
def _set_cls_entity(cls, property_: DMSProperty, class_: ViewEntity) -> None:
|
|
29
|
-
property_.view = class_
|
|
30
|
-
|
|
31
|
-
def _get_object(self, property_: DMSProperty) -> ViewEntity | None:
|
|
32
|
-
return property_.value_type if isinstance(property_.value_type, ViewEntity) else None
|
|
33
|
-
|
|
34
|
-
def _get_max_occurrence(self, property_: DMSProperty) -> int | float | None:
|
|
35
|
-
return DMS_LISTABLE_PROPERTY_LIMIT if property_.is_list else 1
|
|
36
|
-
|
|
37
|
-
def subset_rules(self, desired_classes: set[ViewEntity]) -> DMSRules:
|
|
38
|
-
raise NotImplementedError()
|
|
39
|
-
|
|
40
|
-
def _get_prop_entity(self, property_: DMSProperty) -> str:
|
|
41
|
-
return property_.view_property
|
|
42
|
-
|
|
43
|
-
def views_with_properties_linked_to_classes(
|
|
44
|
-
self,
|
|
45
|
-
consider_inheritance: bool = False,
|
|
46
|
-
allow_different_namespace: bool = False,
|
|
47
|
-
) -> dict[ViewEntity, dict[str, URIRef]]:
|
|
48
|
-
view_property_pairs = self.classes_with_properties(consider_inheritance, allow_different_namespace)
|
|
49
|
-
|
|
50
|
-
view_and_properties_with_links: dict[ViewEntity, dict[str, URIRef]] = defaultdict(dict)
|
|
51
|
-
|
|
52
|
-
for view, properties in view_property_pairs.items():
|
|
53
|
-
view_and_properties_with_links[view] = {
|
|
54
|
-
prop.view_property: prop.logical for prop in properties if prop.logical
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
return view_and_properties_with_links
|
|
@@ -1,249 +0,0 @@
|
|
|
1
|
-
import warnings
|
|
2
|
-
from typing import Any, cast
|
|
3
|
-
|
|
4
|
-
from pydantic import ValidationError
|
|
5
|
-
from rdflib import URIRef
|
|
6
|
-
|
|
7
|
-
from cognite.neat._rules._constants import EntityTypes
|
|
8
|
-
from cognite.neat._rules.models import SchemaCompleteness
|
|
9
|
-
from cognite.neat._rules.models._rdfpath import (
|
|
10
|
-
Hop,
|
|
11
|
-
RDFPath,
|
|
12
|
-
SelfReferenceProperty,
|
|
13
|
-
SingleProperty,
|
|
14
|
-
)
|
|
15
|
-
from cognite.neat._rules.models.entities import ClassEntity
|
|
16
|
-
from cognite.neat._rules.models.entities._multi_value import MultiValueTypeInfo
|
|
17
|
-
from cognite.neat._rules.models.information import (
|
|
18
|
-
InformationClass,
|
|
19
|
-
InformationProperty,
|
|
20
|
-
InformationRules,
|
|
21
|
-
)
|
|
22
|
-
from cognite.neat._utils.collection_ import most_occurring_element
|
|
23
|
-
from cognite.neat._utils.rdf_ import get_inheritance_path
|
|
24
|
-
|
|
25
|
-
from ._base import BaseAnalysis
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class InformationAnalysis(BaseAnalysis[InformationRules, InformationClass, InformationProperty, ClassEntity, str]):
|
|
29
|
-
"""Assumes analysis over only the complete schema"""
|
|
30
|
-
|
|
31
|
-
def _get_object(self, property_: InformationProperty) -> ClassEntity | None:
|
|
32
|
-
return property_.value_type if isinstance(property_.value_type, ClassEntity) else None
|
|
33
|
-
|
|
34
|
-
def _get_max_occurrence(self, property_: InformationProperty) -> int | float | None:
|
|
35
|
-
return property_.max_count
|
|
36
|
-
|
|
37
|
-
def _get_cls_entity(self, class_: InformationClass | InformationProperty) -> ClassEntity:
|
|
38
|
-
return class_.class_
|
|
39
|
-
|
|
40
|
-
@classmethod
|
|
41
|
-
def _set_cls_entity(cls, property_: InformationProperty, class_: ClassEntity) -> None:
|
|
42
|
-
property_.class_ = class_
|
|
43
|
-
|
|
44
|
-
def _get_prop_entity(self, property_: InformationProperty) -> str:
|
|
45
|
-
return property_.property_
|
|
46
|
-
|
|
47
|
-
def _get_cls_parents(self, class_: InformationClass) -> list[ClassEntity] | None:
|
|
48
|
-
return list(class_.implements or []) or None
|
|
49
|
-
|
|
50
|
-
def _get_properties(self) -> list[InformationProperty]:
|
|
51
|
-
return list(self.rules.properties)
|
|
52
|
-
|
|
53
|
-
def _get_classes(self) -> list[InformationClass]:
|
|
54
|
-
return list(self.rules.classes)
|
|
55
|
-
|
|
56
|
-
def has_hop_transformations(self):
|
|
57
|
-
return any(
|
|
58
|
-
prop_.instance_source and isinstance(prop_.instance_source.traversal, Hop)
|
|
59
|
-
for prop_ in self.rules.properties
|
|
60
|
-
)
|
|
61
|
-
|
|
62
|
-
def has_self_reference_property_transformations(self):
|
|
63
|
-
return any(
|
|
64
|
-
prop_.instance_source and isinstance(prop_.instance_source.traversal, SelfReferenceProperty)
|
|
65
|
-
for prop_ in self.rules.properties
|
|
66
|
-
)
|
|
67
|
-
|
|
68
|
-
def all_reference_transformations(self):
|
|
69
|
-
return [
|
|
70
|
-
prop_
|
|
71
|
-
for prop_ in self.rules.properties
|
|
72
|
-
if prop_.instance_source and isinstance(prop_.instance_source.traversal, SelfReferenceProperty)
|
|
73
|
-
]
|
|
74
|
-
|
|
75
|
-
def define_property_renaming_config(self, class_: ClassEntity) -> dict[str | URIRef, str]:
|
|
76
|
-
property_renaming_configuration: dict[str | URIRef, str] = {}
|
|
77
|
-
|
|
78
|
-
if definitions := self.class_property_pairs(only_rdfpath=True, consider_inheritance=True).get(class_, None):
|
|
79
|
-
for property_id, definition in definitions.items():
|
|
80
|
-
transformation = cast(RDFPath, definition.instance_source)
|
|
81
|
-
|
|
82
|
-
# use case we have a single property rdf path, and defined prefix
|
|
83
|
-
# in either metadata or prefixes of rules
|
|
84
|
-
if isinstance(
|
|
85
|
-
transformation.traversal,
|
|
86
|
-
SingleProperty,
|
|
87
|
-
) and (
|
|
88
|
-
transformation.traversal.property.prefix in self.rules.prefixes
|
|
89
|
-
or transformation.traversal.property.prefix == self.rules.metadata.prefix
|
|
90
|
-
):
|
|
91
|
-
namespace = (
|
|
92
|
-
self.rules.metadata.namespace
|
|
93
|
-
if transformation.traversal.property.prefix == self.rules.metadata.prefix
|
|
94
|
-
else self.rules.prefixes[transformation.traversal.property.prefix]
|
|
95
|
-
)
|
|
96
|
-
|
|
97
|
-
property_renaming_configuration[namespace[transformation.traversal.property.suffix]] = property_id
|
|
98
|
-
|
|
99
|
-
# otherwise we default to the property id
|
|
100
|
-
else:
|
|
101
|
-
property_renaming_configuration[property_id] = property_id
|
|
102
|
-
|
|
103
|
-
return property_renaming_configuration
|
|
104
|
-
|
|
105
|
-
def neat_id_to_instance_source_property_uri(self, property_neat_id: URIRef) -> URIRef | None:
|
|
106
|
-
if (
|
|
107
|
-
(property_ := self.properties_by_neat_id.get(property_neat_id))
|
|
108
|
-
and property_.instance_source
|
|
109
|
-
and isinstance(
|
|
110
|
-
property_.instance_source.traversal,
|
|
111
|
-
SingleProperty,
|
|
112
|
-
)
|
|
113
|
-
and (
|
|
114
|
-
property_.instance_source.traversal.property.prefix in self.rules.prefixes
|
|
115
|
-
or property_.instance_source.traversal.property.prefix == self.rules.metadata.prefix
|
|
116
|
-
)
|
|
117
|
-
):
|
|
118
|
-
namespace = (
|
|
119
|
-
self.rules.metadata.namespace
|
|
120
|
-
if property_.instance_source.traversal.property.prefix == self.rules.metadata.prefix
|
|
121
|
-
else self.rules.prefixes[property_.instance_source.traversal.property.prefix]
|
|
122
|
-
)
|
|
123
|
-
|
|
124
|
-
return namespace[property_.instance_source.traversal.property.suffix]
|
|
125
|
-
return None
|
|
126
|
-
|
|
127
|
-
def property_types(self, class_: ClassEntity) -> dict[str, EntityTypes]:
|
|
128
|
-
property_types = {}
|
|
129
|
-
if definitions := self.class_property_pairs(consider_inheritance=True).get(class_, None):
|
|
130
|
-
for property_id, definition in definitions.items():
|
|
131
|
-
property_types[property_id] = definition.type_
|
|
132
|
-
|
|
133
|
-
return property_types
|
|
134
|
-
|
|
135
|
-
def most_occurring_class_in_transformations(self, class_: ClassEntity) -> ClassEntity | None:
|
|
136
|
-
classes = []
|
|
137
|
-
if class_property_pairs := self.class_property_pairs(consider_inheritance=True, only_rdfpath=True).get(
|
|
138
|
-
class_, None
|
|
139
|
-
):
|
|
140
|
-
for property_ in class_property_pairs.values():
|
|
141
|
-
classes.append(cast(RDFPath, property_.instance_source).traversal.class_)
|
|
142
|
-
|
|
143
|
-
return cast(ClassEntity, most_occurring_element(classes))
|
|
144
|
-
else:
|
|
145
|
-
return None
|
|
146
|
-
|
|
147
|
-
def subset_rules(self, desired_classes: set[ClassEntity]) -> InformationRules:
|
|
148
|
-
"""
|
|
149
|
-
Subset rules to only include desired classes and their properties.
|
|
150
|
-
|
|
151
|
-
Args:
|
|
152
|
-
desired_classes: Desired classes to include in the reduced data model
|
|
153
|
-
|
|
154
|
-
Returns:
|
|
155
|
-
Instance of InformationRules
|
|
156
|
-
|
|
157
|
-
!!! note "Inheritance"
|
|
158
|
-
If desired classes contain a class that is a subclass of another class(es), the parent class(es)
|
|
159
|
-
will be included in the reduced data model as well even though the parent class(es) are
|
|
160
|
-
not in the desired classes set. This is to ensure that the reduced data model is
|
|
161
|
-
consistent and complete.
|
|
162
|
-
|
|
163
|
-
!!! note "Partial Reduction"
|
|
164
|
-
This method does not perform checks if classes that are value types of desired classes
|
|
165
|
-
properties are part of desired classes. If a class is not part of desired classes, but it
|
|
166
|
-
is a value type of a property of a class that is part of desired classes, derived reduced
|
|
167
|
-
rules will be marked as partial.
|
|
168
|
-
|
|
169
|
-
!!! note "Validation"
|
|
170
|
-
This method will attempt to validate the reduced rules with custom validations.
|
|
171
|
-
If it fails, it will return a partial rules with a warning message, validated
|
|
172
|
-
only with base Pydantic validators.
|
|
173
|
-
"""
|
|
174
|
-
class_as_dict = self.as_class_dict()
|
|
175
|
-
class_parents_pairs = self.class_parent_pairs()
|
|
176
|
-
defined_classes = self.defined_classes(consider_inheritance=True)
|
|
177
|
-
|
|
178
|
-
possible_classes = defined_classes.intersection(desired_classes)
|
|
179
|
-
impossible_classes = desired_classes - possible_classes
|
|
180
|
-
|
|
181
|
-
# need to add all the parent classes of the desired classes to the possible classes
|
|
182
|
-
parents: set[ClassEntity] = set()
|
|
183
|
-
for class_ in possible_classes:
|
|
184
|
-
parents = parents.union({parent for parent in get_inheritance_path(class_, class_parents_pairs)})
|
|
185
|
-
possible_classes = possible_classes.union(parents)
|
|
186
|
-
|
|
187
|
-
if not possible_classes:
|
|
188
|
-
raise ValueError("None of the desired classes are defined in the data model!")
|
|
189
|
-
|
|
190
|
-
if impossible_classes:
|
|
191
|
-
warnings.warn(
|
|
192
|
-
f"Could not find the following classes defined in the data model: {impossible_classes}",
|
|
193
|
-
stacklevel=2,
|
|
194
|
-
)
|
|
195
|
-
|
|
196
|
-
reduced_data_model: dict[str, Any] = {
|
|
197
|
-
"metadata": self.rules.metadata.model_copy(),
|
|
198
|
-
"prefixes": (self.rules.prefixes or {}).copy(),
|
|
199
|
-
"classes": [],
|
|
200
|
-
"properties": [],
|
|
201
|
-
}
|
|
202
|
-
|
|
203
|
-
for class_ in possible_classes:
|
|
204
|
-
reduced_data_model["classes"].append(class_as_dict[str(class_.suffix)])
|
|
205
|
-
|
|
206
|
-
class_property_pairs = self.classes_with_properties(consider_inheritance=False)
|
|
207
|
-
|
|
208
|
-
for class_, properties in class_property_pairs.items():
|
|
209
|
-
if class_ in possible_classes:
|
|
210
|
-
reduced_data_model["properties"].extend(properties)
|
|
211
|
-
|
|
212
|
-
try:
|
|
213
|
-
return type(self.rules)(**reduced_data_model)
|
|
214
|
-
except ValidationError as e:
|
|
215
|
-
warnings.warn(f"Reduced data model is not complete: {e}", stacklevel=2)
|
|
216
|
-
reduced_data_model["metadata"].schema_ = SchemaCompleteness.partial
|
|
217
|
-
return type(self.rules).model_construct(**reduced_data_model)
|
|
218
|
-
|
|
219
|
-
def class_uri(self, class_: ClassEntity) -> URIRef | None:
|
|
220
|
-
"""Get URI for a class entity based on the rules.
|
|
221
|
-
|
|
222
|
-
Args:
|
|
223
|
-
class_: instance of ClassEntity
|
|
224
|
-
|
|
225
|
-
Returns:
|
|
226
|
-
URIRef of the class entity or None if not found
|
|
227
|
-
"""
|
|
228
|
-
|
|
229
|
-
# we need to handle optional renamings and we do this
|
|
230
|
-
# by checking if the most occurring class in transformations alternatively
|
|
231
|
-
# in cases when we are not specifying transformations we default to the class entity
|
|
232
|
-
if not (most_frequent_class := self.most_occurring_class_in_transformations(class_)):
|
|
233
|
-
most_frequent_class = class_
|
|
234
|
-
|
|
235
|
-
# case 1 class prefix in rules.prefixes
|
|
236
|
-
if most_frequent_class.prefix in self.rules.prefixes:
|
|
237
|
-
return self.rules.prefixes[cast(str, most_frequent_class.prefix)][most_frequent_class.suffix]
|
|
238
|
-
|
|
239
|
-
# case 2 class prefix equal to rules.metadata.prefix
|
|
240
|
-
elif most_frequent_class.prefix == self.rules.metadata.prefix:
|
|
241
|
-
return self.rules.metadata.namespace[most_frequent_class.suffix]
|
|
242
|
-
|
|
243
|
-
# case 3 when class prefix is not found in prefixes of rules
|
|
244
|
-
else:
|
|
245
|
-
return None
|
|
246
|
-
|
|
247
|
-
@property
|
|
248
|
-
def multi_value_properties(self) -> list[InformationProperty]:
|
|
249
|
-
return [prop_ for prop_ in self.rules.properties if isinstance(prop_.value_type, MultiValueTypeInfo)]
|
|
@@ -1,372 +0,0 @@
|
|
|
1
|
-
""" """
|
|
2
|
-
|
|
3
|
-
import re
|
|
4
|
-
import sys
|
|
5
|
-
import urllib.parse
|
|
6
|
-
from collections import Counter
|
|
7
|
-
from functools import total_ordering
|
|
8
|
-
from typing import ClassVar, Literal
|
|
9
|
-
|
|
10
|
-
from pydantic import BaseModel, field_validator, model_serializer
|
|
11
|
-
|
|
12
|
-
from cognite.neat._issues.errors import NeatValueError
|
|
13
|
-
|
|
14
|
-
if sys.version_info >= (3, 11):
|
|
15
|
-
from enum import StrEnum
|
|
16
|
-
from typing import Self
|
|
17
|
-
else:
|
|
18
|
-
from backports.strenum import StrEnum
|
|
19
|
-
from typing_extensions import Self
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class TransformationRuleType(StrEnum):
|
|
23
|
-
rdfpath = "rdfpath"
|
|
24
|
-
rawlookup = "rawlookup"
|
|
25
|
-
sparql = "sparql"
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class Lookup(StrEnum):
|
|
29
|
-
table = "table"
|
|
30
|
-
key = "key"
|
|
31
|
-
value = "value" # type: ignore
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
class EntityTypes(StrEnum):
|
|
35
|
-
class_ = "class"
|
|
36
|
-
property_ = "property"
|
|
37
|
-
undefined = "undefined"
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
# FOR PARSING STRINGS:
|
|
41
|
-
PREFIX_REGEX = r"[a-zA-Z]+[a-zA-Z0-9-_.]*[a-zA-Z0-9]+"
|
|
42
|
-
SUFFIX_REGEX = r"[a-zA-Z0-9-_.]+[a-zA-Z0-9]|[-_.]*[a-zA-Z0-9]+"
|
|
43
|
-
VERSION_REGEX = r"[a-zA-Z0-9]([.a-zA-Z0-9_-]{0,41}[a-zA-Z0-9])?"
|
|
44
|
-
|
|
45
|
-
ENTITY_ID_REGEX = rf"{PREFIX_REGEX}:({SUFFIX_REGEX})"
|
|
46
|
-
ENTITY_ID_REGEX_COMPILED = re.compile(rf"^(?P<prefix>{PREFIX_REGEX}):(?P<suffix>{SUFFIX_REGEX})$")
|
|
47
|
-
VERSIONED_ENTITY_REGEX_COMPILED = re.compile(
|
|
48
|
-
rf"^(?P<prefix>{PREFIX_REGEX}):(?P<suffix>{SUFFIX_REGEX})\(version=(?P<version>{VERSION_REGEX})\)$"
|
|
49
|
-
)
|
|
50
|
-
CLASS_ID_REGEX = rf"(?P<{EntityTypes.class_}>{ENTITY_ID_REGEX})"
|
|
51
|
-
CLASS_ID_REGEX_COMPILED = re.compile(rf"^{CLASS_ID_REGEX}$")
|
|
52
|
-
PROPERTY_ID_REGEX = rf"\((?P<{EntityTypes.property_}>{ENTITY_ID_REGEX})\)"
|
|
53
|
-
|
|
54
|
-
# traversal direction
|
|
55
|
-
DIRECTION_REGEX = r"(?P<direction>(->|<-))"
|
|
56
|
-
|
|
57
|
-
# steps
|
|
58
|
-
STEP_REGEX = rf"((->|<-){CLASS_ID_REGEX}({PROPERTY_ID_REGEX})?)"
|
|
59
|
-
STEP_REGEX_COMPILED = re.compile(STEP_REGEX)
|
|
60
|
-
STEP_CLASS_REGEX_COMPILED = re.compile(rf"(^{DIRECTION_REGEX}{CLASS_ID_REGEX})$")
|
|
61
|
-
STEP_CLASS_AND_PROPERTY_REGEX_COMPILED = re.compile(rf"(^{DIRECTION_REGEX}{CLASS_ID_REGEX}{PROPERTY_ID_REGEX}$)")
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
_traversal = "traversal"
|
|
65
|
-
ORIGIN_REGEX = rf"(?P<origin>{ENTITY_ID_REGEX})"
|
|
66
|
-
|
|
67
|
-
HOP_REGEX_COMPILED = re.compile(rf"^{ORIGIN_REGEX}(?P<{_traversal}>{STEP_REGEX}+)$")
|
|
68
|
-
|
|
69
|
-
# grabbing specific property for a class, property can be either object, annotation or data property
|
|
70
|
-
SINGLE_PROPERTY_REGEX_COMPILED = re.compile(rf"^{CLASS_ID_REGEX}{PROPERTY_ID_REGEX}$")
|
|
71
|
-
|
|
72
|
-
# grabbing all properties for a class
|
|
73
|
-
ALL_PROPERTIES_REGEX_COMPILED = re.compile(rf"^{CLASS_ID_REGEX}\(\*\)$")
|
|
74
|
-
|
|
75
|
-
ALL_TRAVERSAL_REGEX_COMPILED = (
|
|
76
|
-
rf"({CLASS_ID_REGEX}\(\*\)|{CLASS_ID_REGEX}{PROPERTY_ID_REGEX}|{ORIGIN_REGEX}(?P<{_traversal}>{STEP_REGEX}+))"
|
|
77
|
-
)
|
|
78
|
-
|
|
79
|
-
TABLE_REGEX_COMPILED = re.compile(
|
|
80
|
-
rf"^(?P<{Lookup.table}>{SUFFIX_REGEX})\((?P<{Lookup.key}>{SUFFIX_REGEX}),\s*(?P<{Lookup.value}>{SUFFIX_REGEX})\)$"
|
|
81
|
-
)
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
StepDirection = Literal["source", "target", "origin"]
|
|
85
|
-
_direction_by_symbol: dict[str, StepDirection] = {"->": "target", "<-": "source"}
|
|
86
|
-
_symbol_by_direction: dict[StepDirection, str] = {"source": "<-", "target": "->"}
|
|
87
|
-
|
|
88
|
-
Undefined = type(object())
|
|
89
|
-
Unknown = type(object())
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
# mypy does not like the sentinel value, and it is not possible to ignore only the line with it below.
|
|
93
|
-
# so we ignore all errors beyond this point.
|
|
94
|
-
# mypy: ignore-errors
|
|
95
|
-
@total_ordering
|
|
96
|
-
class Entity(BaseModel, arbitrary_types_allowed=True):
|
|
97
|
-
"""Entity is a class or property in OWL/RDF sense."""
|
|
98
|
-
|
|
99
|
-
type_: ClassVar[EntityTypes] = EntityTypes.undefined
|
|
100
|
-
prefix: str | Undefined = Undefined
|
|
101
|
-
suffix: str | Unknown
|
|
102
|
-
version: str | None = None
|
|
103
|
-
name: str | None = None
|
|
104
|
-
description: str | None = None
|
|
105
|
-
|
|
106
|
-
def __lt__(self, other: object) -> bool:
|
|
107
|
-
if type(self) is not type(other) or not isinstance(other, Entity):
|
|
108
|
-
return NotImplemented
|
|
109
|
-
return self.versioned_id < other.versioned_id
|
|
110
|
-
|
|
111
|
-
def __eq__(self, other: object) -> bool:
|
|
112
|
-
if type(self) is not type(other) or not isinstance(other, Entity):
|
|
113
|
-
return NotImplemented
|
|
114
|
-
return self.versioned_id == other.versioned_id
|
|
115
|
-
|
|
116
|
-
def __hash__(self) -> int:
|
|
117
|
-
return hash(self.versioned_id)
|
|
118
|
-
|
|
119
|
-
def as_non_versioned_entity(self) -> Self:
|
|
120
|
-
return self.from_string(f"{self.prefix}:{self.suffix}")
|
|
121
|
-
|
|
122
|
-
@property
|
|
123
|
-
def id(self) -> str:
|
|
124
|
-
if self.suffix is Unknown:
|
|
125
|
-
return "#N/A"
|
|
126
|
-
elif self.prefix is Undefined:
|
|
127
|
-
return self.suffix
|
|
128
|
-
else:
|
|
129
|
-
return f"{self.prefix}:{self.suffix}"
|
|
130
|
-
|
|
131
|
-
@property
|
|
132
|
-
def versioned_id(self) -> str:
|
|
133
|
-
if self.version is None:
|
|
134
|
-
return self.id
|
|
135
|
-
else:
|
|
136
|
-
return f"{self.id}(version={self.version})"
|
|
137
|
-
|
|
138
|
-
@property
|
|
139
|
-
def space(self) -> str:
|
|
140
|
-
"""Returns entity space in CDF."""
|
|
141
|
-
return self.prefix
|
|
142
|
-
|
|
143
|
-
@property
|
|
144
|
-
def external_id(self) -> str:
|
|
145
|
-
"""Returns entity external id in CDF."""
|
|
146
|
-
return self.suffix
|
|
147
|
-
|
|
148
|
-
def __repr__(self):
|
|
149
|
-
return self.versioned_id
|
|
150
|
-
|
|
151
|
-
def __str__(self):
|
|
152
|
-
return self.versioned_id
|
|
153
|
-
|
|
154
|
-
@classmethod
|
|
155
|
-
def from_string(cls, entity_string: str, base_prefix: str | None = None) -> Self:
|
|
156
|
-
if entity_string == "#N/A":
|
|
157
|
-
return cls(prefix=Undefined, suffix=Unknown)
|
|
158
|
-
elif result := VERSIONED_ENTITY_REGEX_COMPILED.match(entity_string):
|
|
159
|
-
return cls(
|
|
160
|
-
prefix=result.group("prefix"),
|
|
161
|
-
suffix=result.group("suffix"),
|
|
162
|
-
version=result.group("version"),
|
|
163
|
-
)
|
|
164
|
-
elif result := ENTITY_ID_REGEX_COMPILED.match(entity_string):
|
|
165
|
-
return cls(prefix=result.group("prefix"), suffix=result.group("suffix"))
|
|
166
|
-
elif base_prefix and re.match(SUFFIX_REGEX, entity_string) and re.match(PREFIX_REGEX, base_prefix):
|
|
167
|
-
return cls(prefix=base_prefix, suffix=entity_string)
|
|
168
|
-
else:
|
|
169
|
-
raise ValueError(f"{cls.__name__} is expected to be prefix:suffix, got {entity_string}")
|
|
170
|
-
|
|
171
|
-
@classmethod
|
|
172
|
-
def from_list(cls, entity_strings: list[str], base_prefix: str | None = None) -> list[Self]:
|
|
173
|
-
return [
|
|
174
|
-
cls.from_string(entity_string=entity_string, base_prefix=base_prefix) for entity_string in entity_strings
|
|
175
|
-
]
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
class Step(BaseModel):
|
|
179
|
-
class_: Entity
|
|
180
|
-
property: Entity | None = None # only terminal step has property
|
|
181
|
-
direction: StepDirection
|
|
182
|
-
|
|
183
|
-
@classmethod
|
|
184
|
-
def from_string(cls, raw: str, **kwargs) -> Self:
|
|
185
|
-
if result := STEP_CLASS_AND_PROPERTY_REGEX_COMPILED.match(raw):
|
|
186
|
-
return cls(
|
|
187
|
-
class_=Entity.from_string(result.group(EntityTypes.class_)),
|
|
188
|
-
property=Entity.from_string(result.group(EntityTypes.property_)),
|
|
189
|
-
direction=_direction_by_symbol[result.group("direction")],
|
|
190
|
-
**kwargs,
|
|
191
|
-
)
|
|
192
|
-
elif result := STEP_CLASS_REGEX_COMPILED.match(raw):
|
|
193
|
-
return cls(
|
|
194
|
-
class_=Entity.from_string(result.group(EntityTypes.class_)),
|
|
195
|
-
direction=_direction_by_symbol[result.group("direction")],
|
|
196
|
-
) # type: ignore
|
|
197
|
-
msg = f"Invalid step {raw}, expected in one of the following forms:"
|
|
198
|
-
msg += " ->prefix:suffix, <-prefix:suffix, ->prefix:suffix(prefix:suffix) or <-prefix:suffix(prefix:suffix)"
|
|
199
|
-
raise ValueError(msg)
|
|
200
|
-
|
|
201
|
-
def __str__(self) -> str:
|
|
202
|
-
if self.property:
|
|
203
|
-
return f"{self.class_}({self.property})"
|
|
204
|
-
else:
|
|
205
|
-
return f"{_symbol_by_direction[self.direction]}{self.class_}"
|
|
206
|
-
|
|
207
|
-
def __repr__(self) -> str:
|
|
208
|
-
return self.__str__()
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
class Traversal(BaseModel):
|
|
212
|
-
class_: Entity
|
|
213
|
-
|
|
214
|
-
def __str__(self) -> str:
|
|
215
|
-
return f"{self.class_}"
|
|
216
|
-
|
|
217
|
-
def __repr__(self) -> str:
|
|
218
|
-
return self.__str__()
|
|
219
|
-
|
|
220
|
-
@model_serializer(when_used="unless-none", return_type=str)
|
|
221
|
-
def as_str(self) -> str:
|
|
222
|
-
return str(self)
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
class SingleProperty(Traversal):
|
|
226
|
-
property: Entity
|
|
227
|
-
|
|
228
|
-
@classmethod
|
|
229
|
-
def from_string(cls, class_: str, property_: str) -> Self:
|
|
230
|
-
return cls(class_=Entity.from_string(class_), property=Entity.from_string(property_))
|
|
231
|
-
|
|
232
|
-
def __str__(self) -> str:
|
|
233
|
-
return f"{self.class_}({self.property})"
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
class SelfReferenceProperty(Traversal):
|
|
237
|
-
@classmethod
|
|
238
|
-
def from_string(cls, class_: str) -> Self:
|
|
239
|
-
return cls(class_=Entity.from_string(class_))
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
class Origin(BaseModel):
|
|
243
|
-
class_: Entity
|
|
244
|
-
|
|
245
|
-
@field_validator("class_", mode="before")
|
|
246
|
-
def process_if_string(cls, value):
|
|
247
|
-
return Entity.from_string(value) if isinstance(value, str) else value
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
class Hop(Traversal):
|
|
251
|
-
"""Multi or single hop traversal through graph"""
|
|
252
|
-
|
|
253
|
-
traversal: list[Step]
|
|
254
|
-
|
|
255
|
-
@classmethod
|
|
256
|
-
def from_string(cls, class_: str, traversal: str | list[Step]) -> Self:
|
|
257
|
-
return cls(
|
|
258
|
-
class_=Entity.from_string(class_),
|
|
259
|
-
traversal=(
|
|
260
|
-
[Step.from_string(result[0]) for result in STEP_REGEX_COMPILED.findall(traversal)]
|
|
261
|
-
if isinstance(traversal, str)
|
|
262
|
-
else traversal
|
|
263
|
-
),
|
|
264
|
-
)
|
|
265
|
-
|
|
266
|
-
def __str__(self) -> str:
|
|
267
|
-
return f"{self.class_}{''.join([str(step) for step in self.traversal])}"
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
class TableLookup(BaseModel):
|
|
271
|
-
name: str
|
|
272
|
-
key: str
|
|
273
|
-
value: str
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
class Rule(BaseModel):
|
|
277
|
-
pass
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
class Query(BaseModel):
|
|
281
|
-
query: str
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
class RDFPath(Rule):
|
|
285
|
-
traversal: SingleProperty | SelfReferenceProperty | Hop
|
|
286
|
-
|
|
287
|
-
def __str__(self) -> str:
|
|
288
|
-
return f"{self.traversal}"
|
|
289
|
-
|
|
290
|
-
def __repr__(self) -> str:
|
|
291
|
-
return self.__str__()
|
|
292
|
-
|
|
293
|
-
@model_serializer(when_used="unless-none", return_type=str)
|
|
294
|
-
def as_str(self) -> str:
|
|
295
|
-
return str(self)
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
class RawLookup(RDFPath):
|
|
299
|
-
table: TableLookup
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
class SPARQLQuery(RDFPath):
|
|
303
|
-
traversal: Query
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
def parse_traversal(raw: str) -> SelfReferenceProperty | SingleProperty | Hop:
|
|
307
|
-
if result := CLASS_ID_REGEX_COMPILED.match(raw):
|
|
308
|
-
return SelfReferenceProperty.from_string(class_=result.group(EntityTypes.class_))
|
|
309
|
-
elif result := SINGLE_PROPERTY_REGEX_COMPILED.match(raw):
|
|
310
|
-
return SingleProperty.from_string(
|
|
311
|
-
class_=result.group(EntityTypes.class_),
|
|
312
|
-
property_=result.group(EntityTypes.property_),
|
|
313
|
-
)
|
|
314
|
-
elif result := HOP_REGEX_COMPILED.match(raw):
|
|
315
|
-
return Hop.from_string(class_=result.group("origin"), traversal=result.group(_traversal))
|
|
316
|
-
else:
|
|
317
|
-
raise NeatValueError(f"Invalid RDF Path: {raw!r}")
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
def parse_table_lookup(raw: str) -> TableLookup:
|
|
321
|
-
if result := TABLE_REGEX_COMPILED.match(raw):
|
|
322
|
-
return TableLookup(
|
|
323
|
-
name=result.group(Lookup.table),
|
|
324
|
-
key=result.group(Lookup.key),
|
|
325
|
-
value=result.group(Lookup.value),
|
|
326
|
-
)
|
|
327
|
-
raise NeatValueError(f"Invalid table lookup: {raw!r}")
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
def parse_rule(rule_raw: str, rule_type: TransformationRuleType | None) -> RDFPath:
|
|
331
|
-
rule_raw = urllib.parse.unquote(rule_raw)
|
|
332
|
-
match rule_type:
|
|
333
|
-
case TransformationRuleType.rdfpath:
|
|
334
|
-
rule_raw = rule_raw.replace(" ", "")
|
|
335
|
-
return RDFPath(traversal=parse_traversal(rule_raw))
|
|
336
|
-
case TransformationRuleType.rawlookup:
|
|
337
|
-
rule_raw = rule_raw.replace(" ", "")
|
|
338
|
-
if Counter(rule_raw).get("|") != 1:
|
|
339
|
-
raise NeatValueError(f"Invalid rawlookup rule: {rule_raw!r}")
|
|
340
|
-
traversal, table_lookup = rule_raw.split("|")
|
|
341
|
-
return RawLookup(
|
|
342
|
-
traversal=parse_traversal(traversal),
|
|
343
|
-
table=parse_table_lookup(table_lookup),
|
|
344
|
-
)
|
|
345
|
-
case TransformationRuleType.sparql:
|
|
346
|
-
return SPARQLQuery(traversal=Query(query=rule_raw))
|
|
347
|
-
case None:
|
|
348
|
-
raise ValueError("Rule type must be specified")
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
def is_valid_rule(rule_type: TransformationRuleType, rule_raw: str) -> bool:
|
|
352
|
-
is_valid_rule = {
|
|
353
|
-
TransformationRuleType.rdfpath: is_rdfpath,
|
|
354
|
-
TransformationRuleType.rawlookup: is_rawlookup,
|
|
355
|
-
}[rule_type]
|
|
356
|
-
return is_valid_rule(rule_raw)
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
def is_rdfpath(raw: str) -> bool:
|
|
360
|
-
try:
|
|
361
|
-
parse_traversal(raw)
|
|
362
|
-
except ValueError:
|
|
363
|
-
return False
|
|
364
|
-
return True
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
def is_rawlookup(raw: str) -> bool:
|
|
368
|
-
try:
|
|
369
|
-
parse_rule(raw, TransformationRuleType.rawlookup)
|
|
370
|
-
except ValueError:
|
|
371
|
-
return False
|
|
372
|
-
return True
|
|
File without changes
|
|
File without changes
|