cognite-neat 0.109.4__py3-none-any.whl → 0.110.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (67) hide show
  1. cognite/neat/_alpha.py +2 -0
  2. cognite/neat/_client/_api/schema.py +17 -1
  3. cognite/neat/_client/data_classes/schema.py +3 -3
  4. cognite/neat/_constants.py +11 -0
  5. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +9 -10
  6. cognite/neat/_graph/extractors/_iodd.py +3 -3
  7. cognite/neat/_graph/extractors/_mock_graph_generator.py +9 -7
  8. cognite/neat/_graph/loaders/_rdf2dms.py +285 -346
  9. cognite/neat/_graph/queries/_base.py +28 -92
  10. cognite/neat/_graph/transformers/__init__.py +1 -3
  11. cognite/neat/_graph/transformers/_rdfpath.py +2 -49
  12. cognite/neat/_issues/__init__.py +1 -6
  13. cognite/neat/_issues/_base.py +21 -252
  14. cognite/neat/_issues/_contextmanagers.py +46 -0
  15. cognite/neat/_issues/_factory.py +61 -0
  16. cognite/neat/_issues/errors/__init__.py +18 -4
  17. cognite/neat/_issues/errors/_wrapper.py +81 -3
  18. cognite/neat/_issues/formatters.py +4 -4
  19. cognite/neat/_issues/warnings/__init__.py +3 -2
  20. cognite/neat/_issues/warnings/_properties.py +8 -0
  21. cognite/neat/_rules/_constants.py +9 -0
  22. cognite/neat/_rules/_shared.py +3 -2
  23. cognite/neat/_rules/analysis/__init__.py +2 -3
  24. cognite/neat/_rules/analysis/_base.py +450 -258
  25. cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
  26. cognite/neat/_rules/exporters/_rules2excel.py +2 -8
  27. cognite/neat/_rules/exporters/_rules2instance_template.py +2 -2
  28. cognite/neat/_rules/exporters/_rules2ontology.py +5 -4
  29. cognite/neat/_rules/importers/_base.py +2 -47
  30. cognite/neat/_rules/importers/_dms2rules.py +7 -10
  31. cognite/neat/_rules/importers/_dtdl2rules/dtdl_importer.py +2 -2
  32. cognite/neat/_rules/importers/_rdf/_inference2rules.py +59 -25
  33. cognite/neat/_rules/importers/_rdf/_shared.py +1 -1
  34. cognite/neat/_rules/importers/_spreadsheet2rules.py +12 -9
  35. cognite/neat/_rules/models/dms/_rules.py +3 -1
  36. cognite/neat/_rules/models/dms/_rules_input.py +4 -0
  37. cognite/neat/_rules/models/dms/_validation.py +14 -4
  38. cognite/neat/_rules/models/entities/_loaders.py +1 -1
  39. cognite/neat/_rules/models/entities/_multi_value.py +2 -2
  40. cognite/neat/_rules/models/information/_rules.py +18 -17
  41. cognite/neat/_rules/models/information/_rules_input.py +2 -1
  42. cognite/neat/_rules/models/information/_validation.py +3 -1
  43. cognite/neat/_rules/transformers/__init__.py +8 -2
  44. cognite/neat/_rules/transformers/_converters.py +228 -43
  45. cognite/neat/_rules/transformers/_verification.py +5 -10
  46. cognite/neat/_session/_base.py +4 -4
  47. cognite/neat/_session/_prepare.py +12 -0
  48. cognite/neat/_session/_read.py +21 -17
  49. cognite/neat/_session/_show.py +11 -123
  50. cognite/neat/_session/_state.py +0 -2
  51. cognite/neat/_session/_subset.py +64 -0
  52. cognite/neat/_session/_to.py +63 -12
  53. cognite/neat/_store/_graph_store.py +5 -246
  54. cognite/neat/_utils/rdf_.py +2 -2
  55. cognite/neat/_utils/spreadsheet.py +44 -1
  56. cognite/neat/_utils/text.py +51 -32
  57. cognite/neat/_version.py +1 -1
  58. {cognite_neat-0.109.4.dist-info → cognite_neat-0.110.0.dist-info}/METADATA +1 -1
  59. {cognite_neat-0.109.4.dist-info → cognite_neat-0.110.0.dist-info}/RECORD +62 -64
  60. {cognite_neat-0.109.4.dist-info → cognite_neat-0.110.0.dist-info}/WHEEL +1 -1
  61. cognite/neat/_graph/queries/_construct.py +0 -187
  62. cognite/neat/_graph/queries/_shared.py +0 -173
  63. cognite/neat/_rules/analysis/_dms.py +0 -57
  64. cognite/neat/_rules/analysis/_information.py +0 -249
  65. cognite/neat/_rules/models/_rdfpath.py +0 -372
  66. {cognite_neat-0.109.4.dist-info → cognite_neat-0.110.0.dist-info}/LICENSE +0 -0
  67. {cognite_neat-0.109.4.dist-info → cognite_neat-0.110.0.dist-info}/entry_points.txt +0 -0
@@ -1,57 +0,0 @@
1
- from collections import defaultdict
2
-
3
- from rdflib import URIRef
4
-
5
- from cognite.neat._constants import DMS_LISTABLE_PROPERTY_LIMIT
6
- from cognite.neat._rules.models.dms import DMSProperty, DMSRules, DMSView
7
- from cognite.neat._rules.models.entities import ViewEntity
8
-
9
- from ._base import BaseAnalysis
10
-
11
-
12
- class DMSAnalysis(BaseAnalysis[DMSRules, DMSView, DMSProperty, ViewEntity, str]):
13
- """Assumes analysis over only the complete schema"""
14
-
15
- def _get_classes(self) -> list[DMSView]:
16
- return list(self.rules.views)
17
-
18
- def _get_properties(self) -> list[DMSProperty]:
19
- return list(self.rules.properties)
20
-
21
- def _get_cls_entity(self, class_: DMSView | DMSProperty) -> ViewEntity:
22
- return class_.view
23
-
24
- def _get_cls_parents(self, class_: DMSView) -> list[ViewEntity] | None:
25
- return list(class_.implements) if class_.implements else None
26
-
27
- @classmethod
28
- def _set_cls_entity(cls, property_: DMSProperty, class_: ViewEntity) -> None:
29
- property_.view = class_
30
-
31
- def _get_object(self, property_: DMSProperty) -> ViewEntity | None:
32
- return property_.value_type if isinstance(property_.value_type, ViewEntity) else None
33
-
34
- def _get_max_occurrence(self, property_: DMSProperty) -> int | float | None:
35
- return DMS_LISTABLE_PROPERTY_LIMIT if property_.is_list else 1
36
-
37
- def subset_rules(self, desired_classes: set[ViewEntity]) -> DMSRules:
38
- raise NotImplementedError()
39
-
40
- def _get_prop_entity(self, property_: DMSProperty) -> str:
41
- return property_.view_property
42
-
43
- def views_with_properties_linked_to_classes(
44
- self,
45
- consider_inheritance: bool = False,
46
- allow_different_namespace: bool = False,
47
- ) -> dict[ViewEntity, dict[str, URIRef]]:
48
- view_property_pairs = self.classes_with_properties(consider_inheritance, allow_different_namespace)
49
-
50
- view_and_properties_with_links: dict[ViewEntity, dict[str, URIRef]] = defaultdict(dict)
51
-
52
- for view, properties in view_property_pairs.items():
53
- view_and_properties_with_links[view] = {
54
- prop.view_property: prop.logical for prop in properties if prop.logical
55
- }
56
-
57
- return view_and_properties_with_links
@@ -1,249 +0,0 @@
1
- import warnings
2
- from typing import Any, cast
3
-
4
- from pydantic import ValidationError
5
- from rdflib import URIRef
6
-
7
- from cognite.neat._rules._constants import EntityTypes
8
- from cognite.neat._rules.models import SchemaCompleteness
9
- from cognite.neat._rules.models._rdfpath import (
10
- Hop,
11
- RDFPath,
12
- SelfReferenceProperty,
13
- SingleProperty,
14
- )
15
- from cognite.neat._rules.models.entities import ClassEntity
16
- from cognite.neat._rules.models.entities._multi_value import MultiValueTypeInfo
17
- from cognite.neat._rules.models.information import (
18
- InformationClass,
19
- InformationProperty,
20
- InformationRules,
21
- )
22
- from cognite.neat._utils.collection_ import most_occurring_element
23
- from cognite.neat._utils.rdf_ import get_inheritance_path
24
-
25
- from ._base import BaseAnalysis
26
-
27
-
28
- class InformationAnalysis(BaseAnalysis[InformationRules, InformationClass, InformationProperty, ClassEntity, str]):
29
- """Assumes analysis over only the complete schema"""
30
-
31
- def _get_object(self, property_: InformationProperty) -> ClassEntity | None:
32
- return property_.value_type if isinstance(property_.value_type, ClassEntity) else None
33
-
34
- def _get_max_occurrence(self, property_: InformationProperty) -> int | float | None:
35
- return property_.max_count
36
-
37
- def _get_cls_entity(self, class_: InformationClass | InformationProperty) -> ClassEntity:
38
- return class_.class_
39
-
40
- @classmethod
41
- def _set_cls_entity(cls, property_: InformationProperty, class_: ClassEntity) -> None:
42
- property_.class_ = class_
43
-
44
- def _get_prop_entity(self, property_: InformationProperty) -> str:
45
- return property_.property_
46
-
47
- def _get_cls_parents(self, class_: InformationClass) -> list[ClassEntity] | None:
48
- return list(class_.implements or []) or None
49
-
50
- def _get_properties(self) -> list[InformationProperty]:
51
- return list(self.rules.properties)
52
-
53
- def _get_classes(self) -> list[InformationClass]:
54
- return list(self.rules.classes)
55
-
56
- def has_hop_transformations(self):
57
- return any(
58
- prop_.instance_source and isinstance(prop_.instance_source.traversal, Hop)
59
- for prop_ in self.rules.properties
60
- )
61
-
62
- def has_self_reference_property_transformations(self):
63
- return any(
64
- prop_.instance_source and isinstance(prop_.instance_source.traversal, SelfReferenceProperty)
65
- for prop_ in self.rules.properties
66
- )
67
-
68
- def all_reference_transformations(self):
69
- return [
70
- prop_
71
- for prop_ in self.rules.properties
72
- if prop_.instance_source and isinstance(prop_.instance_source.traversal, SelfReferenceProperty)
73
- ]
74
-
75
- def define_property_renaming_config(self, class_: ClassEntity) -> dict[str | URIRef, str]:
76
- property_renaming_configuration: dict[str | URIRef, str] = {}
77
-
78
- if definitions := self.class_property_pairs(only_rdfpath=True, consider_inheritance=True).get(class_, None):
79
- for property_id, definition in definitions.items():
80
- transformation = cast(RDFPath, definition.instance_source)
81
-
82
- # use case we have a single property rdf path, and defined prefix
83
- # in either metadata or prefixes of rules
84
- if isinstance(
85
- transformation.traversal,
86
- SingleProperty,
87
- ) and (
88
- transformation.traversal.property.prefix in self.rules.prefixes
89
- or transformation.traversal.property.prefix == self.rules.metadata.prefix
90
- ):
91
- namespace = (
92
- self.rules.metadata.namespace
93
- if transformation.traversal.property.prefix == self.rules.metadata.prefix
94
- else self.rules.prefixes[transformation.traversal.property.prefix]
95
- )
96
-
97
- property_renaming_configuration[namespace[transformation.traversal.property.suffix]] = property_id
98
-
99
- # otherwise we default to the property id
100
- else:
101
- property_renaming_configuration[property_id] = property_id
102
-
103
- return property_renaming_configuration
104
-
105
- def neat_id_to_instance_source_property_uri(self, property_neat_id: URIRef) -> URIRef | None:
106
- if (
107
- (property_ := self.properties_by_neat_id.get(property_neat_id))
108
- and property_.instance_source
109
- and isinstance(
110
- property_.instance_source.traversal,
111
- SingleProperty,
112
- )
113
- and (
114
- property_.instance_source.traversal.property.prefix in self.rules.prefixes
115
- or property_.instance_source.traversal.property.prefix == self.rules.metadata.prefix
116
- )
117
- ):
118
- namespace = (
119
- self.rules.metadata.namespace
120
- if property_.instance_source.traversal.property.prefix == self.rules.metadata.prefix
121
- else self.rules.prefixes[property_.instance_source.traversal.property.prefix]
122
- )
123
-
124
- return namespace[property_.instance_source.traversal.property.suffix]
125
- return None
126
-
127
- def property_types(self, class_: ClassEntity) -> dict[str, EntityTypes]:
128
- property_types = {}
129
- if definitions := self.class_property_pairs(consider_inheritance=True).get(class_, None):
130
- for property_id, definition in definitions.items():
131
- property_types[property_id] = definition.type_
132
-
133
- return property_types
134
-
135
- def most_occurring_class_in_transformations(self, class_: ClassEntity) -> ClassEntity | None:
136
- classes = []
137
- if class_property_pairs := self.class_property_pairs(consider_inheritance=True, only_rdfpath=True).get(
138
- class_, None
139
- ):
140
- for property_ in class_property_pairs.values():
141
- classes.append(cast(RDFPath, property_.instance_source).traversal.class_)
142
-
143
- return cast(ClassEntity, most_occurring_element(classes))
144
- else:
145
- return None
146
-
147
- def subset_rules(self, desired_classes: set[ClassEntity]) -> InformationRules:
148
- """
149
- Subset rules to only include desired classes and their properties.
150
-
151
- Args:
152
- desired_classes: Desired classes to include in the reduced data model
153
-
154
- Returns:
155
- Instance of InformationRules
156
-
157
- !!! note "Inheritance"
158
- If desired classes contain a class that is a subclass of another class(es), the parent class(es)
159
- will be included in the reduced data model as well even though the parent class(es) are
160
- not in the desired classes set. This is to ensure that the reduced data model is
161
- consistent and complete.
162
-
163
- !!! note "Partial Reduction"
164
- This method does not perform checks if classes that are value types of desired classes
165
- properties are part of desired classes. If a class is not part of desired classes, but it
166
- is a value type of a property of a class that is part of desired classes, derived reduced
167
- rules will be marked as partial.
168
-
169
- !!! note "Validation"
170
- This method will attempt to validate the reduced rules with custom validations.
171
- If it fails, it will return a partial rules with a warning message, validated
172
- only with base Pydantic validators.
173
- """
174
- class_as_dict = self.as_class_dict()
175
- class_parents_pairs = self.class_parent_pairs()
176
- defined_classes = self.defined_classes(consider_inheritance=True)
177
-
178
- possible_classes = defined_classes.intersection(desired_classes)
179
- impossible_classes = desired_classes - possible_classes
180
-
181
- # need to add all the parent classes of the desired classes to the possible classes
182
- parents: set[ClassEntity] = set()
183
- for class_ in possible_classes:
184
- parents = parents.union({parent for parent in get_inheritance_path(class_, class_parents_pairs)})
185
- possible_classes = possible_classes.union(parents)
186
-
187
- if not possible_classes:
188
- raise ValueError("None of the desired classes are defined in the data model!")
189
-
190
- if impossible_classes:
191
- warnings.warn(
192
- f"Could not find the following classes defined in the data model: {impossible_classes}",
193
- stacklevel=2,
194
- )
195
-
196
- reduced_data_model: dict[str, Any] = {
197
- "metadata": self.rules.metadata.model_copy(),
198
- "prefixes": (self.rules.prefixes or {}).copy(),
199
- "classes": [],
200
- "properties": [],
201
- }
202
-
203
- for class_ in possible_classes:
204
- reduced_data_model["classes"].append(class_as_dict[str(class_.suffix)])
205
-
206
- class_property_pairs = self.classes_with_properties(consider_inheritance=False)
207
-
208
- for class_, properties in class_property_pairs.items():
209
- if class_ in possible_classes:
210
- reduced_data_model["properties"].extend(properties)
211
-
212
- try:
213
- return type(self.rules)(**reduced_data_model)
214
- except ValidationError as e:
215
- warnings.warn(f"Reduced data model is not complete: {e}", stacklevel=2)
216
- reduced_data_model["metadata"].schema_ = SchemaCompleteness.partial
217
- return type(self.rules).model_construct(**reduced_data_model)
218
-
219
- def class_uri(self, class_: ClassEntity) -> URIRef | None:
220
- """Get URI for a class entity based on the rules.
221
-
222
- Args:
223
- class_: instance of ClassEntity
224
-
225
- Returns:
226
- URIRef of the class entity or None if not found
227
- """
228
-
229
- # we need to handle optional renamings and we do this
230
- # by checking if the most occurring class in transformations alternatively
231
- # in cases when we are not specifying transformations we default to the class entity
232
- if not (most_frequent_class := self.most_occurring_class_in_transformations(class_)):
233
- most_frequent_class = class_
234
-
235
- # case 1 class prefix in rules.prefixes
236
- if most_frequent_class.prefix in self.rules.prefixes:
237
- return self.rules.prefixes[cast(str, most_frequent_class.prefix)][most_frequent_class.suffix]
238
-
239
- # case 2 class prefix equal to rules.metadata.prefix
240
- elif most_frequent_class.prefix == self.rules.metadata.prefix:
241
- return self.rules.metadata.namespace[most_frequent_class.suffix]
242
-
243
- # case 3 when class prefix is not found in prefixes of rules
244
- else:
245
- return None
246
-
247
- @property
248
- def multi_value_properties(self) -> list[InformationProperty]:
249
- return [prop_ for prop_ in self.rules.properties if isinstance(prop_.value_type, MultiValueTypeInfo)]
@@ -1,372 +0,0 @@
1
- """ """
2
-
3
- import re
4
- import sys
5
- import urllib.parse
6
- from collections import Counter
7
- from functools import total_ordering
8
- from typing import ClassVar, Literal
9
-
10
- from pydantic import BaseModel, field_validator, model_serializer
11
-
12
- from cognite.neat._issues.errors import NeatValueError
13
-
14
- if sys.version_info >= (3, 11):
15
- from enum import StrEnum
16
- from typing import Self
17
- else:
18
- from backports.strenum import StrEnum
19
- from typing_extensions import Self
20
-
21
-
22
- class TransformationRuleType(StrEnum):
23
- rdfpath = "rdfpath"
24
- rawlookup = "rawlookup"
25
- sparql = "sparql"
26
-
27
-
28
- class Lookup(StrEnum):
29
- table = "table"
30
- key = "key"
31
- value = "value" # type: ignore
32
-
33
-
34
- class EntityTypes(StrEnum):
35
- class_ = "class"
36
- property_ = "property"
37
- undefined = "undefined"
38
-
39
-
40
- # FOR PARSING STRINGS:
41
- PREFIX_REGEX = r"[a-zA-Z]+[a-zA-Z0-9-_.]*[a-zA-Z0-9]+"
42
- SUFFIX_REGEX = r"[a-zA-Z0-9-_.]+[a-zA-Z0-9]|[-_.]*[a-zA-Z0-9]+"
43
- VERSION_REGEX = r"[a-zA-Z0-9]([.a-zA-Z0-9_-]{0,41}[a-zA-Z0-9])?"
44
-
45
- ENTITY_ID_REGEX = rf"{PREFIX_REGEX}:({SUFFIX_REGEX})"
46
- ENTITY_ID_REGEX_COMPILED = re.compile(rf"^(?P<prefix>{PREFIX_REGEX}):(?P<suffix>{SUFFIX_REGEX})$")
47
- VERSIONED_ENTITY_REGEX_COMPILED = re.compile(
48
- rf"^(?P<prefix>{PREFIX_REGEX}):(?P<suffix>{SUFFIX_REGEX})\(version=(?P<version>{VERSION_REGEX})\)$"
49
- )
50
- CLASS_ID_REGEX = rf"(?P<{EntityTypes.class_}>{ENTITY_ID_REGEX})"
51
- CLASS_ID_REGEX_COMPILED = re.compile(rf"^{CLASS_ID_REGEX}$")
52
- PROPERTY_ID_REGEX = rf"\((?P<{EntityTypes.property_}>{ENTITY_ID_REGEX})\)"
53
-
54
- # traversal direction
55
- DIRECTION_REGEX = r"(?P<direction>(->|<-))"
56
-
57
- # steps
58
- STEP_REGEX = rf"((->|<-){CLASS_ID_REGEX}({PROPERTY_ID_REGEX})?)"
59
- STEP_REGEX_COMPILED = re.compile(STEP_REGEX)
60
- STEP_CLASS_REGEX_COMPILED = re.compile(rf"(^{DIRECTION_REGEX}{CLASS_ID_REGEX})$")
61
- STEP_CLASS_AND_PROPERTY_REGEX_COMPILED = re.compile(rf"(^{DIRECTION_REGEX}{CLASS_ID_REGEX}{PROPERTY_ID_REGEX}$)")
62
-
63
-
64
- _traversal = "traversal"
65
- ORIGIN_REGEX = rf"(?P<origin>{ENTITY_ID_REGEX})"
66
-
67
- HOP_REGEX_COMPILED = re.compile(rf"^{ORIGIN_REGEX}(?P<{_traversal}>{STEP_REGEX}+)$")
68
-
69
- # grabbing specific property for a class, property can be either object, annotation or data property
70
- SINGLE_PROPERTY_REGEX_COMPILED = re.compile(rf"^{CLASS_ID_REGEX}{PROPERTY_ID_REGEX}$")
71
-
72
- # grabbing all properties for a class
73
- ALL_PROPERTIES_REGEX_COMPILED = re.compile(rf"^{CLASS_ID_REGEX}\(\*\)$")
74
-
75
- ALL_TRAVERSAL_REGEX_COMPILED = (
76
- rf"({CLASS_ID_REGEX}\(\*\)|{CLASS_ID_REGEX}{PROPERTY_ID_REGEX}|{ORIGIN_REGEX}(?P<{_traversal}>{STEP_REGEX}+))"
77
- )
78
-
79
- TABLE_REGEX_COMPILED = re.compile(
80
- rf"^(?P<{Lookup.table}>{SUFFIX_REGEX})\((?P<{Lookup.key}>{SUFFIX_REGEX}),\s*(?P<{Lookup.value}>{SUFFIX_REGEX})\)$"
81
- )
82
-
83
-
84
- StepDirection = Literal["source", "target", "origin"]
85
- _direction_by_symbol: dict[str, StepDirection] = {"->": "target", "<-": "source"}
86
- _symbol_by_direction: dict[StepDirection, str] = {"source": "<-", "target": "->"}
87
-
88
- Undefined = type(object())
89
- Unknown = type(object())
90
-
91
-
92
- # mypy does not like the sentinel value, and it is not possible to ignore only the line with it below.
93
- # so we ignore all errors beyond this point.
94
- # mypy: ignore-errors
95
- @total_ordering
96
- class Entity(BaseModel, arbitrary_types_allowed=True):
97
- """Entity is a class or property in OWL/RDF sense."""
98
-
99
- type_: ClassVar[EntityTypes] = EntityTypes.undefined
100
- prefix: str | Undefined = Undefined
101
- suffix: str | Unknown
102
- version: str | None = None
103
- name: str | None = None
104
- description: str | None = None
105
-
106
- def __lt__(self, other: object) -> bool:
107
- if type(self) is not type(other) or not isinstance(other, Entity):
108
- return NotImplemented
109
- return self.versioned_id < other.versioned_id
110
-
111
- def __eq__(self, other: object) -> bool:
112
- if type(self) is not type(other) or not isinstance(other, Entity):
113
- return NotImplemented
114
- return self.versioned_id == other.versioned_id
115
-
116
- def __hash__(self) -> int:
117
- return hash(self.versioned_id)
118
-
119
- def as_non_versioned_entity(self) -> Self:
120
- return self.from_string(f"{self.prefix}:{self.suffix}")
121
-
122
- @property
123
- def id(self) -> str:
124
- if self.suffix is Unknown:
125
- return "#N/A"
126
- elif self.prefix is Undefined:
127
- return self.suffix
128
- else:
129
- return f"{self.prefix}:{self.suffix}"
130
-
131
- @property
132
- def versioned_id(self) -> str:
133
- if self.version is None:
134
- return self.id
135
- else:
136
- return f"{self.id}(version={self.version})"
137
-
138
- @property
139
- def space(self) -> str:
140
- """Returns entity space in CDF."""
141
- return self.prefix
142
-
143
- @property
144
- def external_id(self) -> str:
145
- """Returns entity external id in CDF."""
146
- return self.suffix
147
-
148
- def __repr__(self):
149
- return self.versioned_id
150
-
151
- def __str__(self):
152
- return self.versioned_id
153
-
154
- @classmethod
155
- def from_string(cls, entity_string: str, base_prefix: str | None = None) -> Self:
156
- if entity_string == "#N/A":
157
- return cls(prefix=Undefined, suffix=Unknown)
158
- elif result := VERSIONED_ENTITY_REGEX_COMPILED.match(entity_string):
159
- return cls(
160
- prefix=result.group("prefix"),
161
- suffix=result.group("suffix"),
162
- version=result.group("version"),
163
- )
164
- elif result := ENTITY_ID_REGEX_COMPILED.match(entity_string):
165
- return cls(prefix=result.group("prefix"), suffix=result.group("suffix"))
166
- elif base_prefix and re.match(SUFFIX_REGEX, entity_string) and re.match(PREFIX_REGEX, base_prefix):
167
- return cls(prefix=base_prefix, suffix=entity_string)
168
- else:
169
- raise ValueError(f"{cls.__name__} is expected to be prefix:suffix, got {entity_string}")
170
-
171
- @classmethod
172
- def from_list(cls, entity_strings: list[str], base_prefix: str | None = None) -> list[Self]:
173
- return [
174
- cls.from_string(entity_string=entity_string, base_prefix=base_prefix) for entity_string in entity_strings
175
- ]
176
-
177
-
178
- class Step(BaseModel):
179
- class_: Entity
180
- property: Entity | None = None # only terminal step has property
181
- direction: StepDirection
182
-
183
- @classmethod
184
- def from_string(cls, raw: str, **kwargs) -> Self:
185
- if result := STEP_CLASS_AND_PROPERTY_REGEX_COMPILED.match(raw):
186
- return cls(
187
- class_=Entity.from_string(result.group(EntityTypes.class_)),
188
- property=Entity.from_string(result.group(EntityTypes.property_)),
189
- direction=_direction_by_symbol[result.group("direction")],
190
- **kwargs,
191
- )
192
- elif result := STEP_CLASS_REGEX_COMPILED.match(raw):
193
- return cls(
194
- class_=Entity.from_string(result.group(EntityTypes.class_)),
195
- direction=_direction_by_symbol[result.group("direction")],
196
- ) # type: ignore
197
- msg = f"Invalid step {raw}, expected in one of the following forms:"
198
- msg += " ->prefix:suffix, <-prefix:suffix, ->prefix:suffix(prefix:suffix) or <-prefix:suffix(prefix:suffix)"
199
- raise ValueError(msg)
200
-
201
- def __str__(self) -> str:
202
- if self.property:
203
- return f"{self.class_}({self.property})"
204
- else:
205
- return f"{_symbol_by_direction[self.direction]}{self.class_}"
206
-
207
- def __repr__(self) -> str:
208
- return self.__str__()
209
-
210
-
211
- class Traversal(BaseModel):
212
- class_: Entity
213
-
214
- def __str__(self) -> str:
215
- return f"{self.class_}"
216
-
217
- def __repr__(self) -> str:
218
- return self.__str__()
219
-
220
- @model_serializer(when_used="unless-none", return_type=str)
221
- def as_str(self) -> str:
222
- return str(self)
223
-
224
-
225
- class SingleProperty(Traversal):
226
- property: Entity
227
-
228
- @classmethod
229
- def from_string(cls, class_: str, property_: str) -> Self:
230
- return cls(class_=Entity.from_string(class_), property=Entity.from_string(property_))
231
-
232
- def __str__(self) -> str:
233
- return f"{self.class_}({self.property})"
234
-
235
-
236
- class SelfReferenceProperty(Traversal):
237
- @classmethod
238
- def from_string(cls, class_: str) -> Self:
239
- return cls(class_=Entity.from_string(class_))
240
-
241
-
242
- class Origin(BaseModel):
243
- class_: Entity
244
-
245
- @field_validator("class_", mode="before")
246
- def process_if_string(cls, value):
247
- return Entity.from_string(value) if isinstance(value, str) else value
248
-
249
-
250
- class Hop(Traversal):
251
- """Multi or single hop traversal through graph"""
252
-
253
- traversal: list[Step]
254
-
255
- @classmethod
256
- def from_string(cls, class_: str, traversal: str | list[Step]) -> Self:
257
- return cls(
258
- class_=Entity.from_string(class_),
259
- traversal=(
260
- [Step.from_string(result[0]) for result in STEP_REGEX_COMPILED.findall(traversal)]
261
- if isinstance(traversal, str)
262
- else traversal
263
- ),
264
- )
265
-
266
- def __str__(self) -> str:
267
- return f"{self.class_}{''.join([str(step) for step in self.traversal])}"
268
-
269
-
270
- class TableLookup(BaseModel):
271
- name: str
272
- key: str
273
- value: str
274
-
275
-
276
- class Rule(BaseModel):
277
- pass
278
-
279
-
280
- class Query(BaseModel):
281
- query: str
282
-
283
-
284
- class RDFPath(Rule):
285
- traversal: SingleProperty | SelfReferenceProperty | Hop
286
-
287
- def __str__(self) -> str:
288
- return f"{self.traversal}"
289
-
290
- def __repr__(self) -> str:
291
- return self.__str__()
292
-
293
- @model_serializer(when_used="unless-none", return_type=str)
294
- def as_str(self) -> str:
295
- return str(self)
296
-
297
-
298
- class RawLookup(RDFPath):
299
- table: TableLookup
300
-
301
-
302
- class SPARQLQuery(RDFPath):
303
- traversal: Query
304
-
305
-
306
- def parse_traversal(raw: str) -> SelfReferenceProperty | SingleProperty | Hop:
307
- if result := CLASS_ID_REGEX_COMPILED.match(raw):
308
- return SelfReferenceProperty.from_string(class_=result.group(EntityTypes.class_))
309
- elif result := SINGLE_PROPERTY_REGEX_COMPILED.match(raw):
310
- return SingleProperty.from_string(
311
- class_=result.group(EntityTypes.class_),
312
- property_=result.group(EntityTypes.property_),
313
- )
314
- elif result := HOP_REGEX_COMPILED.match(raw):
315
- return Hop.from_string(class_=result.group("origin"), traversal=result.group(_traversal))
316
- else:
317
- raise NeatValueError(f"Invalid RDF Path: {raw!r}")
318
-
319
-
320
- def parse_table_lookup(raw: str) -> TableLookup:
321
- if result := TABLE_REGEX_COMPILED.match(raw):
322
- return TableLookup(
323
- name=result.group(Lookup.table),
324
- key=result.group(Lookup.key),
325
- value=result.group(Lookup.value),
326
- )
327
- raise NeatValueError(f"Invalid table lookup: {raw!r}")
328
-
329
-
330
- def parse_rule(rule_raw: str, rule_type: TransformationRuleType | None) -> RDFPath:
331
- rule_raw = urllib.parse.unquote(rule_raw)
332
- match rule_type:
333
- case TransformationRuleType.rdfpath:
334
- rule_raw = rule_raw.replace(" ", "")
335
- return RDFPath(traversal=parse_traversal(rule_raw))
336
- case TransformationRuleType.rawlookup:
337
- rule_raw = rule_raw.replace(" ", "")
338
- if Counter(rule_raw).get("|") != 1:
339
- raise NeatValueError(f"Invalid rawlookup rule: {rule_raw!r}")
340
- traversal, table_lookup = rule_raw.split("|")
341
- return RawLookup(
342
- traversal=parse_traversal(traversal),
343
- table=parse_table_lookup(table_lookup),
344
- )
345
- case TransformationRuleType.sparql:
346
- return SPARQLQuery(traversal=Query(query=rule_raw))
347
- case None:
348
- raise ValueError("Rule type must be specified")
349
-
350
-
351
- def is_valid_rule(rule_type: TransformationRuleType, rule_raw: str) -> bool:
352
- is_valid_rule = {
353
- TransformationRuleType.rdfpath: is_rdfpath,
354
- TransformationRuleType.rawlookup: is_rawlookup,
355
- }[rule_type]
356
- return is_valid_rule(rule_raw)
357
-
358
-
359
- def is_rdfpath(raw: str) -> bool:
360
- try:
361
- parse_traversal(raw)
362
- except ValueError:
363
- return False
364
- return True
365
-
366
-
367
- def is_rawlookup(raw: str) -> bool:
368
- try:
369
- parse_rule(raw, TransformationRuleType.rawlookup)
370
- except ValueError:
371
- return False
372
- return True