cognite-neat 0.81.12__py3-none-any.whl → 0.82.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_version.py +1 -1
- cognite/neat/graph/extractors/_mock_graph_generator.py +2 -8
- cognite/neat/graph/loaders/_rdf2dms.py +1 -1
- cognite/neat/graph/queries/__init__.py +3 -0
- cognite/neat/graph/queries/_base.py +99 -0
- cognite/neat/graph/queries/_construct.py +185 -0
- cognite/neat/graph/queries/_shared.py +159 -0
- cognite/neat/graph/stores/_base.py +24 -87
- cognite/neat/rules/analysis/_information_rules.py +34 -58
- cognite/neat/rules/importers/_inference2rules.py +5 -1
- cognite/neat/rules/models/information/_rules.py +5 -0
- cognite/neat/rules/models/information/_rules_input.py +3 -6
- cognite/neat/utils/utils.py +6 -1
- {cognite_neat-0.81.12.dist-info → cognite_neat-0.82.0.dist-info}/METADATA +1 -1
- {cognite_neat-0.81.12.dist-info → cognite_neat-0.82.0.dist-info}/RECORD +18 -14
- {cognite_neat-0.81.12.dist-info → cognite_neat-0.82.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.81.12.dist-info → cognite_neat-0.82.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.81.12.dist-info → cognite_neat-0.82.0.dist-info}/entry_points.txt +0 -0
cognite/neat/_version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.81.12"
|
|
1
|
+
__version__ = "0.82.0"
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
It is a bit ugly and needs some proper refactoring, but it is not a priority at the moment.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
import logging
|
|
6
5
|
import random
|
|
7
6
|
import warnings
|
|
8
7
|
from collections import OrderedDict
|
|
@@ -87,11 +86,11 @@ def generate_triples(
|
|
|
87
86
|
stop_on_exception: bool = False,
|
|
88
87
|
allow_isolated_classes: bool = True,
|
|
89
88
|
) -> list[Triple]:
|
|
90
|
-
"""Generate mock triples based on data model defined
|
|
89
|
+
"""Generate mock triples based on data model defined in rules and desired number
|
|
91
90
|
of class instances
|
|
92
91
|
|
|
93
92
|
Args:
|
|
94
|
-
|
|
93
|
+
rules : Rules defining the data model
|
|
95
94
|
class_count: Target class count for each class in the ontology
|
|
96
95
|
stop_on_exception: To stop if exception is encountered or not, default is False
|
|
97
96
|
allow_isolated_classes: To allow generation of instances for classes that are not
|
|
@@ -107,11 +106,9 @@ def generate_triples(
|
|
|
107
106
|
if non_existing_classes := set(class_count.keys()) - defined_classes:
|
|
108
107
|
msg = f"Class count contains classes {non_existing_classes} for which properties are not defined in Data Model!"
|
|
109
108
|
if stop_on_exception:
|
|
110
|
-
logging.error(msg)
|
|
111
109
|
raise ValueError(msg)
|
|
112
110
|
else:
|
|
113
111
|
msg += " These classes will be ignored."
|
|
114
|
-
logging.warning(msg)
|
|
115
112
|
warnings.warn(msg, stacklevel=2)
|
|
116
113
|
for class_ in non_existing_classes:
|
|
117
114
|
class_count.pop(class_)
|
|
@@ -279,14 +276,12 @@ def _generate_mock_object_property_triples(
|
|
|
279
276
|
if property_definition.value_type not in instance_ids:
|
|
280
277
|
msg = f"Class {property_definition.value_type} not found in class count! "
|
|
281
278
|
if stop_on_exception:
|
|
282
|
-
logging.error(msg)
|
|
283
279
|
raise ValueError(msg)
|
|
284
280
|
else:
|
|
285
281
|
msg += (
|
|
286
282
|
f"Skipping creating triples for property {property_definition.name} "
|
|
287
283
|
f"of class {class_.suffix} which expects values of this type!"
|
|
288
284
|
)
|
|
289
|
-
logging.warning(msg)
|
|
290
285
|
warnings.warn(msg, stacklevel=2)
|
|
291
286
|
return []
|
|
292
287
|
|
|
@@ -354,7 +349,6 @@ def _generate_triples_per_class(
|
|
|
354
349
|
)
|
|
355
350
|
|
|
356
351
|
else:
|
|
357
|
-
logging.error(f"Property type {property_.value_type} not supported!")
|
|
358
352
|
raise ValueError(f"Property type {property_.value_type} not supported!")
|
|
359
353
|
|
|
360
354
|
return triples
|
|
@@ -93,7 +93,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
93
93
|
yield from issues
|
|
94
94
|
tracker.issue(issues)
|
|
95
95
|
class_name = self.class_by_view_id.get(view.as_id(), view.external_id)
|
|
96
|
-
triples = self.graph_store.
|
|
96
|
+
triples = self.graph_store.read(class_name)
|
|
97
97
|
for identifier, properties in _triples2dictionary(triples).items():
|
|
98
98
|
try:
|
|
99
99
|
yield self._create_node(identifier, properties, pydantic_cls, view_id)
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
from typing import cast
|
|
3
|
+
|
|
4
|
+
from rdflib import RDF, Graph, URIRef
|
|
5
|
+
from rdflib.query import ResultRow
|
|
6
|
+
|
|
7
|
+
from cognite.neat.rules.models.entities import ClassEntity
|
|
8
|
+
from cognite.neat.rules.models.information import InformationRules
|
|
9
|
+
from cognite.neat.utils.utils import remove_namespace
|
|
10
|
+
|
|
11
|
+
from ._construct import build_construct_query
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Queries:
|
|
15
|
+
"""Helper class for storing standard queries for the graph store."""
|
|
16
|
+
|
|
17
|
+
def __init__(self, graph: Graph, rules: InformationRules | None = None):
|
|
18
|
+
self.graph = graph
|
|
19
|
+
self.rules = rules
|
|
20
|
+
|
|
21
|
+
def list_instances_ids_of_class(self, class_uri: URIRef, limit: int = -1) -> list[URIRef]:
|
|
22
|
+
"""Get instances ids for a given class
|
|
23
|
+
|
|
24
|
+
Args:
|
|
25
|
+
class_uri: Class for which instances are to be found
|
|
26
|
+
limit: Max number of instances to return, by default -1 meaning all instances
|
|
27
|
+
|
|
28
|
+
Returns:
|
|
29
|
+
List of class instance URIs
|
|
30
|
+
"""
|
|
31
|
+
query_statement = "SELECT DISTINCT ?subject WHERE { ?subject a <class> .} LIMIT X".replace(
|
|
32
|
+
"class", class_uri
|
|
33
|
+
).replace("LIMIT X", "" if limit == -1 else f"LIMIT {limit}")
|
|
34
|
+
return [cast(tuple, res)[0] for res in list(self.graph.query(query_statement))]
|
|
35
|
+
|
|
36
|
+
def list_instances_of_type(self, class_uri: URIRef) -> list[ResultRow]:
|
|
37
|
+
"""Get all triples for instances of a given class
|
|
38
|
+
|
|
39
|
+
Args:
|
|
40
|
+
class_uri: Class for which instances are to be found
|
|
41
|
+
|
|
42
|
+
Returns:
|
|
43
|
+
List of triples for instances of the given class
|
|
44
|
+
"""
|
|
45
|
+
query = (
|
|
46
|
+
f"SELECT ?instance ?prop ?value "
|
|
47
|
+
f"WHERE {{ ?instance rdf:type <{class_uri}> . ?instance ?prop ?value . }} order by ?instance "
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
# Select queries gives an iterable of result rows
|
|
51
|
+
return cast(list[ResultRow], list(self.graph.query(query)))
|
|
52
|
+
|
|
53
|
+
def triples_of_type_instances(self, rdf_type: str) -> list[tuple[str, str, str]]:
|
|
54
|
+
"""Get all triples of a given type.
|
|
55
|
+
|
|
56
|
+
This method assumes the graph has been transformed into the default namespace.
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
if self.rules:
|
|
60
|
+
query = (
|
|
61
|
+
f"SELECT ?instance ?prop ?value "
|
|
62
|
+
f"WHERE {{ ?instance a <{self.rules.metadata.namespace[rdf_type]}> . ?instance ?prop ?value . }} "
|
|
63
|
+
"order by ?instance"
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
result = self.graph.query(query)
|
|
67
|
+
|
|
68
|
+
# We cannot include the RDF.type in case there is a neat:type property
|
|
69
|
+
return [remove_namespace(*triple) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index]
|
|
70
|
+
else:
|
|
71
|
+
warnings.warn("No rules found for the graph store, returning empty list.", stacklevel=2)
|
|
72
|
+
return []
|
|
73
|
+
|
|
74
|
+
def construct_instances_of_class(self, class_: str, properties_optional: bool = True) -> list[tuple[str, str, str]]:
|
|
75
|
+
"""CONSTRUCT instances for a given class from the graph store
|
|
76
|
+
|
|
77
|
+
Args:
|
|
78
|
+
class_: Class entity for which we want to generate query
|
|
79
|
+
properties_optional: Whether to make all properties optional, default True
|
|
80
|
+
|
|
81
|
+
Returns:
|
|
82
|
+
List of triples for instances of the given class
|
|
83
|
+
"""
|
|
84
|
+
|
|
85
|
+
if self.rules and (
|
|
86
|
+
query := build_construct_query(
|
|
87
|
+
ClassEntity(prefix=self.rules.metadata.prefix, suffix=class_),
|
|
88
|
+
self.graph,
|
|
89
|
+
self.rules,
|
|
90
|
+
properties_optional,
|
|
91
|
+
)
|
|
92
|
+
):
|
|
93
|
+
result = self.graph.query(query)
|
|
94
|
+
|
|
95
|
+
# We cannot include the RDF.type in case there is a neat:type property
|
|
96
|
+
return [remove_namespace(*triple) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index]
|
|
97
|
+
else:
|
|
98
|
+
warnings.warn("No rules found for the graph store, returning empty list.", stacklevel=2)
|
|
99
|
+
return []
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import cast
|
|
3
|
+
|
|
4
|
+
from rdflib import Graph, URIRef
|
|
5
|
+
|
|
6
|
+
from cognite.neat.rules.analysis import InformationArchitectRulesAnalysis
|
|
7
|
+
from cognite.neat.rules.models._rdfpath import (
|
|
8
|
+
AllReferences,
|
|
9
|
+
Hop,
|
|
10
|
+
RDFPath,
|
|
11
|
+
SingleProperty,
|
|
12
|
+
Traversal,
|
|
13
|
+
)
|
|
14
|
+
from cognite.neat.rules.models.entities import ClassEntity
|
|
15
|
+
from cognite.neat.rules.models.information import InformationProperty, InformationRules
|
|
16
|
+
from cognite.neat.utils.utils import most_occurring_element
|
|
17
|
+
|
|
18
|
+
from ._shared import Triple, hop2property_path
|
|
19
|
+
|
|
20
|
+
_QUERY_TEMPLATE = """CONSTRUCT {{ {graph_template} }}
|
|
21
|
+
WHERE {{ {graph_pattern}
|
|
22
|
+
{filter}
|
|
23
|
+
}}"""
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def build_construct_query(
|
|
27
|
+
class_: ClassEntity,
|
|
28
|
+
graph: Graph,
|
|
29
|
+
rules: InformationRules,
|
|
30
|
+
properties_optional: bool = True,
|
|
31
|
+
class_instances: list[URIRef] | None = None,
|
|
32
|
+
) -> str | None:
|
|
33
|
+
"""Builds a CONSTRUCT query for a given class and rules and optionally filters by class instances.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
class_ : The class entity for which the query is generated.
|
|
37
|
+
graph : The graph containing instances of classes.
|
|
38
|
+
rules : The information rules to use for query generation.
|
|
39
|
+
properties_optional : Whether to make all properties optional. Defaults to True.
|
|
40
|
+
class_instances : List of class instances to filter by. Defaults to None (no filter, return all instances).
|
|
41
|
+
|
|
42
|
+
Returns:
|
|
43
|
+
str: CONSTRUCT query.
|
|
44
|
+
|
|
45
|
+
!!! note "On CONSTRUCT Query"
|
|
46
|
+
CONSTRUCT query is composed of two parts: graph template and graph pattern.
|
|
47
|
+
Graph template is used the shape of instance acquired using graph pattern.
|
|
48
|
+
This allows us to create a new graph with the new shape without actually modifying
|
|
49
|
+
the original graph, or creating new instances.
|
|
50
|
+
|
|
51
|
+
The CONSTRUCT query is far less forgiving than the SELECT query. It will not return
|
|
52
|
+
anything if one of the properties that define the "shape" of the class instance is missing.
|
|
53
|
+
This is the reason why there is an option to make all properties optional, so that
|
|
54
|
+
the query will return all instances that have at least one property defined.
|
|
55
|
+
"""
|
|
56
|
+
if (
|
|
57
|
+
transformations := InformationArchitectRulesAnalysis(rules)
|
|
58
|
+
.class_property_pairs(only_rdfpath=True, consider_inheritance=True)
|
|
59
|
+
.get(class_, None)
|
|
60
|
+
):
|
|
61
|
+
templates, patterns = to_construct_triples(
|
|
62
|
+
graph, list(transformations.values()), rules.prefixes, properties_optional
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
return _QUERY_TEMPLATE.format(
|
|
66
|
+
graph_template="\n".join(triples2sparql_statement(templates)),
|
|
67
|
+
graph_pattern="\n".join(triples2sparql_statement(patterns)),
|
|
68
|
+
filter="" if not class_instances else add_filter(class_instances),
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
else:
|
|
72
|
+
return None
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def add_filter(class_instances: list[URIRef]):
|
|
76
|
+
class_instances_formatted = [f"<{instance}>" for instance in class_instances]
|
|
77
|
+
return f"FILTER (?instance IN ({', '.join(class_instances_formatted)}))"
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def to_construct_triples(
|
|
81
|
+
graph: Graph, transformations: list[InformationProperty], prefixes: dict, properties_optional: bool = True
|
|
82
|
+
) -> tuple[list[Triple], list[Triple]]:
|
|
83
|
+
"""Converts transformations of a class to CONSTRUCT triples which are used to generate CONSTRUCT query
|
|
84
|
+
|
|
85
|
+
Args:
|
|
86
|
+
graph: Graph containing instances of classes (used for property inference for hops)
|
|
87
|
+
transformations : List of transformations to use to form triples
|
|
88
|
+
prefixes : Dictionary of prefixes for namespaces
|
|
89
|
+
properties_optional : Flag indicating if properties should be optional. Defaults to True.
|
|
90
|
+
|
|
91
|
+
Returns:
|
|
92
|
+
tuple: Tuple of triples that define graph template and graph pattern parts of CONSTRUCT query
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
!!! note "Purely inherited transformations"
|
|
96
|
+
Assumption that neat makes is that in case of purely inherited transformations
|
|
97
|
+
we will type instance with class to which transformation belongs to.
|
|
98
|
+
|
|
99
|
+
Otherwise we will type instance with class that is most occurring in non-inherited
|
|
100
|
+
transformations.
|
|
101
|
+
|
|
102
|
+
"""
|
|
103
|
+
# TODO: Add handling of UNIONs in rules
|
|
104
|
+
|
|
105
|
+
templates = []
|
|
106
|
+
patterns = []
|
|
107
|
+
non_inherited_starting_rdf_types = []
|
|
108
|
+
|
|
109
|
+
for transformation in transformations:
|
|
110
|
+
traversal = cast(RDFPath, transformation.transformation).traversal
|
|
111
|
+
|
|
112
|
+
# keeping track of starting rdf types of non-inherited transformations/properties
|
|
113
|
+
if isinstance(traversal, Traversal) and not transformation.inherited:
|
|
114
|
+
non_inherited_starting_rdf_types.append(traversal.class_.id)
|
|
115
|
+
|
|
116
|
+
graph_template_triple = Triple(
|
|
117
|
+
subject="?instance",
|
|
118
|
+
predicate=f"{transformation.class_.prefix}:{transformation.property_}",
|
|
119
|
+
object=f'?{re.sub(r"[^_a-zA-Z0-9/_]", "_", str(transformation.property_).lower())}',
|
|
120
|
+
optional=False,
|
|
121
|
+
)
|
|
122
|
+
templates.append(graph_template_triple)
|
|
123
|
+
|
|
124
|
+
# use case AllReferences: binding instance to certain rdf property
|
|
125
|
+
if isinstance(traversal, AllReferences):
|
|
126
|
+
graph_pattern_triple = Triple(
|
|
127
|
+
subject="BIND(?instance", predicate="AS", object=f"{graph_template_triple.object})", optional=False
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
# use case SingleProperty: simple property traversal
|
|
131
|
+
elif isinstance(traversal, SingleProperty):
|
|
132
|
+
graph_pattern_triple = Triple(
|
|
133
|
+
subject=graph_template_triple.subject,
|
|
134
|
+
predicate=traversal.property.id,
|
|
135
|
+
object=graph_template_triple.object,
|
|
136
|
+
optional=True if properties_optional else not transformation.is_mandatory,
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
# use case Hop: property traversal with multiple hops turned into property path
|
|
140
|
+
# see: https://www.oxfordsemantic.tech/faqs/what-is-a-property-path
|
|
141
|
+
elif isinstance(traversal, Hop):
|
|
142
|
+
graph_pattern_triple = Triple(
|
|
143
|
+
subject="?instance",
|
|
144
|
+
predicate=hop2property_path(graph, traversal, prefixes),
|
|
145
|
+
object=graph_template_triple.object,
|
|
146
|
+
optional=True if properties_optional else not transformation.is_mandatory,
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
# other type of rdfpaths are skipped
|
|
150
|
+
else:
|
|
151
|
+
continue
|
|
152
|
+
|
|
153
|
+
patterns.append(graph_pattern_triple)
|
|
154
|
+
|
|
155
|
+
# Add first triple for graph pattern stating type of object
|
|
156
|
+
# we use most occurring here to pull out most occurring rdf type of the starting
|
|
157
|
+
# node of the transformation, or the class itself to which the transformation is
|
|
158
|
+
# defined for.
|
|
159
|
+
# This is safeguard in case there are multiple classes in the graph pattern
|
|
160
|
+
patterns.insert(
|
|
161
|
+
0,
|
|
162
|
+
Triple(
|
|
163
|
+
subject="?instance",
|
|
164
|
+
predicate="a",
|
|
165
|
+
object=(
|
|
166
|
+
most_occurring_element(non_inherited_starting_rdf_types)
|
|
167
|
+
if non_inherited_starting_rdf_types
|
|
168
|
+
else str(transformation.class_)
|
|
169
|
+
),
|
|
170
|
+
optional=False,
|
|
171
|
+
),
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
return templates, patterns
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def triples2sparql_statement(triples: list[Triple]):
|
|
178
|
+
return [
|
|
179
|
+
(
|
|
180
|
+
f"OPTIONAL {{ {triple.subject} {triple.predicate} {triple.object} . }}"
|
|
181
|
+
if triple.optional
|
|
182
|
+
else f"{triple.subject} {triple.predicate} {triple.object} ."
|
|
183
|
+
)
|
|
184
|
+
for triple in triples
|
|
185
|
+
]
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
from collections.abc import Iterable
|
|
3
|
+
from typing import ClassVar, cast
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
6
|
+
from rdflib import Graph, Literal, Namespace
|
|
7
|
+
from rdflib.term import URIRef
|
|
8
|
+
|
|
9
|
+
from cognite.neat.constants import PREFIXES
|
|
10
|
+
from cognite.neat.rules.models._rdfpath import (
|
|
11
|
+
Hop,
|
|
12
|
+
Step,
|
|
13
|
+
)
|
|
14
|
+
from cognite.neat.utils.utils import remove_namespace, uri_to_short_form
|
|
15
|
+
|
|
16
|
+
if sys.version_info >= (3, 11):
|
|
17
|
+
from typing import Self
|
|
18
|
+
else:
|
|
19
|
+
from typing_extensions import Self
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Triple(BaseModel):
|
|
23
|
+
model_config: ClassVar[ConfigDict] = ConfigDict(
|
|
24
|
+
populate_by_name=True, arbitrary_types_allowed=True, strict=False, extra="allow"
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
subject: str | URIRef
|
|
28
|
+
predicate: str | URIRef
|
|
29
|
+
object: str | URIRef | Literal | None = None
|
|
30
|
+
optional: bool = Field(
|
|
31
|
+
description="Indicates whether a triple is optional, used when building SPARQL query",
|
|
32
|
+
default=False,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
@classmethod
|
|
36
|
+
def from_rdflib_triple(cls, triple: tuple[URIRef, URIRef, URIRef | Literal]) -> Self:
|
|
37
|
+
return cls(subject=triple[0], predicate=triple[1], object=triple[2])
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def generate_prefix_header(prefixes: dict[str, Namespace] = PREFIXES) -> str:
|
|
41
|
+
"""Generate prefix header which is added to SPARQL query and allows for shorten query statements
|
|
42
|
+
|
|
43
|
+
Parameters
|
|
44
|
+
----------
|
|
45
|
+
prefixes : dict
|
|
46
|
+
Dict containing prefix - namespace pairs, default PREFIXES
|
|
47
|
+
|
|
48
|
+
Returns
|
|
49
|
+
-------
|
|
50
|
+
str
|
|
51
|
+
Prefix header
|
|
52
|
+
"""
|
|
53
|
+
return "".join(f"PREFIX {key}:<{value}>\n" for key, value in prefixes.items())
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def get_predicate_id(
|
|
57
|
+
graph: Graph, subject_type_id: str, object_type_id: str, prefixes: dict[str, Namespace] = PREFIXES
|
|
58
|
+
) -> URIRef:
|
|
59
|
+
"""Returns predicate (aka property) URI (i.e., ID) that connects subject and object
|
|
60
|
+
|
|
61
|
+
Parameters
|
|
62
|
+
----------
|
|
63
|
+
graph : Graph
|
|
64
|
+
Data model graph or data model instance (aka knowledge graph)
|
|
65
|
+
subject_type_id : str
|
|
66
|
+
ID of subject type (aka subject class)
|
|
67
|
+
object_type_id : str
|
|
68
|
+
ID of object type (aka object class)
|
|
69
|
+
prefixes : dict, optional
|
|
70
|
+
Dict containing prefix - namespace pairs, default PREFIXES
|
|
71
|
+
|
|
72
|
+
Returns
|
|
73
|
+
-------
|
|
74
|
+
URIRef
|
|
75
|
+
ID of predicate (aka property) connecting subject and object
|
|
76
|
+
"""
|
|
77
|
+
query = """
|
|
78
|
+
|
|
79
|
+
SELECT ?predicateTypeID
|
|
80
|
+
WHERE {
|
|
81
|
+
?subjectInstanceID a subjectTypeID .
|
|
82
|
+
?objectInstanceID a objectTypeID .
|
|
83
|
+
?subjectInstanceID ?predicateTypeID ?objectInstanceID .
|
|
84
|
+
} LIMIT 1"""
|
|
85
|
+
|
|
86
|
+
query = query.replace("insertPrefixes", generate_prefix_header(prefixes))
|
|
87
|
+
final_query = query.replace("subjectTypeID", subject_type_id).replace("objectTypeID", object_type_id)
|
|
88
|
+
res = list(cast(tuple, graph.query(final_query)))
|
|
89
|
+
|
|
90
|
+
if len(res) != 1:
|
|
91
|
+
raise ValueError("Subject and Object must have exactly 1 relation!")
|
|
92
|
+
|
|
93
|
+
return res[0][0]
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def hop2property_path(graph: Graph, hop: Hop, prefixes: dict[str, Namespace]) -> str:
|
|
97
|
+
"""Converts hop to property path string
|
|
98
|
+
|
|
99
|
+
Parameters
|
|
100
|
+
----------
|
|
101
|
+
graph : Graph
|
|
102
|
+
Graph containing instances of classes
|
|
103
|
+
hop : Hop
|
|
104
|
+
Hop to convert
|
|
105
|
+
prefixes : dict[str, Namespace]
|
|
106
|
+
Dictionary of prefixes to use for compression and predicate querying
|
|
107
|
+
|
|
108
|
+
Returns
|
|
109
|
+
-------
|
|
110
|
+
str
|
|
111
|
+
Property path string for hop traversal (e.g. ^rdf:type/rdfs:subClassOf)
|
|
112
|
+
"""
|
|
113
|
+
|
|
114
|
+
# setting previous step to origin, as we are starting from there
|
|
115
|
+
previous_step = Step(class_=hop.class_, direction="origin")
|
|
116
|
+
|
|
117
|
+
# add triples for all steps until destination
|
|
118
|
+
property_path = ""
|
|
119
|
+
for current_step in hop.traversal:
|
|
120
|
+
sub_entity, obj_entity = (
|
|
121
|
+
(current_step, previous_step) if current_step.direction == "source" else (previous_step, current_step)
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
predicate_raw = get_predicate_id(graph, sub_entity.class_.id, obj_entity.class_.id, prefixes)
|
|
125
|
+
|
|
126
|
+
predicate = uri_to_short_form(predicate_raw, prefixes)
|
|
127
|
+
|
|
128
|
+
predicate = f"^{predicate}" if current_step.direction == "source" else predicate
|
|
129
|
+
property_path += f"{predicate}/"
|
|
130
|
+
|
|
131
|
+
previous_step = current_step
|
|
132
|
+
|
|
133
|
+
if previous_step.property:
|
|
134
|
+
return property_path + previous_step.property.id
|
|
135
|
+
else:
|
|
136
|
+
# removing "/" at the end of property path if there is no property at the end
|
|
137
|
+
return property_path[:-1]
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def triples2dictionary(triples: Iterable[tuple[URIRef, URIRef, str | URIRef]]) -> dict[URIRef, dict[str, list[str]]]:
|
|
141
|
+
"""Converts list of triples to dictionary"""
|
|
142
|
+
dictionary: dict[URIRef, dict[str, list[str]]] = {}
|
|
143
|
+
for triple in triples:
|
|
144
|
+
id_: str
|
|
145
|
+
property_: str
|
|
146
|
+
value: str
|
|
147
|
+
uri: URIRef
|
|
148
|
+
|
|
149
|
+
id_, property_, value = remove_namespace(*triple) # type: ignore[misc]
|
|
150
|
+
uri = triple[0]
|
|
151
|
+
|
|
152
|
+
if uri not in dictionary:
|
|
153
|
+
dictionary[uri] = {"external_id": [id_]}
|
|
154
|
+
|
|
155
|
+
if property_ not in dictionary[uri]:
|
|
156
|
+
dictionary[uri][property_] = [value]
|
|
157
|
+
else:
|
|
158
|
+
dictionary[uri][property_].append(value)
|
|
159
|
+
return dictionary
|
|
@@ -5,18 +5,17 @@ from datetime import datetime, timezone
|
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
from typing import cast
|
|
7
7
|
|
|
8
|
-
from rdflib import
|
|
8
|
+
from rdflib import Graph, Namespace, URIRef
|
|
9
9
|
from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
|
|
10
|
-
from rdflib.query import ResultRow
|
|
11
10
|
|
|
12
11
|
from cognite.neat.constants import DEFAULT_NAMESPACE
|
|
13
12
|
from cognite.neat.graph._shared import MIMETypes
|
|
14
13
|
from cognite.neat.graph.extractors import RdfFileExtractor, TripleExtractors
|
|
15
14
|
from cognite.neat.graph.models import Triple
|
|
15
|
+
from cognite.neat.graph.queries import Queries
|
|
16
16
|
from cognite.neat.graph.transformers import Transformers
|
|
17
17
|
from cognite.neat.rules.models.entities import ClassEntity
|
|
18
18
|
from cognite.neat.rules.models.information import InformationRules
|
|
19
|
-
from cognite.neat.utils import remove_namespace
|
|
20
19
|
from cognite.neat.utils.auxiliary import local_import
|
|
21
20
|
|
|
22
21
|
from ._provenance import Change, Provenance
|
|
@@ -59,32 +58,30 @@ class NeatGraphStore:
|
|
|
59
58
|
)
|
|
60
59
|
|
|
61
60
|
if rules:
|
|
62
|
-
self.rules = rules
|
|
63
|
-
self.base_namespace = self.rules.metadata.namespace
|
|
64
|
-
self.provenance.append(
|
|
65
|
-
Change.record(
|
|
66
|
-
activity=f"{type(self)}.rules",
|
|
67
|
-
start=_start,
|
|
68
|
-
end=datetime.now(timezone.utc),
|
|
69
|
-
description=f"Added rules to graph store as {type(self.rules).__name__}",
|
|
70
|
-
)
|
|
71
|
-
)
|
|
72
|
-
|
|
73
|
-
if self.rules.prefixes:
|
|
74
|
-
self._upsert_prefixes(self.rules.prefixes)
|
|
75
|
-
self.provenance.append(
|
|
76
|
-
Change.record(
|
|
77
|
-
activity=f"{type(self).__name__}._upsert_prefixes",
|
|
78
|
-
start=_start,
|
|
79
|
-
end=datetime.now(timezone.utc),
|
|
80
|
-
description="Upsert prefixes to graph store",
|
|
81
|
-
)
|
|
82
|
-
)
|
|
83
|
-
|
|
61
|
+
self.add_rules(rules)
|
|
84
62
|
else:
|
|
85
63
|
self.base_namespace = DEFAULT_NAMESPACE
|
|
86
64
|
|
|
87
|
-
self.queries = _Queries(self)
|
|
65
|
+
self.queries = Queries(self.graph, self.rules)
|
|
66
|
+
|
|
67
|
+
def add_rules(self, rules: InformationRules) -> None:
|
|
68
|
+
"""This method is used to add rules to the graph store and it is the only correct
|
|
69
|
+
way to add rules to the graph store, after the graph store has been initialized."""
|
|
70
|
+
|
|
71
|
+
self.rules = rules
|
|
72
|
+
self.base_namespace = self.rules.metadata.namespace
|
|
73
|
+
self.queries = Queries(self.graph, self.rules)
|
|
74
|
+
self.provenance.append(
|
|
75
|
+
Change.record(
|
|
76
|
+
activity=f"{type(self)}.rules",
|
|
77
|
+
start=datetime.now(timezone.utc),
|
|
78
|
+
end=datetime.now(timezone.utc),
|
|
79
|
+
description=f"Added rules to graph store as {type(self.rules).__name__}",
|
|
80
|
+
)
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
if self.rules.prefixes:
|
|
84
|
+
self._upsert_prefixes(self.rules.prefixes)
|
|
88
85
|
|
|
89
86
|
def _upsert_prefixes(self, prefixes: dict[str, Namespace]) -> None:
|
|
90
87
|
"""Adds prefixes to the graph store."""
|
|
@@ -181,7 +178,7 @@ class NeatGraphStore:
|
|
|
181
178
|
warnings.warn("Desired type not found in graph!", stacklevel=2)
|
|
182
179
|
return []
|
|
183
180
|
|
|
184
|
-
return
|
|
181
|
+
return self.queries.construct_instances_of_class(class_)
|
|
185
182
|
|
|
186
183
|
def _parse_file(
|
|
187
184
|
self,
|
|
@@ -281,63 +278,3 @@ class NeatGraphStore:
|
|
|
281
278
|
description=transformer.description,
|
|
282
279
|
)
|
|
283
280
|
)
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
class _Queries:
|
|
287
|
-
"""Helper class for storing standard queries for the graph store."""
|
|
288
|
-
|
|
289
|
-
def __init__(self, store: NeatGraphStore):
|
|
290
|
-
self.store = store
|
|
291
|
-
|
|
292
|
-
def list_instances_ids_of_class(self, class_uri: URIRef, limit: int = -1) -> list[URIRef]:
|
|
293
|
-
"""Get instances ids for a given class
|
|
294
|
-
|
|
295
|
-
Args:
|
|
296
|
-
class_uri: Class for which instances are to be found
|
|
297
|
-
limit: Max number of instances to return, by default -1 meaning all instances
|
|
298
|
-
|
|
299
|
-
Returns:
|
|
300
|
-
List of class instance URIs
|
|
301
|
-
"""
|
|
302
|
-
query_statement = "SELECT DISTINCT ?subject WHERE { ?subject a <class> .} LIMIT X".replace(
|
|
303
|
-
"class", class_uri
|
|
304
|
-
).replace("LIMIT X", "" if limit == -1 else f"LIMIT {limit}")
|
|
305
|
-
return [cast(tuple, res)[0] for res in list(self.store.graph.query(query_statement))]
|
|
306
|
-
|
|
307
|
-
def list_instances_of_type(self, class_uri: URIRef) -> list[ResultRow]:
|
|
308
|
-
"""Get all triples for instances of a given class
|
|
309
|
-
|
|
310
|
-
Args:
|
|
311
|
-
class_uri: Class for which instances are to be found
|
|
312
|
-
|
|
313
|
-
Returns:
|
|
314
|
-
List of triples for instances of the given class
|
|
315
|
-
"""
|
|
316
|
-
query = (
|
|
317
|
-
f"SELECT ?instance ?prop ?value "
|
|
318
|
-
f"WHERE {{ ?instance rdf:type <{class_uri}> . ?instance ?prop ?value . }} order by ?instance "
|
|
319
|
-
)
|
|
320
|
-
|
|
321
|
-
# Select queries gives an iterable of result rows
|
|
322
|
-
return cast(list[ResultRow], list(self.store.graph.query(query)))
|
|
323
|
-
|
|
324
|
-
def triples_of_type_instances(self, rdf_type: str) -> list[tuple[str, str, str]]:
|
|
325
|
-
"""Get all triples of a given type.
|
|
326
|
-
|
|
327
|
-
This method assumes the graph has been transformed into the default namespace.
|
|
328
|
-
"""
|
|
329
|
-
|
|
330
|
-
if self.store.rules:
|
|
331
|
-
query = (
|
|
332
|
-
f"SELECT ?instance ?prop ?value "
|
|
333
|
-
f"WHERE {{ ?instance a <{self.store.rules.metadata.namespace[rdf_type]}> . ?instance ?prop ?value . }} "
|
|
334
|
-
"order by ?instance"
|
|
335
|
-
)
|
|
336
|
-
|
|
337
|
-
result = self.store.graph.query(query)
|
|
338
|
-
|
|
339
|
-
# We cannot include the RDF.type in case there is a neat:type property
|
|
340
|
-
return [remove_namespace(*triple) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index]
|
|
341
|
-
else:
|
|
342
|
-
warnings.warn("No rules found for the graph store, returning empty list.", stacklevel=2)
|
|
343
|
-
return []
|
|
@@ -2,7 +2,7 @@ import itertools
|
|
|
2
2
|
import logging
|
|
3
3
|
import warnings
|
|
4
4
|
from collections import defaultdict
|
|
5
|
-
from typing import Any
|
|
5
|
+
from typing import Any
|
|
6
6
|
|
|
7
7
|
import pandas as pd
|
|
8
8
|
from pydantic import ValidationError
|
|
@@ -17,24 +17,11 @@ from ._base import BaseAnalysis
|
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
class InformationArchitectRulesAnalysis(BaseAnalysis):
|
|
20
|
+
"""Assumes analysis over only the complete schema"""
|
|
21
|
+
|
|
20
22
|
def __init__(self, rules: InformationRules):
|
|
21
23
|
self.rules = rules
|
|
22
24
|
|
|
23
|
-
@property
|
|
24
|
-
def referred_classes(self) -> set[ClassEntity]:
|
|
25
|
-
return self.directly_referred_classes.union(self.inherited_referred_classes)
|
|
26
|
-
|
|
27
|
-
@property
|
|
28
|
-
def referred_classes_properties(self) -> list[InformationProperty]:
|
|
29
|
-
referred_classes_properties = []
|
|
30
|
-
class_properties_dict = self.classes_with_properties(use_reference=True)
|
|
31
|
-
|
|
32
|
-
for class_ in self.referred_classes:
|
|
33
|
-
if class_ in class_properties_dict:
|
|
34
|
-
referred_classes_properties.extend(class_properties_dict[class_])
|
|
35
|
-
|
|
36
|
-
return referred_classes_properties
|
|
37
|
-
|
|
38
25
|
@property
|
|
39
26
|
def directly_referred_classes(self) -> set[ClassEntity]:
|
|
40
27
|
return {
|
|
@@ -51,19 +38,17 @@ class InformationArchitectRulesAnalysis(BaseAnalysis):
|
|
|
51
38
|
dir_referred_classes = self.directly_referred_classes
|
|
52
39
|
inherited_referred_classes = []
|
|
53
40
|
for class_ in dir_referred_classes:
|
|
54
|
-
inherited_referred_classes.extend(self.class_inheritance_path(class_
|
|
41
|
+
inherited_referred_classes.extend(self.class_inheritance_path(class_))
|
|
55
42
|
return set(inherited_referred_classes)
|
|
56
43
|
|
|
57
|
-
def class_parent_pairs(self
|
|
44
|
+
def class_parent_pairs(self) -> dict[ClassEntity, list[ParentClassEntity]]:
|
|
58
45
|
"""This only returns class - parent pairs only if parent is in the same data model"""
|
|
59
46
|
class_subclass_pairs: dict[ClassEntity, list[ParentClassEntity]] = {}
|
|
60
47
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
if not rules:
|
|
48
|
+
if not self.rules:
|
|
64
49
|
return class_subclass_pairs
|
|
65
50
|
|
|
66
|
-
for definition in rules.classes:
|
|
51
|
+
for definition in self.rules.classes:
|
|
67
52
|
class_subclass_pairs[definition.class_] = []
|
|
68
53
|
|
|
69
54
|
if definition.parent is None:
|
|
@@ -81,13 +66,12 @@ class InformationArchitectRulesAnalysis(BaseAnalysis):
|
|
|
81
66
|
return class_subclass_pairs
|
|
82
67
|
|
|
83
68
|
def classes_with_properties(
|
|
84
|
-
self, consider_inheritance: bool = False
|
|
69
|
+
self, consider_inheritance: bool = False
|
|
85
70
|
) -> dict[ClassEntity, list[InformationProperty]]:
|
|
86
71
|
"""Returns classes that have been defined in the data model.
|
|
87
72
|
|
|
88
73
|
Args:
|
|
89
74
|
consider_inheritance: Whether to consider inheritance or not. Defaults False
|
|
90
|
-
use_reference: Whether to use reference rules or not. Defaults False
|
|
91
75
|
|
|
92
76
|
Returns:
|
|
93
77
|
Dictionary of classes with a list of properties defined for them
|
|
@@ -103,21 +87,19 @@ class InformationArchitectRulesAnalysis(BaseAnalysis):
|
|
|
103
87
|
|
|
104
88
|
class_property_pairs: dict[ClassEntity, list[InformationProperty]] = defaultdict(list)
|
|
105
89
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
for property_ in rules.properties:
|
|
90
|
+
for property_ in self.rules.properties:
|
|
109
91
|
class_property_pairs[property_.class_].append(property_)
|
|
110
92
|
|
|
111
93
|
if consider_inheritance:
|
|
112
|
-
class_parent_pairs = self.class_parent_pairs(
|
|
94
|
+
class_parent_pairs = self.class_parent_pairs()
|
|
113
95
|
for class_ in class_parent_pairs:
|
|
114
96
|
self._add_inherited_properties(class_, class_property_pairs, class_parent_pairs)
|
|
115
97
|
|
|
116
98
|
return class_property_pairs
|
|
117
99
|
|
|
118
|
-
def class_inheritance_path(self, class_: ClassEntity | str
|
|
100
|
+
def class_inheritance_path(self, class_: ClassEntity | str) -> list[ClassEntity]:
|
|
119
101
|
class_ = class_ if isinstance(class_, ClassEntity) else ClassEntity.load(class_)
|
|
120
|
-
class_parent_pairs = self.class_parent_pairs(
|
|
102
|
+
class_parent_pairs = self.class_parent_pairs()
|
|
121
103
|
return get_inheritance_path(class_, class_parent_pairs)
|
|
122
104
|
|
|
123
105
|
@classmethod
|
|
@@ -133,8 +115,13 @@ class InformationArchitectRulesAnalysis(BaseAnalysis):
|
|
|
133
115
|
if parent.as_class_entity() in class_property_pairs:
|
|
134
116
|
for property_ in class_property_pairs[parent.as_class_entity()]:
|
|
135
117
|
property_ = property_.model_copy()
|
|
136
|
-
|
|
118
|
+
|
|
119
|
+
# This corresponds to importing properties from parent class
|
|
120
|
+
# making sure that the property is attached to desired child class
|
|
137
121
|
property_.class_ = class_
|
|
122
|
+
property_.inherited = True
|
|
123
|
+
|
|
124
|
+
# need same if we have RDF path to make sure that the starting class is the
|
|
138
125
|
|
|
139
126
|
if class_ in class_property_pairs:
|
|
140
127
|
class_property_pairs[class_].append(property_)
|
|
@@ -142,14 +129,13 @@ class InformationArchitectRulesAnalysis(BaseAnalysis):
|
|
|
142
129
|
class_property_pairs[class_] = [property_]
|
|
143
130
|
|
|
144
131
|
def class_property_pairs(
|
|
145
|
-
self, only_rdfpath: bool = False, consider_inheritance: bool = False
|
|
132
|
+
self, only_rdfpath: bool = False, consider_inheritance: bool = False
|
|
146
133
|
) -> dict[ClassEntity, dict[str, InformationProperty]]:
|
|
147
134
|
"""Returns a dictionary of classes with a dictionary of properties associated with them.
|
|
148
135
|
|
|
149
136
|
Args:
|
|
150
137
|
only_rdfpath : To consider only properties which have rule `rdfpath` set. Defaults False
|
|
151
138
|
consider_inheritance: Whether to consider inheritance or not. Defaults False
|
|
152
|
-
use_reference : Whether to use reference rules or not. Defaults False
|
|
153
139
|
|
|
154
140
|
Returns:
|
|
155
141
|
Dictionary of classes with a dictionary of properties associated with them.
|
|
@@ -178,7 +164,7 @@ class InformationArchitectRulesAnalysis(BaseAnalysis):
|
|
|
178
164
|
|
|
179
165
|
class_property_pairs = {}
|
|
180
166
|
|
|
181
|
-
for class_, properties in self.classes_with_properties(consider_inheritance
|
|
167
|
+
for class_, properties in self.classes_with_properties(consider_inheritance).items():
|
|
182
168
|
processed_properties = {}
|
|
183
169
|
for property_ in properties:
|
|
184
170
|
if property_.property_ in processed_properties:
|
|
@@ -197,12 +183,11 @@ class InformationArchitectRulesAnalysis(BaseAnalysis):
|
|
|
197
183
|
|
|
198
184
|
return class_property_pairs
|
|
199
185
|
|
|
200
|
-
def class_linkage(self, consider_inheritance: bool = False
|
|
186
|
+
def class_linkage(self, consider_inheritance: bool = False) -> pd.DataFrame:
|
|
201
187
|
"""Returns a dataframe with the class linkage of the data model.
|
|
202
188
|
|
|
203
189
|
Args:
|
|
204
190
|
consider_inheritance: Whether to consider inheritance or not. Defaults False
|
|
205
|
-
use_reference: Whether to use reference rules or not. Defaults False
|
|
206
191
|
|
|
207
192
|
Returns:
|
|
208
193
|
Dataframe with the class linkage of the data model
|
|
@@ -210,7 +195,7 @@ class InformationArchitectRulesAnalysis(BaseAnalysis):
|
|
|
210
195
|
|
|
211
196
|
class_linkage = pd.DataFrame(columns=["source_class", "target_class", "connecting_property", "max_occurrence"])
|
|
212
197
|
|
|
213
|
-
class_property_pairs = self.classes_with_properties(consider_inheritance
|
|
198
|
+
class_property_pairs = self.classes_with_properties(consider_inheritance)
|
|
214
199
|
properties = list(itertools.chain.from_iterable(class_property_pairs.values()))
|
|
215
200
|
|
|
216
201
|
for property_ in properties:
|
|
@@ -230,56 +215,50 @@ class InformationArchitectRulesAnalysis(BaseAnalysis):
|
|
|
230
215
|
|
|
231
216
|
return class_linkage
|
|
232
217
|
|
|
233
|
-
def connected_classes(self, consider_inheritance: bool = False
|
|
218
|
+
def connected_classes(self, consider_inheritance: bool = False) -> set[ClassEntity]:
|
|
234
219
|
"""Return a set of classes that are connected to other classes.
|
|
235
220
|
|
|
236
221
|
Args:
|
|
237
222
|
consider_inheritance: Whether to consider inheritance or not. Defaults False
|
|
238
|
-
use_reference: Whether to use reference rules or not. Defaults False
|
|
239
223
|
|
|
240
224
|
Returns:
|
|
241
225
|
Set of classes that are connected to other classes
|
|
242
226
|
"""
|
|
243
|
-
class_linkage = self.class_linkage(consider_inheritance
|
|
227
|
+
class_linkage = self.class_linkage(consider_inheritance)
|
|
244
228
|
return set(class_linkage.source_class.values).union(set(class_linkage.target_class.values))
|
|
245
229
|
|
|
246
|
-
def defined_classes(self, consider_inheritance: bool = False
|
|
230
|
+
def defined_classes(self, consider_inheritance: bool = False) -> set[ClassEntity]:
|
|
247
231
|
"""Returns classes that have properties defined for them in the data model.
|
|
248
232
|
|
|
249
233
|
Args:
|
|
250
234
|
consider_inheritance: Whether to consider inheritance or not. Defaults False
|
|
251
|
-
use_reference: Whether to use reference rules or not. Defaults False
|
|
252
235
|
|
|
253
236
|
Returns:
|
|
254
237
|
Set of classes that have been defined in the data model
|
|
255
238
|
"""
|
|
256
|
-
class_property_pairs = self.classes_with_properties(consider_inheritance
|
|
239
|
+
class_property_pairs = self.classes_with_properties(consider_inheritance)
|
|
257
240
|
properties = list(itertools.chain.from_iterable(class_property_pairs.values()))
|
|
258
241
|
|
|
259
242
|
return {property.class_ for property in properties}
|
|
260
243
|
|
|
261
|
-
def disconnected_classes(self, consider_inheritance: bool = False
|
|
244
|
+
def disconnected_classes(self, consider_inheritance: bool = False) -> set[ClassEntity]:
|
|
262
245
|
"""Return a set of classes that are disconnected (i.e. isolated) from other classes.
|
|
263
246
|
|
|
264
247
|
Args:
|
|
265
248
|
consider_inheritance: Whether to consider inheritance or not. Defaults False
|
|
266
|
-
use_reference: Whether to use reference rules or not. Defaults False
|
|
267
249
|
|
|
268
250
|
Returns:
|
|
269
251
|
Set of classes that are disconnected from other classes
|
|
270
252
|
"""
|
|
271
|
-
return self.defined_classes(consider_inheritance
|
|
272
|
-
consider_inheritance, use_reference
|
|
273
|
-
)
|
|
253
|
+
return self.defined_classes(consider_inheritance) - self.connected_classes(consider_inheritance)
|
|
274
254
|
|
|
275
255
|
def symmetrically_connected_classes(
|
|
276
|
-
self, consider_inheritance: bool = False
|
|
256
|
+
self, consider_inheritance: bool = False
|
|
277
257
|
) -> set[tuple[ClassEntity, ClassEntity]]:
|
|
278
258
|
"""Returns a set of pairs of symmetrically linked classes.
|
|
279
259
|
|
|
280
260
|
Args:
|
|
281
261
|
consider_inheritance: Whether to consider inheritance or not. Defaults False
|
|
282
|
-
use_reference: Whether to use reference rules or not. Defaults False
|
|
283
262
|
|
|
284
263
|
Returns:
|
|
285
264
|
Set of pairs of symmetrically linked classes
|
|
@@ -293,7 +272,7 @@ class InformationArchitectRulesAnalysis(BaseAnalysis):
|
|
|
293
272
|
# TODO: Find better name for this method
|
|
294
273
|
sym_pairs: set[tuple[ClassEntity, ClassEntity]] = set()
|
|
295
274
|
|
|
296
|
-
class_linkage = self.class_linkage(consider_inheritance
|
|
275
|
+
class_linkage = self.class_linkage(consider_inheritance)
|
|
297
276
|
if class_linkage.empty:
|
|
298
277
|
return sym_pairs
|
|
299
278
|
|
|
@@ -321,13 +300,12 @@ class InformationArchitectRulesAnalysis(BaseAnalysis):
|
|
|
321
300
|
class_dict[str(definition.class_.suffix)] = definition
|
|
322
301
|
return class_dict
|
|
323
302
|
|
|
324
|
-
def subset_rules(self, desired_classes: set[ClassEntity]
|
|
303
|
+
def subset_rules(self, desired_classes: set[ClassEntity]) -> InformationRules:
|
|
325
304
|
"""
|
|
326
305
|
Subset rules to only include desired classes and their properties.
|
|
327
306
|
|
|
328
307
|
Args:
|
|
329
308
|
desired_classes: Desired classes to include in the reduced data model
|
|
330
|
-
use_reference: Whether to use reference rules or not. Defaults False
|
|
331
309
|
|
|
332
310
|
Returns:
|
|
333
311
|
Instance of InformationRules
|
|
@@ -350,9 +328,7 @@ class InformationArchitectRulesAnalysis(BaseAnalysis):
|
|
|
350
328
|
only with base Pydantic validators.
|
|
351
329
|
"""
|
|
352
330
|
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
if rules.metadata.schema_ is not SchemaCompleteness.complete:
|
|
331
|
+
if self.rules.metadata.schema_ is not SchemaCompleteness.complete:
|
|
356
332
|
raise ValueError("Rules are not complete cannot perform reduction!")
|
|
357
333
|
class_as_dict = self.as_class_dict()
|
|
358
334
|
class_parents_pairs = self.class_parent_pairs()
|
|
@@ -380,8 +356,8 @@ class InformationArchitectRulesAnalysis(BaseAnalysis):
|
|
|
380
356
|
)
|
|
381
357
|
|
|
382
358
|
reduced_data_model: dict[str, Any] = {
|
|
383
|
-
"metadata": rules.metadata.model_copy(),
|
|
384
|
-
"prefixes": (rules.prefixes or {}).copy(),
|
|
359
|
+
"metadata": self.rules.metadata.model_copy(),
|
|
360
|
+
"prefixes": (self.rules.prefixes or {}).copy(),
|
|
385
361
|
"classes": [],
|
|
386
362
|
"properties": [],
|
|
387
363
|
}
|
|
@@ -17,7 +17,7 @@ from cognite.neat.rules.models.information import (
|
|
|
17
17
|
InformationMetadata,
|
|
18
18
|
InformationRulesInput,
|
|
19
19
|
)
|
|
20
|
-
from cognite.neat.utils.utils import get_namespace, remove_namespace
|
|
20
|
+
from cognite.neat.utils.utils import get_namespace, remove_namespace, uri_to_short_form
|
|
21
21
|
|
|
22
22
|
ORDERED_CLASSES_QUERY = """SELECT ?class (count(?s) as ?instances )
|
|
23
23
|
WHERE { ?s a ?class . }
|
|
@@ -176,6 +176,10 @@ class InferenceImporter(BaseImporter):
|
|
|
176
176
|
"max_count": cast(RdfLiteral, occurrence).value,
|
|
177
177
|
"value_type": value_type_id,
|
|
178
178
|
"reference": property_uri,
|
|
179
|
+
"transformation": (
|
|
180
|
+
f"{uri_to_short_form(class_definition['reference'], prefixes)}"
|
|
181
|
+
f"({uri_to_short_form(cast(URIRef, property_uri), prefixes)})"
|
|
182
|
+
),
|
|
179
183
|
"comment": (
|
|
180
184
|
f"Class <{class_id}> has property <{property_id}> with "
|
|
181
185
|
f"value type <{value_type_id}> which occurs <1> times in the graph"
|
|
@@ -169,6 +169,11 @@ class InformationProperty(SheetEntity):
|
|
|
169
169
|
match_type: MatchType | None = Field(alias="Match Type", default=None)
|
|
170
170
|
transformation: str | RDFPath | None = Field(alias="Transformation", default=None)
|
|
171
171
|
comment: str | None = Field(alias="Comment", default=None)
|
|
172
|
+
inherited: bool = Field(
|
|
173
|
+
default=False,
|
|
174
|
+
alias="Inherited",
|
|
175
|
+
description="Flag to indicate if the property is inherited, only use for internal purposes",
|
|
176
|
+
)
|
|
172
177
|
|
|
173
178
|
@field_serializer("max_count", when_used="json-unless-none")
|
|
174
179
|
def serialize_max_count(self, value: int | float | None) -> int | float | None | str:
|
|
@@ -84,8 +84,7 @@ class InformationPropertyInput:
|
|
|
84
84
|
default: Any | None = None
|
|
85
85
|
reference: str | None = None
|
|
86
86
|
match_type: str | None = None
|
|
87
|
-
|
|
88
|
-
rule: str | None = None
|
|
87
|
+
transformation: str | None = None
|
|
89
88
|
|
|
90
89
|
@classmethod
|
|
91
90
|
@overload
|
|
@@ -122,8 +121,7 @@ class InformationPropertyInput:
|
|
|
122
121
|
default=data.get("default", None),
|
|
123
122
|
reference=data.get("reference", None),
|
|
124
123
|
match_type=data.get("match_type", None),
|
|
125
|
-
|
|
126
|
-
rule=data.get("rule", None),
|
|
124
|
+
transformation=data.get("transformation", None),
|
|
127
125
|
)
|
|
128
126
|
|
|
129
127
|
def dump(self, default_prefix: str) -> dict[str, Any]:
|
|
@@ -158,8 +156,7 @@ class InformationPropertyInput:
|
|
|
158
156
|
"Default": self.default,
|
|
159
157
|
"Reference": self.reference,
|
|
160
158
|
"Match Type": self.match_type,
|
|
161
|
-
"
|
|
162
|
-
"Rule": self.rule,
|
|
159
|
+
"Transformation": self.transformation,
|
|
163
160
|
}
|
|
164
161
|
|
|
165
162
|
|
cognite/neat/utils/utils.py
CHANGED
|
@@ -3,7 +3,7 @@ import logging
|
|
|
3
3
|
import re
|
|
4
4
|
import sys
|
|
5
5
|
import time
|
|
6
|
-
from collections import OrderedDict
|
|
6
|
+
from collections import Counter, OrderedDict
|
|
7
7
|
from collections.abc import Iterable
|
|
8
8
|
from datetime import datetime
|
|
9
9
|
from functools import wraps
|
|
@@ -386,3 +386,8 @@ def string_to_ideal_type(input_string: str) -> int | bool | float | datetime | s
|
|
|
386
386
|
except ValueError:
|
|
387
387
|
# Return the input string if no conversion is possible
|
|
388
388
|
return input_string
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
def most_occurring_element(list_of_elements: list):
|
|
392
|
+
counts = Counter(list_of_elements)
|
|
393
|
+
return counts.most_common(1)[0][0]
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
cognite/neat/__init__.py,sha256=v-rRiDOgZ3sQSMQKq0vgUQZvpeOkoHFXissAx6Ktg84,61
|
|
2
|
-
cognite/neat/_version.py,sha256=
|
|
2
|
+
cognite/neat/_version.py,sha256=3E7u1ATSjtoHo9Pq3zuZICzm_iPH64qoru2bGBZuNbw,23
|
|
3
3
|
cognite/neat/app/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
cognite/neat/app/api/asgi/metrics.py,sha256=nxFy7L5cChTI0a-zkCiJ59Aq8yLuIJp5c9Dg0wRXtV0,152
|
|
5
5
|
cognite/neat/app/api/configuration.py,sha256=2U5M6M252swvQPQyooA1EBzFUZNtcTmuSaywfJDgckM,4232
|
|
@@ -64,16 +64,20 @@ cognite/neat/graph/extractors/_classic_cdf/_relationships.py,sha256=w16hu_REIFEV
|
|
|
64
64
|
cognite/neat/graph/extractors/_classic_cdf/_sequences.py,sha256=o4yxkf81FGFrKkflvlyDYie05fTYsT_LcRFM63OTVCI,3406
|
|
65
65
|
cognite/neat/graph/extractors/_classic_cdf/_timeseries.py,sha256=KTYmL8vhXijlmkN1UFQrGpaCllpRekr1y55SoLhlLbg,4559
|
|
66
66
|
cognite/neat/graph/extractors/_dexpi.py,sha256=CYSLt0Fl7Y2RCqOfIAT0N8Cjs-Yu2lRLvB13axtAaWw,9384
|
|
67
|
-
cognite/neat/graph/extractors/_mock_graph_generator.py,sha256=
|
|
67
|
+
cognite/neat/graph/extractors/_mock_graph_generator.py,sha256=1TjgbxDVwgZjivIqx1lLKwggn_zHqWLiYM26esgDAMs,14694
|
|
68
68
|
cognite/neat/graph/extractors/_rdf_file.py,sha256=w4-XgPgNsmZOkNxjO1ZQCcopTntmmtxfDBkQxn1se6E,463
|
|
69
69
|
cognite/neat/graph/issues/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
70
70
|
cognite/neat/graph/issues/loader.py,sha256=v8YDsehkUT1QUG61JM9BDV_lqowMUnDmGmbay0aFzN4,3085
|
|
71
71
|
cognite/neat/graph/loaders/__init__.py,sha256=hHC9sfFfbnGSVFTYeuNTIEu4tdLSJ2mWV07fereLelo,125
|
|
72
72
|
cognite/neat/graph/loaders/_base.py,sha256=bdYC6CwsHVqnQa1QzOhL68qQhF1OtrsearqH6D-z3E4,4037
|
|
73
|
-
cognite/neat/graph/loaders/_rdf2dms.py,sha256=
|
|
73
|
+
cognite/neat/graph/loaders/_rdf2dms.py,sha256=6B3fdsaygSOYClq5vgNPMF4HJkO_Xt1OlrrbTsU0Bdc,12989
|
|
74
74
|
cognite/neat/graph/models.py,sha256=AtLgZh2qyRP6NRetjQCy9qLMuTQB0CH52Zsev-qa2sk,149
|
|
75
|
+
cognite/neat/graph/queries/__init__.py,sha256=BgDd-037kvtWwAoGAy8eORVNMiZ5-E9sIV0txIpeaN4,50
|
|
76
|
+
cognite/neat/graph/queries/_base.py,sha256=20A7GDBdmc35VmHVz5n0YCGPcnBAmUX-bM2ImHPManc,3844
|
|
77
|
+
cognite/neat/graph/queries/_construct.py,sha256=FxzSQqzCpo7lKVYerlLAY03oqCeFM5L6MozfBUblzr4,7341
|
|
78
|
+
cognite/neat/graph/queries/_shared.py,sha256=EwW2RbPttt7-z7QTgfKWlthA2Nq5d3bYyyewFkCA7R4,5043
|
|
75
79
|
cognite/neat/graph/stores/__init__.py,sha256=G-VG_YwfRt1kuPao07PDJyZ3w_0-eguzLUM13n-Z_RA,64
|
|
76
|
-
cognite/neat/graph/stores/_base.py,sha256=
|
|
80
|
+
cognite/neat/graph/stores/_base.py,sha256=b4qidXCl9190LrvLXTms2lDqv1ErwvvcGzQkY6mU9hI,10096
|
|
77
81
|
cognite/neat/graph/stores/_oxrdflib.py,sha256=A5zeRm5_e8ui_ihGpgstRDg_N7qcLZ3QZBRGrOXSGI0,9569
|
|
78
82
|
cognite/neat/graph/stores/_provenance.py,sha256=Hr9WBhFj-eoet4czL8XSBGYnu9Yn66YsTgH_G0n3QpY,3293
|
|
79
83
|
cognite/neat/graph/transformers/__init__.py,sha256=wXrNSyJNGnis3haaCKVPZ5y5kKSUsOUHnh-860ekatk,555
|
|
@@ -178,7 +182,7 @@ cognite/neat/rules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
|
|
|
178
182
|
cognite/neat/rules/_shared.py,sha256=5dc2dfwqjNB_Us27x7oRNkEoEETT0gQChoiIU8mcvyQ,170
|
|
179
183
|
cognite/neat/rules/analysis/__init__.py,sha256=J2yL0QWSvXOWLbaYPyA0HXHh3aqOWmkwobScdgVQpw8,115
|
|
180
184
|
cognite/neat/rules/analysis/_base.py,sha256=PmN5NLgGsovsHtsnvUzc_zuarWl-Xwk1azWcYKKuWdA,669
|
|
181
|
-
cognite/neat/rules/analysis/_information_rules.py,sha256=
|
|
185
|
+
cognite/neat/rules/analysis/_information_rules.py,sha256=fdSMyInsPJdgLHKwSkj2N9bcEXld9ETxUIXWqeDH8L4,17478
|
|
182
186
|
cognite/neat/rules/examples/__init__.py,sha256=nxIwueAcHgZhkYriGxnDLQmIyiT8PByPHbScjYKDKe0,374
|
|
183
187
|
cognite/neat/rules/examples/wind-energy.owl,sha256=NuomCA9FuuLF0JlSuG3OKqD4VBcHgSjDKFLV17G1zV8,65934
|
|
184
188
|
cognite/neat/rules/exceptions.py,sha256=YLnsbXXJdDSr_szQoioEtOdqDV8PR7RdQjpMP2SWeCs,123868
|
|
@@ -197,7 +201,7 @@ cognite/neat/rules/importers/_dtdl2rules/_unit_lookup.py,sha256=wW4saKva61Q_i17g
|
|
|
197
201
|
cognite/neat/rules/importers/_dtdl2rules/dtdl_converter.py,sha256=ysmWUxZ0npwrTB0uiH5jA0v37sfCwowGaYk17IyxPUU,12663
|
|
198
202
|
cognite/neat/rules/importers/_dtdl2rules/dtdl_importer.py,sha256=QDyGt5YBaxzF4v_oCFSgKRSpwVdVruDU3-VW0DEiHbY,6718
|
|
199
203
|
cognite/neat/rules/importers/_dtdl2rules/spec.py,sha256=tim_MfN1J0F3Oeqk3BMgIA82d_MZvhRuRMsLK3B4PYc,11897
|
|
200
|
-
cognite/neat/rules/importers/_inference2rules.py,sha256=
|
|
204
|
+
cognite/neat/rules/importers/_inference2rules.py,sha256=vN3l6gfca19FHGzPb9fwolZaq4Z8KkeiR-iK1up8Kqk,11478
|
|
201
205
|
cognite/neat/rules/importers/_owl2rules/__init__.py,sha256=tdGcrgtozdQyST-pTlxIa4cLBNTLvtk1nNYR4vOdFSw,63
|
|
202
206
|
cognite/neat/rules/importers/_owl2rules/_owl2classes.py,sha256=QpTxvrTGczIa48X8lgXGnMN1AWPhHK0DR6uNq175xak,7357
|
|
203
207
|
cognite/neat/rules/importers/_owl2rules/_owl2metadata.py,sha256=nwnUaBNAAYMoBre2UmsnkJXUuaqGEpR3U3txDrH2w6g,7527
|
|
@@ -233,8 +237,8 @@ cognite/neat/rules/models/domain.py,sha256=wZ-DeIPFnacbNlxSrRuLzUpnhHdTpzNc22z0s
|
|
|
233
237
|
cognite/neat/rules/models/entities.py,sha256=lkLsKg8U3Xto30PCB85ScDpv2SPRVq1ukVEQHzH53_g,18868
|
|
234
238
|
cognite/neat/rules/models/information/__init__.py,sha256=HR6g8xgyU53U7Ck8pPdbT70817Q4NC1r1pCRq5SA8iw,291
|
|
235
239
|
cognite/neat/rules/models/information/_converter.py,sha256=r0a2uyzv8m82xzAkYt_-ZXdMN5u46SA_mn95Oo7ng-s,11424
|
|
236
|
-
cognite/neat/rules/models/information/_rules.py,sha256=
|
|
237
|
-
cognite/neat/rules/models/information/_rules_input.py,sha256=
|
|
240
|
+
cognite/neat/rules/models/information/_rules.py,sha256=hQihDlji-DV3pmAPxIZpUBP9PVCcZxJXN2KyZZecFeM,13089
|
|
241
|
+
cognite/neat/rules/models/information/_rules_input.py,sha256=ExCjcD0pvsThXYDf3uWYLzSLqN_2OtXFggbW_RB8hr4,10343
|
|
238
242
|
cognite/neat/rules/models/information/_serializer.py,sha256=yti9I_xJruxrib66YIBInhze___Io-oPTQH6uWDumPE,3503
|
|
239
243
|
cognite/neat/rules/models/information/_validation.py,sha256=Is2GzL2lZU3A5zPu3NjvlXfmIU2_Y10C5Nxi5Denz4g,7528
|
|
240
244
|
cognite/neat/rules/models/wrapped_entities.py,sha256=ThhjnNNrpgz0HeORIQ8Q894trxP73P7T_TuZj6qH2CU,7157
|
|
@@ -251,7 +255,7 @@ cognite/neat/utils/exceptions.py,sha256=-w4cAcvcoWLf-_ZwAl7QV_NysfqtQzIOd1Ti-mpx
|
|
|
251
255
|
cognite/neat/utils/spreadsheet.py,sha256=LI0c7dlW0zXHkHw0NvB-gg6Df6cDcE3FbiaHBYLXdzQ,2714
|
|
252
256
|
cognite/neat/utils/text.py,sha256=4bg1_Q0lg7KsoxaDOvXrVyeY78BJN8i-27BlyDzUCls,3082
|
|
253
257
|
cognite/neat/utils/upload.py,sha256=XaAKqyMhz6qXbUrttGNIXZxFRPJvrnbMpDRF8GEiK2g,2707
|
|
254
|
-
cognite/neat/utils/utils.py,sha256=
|
|
258
|
+
cognite/neat/utils/utils.py,sha256=1LEwR8gpHw_6pvEeLkW_cDU_lUun4qSsw_Rr3JsKwgA,14172
|
|
255
259
|
cognite/neat/utils/xml.py,sha256=ppLT3lQKVp8wOP-m8-tFY8uB2P4R76l7R_-kUtsABng,992
|
|
256
260
|
cognite/neat/workflows/__init__.py,sha256=oiKub_U9f5cA0I1nKl5dFkR4BD8_6Be9eMzQ_50PwP0,396
|
|
257
261
|
cognite/neat/workflows/_exceptions.py,sha256=ugI_X1XNpikAiL8zIggBjcx6q7WvOpRIgvxHrj2Rhr4,1348
|
|
@@ -297,8 +301,8 @@ cognite/neat/workflows/steps_registry.py,sha256=fkTX14ZA7_gkUYfWIlx7A1XbCidvqR23
|
|
|
297
301
|
cognite/neat/workflows/tasks.py,sha256=dqlJwKAb0jlkl7abbY8RRz3m7MT4SK8-7cntMWkOYjw,788
|
|
298
302
|
cognite/neat/workflows/triggers.py,sha256=_BLNplzoz0iic367u1mhHMHiUrCwP-SLK6_CZzfODX0,7071
|
|
299
303
|
cognite/neat/workflows/utils.py,sha256=gKdy3RLG7ctRhbCRwaDIWpL9Mi98zm56-d4jfHDqP1E,453
|
|
300
|
-
cognite_neat-0.
|
|
301
|
-
cognite_neat-0.
|
|
302
|
-
cognite_neat-0.
|
|
303
|
-
cognite_neat-0.
|
|
304
|
-
cognite_neat-0.
|
|
304
|
+
cognite_neat-0.82.0.dist-info/LICENSE,sha256=W8VmvFia4WHa3Gqxq1Ygrq85McUNqIGDVgtdvzT-XqA,11351
|
|
305
|
+
cognite_neat-0.82.0.dist-info/METADATA,sha256=AHFZCjG5ms8ip8WkI3RnErG7lBXVEd19m9x_gDS9W8A,9290
|
|
306
|
+
cognite_neat-0.82.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
307
|
+
cognite_neat-0.82.0.dist-info/entry_points.txt,sha256=61FPqiWb25vbqB0KI7znG8nsg_ibLHBvTjYnkPvNFso,50
|
|
308
|
+
cognite_neat-0.82.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|