cognite-neat 0.85.7 → 0.85.9 (py3-none-any.whl)
This diff compares two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of cognite-neat has been flagged as possibly problematic.
- cognite/neat/_version.py +1 -1
- cognite/neat/graph/extractors/_classic_cdf/_assets.py +116 -24
- cognite/neat/graph/extractors/_classic_cdf/_events.py +56 -26
- cognite/neat/graph/extractors/_classic_cdf/_files.py +73 -29
- cognite/neat/graph/extractors/_classic_cdf/_labels.py +20 -11
- cognite/neat/graph/extractors/_classic_cdf/_relationships.py +35 -20
- cognite/neat/graph/extractors/_classic_cdf/_sequences.py +60 -22
- cognite/neat/graph/extractors/_classic_cdf/_timeseries.py +78 -30
- cognite/neat/rules/importers/_inference2rules.py +89 -23
- cognite/neat/rules/models/data_types.py +1 -1
- {cognite_neat-0.85.7.dist-info → cognite_neat-0.85.9.dist-info}/METADATA +1 -1
- {cognite_neat-0.85.7.dist-info → cognite_neat-0.85.9.dist-info}/RECORD +15 -15
- {cognite_neat-0.85.7.dist-info → cognite_neat-0.85.9.dist-info}/LICENSE +0 -0
- {cognite_neat-0.85.7.dist-info → cognite_neat-0.85.9.dist-info}/WHEEL +0 -0
- {cognite_neat-0.85.7.dist-info → cognite_neat-0.85.9.dist-info}/entry_points.txt +0 -0
cognite/neat/graph/extractors/_classic_cdf/_relationships.py:

@@ -38,7 +38,11 @@ class RelationshipsExtractor(BaseExtractor):
         namespace: Namespace | None = None,
     ):
         return cls(
-            cast(Iterable[Relationship], client.relationships(data_set_external_ids=data_set_external_id)), namespace
+            cast(
+                Iterable[Relationship],
+                client.relationships(data_set_external_ids=data_set_external_id),
+            ),
+            namespace,
         )

     @classmethod
@@ -48,26 +52,25 @@ class RelationshipsExtractor(BaseExtractor):
     def extract(self) -> Iterable[Triple]:
         """Extracts an asset with the given asset_id."""
         for relationship in self.relationships:
-            yield from self._relationship2triples(relationship, self.namespace)
+            yield from self._relationship2triples(relationship)

-    @classmethod
-    def _relationship2triples(cls, relationship: Relationship, namespace: Namespace) -> list[Triple]:
+    def _relationship2triples(self, relationship: Relationship) -> list[Triple]:
         """Converts an asset to triples."""

         if relationship.external_id and relationship.source_external_id and relationship.target_external_id:
             # relationships do not have an internal id, so we generate one
-            id_ = namespace[f"Relationship_{create_sha256_hash(relationship.external_id)}"]
+            id_ = self.namespace[f"Relationship_{create_sha256_hash(relationship.external_id)}"]

             # Set rdf type
-            triples: list[Triple] = [(id_, RDF.type, namespace["Relationship"])]
+            triples: list[Triple] = [(id_, RDF.type, self.namespace["Relationship"])]

             # Set source and target types
             if source_type := relationship.source_type:
                 triples.append(
                     (
                         id_,
-                        namespace.source_type,
-                        namespace[source_type.title()],
+                        self.namespace.source_type,
+                        self.namespace[source_type.title()],
                     )
                 )
@@ -75,19 +78,19 @@ class RelationshipsExtractor(BaseExtractor):
                 triples.append(
                     (
                         id_,
-                        namespace.target_type,
-                        namespace[target_type.title()],
+                        self.namespace.target_type,
+                        self.namespace[target_type.title()],
                     )
                 )

             # Create attributes

-            triples.append((id_, namespace.external_id, Literal(relationship.external_id)))
+            triples.append((id_, self.namespace.external_id, Literal(relationship.external_id)))

             triples.append(
                 (
                     id_,
-                    namespace.source_external_id,
+                    self.namespace.source_external_id,
                     Literal(relationship.source_external_id),
                 )
             )
@@ -95,7 +98,7 @@ class RelationshipsExtractor(BaseExtractor):
             triples.append(
                 (
                     id_,
-                    namespace.target_external_id,
+                    self.namespace.target_external_id,
                     Literal(relationship.target_external_id),
                 )
             )
@@ -104,7 +107,7 @@ class RelationshipsExtractor(BaseExtractor):
                 triples.append(
                     (
                         id_,
-                        namespace.start_time,
+                        self.namespace.start_time,
                         Literal(datetime.fromtimestamp(relationship.start_time / 1000, timezone.utc)),
                     )
                 )
@@ -113,7 +116,7 @@ class RelationshipsExtractor(BaseExtractor):
                 triples.append(
                     (
                         id_,
-                        namespace.end_time,
+                        self.namespace.end_time,
                         Literal(datetime.fromtimestamp(relationship.end_time / 1000, timezone.utc)),
                     )
                 )
@@ -122,7 +125,7 @@ class RelationshipsExtractor(BaseExtractor):
                 triples.append(
                     (
                         id_,
-                        namespace.created_time,
+                        self.namespace.created_time,
                         Literal(datetime.fromtimestamp(relationship.created_time / 1000, timezone.utc)),
                     )
                 )
@@ -131,7 +134,7 @@ class RelationshipsExtractor(BaseExtractor):
                 triples.append(
                     (
                         id_,
-                        namespace.last_updated_time,
+                        self.namespace.last_updated_time,
                         Literal(datetime.fromtimestamp(relationship.last_updated_time / 1000, timezone.utc)),
                     )
                 )
@@ -140,7 +143,7 @@ class RelationshipsExtractor(BaseExtractor):
                 triples.append(
                     (
                         id_,
-                        namespace.confidence,
+                        self.namespace.confidence,
                         Literal(relationship.confidence),
                     )
                 )
@@ -149,11 +152,23 @@ class RelationshipsExtractor(BaseExtractor):
                 for label in relationship.labels:
                     # external_id can create ill-formed URIs, so we create websafe URIs
                     # since labels do not have internal ids, we use the external_id as the id
-                    triples.append((id_, namespace.label, namespace[f"Label_{quote(label.dump()['externalId'])}"]))
+                    triples.append(
+                        (
+                            id_,
+                            self.namespace.label,
+                            self.namespace[f"Label_{quote(label.dump()['externalId'])}"],
+                        )
+                    )

             # Create connection
             if relationship.data_set_id:
-                triples.append((id_, namespace.dataset, namespace[f"Dataset_{relationship.data_set_id}"]))
+                triples.append(
+                    (
+                        id_,
+                        self.namespace.dataset,
+                        self.namespace[f"Dataset_{relationship.data_set_id}"],
+                    )
+                )

             return triples
         return []
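The visible effect of this refactor is that `_relationship2triples` is now an instance method that reads `self.namespace`, so `extract()` no longer threads the namespace through every call. A minimal usage sketch (the dataset external id and namespace are placeholders, and it assumes `RelationshipsExtractor` is re-exported from `cognite.neat.graph.extractors` rather than imported from the private `_classic_cdf` module):

```python
from cognite.client import CogniteClient
from rdflib import Namespace

from cognite.neat.graph.extractors import RelationshipsExtractor

client = CogniteClient()  # assumes CDF credentials are configured in the environment
extractor = RelationshipsExtractor.from_dataset(
    client,
    data_set_external_id="my_data_set",  # placeholder
    namespace=Namespace("http://example.org/"),
)

# extract() yields (subject, predicate, object) triples; the namespace now
# comes from the extractor instance instead of a per-call argument.
for triple in extractor.extract():
    print(triple)
```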
cognite/neat/graph/extractors/_classic_cdf/_sequences.py:

@@ -1,3 +1,4 @@
+import json
 from collections.abc import Iterable
 from datetime import datetime, timezone
 from pathlib import Path
@@ -20,15 +21,19 @@ class SequencesExtractor(BaseExtractor):
     Args:
         sequence (Iterable[Sequence]): An iterable of sequences.
         namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
+        unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
+            a JSON string.
     """

     def __init__(
         self,
         sequence: Iterable[Sequence],
         namespace: Namespace | None = None,
+        unpack_metadata: bool = True,
     ):
         self.namespace = namespace or DEFAULT_NAMESPACE
         self.sequence = sequence
+        self.unpack_metadata = unpack_metadata

     @classmethod
     def from_dataset(
@@ -36,50 +41,71 @@ class SequencesExtractor(BaseExtractor):
         client: CogniteClient,
         data_set_external_id: str,
         namespace: Namespace | None = None,
+        unpack_metadata: bool = True,
     ):
-        return cls(cast(Iterable[Sequence], client.sequences(data_set_external_ids=data_set_external_id)), namespace)
+        return cls(
+            cast(
+                Iterable[Sequence],
+                client.sequences(data_set_external_ids=data_set_external_id),
+            ),
+            namespace,
+            unpack_metadata,
+        )

     @classmethod
-    def from_file(cls, file_path: str, namespace: Namespace | None = None):
-        return cls(SequenceList.load(Path(file_path).read_text()), namespace)
+    def from_file(
+        cls,
+        file_path: str,
+        namespace: Namespace | None = None,
+        unpack_metadata: bool = True,
+    ):
+        return cls(SequenceList.load(Path(file_path).read_text()), namespace, unpack_metadata)

     def extract(self) -> Iterable[Triple]:
         """Extract sequences as triples."""
         for sequence in self.sequence:
-            yield from self._sequence2triples(sequence, self.namespace)
+            yield from self._sequence2triples(sequence)

-    @classmethod
-    def _sequence2triples(cls, sequence: Sequence, namespace: Namespace) -> list[Triple]:
-        id_ = namespace[f"Sequence_{sequence.id}"]
+    def _sequence2triples(self, sequence: Sequence) -> list[Triple]:
+        id_ = self.namespace[f"Sequence_{sequence.id}"]

         # Set rdf type
-        triples: list[Triple] = [(id_, RDF.type, namespace.Sequence)]
+        triples: list[Triple] = [(id_, RDF.type, self.namespace.Sequence)]

         # Create attributes

         if sequence.external_id:
-            triples.append((id_, namespace.external_id, Literal(sequence.external_id)))
+            triples.append((id_, self.namespace.external_id, Literal(sequence.external_id)))

         if sequence.name:
-            triples.append((id_, namespace.name, Literal(sequence.name)))
+            triples.append((id_, self.namespace.name, Literal(sequence.name)))

         if sequence.metadata:
-            for key, value in sequence.metadata.items():
-                if value:
-                    type_aware_value = string_to_ideal_type(value)
-                    try:
-                        triples.append((id_, namespace[key], URIRef(str(AnyHttpUrl(type_aware_value)))))  # type: ignore
-                    except ValidationError:
-                        triples.append((id_, namespace[key], Literal(type_aware_value)))
+            if self.unpack_metadata:
+                for key, value in sequence.metadata.items():
+                    if value:
+                        type_aware_value = string_to_ideal_type(value)
+                        try:
+                            triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value)))))  # type: ignore
+                        except ValidationError:
+                            triples.append((id_, self.namespace[key], Literal(type_aware_value)))
+            else:
+                triples.append(
+                    (
+                        id_,
+                        self.namespace.metadata,
+                        Literal(json.dumps(sequence.metadata)),
+                    )
+                )

         if sequence.description:
-            triples.append((id_, namespace.description, Literal(sequence.description)))
+            triples.append((id_, self.namespace.description, Literal(sequence.description)))

         if sequence.created_time:
             triples.append(
                 (
                     id_,
-                    namespace.created_time,
+                    self.namespace.created_time,
                     Literal(datetime.fromtimestamp(sequence.created_time / 1000, timezone.utc)),
                 )
             )
@@ -88,15 +114,27 @@ class SequencesExtractor(BaseExtractor):
             triples.append(
                 (
                     id_,
-                    namespace.last_updated_time,
+                    self.namespace.last_updated_time,
                     Literal(datetime.fromtimestamp(sequence.last_updated_time / 1000, timezone.utc)),
                 )
             )

         if sequence.data_set_id:
-            triples.append((id_, namespace.data_set_id, namespace[f"Dataset_{sequence.data_set_id}"]))
+            triples.append(
+                (
+                    id_,
+                    self.namespace.data_set_id,
+                    self.namespace[f"Dataset_{sequence.data_set_id}"],
+                )
+            )

         if sequence.asset_id:
-            triples.append((id_, namespace.asset, namespace[f"Asset_{sequence.asset_id}"]))
+            triples.append(
+                (
+                    id_,
+                    self.namespace.asset,
+                    self.namespace[f"Asset_{sequence.asset_id}"],
+                )
+            )

         return triples
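The new `unpack_metadata` flag (and the matching `import json`) switches between one triple per metadata key and a single JSON-string literal for the whole dict. Note the new docstring says the flag "Defaults to False" while the signature defaults to `True`; the signature is what runs. A small sketch of the two modes with a hand-built `Sequence` (same re-export assumption as above):

```python
from cognite.client.data_classes import Sequence

from cognite.neat.graph.extractors import SequencesExtractor

seq = Sequence(id=42, external_id="seq-42", metadata={"unit": "m", "source": "plc"})

# Default (True): one triple per metadata key, typed via string_to_ideal_type
# and promoted to a URIRef when the value parses as an AnyHttpUrl.
unpacked = list(SequencesExtractor([seq], unpack_metadata=True).extract())

# False: a single triple whose object is Literal(json.dumps(seq.metadata)).
packed = list(SequencesExtractor([seq], unpack_metadata=False).extract())

assert len(unpacked) == len(packed) + 1  # two metadata triples collapse into one
```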
cognite/neat/graph/extractors/_classic_cdf/_timeseries.py:

@@ -1,3 +1,4 @@
+import json
 from collections.abc import Iterable
 from datetime import datetime, timezone
 from pathlib import Path
@@ -20,15 +21,19 @@ class TimeSeriesExtractor(BaseExtractor):
     Args:
         timeseries (Iterable[TimeSeries]): An iterable of timeseries.
         namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
+        unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
+            a JSON string.
     """

     def __init__(
         self,
         timeseries: Iterable[TimeSeries],
         namespace: Namespace | None = None,
+        unpack_metadata: bool = True,
     ):
         self.namespace = namespace or DEFAULT_NAMESPACE
         self.timeseries = timeseries
+        self.unpack_metadata = unpack_metadata

     @classmethod
     def from_dataset(
@@ -36,65 +41,84 @@ class TimeSeriesExtractor(BaseExtractor):
         client: CogniteClient,
         data_set_external_id: str,
         namespace: Namespace | None = None,
+        unpack_metadata: bool = True,
     ):
         return cls(
-            cast(Iterable[TimeSeries], client.time_series(data_set_external_ids=data_set_external_id)), namespace
+            cast(
+                Iterable[TimeSeries],
+                client.time_series(data_set_external_ids=data_set_external_id),
+            ),
+            namespace,
+            unpack_metadata,
         )

     @classmethod
-    def from_file(cls, file_path: str, namespace: Namespace | None = None):
-        return cls(TimeSeriesList.load(Path(file_path).read_text()), namespace)
+    def from_file(
+        cls,
+        file_path: str,
+        namespace: Namespace | None = None,
+        unpack_metadata: bool = True,
+    ):
+        return cls(TimeSeriesList.load(Path(file_path).read_text()), namespace, unpack_metadata)

     def extract(self) -> Iterable[Triple]:
         """Extract timeseries as triples."""
         for timeseries in self.timeseries:
-            yield from self._timeseries2triples(timeseries, self.namespace)
+            yield from self._timeseries2triples(timeseries)

-    @classmethod
-    def _timeseries2triples(cls, timeseries: TimeSeries, namespace: Namespace) -> list[Triple]:
-        id_ = namespace[f"TimeSeries_{timeseries.id}"]
+    def _timeseries2triples(self, timeseries: TimeSeries) -> list[Triple]:
+        id_ = self.namespace[f"TimeSeries_{timeseries.id}"]

         # Set rdf type
-        triples: list[Triple] = [(id_, RDF.type, namespace.TimeSeries)]
+        triples: list[Triple] = [(id_, RDF.type, self.namespace.TimeSeries)]

         # Create attributes

         if timeseries.external_id:
-            triples.append((id_, namespace.external_id, Literal(timeseries.external_id)))
+            triples.append((id_, self.namespace.external_id, Literal(timeseries.external_id)))

         if timeseries.name:
-            triples.append((id_, namespace.name, Literal(timeseries.name)))
+            triples.append((id_, self.namespace.name, Literal(timeseries.name)))

         if timeseries.is_string:
-            triples.append((id_, namespace.is_string, Literal(timeseries.is_string)))
+            triples.append((id_, self.namespace.is_string, Literal(timeseries.is_string)))

         if timeseries.metadata:
-            for key, value in timeseries.metadata.items():
-                if value:
-                    type_aware_value = string_to_ideal_type(value)
-                    try:
-                        triples.append((id_, namespace[key], URIRef(str(AnyHttpUrl(type_aware_value)))))  # type: ignore
-                    except ValidationError:
-                        triples.append((id_, namespace[key], Literal(type_aware_value)))
+            if self.unpack_metadata:
+                for key, value in timeseries.metadata.items():
+                    if value:
+                        type_aware_value = string_to_ideal_type(value)
+                        try:
+                            triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value)))))  # type: ignore
+                        except ValidationError:
+                            triples.append((id_, self.namespace[key], Literal(type_aware_value)))
+            else:
+                triples.append(
+                    (
+                        id_,
+                        self.namespace.metadata,
+                        Literal(json.dumps(timeseries.metadata)),
+                    )
+                )

         if timeseries.unit:
-            triples.append((id_, namespace.unit, Literal(timeseries.unit)))
+            triples.append((id_, self.namespace.unit, Literal(timeseries.unit)))

-        if namespace.is_step:
-            triples.append((id_, namespace.is_step, Literal(timeseries.is_step)))
+        if self.namespace.is_step:
+            triples.append((id_, self.namespace.is_step, Literal(timeseries.is_step)))

         if timeseries.description:
-            triples.append((id_, namespace.description, Literal(timeseries.description)))
+            triples.append((id_, self.namespace.description, Literal(timeseries.description)))

         if timeseries.security_categories:
             for category in timeseries.security_categories:
-                triples.append((id_, namespace.security_categories, Literal(category)))
+                triples.append((id_, self.namespace.security_categories, Literal(category)))

         if timeseries.created_time:
             triples.append(
                 (
                     id_,
-                    namespace.created_time,
+                    self.namespace.created_time,
                     Literal(datetime.fromtimestamp(timeseries.created_time / 1000, timezone.utc)),
                 )
             )
@@ -103,26 +127,50 @@ class TimeSeriesExtractor(BaseExtractor):
             triples.append(
                 (
                     id_,
-                    namespace.last_updated_time,
+                    self.namespace.last_updated_time,
                     Literal(datetime.fromtimestamp(timeseries.last_updated_time / 1000, timezone.utc)),
                 )
             )

         if timeseries.legacy_name:
-            triples.append((id_, namespace.legacy_name, Literal(timeseries.legacy_name)))
+            triples.append((id_, self.namespace.legacy_name, Literal(timeseries.legacy_name)))

         # Create connections
         if timeseries.unit_external_id:
             # try to create connection to QUDT unit catalog
             try:
-                triples.append((id_, namespace.unit_external_id, URIRef(str(AnyHttpUrl(timeseries.unit_external_id)))))
+                triples.append(
+                    (
+                        id_,
+                        self.namespace.unit_external_id,
+                        URIRef(str(AnyHttpUrl(timeseries.unit_external_id))),
+                    )
+                )
             except ValidationError:
-                triples.append((id_, namespace.unit_external_id, Literal(timeseries.unit_external_id)))
+                triples.append(
+                    (
+                        id_,
+                        self.namespace.unit_external_id,
+                        Literal(timeseries.unit_external_id),
+                    )
+                )

         if timeseries.data_set_id:
-            triples.append((id_, namespace.dataset, namespace[f"Dataset_{timeseries.data_set_id}"]))
+            triples.append(
+                (
+                    id_,
+                    self.namespace.dataset,
+                    self.namespace[f"Dataset_{timeseries.data_set_id}"],
+                )
+            )

         if timeseries.asset_id:
-            triples.append((id_, namespace.asset, namespace[f"Asset_{timeseries.asset_id}"]))
+            triples.append(
+                (
+                    id_,
+                    self.namespace.asset,
+                    self.namespace[f"Asset_{timeseries.asset_id}"],
+                )
+            )

         return triples
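`TimeSeriesExtractor` gains the identical flag on `__init__`, `from_dataset`, and the now multi-line `from_file`. A sketch of the file-based path (placeholder file name, same re-export assumption):

```python
from cognite.neat.graph.extractors import TimeSeriesExtractor

extractor = TimeSeriesExtractor.from_file(
    "timeseries.json",  # placeholder: a dump readable by TimeSeriesList.load
    unpack_metadata=False,  # keep metadata as one JSON-string literal per time series
)
triples = list(extractor.extract())
```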
cognite/neat/rules/importers/_inference2rules.py:

@@ -17,7 +17,11 @@ from cognite.neat.rules.models.information import (
     InformationMetadata,
     InformationRulesInput,
 )
-from cognite.neat.utils.utils import get_namespace, remove_namespace_from_uri, uri_to_short_form
+from cognite.neat.utils.utils import (
+    get_namespace,
+    remove_namespace_from_uri,
+    uri_to_short_form,
+)

 ORDERED_CLASSES_QUERY = """SELECT ?class (count(?s) as ?instances )
 WHERE { ?s a ?class . }
@@ -25,9 +29,21 @@ ORDERED_CLASSES_QUERY = """SELECT ?class (count(?s) as ?instances )

 INSTANCES_OF_CLASS_QUERY = """SELECT ?s WHERE { ?s a <class> . }"""

+INSTANCE_PROPERTIES_JSON_DEFINITION = """SELECT ?property (count(?property) as ?occurrence) ?dataType ?objectType
+WHERE {<instance_id> ?property ?value .
+
+       BIND(IF(REGEX(?value, "^\u007b(.*)\u007d$"),
+               <http://www.w3.org/2001/XMLSchema#json>,
+               datatype(?value)) AS ?dataType)
+
+       OPTIONAL {?value rdf:type ?objectType .}}
+GROUP BY ?property ?dataType ?objectType"""
+
 INSTANCE_PROPERTIES_DEFINITION = """SELECT ?property (count(?property) as ?occurrence) ?dataType ?objectType
 WHERE {<instance_id> ?property ?value .
+
        BIND(datatype(?value) AS ?dataType)
+
        OPTIONAL {?value rdf:type ?objectType .}}
 GROUP BY ?property ?dataType ?objectType"""
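The only difference between the two queries is the `BIND`: the new variant re-types any value matching `^{(.*)}$` (the `\u007b`/`\u007d` escapes are simply `{`/`}`) as `xsd:json` instead of keeping its literal datatype. A standalone rdflib illustration of that idea, not code from the package:

```python
from rdflib import Graph, Literal, Namespace

EX = Namespace("http://example.org/")
g = Graph()
g.add((EX.pump, EX.metadata, Literal('{"unit": "m"}')))
g.add((EX.pump, EX.name, Literal("pump-01")))

# Same IF/REGEX trick as INSTANCE_PROPERTIES_JSON_DEFINITION, inlined here.
query = """SELECT ?property ?dataType
WHERE { <http://example.org/pump> ?property ?value .
        BIND(IF(REGEX(?value, "^{(.*)}$"),
                <http://www.w3.org/2001/XMLSchema#json>,
                datatype(?value)) AS ?dataType) }"""

for prop, data_type in g.query(query):
    # expected: metadata -> ...XMLSchema#json, name -> its plain string datatype
    print(prop, data_type)
```

The remaining hunks in `_inference2rules.py` continue below.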
@@ -36,28 +52,57 @@ class InferenceImporter(BaseImporter):
     """Infers rules from a triple store.

     Rules inference through analysis of knowledge graph provided in various formats.
-    Use the factory methods to create
+    Use the factory methods to create a triple store from sources such as
     RDF files, JSON files, YAML files, XML files, or directly from a graph store.

     Args:
         issue_list: Issue list to store issues
         graph: Knowledge graph
         max_number_of_instance: Maximum number of instances to be used in inference
+        prefix: Prefix to be used for the inferred model
+        check_for_json_string: Check if values are JSON strings
     """

-    def __init__(self, issue_list: IssueList, graph: Graph, max_number_of_instance: int = -1) -> None:
+    def __init__(
+        self,
+        issue_list: IssueList,
+        graph: Graph,
+        max_number_of_instance: int = -1,
+        prefix: str = "inferred",
+        check_for_json_string: bool = False,
+    ) -> None:
         self.issue_list = issue_list
         self.graph = graph
         self.max_number_of_instance = max_number_of_instance
+        self.prefix = prefix
+        self.check_for_json_string = check_for_json_string

     @classmethod
-    def from_graph_store(cls, store: NeatGraphStore, max_number_of_instance: int = -1) -> "InferenceImporter":
+    def from_graph_store(
+        cls,
+        store: NeatGraphStore,
+        max_number_of_instance: int = -1,
+        prefix: str = "inferred",
+        check_for_json_string: bool = False,
+    ) -> "InferenceImporter":
         issue_list = IssueList(title="Inferred from graph store")

-        return cls(issue_list, store.graph, max_number_of_instance=max_number_of_instance)
+        return cls(
+            issue_list,
+            store.graph,
+            max_number_of_instance=max_number_of_instance,
+            prefix=prefix,
+            check_for_json_string=check_for_json_string,
+        )

     @classmethod
-    def from_rdf_file(cls, filepath: Path, max_number_of_instance: int = -1) -> "InferenceImporter":
+    def from_rdf_file(
+        cls,
+        filepath: Path,
+        max_number_of_instance: int = -1,
+        prefix: str = "inferred",
+        check_for_json_string: bool = False,
+    ) -> "InferenceImporter":
         issue_list = IssueList(title=f"'{filepath.name}'")

         graph = Graph()
@@ -66,18 +111,42 @@ class InferenceImporter(BaseImporter):
         except Exception:
             issue_list.append(issues.fileread.FileReadError(filepath))

-        return cls(issue_list, graph, max_number_of_instance=max_number_of_instance)
+        return cls(
+            issue_list,
+            graph,
+            max_number_of_instance=max_number_of_instance,
+            prefix=prefix,
+            check_for_json_string=check_for_json_string,
+        )

     @classmethod
-    def from_json_file(cls, filepath: Path, max_number_of_instance: int = -1) -> "InferenceImporter":
+    def from_json_file(
+        cls,
+        filepath: Path,
+        max_number_of_instance: int = -1,
+        prefix: str = "inferred",
+        check_for_json_string: bool = False,
+    ) -> "InferenceImporter":
         raise NotImplementedError("JSON file format is not supported yet.")

     @classmethod
-    def from_yaml_file(cls, filepath: Path, max_number_of_instance: int = -1) -> "InferenceImporter":
+    def from_yaml_file(
+        cls,
+        filepath: Path,
+        max_number_of_instance: int = -1,
+        prefix: str = "inferred",
+        check_for_json_string: bool = False,
+    ) -> "InferenceImporter":
         raise NotImplementedError("YAML file format is not supported yet.")

     @classmethod
-    def from_xml_file(cls, filepath: Path, max_number_of_instance: int = -1) -> "InferenceImporter":
+    def from_xml_file(
+        cls,
+        filepath: Path,
+        max_number_of_instance: int = -1,
+        prefix: str = "inferred",
+        check_for_json_string: bool = False,
+    ) -> "InferenceImporter":
         raise NotImplementedError("JSON file format is not supported yet.")

     @overload
@@ -135,6 +204,7 @@ class InferenceImporter(BaseImporter):
         properties: dict[str, dict] = {}
         prefixes: dict[str, Namespace] = PREFIXES.copy()

+        query = INSTANCE_PROPERTIES_JSON_DEFINITION if self.check_for_json_string else INSTANCE_PROPERTIES_DEFINITION
         # Adds default namespace to prefixes
         prefixes[self._default_metadata().prefix] = self._default_metadata().namespace

@@ -162,7 +232,7 @@ class InferenceImporter(BaseImporter):
             + f" LIMIT {self.max_number_of_instance}"
         ):
             for property_uri, occurrence, data_type_uri, object_type_uri in self.graph.query(  # type: ignore[misc]
-                INSTANCE_PROPERTIES_DEFINITION.replace("instance_id", instance)
+                query.replace("instance_id", instance)
             ):  # type: ignore[misc]
                 property_id = remove_namespace_from_uri(property_uri)
                 self._add_uri_namespace_to_prefixes(cast(URIRef, property_uri), prefixes)
@@ -239,8 +309,7 @@ class InferenceImporter(BaseImporter):
         if Namespace(get_namespace(URI)) not in prefixes.values():
             prefixes[f"prefix-{len(prefixes)+1}"] = Namespace(get_namespace(URI))

-    @classmethod
-    def _default_metadata(cls):
+    def _default_metadata(self):
         return InformationMetadata(
             name="Inferred Model",
             creator="NEAT",
@@ -248,7 +317,7 @@ class InferenceImporter(BaseImporter):
             created=datetime.now(),
             updated=datetime.now(),
             description="Inferred model from knowledge graph",
-            prefix="inferred",
+            prefix=self.prefix,
             namespace=DEFAULT_NAMESPACE,
         )
@@ -262,12 +331,9 @@ class InferenceImporter(BaseImporter):

     @classmethod
     def _read_value_type_occurrence_from_comment(cls, value_type: str, comment: str) -> int:
-        return int(
-            cast(
-                re.Match,
-                re.search(
-                    rf"with value type <{value_type}> which occurs <(\d+)> times in the graph",
-                    comment,
-                ),
-            ).group(1)
-        )
+        if result := re.search(
+            rf"with value type <{value_type}> which occurs <(\d+)> times in the graph",
+            comment,
+        ):
+            return int(result.group(1))
+        return 0