cognite-neat 0.106.0__py3-none-any.whl → 0.108.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_constants.py +35 -1
- cognite/neat/_graph/_shared.py +4 -0
- cognite/neat/_graph/extractors/__init__.py +5 -1
- cognite/neat/_graph/extractors/_base.py +32 -0
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +128 -14
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +156 -12
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +50 -12
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +26 -1
- cognite/neat/_graph/extractors/_dms.py +196 -47
- cognite/neat/_graph/extractors/_dms_graph.py +199 -0
- cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
- cognite/neat/_graph/extractors/_rdf_file.py +33 -5
- cognite/neat/_graph/loaders/__init__.py +1 -3
- cognite/neat/_graph/loaders/_rdf2dms.py +123 -19
- cognite/neat/_graph/queries/_base.py +140 -84
- cognite/neat/_graph/queries/_construct.py +2 -2
- cognite/neat/_graph/transformers/__init__.py +8 -1
- cognite/neat/_graph/transformers/_base.py +9 -1
- cognite/neat/_graph/transformers/_classic_cdf.py +90 -3
- cognite/neat/_graph/transformers/_rdfpath.py +3 -3
- cognite/neat/_graph/transformers/_value_type.py +106 -45
- cognite/neat/_issues/errors/_resources.py +1 -1
- cognite/neat/_issues/warnings/__init__.py +0 -2
- cognite/neat/_issues/warnings/_models.py +1 -1
- cognite/neat/_issues/warnings/_properties.py +0 -8
- cognite/neat/_rules/analysis/_base.py +1 -1
- cognite/neat/_rules/analysis/_information.py +14 -13
- cognite/neat/_rules/catalog/__init__.py +1 -0
- cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
- cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
- cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
- cognite/neat/_rules/importers/__init__.py +3 -1
- cognite/neat/_rules/importers/_dms2rules.py +7 -5
- cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
- cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
- cognite/neat/_rules/importers/_rdf/_base.py +2 -2
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +242 -19
- cognite/neat/_rules/models/_base_rules.py +13 -15
- cognite/neat/_rules/models/_types.py +5 -0
- cognite/neat/_rules/models/dms/_rules.py +51 -10
- cognite/neat/_rules/models/dms/_rules_input.py +4 -0
- cognite/neat/_rules/models/information/_rules.py +48 -5
- cognite/neat/_rules/models/information/_rules_input.py +6 -1
- cognite/neat/_rules/models/mapping/_classic2core.py +4 -5
- cognite/neat/_rules/transformers/__init__.py +10 -0
- cognite/neat/_rules/transformers/_converters.py +300 -62
- cognite/neat/_session/_base.py +57 -10
- cognite/neat/_session/_drop.py +5 -1
- cognite/neat/_session/_inspect.py +3 -2
- cognite/neat/_session/_mapping.py +17 -6
- cognite/neat/_session/_prepare.py +0 -47
- cognite/neat/_session/_read.py +115 -10
- cognite/neat/_session/_set.py +27 -0
- cognite/neat/_session/_show.py +4 -4
- cognite/neat/_session/_state.py +12 -1
- cognite/neat/_session/_to.py +43 -2
- cognite/neat/_session/_wizard.py +1 -1
- cognite/neat/_session/exceptions.py +8 -3
- cognite/neat/_store/_graph_store.py +331 -136
- cognite/neat/_store/_rules_store.py +130 -1
- cognite/neat/_utils/auth.py +3 -1
- cognite/neat/_version.py +1 -1
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/METADATA +2 -2
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/RECORD +67 -65
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/WHEEL +1 -1
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/entry_points.txt +0 -0
|
@@ -3,15 +3,16 @@ import warnings
|
|
|
3
3
|
from collections.abc import Iterable
|
|
4
4
|
from datetime import datetime, timezone
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import cast
|
|
6
|
+
from typing import cast, overload
|
|
7
|
+
from zipfile import ZipExtFile
|
|
7
8
|
|
|
8
9
|
import pandas as pd
|
|
9
10
|
from pandas import Index
|
|
10
|
-
from rdflib import Dataset, Namespace, URIRef
|
|
11
|
+
from rdflib import Dataset, Graph, Namespace, URIRef
|
|
12
|
+
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
|
|
11
13
|
from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
|
|
12
14
|
|
|
13
|
-
from cognite.neat.
|
|
14
|
-
from cognite.neat._graph._shared import rdflib_to_oxi_type
|
|
15
|
+
from cognite.neat._graph._shared import quad_formats, rdflib_to_oxi_type
|
|
15
16
|
from cognite.neat._graph.extractors import RdfFileExtractor, TripleExtractors
|
|
16
17
|
from cognite.neat._graph.queries import Queries
|
|
17
18
|
from cognite.neat._graph.transformers import Transformers
|
|
@@ -21,7 +22,7 @@ from cognite.neat._rules.models import InformationRules
|
|
|
21
22
|
from cognite.neat._rules.models.entities import ClassEntity
|
|
22
23
|
from cognite.neat._shared import InstanceType, Triple
|
|
23
24
|
from cognite.neat._utils.auxiliary import local_import
|
|
24
|
-
from cognite.neat._utils.rdf_ import add_triples_in_batch
|
|
25
|
+
from cognite.neat._utils.rdf_ import add_triples_in_batch, remove_namespace_from_uri
|
|
25
26
|
|
|
26
27
|
from ._provenance import Change, Provenance
|
|
27
28
|
|
|
@@ -38,80 +39,130 @@ class NeatGraphStore:
|
|
|
38
39
|
Args:
|
|
39
40
|
graph : Instance of rdflib.Graph class for graph storage
|
|
40
41
|
rules:
|
|
42
|
+
|
|
43
|
+
!!! note "Dataset"
|
|
44
|
+
The store leverages a RDF dataset which is defined as a collection of RDF graphs
|
|
45
|
+
where all but one are named graphs associated with URIRef (the graph name),
|
|
46
|
+
and the unnamed default graph which is in context of rdflib library has an
|
|
47
|
+
identifier URIRef('urn:x-rdflib:default').
|
|
41
48
|
"""
|
|
42
49
|
|
|
43
50
|
rdf_store_type: str
|
|
44
51
|
|
|
45
52
|
def __init__(
|
|
46
53
|
self,
|
|
47
|
-
|
|
48
|
-
|
|
54
|
+
dataset: Dataset,
|
|
55
|
+
default_named_graph: URIRef | None = None,
|
|
49
56
|
):
|
|
50
|
-
self.rules: InformationRules
|
|
57
|
+
self.rules: dict[URIRef, InformationRules] = {}
|
|
58
|
+
self.base_namespace: dict[URIRef, Namespace] = {}
|
|
51
59
|
|
|
52
60
|
_start = datetime.now(timezone.utc)
|
|
53
|
-
self.
|
|
61
|
+
self.dataset = dataset
|
|
54
62
|
self.provenance = Provenance(
|
|
55
63
|
[
|
|
56
64
|
Change.record(
|
|
57
65
|
activity=f"{type(self).__name__}.__init__",
|
|
58
66
|
start=_start,
|
|
59
67
|
end=datetime.now(timezone.utc),
|
|
60
|
-
description=f"Initialize graph store as {type(self.
|
|
68
|
+
description=f"Initialize graph store as {type(self.dataset.store).__name__}",
|
|
61
69
|
)
|
|
62
70
|
]
|
|
63
71
|
)
|
|
64
72
|
|
|
65
|
-
|
|
66
|
-
self.add_rules(rules)
|
|
67
|
-
else:
|
|
68
|
-
self.base_namespace = DEFAULT_NAMESPACE
|
|
73
|
+
self.default_named_graph = default_named_graph or DATASET_DEFAULT_GRAPH_ID
|
|
69
74
|
|
|
70
|
-
self.queries = Queries(self.
|
|
75
|
+
self.queries = Queries(self.dataset, self.rules, self.default_named_graph)
|
|
76
|
+
|
|
77
|
+
def graph(self, named_graph: URIRef | None = None) -> Graph:
|
|
78
|
+
"""Get named graph from the dataset to query over"""
|
|
79
|
+
return self.dataset.graph(named_graph or self.default_named_graph)
|
|
71
80
|
|
|
72
81
|
@property
|
|
73
82
|
def type_(self) -> str:
|
|
74
83
|
"Return type of the graph store"
|
|
75
|
-
return type(self.
|
|
84
|
+
return type(self.dataset.store).__name__
|
|
85
|
+
|
|
86
|
+
# no destination
|
|
87
|
+
@overload
|
|
88
|
+
def serialize(self, filepath: None = None) -> str: ...
|
|
89
|
+
|
|
90
|
+
# with destination
|
|
91
|
+
@overload
|
|
92
|
+
def serialize(self, filepath: Path) -> None: ...
|
|
93
|
+
|
|
94
|
+
def serialize(self, filepath: Path | None = None) -> None | str:
|
|
95
|
+
"""Serialize the graph store to a file.
|
|
96
|
+
|
|
97
|
+
Args:
|
|
98
|
+
filepath: File path to serialize the graph store to
|
|
76
99
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
100
|
+
Returns:
|
|
101
|
+
Serialized graph store
|
|
102
|
+
|
|
103
|
+
!!! note "Trig Format"
|
|
104
|
+
Notice that instead of turtle format we are using trig format for serialization.
|
|
105
|
+
This is because trig format is a superset of turtle format and it allows us to
|
|
106
|
+
serialize named graphs as well. Allowing serialization of one or more named graphs
|
|
107
|
+
including the default graph.
|
|
80
108
|
"""
|
|
109
|
+
if filepath:
|
|
110
|
+
self.dataset.serialize(
|
|
111
|
+
filepath,
|
|
112
|
+
format="ox-trig" if self.type_ == "OxigraphStore" else "trig",
|
|
113
|
+
)
|
|
114
|
+
return None
|
|
115
|
+
else:
|
|
116
|
+
return self.dataset.serialize(format="ox-trig" if self.type_ == "OxigraphStore" else "trig")
|
|
81
117
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
118
|
+
def add_rules(self, rules: InformationRules, named_graph: URIRef | None = None) -> None:
|
|
119
|
+
"""This method is used to add rules to a named graph stored in the graph store.
|
|
120
|
+
|
|
121
|
+
Args:
|
|
122
|
+
rules: InformationRules object containing rules to be added to the named graph
|
|
123
|
+
named_graph: URIRef of the named graph to store the rules in, by default None
|
|
124
|
+
rules will be added to the default graph
|
|
125
|
+
|
|
126
|
+
"""
|
|
127
|
+
|
|
128
|
+
named_graph = named_graph or self.default_named_graph
|
|
129
|
+
|
|
130
|
+
if named_graph in self.named_graphs:
|
|
131
|
+
# attaching appropriate namespace to the rules
|
|
132
|
+
# as well base_namespace
|
|
133
|
+
self.rules[named_graph] = rules
|
|
134
|
+
self.base_namespace[named_graph] = rules.metadata.namespace
|
|
135
|
+
self.queries = Queries(self.dataset, self.rules)
|
|
136
|
+
self.provenance.append(
|
|
137
|
+
Change.record(
|
|
138
|
+
activity=f"{type(self)}.rules",
|
|
139
|
+
start=datetime.now(timezone.utc),
|
|
140
|
+
end=datetime.now(timezone.utc),
|
|
141
|
+
description=f"Added {type(self.rules).__name__} to {named_graph} named graph",
|
|
142
|
+
)
|
|
91
143
|
)
|
|
92
|
-
)
|
|
93
144
|
|
|
94
|
-
|
|
95
|
-
|
|
145
|
+
if self.rules[named_graph].prefixes:
|
|
146
|
+
self._upsert_prefixes(self.rules[named_graph].prefixes, named_graph)
|
|
96
147
|
|
|
97
|
-
def _upsert_prefixes(self, prefixes: dict[str, Namespace]) -> None:
|
|
148
|
+
def _upsert_prefixes(self, prefixes: dict[str, Namespace], named_graph: URIRef) -> None:
|
|
98
149
|
"""Adds prefixes to the graph store."""
|
|
99
150
|
_start = datetime.now(timezone.utc)
|
|
100
151
|
for prefix, namespace in prefixes.items():
|
|
101
|
-
self.graph.bind(prefix, namespace)
|
|
152
|
+
self.graph(named_graph).bind(prefix, namespace)
|
|
102
153
|
|
|
103
154
|
self.provenance.append(
|
|
104
155
|
Change.record(
|
|
105
156
|
activity=f"{type(self).__name__}._upsert_prefixes",
|
|
106
157
|
start=_start,
|
|
107
158
|
end=datetime.now(timezone.utc),
|
|
108
|
-
description="Upsert prefixes to graph
|
|
159
|
+
description="Upsert prefixes to the name graph {named_graph}",
|
|
109
160
|
)
|
|
110
161
|
)
|
|
111
162
|
|
|
112
163
|
@classmethod
|
|
113
|
-
def from_memory_store(cls
|
|
114
|
-
return cls(Dataset()
|
|
164
|
+
def from_memory_store(cls) -> "Self":
|
|
165
|
+
return cls(Dataset())
|
|
115
166
|
|
|
116
167
|
@classmethod
|
|
117
168
|
def from_sparql_store(
|
|
@@ -119,7 +170,6 @@ class NeatGraphStore:
|
|
|
119
170
|
query_endpoint: str | None = None,
|
|
120
171
|
update_endpoint: str | None = None,
|
|
121
172
|
returnFormat: str = "csv",
|
|
122
|
-
rules: InformationRules | None = None,
|
|
123
173
|
) -> "Self":
|
|
124
174
|
store = SPARQLUpdateStore(
|
|
125
175
|
query_endpoint=query_endpoint,
|
|
@@ -130,10 +180,27 @@ class NeatGraphStore:
|
|
|
130
180
|
autocommit=False,
|
|
131
181
|
)
|
|
132
182
|
graph = Dataset(store=store)
|
|
133
|
-
return cls(graph
|
|
183
|
+
return cls(graph)
|
|
134
184
|
|
|
135
185
|
@classmethod
|
|
136
|
-
def
|
|
186
|
+
def from_oxi_remote_store(
|
|
187
|
+
cls,
|
|
188
|
+
remote_url: str,
|
|
189
|
+
autocommit: bool = False,
|
|
190
|
+
) -> "Self":
|
|
191
|
+
"""Creates a NeatGraphStore from a remote Oxigraph store SPARQL endpoint."""
|
|
192
|
+
|
|
193
|
+
return cls(
|
|
194
|
+
dataset=Dataset(
|
|
195
|
+
store=SPARQLUpdateStore(
|
|
196
|
+
query_endpoint=f"{remote_url}/query", update_endpoint=f"{remote_url}/query", autocommit=autocommit
|
|
197
|
+
),
|
|
198
|
+
default_union=True,
|
|
199
|
+
)
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
@classmethod
|
|
203
|
+
def from_oxi_local_store(cls, storage_dir: Path | None = None) -> "Self":
|
|
137
204
|
"""Creates a NeatGraphStore from an Oxigraph store."""
|
|
138
205
|
local_import("pyoxigraph", "oxi")
|
|
139
206
|
local_import("oxrdflib", "oxi")
|
|
@@ -152,29 +219,41 @@ class NeatGraphStore:
|
|
|
152
219
|
else:
|
|
153
220
|
raise Exception("Error initializing Oxigraph store")
|
|
154
221
|
|
|
155
|
-
|
|
156
|
-
|
|
222
|
+
return cls(
|
|
223
|
+
dataset=Dataset(
|
|
224
|
+
store=oxrdflib.OxigraphStore(store=oxi_store),
|
|
225
|
+
)
|
|
157
226
|
)
|
|
158
227
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
def write(self, extractor: TripleExtractors) -> IssueList:
|
|
228
|
+
def write(self, extractor: TripleExtractors, named_graph: URIRef | None = None) -> IssueList:
|
|
162
229
|
last_change: Change | None = None
|
|
230
|
+
named_graph = named_graph or self.default_named_graph
|
|
163
231
|
with catch_issues() as issue_list:
|
|
164
232
|
_start = datetime.now(timezone.utc)
|
|
165
233
|
success = True
|
|
166
234
|
|
|
167
235
|
if isinstance(extractor, RdfFileExtractor) and not extractor.issue_list.has_errors:
|
|
168
|
-
self._parse_file(
|
|
236
|
+
self._parse_file(
|
|
237
|
+
named_graph,
|
|
238
|
+
extractor.filepath,
|
|
239
|
+
cast(str, extractor.format),
|
|
240
|
+
extractor.base_uri,
|
|
241
|
+
)
|
|
242
|
+
if isinstance(extractor.filepath, ZipExtFile):
|
|
243
|
+
extractor.filepath.close()
|
|
244
|
+
|
|
169
245
|
elif isinstance(extractor, RdfFileExtractor):
|
|
170
246
|
success = False
|
|
171
247
|
issue_text = "\n".join([issue.as_message() for issue in extractor.issue_list])
|
|
172
248
|
warnings.warn(
|
|
173
|
-
|
|
249
|
+
(
|
|
250
|
+
f"Cannot write to named graph {named_graph} with "
|
|
251
|
+
f"{type(extractor).__name__}, errors found in file:\n{issue_text}"
|
|
252
|
+
),
|
|
174
253
|
stacklevel=2,
|
|
175
254
|
)
|
|
176
255
|
else:
|
|
177
|
-
self._add_triples(extractor.extract())
|
|
256
|
+
self._add_triples(extractor.extract(), named_graph=named_graph)
|
|
178
257
|
|
|
179
258
|
if success:
|
|
180
259
|
_end = datetime.now(timezone.utc)
|
|
@@ -189,7 +268,7 @@ class NeatGraphStore:
|
|
|
189
268
|
activity=activity,
|
|
190
269
|
start=_start,
|
|
191
270
|
end=_end,
|
|
192
|
-
description=f"Extracted triples to graph
|
|
271
|
+
description=f"Extracted triples to named graph {named_graph} using {type(extractor).__name__}",
|
|
193
272
|
)
|
|
194
273
|
self.provenance.append(last_change)
|
|
195
274
|
if last_change:
|
|
@@ -197,26 +276,56 @@ class NeatGraphStore:
|
|
|
197
276
|
return issue_list
|
|
198
277
|
|
|
199
278
|
def _read_via_rules_linkage(
|
|
200
|
-
self,
|
|
279
|
+
self,
|
|
280
|
+
class_neat_id: URIRef,
|
|
281
|
+
property_link_pairs: dict[str, URIRef] | None,
|
|
282
|
+
named_graph: URIRef | None = None,
|
|
201
283
|
) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
|
|
202
|
-
|
|
203
|
-
|
|
284
|
+
named_graph = named_graph or self.default_named_graph
|
|
285
|
+
|
|
286
|
+
if named_graph not in self.named_graphs:
|
|
287
|
+
warnings.warn(
|
|
288
|
+
f"Named graph {named_graph} not found in graph store, cannot read",
|
|
289
|
+
stacklevel=2,
|
|
290
|
+
)
|
|
204
291
|
return
|
|
292
|
+
|
|
293
|
+
if not self.rules or named_graph not in self.rules:
|
|
294
|
+
warnings.warn(
|
|
295
|
+
f"Rules for named graph {named_graph} not found in graph store!",
|
|
296
|
+
stacklevel=2,
|
|
297
|
+
)
|
|
298
|
+
return
|
|
299
|
+
|
|
205
300
|
if self.multi_type_instances:
|
|
206
301
|
warnings.warn(
|
|
207
302
|
"Multi typed instances detected, issues with loading can occur!",
|
|
208
303
|
stacklevel=2,
|
|
209
304
|
)
|
|
210
305
|
|
|
211
|
-
|
|
306
|
+
analysis = InformationAnalysis(self.rules[named_graph])
|
|
307
|
+
|
|
308
|
+
if cls := analysis.classes_by_neat_id.get(class_neat_id):
|
|
212
309
|
if property_link_pairs:
|
|
213
310
|
property_renaming_config = {
|
|
214
311
|
prop_uri: prop_name
|
|
215
312
|
for prop_name, prop_neat_id in property_link_pairs.items()
|
|
216
|
-
if (
|
|
217
|
-
prop_uri := InformationAnalysis(self.rules).neat_id_to_transformation_property_uri(prop_neat_id)
|
|
218
|
-
)
|
|
313
|
+
if (prop_uri := analysis.neat_id_to_instance_source_property_uri(prop_neat_id))
|
|
219
314
|
}
|
|
315
|
+
if information_properties := analysis.classes_with_properties(consider_inheritance=True).get(
|
|
316
|
+
cls.class_
|
|
317
|
+
):
|
|
318
|
+
for prop in information_properties:
|
|
319
|
+
if prop.neatId is None:
|
|
320
|
+
continue
|
|
321
|
+
# Include renaming done in the Information rules that are not present in the
|
|
322
|
+
# property_link_pairs. The use case for this renaming to startNode and endNode
|
|
323
|
+
# properties that are not part of DMSRules but will typically be present
|
|
324
|
+
# in the Information rules.
|
|
325
|
+
if (
|
|
326
|
+
uri := analysis.neat_id_to_instance_source_property_uri(prop.neatId)
|
|
327
|
+
) and uri not in property_renaming_config:
|
|
328
|
+
property_renaming_config[uri] = prop.property_
|
|
220
329
|
|
|
221
330
|
yield from self._read_via_class_entity(cls.class_, property_renaming_config)
|
|
222
331
|
return
|
|
@@ -231,9 +340,22 @@ class NeatGraphStore:
|
|
|
231
340
|
self,
|
|
232
341
|
class_entity: ClassEntity,
|
|
233
342
|
property_renaming_config: dict[URIRef, str] | None = None,
|
|
343
|
+
named_graph: URIRef | None = None,
|
|
234
344
|
) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
|
|
235
|
-
|
|
236
|
-
|
|
345
|
+
named_graph = named_graph or self.default_named_graph
|
|
346
|
+
|
|
347
|
+
if named_graph not in self.named_graphs:
|
|
348
|
+
warnings.warn(
|
|
349
|
+
f"Named graph {named_graph} not found in graph store, cannot read",
|
|
350
|
+
stacklevel=2,
|
|
351
|
+
)
|
|
352
|
+
return
|
|
353
|
+
|
|
354
|
+
if not self.rules or named_graph not in self.rules:
|
|
355
|
+
warnings.warn(
|
|
356
|
+
f"Rules for named graph {named_graph} not found in graph store!",
|
|
357
|
+
stacklevel=2,
|
|
358
|
+
)
|
|
237
359
|
return
|
|
238
360
|
if self.multi_type_instances:
|
|
239
361
|
warnings.warn(
|
|
@@ -241,28 +363,28 @@ class NeatGraphStore:
|
|
|
241
363
|
stacklevel=2,
|
|
242
364
|
)
|
|
243
365
|
|
|
244
|
-
if class_entity not in [definition.class_ for definition in self.rules.classes]:
|
|
366
|
+
if class_entity not in [definition.class_ for definition in self.rules[named_graph].classes]:
|
|
245
367
|
warnings.warn("Desired type not found in graph!", stacklevel=2)
|
|
246
368
|
return
|
|
247
369
|
|
|
248
|
-
if not (class_uri := InformationAnalysis(self.rules).class_uri(class_entity)):
|
|
370
|
+
if not (class_uri := InformationAnalysis(self.rules[named_graph]).class_uri(class_entity)):
|
|
249
371
|
warnings.warn(
|
|
250
372
|
f"Class {class_entity.suffix} does not have namespace defined for prefix {class_entity.prefix} Rules!",
|
|
251
373
|
stacklevel=2,
|
|
252
374
|
)
|
|
253
375
|
return
|
|
254
376
|
|
|
255
|
-
has_hop_transformations = InformationAnalysis(self.rules).has_hop_transformations()
|
|
377
|
+
has_hop_transformations = InformationAnalysis(self.rules[named_graph]).has_hop_transformations()
|
|
256
378
|
has_self_reference_transformations = InformationAnalysis(
|
|
257
|
-
self.rules
|
|
379
|
+
self.rules[named_graph]
|
|
258
380
|
).has_self_reference_property_transformations()
|
|
259
381
|
if has_hop_transformations or has_self_reference_transformations:
|
|
260
382
|
msg = (
|
|
261
|
-
f"Rules contain [{'Hop' if has_hop_transformations else ''
|
|
262
|
-
f", {'SelfReferenceProperty' if has_self_reference_transformations else ''
|
|
383
|
+
f"Rules contain [{'Hop' if has_hop_transformations else ''}"
|
|
384
|
+
f", {'SelfReferenceProperty' if has_self_reference_transformations else ''}]"
|
|
263
385
|
" rdfpath."
|
|
264
|
-
f" Run [{'ReduceHopTraversal' if has_hop_transformations else ''
|
|
265
|
-
f", {'AddSelfReferenceProperty' if has_self_reference_transformations else ''
|
|
386
|
+
f" Run [{'ReduceHopTraversal' if has_hop_transformations else ''}"
|
|
387
|
+
f", {'AddSelfReferenceProperty' if has_self_reference_transformations else ''}]"
|
|
266
388
|
" transformer(s) first!"
|
|
267
389
|
)
|
|
268
390
|
|
|
@@ -277,23 +399,19 @@ class NeatGraphStore:
|
|
|
277
399
|
|
|
278
400
|
# get potential property renaming config
|
|
279
401
|
property_renaming_config = property_renaming_config or InformationAnalysis(
|
|
280
|
-
self.rules
|
|
402
|
+
self.rules[named_graph]
|
|
281
403
|
).define_property_renaming_config(class_entity)
|
|
282
404
|
|
|
283
|
-
# get property types to guide process of removing or not namespaces from results
|
|
284
|
-
property_types = InformationAnalysis(self.rules).property_types(class_entity)
|
|
285
405
|
for instance_id in instance_ids:
|
|
286
406
|
if res := self.queries.describe(
|
|
287
407
|
instance_id=instance_id,
|
|
288
408
|
instance_type=class_entity.suffix,
|
|
289
409
|
property_renaming_config=property_renaming_config,
|
|
290
|
-
property_types=property_types,
|
|
291
410
|
):
|
|
292
411
|
yield res
|
|
293
412
|
|
|
294
413
|
def read(
|
|
295
|
-
self,
|
|
296
|
-
class_: str,
|
|
414
|
+
self, class_: str, named_graph: URIRef | None = None
|
|
297
415
|
) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
|
|
298
416
|
"""Read instances for given class from the graph store.
|
|
299
417
|
|
|
@@ -302,9 +420,20 @@ class NeatGraphStore:
|
|
|
302
420
|
the rules which are attached to the graph store.
|
|
303
421
|
|
|
304
422
|
"""
|
|
423
|
+
named_graph = named_graph or self.default_named_graph
|
|
424
|
+
|
|
425
|
+
if named_graph not in self.named_graphs:
|
|
426
|
+
warnings.warn(
|
|
427
|
+
f"Named graph {named_graph} not found in graph store, cannot read",
|
|
428
|
+
stacklevel=2,
|
|
429
|
+
)
|
|
430
|
+
return
|
|
305
431
|
|
|
306
|
-
if not self.rules:
|
|
307
|
-
warnings.warn(
|
|
432
|
+
if not self.rules or named_graph not in self.rules:
|
|
433
|
+
warnings.warn(
|
|
434
|
+
f"Rules for named graph {named_graph} not found in graph store!",
|
|
435
|
+
stacklevel=2,
|
|
436
|
+
)
|
|
308
437
|
return
|
|
309
438
|
if self.multi_type_instances:
|
|
310
439
|
warnings.warn(
|
|
@@ -312,15 +441,15 @@ class NeatGraphStore:
|
|
|
312
441
|
stacklevel=2,
|
|
313
442
|
)
|
|
314
443
|
|
|
315
|
-
class_entity = ClassEntity(prefix=self.rules.metadata.prefix, suffix=class_)
|
|
444
|
+
class_entity = ClassEntity(prefix=self.rules[named_graph].metadata.prefix, suffix=class_)
|
|
316
445
|
|
|
317
|
-
if class_entity not in [definition.class_ for definition in self.rules.classes]:
|
|
446
|
+
if class_entity not in [definition.class_ for definition in self.rules[named_graph].classes]:
|
|
318
447
|
warnings.warn("Desired type not found in graph!", stacklevel=2)
|
|
319
448
|
return
|
|
320
449
|
|
|
321
450
|
yield from self._read_via_class_entity(class_entity)
|
|
322
451
|
|
|
323
|
-
def count_of_id(self, neat_id: URIRef) -> int:
|
|
452
|
+
def count_of_id(self, neat_id: URIRef, named_graph: URIRef | None = None) -> int:
|
|
324
453
|
"""Count the number of instances of a given type
|
|
325
454
|
|
|
326
455
|
Args:
|
|
@@ -329,18 +458,31 @@ class NeatGraphStore:
|
|
|
329
458
|
Returns:
|
|
330
459
|
Number of instances
|
|
331
460
|
"""
|
|
332
|
-
|
|
333
|
-
|
|
461
|
+
named_graph = named_graph or self.default_named_graph
|
|
462
|
+
|
|
463
|
+
if named_graph not in self.named_graphs:
|
|
464
|
+
warnings.warn(
|
|
465
|
+
f"Named graph {named_graph} not found in graph store, cannot count",
|
|
466
|
+
stacklevel=2,
|
|
467
|
+
)
|
|
468
|
+
return 0
|
|
469
|
+
|
|
470
|
+
if not self.rules or named_graph not in self.rules:
|
|
471
|
+
warnings.warn(
|
|
472
|
+
f"Rules for named graph {named_graph} not found in graph store!",
|
|
473
|
+
stacklevel=2,
|
|
474
|
+
)
|
|
334
475
|
return 0
|
|
335
476
|
|
|
336
477
|
class_entity = next(
|
|
337
|
-
(definition.class_ for definition in self.rules.classes if definition.neatId == neat_id),
|
|
478
|
+
(definition.class_ for definition in self.rules[named_graph].classes if definition.neatId == neat_id),
|
|
479
|
+
None,
|
|
338
480
|
)
|
|
339
481
|
if not class_entity:
|
|
340
482
|
warnings.warn("Desired type not found in graph!", stacklevel=2)
|
|
341
483
|
return 0
|
|
342
484
|
|
|
343
|
-
if not (class_uri := InformationAnalysis(self.rules).class_uri(class_entity)):
|
|
485
|
+
if not (class_uri := InformationAnalysis(self.rules[named_graph]).class_uri(class_entity)):
|
|
344
486
|
warnings.warn(
|
|
345
487
|
f"Class {class_entity.suffix} does not have namespace defined for prefix {class_entity.prefix} Rules!",
|
|
346
488
|
stacklevel=2,
|
|
@@ -351,17 +493,19 @@ class NeatGraphStore:
|
|
|
351
493
|
|
|
352
494
|
def count_of_type(self, class_uri: URIRef) -> int:
|
|
353
495
|
query = f"SELECT (COUNT(?instance) AS ?instanceCount) WHERE {{ ?instance a <{class_uri}> }}"
|
|
354
|
-
return int(next(iter(self.
|
|
496
|
+
return int(next(iter(self.dataset.query(query)))[0]) # type: ignore[arg-type, index]
|
|
355
497
|
|
|
356
498
|
def _parse_file(
|
|
357
499
|
self,
|
|
358
|
-
|
|
500
|
+
named_graph: URIRef,
|
|
501
|
+
filepath: Path | ZipExtFile,
|
|
359
502
|
format: str = "turtle",
|
|
360
503
|
base_uri: URIRef | None = None,
|
|
361
504
|
) -> None:
|
|
362
505
|
"""Imports graph data from file.
|
|
363
506
|
|
|
364
507
|
Args:
|
|
508
|
+
named_graph : URIRef of the named graph to store the data in
|
|
365
509
|
filepath : File path to file containing graph data, by default None
|
|
366
510
|
format : rdflib format file containing RDF graph, by default "turtle"
|
|
367
511
|
base_uri : base URI to add to graph in case of relative URIs, by default None
|
|
@@ -375,28 +519,38 @@ class NeatGraphStore:
|
|
|
375
519
|
"""
|
|
376
520
|
|
|
377
521
|
# Oxigraph store, do not want to type hint this as it is an optional dependency
|
|
378
|
-
if
|
|
522
|
+
if self.type_ == "OxigraphStore":
|
|
379
523
|
local_import("pyoxigraph", "oxi")
|
|
380
524
|
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
525
|
+
if format in quad_formats():
|
|
526
|
+
self.dataset.parse(
|
|
527
|
+
filepath, # type: ignore[arg-type]
|
|
528
|
+
format=rdflib_to_oxi_type(format),
|
|
529
|
+
transactional=False,
|
|
530
|
+
publicID=base_uri,
|
|
531
|
+
)
|
|
532
|
+
else:
|
|
533
|
+
self.graph(named_graph).parse(
|
|
534
|
+
filepath, # type: ignore[arg-type]
|
|
535
|
+
format=rdflib_to_oxi_type(format),
|
|
536
|
+
transactional=False,
|
|
537
|
+
publicID=base_uri,
|
|
538
|
+
)
|
|
539
|
+
self.dataset.store._store.optimize() # type: ignore[attr-defined]
|
|
389
540
|
|
|
390
541
|
# All other stores
|
|
391
542
|
else:
|
|
392
|
-
if
|
|
393
|
-
self.
|
|
543
|
+
if format in quad_formats():
|
|
544
|
+
self.dataset.parse(filepath, publicID=base_uri, format=format) # type: ignore[arg-type]
|
|
394
545
|
else:
|
|
395
|
-
|
|
396
|
-
if filename.is_file():
|
|
397
|
-
self.graph.parse(filename, publicID=base_uri)
|
|
546
|
+
self.graph(named_graph).parse(filepath, publicID=base_uri, format=format) # type: ignore[arg-type]
|
|
398
547
|
|
|
399
|
-
def _add_triples(
|
|
548
|
+
def _add_triples(
|
|
549
|
+
self,
|
|
550
|
+
triples: Iterable[Triple],
|
|
551
|
+
named_graph: URIRef,
|
|
552
|
+
batch_size: int = 10_000,
|
|
553
|
+
) -> None:
|
|
400
554
|
"""Adds triples to the graph store in batches.
|
|
401
555
|
|
|
402
556
|
Args:
|
|
@@ -404,66 +558,103 @@ class NeatGraphStore:
|
|
|
404
558
|
batch_size: Batch size of triples per commit, by default 10_000
|
|
405
559
|
verbose: Verbose mode, by default False
|
|
406
560
|
"""
|
|
407
|
-
add_triples_in_batch(self.graph, triples, batch_size)
|
|
561
|
+
add_triples_in_batch(self.graph(named_graph), triples, batch_size)
|
|
408
562
|
|
|
409
|
-
def transform(self, transformer: Transformers) -> None:
|
|
563
|
+
def transform(self, transformer: Transformers, named_graph: URIRef | None = None) -> None:
|
|
410
564
|
"""Transforms the graph store using a transformer."""
|
|
411
565
|
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
566
|
+
named_graph = named_graph or self.default_named_graph
|
|
567
|
+
if named_graph in self.named_graphs:
|
|
568
|
+
missing_changes = [
|
|
569
|
+
change for change in transformer._need_changes if not self.provenance.activity_took_place(change)
|
|
570
|
+
]
|
|
571
|
+
if self.provenance.activity_took_place(type(transformer).__name__) and transformer._use_only_once:
|
|
572
|
+
warnings.warn(
|
|
573
|
+
f"Cannot transform graph store with {type(transformer).__name__}, already applied",
|
|
574
|
+
stacklevel=2,
|
|
575
|
+
)
|
|
576
|
+
elif missing_changes:
|
|
577
|
+
warnings.warn(
|
|
578
|
+
(
|
|
579
|
+
f"Cannot transform graph store with {type(transformer).__name__}, "
|
|
580
|
+
f"missing one or more required changes [{', '.join(missing_changes)}]"
|
|
581
|
+
),
|
|
582
|
+
stacklevel=2,
|
|
583
|
+
)
|
|
428
584
|
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
585
|
+
else:
|
|
586
|
+
_start = datetime.now(timezone.utc)
|
|
587
|
+
transformer.transform(self.graph(named_graph))
|
|
588
|
+
self.provenance.append(
|
|
589
|
+
Change.record(
|
|
590
|
+
activity=f"{type(transformer).__name__}",
|
|
591
|
+
start=_start,
|
|
592
|
+
end=datetime.now(timezone.utc),
|
|
593
|
+
description=transformer.description,
|
|
594
|
+
)
|
|
438
595
|
)
|
|
596
|
+
|
|
597
|
+
else:
|
|
598
|
+
warnings.warn(
|
|
599
|
+
f"Named graph {named_graph} not found in graph store, cannot transform",
|
|
600
|
+
stacklevel=2,
|
|
439
601
|
)
|
|
440
602
|
|
|
441
603
|
@property
|
|
442
|
-
def summary(self) -> pd.DataFrame:
|
|
443
|
-
return
|
|
604
|
+
def summary(self) -> dict[URIRef, pd.DataFrame]:
|
|
605
|
+
return {
|
|
606
|
+
named_graph: pd.DataFrame(
|
|
607
|
+
self.queries.summarize_instances(named_graph),
|
|
608
|
+
columns=["Type", "Occurrence"],
|
|
609
|
+
)
|
|
610
|
+
for named_graph in self.named_graphs
|
|
611
|
+
}
|
|
444
612
|
|
|
445
613
|
@property
|
|
446
|
-
def multi_type_instances(self) -> dict[str, list[str]]:
|
|
447
|
-
return self.queries.multi_type_instances()
|
|
614
|
+
def multi_type_instances(self) -> dict[URIRef, dict[str, list[str]]]:
|
|
615
|
+
return {named_graph: self.queries.multi_type_instances(named_graph) for named_graph in self.named_graphs}
|
|
448
616
|
|
|
449
617
|
def _repr_html_(self) -> str:
|
|
450
618
|
provenance = self.provenance._repr_html_()
|
|
451
|
-
summary: pd.DataFrame = self.summary
|
|
619
|
+
summary: dict[URIRef, pd.DataFrame] = self.summary
|
|
452
620
|
|
|
453
|
-
|
|
621
|
+
def _short_name_of_graph(named_graph: URIRef) -> str:
|
|
622
|
+
return "default" if named_graph == self.default_named_graph else remove_namespace_from_uri(named_graph)
|
|
623
|
+
|
|
624
|
+
if not summary:
|
|
454
625
|
summary_text = "<br /><strong>Graph is empty</strong><br />"
|
|
455
626
|
else:
|
|
627
|
+
all_types = set().union(
|
|
628
|
+
*[set(sub_summary.Type) for sub_summary in summary.values() if not sub_summary.empty]
|
|
629
|
+
)
|
|
630
|
+
|
|
456
631
|
summary_text = (
|
|
457
632
|
"<br /><strong>Overview</strong>:" # type: ignore
|
|
458
|
-
f"<ul><li>{len(summary)}
|
|
459
|
-
f"<li>{
|
|
460
|
-
f"{cast(pd.DataFrame, self._shorten_summary(summary))._repr_html_()}" # type: ignore[operator]
|
|
633
|
+
f"<ul><li>{len(summary)} named graphs</strong></li>"
|
|
634
|
+
f"<li>Total of {len(all_types)} unique types</strong></li>"
|
|
461
635
|
)
|
|
462
636
|
|
|
463
|
-
|
|
464
|
-
|
|
637
|
+
for named_graph, table in summary.items():
|
|
638
|
+
summary_text += (
|
|
639
|
+
f"<li>{sum(table['Occurrence'])} instances in {_short_name_of_graph(named_graph)}"
|
|
640
|
+
" graph</strong></li>"
|
|
641
|
+
)
|
|
642
|
+
|
|
643
|
+
summary_text += "</ul>"
|
|
644
|
+
for named_graph, table in summary.items():
|
|
645
|
+
summary_text += (
|
|
646
|
+
f"<br /><strong>{_short_name_of_graph(named_graph)} graph</strong>:"
|
|
647
|
+
f"{cast(pd.DataFrame, self._shorten_summary(table))._repr_html_()}" # type: ignore[operator]
|
|
648
|
+
)
|
|
465
649
|
|
|
466
|
-
|
|
650
|
+
for named_graph, multi_value_instances in self.multi_type_instances.items():
|
|
651
|
+
if multi_value_instances:
|
|
652
|
+
summary_text += (
|
|
653
|
+
f"<br><strong>Multi value instances detected in {_short_name_of_graph(named_graph)}"
|
|
654
|
+
"graph! Loading could have issues!</strong></br>"
|
|
655
|
+
)
|
|
656
|
+
|
|
657
|
+
return f"{summary_text}{provenance}"
|
|
467
658
|
|
|
468
659
|
def _shorten_summary(self, summary: pd.DataFrame) -> pd.DataFrame:
|
|
469
660
|
"""Shorten summary to top 5 types by occurrence."""
|
|
@@ -490,3 +681,7 @@ class NeatGraphStore:
|
|
|
490
681
|
shorter_summary.index = cast(Index, indexes)
|
|
491
682
|
|
|
492
683
|
return shorter_summary
|
|
684
|
+
|
|
685
|
+
@property
|
|
686
|
+
def named_graphs(self) -> list[URIRef]:
|
|
687
|
+
return [cast(URIRef, context.identifier) for context in self.dataset.contexts()]
|