cognite-neat 0.107.0__py3-none-any.whl → 0.109.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_constants.py +35 -1
- cognite/neat/_graph/_shared.py +4 -0
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +115 -14
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +87 -6
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +48 -12
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +19 -1
- cognite/neat/_graph/extractors/_dms.py +162 -47
- cognite/neat/_graph/extractors/_dms_graph.py +54 -4
- cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
- cognite/neat/_graph/extractors/_rdf_file.py +3 -2
- cognite/neat/_graph/loaders/__init__.py +1 -3
- cognite/neat/_graph/loaders/_rdf2dms.py +20 -10
- cognite/neat/_graph/queries/_base.py +144 -84
- cognite/neat/_graph/queries/_construct.py +1 -1
- cognite/neat/_graph/transformers/__init__.py +3 -1
- cognite/neat/_graph/transformers/_base.py +4 -4
- cognite/neat/_graph/transformers/_classic_cdf.py +13 -13
- cognite/neat/_graph/transformers/_prune_graph.py +3 -3
- cognite/neat/_graph/transformers/_rdfpath.py +3 -4
- cognite/neat/_graph/transformers/_value_type.py +71 -13
- cognite/neat/_issues/errors/__init__.py +2 -0
- cognite/neat/_issues/errors/_external.py +8 -0
- cognite/neat/_issues/errors/_resources.py +1 -1
- cognite/neat/_issues/warnings/__init__.py +0 -2
- cognite/neat/_issues/warnings/_models.py +1 -1
- cognite/neat/_issues/warnings/_properties.py +0 -8
- cognite/neat/_issues/warnings/_resources.py +1 -1
- cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
- cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
- cognite/neat/_rules/exporters/_rules2yaml.py +1 -1
- cognite/neat/_rules/importers/__init__.py +3 -1
- cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
- cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
- cognite/neat/_rules/importers/_rdf/_base.py +2 -2
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +310 -26
- cognite/neat/_rules/models/_base_rules.py +22 -11
- cognite/neat/_rules/models/dms/_exporter.py +5 -4
- cognite/neat/_rules/models/dms/_rules.py +1 -8
- cognite/neat/_rules/models/dms/_rules_input.py +4 -0
- cognite/neat/_rules/models/information/_rules_input.py +5 -0
- cognite/neat/_rules/transformers/__init__.py +10 -3
- cognite/neat/_rules/transformers/_base.py +6 -1
- cognite/neat/_rules/transformers/_converters.py +530 -364
- cognite/neat/_rules/transformers/_mapping.py +4 -4
- cognite/neat/_session/_base.py +100 -47
- cognite/neat/_session/_create.py +133 -0
- cognite/neat/_session/_drop.py +60 -2
- cognite/neat/_session/_fix.py +28 -0
- cognite/neat/_session/_inspect.py +22 -7
- cognite/neat/_session/_mapping.py +8 -8
- cognite/neat/_session/_prepare.py +3 -247
- cognite/neat/_session/_read.py +138 -17
- cognite/neat/_session/_set.py +50 -1
- cognite/neat/_session/_show.py +16 -43
- cognite/neat/_session/_state.py +53 -52
- cognite/neat/_session/_to.py +11 -4
- cognite/neat/_session/_wizard.py +1 -1
- cognite/neat/_session/exceptions.py +8 -1
- cognite/neat/_store/_graph_store.py +301 -146
- cognite/neat/_store/_provenance.py +36 -20
- cognite/neat/_store/_rules_store.py +253 -267
- cognite/neat/_store/exceptions.py +40 -4
- cognite/neat/_utils/auth.py +5 -3
- cognite/neat/_version.py +1 -1
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/METADATA +1 -1
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/RECORD +69 -67
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/entry_points.txt +0 -0
|
@@ -8,23 +8,24 @@ from zipfile import ZipExtFile
|
|
|
8
8
|
|
|
9
9
|
import pandas as pd
|
|
10
10
|
from pandas import Index
|
|
11
|
-
from rdflib import Dataset, Namespace, URIRef
|
|
11
|
+
from rdflib import Dataset, Graph, Namespace, URIRef
|
|
12
|
+
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
|
|
12
13
|
from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
|
|
13
14
|
|
|
14
|
-
from cognite.neat.
|
|
15
|
-
from cognite.neat._graph._shared import rdflib_to_oxi_type
|
|
15
|
+
from cognite.neat._graph._shared import quad_formats, rdflib_to_oxi_type
|
|
16
16
|
from cognite.neat._graph.extractors import RdfFileExtractor, TripleExtractors
|
|
17
17
|
from cognite.neat._graph.queries import Queries
|
|
18
18
|
from cognite.neat._graph.transformers import Transformers
|
|
19
19
|
from cognite.neat._issues import IssueList, catch_issues
|
|
20
|
+
from cognite.neat._issues.errors import OxigraphStorageLockedError
|
|
20
21
|
from cognite.neat._rules.analysis import InformationAnalysis
|
|
21
22
|
from cognite.neat._rules.models import InformationRules
|
|
22
23
|
from cognite.neat._rules.models.entities import ClassEntity
|
|
23
24
|
from cognite.neat._shared import InstanceType, Triple
|
|
24
25
|
from cognite.neat._utils.auxiliary import local_import
|
|
25
|
-
from cognite.neat._utils.rdf_ import add_triples_in_batch
|
|
26
|
+
from cognite.neat._utils.rdf_ import add_triples_in_batch, remove_namespace_from_uri
|
|
26
27
|
|
|
27
|
-
from ._provenance import Change, Provenance
|
|
28
|
+
from ._provenance import Change, Entity, Provenance
|
|
28
29
|
|
|
29
30
|
if sys.version_info < (3, 11):
|
|
30
31
|
from typing_extensions import Self
|
|
@@ -39,41 +40,49 @@ class NeatGraphStore:
|
|
|
39
40
|
Args:
|
|
40
41
|
dataset : Instance of rdflib.Dataset class for graph storage
|
|
41
42
|
rules:
|
|
43
|
+
|
|
44
|
+
!!! note "Dataset"
|
|
45
|
+
The store leverages an RDF dataset, which is defined as a collection of RDF graphs
|
|
46
|
+
where all but one are named graphs associated with URIRef (the graph name),
|
|
47
|
+
and one is the unnamed default graph, which in the context of the rdflib library has the
|
|
48
|
+
identifier URIRef('urn:x-rdflib:default').
|
|
42
49
|
"""
|
|
43
50
|
|
|
44
51
|
rdf_store_type: str
|
|
45
52
|
|
|
46
53
|
def __init__(
|
|
47
54
|
self,
|
|
48
|
-
|
|
49
|
-
|
|
55
|
+
dataset: Dataset,
|
|
56
|
+
default_named_graph: URIRef | None = None,
|
|
50
57
|
):
|
|
51
|
-
self.rules: InformationRules
|
|
58
|
+
self.rules: dict[URIRef, InformationRules] = {}
|
|
59
|
+
self.base_namespace: dict[URIRef, Namespace] = {}
|
|
52
60
|
|
|
53
61
|
_start = datetime.now(timezone.utc)
|
|
54
|
-
self.
|
|
55
|
-
self.provenance = Provenance(
|
|
62
|
+
self.dataset = dataset
|
|
63
|
+
self.provenance = Provenance[Entity](
|
|
56
64
|
[
|
|
57
65
|
Change.record(
|
|
58
66
|
activity=f"{type(self).__name__}.__init__",
|
|
59
67
|
start=_start,
|
|
60
68
|
end=datetime.now(timezone.utc),
|
|
61
|
-
description=f"Initialize graph store as {type(self.
|
|
69
|
+
description=f"Initialize graph store as {type(self.dataset.store).__name__}",
|
|
62
70
|
)
|
|
63
71
|
]
|
|
64
72
|
)
|
|
65
73
|
|
|
66
|
-
|
|
67
|
-
self.add_rules(rules)
|
|
68
|
-
else:
|
|
69
|
-
self.base_namespace = DEFAULT_NAMESPACE
|
|
74
|
+
self.default_named_graph = default_named_graph or DATASET_DEFAULT_GRAPH_ID
|
|
70
75
|
|
|
71
|
-
self.queries = Queries(self.
|
|
76
|
+
self.queries = Queries(self.dataset, self.rules, self.default_named_graph)
|
|
77
|
+
|
|
78
|
+
def graph(self, named_graph: URIRef | None = None) -> Graph:
|
|
79
|
+
"""Get named graph from the dataset to query over"""
|
|
80
|
+
return self.dataset.graph(named_graph or self.default_named_graph)
|
|
72
81
|
|
|
73
82
|
@property
|
|
74
83
|
def type_(self) -> str:
|
|
75
84
|
"Return type of the graph store"
|
|
76
|
-
return type(self.
|
|
85
|
+
return type(self.dataset.store).__name__
|
|
77
86
|
|
|
78
87
|
# no destination
|
|
79
88
|
@overload
|
|
@@ -91,54 +100,70 @@ class NeatGraphStore:
|
|
|
91
100
|
|
|
92
101
|
Returns:
|
|
93
102
|
Serialized graph store
|
|
103
|
+
|
|
104
|
+
!!! note "Trig Format"
|
|
105
|
+
Notice that instead of turtle format we are using trig format for serialization.
|
|
106
|
+
This is because trig format is a superset of turtle format and it allows us to
|
|
107
|
+
serialize named graphs as well, allowing serialization of one or more named graphs
|
|
108
|
+
including the default graph.
|
|
94
109
|
"""
|
|
95
110
|
if filepath:
|
|
96
|
-
self.
|
|
111
|
+
self.dataset.serialize(
|
|
97
112
|
filepath,
|
|
98
|
-
format="ox-trig" if self.type_ == "OxigraphStore" else "
|
|
113
|
+
format="ox-trig" if self.type_ == "OxigraphStore" else "trig",
|
|
99
114
|
)
|
|
100
115
|
return None
|
|
101
116
|
else:
|
|
102
|
-
return self.
|
|
117
|
+
return self.dataset.serialize(format="ox-trig" if self.type_ == "OxigraphStore" else "trig")
|
|
118
|
+
|
|
119
|
+
def add_rules(self, rules: InformationRules, named_graph: URIRef | None = None) -> None:
|
|
120
|
+
"""This method is used to add rules to a named graph stored in the graph store.
|
|
121
|
+
|
|
122
|
+
Args:
|
|
123
|
+
rules: InformationRules object containing rules to be added to the named graph
|
|
124
|
+
named_graph: URIRef of the named graph to store the rules in, by default None
|
|
125
|
+
rules will be added to the default graph
|
|
103
126
|
|
|
104
|
-
def add_rules(self, rules: InformationRules) -> None:
|
|
105
|
-
"""This method is used to add rules to the graph store and it is the only correct
|
|
106
|
-
way to add rules to the graph store, after the graph store has been initialized.
|
|
107
127
|
"""
|
|
108
128
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
129
|
+
named_graph = named_graph or self.default_named_graph
|
|
130
|
+
|
|
131
|
+
if named_graph in self.named_graphs:
|
|
132
|
+
# attaching appropriate namespace to the rules
|
|
133
|
+
# as well as the base_namespace
|
|
134
|
+
self.rules[named_graph] = rules
|
|
135
|
+
self.base_namespace[named_graph] = rules.metadata.namespace
|
|
136
|
+
self.queries = Queries(self.dataset, self.rules)
|
|
137
|
+
self.provenance.append(
|
|
138
|
+
Change.record(
|
|
139
|
+
activity=f"{type(self)}.rules",
|
|
140
|
+
start=datetime.now(timezone.utc),
|
|
141
|
+
end=datetime.now(timezone.utc),
|
|
142
|
+
description=f"Added {type(self.rules).__name__} to {named_graph} named graph",
|
|
143
|
+
)
|
|
118
144
|
)
|
|
119
|
-
)
|
|
120
145
|
|
|
121
|
-
|
|
122
|
-
|
|
146
|
+
if self.rules[named_graph].prefixes:
|
|
147
|
+
self._upsert_prefixes(self.rules[named_graph].prefixes, named_graph)
|
|
123
148
|
|
|
124
|
-
def _upsert_prefixes(self, prefixes: dict[str, Namespace]) -> None:
|
|
149
|
+
def _upsert_prefixes(self, prefixes: dict[str, Namespace], named_graph: URIRef) -> None:
|
|
125
150
|
"""Adds prefixes to the graph store."""
|
|
126
151
|
_start = datetime.now(timezone.utc)
|
|
127
152
|
for prefix, namespace in prefixes.items():
|
|
128
|
-
self.graph.bind(prefix, namespace)
|
|
153
|
+
self.graph(named_graph).bind(prefix, namespace)
|
|
129
154
|
|
|
130
155
|
self.provenance.append(
|
|
131
156
|
Change.record(
|
|
132
157
|
activity=f"{type(self).__name__}._upsert_prefixes",
|
|
133
158
|
start=_start,
|
|
134
159
|
end=datetime.now(timezone.utc),
|
|
135
|
-
description="Upsert prefixes to graph
|
|
160
|
+
description="Upsert prefixes to the name graph {named_graph}",
|
|
136
161
|
)
|
|
137
162
|
)
|
|
138
163
|
|
|
139
164
|
@classmethod
|
|
140
|
-
def from_memory_store(cls
|
|
141
|
-
return cls(Dataset()
|
|
165
|
+
def from_memory_store(cls) -> "Self":
|
|
166
|
+
return cls(Dataset())
|
|
142
167
|
|
|
143
168
|
@classmethod
|
|
144
169
|
def from_sparql_store(
|
|
@@ -146,7 +171,6 @@ class NeatGraphStore:
|
|
|
146
171
|
query_endpoint: str | None = None,
|
|
147
172
|
update_endpoint: str | None = None,
|
|
148
173
|
returnFormat: str = "csv",
|
|
149
|
-
rules: InformationRules | None = None,
|
|
150
174
|
) -> "Self":
|
|
151
175
|
store = SPARQLUpdateStore(
|
|
152
176
|
query_endpoint=query_endpoint,
|
|
@@ -157,53 +181,75 @@ class NeatGraphStore:
|
|
|
157
181
|
autocommit=False,
|
|
158
182
|
)
|
|
159
183
|
graph = Dataset(store=store)
|
|
160
|
-
return cls(graph
|
|
184
|
+
return cls(graph)
|
|
161
185
|
|
|
162
186
|
@classmethod
|
|
163
|
-
def
|
|
187
|
+
def from_oxi_remote_store(
|
|
188
|
+
cls,
|
|
189
|
+
remote_url: str,
|
|
190
|
+
autocommit: bool = False,
|
|
191
|
+
) -> "Self":
|
|
192
|
+
"""Creates a NeatGraphStore from a remote Oxigraph store SPARQL endpoint."""
|
|
193
|
+
|
|
194
|
+
return cls(
|
|
195
|
+
dataset=Dataset(
|
|
196
|
+
store=SPARQLUpdateStore(
|
|
197
|
+
query_endpoint=f"{remote_url}/query", update_endpoint=f"{remote_url}/query", autocommit=autocommit
|
|
198
|
+
),
|
|
199
|
+
default_union=True,
|
|
200
|
+
)
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
@classmethod
|
|
204
|
+
def from_oxi_local_store(cls, storage_dir: Path | None = None) -> "Self":
|
|
164
205
|
"""Creates a NeatGraphStore from an Oxigraph store."""
|
|
165
206
|
local_import("pyoxigraph", "oxi")
|
|
166
207
|
local_import("oxrdflib", "oxi")
|
|
167
208
|
import oxrdflib
|
|
168
209
|
import pyoxigraph
|
|
169
210
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
if "lock" in str(e) and i < 3:
|
|
177
|
-
continue
|
|
178
|
-
raise e
|
|
179
|
-
else:
|
|
180
|
-
raise Exception("Error initializing Oxigraph store")
|
|
211
|
+
try:
|
|
212
|
+
oxi_store = pyoxigraph.Store(path=str(storage_dir) if storage_dir else None)
|
|
213
|
+
except OSError as e:
|
|
214
|
+
if "lock" in str(e):
|
|
215
|
+
raise OxigraphStorageLockedError(filepath=cast(Path, storage_dir)) from e
|
|
216
|
+
raise e
|
|
181
217
|
|
|
182
|
-
|
|
183
|
-
|
|
218
|
+
return cls(
|
|
219
|
+
dataset=Dataset(
|
|
220
|
+
store=oxrdflib.OxigraphStore(store=oxi_store),
|
|
221
|
+
)
|
|
184
222
|
)
|
|
185
223
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
def write(self, extractor: TripleExtractors) -> IssueList:
|
|
224
|
+
def write(self, extractor: TripleExtractors, named_graph: URIRef | None = None) -> IssueList:
|
|
189
225
|
last_change: Change | None = None
|
|
226
|
+
named_graph = named_graph or self.default_named_graph
|
|
190
227
|
with catch_issues() as issue_list:
|
|
191
228
|
_start = datetime.now(timezone.utc)
|
|
192
229
|
success = True
|
|
193
230
|
|
|
194
231
|
if isinstance(extractor, RdfFileExtractor) and not extractor.issue_list.has_errors:
|
|
195
|
-
self._parse_file(
|
|
232
|
+
self._parse_file(
|
|
233
|
+
named_graph,
|
|
234
|
+
extractor.filepath,
|
|
235
|
+
cast(str, extractor.format),
|
|
236
|
+
extractor.base_uri,
|
|
237
|
+
)
|
|
196
238
|
if isinstance(extractor.filepath, ZipExtFile):
|
|
197
239
|
extractor.filepath.close()
|
|
240
|
+
|
|
198
241
|
elif isinstance(extractor, RdfFileExtractor):
|
|
199
242
|
success = False
|
|
200
243
|
issue_text = "\n".join([issue.as_message() for issue in extractor.issue_list])
|
|
201
244
|
warnings.warn(
|
|
202
|
-
|
|
245
|
+
(
|
|
246
|
+
f"Cannot write to named graph {named_graph} with "
|
|
247
|
+
f"{type(extractor).__name__}, errors found in file:\n{issue_text}"
|
|
248
|
+
),
|
|
203
249
|
stacklevel=2,
|
|
204
250
|
)
|
|
205
251
|
else:
|
|
206
|
-
self._add_triples(extractor.extract())
|
|
252
|
+
self._add_triples(extractor.extract(), named_graph=named_graph)
|
|
207
253
|
|
|
208
254
|
if success:
|
|
209
255
|
_end = datetime.now(timezone.utc)
|
|
@@ -218,7 +264,7 @@ class NeatGraphStore:
|
|
|
218
264
|
activity=activity,
|
|
219
265
|
start=_start,
|
|
220
266
|
end=_end,
|
|
221
|
-
description=f"Extracted triples to graph
|
|
267
|
+
description=f"Extracted triples to named graph {named_graph} using {type(extractor).__name__}",
|
|
222
268
|
)
|
|
223
269
|
self.provenance.append(last_change)
|
|
224
270
|
if last_change:
|
|
@@ -226,17 +272,35 @@ class NeatGraphStore:
|
|
|
226
272
|
return issue_list
|
|
227
273
|
|
|
228
274
|
def _read_via_rules_linkage(
|
|
229
|
-
self,
|
|
275
|
+
self,
|
|
276
|
+
class_neat_id: URIRef,
|
|
277
|
+
property_link_pairs: dict[str, URIRef] | None,
|
|
278
|
+
named_graph: URIRef | None = None,
|
|
230
279
|
) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
|
|
231
|
-
|
|
232
|
-
|
|
280
|
+
named_graph = named_graph or self.default_named_graph
|
|
281
|
+
|
|
282
|
+
if named_graph not in self.named_graphs:
|
|
283
|
+
warnings.warn(
|
|
284
|
+
f"Named graph {named_graph} not found in graph store, cannot read",
|
|
285
|
+
stacklevel=2,
|
|
286
|
+
)
|
|
233
287
|
return
|
|
288
|
+
|
|
289
|
+
if not self.rules or named_graph not in self.rules:
|
|
290
|
+
warnings.warn(
|
|
291
|
+
f"Rules for named graph {named_graph} not found in graph store!",
|
|
292
|
+
stacklevel=2,
|
|
293
|
+
)
|
|
294
|
+
return
|
|
295
|
+
|
|
234
296
|
if self.multi_type_instances:
|
|
235
297
|
warnings.warn(
|
|
236
298
|
"Multi typed instances detected, issues with loading can occur!",
|
|
237
299
|
stacklevel=2,
|
|
238
300
|
)
|
|
239
|
-
|
|
301
|
+
|
|
302
|
+
analysis = InformationAnalysis(self.rules[named_graph])
|
|
303
|
+
|
|
240
304
|
if cls := analysis.classes_by_neat_id.get(class_neat_id):
|
|
241
305
|
if property_link_pairs:
|
|
242
306
|
property_renaming_config = {
|
|
@@ -272,9 +336,22 @@ class NeatGraphStore:
|
|
|
272
336
|
self,
|
|
273
337
|
class_entity: ClassEntity,
|
|
274
338
|
property_renaming_config: dict[URIRef, str] | None = None,
|
|
339
|
+
named_graph: URIRef | None = None,
|
|
275
340
|
) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
|
|
276
|
-
|
|
277
|
-
|
|
341
|
+
named_graph = named_graph or self.default_named_graph
|
|
342
|
+
|
|
343
|
+
if named_graph not in self.named_graphs:
|
|
344
|
+
warnings.warn(
|
|
345
|
+
f"Named graph {named_graph} not found in graph store, cannot read",
|
|
346
|
+
stacklevel=2,
|
|
347
|
+
)
|
|
348
|
+
return
|
|
349
|
+
|
|
350
|
+
if not self.rules or named_graph not in self.rules:
|
|
351
|
+
warnings.warn(
|
|
352
|
+
f"Rules for named graph {named_graph} not found in graph store!",
|
|
353
|
+
stacklevel=2,
|
|
354
|
+
)
|
|
278
355
|
return
|
|
279
356
|
if self.multi_type_instances:
|
|
280
357
|
warnings.warn(
|
|
@@ -282,28 +359,28 @@ class NeatGraphStore:
|
|
|
282
359
|
stacklevel=2,
|
|
283
360
|
)
|
|
284
361
|
|
|
285
|
-
if class_entity not in [definition.class_ for definition in self.rules.classes]:
|
|
362
|
+
if class_entity not in [definition.class_ for definition in self.rules[named_graph].classes]:
|
|
286
363
|
warnings.warn("Desired type not found in graph!", stacklevel=2)
|
|
287
364
|
return
|
|
288
365
|
|
|
289
|
-
if not (class_uri := InformationAnalysis(self.rules).class_uri(class_entity)):
|
|
366
|
+
if not (class_uri := InformationAnalysis(self.rules[named_graph]).class_uri(class_entity)):
|
|
290
367
|
warnings.warn(
|
|
291
368
|
f"Class {class_entity.suffix} does not have namespace defined for prefix {class_entity.prefix} Rules!",
|
|
292
369
|
stacklevel=2,
|
|
293
370
|
)
|
|
294
371
|
return
|
|
295
372
|
|
|
296
|
-
has_hop_transformations = InformationAnalysis(self.rules).has_hop_transformations()
|
|
373
|
+
has_hop_transformations = InformationAnalysis(self.rules[named_graph]).has_hop_transformations()
|
|
297
374
|
has_self_reference_transformations = InformationAnalysis(
|
|
298
|
-
self.rules
|
|
375
|
+
self.rules[named_graph]
|
|
299
376
|
).has_self_reference_property_transformations()
|
|
300
377
|
if has_hop_transformations or has_self_reference_transformations:
|
|
301
378
|
msg = (
|
|
302
|
-
f"Rules contain [{'Hop' if has_hop_transformations else ''
|
|
303
|
-
f", {'SelfReferenceProperty' if has_self_reference_transformations else ''
|
|
379
|
+
f"Rules contain [{'Hop' if has_hop_transformations else ''}"
|
|
380
|
+
f", {'SelfReferenceProperty' if has_self_reference_transformations else ''}]"
|
|
304
381
|
" rdfpath."
|
|
305
|
-
f" Run [{'ReduceHopTraversal' if has_hop_transformations else ''
|
|
306
|
-
f", {'AddSelfReferenceProperty' if has_self_reference_transformations else ''
|
|
382
|
+
f" Run [{'ReduceHopTraversal' if has_hop_transformations else ''}"
|
|
383
|
+
f", {'AddSelfReferenceProperty' if has_self_reference_transformations else ''}]"
|
|
307
384
|
" transformer(s) first!"
|
|
308
385
|
)
|
|
309
386
|
|
|
@@ -318,23 +395,19 @@ class NeatGraphStore:
|
|
|
318
395
|
|
|
319
396
|
# get potential property renaming config
|
|
320
397
|
property_renaming_config = property_renaming_config or InformationAnalysis(
|
|
321
|
-
self.rules
|
|
398
|
+
self.rules[named_graph]
|
|
322
399
|
).define_property_renaming_config(class_entity)
|
|
323
400
|
|
|
324
|
-
# get property types to guide process of removing or not namespaces from results
|
|
325
|
-
property_types = InformationAnalysis(self.rules).property_types(class_entity)
|
|
326
401
|
for instance_id in instance_ids:
|
|
327
402
|
if res := self.queries.describe(
|
|
328
403
|
instance_id=instance_id,
|
|
329
404
|
instance_type=class_entity.suffix,
|
|
330
405
|
property_renaming_config=property_renaming_config,
|
|
331
|
-
property_types=property_types,
|
|
332
406
|
):
|
|
333
407
|
yield res
|
|
334
408
|
|
|
335
409
|
def read(
|
|
336
|
-
self,
|
|
337
|
-
class_: str,
|
|
410
|
+
self, class_: str, named_graph: URIRef | None = None
|
|
338
411
|
) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
|
|
339
412
|
"""Read instances for given class from the graph store.
|
|
340
413
|
|
|
@@ -343,9 +416,20 @@ class NeatGraphStore:
|
|
|
343
416
|
the rules which are attached to the graph store.
|
|
344
417
|
|
|
345
418
|
"""
|
|
419
|
+
named_graph = named_graph or self.default_named_graph
|
|
346
420
|
|
|
347
|
-
if not self.
|
|
348
|
-
warnings.warn(
|
|
421
|
+
if named_graph not in self.named_graphs:
|
|
422
|
+
warnings.warn(
|
|
423
|
+
f"Named graph {named_graph} not found in graph store, cannot read",
|
|
424
|
+
stacklevel=2,
|
|
425
|
+
)
|
|
426
|
+
return
|
|
427
|
+
|
|
428
|
+
if not self.rules or named_graph not in self.rules:
|
|
429
|
+
warnings.warn(
|
|
430
|
+
f"Rules for named graph {named_graph} not found in graph store!",
|
|
431
|
+
stacklevel=2,
|
|
432
|
+
)
|
|
349
433
|
return
|
|
350
434
|
if self.multi_type_instances:
|
|
351
435
|
warnings.warn(
|
|
@@ -353,15 +437,15 @@ class NeatGraphStore:
|
|
|
353
437
|
stacklevel=2,
|
|
354
438
|
)
|
|
355
439
|
|
|
356
|
-
class_entity = ClassEntity(prefix=self.rules.metadata.prefix, suffix=class_)
|
|
440
|
+
class_entity = ClassEntity(prefix=self.rules[named_graph].metadata.prefix, suffix=class_)
|
|
357
441
|
|
|
358
|
-
if class_entity not in [definition.class_ for definition in self.rules.classes]:
|
|
442
|
+
if class_entity not in [definition.class_ for definition in self.rules[named_graph].classes]:
|
|
359
443
|
warnings.warn("Desired type not found in graph!", stacklevel=2)
|
|
360
444
|
return
|
|
361
445
|
|
|
362
446
|
yield from self._read_via_class_entity(class_entity)
|
|
363
447
|
|
|
364
|
-
def count_of_id(self, neat_id: URIRef) -> int:
|
|
448
|
+
def count_of_id(self, neat_id: URIRef, named_graph: URIRef | None = None) -> int:
|
|
365
449
|
"""Count the number of instances of a given type
|
|
366
450
|
|
|
367
451
|
Args:
|
|
@@ -370,18 +454,31 @@ class NeatGraphStore:
|
|
|
370
454
|
Returns:
|
|
371
455
|
Number of instances
|
|
372
456
|
"""
|
|
373
|
-
|
|
374
|
-
|
|
457
|
+
named_graph = named_graph or self.default_named_graph
|
|
458
|
+
|
|
459
|
+
if named_graph not in self.named_graphs:
|
|
460
|
+
warnings.warn(
|
|
461
|
+
f"Named graph {named_graph} not found in graph store, cannot count",
|
|
462
|
+
stacklevel=2,
|
|
463
|
+
)
|
|
464
|
+
return 0
|
|
465
|
+
|
|
466
|
+
if not self.rules or named_graph not in self.rules:
|
|
467
|
+
warnings.warn(
|
|
468
|
+
f"Rules for named graph {named_graph} not found in graph store!",
|
|
469
|
+
stacklevel=2,
|
|
470
|
+
)
|
|
375
471
|
return 0
|
|
376
472
|
|
|
377
473
|
class_entity = next(
|
|
378
|
-
(definition.class_ for definition in self.rules.classes if definition.neatId == neat_id),
|
|
474
|
+
(definition.class_ for definition in self.rules[named_graph].classes if definition.neatId == neat_id),
|
|
475
|
+
None,
|
|
379
476
|
)
|
|
380
477
|
if not class_entity:
|
|
381
478
|
warnings.warn("Desired type not found in graph!", stacklevel=2)
|
|
382
479
|
return 0
|
|
383
480
|
|
|
384
|
-
if not (class_uri := InformationAnalysis(self.rules).class_uri(class_entity)):
|
|
481
|
+
if not (class_uri := InformationAnalysis(self.rules[named_graph]).class_uri(class_entity)):
|
|
385
482
|
warnings.warn(
|
|
386
483
|
f"Class {class_entity.suffix} does not have namespace defined for prefix {class_entity.prefix} Rules!",
|
|
387
484
|
stacklevel=2,
|
|
@@ -392,10 +489,11 @@ class NeatGraphStore:
|
|
|
392
489
|
|
|
393
490
|
def count_of_type(self, class_uri: URIRef) -> int:
|
|
394
491
|
query = f"SELECT (COUNT(?instance) AS ?instanceCount) WHERE {{ ?instance a <{class_uri}> }}"
|
|
395
|
-
return int(next(iter(self.
|
|
492
|
+
return int(next(iter(self.dataset.query(query)))[0]) # type: ignore[arg-type, index]
|
|
396
493
|
|
|
397
494
|
def _parse_file(
|
|
398
495
|
self,
|
|
496
|
+
named_graph: URIRef,
|
|
399
497
|
filepath: Path | ZipExtFile,
|
|
400
498
|
format: str = "turtle",
|
|
401
499
|
base_uri: URIRef | None = None,
|
|
@@ -403,6 +501,7 @@ class NeatGraphStore:
|
|
|
403
501
|
"""Imports graph data from file.
|
|
404
502
|
|
|
405
503
|
Args:
|
|
504
|
+
named_graph : URIRef of the named graph to store the data in
|
|
406
505
|
filepath : File path to file containing graph data, by default None
|
|
407
506
|
format : rdflib format file containing RDF graph, by default "turtle"
|
|
408
507
|
base_uri : base URI to add to graph in case of relative URIs, by default None
|
|
@@ -419,25 +518,35 @@ class NeatGraphStore:
|
|
|
419
518
|
if self.type_ == "OxigraphStore":
|
|
420
519
|
local_import("pyoxigraph", "oxi")
|
|
421
520
|
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
521
|
+
if format in quad_formats():
|
|
522
|
+
self.dataset.parse(
|
|
523
|
+
filepath, # type: ignore[arg-type]
|
|
524
|
+
format=rdflib_to_oxi_type(format),
|
|
525
|
+
transactional=False,
|
|
526
|
+
publicID=base_uri,
|
|
527
|
+
)
|
|
528
|
+
else:
|
|
529
|
+
self.graph(named_graph).parse(
|
|
530
|
+
filepath, # type: ignore[arg-type]
|
|
531
|
+
format=rdflib_to_oxi_type(format),
|
|
532
|
+
transactional=False,
|
|
533
|
+
publicID=base_uri,
|
|
534
|
+
)
|
|
535
|
+
self.dataset.store._store.optimize() # type: ignore[attr-defined]
|
|
430
536
|
|
|
431
537
|
# All other stores
|
|
432
538
|
else:
|
|
433
|
-
if
|
|
434
|
-
self.
|
|
539
|
+
if format in quad_formats():
|
|
540
|
+
self.dataset.parse(filepath, publicID=base_uri, format=format) # type: ignore[arg-type]
|
|
435
541
|
else:
|
|
436
|
-
|
|
437
|
-
if filename.is_file():
|
|
438
|
-
self.graph.parse(filename, publicID=base_uri)
|
|
542
|
+
self.graph(named_graph).parse(filepath, publicID=base_uri, format=format) # type: ignore[arg-type]
|
|
439
543
|
|
|
440
|
-
def _add_triples(
|
|
544
|
+
def _add_triples(
|
|
545
|
+
self,
|
|
546
|
+
triples: Iterable[Triple],
|
|
547
|
+
named_graph: URIRef,
|
|
548
|
+
batch_size: int = 10_000,
|
|
549
|
+
) -> None:
|
|
441
550
|
"""Adds triples to the graph store in batches.
|
|
442
551
|
|
|
443
552
|
Args:
|
|
@@ -445,66 +554,103 @@ class NeatGraphStore:
|
|
|
445
554
|
batch_size: Batch size of triples per commit, by default 10_000
|
|
446
555
|
verbose: Verbose mode, by default False
|
|
447
556
|
"""
|
|
448
|
-
add_triples_in_batch(self.graph, triples, batch_size)
|
|
557
|
+
add_triples_in_batch(self.graph(named_graph), triples, batch_size)
|
|
449
558
|
|
|
450
|
-
def transform(self, transformer: Transformers) -> None:
|
|
559
|
+
def transform(self, transformer: Transformers, named_graph: URIRef | None = None) -> None:
|
|
451
560
|
"""Transforms the graph store using a transformer."""
|
|
452
561
|
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
562
|
+
named_graph = named_graph or self.default_named_graph
|
|
563
|
+
if named_graph in self.named_graphs:
|
|
564
|
+
missing_changes = [
|
|
565
|
+
change for change in transformer._need_changes if not self.provenance.activity_took_place(change)
|
|
566
|
+
]
|
|
567
|
+
if self.provenance.activity_took_place(type(transformer).__name__) and transformer._use_only_once:
|
|
568
|
+
warnings.warn(
|
|
569
|
+
f"Cannot transform graph store with {type(transformer).__name__}, already applied",
|
|
570
|
+
stacklevel=2,
|
|
571
|
+
)
|
|
572
|
+
elif missing_changes:
|
|
573
|
+
warnings.warn(
|
|
574
|
+
(
|
|
575
|
+
f"Cannot transform graph store with {type(transformer).__name__}, "
|
|
576
|
+
f"missing one or more required changes [{', '.join(missing_changes)}]"
|
|
577
|
+
),
|
|
578
|
+
stacklevel=2,
|
|
579
|
+
)
|
|
469
580
|
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
581
|
+
else:
|
|
582
|
+
_start = datetime.now(timezone.utc)
|
|
583
|
+
transformer.transform(self.graph(named_graph))
|
|
584
|
+
self.provenance.append(
|
|
585
|
+
Change.record(
|
|
586
|
+
activity=f"{type(transformer).__name__}",
|
|
587
|
+
start=_start,
|
|
588
|
+
end=datetime.now(timezone.utc),
|
|
589
|
+
description=transformer.description,
|
|
590
|
+
)
|
|
479
591
|
)
|
|
592
|
+
|
|
593
|
+
else:
|
|
594
|
+
warnings.warn(
|
|
595
|
+
f"Named graph {named_graph} not found in graph store, cannot transform",
|
|
596
|
+
stacklevel=2,
|
|
480
597
|
)
|
|
481
598
|
|
|
482
599
|
@property
|
|
483
|
-
def summary(self) -> pd.DataFrame:
|
|
484
|
-
return
|
|
600
|
+
def summary(self) -> dict[URIRef, pd.DataFrame]:
|
|
601
|
+
return {
|
|
602
|
+
named_graph: pd.DataFrame(
|
|
603
|
+
self.queries.summarize_instances(named_graph),
|
|
604
|
+
columns=["Type", "Occurrence"],
|
|
605
|
+
)
|
|
606
|
+
for named_graph in self.named_graphs
|
|
607
|
+
}
|
|
485
608
|
|
|
486
609
|
@property
|
|
487
|
-
def multi_type_instances(self) -> dict[str, list[str]]:
|
|
488
|
-
return self.queries.multi_type_instances()
|
|
610
|
+
def multi_type_instances(self) -> dict[URIRef, dict[str, list[str]]]:
|
|
611
|
+
return {named_graph: self.queries.multi_type_instances(named_graph) for named_graph in self.named_graphs}
|
|
489
612
|
|
|
490
613
|
def _repr_html_(self) -> str:
|
|
491
614
|
provenance = self.provenance._repr_html_()
|
|
492
|
-
summary: pd.DataFrame = self.summary
|
|
615
|
+
summary: dict[URIRef, pd.DataFrame] = self.summary
|
|
616
|
+
|
|
617
|
+
def _short_name_of_graph(named_graph: URIRef) -> str:
|
|
618
|
+
return "default" if named_graph == self.default_named_graph else remove_namespace_from_uri(named_graph)
|
|
493
619
|
|
|
494
|
-
if summary
|
|
620
|
+
if not summary:
|
|
495
621
|
summary_text = "<br /><strong>Graph is empty</strong><br />"
|
|
496
622
|
else:
|
|
623
|
+
all_types = set().union(
|
|
624
|
+
*[set(sub_summary.Type) for sub_summary in summary.values() if not sub_summary.empty]
|
|
625
|
+
)
|
|
626
|
+
|
|
497
627
|
summary_text = (
|
|
498
628
|
"<br /><strong>Overview</strong>:" # type: ignore
|
|
499
|
-
f"<ul><li>{len(summary)}
|
|
500
|
-
f"<li>{
|
|
501
|
-
f"{cast(pd.DataFrame, self._shorten_summary(summary))._repr_html_()}" # type: ignore[operator]
|
|
629
|
+
f"<ul><li>{len(summary)} named graphs</strong></li>"
|
|
630
|
+
f"<li>Total of {len(all_types)} unique types</strong></li>"
|
|
502
631
|
)
|
|
503
632
|
|
|
504
|
-
|
|
505
|
-
|
|
633
|
+
for named_graph, table in summary.items():
|
|
634
|
+
summary_text += (
|
|
635
|
+
f"<li>{sum(table['Occurrence'])} instances in {_short_name_of_graph(named_graph)}"
|
|
636
|
+
" graph</strong></li>"
|
|
637
|
+
)
|
|
638
|
+
|
|
639
|
+
summary_text += "</ul>"
|
|
640
|
+
for named_graph, table in summary.items():
|
|
641
|
+
summary_text += (
|
|
642
|
+
f"<br /><strong>{_short_name_of_graph(named_graph)} graph</strong>:"
|
|
643
|
+
f"{cast(pd.DataFrame, self._shorten_summary(table))._repr_html_()}" # type: ignore[operator]
|
|
644
|
+
)
|
|
645
|
+
|
|
646
|
+
for named_graph, multi_value_instances in self.multi_type_instances.items():
|
|
647
|
+
if multi_value_instances:
|
|
648
|
+
summary_text += (
|
|
649
|
+
f"<br><strong>Multi value instances detected in {_short_name_of_graph(named_graph)}"
|
|
650
|
+
"graph! Loading could have issues!</strong></br>"
|
|
651
|
+
)
|
|
506
652
|
|
|
507
|
-
return f"{summary_text}
|
|
653
|
+
return f"{summary_text}{provenance}"
|
|
508
654
|
|
|
509
655
|
def _shorten_summary(self, summary: pd.DataFrame) -> pd.DataFrame:
|
|
510
656
|
"""Shorten summary to top 5 types by occurrence."""
|
|
@@ -531,3 +677,12 @@ class NeatGraphStore:
|
|
|
531
677
|
shorter_summary.index = cast(Index, indexes)
|
|
532
678
|
|
|
533
679
|
return shorter_summary
|
|
680
|
+
|
|
681
|
+
@property
|
|
682
|
+
def named_graphs(self) -> list[URIRef]:
|
|
683
|
+
return [cast(URIRef, context.identifier) for context in self.dataset.contexts()]
|
|
684
|
+
|
|
685
|
+
@property
|
|
686
|
+
def empty(self) -> bool:
|
|
687
|
+
"""Cheap way to check if the graph store is empty."""
|
|
688
|
+
return not self.queries.has_data()
|