cognite-neat 0.107.0__py3-none-any.whl → 0.108.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_constants.py +35 -1
- cognite/neat/_graph/_shared.py +4 -0
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +115 -14
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +83 -6
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +48 -12
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +19 -1
- cognite/neat/_graph/extractors/_dms.py +162 -47
- cognite/neat/_graph/extractors/_dms_graph.py +54 -4
- cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
- cognite/neat/_graph/extractors/_rdf_file.py +3 -2
- cognite/neat/_graph/loaders/__init__.py +1 -3
- cognite/neat/_graph/loaders/_rdf2dms.py +20 -10
- cognite/neat/_graph/queries/_base.py +140 -84
- cognite/neat/_graph/queries/_construct.py +1 -1
- cognite/neat/_graph/transformers/__init__.py +3 -1
- cognite/neat/_graph/transformers/_value_type.py +54 -3
- cognite/neat/_issues/errors/_resources.py +1 -1
- cognite/neat/_issues/warnings/__init__.py +0 -2
- cognite/neat/_issues/warnings/_models.py +1 -1
- cognite/neat/_issues/warnings/_properties.py +0 -8
- cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
- cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
- cognite/neat/_rules/importers/__init__.py +3 -1
- cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
- cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
- cognite/neat/_rules/importers/_rdf/_base.py +2 -2
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +241 -18
- cognite/neat/_rules/models/_base_rules.py +13 -3
- cognite/neat/_rules/models/dms/_rules.py +1 -8
- cognite/neat/_rules/models/dms/_rules_input.py +4 -0
- cognite/neat/_rules/models/information/_rules_input.py +5 -0
- cognite/neat/_rules/transformers/__init__.py +6 -0
- cognite/neat/_rules/transformers/_converters.py +98 -7
- cognite/neat/_session/_base.py +55 -4
- cognite/neat/_session/_drop.py +5 -1
- cognite/neat/_session/_inspect.py +3 -2
- cognite/neat/_session/_read.py +61 -14
- cognite/neat/_session/_set.py +27 -0
- cognite/neat/_session/_show.py +4 -4
- cognite/neat/_session/_state.py +8 -4
- cognite/neat/_session/_to.py +4 -1
- cognite/neat/_session/_wizard.py +1 -1
- cognite/neat/_session/exceptions.py +2 -1
- cognite/neat/_store/_graph_store.py +287 -133
- cognite/neat/_store/_rules_store.py +108 -1
- cognite/neat/_utils/auth.py +1 -1
- cognite/neat/_version.py +1 -1
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/METADATA +1 -1
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/RECORD +52 -52
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/entry_points.txt +0 -0
|
@@ -8,11 +8,11 @@ from zipfile import ZipExtFile
|
|
|
8
8
|
|
|
9
9
|
import pandas as pd
|
|
10
10
|
from pandas import Index
|
|
11
|
-
from rdflib import Dataset, Namespace, URIRef
|
|
11
|
+
from rdflib import Dataset, Graph, Namespace, URIRef
|
|
12
|
+
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
|
|
12
13
|
from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
|
|
13
14
|
|
|
14
|
-
from cognite.neat.
|
|
15
|
-
from cognite.neat._graph._shared import rdflib_to_oxi_type
|
|
15
|
+
from cognite.neat._graph._shared import quad_formats, rdflib_to_oxi_type
|
|
16
16
|
from cognite.neat._graph.extractors import RdfFileExtractor, TripleExtractors
|
|
17
17
|
from cognite.neat._graph.queries import Queries
|
|
18
18
|
from cognite.neat._graph.transformers import Transformers
|
|
@@ -22,7 +22,7 @@ from cognite.neat._rules.models import InformationRules
|
|
|
22
22
|
from cognite.neat._rules.models.entities import ClassEntity
|
|
23
23
|
from cognite.neat._shared import InstanceType, Triple
|
|
24
24
|
from cognite.neat._utils.auxiliary import local_import
|
|
25
|
-
from cognite.neat._utils.rdf_ import add_triples_in_batch
|
|
25
|
+
from cognite.neat._utils.rdf_ import add_triples_in_batch, remove_namespace_from_uri
|
|
26
26
|
|
|
27
27
|
from ._provenance import Change, Provenance
|
|
28
28
|
|
|
@@ -39,41 +39,49 @@ class NeatGraphStore:
|
|
|
39
39
|
Args:
|
|
40
40
|
graph : Instance of rdflib.Graph class for graph storage
|
|
41
41
|
rules:
|
|
42
|
+
|
|
43
|
+
!!! note "Dataset"
|
|
44
|
+
The store leverages a RDF dataset which is defined as a collection of RDF graphs
|
|
45
|
+
where all but one are named graphs associated with URIRef (the graph name),
|
|
46
|
+
and the unnamed default graph which is in context of rdflib library has an
|
|
47
|
+
identifier URIRef('urn:x-rdflib:default').
|
|
42
48
|
"""
|
|
43
49
|
|
|
44
50
|
rdf_store_type: str
|
|
45
51
|
|
|
46
52
|
def __init__(
|
|
47
53
|
self,
|
|
48
|
-
|
|
49
|
-
|
|
54
|
+
dataset: Dataset,
|
|
55
|
+
default_named_graph: URIRef | None = None,
|
|
50
56
|
):
|
|
51
|
-
self.rules: InformationRules
|
|
57
|
+
self.rules: dict[URIRef, InformationRules] = {}
|
|
58
|
+
self.base_namespace: dict[URIRef, Namespace] = {}
|
|
52
59
|
|
|
53
60
|
_start = datetime.now(timezone.utc)
|
|
54
|
-
self.
|
|
61
|
+
self.dataset = dataset
|
|
55
62
|
self.provenance = Provenance(
|
|
56
63
|
[
|
|
57
64
|
Change.record(
|
|
58
65
|
activity=f"{type(self).__name__}.__init__",
|
|
59
66
|
start=_start,
|
|
60
67
|
end=datetime.now(timezone.utc),
|
|
61
|
-
description=f"Initialize graph store as {type(self.
|
|
68
|
+
description=f"Initialize graph store as {type(self.dataset.store).__name__}",
|
|
62
69
|
)
|
|
63
70
|
]
|
|
64
71
|
)
|
|
65
72
|
|
|
66
|
-
|
|
67
|
-
self.add_rules(rules)
|
|
68
|
-
else:
|
|
69
|
-
self.base_namespace = DEFAULT_NAMESPACE
|
|
73
|
+
self.default_named_graph = default_named_graph or DATASET_DEFAULT_GRAPH_ID
|
|
70
74
|
|
|
71
|
-
self.queries = Queries(self.
|
|
75
|
+
self.queries = Queries(self.dataset, self.rules, self.default_named_graph)
|
|
76
|
+
|
|
77
|
+
def graph(self, named_graph: URIRef | None = None) -> Graph:
|
|
78
|
+
"""Get named graph from the dataset to query over"""
|
|
79
|
+
return self.dataset.graph(named_graph or self.default_named_graph)
|
|
72
80
|
|
|
73
81
|
@property
|
|
74
82
|
def type_(self) -> str:
|
|
75
83
|
"Return type of the graph store"
|
|
76
|
-
return type(self.
|
|
84
|
+
return type(self.dataset.store).__name__
|
|
77
85
|
|
|
78
86
|
# no destination
|
|
79
87
|
@overload
|
|
@@ -91,54 +99,70 @@ class NeatGraphStore:
|
|
|
91
99
|
|
|
92
100
|
Returns:
|
|
93
101
|
Serialized graph store
|
|
102
|
+
|
|
103
|
+
!!! note "Trig Format"
|
|
104
|
+
Notice that instead of turtle format we are using trig format for serialization.
|
|
105
|
+
This is because trig format is a superset of turtle format and it allows us to
|
|
106
|
+
serialize named graphs as well. Allowing serialization of one or more named graphs
|
|
107
|
+
including the default graph.
|
|
94
108
|
"""
|
|
95
109
|
if filepath:
|
|
96
|
-
self.
|
|
110
|
+
self.dataset.serialize(
|
|
97
111
|
filepath,
|
|
98
|
-
format="ox-trig" if self.type_ == "OxigraphStore" else "
|
|
112
|
+
format="ox-trig" if self.type_ == "OxigraphStore" else "trig",
|
|
99
113
|
)
|
|
100
114
|
return None
|
|
101
115
|
else:
|
|
102
|
-
return self.
|
|
116
|
+
return self.dataset.serialize(format="ox-trig" if self.type_ == "OxigraphStore" else "trig")
|
|
117
|
+
|
|
118
|
+
def add_rules(self, rules: InformationRules, named_graph: URIRef | None = None) -> None:
|
|
119
|
+
"""This method is used to add rules to a named graph stored in the graph store.
|
|
120
|
+
|
|
121
|
+
Args:
|
|
122
|
+
rules: InformationRules object containing rules to be added to the named graph
|
|
123
|
+
named_graph: URIRef of the named graph to store the rules in, by default None
|
|
124
|
+
rules will be added to the default graph
|
|
103
125
|
|
|
104
|
-
def add_rules(self, rules: InformationRules) -> None:
|
|
105
|
-
"""This method is used to add rules to the graph store and it is the only correct
|
|
106
|
-
way to add rules to the graph store, after the graph store has been initialized.
|
|
107
126
|
"""
|
|
108
127
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
128
|
+
named_graph = named_graph or self.default_named_graph
|
|
129
|
+
|
|
130
|
+
if named_graph in self.named_graphs:
|
|
131
|
+
# attaching appropriate namespace to the rules
|
|
132
|
+
# as well base_namespace
|
|
133
|
+
self.rules[named_graph] = rules
|
|
134
|
+
self.base_namespace[named_graph] = rules.metadata.namespace
|
|
135
|
+
self.queries = Queries(self.dataset, self.rules)
|
|
136
|
+
self.provenance.append(
|
|
137
|
+
Change.record(
|
|
138
|
+
activity=f"{type(self)}.rules",
|
|
139
|
+
start=datetime.now(timezone.utc),
|
|
140
|
+
end=datetime.now(timezone.utc),
|
|
141
|
+
description=f"Added {type(self.rules).__name__} to {named_graph} named graph",
|
|
142
|
+
)
|
|
118
143
|
)
|
|
119
|
-
)
|
|
120
144
|
|
|
121
|
-
|
|
122
|
-
|
|
145
|
+
if self.rules[named_graph].prefixes:
|
|
146
|
+
self._upsert_prefixes(self.rules[named_graph].prefixes, named_graph)
|
|
123
147
|
|
|
124
|
-
def _upsert_prefixes(self, prefixes: dict[str, Namespace]) -> None:
|
|
148
|
+
def _upsert_prefixes(self, prefixes: dict[str, Namespace], named_graph: URIRef) -> None:
|
|
125
149
|
"""Adds prefixes to the graph store."""
|
|
126
150
|
_start = datetime.now(timezone.utc)
|
|
127
151
|
for prefix, namespace in prefixes.items():
|
|
128
|
-
self.graph.bind(prefix, namespace)
|
|
152
|
+
self.graph(named_graph).bind(prefix, namespace)
|
|
129
153
|
|
|
130
154
|
self.provenance.append(
|
|
131
155
|
Change.record(
|
|
132
156
|
activity=f"{type(self).__name__}._upsert_prefixes",
|
|
133
157
|
start=_start,
|
|
134
158
|
end=datetime.now(timezone.utc),
|
|
135
|
-
description="Upsert prefixes to graph
|
|
159
|
+
description="Upsert prefixes to the name graph {named_graph}",
|
|
136
160
|
)
|
|
137
161
|
)
|
|
138
162
|
|
|
139
163
|
@classmethod
|
|
140
|
-
def from_memory_store(cls
|
|
141
|
-
return cls(Dataset()
|
|
164
|
+
def from_memory_store(cls) -> "Self":
|
|
165
|
+
return cls(Dataset())
|
|
142
166
|
|
|
143
167
|
@classmethod
|
|
144
168
|
def from_sparql_store(
|
|
@@ -146,7 +170,6 @@ class NeatGraphStore:
|
|
|
146
170
|
query_endpoint: str | None = None,
|
|
147
171
|
update_endpoint: str | None = None,
|
|
148
172
|
returnFormat: str = "csv",
|
|
149
|
-
rules: InformationRules | None = None,
|
|
150
173
|
) -> "Self":
|
|
151
174
|
store = SPARQLUpdateStore(
|
|
152
175
|
query_endpoint=query_endpoint,
|
|
@@ -157,10 +180,27 @@ class NeatGraphStore:
|
|
|
157
180
|
autocommit=False,
|
|
158
181
|
)
|
|
159
182
|
graph = Dataset(store=store)
|
|
160
|
-
return cls(graph
|
|
183
|
+
return cls(graph)
|
|
184
|
+
|
|
185
|
+
@classmethod
|
|
186
|
+
def from_oxi_remote_store(
|
|
187
|
+
cls,
|
|
188
|
+
remote_url: str,
|
|
189
|
+
autocommit: bool = False,
|
|
190
|
+
) -> "Self":
|
|
191
|
+
"""Creates a NeatGraphStore from a remote Oxigraph store SPARQL endpoint."""
|
|
192
|
+
|
|
193
|
+
return cls(
|
|
194
|
+
dataset=Dataset(
|
|
195
|
+
store=SPARQLUpdateStore(
|
|
196
|
+
query_endpoint=f"{remote_url}/query", update_endpoint=f"{remote_url}/query", autocommit=autocommit
|
|
197
|
+
),
|
|
198
|
+
default_union=True,
|
|
199
|
+
)
|
|
200
|
+
)
|
|
161
201
|
|
|
162
202
|
@classmethod
|
|
163
|
-
def
|
|
203
|
+
def from_oxi_local_store(cls, storage_dir: Path | None = None) -> "Self":
|
|
164
204
|
"""Creates a NeatGraphStore from an Oxigraph store."""
|
|
165
205
|
local_import("pyoxigraph", "oxi")
|
|
166
206
|
local_import("oxrdflib", "oxi")
|
|
@@ -179,31 +219,41 @@ class NeatGraphStore:
|
|
|
179
219
|
else:
|
|
180
220
|
raise Exception("Error initializing Oxigraph store")
|
|
181
221
|
|
|
182
|
-
|
|
183
|
-
|
|
222
|
+
return cls(
|
|
223
|
+
dataset=Dataset(
|
|
224
|
+
store=oxrdflib.OxigraphStore(store=oxi_store),
|
|
225
|
+
)
|
|
184
226
|
)
|
|
185
227
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
def write(self, extractor: TripleExtractors) -> IssueList:
|
|
228
|
+
def write(self, extractor: TripleExtractors, named_graph: URIRef | None = None) -> IssueList:
|
|
189
229
|
last_change: Change | None = None
|
|
230
|
+
named_graph = named_graph or self.default_named_graph
|
|
190
231
|
with catch_issues() as issue_list:
|
|
191
232
|
_start = datetime.now(timezone.utc)
|
|
192
233
|
success = True
|
|
193
234
|
|
|
194
235
|
if isinstance(extractor, RdfFileExtractor) and not extractor.issue_list.has_errors:
|
|
195
|
-
self._parse_file(
|
|
236
|
+
self._parse_file(
|
|
237
|
+
named_graph,
|
|
238
|
+
extractor.filepath,
|
|
239
|
+
cast(str, extractor.format),
|
|
240
|
+
extractor.base_uri,
|
|
241
|
+
)
|
|
196
242
|
if isinstance(extractor.filepath, ZipExtFile):
|
|
197
243
|
extractor.filepath.close()
|
|
244
|
+
|
|
198
245
|
elif isinstance(extractor, RdfFileExtractor):
|
|
199
246
|
success = False
|
|
200
247
|
issue_text = "\n".join([issue.as_message() for issue in extractor.issue_list])
|
|
201
248
|
warnings.warn(
|
|
202
|
-
|
|
249
|
+
(
|
|
250
|
+
f"Cannot write to named graph {named_graph} with "
|
|
251
|
+
f"{type(extractor).__name__}, errors found in file:\n{issue_text}"
|
|
252
|
+
),
|
|
203
253
|
stacklevel=2,
|
|
204
254
|
)
|
|
205
255
|
else:
|
|
206
|
-
self._add_triples(extractor.extract())
|
|
256
|
+
self._add_triples(extractor.extract(), named_graph=named_graph)
|
|
207
257
|
|
|
208
258
|
if success:
|
|
209
259
|
_end = datetime.now(timezone.utc)
|
|
@@ -218,7 +268,7 @@ class NeatGraphStore:
|
|
|
218
268
|
activity=activity,
|
|
219
269
|
start=_start,
|
|
220
270
|
end=_end,
|
|
221
|
-
description=f"Extracted triples to graph
|
|
271
|
+
description=f"Extracted triples to named graph {named_graph} using {type(extractor).__name__}",
|
|
222
272
|
)
|
|
223
273
|
self.provenance.append(last_change)
|
|
224
274
|
if last_change:
|
|
@@ -226,17 +276,35 @@ class NeatGraphStore:
|
|
|
226
276
|
return issue_list
|
|
227
277
|
|
|
228
278
|
def _read_via_rules_linkage(
|
|
229
|
-
self,
|
|
279
|
+
self,
|
|
280
|
+
class_neat_id: URIRef,
|
|
281
|
+
property_link_pairs: dict[str, URIRef] | None,
|
|
282
|
+
named_graph: URIRef | None = None,
|
|
230
283
|
) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
|
|
231
|
-
|
|
232
|
-
|
|
284
|
+
named_graph = named_graph or self.default_named_graph
|
|
285
|
+
|
|
286
|
+
if named_graph not in self.named_graphs:
|
|
287
|
+
warnings.warn(
|
|
288
|
+
f"Named graph {named_graph} not found in graph store, cannot read",
|
|
289
|
+
stacklevel=2,
|
|
290
|
+
)
|
|
291
|
+
return
|
|
292
|
+
|
|
293
|
+
if not self.rules or named_graph not in self.rules:
|
|
294
|
+
warnings.warn(
|
|
295
|
+
f"Rules for named graph {named_graph} not found in graph store!",
|
|
296
|
+
stacklevel=2,
|
|
297
|
+
)
|
|
233
298
|
return
|
|
299
|
+
|
|
234
300
|
if self.multi_type_instances:
|
|
235
301
|
warnings.warn(
|
|
236
302
|
"Multi typed instances detected, issues with loading can occur!",
|
|
237
303
|
stacklevel=2,
|
|
238
304
|
)
|
|
239
|
-
|
|
305
|
+
|
|
306
|
+
analysis = InformationAnalysis(self.rules[named_graph])
|
|
307
|
+
|
|
240
308
|
if cls := analysis.classes_by_neat_id.get(class_neat_id):
|
|
241
309
|
if property_link_pairs:
|
|
242
310
|
property_renaming_config = {
|
|
@@ -272,9 +340,22 @@ class NeatGraphStore:
|
|
|
272
340
|
self,
|
|
273
341
|
class_entity: ClassEntity,
|
|
274
342
|
property_renaming_config: dict[URIRef, str] | None = None,
|
|
343
|
+
named_graph: URIRef | None = None,
|
|
275
344
|
) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
|
|
276
|
-
|
|
277
|
-
|
|
345
|
+
named_graph = named_graph or self.default_named_graph
|
|
346
|
+
|
|
347
|
+
if named_graph not in self.named_graphs:
|
|
348
|
+
warnings.warn(
|
|
349
|
+
f"Named graph {named_graph} not found in graph store, cannot read",
|
|
350
|
+
stacklevel=2,
|
|
351
|
+
)
|
|
352
|
+
return
|
|
353
|
+
|
|
354
|
+
if not self.rules or named_graph not in self.rules:
|
|
355
|
+
warnings.warn(
|
|
356
|
+
f"Rules for named graph {named_graph} not found in graph store!",
|
|
357
|
+
stacklevel=2,
|
|
358
|
+
)
|
|
278
359
|
return
|
|
279
360
|
if self.multi_type_instances:
|
|
280
361
|
warnings.warn(
|
|
@@ -282,28 +363,28 @@ class NeatGraphStore:
|
|
|
282
363
|
stacklevel=2,
|
|
283
364
|
)
|
|
284
365
|
|
|
285
|
-
if class_entity not in [definition.class_ for definition in self.rules.classes]:
|
|
366
|
+
if class_entity not in [definition.class_ for definition in self.rules[named_graph].classes]:
|
|
286
367
|
warnings.warn("Desired type not found in graph!", stacklevel=2)
|
|
287
368
|
return
|
|
288
369
|
|
|
289
|
-
if not (class_uri := InformationAnalysis(self.rules).class_uri(class_entity)):
|
|
370
|
+
if not (class_uri := InformationAnalysis(self.rules[named_graph]).class_uri(class_entity)):
|
|
290
371
|
warnings.warn(
|
|
291
372
|
f"Class {class_entity.suffix} does not have namespace defined for prefix {class_entity.prefix} Rules!",
|
|
292
373
|
stacklevel=2,
|
|
293
374
|
)
|
|
294
375
|
return
|
|
295
376
|
|
|
296
|
-
has_hop_transformations = InformationAnalysis(self.rules).has_hop_transformations()
|
|
377
|
+
has_hop_transformations = InformationAnalysis(self.rules[named_graph]).has_hop_transformations()
|
|
297
378
|
has_self_reference_transformations = InformationAnalysis(
|
|
298
|
-
self.rules
|
|
379
|
+
self.rules[named_graph]
|
|
299
380
|
).has_self_reference_property_transformations()
|
|
300
381
|
if has_hop_transformations or has_self_reference_transformations:
|
|
301
382
|
msg = (
|
|
302
|
-
f"Rules contain [{'Hop' if has_hop_transformations else ''
|
|
303
|
-
f", {'SelfReferenceProperty' if has_self_reference_transformations else ''
|
|
383
|
+
f"Rules contain [{'Hop' if has_hop_transformations else ''}"
|
|
384
|
+
f", {'SelfReferenceProperty' if has_self_reference_transformations else ''}]"
|
|
304
385
|
" rdfpath."
|
|
305
|
-
f" Run [{'ReduceHopTraversal' if has_hop_transformations else ''
|
|
306
|
-
f", {'AddSelfReferenceProperty' if has_self_reference_transformations else ''
|
|
386
|
+
f" Run [{'ReduceHopTraversal' if has_hop_transformations else ''}"
|
|
387
|
+
f", {'AddSelfReferenceProperty' if has_self_reference_transformations else ''}]"
|
|
307
388
|
" transformer(s) first!"
|
|
308
389
|
)
|
|
309
390
|
|
|
@@ -318,23 +399,19 @@ class NeatGraphStore:
|
|
|
318
399
|
|
|
319
400
|
# get potential property renaming config
|
|
320
401
|
property_renaming_config = property_renaming_config or InformationAnalysis(
|
|
321
|
-
self.rules
|
|
402
|
+
self.rules[named_graph]
|
|
322
403
|
).define_property_renaming_config(class_entity)
|
|
323
404
|
|
|
324
|
-
# get property types to guide process of removing or not namespaces from results
|
|
325
|
-
property_types = InformationAnalysis(self.rules).property_types(class_entity)
|
|
326
405
|
for instance_id in instance_ids:
|
|
327
406
|
if res := self.queries.describe(
|
|
328
407
|
instance_id=instance_id,
|
|
329
408
|
instance_type=class_entity.suffix,
|
|
330
409
|
property_renaming_config=property_renaming_config,
|
|
331
|
-
property_types=property_types,
|
|
332
410
|
):
|
|
333
411
|
yield res
|
|
334
412
|
|
|
335
413
|
def read(
|
|
336
|
-
self,
|
|
337
|
-
class_: str,
|
|
414
|
+
self, class_: str, named_graph: URIRef | None = None
|
|
338
415
|
) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
|
|
339
416
|
"""Read instances for given class from the graph store.
|
|
340
417
|
|
|
@@ -343,9 +420,20 @@ class NeatGraphStore:
|
|
|
343
420
|
the rules which are attached to the graph store.
|
|
344
421
|
|
|
345
422
|
"""
|
|
423
|
+
named_graph = named_graph or self.default_named_graph
|
|
346
424
|
|
|
347
|
-
if not self.
|
|
348
|
-
warnings.warn(
|
|
425
|
+
if named_graph not in self.named_graphs:
|
|
426
|
+
warnings.warn(
|
|
427
|
+
f"Named graph {named_graph} not found in graph store, cannot read",
|
|
428
|
+
stacklevel=2,
|
|
429
|
+
)
|
|
430
|
+
return
|
|
431
|
+
|
|
432
|
+
if not self.rules or named_graph not in self.rules:
|
|
433
|
+
warnings.warn(
|
|
434
|
+
f"Rules for named graph {named_graph} not found in graph store!",
|
|
435
|
+
stacklevel=2,
|
|
436
|
+
)
|
|
349
437
|
return
|
|
350
438
|
if self.multi_type_instances:
|
|
351
439
|
warnings.warn(
|
|
@@ -353,15 +441,15 @@ class NeatGraphStore:
|
|
|
353
441
|
stacklevel=2,
|
|
354
442
|
)
|
|
355
443
|
|
|
356
|
-
class_entity = ClassEntity(prefix=self.rules.metadata.prefix, suffix=class_)
|
|
444
|
+
class_entity = ClassEntity(prefix=self.rules[named_graph].metadata.prefix, suffix=class_)
|
|
357
445
|
|
|
358
|
-
if class_entity not in [definition.class_ for definition in self.rules.classes]:
|
|
446
|
+
if class_entity not in [definition.class_ for definition in self.rules[named_graph].classes]:
|
|
359
447
|
warnings.warn("Desired type not found in graph!", stacklevel=2)
|
|
360
448
|
return
|
|
361
449
|
|
|
362
450
|
yield from self._read_via_class_entity(class_entity)
|
|
363
451
|
|
|
364
|
-
def count_of_id(self, neat_id: URIRef) -> int:
|
|
452
|
+
def count_of_id(self, neat_id: URIRef, named_graph: URIRef | None = None) -> int:
|
|
365
453
|
"""Count the number of instances of a given type
|
|
366
454
|
|
|
367
455
|
Args:
|
|
@@ -370,18 +458,31 @@ class NeatGraphStore:
|
|
|
370
458
|
Returns:
|
|
371
459
|
Number of instances
|
|
372
460
|
"""
|
|
373
|
-
|
|
374
|
-
|
|
461
|
+
named_graph = named_graph or self.default_named_graph
|
|
462
|
+
|
|
463
|
+
if named_graph not in self.named_graphs:
|
|
464
|
+
warnings.warn(
|
|
465
|
+
f"Named graph {named_graph} not found in graph store, cannot count",
|
|
466
|
+
stacklevel=2,
|
|
467
|
+
)
|
|
468
|
+
return 0
|
|
469
|
+
|
|
470
|
+
if not self.rules or named_graph not in self.rules:
|
|
471
|
+
warnings.warn(
|
|
472
|
+
f"Rules for named graph {named_graph} not found in graph store!",
|
|
473
|
+
stacklevel=2,
|
|
474
|
+
)
|
|
375
475
|
return 0
|
|
376
476
|
|
|
377
477
|
class_entity = next(
|
|
378
|
-
(definition.class_ for definition in self.rules.classes if definition.neatId == neat_id),
|
|
478
|
+
(definition.class_ for definition in self.rules[named_graph].classes if definition.neatId == neat_id),
|
|
479
|
+
None,
|
|
379
480
|
)
|
|
380
481
|
if not class_entity:
|
|
381
482
|
warnings.warn("Desired type not found in graph!", stacklevel=2)
|
|
382
483
|
return 0
|
|
383
484
|
|
|
384
|
-
if not (class_uri := InformationAnalysis(self.rules).class_uri(class_entity)):
|
|
485
|
+
if not (class_uri := InformationAnalysis(self.rules[named_graph]).class_uri(class_entity)):
|
|
385
486
|
warnings.warn(
|
|
386
487
|
f"Class {class_entity.suffix} does not have namespace defined for prefix {class_entity.prefix} Rules!",
|
|
387
488
|
stacklevel=2,
|
|
@@ -392,10 +493,11 @@ class NeatGraphStore:
|
|
|
392
493
|
|
|
393
494
|
def count_of_type(self, class_uri: URIRef) -> int:
|
|
394
495
|
query = f"SELECT (COUNT(?instance) AS ?instanceCount) WHERE {{ ?instance a <{class_uri}> }}"
|
|
395
|
-
return int(next(iter(self.
|
|
496
|
+
return int(next(iter(self.dataset.query(query)))[0]) # type: ignore[arg-type, index]
|
|
396
497
|
|
|
397
498
|
def _parse_file(
|
|
398
499
|
self,
|
|
500
|
+
named_graph: URIRef,
|
|
399
501
|
filepath: Path | ZipExtFile,
|
|
400
502
|
format: str = "turtle",
|
|
401
503
|
base_uri: URIRef | None = None,
|
|
@@ -403,6 +505,7 @@ class NeatGraphStore:
|
|
|
403
505
|
"""Imports graph data from file.
|
|
404
506
|
|
|
405
507
|
Args:
|
|
508
|
+
named_graph : URIRef of the named graph to store the data in
|
|
406
509
|
filepath : File path to file containing graph data, by default None
|
|
407
510
|
format : rdflib format file containing RDF graph, by default "turtle"
|
|
408
511
|
base_uri : base URI to add to graph in case of relative URIs, by default None
|
|
@@ -419,25 +522,35 @@ class NeatGraphStore:
|
|
|
419
522
|
if self.type_ == "OxigraphStore":
|
|
420
523
|
local_import("pyoxigraph", "oxi")
|
|
421
524
|
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
525
|
+
if format in quad_formats():
|
|
526
|
+
self.dataset.parse(
|
|
527
|
+
filepath, # type: ignore[arg-type]
|
|
528
|
+
format=rdflib_to_oxi_type(format),
|
|
529
|
+
transactional=False,
|
|
530
|
+
publicID=base_uri,
|
|
531
|
+
)
|
|
532
|
+
else:
|
|
533
|
+
self.graph(named_graph).parse(
|
|
534
|
+
filepath, # type: ignore[arg-type]
|
|
535
|
+
format=rdflib_to_oxi_type(format),
|
|
536
|
+
transactional=False,
|
|
537
|
+
publicID=base_uri,
|
|
538
|
+
)
|
|
539
|
+
self.dataset.store._store.optimize() # type: ignore[attr-defined]
|
|
430
540
|
|
|
431
541
|
# All other stores
|
|
432
542
|
else:
|
|
433
|
-
if
|
|
434
|
-
self.
|
|
543
|
+
if format in quad_formats():
|
|
544
|
+
self.dataset.parse(filepath, publicID=base_uri, format=format) # type: ignore[arg-type]
|
|
435
545
|
else:
|
|
436
|
-
|
|
437
|
-
if filename.is_file():
|
|
438
|
-
self.graph.parse(filename, publicID=base_uri)
|
|
546
|
+
self.graph(named_graph).parse(filepath, publicID=base_uri, format=format) # type: ignore[arg-type]
|
|
439
547
|
|
|
440
|
-
def _add_triples(
|
|
548
|
+
def _add_triples(
|
|
549
|
+
self,
|
|
550
|
+
triples: Iterable[Triple],
|
|
551
|
+
named_graph: URIRef,
|
|
552
|
+
batch_size: int = 10_000,
|
|
553
|
+
) -> None:
|
|
441
554
|
"""Adds triples to the graph store in batches.
|
|
442
555
|
|
|
443
556
|
Args:
|
|
@@ -445,66 +558,103 @@ class NeatGraphStore:
|
|
|
445
558
|
batch_size: Batch size of triples per commit, by default 10_000
|
|
446
559
|
verbose: Verbose mode, by default False
|
|
447
560
|
"""
|
|
448
|
-
add_triples_in_batch(self.graph, triples, batch_size)
|
|
561
|
+
add_triples_in_batch(self.graph(named_graph), triples, batch_size)
|
|
449
562
|
|
|
450
|
-
def transform(self, transformer: Transformers) -> None:
|
|
563
|
+
def transform(self, transformer: Transformers, named_graph: URIRef | None = None) -> None:
|
|
451
564
|
"""Transforms the graph store using a transformer."""
|
|
452
565
|
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
566
|
+
named_graph = named_graph or self.default_named_graph
|
|
567
|
+
if named_graph in self.named_graphs:
|
|
568
|
+
missing_changes = [
|
|
569
|
+
change for change in transformer._need_changes if not self.provenance.activity_took_place(change)
|
|
570
|
+
]
|
|
571
|
+
if self.provenance.activity_took_place(type(transformer).__name__) and transformer._use_only_once:
|
|
572
|
+
warnings.warn(
|
|
573
|
+
f"Cannot transform graph store with {type(transformer).__name__}, already applied",
|
|
574
|
+
stacklevel=2,
|
|
575
|
+
)
|
|
576
|
+
elif missing_changes:
|
|
577
|
+
warnings.warn(
|
|
578
|
+
(
|
|
579
|
+
f"Cannot transform graph store with {type(transformer).__name__}, "
|
|
580
|
+
f"missing one or more required changes [{', '.join(missing_changes)}]"
|
|
581
|
+
),
|
|
582
|
+
stacklevel=2,
|
|
583
|
+
)
|
|
469
584
|
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
585
|
+
else:
|
|
586
|
+
_start = datetime.now(timezone.utc)
|
|
587
|
+
transformer.transform(self.graph(named_graph))
|
|
588
|
+
self.provenance.append(
|
|
589
|
+
Change.record(
|
|
590
|
+
activity=f"{type(transformer).__name__}",
|
|
591
|
+
start=_start,
|
|
592
|
+
end=datetime.now(timezone.utc),
|
|
593
|
+
description=transformer.description,
|
|
594
|
+
)
|
|
479
595
|
)
|
|
596
|
+
|
|
597
|
+
else:
|
|
598
|
+
warnings.warn(
|
|
599
|
+
f"Named graph {named_graph} not found in graph store, cannot transform",
|
|
600
|
+
stacklevel=2,
|
|
480
601
|
)
|
|
481
602
|
|
|
482
603
|
@property
|
|
483
|
-
def summary(self) -> pd.DataFrame:
|
|
484
|
-
return
|
|
604
|
+
def summary(self) -> dict[URIRef, pd.DataFrame]:
|
|
605
|
+
return {
|
|
606
|
+
named_graph: pd.DataFrame(
|
|
607
|
+
self.queries.summarize_instances(named_graph),
|
|
608
|
+
columns=["Type", "Occurrence"],
|
|
609
|
+
)
|
|
610
|
+
for named_graph in self.named_graphs
|
|
611
|
+
}
|
|
485
612
|
|
|
486
613
|
@property
|
|
487
|
-
def multi_type_instances(self) -> dict[str, list[str]]:
|
|
488
|
-
return self.queries.multi_type_instances()
|
|
614
|
+
def multi_type_instances(self) -> dict[URIRef, dict[str, list[str]]]:
|
|
615
|
+
return {named_graph: self.queries.multi_type_instances(named_graph) for named_graph in self.named_graphs}
|
|
489
616
|
|
|
490
617
|
def _repr_html_(self) -> str:
|
|
491
618
|
provenance = self.provenance._repr_html_()
|
|
492
|
-
summary: pd.DataFrame = self.summary
|
|
619
|
+
summary: dict[URIRef, pd.DataFrame] = self.summary
|
|
620
|
+
|
|
621
|
+
def _short_name_of_graph(named_graph: URIRef) -> str:
|
|
622
|
+
return "default" if named_graph == self.default_named_graph else remove_namespace_from_uri(named_graph)
|
|
493
623
|
|
|
494
|
-
if summary
|
|
624
|
+
if not summary:
|
|
495
625
|
summary_text = "<br /><strong>Graph is empty</strong><br />"
|
|
496
626
|
else:
|
|
627
|
+
all_types = set().union(
|
|
628
|
+
*[set(sub_summary.Type) for sub_summary in summary.values() if not sub_summary.empty]
|
|
629
|
+
)
|
|
630
|
+
|
|
497
631
|
summary_text = (
|
|
498
632
|
"<br /><strong>Overview</strong>:" # type: ignore
|
|
499
|
-
f"<ul><li>{len(summary)}
|
|
500
|
-
f"<li>{
|
|
501
|
-
f"{cast(pd.DataFrame, self._shorten_summary(summary))._repr_html_()}" # type: ignore[operator]
|
|
633
|
+
f"<ul><li>{len(summary)} named graphs</strong></li>"
|
|
634
|
+
f"<li>Total of {len(all_types)} unique types</strong></li>"
|
|
502
635
|
)
|
|
503
636
|
|
|
504
|
-
|
|
505
|
-
|
|
637
|
+
for named_graph, table in summary.items():
|
|
638
|
+
summary_text += (
|
|
639
|
+
f"<li>{sum(table['Occurrence'])} instances in {_short_name_of_graph(named_graph)}"
|
|
640
|
+
" graph</strong></li>"
|
|
641
|
+
)
|
|
642
|
+
|
|
643
|
+
summary_text += "</ul>"
|
|
644
|
+
for named_graph, table in summary.items():
|
|
645
|
+
summary_text += (
|
|
646
|
+
f"<br /><strong>{_short_name_of_graph(named_graph)} graph</strong>:"
|
|
647
|
+
f"{cast(pd.DataFrame, self._shorten_summary(table))._repr_html_()}" # type: ignore[operator]
|
|
648
|
+
)
|
|
506
649
|
|
|
507
|
-
|
|
650
|
+
for named_graph, multi_value_instances in self.multi_type_instances.items():
|
|
651
|
+
if multi_value_instances:
|
|
652
|
+
summary_text += (
|
|
653
|
+
f"<br><strong>Multi value instances detected in {_short_name_of_graph(named_graph)}"
|
|
654
|
+
"graph! Loading could have issues!</strong></br>"
|
|
655
|
+
)
|
|
656
|
+
|
|
657
|
+
return f"{summary_text}{provenance}"
|
|
508
658
|
|
|
509
659
|
def _shorten_summary(self, summary: pd.DataFrame) -> pd.DataFrame:
|
|
510
660
|
"""Shorten summary to top 5 types by occurrence."""
|
|
@@ -531,3 +681,7 @@ class NeatGraphStore:
|
|
|
531
681
|
shorter_summary.index = cast(Index, indexes)
|
|
532
682
|
|
|
533
683
|
return shorter_summary
|
|
684
|
+
|
|
685
|
+
@property
|
|
686
|
+
def named_graphs(self) -> list[URIRef]:
|
|
687
|
+
return [cast(URIRef, context.identifier) for context in self.dataset.contexts()]
|