cognite-neat 0.87.4__py3-none-any.whl → 0.88.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_version.py +1 -1
- cognite/neat/app/api/data_classes/rest.py +0 -19
- cognite/neat/app/api/explorer.py +6 -4
- cognite/neat/app/api/routers/crud.py +11 -21
- cognite/neat/app/api/routers/workflows.py +24 -94
- cognite/neat/graph/extractors/_classic_cdf/_assets.py +8 -2
- cognite/neat/graph/extractors/_mock_graph_generator.py +2 -2
- cognite/neat/graph/loaders/_base.py +17 -12
- cognite/neat/graph/loaders/_rdf2asset.py +223 -58
- cognite/neat/graph/loaders/_rdf2dms.py +1 -1
- cognite/neat/graph/stores/_base.py +5 -0
- cognite/neat/rules/analysis/_asset.py +31 -1
- cognite/neat/rules/importers/_inference2rules.py +31 -35
- cognite/neat/rules/models/information/_rules.py +1 -1
- cognite/neat/workflows/steps/data_contracts.py +17 -43
- cognite/neat/workflows/steps/lib/current/graph_extractor.py +28 -24
- cognite/neat/workflows/steps/lib/current/graph_loader.py +4 -21
- cognite/neat/workflows/steps/lib/current/graph_store.py +18 -134
- cognite/neat/workflows/steps_registry.py +5 -7
- {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/METADATA +1 -1
- {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/RECORD +24 -132
- cognite/neat/app/api/routers/core.py +0 -91
- cognite/neat/app/api/routers/data_exploration.py +0 -336
- cognite/neat/app/api/routers/rules.py +0 -203
- cognite/neat/legacy/__init__.py +0 -0
- cognite/neat/legacy/graph/__init__.py +0 -3
- cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44-dirty.xml +0 -20182
- cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44.xml +0 -20163
- cognite/neat/legacy/graph/examples/__init__.py +0 -10
- cognite/neat/legacy/graph/examples/skos-capturing-sheet-wind-topics.xlsx +0 -0
- cognite/neat/legacy/graph/exceptions.py +0 -90
- cognite/neat/legacy/graph/extractors/__init__.py +0 -6
- cognite/neat/legacy/graph/extractors/_base.py +0 -14
- cognite/neat/legacy/graph/extractors/_dexpi.py +0 -44
- cognite/neat/legacy/graph/extractors/_graph_capturing_sheet.py +0 -403
- cognite/neat/legacy/graph/extractors/_mock_graph_generator.py +0 -361
- cognite/neat/legacy/graph/loaders/__init__.py +0 -23
- cognite/neat/legacy/graph/loaders/_asset_loader.py +0 -511
- cognite/neat/legacy/graph/loaders/_base.py +0 -67
- cognite/neat/legacy/graph/loaders/_exceptions.py +0 -85
- cognite/neat/legacy/graph/loaders/core/__init__.py +0 -0
- cognite/neat/legacy/graph/loaders/core/labels.py +0 -58
- cognite/neat/legacy/graph/loaders/core/models.py +0 -136
- cognite/neat/legacy/graph/loaders/core/rdf_to_assets.py +0 -1046
- cognite/neat/legacy/graph/loaders/core/rdf_to_relationships.py +0 -559
- cognite/neat/legacy/graph/loaders/rdf_to_dms.py +0 -309
- cognite/neat/legacy/graph/loaders/validator.py +0 -87
- cognite/neat/legacy/graph/models.py +0 -6
- cognite/neat/legacy/graph/stores/__init__.py +0 -13
- cognite/neat/legacy/graph/stores/_base.py +0 -400
- cognite/neat/legacy/graph/stores/_graphdb_store.py +0 -52
- cognite/neat/legacy/graph/stores/_memory_store.py +0 -43
- cognite/neat/legacy/graph/stores/_oxigraph_store.py +0 -151
- cognite/neat/legacy/graph/stores/_oxrdflib.py +0 -247
- cognite/neat/legacy/graph/stores/_rdf_to_graph.py +0 -42
- cognite/neat/legacy/graph/transformations/__init__.py +0 -0
- cognite/neat/legacy/graph/transformations/entity_matcher.py +0 -101
- cognite/neat/legacy/graph/transformations/query_generator/__init__.py +0 -3
- cognite/neat/legacy/graph/transformations/query_generator/sparql.py +0 -575
- cognite/neat/legacy/graph/transformations/transformer.py +0 -322
- cognite/neat/legacy/rules/__init__.py +0 -0
- cognite/neat/legacy/rules/analysis.py +0 -231
- cognite/neat/legacy/rules/examples/Rules-Nordic44-to-graphql.xlsx +0 -0
- cognite/neat/legacy/rules/examples/Rules-Nordic44.xlsx +0 -0
- cognite/neat/legacy/rules/examples/__init__.py +0 -18
- cognite/neat/legacy/rules/examples/power-grid-containers.yaml +0 -124
- cognite/neat/legacy/rules/examples/power-grid-example.xlsx +0 -0
- cognite/neat/legacy/rules/examples/power-grid-model.yaml +0 -224
- cognite/neat/legacy/rules/examples/rules-template.xlsx +0 -0
- cognite/neat/legacy/rules/examples/sheet2cdf-transformation-rules.xlsx +0 -0
- cognite/neat/legacy/rules/examples/skos-rules.xlsx +0 -0
- cognite/neat/legacy/rules/examples/source-to-solution-mapping-rules.xlsx +0 -0
- cognite/neat/legacy/rules/examples/wind-energy.owl +0 -1511
- cognite/neat/legacy/rules/exceptions.py +0 -2972
- cognite/neat/legacy/rules/exporters/__init__.py +0 -20
- cognite/neat/legacy/rules/exporters/_base.py +0 -45
- cognite/neat/legacy/rules/exporters/_core/__init__.py +0 -5
- cognite/neat/legacy/rules/exporters/_core/rules2labels.py +0 -24
- cognite/neat/legacy/rules/exporters/_rules2dms.py +0 -885
- cognite/neat/legacy/rules/exporters/_rules2excel.py +0 -213
- cognite/neat/legacy/rules/exporters/_rules2graphql.py +0 -183
- cognite/neat/legacy/rules/exporters/_rules2ontology.py +0 -524
- cognite/neat/legacy/rules/exporters/_rules2pydantic_models.py +0 -748
- cognite/neat/legacy/rules/exporters/_rules2rules.py +0 -105
- cognite/neat/legacy/rules/exporters/_rules2triples.py +0 -38
- cognite/neat/legacy/rules/exporters/_validation.py +0 -146
- cognite/neat/legacy/rules/importers/__init__.py +0 -22
- cognite/neat/legacy/rules/importers/_base.py +0 -66
- cognite/neat/legacy/rules/importers/_dict2rules.py +0 -158
- cognite/neat/legacy/rules/importers/_dms2rules.py +0 -194
- cognite/neat/legacy/rules/importers/_graph2rules.py +0 -308
- cognite/neat/legacy/rules/importers/_json2rules.py +0 -39
- cognite/neat/legacy/rules/importers/_owl2rules/__init__.py +0 -3
- cognite/neat/legacy/rules/importers/_owl2rules/_owl2classes.py +0 -239
- cognite/neat/legacy/rules/importers/_owl2rules/_owl2metadata.py +0 -260
- cognite/neat/legacy/rules/importers/_owl2rules/_owl2properties.py +0 -217
- cognite/neat/legacy/rules/importers/_owl2rules/_owl2rules.py +0 -290
- cognite/neat/legacy/rules/importers/_spreadsheet2rules.py +0 -45
- cognite/neat/legacy/rules/importers/_xsd2rules.py +0 -20
- cognite/neat/legacy/rules/importers/_yaml2rules.py +0 -39
- cognite/neat/legacy/rules/models/__init__.py +0 -5
- cognite/neat/legacy/rules/models/_base.py +0 -151
- cognite/neat/legacy/rules/models/raw_rules.py +0 -316
- cognite/neat/legacy/rules/models/rdfpath.py +0 -237
- cognite/neat/legacy/rules/models/rules.py +0 -1289
- cognite/neat/legacy/rules/models/tables.py +0 -9
- cognite/neat/legacy/rules/models/value_types.py +0 -118
- cognite/neat/legacy/workflows/examples/Export_DMS/workflow.yaml +0 -89
- cognite/neat/legacy/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
- cognite/neat/legacy/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
- cognite/neat/legacy/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
- cognite/neat/legacy/workflows/examples/Import_DMS/workflow.yaml +0 -65
- cognite/neat/legacy/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
- cognite/neat/legacy/workflows/examples/Validate_Rules/workflow.yaml +0 -67
- cognite/neat/legacy/workflows/examples/Validate_Solution_Model/workflow.yaml +0 -64
- cognite/neat/legacy/workflows/examples/Visualize_Data_Model_Using_Mock_Graph/workflow.yaml +0 -95
- cognite/neat/legacy/workflows/examples/Visualize_Semantic_Data_Model/workflow.yaml +0 -111
- cognite/neat/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
- cognite/neat/workflows/migration/__init__.py +0 -0
- cognite/neat/workflows/migration/steps.py +0 -91
- cognite/neat/workflows/migration/wf_manifests.py +0 -33
- cognite/neat/workflows/steps/lib/legacy/__init__.py +0 -7
- cognite/neat/workflows/steps/lib/legacy/graph_contextualization.py +0 -82
- cognite/neat/workflows/steps/lib/legacy/graph_extractor.py +0 -746
- cognite/neat/workflows/steps/lib/legacy/graph_loader.py +0 -606
- cognite/neat/workflows/steps/lib/legacy/graph_store.py +0 -307
- cognite/neat/workflows/steps/lib/legacy/graph_transformer.py +0 -58
- cognite/neat/workflows/steps/lib/legacy/rules_exporter.py +0 -511
- cognite/neat/workflows/steps/lib/legacy/rules_importer.py +0 -612
- {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,400 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
import sys
|
|
3
|
-
import time
|
|
4
|
-
from abc import ABC, abstractmethod
|
|
5
|
-
from collections.abc import Iterable, Iterator
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
from typing import Literal, TypeAlias, cast
|
|
8
|
-
|
|
9
|
-
import pandas as pd
|
|
10
|
-
from prometheus_client import Gauge, Summary
|
|
11
|
-
from rdflib import Graph, Namespace, URIRef
|
|
12
|
-
from rdflib.query import Result, ResultRow
|
|
13
|
-
|
|
14
|
-
from cognite.neat.constants import DEFAULT_NAMESPACE, get_default_prefixes
|
|
15
|
-
from cognite.neat.legacy.graph.models import Triple
|
|
16
|
-
from cognite.neat.legacy.graph.stores._rdf_to_graph import rdf_file_to_graph
|
|
17
|
-
from cognite.neat.legacy.rules.models.rules import Rules
|
|
18
|
-
|
|
19
|
-
if sys.version_info >= (3, 11):
|
|
20
|
-
from typing import Self
|
|
21
|
-
else:
|
|
22
|
-
from typing_extensions import Self
|
|
23
|
-
|
|
24
|
-
prom_qsm = Summary("store_query_time_summary_legacy", "Time spent processing queries", ["query"])
|
|
25
|
-
prom_sq = Gauge("store_single_query_time_legacy", "Time spent processing a single query", ["query"])
|
|
26
|
-
|
|
27
|
-
MIMETypes: TypeAlias = Literal[
|
|
28
|
-
"application/rdf+xml",
|
|
29
|
-
"text/turtle",
|
|
30
|
-
"application/n-triple",
|
|
31
|
-
"application/n-quads",
|
|
32
|
-
"application/trig",
|
|
33
|
-
]
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
class NeatGraphStoreBase(ABC):
    """NeatGraphStore is a class that stores the graph and provides methods to read/write data it contains


    Args:
        graph : Instance of rdflib.Graph class for graph storage
        base_prefix : Used as a base prefix for graph namespace,
            allowing querying graph data using a short form of a URI
        namespace : Namespace (aka URI) used to resolve any relative URI in the graph
        prefixes : Dictionary of additional prefixes used and bounded to the graph
    """

    # Short identifier of the backing store; set by each concrete subclass.
    rdf_store_type: str

    def __init__(
        self,
        graph: Graph | None = None,
        base_prefix: str = "",  # usually empty
        namespace: Namespace = DEFAULT_NAMESPACE,
        prefixes: dict[str, Namespace] | None = None,
    ):
        # An empty rdflib.Graph is falsy (it defines __len__), so `graph or Graph()`
        # would silently throw away a caller-supplied empty graph and its store.
        self.graph = graph if graph is not None else Graph()
        self.base_prefix: str = base_prefix
        self.namespace: Namespace = namespace
        self.prefixes: dict[str, Namespace] = prefixes or get_default_prefixes()

        self.rdf_store_query_url: str | None = None
        self.rdf_store_update_url: str | None = None
        self.returnFormat: str | None = None
        self.df_cache: pd.DataFrame | None = None
        self.internal_storage_dir: Path | None = None
        self.graph_name: str | None = None
        self.internal_storage_dir_orig: Path | None = None
        self.storage_dirs_to_delete: list[Path] = []
        self.queries = _Queries(self)

    @classmethod
    def from_rules(cls, rules: Rules) -> Self:
        """
        Creates a new store instance from a Rules object and runs the .init_graph() method on it.

        The namespace is taken from ``rules.metadata.namespace`` (falling back to
        ``DEFAULT_NAMESPACE`` when it is None), the prefixes from ``rules.prefixes`` and the
        base prefix from ``rules.metadata.prefix``.

        Args:
            rules: Rules object containing information about the graph store.

        Returns:
            An instantiated and initialized store of the class this method is called on

        """
        namespace = DEFAULT_NAMESPACE if rules.metadata.namespace is None else rules.metadata.namespace
        store = cls(prefixes=rules.prefixes, namespace=namespace)
        store.init_graph(base_prefix=rules.metadata.prefix)
        return store

    @abstractmethod
    def _set_graph(self) -> None:
        """Create the backing rdflib graph and assign it to ``self.graph``."""
        raise NotImplementedError()

    def init_graph(
        self,
        rdf_store_query_url: str | None = None,
        rdf_store_update_url: str | None = None,
        graph_name: str | None = None,
        base_prefix: str | None = None,
        returnFormat: str = "csv",
        internal_storage_dir: Path | None = None,
    ):
        """Initializes the graph.

        Args:
            rdf_store_query_url : URL towards which SPARQL query is executed, by default None
            rdf_store_update_url : URL towards which SPARQL update is executed, by default None
            graph_name : Name of graph, by default None
            base_prefix : Base prefix for graph namespace to change if needed, by default None
            returnFormat : Transport format of graph data between, by default "csv"
            internal_storage_dir : Path to directory where internal storage is located,
                by default None (in-memory storage).

        !!! note "internal_storage_dir"
            Used only for Oxigraph
        """
        logging.info("Initializing NeatGraphStore")
        self.rdf_store_query_url = rdf_store_query_url
        self.rdf_store_update_url = rdf_store_update_url
        self.graph_name = graph_name
        self.returnFormat = returnFormat
        self.internal_storage_dir = Path(internal_storage_dir) if internal_storage_dir else None
        # Remember only the first storage directory ever configured.
        if self.internal_storage_dir_orig is None:
            self.internal_storage_dir_orig = self.internal_storage_dir

        self._set_graph()

        if self.prefixes:
            for prefix, namespace in self.prefixes.items():
                logging.info("Adding prefix %s with namespace %s", prefix, namespace)
                self.graph.bind(prefix, namespace)

        if base_prefix:
            self.base_prefix = base_prefix

        self.graph.bind(self.base_prefix, self.namespace)
        logging.info("Adding prefix %s with namespace %s", self.base_prefix, self.namespace)
        logging.info("Graph initialized")

    def reinitialize_graph(self):
        """Reinitialize the graph by re-running init_graph with the currently stored settings."""
        self.init_graph(
            self.rdf_store_query_url,
            self.rdf_store_update_url,
            self.graph_name,
            self.base_prefix,
            self.returnFormat,
            self.internal_storage_dir,
        )

    def upsert_prefixes(self, prefixes: dict[str, Namespace]) -> None:
        """Adds prefixes to the graph store.

        Args:
            prefixes: Mapping of prefix to namespace; merged into ``self.prefixes``
                and bound on the current graph.
        """
        self.prefixes.update(prefixes)
        for prefix, namespace in prefixes.items():
            logging.info("Adding prefix %s with namespace %s", prefix, namespace)
            self.graph.bind(prefix, namespace)

    def close(self) -> None:
        """Closes the graph."""
        # Can be overridden in subclasses
        return None

    def restart(self) -> None:
        """Restarts the graph"""
        # Can be overridden in subclasses
        return None

    def import_from_file(
        self,
        graph_file: Path,
        mime_type: MIMETypes = "application/rdf+xml",
        add_base_iri: bool = True,
    ) -> None:
        """Imports graph data from file.

        Args:
            graph_file : File path to file containing graph data
            mime_type : MIME type of graph data, by default "application/rdf+xml".
                Not used by this base implementation.
            add_base_iri : Add base IRI to graph, by default True
        """
        if add_base_iri:
            self.graph = rdf_file_to_graph(
                self.graph,
                graph_file,
                base_namespace=self.namespace,
                prefixes=self.prefixes,
            )
        else:
            self.graph = rdf_file_to_graph(self.graph, graph_file, prefixes=self.prefixes)
        return None

    def get_graph(self) -> Graph:
        """Returns the graph."""
        return self.graph

    def set_graph(self, graph: Graph):
        """Sets the graph."""
        self.graph = graph

    def query(self, query: str) -> Result:
        """Returns the result of the query and records the elapsed time in the Prometheus metrics."""
        start_time = time.perf_counter()
        result = self.graph.query(query)
        elapsed_time = time.perf_counter() - start_time
        prom_qsm.labels("query").observe(elapsed_time)
        prom_sq.labels("query").set(elapsed_time)
        return result

    def serialize(self, *args, **kwargs):
        """Serializes the graph; all arguments are forwarded to ``Graph.serialize``."""
        return self.graph.serialize(*args, **kwargs)

    def query_delayed(self, query) -> Iterable[Triple]:
        """Returns the result of the query, but does not execute it immediately.

        The query is not executed until the result is iterated over.

        Args:
            query: SPARQL query to execute

        Returns:
            An iterable of triples

        """
        return _DelayedQuery(self.graph, query)

    @abstractmethod
    def drop(self) -> None:
        """Drops the graph."""
        raise NotImplementedError()

    def garbage_collector(self) -> None:
        """Garbage collection of the graph store."""
        # Can be overridden in subclasses
        return None

    def query_to_dataframe(
        self,
        query: str,
        column_mapping: dict | None = None,
        save_to_cache: bool = False,
        index_column: str = "instance",
    ) -> pd.DataFrame:
        """Returns the result of the query as a dataframe.

        Args:
            query: SPARQL query to execute
            column_mapping: Columns name mapping, by default None
            save_to_cache: Save result of query to cache, by default False
            index_column: Indexing column , by default "instance"

        Returns:
            Dataframe with result of query. When the query yields no rows, an empty
            dataframe carrying the mapped column names is returned.
        """

        if column_mapping is None:
            column_mapping = {0: "instance", 1: "property", 2: "value"}

        rows = list(self.graph.query(query, DEBUG=False))
        if rows:
            df_cache = pd.DataFrame(rows)
            df_cache.rename(columns=column_mapping, inplace=True)
            df_cache[index_column] = df_cache[index_column].apply(str)
        else:
            # A frame built from zero rows has no columns at all, so indexing it by
            # `index_column` would raise KeyError; return a correctly-shaped empty frame.
            df_cache = pd.DataFrame(columns=list(column_mapping.values()))

        if save_to_cache:
            self.df_cache = df_cache
        return df_cache

    def commit(self):
        """Commits the graph."""
        self.graph.commit()

    def get_df(self) -> pd.DataFrame:
        """Returns the cached dataframe.

        Raises:
            ValueError: If no dataframe has been cached yet.
        """
        if self.df_cache is None:
            raise ValueError("Cache is empty. Run query_to_dataframe() first with save_to_cache.")
        return self.df_cache

    def get_instance_properties_from_cache(self, instance_id: str) -> pd.DataFrame:
        """Returns the cached rows whose "instance" column equals ``instance_id``.

        Raises:
            ValueError: If no dataframe has been cached yet.
        """
        if self.df_cache is None:
            raise ValueError("Cache is empty. Run query_to_dataframe() first with save_to_cache.")
        return self.df_cache.loc[self.df_cache["instance"] == instance_id]

    def print_triples(self):
        """Logs every triple of the graph at info level."""
        for subj, pred, obj in self.graph:
            logging.info(f"Triple: {subj} {pred} {obj}")

    def diagnostic_report(self):
        """Returns the dictionary representation graph diagnostic data ."""
        return {
            "rdf_store_type": self.rdf_store_type,
            "base_prefix": self.base_prefix,
            "namespace": self.namespace,
            "prefixes": self.prefixes,
            "internal_storage_dir": self.internal_storage_dir,
            "rdf_store_query_url": self.rdf_store_query_url,
            "rdf_store_update_url": self.rdf_store_update_url,
        }

    def add_triples(
        self,
        triples: list[Triple] | set[Triple],
        batch_size: int = 10_000,
        verbose: bool = False,
    ):
        """Adds triples to the graph store in batches.

        A commit is issued every ``batch_size`` added triples and once more after the
        last triple, so the final (possibly partial) batch is always committed.

        Args:
            triples: list of triples to be added to the graph store
            batch_size: Batch size of triples per commit, by default 10_000
            verbose: Verbose mode, by default False
        """

        total_number_of_triples = len(triples)
        if verbose:
            logging.info(f"Committing total of {total_number_of_triples} triples to knowledge graph!")

        number_of_uploaded_triples = 0
        pending = 0  # triples added since the last commit

        def commit_pending() -> None:
            """Commit the triples added since the previous commit and report progress."""
            nonlocal pending, number_of_uploaded_triples
            number_of_uploaded_triples += pending
            pending = 0
            self.graph.commit()
            if verbose:
                logging.info(f"Committed {number_of_uploaded_triples} of {total_number_of_triples} triples")

        for triple in triples:
            self.graph.add(triple)
            pending += 1
            if pending >= batch_size:
                commit_pending()

        # Final commit for whatever is left over from the last, partial batch.
        commit_pending()
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
class _DelayedQuery(Iterable):
    """Iterable that postpones running a SPARQL query until iteration starts.

    Every call to ``__iter__`` executes the query against the referenced graph
    and records the elapsed time in the module's Prometheus metrics.
    """

    def __init__(self, graph_ref: Graph, query: str):
        self.query = query
        self.graph_ref = graph_ref

    def __iter__(self) -> Iterator[Triple]:
        started_at = time.perf_counter()
        rows = self.graph_ref.query(self.query)
        duration = time.perf_counter() - started_at

        # Same label value is used for the summary and the single-query gauge.
        prom_qsm.labels("query").observe(duration)
        prom_sq.labels("query").set(duration)

        return cast(Iterator[Triple], iter(rows))
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
class _Queries:
    """Helper class for storing standard queries for the graph store."""

    def __init__(self, store: NeatGraphStoreBase):
        self.store = store

    def list_instances_ids_of_class(self, class_uri: URIRef, limit: int = -1) -> list[URIRef]:
        """Get instances ids for a given class

        Args:
            class_uri: Class for which instances are to be found
            limit: Max number of instances to return, by default -1 meaning all instances.
                Any negative value is treated as "no limit".

        Returns:
            List of class instance URIs
        """
        # Build the query in one step instead of chained str.replace calls: the old
        # second replace also ran over the substituted URI, and a negative limit
        # other than -1 produced an invalid "LIMIT -n" clause.
        limit_clause = f" LIMIT {limit}" if limit >= 0 else ""
        query_statement = f"SELECT DISTINCT ?subject WHERE {{ ?subject a <{class_uri}> .}}{limit_clause}"
        return [cast(tuple, res)[0] for res in self.store.query(query_statement)]

    def list_instances_of_type(self, class_uri: URIRef) -> list[ResultRow]:
        """Get all triples for instances of a given class

        Args:
            class_uri: Class for which instances are to be found

        Returns:
            List of triples for instances of the given class
        """
        query = (
            f"SELECT ?instance ?prop ?value "
            f"WHERE {{ ?instance rdf:type <{class_uri}> . ?instance ?prop ?value . }} order by ?instance "
        )
        logging.info(query)
        # Select queries gives an iterable of result rows
        return cast(list[ResultRow], list(self.store.query(query)))
|
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
|
|
3
|
-
import requests
|
|
4
|
-
from rdflib import Graph, Namespace
|
|
5
|
-
from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
|
|
6
|
-
|
|
7
|
-
from cognite.neat.constants import DEFAULT_NAMESPACE, get_default_prefixes
|
|
8
|
-
|
|
9
|
-
from ._base import NeatGraphStoreBase
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class GraphDBStore(NeatGraphStoreBase):
    """GraphDB is a class that stores the graph in a GraphDB instances and provides methods to
    read/write data it contains


    Args:
        graph : Instance of rdflib.Graph class for graph storage
        base_prefix : Used as a base prefix for graph namespace, allowing querying graph data using a shortform of a URI
        namespace : Namespace (aka URI) used to resolve any relative URI in the graph
        prefixes : Dictionary of additional prefixes used and bounded to the graph
    """

    rdf_store_type = "graphdb"

    # (connect, read) timeout in seconds for the HTTP call made by drop(); without
    # a timeout `requests` waits indefinitely on an unresponsive server.
    _DROP_TIMEOUT: tuple[float, float] = (10.0, 300.0)

    def __init__(
        self,
        graph: Graph | None = None,
        base_prefix: str = "",  # usually empty
        namespace: Namespace = DEFAULT_NAMESPACE,
        prefixes: dict[str, Namespace] | None = None,
    ):
        prefixes = prefixes if prefixes else get_default_prefixes()
        super().__init__(graph, base_prefix, namespace, prefixes)
        self.graph_db_rest_url: str = "http://localhost:7200"

    def _set_graph(self) -> None:
        """Create an rdflib graph backed by a SPARQLUpdateStore using the configured endpoints."""
        logging.info("Initializing graph store with GraphDB")
        store = SPARQLUpdateStore(
            query_endpoint=self.rdf_store_query_url,
            update_endpoint=self.rdf_store_update_url,
            returnFormat=self.returnFormat,
            context_aware=False,
            postAsEncoded=False,
            autocommit=False,
        )
        self.graph = Graph(store=store)

    def drop(self):
        """Drops the graph.

        Sends an HTTP DELETE to ``<rdf_store_query_url>/rdf-graphs/service?default``.
        The outcome is logged: at info level on success, at error level when the
        server answers with an error status.
        """
        r = requests.delete(
            f"{self.rdf_store_query_url}/rdf-graphs/service?default",
            timeout=self._DROP_TIMEOUT,
        )
        if r.ok:
            logging.info(f"Dropped graph with state: {r.text}")
        else:
            # Previously a failed request was reported as a successful drop.
            logging.error("Failed to drop graph (HTTP %s): %s", r.status_code, r.text)
|
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
|
|
3
|
-
from rdflib import Graph, Namespace
|
|
4
|
-
|
|
5
|
-
from cognite.neat.constants import DEFAULT_NAMESPACE, get_default_prefixes
|
|
6
|
-
|
|
7
|
-
from ._base import NeatGraphStoreBase
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class MemoryStore(NeatGraphStoreBase):
    """Graph store that keeps the graph in memory using a plain rdflib Graph.

    Args:
        graph : Instance of rdflib.Graph class for graph storage
        base_prefix : Used as a base prefix for graph namespace, allowing querying graph data using a shortform of a URI
        namespace : Namespace (aka URI) used to resolve any relative URI in the graph
        prefixes : Dictionary of additional prefixes used and bounded to the graph
    """

    rdf_store_type: str = "memory"

    def __init__(
        self,
        graph: Graph | None = None,
        base_prefix: str = "",  # usually empty
        namespace: Namespace = DEFAULT_NAMESPACE,
        prefixes: dict[str, Namespace] | None = None,
    ):
        # The constructor is repeated here only so the class gets its own docstring;
        # missing or empty prefixes fall back to the defaults.
        super().__init__(graph, base_prefix, namespace, prefixes or get_default_prefixes())

    def _set_graph(self):
        """Replace the current graph with a fresh in-memory rdflib Graph."""
        logging.info("Initializing graph in memory")
        self.graph = Graph()

    def drop(self):
        """Drops the graph."""
        # For the in-memory store the graph is reinitialized rather than discarded;
        # otherwise the prefixes and bindings would be lost, which fails the workflow.
        self.reinitialize_graph()
|
|
@@ -1,151 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
import os
|
|
3
|
-
import shutil
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
|
|
6
|
-
from rdflib import Graph, Namespace
|
|
7
|
-
|
|
8
|
-
from cognite.neat.constants import DEFAULT_NAMESPACE, get_default_prefixes
|
|
9
|
-
from cognite.neat.utils.auxiliary import local_import
|
|
10
|
-
|
|
11
|
-
from ._base import MIMETypes, NeatGraphStoreBase
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class OxiGraphStore(NeatGraphStoreBase):
    """OxiGraph is a class that stores the graph using OxiGraph and provides methods to read/write data it contains


    Args:
        graph : Instance of rdflib.Graph class for graph storage
        base_prefix : Used as a base prefix for graph namespace, allowing querying graph data using a shortform of a URI
        namespace : Namespace (aka URI) used to resolve any relative URI in the graph
        prefixes : Dictionary of additional prefixes used and bounded to the graph
    """

    rdf_store_type = "oxigraph"

    def __init__(
        self,
        graph: Graph | None = None,
        base_prefix: str = "",  # usually empty
        namespace: Namespace = DEFAULT_NAMESPACE,
        prefixes: dict[str, Namespace] | None = None,
    ):
        prefixes = prefixes if prefixes else get_default_prefixes()
        super().__init__(graph, base_prefix, namespace, prefixes)

    def _set_graph(self) -> None:
        """Open the Oxigraph store and wrap it in an rdflib Graph.

        Opening is attempted up to four times when the failure message mentions a
        lock; any other ``OSError``, or a lock error on the last attempt, is re-raised.
        """
        logging.info("Initializing Oxigraph store")
        local_import("pyoxigraph", "oxi")
        import pyoxigraph

        from cognite.neat.graph.stores import _oxrdflib

        # Adding support for both in-memory and file-based storage.
        # Store (Rust object) accepts only str as path and not Path.
        storage_path = str(self.internal_storage_dir) if self.internal_storage_dir else None

        max_attempts = 4
        for attempt in range(1, max_attempts + 1):
            try:
                oxstore = pyoxigraph.Store(path=storage_path)
                break
            except OSError as e:
                if "lock" not in str(e) or attempt == max_attempts:
                    raise
                # lock originated from another instance of the store
                logging.error("Error initializing Oxigraph store: %s", e)

        self.graph = Graph(store=_oxrdflib.OxigraphStore(store=oxstore))
        self.graph.default_union = True
        self.garbage_collector()

    def close(self):
        """Closes the graph."""
        if self.graph is not None:
            try:
                self.graph.store._inner.flush()  # type: ignore[attr-defined]
                self.graph.close(True)
            except Exception as e:
                logging.debug("Error closing graph: %s", e)

    def restart(self):
        """Restarts the graph"""
        self.close()
        self.reinitialize_graph()
        logging.info("GraphStore restarted")

    def import_from_file(
        self,
        graph_file: Path,
        mime_type: MIMETypes = "application/rdf+xml",
        add_base_iri: bool = True,
    ) -> None:
        """Imports graph data from file.

        Args:
            graph_file : File path to file containing graph data
            mime_type : MIME type of the file, by default "application/rdf+xml"
            add_base_iri : Add base IRI to the graph, by default True
        """
        inner_store = self.graph.store._inner  # type: ignore[attr-defined]
        if add_base_iri:
            inner_store.bulk_load(str(graph_file), mime_type, base_iri=self.namespace)
        else:
            inner_store.bulk_load(str(graph_file), mime_type)
        inner_store.optimize()
        return None

    def drop(self):
        """Drops the graph.

        Closes the graph and, for file-based storage, schedules the storage
        directory for deletion and runs the garbage collector. Errors are logged
        and not propagated.
        """
        try:
            self.close()
            # Due to the specifics of Oxigraph, storage directory cannot be deleted immediately
            # after closing the graph and creating a new one
            storage_dir = self.internal_storage_dir
            # In-memory stores have no storage directory; the old code dereferenced
            # None here and logged a misleading error.
            if storage_dir is not None and storage_dir.exists():
                self.storage_dirs_to_delete.append(storage_dir)
            self.garbage_collector()

        except Exception as e:
            logging.error(f"Error dropping graph : {e}")

    def garbage_collector(self):
        """Garbage collection of the graph store."""
        # delete all directories in self.storage_dirs_to_delete
        for d in self.storage_dirs_to_delete:
            shutil.rmtree(d)
        self.storage_dirs_to_delete = []

    def __del__(self):
        # __init__ may have failed before `graph` was assigned.
        graph = getattr(self, "graph", None)
        if graph is None:
            return
        if graph.store is not None:
            try:
                graph.store._inner.flush()
            except Exception:
                logging.debug("Error flushing graph")
        graph.close()
        # It requires more investigation os.remove(self.internal_storage_dir / "LOCK")

    def commit(self):
        """Commits the graph."""
        # Identity checks on purpose: an empty rdflib Graph is falsy, so plain
        # truthiness would skip the commit whenever the graph holds no triples.
        if self.graph is not None and self.graph.store is not None:
            logging.info("Committing graph - flushing and optimizing")
            self.graph.store._inner.flush()
            self.graph.store._inner.optimize()
            self.graph.commit()

    @staticmethod
    def drop_graph_store_storage(storage_path: Path | None) -> None:
        """Drop graph store storage on disk.

        Removes every entry inside the storage directory (files and
        sub-directories); the directory itself is kept.

        Args:
            storage_path : Path to storage directory
        """
        if storage_path and storage_path.exists():
            for entry in storage_path.iterdir():
                # unlink() cannot remove directories, so handle them separately.
                if entry.is_dir() and not entry.is_symlink():
                    shutil.rmtree(entry)
                else:
                    entry.unlink()
            logging.info("Graph store dropped.")
        else:
            logging.info(f"Storage path {storage_path} does not exist. Skipping drop.")
|