cognite-neat 0.78.5__py3-none-any.whl → 0.80.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cognite/neat/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.78.5"
1
+ __version__ = "0.80.0"
@@ -1,3 +1,3 @@
1
- from .stores import NeatGraphStoreBase
1
+ from .stores import NeatGraphStore
2
2
 
3
- __all__ = ["NeatGraphStoreBase"]
3
+ __all__ = ["NeatGraphStore"]
@@ -0,0 +1,5 @@
1
+ from typing import Literal, TypeAlias
2
+
3
+ MIMETypes: TypeAlias = Literal[
4
+ "application/rdf+xml", "text/turtle", "application/n-triple", "application/n-quads", "application/trig"
5
+ ]
@@ -6,6 +6,7 @@ from ._classic_cdf._relationships import RelationshipsExtractor
6
6
  from ._classic_cdf._sequences import SequencesExtractor
7
7
  from ._classic_cdf._timeseries import TimeSeriesExtractor
8
8
  from ._mock_graph_generator import MockGraphGenerator
9
+ from ._rdf_file import RdfFileExtractor
9
10
 
10
11
  __all__ = [
11
12
  "AssetsExtractor",
@@ -16,4 +17,18 @@ __all__ = [
16
17
  "EventsExtractor",
17
18
  "FilesExtractor",
18
19
  "LabelsExtractor",
20
+ "RdfFileExtractor",
19
21
  ]
22
+
23
+
24
+ TripleExtractors = (
25
+ AssetsExtractor
26
+ | MockGraphGenerator
27
+ | RelationshipsExtractor
28
+ | TimeSeriesExtractor
29
+ | SequencesExtractor
30
+ | EventsExtractor
31
+ | FilesExtractor
32
+ | LabelsExtractor
33
+ | RdfFileExtractor
34
+ )
@@ -0,0 +1,18 @@
1
+ from pathlib import Path
2
+
3
+ from rdflib import URIRef
4
+
5
+ from cognite.neat.graph._shared import MIMETypes
6
+ from cognite.neat.graph.extractors._base import BaseExtractor
7
+
8
+
9
+ class RdfFileExtractor(BaseExtractor):
10
+ def __init__(
11
+ self,
12
+ filepath: Path,
13
+ mime_type: MIMETypes = "application/rdf+xml",
14
+ base_uri: URIRef | None = None,
15
+ ):
16
+ self.filepath = filepath
17
+ self.mime_type = mime_type
18
+ self.base_uri = base_uri
@@ -6,7 +6,7 @@ from typing import ClassVar, Generic, Literal, TypeVar, overload
6
6
  from cognite.client import CogniteClient
7
7
  from cognite.client.data_classes.capabilities import Capability
8
8
 
9
- from cognite.neat.graph import NeatGraphStoreBase
9
+ from cognite.neat.graph import NeatGraphStore
10
10
  from cognite.neat.graph.issues.loader import FailedAuthorizationError
11
11
  from cognite.neat.issues import NeatIssue, NeatIssueList
12
12
  from cognite.neat.utils.upload import UploadDiffsID, UploadResultIDs
@@ -18,7 +18,7 @@ class BaseLoader(ABC, Generic[T_Output]):
18
18
  _new_line = "\n"
19
19
  _encoding = "utf-8"
20
20
 
21
- def __init__(self, graph_store: NeatGraphStoreBase):
21
+ def __init__(self, graph_store: NeatGraphStore):
22
22
  self.graph_store = graph_store
23
23
 
24
24
  @abstractmethod
@@ -17,7 +17,7 @@ from pydantic.main import Model
17
17
 
18
18
  from cognite.neat.graph._tracking import LogTracker, Tracker
19
19
  from cognite.neat.graph.issues import loader as loader_issues
20
- from cognite.neat.graph.stores import NeatGraphStoreBase
20
+ from cognite.neat.graph.stores import NeatGraphStore
21
21
  from cognite.neat.issues import NeatIssue, NeatIssueList
22
22
  from cognite.neat.rules.models import DMSRules
23
23
  from cognite.neat.rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE
@@ -29,7 +29,7 @@ from ._base import CDFLoader
29
29
  class DMSLoader(CDFLoader[dm.InstanceApply]):
30
30
  def __init__(
31
31
  self,
32
- graph_store: NeatGraphStoreBase,
32
+ graph_store: NeatGraphStore,
33
33
  data_model: dm.DataModel[dm.View] | None,
34
34
  instance_space: str,
35
35
  class_by_view_id: dict[ViewId, str] | None = None,
@@ -48,7 +48,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
48
48
  cls,
49
49
  client: CogniteClient,
50
50
  data_model_id: dm.DataModelId,
51
- graph_store: NeatGraphStoreBase,
51
+ graph_store: NeatGraphStore,
52
52
  instance_space: str,
53
53
  ) -> "DMSLoader":
54
54
  issues: list[NeatIssue] = []
@@ -61,7 +61,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
61
61
  return cls(graph_store, data_model, instance_space, {}, issues)
62
62
 
63
63
  @classmethod
64
- def from_rules(cls, rules: DMSRules, graph_store: NeatGraphStoreBase, instance_space: str) -> "DMSLoader":
64
+ def from_rules(cls, rules: DMSRules, graph_store: NeatGraphStore, instance_space: str) -> "DMSLoader":
65
65
  issues: list[NeatIssue] = []
66
66
  data_model: dm.DataModel[dm.View] | None = None
67
67
  try:
@@ -1,13 +1,3 @@
1
- from ._base import NeatGraphStoreBase
2
- from ._graphdb_store import GraphDBStore
3
- from ._memory_store import MemoryStore
4
- from ._oxigraph_store import OxiGraphStore
1
+ from ._base import NeatGraphStore
5
2
 
6
- STORE_BY_TYPE: dict[str, type[NeatGraphStoreBase]] = {}
7
- for store in NeatGraphStoreBase.__subclasses__():
8
- STORE_BY_TYPE[store.rdf_store_type] = store # type: ignore[type-abstract]
9
-
10
- del store # Cleanup namespace
11
- AVAILABLE_STORES = set(STORE_BY_TYPE.keys())
12
-
13
- __all__ = ["NeatGraphStoreBase", "MemoryStore", "OxiGraphStore", "GraphDBStore", "STORE_BY_TYPE", "AVAILABLE_STORES"]
3
+ __all__ = ["NeatGraphStore"]
@@ -1,276 +1,170 @@
1
- import logging
2
1
  import sys
3
- import time
4
- from abc import ABC, abstractmethod
5
- from collections.abc import Iterable, Iterator
2
+ import warnings
3
+ from collections.abc import Iterable
4
+ from datetime import datetime
6
5
  from pathlib import Path
7
- from typing import Literal, TypeAlias, cast
6
+ from typing import cast
8
7
 
9
- import pandas as pd
10
- from prometheus_client import Gauge, Summary
8
+ import pytz
11
9
  from rdflib import RDF, Graph, Namespace, URIRef
12
- from rdflib.query import Result, ResultRow
10
+ from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
11
+ from rdflib.query import ResultRow
13
12
 
14
- from cognite.neat.constants import DEFAULT_NAMESPACE, PREFIXES
13
+ from cognite.neat.graph._shared import MIMETypes
14
+ from cognite.neat.graph.extractors import RdfFileExtractor, TripleExtractors
15
15
  from cognite.neat.graph.models import Triple
16
- from cognite.neat.graph.stores._rdf_to_graph import rdf_file_to_graph
16
+ from cognite.neat.rules.models.information import InformationRules
17
17
  from cognite.neat.utils import remove_namespace
18
+ from cognite.neat.utils.auxiliary import local_import
18
19
 
19
- if sys.version_info >= (3, 11):
20
- pass
21
- else:
22
- pass
23
-
24
- prom_qsm = Summary("store_query_time_summary", "Time spent processing queries", ["query"])
25
- prom_sq = Gauge("store_single_query_time", "Time spent processing a single query", ["query"])
20
+ from ._provenance import Change, Provenance
26
21
 
27
- MIMETypes: TypeAlias = Literal[
28
- "application/rdf+xml", "text/turtle", "application/n-triple", "application/n-quads", "application/trig"
29
- ]
22
+ if sys.version_info < (3, 11):
23
+ from typing_extensions import Self
24
+ else:
25
+ from typing import Self
30
26
 
31
27
 
32
- class NeatGraphStoreBase(ABC):
28
+ class NeatGraphStore:
33
29
  """NeatGraphStore is a class that stores the graph and provides methods to read/write data it contains
34
30
 
35
31
 
36
32
  Args:
37
33
  graph : Instance of rdflib.Graph class for graph storage
38
- base_prefix : Used as a base prefix for graph namespace, allowing querying graph data using a shortform of a URI
39
- namespace : Namespace (aka URI) used to resolve any relative URI in the graph
40
- prefixes : Dictionary of additional prefixes used and bounded to the graph
34
+ rules:
41
35
  """
42
36
 
43
37
  rdf_store_type: str
44
38
 
45
39
  def __init__(
46
40
  self,
47
- graph: Graph | None = None,
48
- base_prefix: str = "", # usually empty
49
- namespace: Namespace = DEFAULT_NAMESPACE,
50
- prefixes: dict = PREFIXES,
41
+ graph: Graph,
42
+ rules: InformationRules | None = None,
51
43
  ):
52
- self.graph = graph or Graph()
53
- self.base_prefix: str = base_prefix
54
- self.namespace: Namespace = namespace
55
- self.prefixes: dict[str, Namespace] = prefixes
56
-
57
- self.rdf_store_query_url: str | None = None
58
- self.rdf_store_update_url: str | None = None
59
- self.returnFormat: str | None = None
60
- self.df_cache: pd.DataFrame | None = None
61
- self.internal_storage_dir: Path | None = None
62
- self.graph_name: str | None = None
63
- self.internal_storage_dir_orig: Path | None = None
64
- self.storage_dirs_to_delete: list[Path] = []
65
- self.queries = _Queries(self)
66
-
67
- @abstractmethod
68
- def _set_graph(self) -> None:
69
- raise NotImplementedError()
70
-
71
- def init_graph(
72
- self,
73
- rdf_store_query_url: str | None = None,
74
- rdf_store_update_url: str | None = None,
75
- graph_name: str | None = None,
76
- base_prefix: str | None = None,
77
- returnFormat: str = "csv",
78
- internal_storage_dir: Path | None = None,
79
- ):
80
- """Initializes the graph.
81
-
82
- Args:
83
- rdf_store_query_url : URL towards which SPARQL query is executed, by default None
84
- rdf_store_update_url : URL towards which SPARQL update is executed, by default None
85
- graph_name : Name of graph, by default None
86
- base_prefix : Base prefix for graph namespace to change if needed, by default None
87
- returnFormat : Transport format of graph data between, by default "csv"
88
- internal_storage_dir : Path to directory where internal storage is located,
89
- by default None (in-memory storage).
90
-
91
- !!! note "internal_storage_dir"
92
- Used only for Oxigraph
93
- """
94
- logging.info("Initializing NeatGraphStore")
95
- self.rdf_store_query_url = rdf_store_query_url
96
- self.rdf_store_update_url = rdf_store_update_url
97
- self.graph_name = graph_name
98
- self.returnFormat = returnFormat
99
- self.internal_storage_dir = Path(internal_storage_dir) if internal_storage_dir else None
100
- self.internal_storage_dir_orig = (
101
- self.internal_storage_dir if self.internal_storage_dir_orig is None else self.internal_storage_dir_orig
44
+ _start = datetime.now(pytz.utc)
45
+ self.graph = graph
46
+ self.provenance = Provenance(
47
+ [
48
+ Change.record(
49
+ activity=f"{type(self).__name__}.__init__",
50
+ start=_start,
51
+ end=datetime.now(pytz.utc),
52
+ description=f"Initialize graph store as {type(self.graph.store).__name__}",
53
+ )
54
+ ]
102
55
  )
56
+ self.rules = rules
103
57
 
104
- self._set_graph()
105
-
106
- if self.prefixes:
107
- for prefix, namespace in self.prefixes.items():
108
- logging.info("Adding prefix %s with namespace %s", prefix, namespace)
109
- self.graph.bind(prefix, namespace)
110
-
111
- if base_prefix:
112
- self.base_prefix = base_prefix
113
- if self.base_prefix:
114
- self.graph.bind(self.base_prefix, self.namespace)
115
- logging.info("Adding prefix %s with namespace %s", self.base_prefix, self.namespace)
116
- logging.info("Graph initialized")
117
-
118
- def reinitialize_graph(self):
119
- """Reinitialize the graph."""
120
- self.init_graph(
121
- self.rdf_store_query_url,
122
- self.rdf_store_update_url,
123
- self.graph_name,
124
- self.base_prefix,
125
- self.returnFormat,
126
- self.internal_storage_dir,
127
- )
58
+ if self.rules and self.rules.prefixes:
59
+ self._upsert_prefixes(self.rules.prefixes)
128
60
 
129
- def upsert_prefixes(self, prefixes: dict[str, Namespace]) -> None:
61
+ self.queries = _Queries(self)
62
+
63
+ def _upsert_prefixes(self, prefixes: dict[str, Namespace]) -> None:
130
64
  """Adds prefixes to the graph store."""
131
- self.prefixes.update(prefixes)
65
+ _start = datetime.now(pytz.utc)
132
66
  for prefix, namespace in prefixes.items():
133
- logging.info("Adding prefix %s with namespace %s", prefix, namespace)
134
67
  self.graph.bind(prefix, namespace)
135
68
 
136
- def close(self) -> None:
137
- """Closes the graph."""
138
- # Can be overridden in subclasses
139
- return None
140
-
141
- def restart(self) -> None:
142
- """Restarts the graph"""
143
- # Can be overridden in subclasses
144
- return None
145
-
146
- def import_from_file(
147
- self, graph_file: Path, mime_type: MIMETypes = "application/rdf+xml", add_base_iri: bool = True
148
- ) -> None:
149
- """Imports graph data from file.
150
-
151
- Args:
152
- graph_file : File path to file containing graph data, by default None
153
- mime_type : MIME type of graph data, by default "application/rdf+xml"
154
- add_base_iri : Add base IRI to graph, by default True
155
- """
156
- if add_base_iri:
157
- self.graph = rdf_file_to_graph(
158
- self.graph, graph_file, base_namespace=self.namespace, prefixes=self.prefixes
69
+ self.provenance.append(
70
+ Change.record(
71
+ activity=f"{type(self).__name__}._upsert_prefixes",
72
+ start=_start,
73
+ end=datetime.now(pytz.utc),
74
+ description="Upsert prefixes to graph store",
159
75
  )
160
- else:
161
- self.graph = rdf_file_to_graph(self.graph, graph_file, prefixes=self.prefixes)
162
- return None
76
+ )
163
77
 
164
- def get_graph(self) -> Graph:
165
- """Returns the graph."""
166
- return self.graph
78
+ @classmethod
79
+ def from_memory_store(cls, rules: InformationRules | None = None) -> "Self":
80
+ return cls(Graph(), rules)
167
81
 
168
- def set_graph(self, graph: Graph):
169
- """Sets the graph."""
170
- self.graph = graph
82
+ @classmethod
83
+ def from_sparql_store(
84
+ cls,
85
+ query_endpoint: str | None = None,
86
+ update_endpoint: str | None = None,
87
+ returnFormat: str = "csv",
88
+ rules: InformationRules | None = None,
89
+ ) -> "Self":
90
+ store = SPARQLUpdateStore(
91
+ query_endpoint=query_endpoint,
92
+ update_endpoint=update_endpoint,
93
+ returnFormat=returnFormat,
94
+ context_aware=False,
95
+ postAsEncoded=False,
96
+ autocommit=False,
97
+ )
98
+ graph = Graph(store=store)
99
+ return cls(graph, rules)
100
+
101
+ @classmethod
102
+ def from_oxi_store(cls, storage_dir: Path | None = None, rules: InformationRules | None = None) -> "Self":
103
+ """Creates a NeatGraphStore from an Oxigraph store."""
104
+ local_import("pyoxigraph", "oxi")
105
+ import pyoxigraph
106
+
107
+ from cognite.neat.graph.stores._oxrdflib import OxigraphStore
108
+
109
+ # Adding support for both oxigraph in-memory and file-based storage
110
+ for i in range(4):
111
+ try:
112
+ oxi_store = pyoxigraph.Store(path=str(storage_dir) if storage_dir else None)
113
+ break
114
+ except OSError as e:
115
+ if "lock" in str(e) and i < 3:
116
+ continue
117
+ raise e
118
+ else:
119
+ raise Exception("Error initializing Oxigraph store")
171
120
 
172
- def query(self, query: str) -> Result:
173
- """Returns the result of the query."""
174
- start_time = time.perf_counter()
175
- result = self.graph.query(query)
176
- stop_time = time.perf_counter()
177
- elapsed_time = stop_time - start_time
178
- prom_qsm.labels("query").observe(elapsed_time)
179
- prom_sq.labels("query").set(elapsed_time)
180
- return result
121
+ graph = Graph(store=OxigraphStore(store=oxi_store))
122
+ graph.default_union = True
181
123
 
182
- def serialize(self, *args, **kwargs):
183
- """Serializes the graph."""
184
- return self.graph.serialize(*args, **kwargs)
124
+ return cls(graph, rules)
185
125
 
186
- def query_delayed(self, query) -> Iterable[Triple]:
187
- """Returns the result of the query, but does not execute it immediately.
126
+ def write(self, extractor: TripleExtractors) -> None:
127
+ if isinstance(extractor, RdfFileExtractor):
128
+ self._parse_file(extractor.filepath, extractor.mime_type, extractor.base_uri)
129
+ else:
130
+ self._add_triples(extractor.extract())
188
131
 
189
- The query is not executed until the result is iterated over.
132
+ def _parse_file(
133
+ self,
134
+ filepath: Path,
135
+ mime_type: MIMETypes = "application/rdf+xml",
136
+ base_uri: URIRef | None = None,
137
+ ) -> None:
138
+ """Imports graph data from file.
190
139
 
191
140
  Args:
192
- query: SPARQL query to execute
193
-
194
- Returns:
195
- An iterable of triples
196
-
141
+ filepath : File path to file containing graph data, by default None
142
+ mime_type : MIME type of graph data, by default "application/rdf+xml"
143
+ add_base_iri : Add base IRI to graph, by default True
197
144
  """
198
- return _DelayedQuery(self.graph, query)
199
145
 
200
- @abstractmethod
201
- def drop(self) -> None:
202
- """Drops the graph."""
203
- raise NotImplementedError()
146
+ # Oxigraph store, do not want to type hint this as it is an optional dependency
147
+ if type(self.graph.store).__name__ == "OxigraphStore":
204
148
 
205
- def garbage_collector(self) -> None:
206
- """Garbage collection of the graph store."""
207
- # Can be overridden in subclasses
208
- return None
149
+ def parse_to_oxi_store():
150
+ local_import("pyoxigraph", "oxi")
151
+ from cognite.neat.graph.stores._oxrdflib import OxigraphStore
209
152
 
210
- def query_to_dataframe(
211
- self,
212
- query: str,
213
- column_mapping: dict | None = None,
214
- save_to_cache: bool = False,
215
- index_column: str = "instance",
216
- ) -> pd.DataFrame:
217
- """Returns the result of the query as a dataframe.
218
-
219
- Args:
220
- query: SPARQL query to execute
221
- column_mapping: Columns name mapping, by default None
222
- save_to_cache: Save result of query to cache, by default False
223
- index_column: Indexing column , by default "instance"
153
+ cast(OxigraphStore, self.graph.store)._inner.bulk_load(str(filepath), mime_type, base_iri=base_uri) # type: ignore[attr-defined]
154
+ cast(OxigraphStore, self.graph.store)._inner.optimize() # type: ignore[attr-defined]
224
155
 
225
- Returns:
226
- Dataframe with result of query
227
- """
156
+ parse_to_oxi_store()
228
157
 
229
- if column_mapping is None:
230
- column_mapping = {0: "instance", 1: "property", 2: "value"}
231
-
232
- result = self.graph.query(query, DEBUG=False)
233
- df_cache = pd.DataFrame(list(result))
234
- df_cache.rename(columns=column_mapping, inplace=True)
235
- df_cache[index_column] = df_cache[index_column].apply(lambda x: str(x))
236
- if save_to_cache:
237
- self.df_cache = df_cache
238
- return df_cache
239
-
240
- def commit(self):
241
- """Commits the graph."""
242
- self.graph.commit()
243
-
244
- def get_df(self) -> pd.DataFrame:
245
- """Returns the cached dataframe."""
246
- if self.df_cache is None:
247
- raise ValueError("Cache is empty. Run query_to_dataframe() first with save_to_cache.")
248
- return self.df_cache
249
-
250
- def get_instance_properties_from_cache(self, instance_id: str) -> pd.DataFrame:
251
- """Returns the properties of an instance."""
252
- if self.df_cache is None:
253
- raise ValueError("Cache is empty. Run query_to_dataframe() first with save_to_cache.")
254
- return self.df_cache.loc[self.df_cache["instance"] == instance_id]
255
-
256
- def print_triples(self):
257
- """Prints the triples of the graph."""
258
- for subj, pred, obj in self.graph:
259
- logging.info(f"Triple: {subj} {pred} {obj}")
260
-
261
- def diagnostic_report(self):
262
- """Returns the dictionary representation graph diagnostic data ."""
263
- return {
264
- "rdf_store_type": self.rdf_store_type,
265
- "base_prefix": self.base_prefix,
266
- "namespace": self.namespace,
267
- "prefixes": self.prefixes,
268
- "internal_storage_dir": self.internal_storage_dir,
269
- "rdf_store_query_url": self.rdf_store_query_url,
270
- "rdf_store_update_url": self.rdf_store_update_url,
271
- }
272
-
273
- def add_triples(self, triples: list[Triple] | set[Triple], batch_size: int = 10_000, verbose: bool = False):
158
+ # All other stores
159
+ else:
160
+ if filepath.is_file():
161
+ self.graph.parse(filepath, publicID=base_uri)
162
+ else:
163
+ for filename in filepath.iterdir():
164
+ if filename.is_file():
165
+ self.graph.parse(filename, publicID=base_uri)
166
+
167
+ def _add_triples(self, triples: Iterable[Triple], batch_size: int = 10_000):
274
168
  """Adds triples to the graph store in batches.
275
169
 
276
170
  Args:
@@ -280,27 +174,20 @@ class NeatGraphStoreBase(ABC):
280
174
  """
281
175
 
282
176
  commit_counter = 0
283
- if verbose:
284
- logging.info(f"Committing total of {len(triples)} triples to knowledge graph!")
285
- total_number_of_triples = len(triples)
286
- number_of_uploaded_triples = 0
177
+ number_of_written_triples = 0
287
178
 
288
179
  def check_commit(force_commit: bool = False):
289
180
  """Commit nodes to the graph if batch counter is reached or if force_commit is True"""
290
181
  nonlocal commit_counter
291
- nonlocal number_of_uploaded_triples
182
+ nonlocal number_of_written_triples
292
183
  if force_commit:
293
- number_of_uploaded_triples += commit_counter
184
+ number_of_written_triples += commit_counter
294
185
  self.graph.commit()
295
- if verbose:
296
- logging.info(f"Committed {number_of_uploaded_triples} of {total_number_of_triples} triples")
297
186
  return
298
187
  commit_counter += 1
299
188
  if commit_counter >= batch_size:
300
- number_of_uploaded_triples += commit_counter
189
+ number_of_written_triples += commit_counter
301
190
  self.graph.commit()
302
- if verbose:
303
- logging.info(f"Committed {number_of_uploaded_triples} of {total_number_of_triples} triples")
304
191
  commit_counter = 0
305
192
 
306
193
  for triple in triples:
@@ -310,25 +197,10 @@ class NeatGraphStoreBase(ABC):
310
197
  check_commit(force_commit=True)
311
198
 
312
199
 
313
- class _DelayedQuery(Iterable):
314
- def __init__(self, graph_ref: Graph, query: str):
315
- self.graph_ref = graph_ref
316
- self.query = query
317
-
318
- def __iter__(self) -> Iterator[Triple]:
319
- start_time = time.perf_counter()
320
- result = self.graph_ref.query(self.query)
321
- stop_time = time.perf_counter()
322
- elapsed_time = stop_time - start_time
323
- prom_qsm.labels("query").observe(elapsed_time)
324
- prom_sq.labels("query").set(elapsed_time)
325
- return cast(Iterator[Triple], iter(result))
326
-
327
-
328
200
  class _Queries:
329
201
  """Helper class for storing standard queries for the graph store."""
330
202
 
331
- def __init__(self, store: NeatGraphStoreBase):
203
+ def __init__(self, store: NeatGraphStore):
332
204
  self.store = store
333
205
 
334
206
  def list_instances_ids_of_class(self, class_uri: URIRef, limit: int = -1) -> list[URIRef]:
@@ -344,7 +216,7 @@ class _Queries:
344
216
  query_statement = "SELECT DISTINCT ?subject WHERE { ?subject a <class> .} LIMIT X".replace(
345
217
  "class", class_uri
346
218
  ).replace("LIMIT X", "" if limit == -1 else f"LIMIT {limit}")
347
- return [cast(tuple, res)[0] for res in list(self.store.query(query_statement))]
219
+ return [cast(tuple, res)[0] for res in list(self.store.graph.query(query_statement))]
348
220
 
349
221
  def list_instances_of_type(self, class_uri: URIRef) -> list[ResultRow]:
350
222
  """Get all triples for instances of a given class
@@ -359,20 +231,27 @@ class _Queries:
359
231
  f"SELECT ?instance ?prop ?value "
360
232
  f"WHERE {{ ?instance rdf:type <{class_uri}> . ?instance ?prop ?value . }} order by ?instance "
361
233
  )
362
- logging.info(query)
234
+
363
235
  # Select queries gives an iterable of result rows
364
- return cast(list[ResultRow], list(self.store.query(query)))
236
+ return cast(list[ResultRow], list(self.store.graph.query(query)))
365
237
 
366
238
  def triples_of_type_instances(self, rdf_type: str) -> list[tuple[str, str, str]]:
367
239
  """Get all triples of a given type.
368
240
 
369
241
  This method assumes the graph has been transformed into the default namespace.
370
242
  """
371
- query = (
372
- f"SELECT ?instance ?prop ?value "
373
- f"WHERE {{ ?instance a <{self.store.namespace[rdf_type]}> . ?instance ?prop ?value . }} order by ?instance"
374
- )
375
- result = self.store.query(query)
376
243
 
377
- # We cannot include the RDF.type in case there is a neat:type property
378
- return [remove_namespace(*triple) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index]
244
+ if self.store.rules:
245
+ query = (
246
+ f"SELECT ?instance ?prop ?value "
247
+ f"WHERE {{ ?instance a <{self.store.rules.metadata.namespace[rdf_type]}> . ?instance ?prop ?value . }} "
248
+ "order by ?instance"
249
+ )
250
+
251
+ result = self.store.graph.query(query)
252
+
253
+ # We cannot include the RDF.type in case there is a neat:type property
254
+ return [remove_namespace(*triple) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index]
255
+ else:
256
+ warnings.warn("No rules found for the graph store, returning empty list.", stacklevel=2)
257
+ return []
@@ -0,0 +1,99 @@
1
+ # we will use prov-o to represent the provenance of the neat graph store
2
+ # basically tracking changes that occur in the graph store
3
+ # prov-o use concepts of Agent, Activity and Entity to represent provenance
4
+ # where in case of neat we have:
5
+ # Agent: triples extractors, graph enhancers, contextualizers, etc.
6
+ # Activity: write triple, add connection, etc.
7
+ # Entity: neat graph store
8
+
9
+
10
+ import uuid
11
+ from collections import UserList
12
+ from collections.abc import Sequence
13
+ from dataclasses import dataclass
14
+ from datetime import datetime
15
+ from typing import TypeVar
16
+
17
+ from rdflib import PROV, RDF, Literal, URIRef
18
+
19
+ from cognite.neat.constants import DEFAULT_NAMESPACE
20
+
21
+
22
+ @dataclass(frozen=True)
23
+ class Agent:
24
+ id_: URIRef = DEFAULT_NAMESPACE.agent
25
+ acted_on_behalf_of: str = "NEAT"
26
+
27
+ def as_triples(self):
28
+ return [
29
+ (self.id_, RDF.type, PROV[type(self).__name__]),
30
+ (self.id_, PROV.actedOnBehalfOf, self.acted_on_behalf_of),
31
+ ]
32
+
33
+
34
+ @dataclass(frozen=True)
35
+ class Activity:
36
+ was_associated_with: Agent
37
+ ended_at_time: datetime
38
+ started_at_time: datetime
39
+ used: str # this would be set to for example Extractor, Enhancer, Contextualizer, etc.
40
+ id_: URIRef = DEFAULT_NAMESPACE[f"activity-{uuid.uuid4()}"]
41
+
42
+ def as_triples(self):
43
+ return [
44
+ (self.id_, RDF.type, PROV[type(self).__name__]),
45
+ (self.id_, PROV.wasAssociatedWith, self.was_associated_with.id_),
46
+ (self.id_, PROV.startedAtTime, Literal(self.started_at_time)),
47
+ (self.id_, PROV.endedAtTime, Literal(self.ended_at_time)),
48
+ (self.id_, PROV.used, self.used),
49
+ ]
50
+
51
+
52
+ @dataclass(frozen=True)
53
+ class Entity:
54
+ was_generated_by: Activity
55
+ was_attributed_to: Agent
56
+ id_: URIRef = DEFAULT_NAMESPACE["graph-store"]
57
+
58
+ def as_triples(self):
59
+ return [
60
+ (self.id_, RDF.type, PROV[type(self).__name__]),
61
+ (self.id_, PROV.wasGeneratedBy, self.was_generated_by.id_),
62
+ (self.id_, PROV.wasAttributedTo, self.was_attributed_to.id_),
63
+ ]
64
+
65
+
66
+ @dataclass(frozen=True)
67
+ class Change:
68
+ agent: Agent
69
+ activity: Activity
70
+ entity: Entity
71
+ description: str
72
+
73
+ def as_triples(self):
74
+ return self.agent.as_triples() + self.activity.as_triples() + self.entity.as_triples()
75
+
76
+ @classmethod
77
+ def record(cls, activity: str, start: datetime, end: datetime, description: str):
78
+ """User friendly method to record a change that occurred in the graph store."""
79
+ agent = Agent()
80
+ activity = Activity(used=activity, was_associated_with=agent, started_at_time=start, ended_at_time=end)
81
+ entity = Entity(was_generated_by=activity, was_attributed_to=agent)
82
+ return cls(agent, activity, entity, description)
83
+
84
+
85
+ T_Change = TypeVar("T_Change", bound=Change)
86
+
87
+
88
+ class Provenance(UserList[T_Change]):
89
+ def __init__(self, changes: Sequence[T_Change] | None = None):
90
+ super().__init__(changes or [])
91
+
92
+ def did_this_happen(self, this: str) -> bool:
93
+ return any(change.description == this for change in self)
94
+
95
+ def __delitem__(self, *args, **kwargs):
96
+ raise TypeError("Cannot delete change from provenance")
97
+
98
+ def __setitem__(self, *args, **kwargs):
99
+ raise TypeError("Cannot modify change from provenance")
@@ -8,7 +8,7 @@ from rdflib import Literal as RdfLiteral
8
8
 
9
9
  import cognite.neat.rules.issues as issues
10
10
  from cognite.neat.constants import PREFIXES
11
- from cognite.neat.graph.stores import NeatGraphStoreBase
11
+ from cognite.neat.graph.stores import NeatGraphStore
12
12
  from cognite.neat.rules.importers._base import BaseImporter, Rules, _handle_issues
13
13
  from cognite.neat.rules.issues import IssueList
14
14
  from cognite.neat.rules.models import InformationRules, RoleTypes
@@ -47,7 +47,7 @@ class InferenceImporter(BaseImporter):
47
47
  self.max_number_of_instance = max_number_of_instance
48
48
 
49
49
  @classmethod
50
- def from_graph_store(cls, store: NeatGraphStoreBase, max_number_of_instance: int = -1):
50
+ def from_graph_store(cls, store: NeatGraphStore, max_number_of_instance: int = -1):
51
51
  issue_list = IssueList(title="Inferred from graph store")
52
52
 
53
53
  return cls(issue_list, store.graph, max_number_of_instance=max_number_of_instance)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cognite-neat
3
- Version: 0.78.5
3
+ Version: 0.80.0
4
4
  Summary: Knowledge graph transformation
5
5
  Home-page: https://cognite-neat.readthedocs-hosted.com/
6
6
  License: Apache-2.0
@@ -1,5 +1,5 @@
1
1
  cognite/neat/__init__.py,sha256=v-rRiDOgZ3sQSMQKq0vgUQZvpeOkoHFXissAx6Ktg84,61
2
- cognite/neat/_version.py,sha256=2uubg0ZmASu9ykJZHeQzJ7RUMWo2b-kKPcvmMJ5XS4A,23
2
+ cognite/neat/_version.py,sha256=-kfBSEQjpAZbilJOxA4s8jDyP952PRUaVbdaHMuEyIs,23
3
3
  cognite/neat/app/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  cognite/neat/app/api/asgi/metrics.py,sha256=nxFy7L5cChTI0a-zkCiJ59Aq8yLuIJp5c9Dg0wRXtV0,152
5
5
  cognite/neat/app/api/configuration.py,sha256=2U5M6M252swvQPQyooA1EBzFUZNtcTmuSaywfJDgckM,4232
@@ -43,7 +43,8 @@ cognite/neat/app/ui/neat-app/build/static/media/logo.8093b84df9ed36a174c629d6fe0
43
43
  cognite/neat/config.py,sha256=oBrWw-KEo0YMbfjVeW6A1FBW7HpP2Pq2ByIq2vlJ10M,6145
44
44
  cognite/neat/constants.py,sha256=GYVcrFrvqcznYgB_0jAyykIIiIamxyOpv4dTZWj4K4U,1300
45
45
  cognite/neat/exceptions.py,sha256=CM7aCvbek9klOgjTsJ9bfEA8t7KTAL6dc7Mviu4NvSI,4268
46
- cognite/neat/graph/__init__.py,sha256=31uTeejWOSd-I8iUG8GOZFhHZcQCsBitJ6X8vu2r1nU,73
46
+ cognite/neat/graph/__init__.py,sha256=J8JSJj3s4gFbuAexma__KGpBXPN8wuydPTKd6EwgKPA,65
47
+ cognite/neat/graph/_shared.py,sha256=9QRETdm7hvqIeiHv_n1xi1DUq91Nq7oRRpnPKE0Pnag,181
47
48
  cognite/neat/graph/_tracking/__init__.py,sha256=pYj7c-YAUIP4hvN-4mlWnwaeZFerzL9_gM-oZhex7cE,91
48
49
  cognite/neat/graph/_tracking/base.py,sha256=8JmaDhlFhSkdBe4SOvFnrdDvMmfTZkHhZxWWWTYkMOQ,820
49
50
  cognite/neat/graph/_tracking/log.py,sha256=dBSINd8Tn92hBl8APMD8r6j15g2SlaX1tsDLCmHvaU4,927
@@ -52,7 +53,7 @@ cognite/neat/graph/examples/Knowledge-Graph-Nordic44.xml,sha256=U2Ns-M4LRjT1fBkh
52
53
  cognite/neat/graph/examples/__init__.py,sha256=yAjHVY3b5jOjmbW-iLbhvu7BG014TpGi3K4igkDqW5I,368
53
54
  cognite/neat/graph/examples/skos-capturing-sheet-wind-topics.xlsx,sha256=CV_yK5ZSbYS_ktfIZUPD8Sevs47zpswLXQUDFkGE4Gw,45798
54
55
  cognite/neat/graph/exceptions.py,sha256=R6pyOH774n9w2x_X_nrUr8OMAdjJMf_XPIqAvxIQaWo,3401
55
- cognite/neat/graph/extractors/__init__.py,sha256=0Mv7iTBwOdMHgqkINh0V2hnDxeC9fIDpBmmnW5Q1lyQ,645
56
+ cognite/neat/graph/extractors/__init__.py,sha256=PRKYPCnxofQ3i_iiJ3xGjEligLgqAPDw2TSlcZt0MlU,947
56
57
  cognite/neat/graph/extractors/_base.py,sha256=TOXDnlqske8DgnJwA0THDVRgmR79Acjm56yF0E-2w7I,356
57
58
  cognite/neat/graph/extractors/_classic_cdf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
59
  cognite/neat/graph/extractors/_classic_cdf/_assets.py,sha256=S5QB_38ysVodGRMqr_SWYYaUtkUCS6a6L2b5D1T-888,3812
@@ -63,19 +64,17 @@ cognite/neat/graph/extractors/_classic_cdf/_relationships.py,sha256=5kClA5zBlhyP
63
64
  cognite/neat/graph/extractors/_classic_cdf/_sequences.py,sha256=ov-n8cBEC73AMO1xam2GUDHv-7SyOEWXWRxLXh9flyY,3298
64
65
  cognite/neat/graph/extractors/_classic_cdf/_timeseries.py,sha256=xlnJ4fKvCJawZO6l6EHpx36RRAafd3BdYWS0ajNnGVM,4449
65
66
  cognite/neat/graph/extractors/_mock_graph_generator.py,sha256=gziG2FFsLk-HmA9uxAeT9RCjVpFxjkCTLiC4tq2zgvw,14961
67
+ cognite/neat/graph/extractors/_rdf_file.py,sha256=w4-XgPgNsmZOkNxjO1ZQCcopTntmmtxfDBkQxn1se6E,463
66
68
  cognite/neat/graph/issues/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
67
69
  cognite/neat/graph/issues/loader.py,sha256=v8YDsehkUT1QUG61JM9BDV_lqowMUnDmGmbay0aFzN4,3085
68
70
  cognite/neat/graph/loaders/__init__.py,sha256=hHC9sfFfbnGSVFTYeuNTIEu4tdLSJ2mWV07fereLelo,125
69
- cognite/neat/graph/loaders/_base.py,sha256=VOCRIee9ms6FuBlT3mwBV_mQnI6bO53mrardqiMf-Hk,4045
70
- cognite/neat/graph/loaders/_rdf2dms.py,sha256=bVFLjukCwEUGVoyQ6YnmdRXV945fhX3SiHR6yHLXO2k,12873
71
+ cognite/neat/graph/loaders/_base.py,sha256=bdYC6CwsHVqnQa1QzOhL68qQhF1OtrsearqH6D-z3E4,4037
72
+ cognite/neat/graph/loaders/_rdf2dms.py,sha256=Tn7vy6XwXFXpVDn7uzfzgJMJapbPITerKaF5b5Y4ol4,12857
71
73
  cognite/neat/graph/models.py,sha256=AtLgZh2qyRP6NRetjQCy9qLMuTQB0CH52Zsev-qa2sk,149
72
- cognite/neat/graph/stores/__init__.py,sha256=ivvk7STSo-4wuP_CpizKUCPKmt_ufpNWRJUN9Bv5gdY,543
73
- cognite/neat/graph/stores/_base.py,sha256=ZrPDfWjmO3nJwpLS6r0ru7LZAhMtOcA76-v5akHq-kQ,14226
74
- cognite/neat/graph/stores/_graphdb_store.py,sha256=8QM8I4srDKNsR0PddN6htCYUhfkoqlyy-c232Os7C0A,1776
75
- cognite/neat/graph/stores/_memory_store.py,sha256=GQq19xiyAWU0WQU5txmWnLXBuyP6ywd8plR21UtD3Uw,1420
76
- cognite/neat/graph/stores/_oxigraph_store.py,sha256=Xj69oE4M-9aqd8bq5CpLCMAhwNjJQAP1AC7lxzDsCn0,5448
74
+ cognite/neat/graph/stores/__init__.py,sha256=G-VG_YwfRt1kuPao07PDJyZ3w_0-eguzLUM13n-Z_RA,64
75
+ cognite/neat/graph/stores/_base.py,sha256=DGmguO0qE5sLHgHG757ymP-cFtEimKvD57Irr3FH9yY,9106
77
76
  cognite/neat/graph/stores/_oxrdflib.py,sha256=A5zeRm5_e8ui_ihGpgstRDg_N7qcLZ3QZBRGrOXSGI0,9569
78
- cognite/neat/graph/stores/_rdf_to_graph.py,sha256=1ezWHTPn9UkIsAlxZcYRlqWvj3ixlmB5GGG9NN0ls2Q,1244
77
+ cognite/neat/graph/stores/_provenance.py,sha256=Y20-I8dP3DwTQ1sdI_eC4va2Az2FpK0oZwdfJ5T-2wc,3279
79
78
  cognite/neat/issues.py,sha256=pxQfqfBseMDE8JM0iqZnkLXngeyeFfT0TFtu1UuAd4c,4629
80
79
  cognite/neat/legacy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
81
80
  cognite/neat/legacy/graph/__init__.py,sha256=31uTeejWOSd-I8iUG8GOZFhHZcQCsBitJ6X8vu2r1nU,73
@@ -194,7 +193,7 @@ cognite/neat/rules/importers/_dtdl2rules/_unit_lookup.py,sha256=wW4saKva61Q_i17g
194
193
  cognite/neat/rules/importers/_dtdl2rules/dtdl_converter.py,sha256=ysmWUxZ0npwrTB0uiH5jA0v37sfCwowGaYk17IyxPUU,12663
195
194
  cognite/neat/rules/importers/_dtdl2rules/dtdl_importer.py,sha256=QDyGt5YBaxzF4v_oCFSgKRSpwVdVruDU3-VW0DEiHbY,6718
196
195
  cognite/neat/rules/importers/_dtdl2rules/spec.py,sha256=tim_MfN1J0F3Oeqk3BMgIA82d_MZvhRuRMsLK3B4PYc,11897
197
- cognite/neat/rules/importers/_inference2rules.py,sha256=JsV3Ii2wmgRELtpV0GC4Y1KtjhyyGR0dtEFpBonHoA8,11213
196
+ cognite/neat/rules/importers/_inference2rules.py,sha256=JgXmhc_6ME9RrA0gVPn9WoVE7NvCrpjdS4_ELW-2e7g,11205
198
197
  cognite/neat/rules/importers/_owl2rules/__init__.py,sha256=tdGcrgtozdQyST-pTlxIa4cLBNTLvtk1nNYR4vOdFSw,63
199
198
  cognite/neat/rules/importers/_owl2rules/_owl2classes.py,sha256=QpTxvrTGczIa48X8lgXGnMN1AWPhHK0DR6uNq175xak,7357
200
199
  cognite/neat/rules/importers/_owl2rules/_owl2metadata.py,sha256=nwnUaBNAAYMoBre2UmsnkJXUuaqGEpR3U3txDrH2w6g,7527
@@ -293,8 +292,8 @@ cognite/neat/workflows/steps_registry.py,sha256=fkTX14ZA7_gkUYfWIlx7A1XbCidvqR23
293
292
  cognite/neat/workflows/tasks.py,sha256=dqlJwKAb0jlkl7abbY8RRz3m7MT4SK8-7cntMWkOYjw,788
294
293
  cognite/neat/workflows/triggers.py,sha256=_BLNplzoz0iic367u1mhHMHiUrCwP-SLK6_CZzfODX0,7071
295
294
  cognite/neat/workflows/utils.py,sha256=gKdy3RLG7ctRhbCRwaDIWpL9Mi98zm56-d4jfHDqP1E,453
296
- cognite_neat-0.78.5.dist-info/LICENSE,sha256=W8VmvFia4WHa3Gqxq1Ygrq85McUNqIGDVgtdvzT-XqA,11351
297
- cognite_neat-0.78.5.dist-info/METADATA,sha256=cndbsOaHg6ndd8FtHUJVa5ojU6kQvr_ikXsByBRloW8,9298
298
- cognite_neat-0.78.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
299
- cognite_neat-0.78.5.dist-info/entry_points.txt,sha256=61FPqiWb25vbqB0KI7znG8nsg_ibLHBvTjYnkPvNFso,50
300
- cognite_neat-0.78.5.dist-info/RECORD,,
295
+ cognite_neat-0.80.0.dist-info/LICENSE,sha256=W8VmvFia4WHa3Gqxq1Ygrq85McUNqIGDVgtdvzT-XqA,11351
296
+ cognite_neat-0.80.0.dist-info/METADATA,sha256=dfiX6R98UxzayOm5bMw369QhrIGZmRkwOCIaZdNqxyw,9298
297
+ cognite_neat-0.80.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
298
+ cognite_neat-0.80.0.dist-info/entry_points.txt,sha256=61FPqiWb25vbqB0KI7znG8nsg_ibLHBvTjYnkPvNFso,50
299
+ cognite_neat-0.80.0.dist-info/RECORD,,
@@ -1,51 +0,0 @@
1
- import logging
2
-
3
- import requests
4
- from rdflib import Graph, Namespace
5
- from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
6
-
7
- from cognite.neat.constants import DEFAULT_NAMESPACE, PREFIXES
8
-
9
- from ._base import NeatGraphStoreBase
10
-
11
-
12
- class GraphDBStore(NeatGraphStoreBase):
13
- """GraphDB is a class that stores the graph in a GraphDB instances and provides methods to
14
- read/write data it contains
15
-
16
-
17
- Args:
18
- graph : Instance of rdflib.Graph class for graph storage
19
- base_prefix : Used as a base prefix for graph namespace, allowing querying graph data using a shortform of a URI
20
- namespace : Namespace (aka URI) used to resolve any relative URI in the graph
21
- prefixes : Dictionary of additional prefixes used and bounded to the graph
22
- """
23
-
24
- rdf_store_type = "graphdb"
25
-
26
- def __init__(
27
- self,
28
- graph: Graph | None = None,
29
- base_prefix: str = "", # usually empty
30
- namespace: Namespace = DEFAULT_NAMESPACE,
31
- prefixes: dict = PREFIXES,
32
- ):
33
- super().__init__(graph, base_prefix, namespace, prefixes)
34
- self.graph_db_rest_url: str = "http://localhost:7200"
35
-
36
- def _set_graph(self) -> None:
37
- logging.info("Initializing graph store with GraphDB")
38
- store = SPARQLUpdateStore(
39
- query_endpoint=self.rdf_store_query_url,
40
- update_endpoint=self.rdf_store_update_url,
41
- returnFormat=self.returnFormat,
42
- context_aware=False,
43
- postAsEncoded=False,
44
- autocommit=False,
45
- )
46
- self.graph = Graph(store=store)
47
-
48
- def drop(self):
49
- """Drops the graph."""
50
- r = requests.delete(f"{self.rdf_store_query_url}/rdf-graphs/service?default")
51
- logging.info(f"Dropped graph with state: {r.text}")
@@ -1,43 +0,0 @@
1
- import logging
2
-
3
- from rdflib import Graph, Namespace
4
-
5
- from cognite.neat.constants import DEFAULT_NAMESPACE, PREFIXES
6
-
7
- from ._base import NeatGraphStoreBase
8
-
9
-
10
- class MemoryStore(NeatGraphStoreBase):
11
- """MemoryStore is a class that stores the graph in memory using rdflib and provides
12
- methods to read/write data it contains.
13
-
14
-
15
- Args:
16
- graph : Instance of rdflib.Graph class for graph storage
17
- base_prefix : Used as a base prefix for graph namespace, allowing querying graph data using a shortform of a URI
18
- namespace : Namespace (aka URI) used to resolve any relative URI in the graph
19
- prefixes : Dictionary of additional prefixes used and bounded to the graph
20
- """
21
-
22
- rdf_store_type: str = "memory"
23
-
24
- def __init__(
25
- self,
26
- graph: Graph | None = None,
27
- base_prefix: str = "", # usually empty
28
- namespace: Namespace = DEFAULT_NAMESPACE,
29
- prefixes: dict = PREFIXES,
30
- ):
31
- # Init repeated to get nice docstring
32
- super().__init__(graph, base_prefix, namespace, prefixes)
33
-
34
- def _set_graph(self):
35
- logging.info("Initializing graph in memory")
36
- self.graph = Graph()
37
-
38
- def drop(self):
39
- """Drops the graph."""
40
- # In the case of in-memory graph, we just reinitialize the graph
41
- # otherwise we would lose the prefixes and bindings, which fails
42
- # workflow
43
- self.reinitialize_graph()
@@ -1,147 +0,0 @@
1
- import logging
2
- import os
3
- import shutil
4
- from pathlib import Path
5
-
6
- from rdflib import Graph, Namespace
7
-
8
- from cognite.neat.constants import DEFAULT_NAMESPACE, PREFIXES
9
- from cognite.neat.utils.auxiliary import local_import
10
-
11
- from ._base import MIMETypes, NeatGraphStoreBase
12
-
13
-
14
- class OxiGraphStore(NeatGraphStoreBase):
15
- """OxiGraph is a class that stores the graph using OxiGraph and provides methods to read/write data it contains
16
-
17
-
18
- Args:
19
- graph : Instance of rdflib.Graph class for graph storage
20
- base_prefix : Used as a base prefix for graph namespace, allowing querying graph data using a shortform of a URI
21
- namespace : Namespace (aka URI) used to resolve any relative URI in the graph
22
- prefixes : Dictionary of additional prefixes used and bounded to the graph
23
- """
24
-
25
- rdf_store_type = "oxigraph"
26
-
27
- def __init__(
28
- self,
29
- graph: Graph | None = None,
30
- base_prefix: str = "", # usually empty
31
- namespace: Namespace = DEFAULT_NAMESPACE,
32
- prefixes: dict = PREFIXES,
33
- ):
34
- super().__init__(graph, base_prefix, namespace, prefixes)
35
-
36
- def _set_graph(self) -> None:
37
- logging.info("Initializing Oxigraph store")
38
- local_import("pyoxigraph", "oxi")
39
- import pyoxigraph
40
-
41
- from cognite.neat.graph.stores import _oxrdflib
42
-
43
- # Adding support for both in-memory and file-based storage
44
- for i in range(4):
45
- try:
46
- oxstore = pyoxigraph.Store(
47
- path=str(self.internal_storage_dir) if self.internal_storage_dir else None
48
- ) # Store (Rust object) accepts only str as path and not Path.
49
- break
50
- except OSError as e:
51
- if "lock" in str(e) and i < 3:
52
- # lock originated from another instance of the store
53
- logging.error("Error initializing Oxigraph store: %s", e)
54
- else:
55
- raise e
56
- else:
57
- raise Exception("Error initializing Oxigraph store")
58
-
59
- self.graph = Graph(store=_oxrdflib.OxigraphStore(store=oxstore))
60
- self.graph.default_union = True
61
- self.garbage_collector()
62
-
63
- def close(self):
64
- """Closes the graph."""
65
- if self.graph is not None:
66
- try:
67
- self.graph.store._inner.flush() # type: ignore[attr-defined]
68
- self.graph.close(True)
69
- except Exception as e:
70
- logging.debug("Error closing graph: %s", e)
71
-
72
- def restart(self):
73
- """Restarts the graph"""
74
- self.close()
75
- self.reinitialize_graph()
76
- logging.info("GraphStore restarted")
77
-
78
- def import_from_file(
79
- self, graph_file: Path, mime_type: MIMETypes = "application/rdf+xml", add_base_iri: bool = True
80
- ) -> None:
81
- """Imports graph data from file.
82
-
83
- Args:
84
- graph_file : File path to file containing graph data, by default None
85
- mime_type : MIME type of the file, by default "application/rdf+xml"
86
- add_base_iri : Add base IRI to the graph, by default True
87
- """
88
- if add_base_iri:
89
- self.graph.store._inner.bulk_load( # type: ignore[attr-defined]
90
- str(graph_file), mime_type, base_iri=self.namespace
91
- )
92
- else:
93
- self.graph.store._inner.bulk_load(str(graph_file), mime_type) # type: ignore[attr-defined]
94
- self.graph.store._inner.optimize() # type: ignore[attr-defined]
95
- return None
96
-
97
- def drop(self):
98
- try:
99
- self.close()
100
- # Due to the specifics of Oxigraph, storage directory cannot be deleted immediately
101
- # after closing the graph and creating a new one
102
- if self.internal_storage_dir.exists():
103
- self.storage_dirs_to_delete.append(self.internal_storage_dir)
104
- self.garbage_collector()
105
-
106
- except Exception as e:
107
- logging.error(f"Error dropping graph : {e}")
108
-
109
- def garbage_collector(self):
110
- """Garbage collection of the graph store."""
111
- # delete all directories in self.storage_dirs_to_delete
112
- for d in self.storage_dirs_to_delete:
113
- shutil.rmtree(d)
114
- self.storage_dirs_to_delete = []
115
-
116
- def __del__(self):
117
- if self.graph is not None:
118
- if self.graph.store is not None:
119
- try:
120
- self.graph.store._inner.flush()
121
- except Exception:
122
- logging.debug("Error flushing graph")
123
- self.graph.close()
124
- # It requires more investigation os.remove(self.internal_storage_dir / "LOCK")
125
-
126
- def commit(self):
127
- """Commits the graph."""
128
- if self.graph:
129
- if self.graph.store:
130
- logging.info("Committing graph - flushing and optimizing")
131
- self.graph.store._inner.flush()
132
- self.graph.store._inner.optimize()
133
- self.graph.commit()
134
-
135
- @staticmethod
136
- def drop_graph_store_storage(storage_path: Path | None) -> None:
137
- """Drop graph store storage on disk.
138
-
139
- Args:
140
- storage_path : Path to storage directory
141
- """
142
- if storage_path and storage_path.exists():
143
- for f in os.listdir(storage_path):
144
- (storage_path / f).unlink()
145
- logging.info("Graph store dropped.")
146
- else:
147
- logging.info(f"Storage path {storage_path} does not exist. Skipping drop.")
@@ -1,40 +0,0 @@
1
- from pathlib import Path
2
-
3
- from rdflib import Graph, Namespace
4
-
5
- from cognite.neat.constants import PREFIXES
6
-
7
-
8
- def rdf_file_to_graph(
9
- graph: Graph,
10
- filepath: Path,
11
- base_prefix: str | None = None,
12
- base_namespace: Namespace | None = None,
13
- prefixes: dict[str, Namespace] = PREFIXES,
14
- ) -> Graph:
15
- """Created rdflib Graph instance loaded with RDF triples from file
16
-
17
- Args:
18
- filepath: Path to the RDF file
19
- base_prefix: base prefix for URIs. Defaults to None.
20
- base_namespace: base namespace for URIs . Defaults to None.
21
- prefixes: Dictionary of prefixes to bind to graph. Defaults to PREFIXES.
22
- graph: Graph instance to load RDF triples into. Defaults to None.
23
-
24
- Returns:
25
- Graph instance loaded with RDF triples from file
26
- """
27
-
28
- if filepath.is_file():
29
- graph.parse(filepath, publicID=base_namespace)
30
- else:
31
- for filename in filepath.iterdir():
32
- if filename.is_file():
33
- graph.parse(filename, publicID=base_namespace)
34
- if base_prefix and base_namespace:
35
- graph.bind(base_prefix, base_namespace)
36
- if prefixes:
37
- for prefix, namespace in prefixes.items():
38
- graph.bind(prefix, namespace)
39
-
40
- return graph