cognite-neat 0.79.0__py3-none-any.whl → 0.80.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

cognite/neat/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.79.0"
1
+ __version__ = "0.80.1"
@@ -1,3 +1,3 @@
1
- from .stores import NeatGraphStoreBase
1
+ from .stores import NeatGraphStore
2
2
 
3
- __all__ = ["NeatGraphStoreBase"]
3
+ __all__ = ["NeatGraphStore"]
@@ -0,0 +1,5 @@
1
+ from typing import Literal, TypeAlias
2
+
3
+ MIMETypes: TypeAlias = Literal[
4
+ "application/rdf+xml", "text/turtle", "application/n-triple", "application/n-quads", "application/trig"
5
+ ]
@@ -6,6 +6,7 @@ from ._classic_cdf._relationships import RelationshipsExtractor
6
6
  from ._classic_cdf._sequences import SequencesExtractor
7
7
  from ._classic_cdf._timeseries import TimeSeriesExtractor
8
8
  from ._mock_graph_generator import MockGraphGenerator
9
+ from ._rdf_file import RdfFileExtractor
9
10
 
10
11
  __all__ = [
11
12
  "AssetsExtractor",
@@ -16,4 +17,18 @@ __all__ = [
16
17
  "EventsExtractor",
17
18
  "FilesExtractor",
18
19
  "LabelsExtractor",
20
+ "RdfFileExtractor",
19
21
  ]
22
+
23
+
24
+ TripleExtractors = (
25
+ AssetsExtractor
26
+ | MockGraphGenerator
27
+ | RelationshipsExtractor
28
+ | TimeSeriesExtractor
29
+ | SequencesExtractor
30
+ | EventsExtractor
31
+ | FilesExtractor
32
+ | LabelsExtractor
33
+ | RdfFileExtractor
34
+ )
@@ -0,0 +1,18 @@
1
+ from pathlib import Path
2
+
3
+ from rdflib import URIRef
4
+
5
+ from cognite.neat.graph._shared import MIMETypes
6
+ from cognite.neat.graph.extractors._base import BaseExtractor
7
+
8
+
9
+ class RdfFileExtractor(BaseExtractor):
10
+ def __init__(
11
+ self,
12
+ filepath: Path,
13
+ mime_type: MIMETypes = "application/rdf+xml",
14
+ base_uri: URIRef | None = None,
15
+ ):
16
+ self.filepath = filepath
17
+ self.mime_type = mime_type
18
+ self.base_uri = base_uri
@@ -6,7 +6,7 @@ from typing import ClassVar, Generic, Literal, TypeVar, overload
6
6
  from cognite.client import CogniteClient
7
7
  from cognite.client.data_classes.capabilities import Capability
8
8
 
9
- from cognite.neat.graph import NeatGraphStoreBase
9
+ from cognite.neat.graph import NeatGraphStore
10
10
  from cognite.neat.graph.issues.loader import FailedAuthorizationError
11
11
  from cognite.neat.issues import NeatIssue, NeatIssueList
12
12
  from cognite.neat.utils.upload import UploadDiffsID, UploadResultIDs
@@ -18,7 +18,7 @@ class BaseLoader(ABC, Generic[T_Output]):
18
18
  _new_line = "\n"
19
19
  _encoding = "utf-8"
20
20
 
21
- def __init__(self, graph_store: NeatGraphStoreBase):
21
+ def __init__(self, graph_store: NeatGraphStore):
22
22
  self.graph_store = graph_store
23
23
 
24
24
  @abstractmethod
@@ -17,7 +17,7 @@ from pydantic.main import Model
17
17
 
18
18
  from cognite.neat.graph._tracking import LogTracker, Tracker
19
19
  from cognite.neat.graph.issues import loader as loader_issues
20
- from cognite.neat.graph.stores import NeatGraphStoreBase
20
+ from cognite.neat.graph.stores import NeatGraphStore
21
21
  from cognite.neat.issues import NeatIssue, NeatIssueList
22
22
  from cognite.neat.rules.models import DMSRules
23
23
  from cognite.neat.rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE
@@ -29,7 +29,7 @@ from ._base import CDFLoader
29
29
  class DMSLoader(CDFLoader[dm.InstanceApply]):
30
30
  def __init__(
31
31
  self,
32
- graph_store: NeatGraphStoreBase,
32
+ graph_store: NeatGraphStore,
33
33
  data_model: dm.DataModel[dm.View] | None,
34
34
  instance_space: str,
35
35
  class_by_view_id: dict[ViewId, str] | None = None,
@@ -48,7 +48,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
48
48
  cls,
49
49
  client: CogniteClient,
50
50
  data_model_id: dm.DataModelId,
51
- graph_store: NeatGraphStoreBase,
51
+ graph_store: NeatGraphStore,
52
52
  instance_space: str,
53
53
  ) -> "DMSLoader":
54
54
  issues: list[NeatIssue] = []
@@ -61,7 +61,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
61
61
  return cls(graph_store, data_model, instance_space, {}, issues)
62
62
 
63
63
  @classmethod
64
- def from_rules(cls, rules: DMSRules, graph_store: NeatGraphStoreBase, instance_space: str) -> "DMSLoader":
64
+ def from_rules(cls, rules: DMSRules, graph_store: NeatGraphStore, instance_space: str) -> "DMSLoader":
65
65
  issues: list[NeatIssue] = []
66
66
  data_model: dm.DataModel[dm.View] | None = None
67
67
  try:
@@ -1,13 +1,3 @@
1
- from ._base import NeatGraphStoreBase
2
- from ._graphdb_store import GraphDBStore
3
- from ._memory_store import MemoryStore
4
- from ._oxigraph_store import OxiGraphStore
1
+ from ._base import NeatGraphStore
5
2
 
6
- STORE_BY_TYPE: dict[str, type[NeatGraphStoreBase]] = {}
7
- for store in NeatGraphStoreBase.__subclasses__():
8
- STORE_BY_TYPE[store.rdf_store_type] = store # type: ignore[type-abstract]
9
-
10
- del store # Cleanup namespace
11
- AVAILABLE_STORES = set(STORE_BY_TYPE.keys())
12
-
13
- __all__ = ["NeatGraphStoreBase", "MemoryStore", "OxiGraphStore", "GraphDBStore", "STORE_BY_TYPE", "AVAILABLE_STORES"]
3
+ __all__ = ["NeatGraphStore"]
@@ -1,291 +1,170 @@
1
- import logging
2
1
  import sys
3
- import time
4
- from abc import ABC, abstractmethod
5
- from collections.abc import Iterable, Iterator
2
+ import warnings
3
+ from collections.abc import Iterable
6
4
  from datetime import datetime
7
5
  from pathlib import Path
8
- from typing import Literal, TypeAlias, cast
6
+ from typing import cast
9
7
 
10
- import pandas as pd
11
8
  import pytz
12
- from prometheus_client import Gauge, Summary
13
9
  from rdflib import RDF, Graph, Namespace, URIRef
14
- from rdflib.query import Result, ResultRow
10
+ from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
11
+ from rdflib.query import ResultRow
15
12
 
16
- from cognite.neat.constants import DEFAULT_NAMESPACE, PREFIXES
13
+ from cognite.neat.graph._shared import MIMETypes
14
+ from cognite.neat.graph.extractors import RdfFileExtractor, TripleExtractors
17
15
  from cognite.neat.graph.models import Triple
18
- from cognite.neat.graph.stores._rdf_to_graph import rdf_file_to_graph
16
+ from cognite.neat.rules.models.information import InformationRules
19
17
  from cognite.neat.utils import remove_namespace
18
+ from cognite.neat.utils.auxiliary import local_import
20
19
 
21
20
  from ._provenance import Change, Provenance
22
21
 
23
- if sys.version_info >= (3, 11):
24
- pass
22
+ if sys.version_info < (3, 11):
23
+ from typing_extensions import Self
25
24
  else:
26
- pass
25
+ from typing import Self
27
26
 
28
- prom_qsm = Summary("store_query_time_summary", "Time spent processing queries", ["query"])
29
- prom_sq = Gauge("store_single_query_time", "Time spent processing a single query", ["query"])
30
27
 
31
- MIMETypes: TypeAlias = Literal[
32
- "application/rdf+xml", "text/turtle", "application/n-triple", "application/n-quads", "application/trig"
33
- ]
34
-
35
-
36
- class NeatGraphStoreBase(ABC):
28
+ class NeatGraphStore:
37
29
  """NeatGraphStore is a class that stores the graph and provides methods to read/write data it contains
38
30
 
39
31
 
40
32
  Args:
41
33
  graph : Instance of rdflib.Graph class for graph storage
42
- base_prefix : Used as a base prefix for graph namespace, allowing querying graph data using a shortform of a URI
43
- namespace : Namespace (aka URI) used to resolve any relative URI in the graph
44
- prefixes : Dictionary of additional prefixes used and bounded to the graph
34
+ rules:
45
35
  """
46
36
 
47
37
  rdf_store_type: str
48
38
 
49
39
  def __init__(
50
40
  self,
51
- graph: Graph | None = None,
52
- base_prefix: str = "", # usually empty
53
- namespace: Namespace = DEFAULT_NAMESPACE,
54
- prefixes: dict = PREFIXES,
41
+ graph: Graph,
42
+ rules: InformationRules | None = None,
55
43
  ):
56
44
  _start = datetime.now(pytz.utc)
57
- self.graph = graph or Graph()
58
- self.base_prefix: str = base_prefix
59
- self.namespace: Namespace = namespace
60
- self.prefixes: dict[str, Namespace] = prefixes
61
-
62
- self.rdf_store_query_url: str | None = None
63
- self.rdf_store_update_url: str | None = None
64
- self.returnFormat: str | None = None
65
- self.df_cache: pd.DataFrame | None = None
66
- self.internal_storage_dir: Path | None = None
67
- self.graph_name: str | None = None
68
- self.internal_storage_dir_orig: Path | None = None
69
- self.storage_dirs_to_delete: list[Path] = []
70
- self.queries = _Queries(self)
45
+ self.graph = graph
71
46
  self.provenance = Provenance(
72
47
  [
73
48
  Change.record(
74
49
  activity=f"{type(self).__name__}.__init__",
75
50
  start=_start,
76
51
  end=datetime.now(pytz.utc),
77
- description="Initialize graph store",
52
+ description=f"Initialize graph store as {type(self.graph.store).__name__}",
78
53
  )
79
54
  ]
80
55
  )
56
+ self.rules = rules
81
57
 
82
- @abstractmethod
83
- def _set_graph(self) -> None:
84
- raise NotImplementedError()
85
-
86
- def init_graph(
87
- self,
88
- rdf_store_query_url: str | None = None,
89
- rdf_store_update_url: str | None = None,
90
- graph_name: str | None = None,
91
- base_prefix: str | None = None,
92
- returnFormat: str = "csv",
93
- internal_storage_dir: Path | None = None,
94
- ):
95
- """Initializes the graph.
96
-
97
- Args:
98
- rdf_store_query_url : URL towards which SPARQL query is executed, by default None
99
- rdf_store_update_url : URL towards which SPARQL update is executed, by default None
100
- graph_name : Name of graph, by default None
101
- base_prefix : Base prefix for graph namespace to change if needed, by default None
102
- returnFormat : Transport format of graph data between, by default "csv"
103
- internal_storage_dir : Path to directory where internal storage is located,
104
- by default None (in-memory storage).
105
-
106
- !!! note "internal_storage_dir"
107
- Used only for Oxigraph
108
- """
109
- logging.info("Initializing NeatGraphStore")
110
- self.rdf_store_query_url = rdf_store_query_url
111
- self.rdf_store_update_url = rdf_store_update_url
112
- self.graph_name = graph_name
113
- self.returnFormat = returnFormat
114
- self.internal_storage_dir = Path(internal_storage_dir) if internal_storage_dir else None
115
- self.internal_storage_dir_orig = (
116
- self.internal_storage_dir if self.internal_storage_dir_orig is None else self.internal_storage_dir_orig
117
- )
58
+ if self.rules and self.rules.prefixes:
59
+ self._upsert_prefixes(self.rules.prefixes)
118
60
 
119
- self._set_graph()
120
-
121
- if self.prefixes:
122
- for prefix, namespace in self.prefixes.items():
123
- logging.info("Adding prefix %s with namespace %s", prefix, namespace)
124
- self.graph.bind(prefix, namespace)
125
-
126
- if base_prefix:
127
- self.base_prefix = base_prefix
128
- if self.base_prefix:
129
- self.graph.bind(self.base_prefix, self.namespace)
130
- logging.info("Adding prefix %s with namespace %s", self.base_prefix, self.namespace)
131
- logging.info("Graph initialized")
132
-
133
- def reinitialize_graph(self):
134
- """Reinitialize the graph."""
135
- self.init_graph(
136
- self.rdf_store_query_url,
137
- self.rdf_store_update_url,
138
- self.graph_name,
139
- self.base_prefix,
140
- self.returnFormat,
141
- self.internal_storage_dir,
142
- )
61
+ self.queries = _Queries(self)
143
62
 
144
- def upsert_prefixes(self, prefixes: dict[str, Namespace]) -> None:
63
+ def _upsert_prefixes(self, prefixes: dict[str, Namespace]) -> None:
145
64
  """Adds prefixes to the graph store."""
146
- self.prefixes.update(prefixes)
65
+ _start = datetime.now(pytz.utc)
147
66
  for prefix, namespace in prefixes.items():
148
- logging.info("Adding prefix %s with namespace %s", prefix, namespace)
149
67
  self.graph.bind(prefix, namespace)
150
68
 
151
- def close(self) -> None:
152
- """Closes the graph."""
153
- # Can be overridden in subclasses
154
- return None
155
-
156
- def restart(self) -> None:
157
- """Restarts the graph"""
158
- # Can be overridden in subclasses
159
- return None
160
-
161
- def import_from_file(
162
- self, graph_file: Path, mime_type: MIMETypes = "application/rdf+xml", add_base_iri: bool = True
163
- ) -> None:
164
- """Imports graph data from file.
165
-
166
- Args:
167
- graph_file : File path to file containing graph data, by default None
168
- mime_type : MIME type of graph data, by default "application/rdf+xml"
169
- add_base_iri : Add base IRI to graph, by default True
170
- """
171
- if add_base_iri:
172
- self.graph = rdf_file_to_graph(
173
- self.graph, graph_file, base_namespace=self.namespace, prefixes=self.prefixes
69
+ self.provenance.append(
70
+ Change.record(
71
+ activity=f"{type(self).__name__}._upsert_prefixes",
72
+ start=_start,
73
+ end=datetime.now(pytz.utc),
74
+ description="Upsert prefixes to graph store",
174
75
  )
175
- else:
176
- self.graph = rdf_file_to_graph(self.graph, graph_file, prefixes=self.prefixes)
177
- return None
76
+ )
178
77
 
179
- def get_graph(self) -> Graph:
180
- """Returns the graph."""
181
- return self.graph
78
+ @classmethod
79
+ def from_memory_store(cls, rules: InformationRules | None = None) -> "Self":
80
+ return cls(Graph(), rules)
182
81
 
183
- def set_graph(self, graph: Graph):
184
- """Sets the graph."""
185
- self.graph = graph
82
+ @classmethod
83
+ def from_sparql_store(
84
+ cls,
85
+ query_endpoint: str | None = None,
86
+ update_endpoint: str | None = None,
87
+ returnFormat: str = "csv",
88
+ rules: InformationRules | None = None,
89
+ ) -> "Self":
90
+ store = SPARQLUpdateStore(
91
+ query_endpoint=query_endpoint,
92
+ update_endpoint=update_endpoint,
93
+ returnFormat=returnFormat,
94
+ context_aware=False,
95
+ postAsEncoded=False,
96
+ autocommit=False,
97
+ )
98
+ graph = Graph(store=store)
99
+ return cls(graph, rules)
100
+
101
+ @classmethod
102
+ def from_oxi_store(cls, storage_dir: Path | None = None, rules: InformationRules | None = None) -> "Self":
103
+ """Creates a NeatGraphStore from an Oxigraph store."""
104
+ local_import("pyoxigraph", "oxi")
105
+ import pyoxigraph
106
+
107
+ from cognite.neat.graph.stores._oxrdflib import OxigraphStore
108
+
109
+ # Adding support for both oxigraph in-memory and file-based storage
110
+ for i in range(4):
111
+ try:
112
+ oxi_store = pyoxigraph.Store(path=str(storage_dir) if storage_dir else None)
113
+ break
114
+ except OSError as e:
115
+ if "lock" in str(e) and i < 3:
116
+ continue
117
+ raise e
118
+ else:
119
+ raise Exception("Error initializing Oxigraph store")
186
120
 
187
- def query(self, query: str) -> Result:
188
- """Returns the result of the query."""
189
- start_time = time.perf_counter()
190
- result = self.graph.query(query)
191
- stop_time = time.perf_counter()
192
- elapsed_time = stop_time - start_time
193
- prom_qsm.labels("query").observe(elapsed_time)
194
- prom_sq.labels("query").set(elapsed_time)
195
- return result
121
+ graph = Graph(store=OxigraphStore(store=oxi_store))
122
+ graph.default_union = True
196
123
 
197
- def serialize(self, *args, **kwargs):
198
- """Serializes the graph."""
199
- return self.graph.serialize(*args, **kwargs)
124
+ return cls(graph, rules)
200
125
 
201
- def query_delayed(self, query) -> Iterable[Triple]:
202
- """Returns the result of the query, but does not execute it immediately.
126
+ def write(self, extractor: TripleExtractors) -> None:
127
+ if isinstance(extractor, RdfFileExtractor):
128
+ self._parse_file(extractor.filepath, extractor.mime_type, extractor.base_uri)
129
+ else:
130
+ self._add_triples(extractor.extract())
203
131
 
204
- The query is not executed until the result is iterated over.
132
+ def _parse_file(
133
+ self,
134
+ filepath: Path,
135
+ mime_type: MIMETypes = "application/rdf+xml",
136
+ base_uri: URIRef | None = None,
137
+ ) -> None:
138
+ """Imports graph data from file.
205
139
 
206
140
  Args:
207
- query: SPARQL query to execute
208
-
209
- Returns:
210
- An iterable of triples
211
-
141
+ filepath : File path to file containing graph data, by default None
142
+ mime_type : MIME type of graph data, by default "application/rdf+xml"
143
+ add_base_iri : Add base IRI to graph, by default True
212
144
  """
213
- return _DelayedQuery(self.graph, query)
214
145
 
215
- @abstractmethod
216
- def drop(self) -> None:
217
- """Drops the graph."""
218
- raise NotImplementedError()
146
+ # Oxigraph store, do not want to type hint this as it is an optional dependency
147
+ if type(self.graph.store).__name__ == "OxigraphStore":
219
148
 
220
- def garbage_collector(self) -> None:
221
- """Garbage collection of the graph store."""
222
- # Can be overridden in subclasses
223
- return None
149
+ def parse_to_oxi_store():
150
+ local_import("pyoxigraph", "oxi")
151
+ from cognite.neat.graph.stores._oxrdflib import OxigraphStore
224
152
 
225
- def query_to_dataframe(
226
- self,
227
- query: str,
228
- column_mapping: dict | None = None,
229
- save_to_cache: bool = False,
230
- index_column: str = "instance",
231
- ) -> pd.DataFrame:
232
- """Returns the result of the query as a dataframe.
233
-
234
- Args:
235
- query: SPARQL query to execute
236
- column_mapping: Columns name mapping, by default None
237
- save_to_cache: Save result of query to cache, by default False
238
- index_column: Indexing column , by default "instance"
153
+ cast(OxigraphStore, self.graph.store)._inner.bulk_load(str(filepath), mime_type, base_iri=base_uri) # type: ignore[attr-defined]
154
+ cast(OxigraphStore, self.graph.store)._inner.optimize() # type: ignore[attr-defined]
239
155
 
240
- Returns:
241
- Dataframe with result of query
242
- """
156
+ parse_to_oxi_store()
243
157
 
244
- if column_mapping is None:
245
- column_mapping = {0: "instance", 1: "property", 2: "value"}
246
-
247
- result = self.graph.query(query, DEBUG=False)
248
- df_cache = pd.DataFrame(list(result))
249
- df_cache.rename(columns=column_mapping, inplace=True)
250
- df_cache[index_column] = df_cache[index_column].apply(lambda x: str(x))
251
- if save_to_cache:
252
- self.df_cache = df_cache
253
- return df_cache
254
-
255
- def commit(self):
256
- """Commits the graph."""
257
- self.graph.commit()
258
-
259
- def get_df(self) -> pd.DataFrame:
260
- """Returns the cached dataframe."""
261
- if self.df_cache is None:
262
- raise ValueError("Cache is empty. Run query_to_dataframe() first with save_to_cache.")
263
- return self.df_cache
264
-
265
- def get_instance_properties_from_cache(self, instance_id: str) -> pd.DataFrame:
266
- """Returns the properties of an instance."""
267
- if self.df_cache is None:
268
- raise ValueError("Cache is empty. Run query_to_dataframe() first with save_to_cache.")
269
- return self.df_cache.loc[self.df_cache["instance"] == instance_id]
270
-
271
- def print_triples(self):
272
- """Prints the triples of the graph."""
273
- for subj, pred, obj in self.graph:
274
- logging.info(f"Triple: {subj} {pred} {obj}")
275
-
276
- def diagnostic_report(self):
277
- """Returns the dictionary representation graph diagnostic data ."""
278
- return {
279
- "rdf_store_type": self.rdf_store_type,
280
- "base_prefix": self.base_prefix,
281
- "namespace": self.namespace,
282
- "prefixes": self.prefixes,
283
- "internal_storage_dir": self.internal_storage_dir,
284
- "rdf_store_query_url": self.rdf_store_query_url,
285
- "rdf_store_update_url": self.rdf_store_update_url,
286
- }
287
-
288
- def add_triples(self, triples: list[Triple] | set[Triple], batch_size: int = 10_000, verbose: bool = False):
158
+ # All other stores
159
+ else:
160
+ if filepath.is_file():
161
+ self.graph.parse(filepath, publicID=base_uri)
162
+ else:
163
+ for filename in filepath.iterdir():
164
+ if filename.is_file():
165
+ self.graph.parse(filename, publicID=base_uri)
166
+
167
+ def _add_triples(self, triples: Iterable[Triple], batch_size: int = 10_000):
289
168
  """Adds triples to the graph store in batches.
290
169
 
291
170
  Args:
@@ -295,27 +174,20 @@ class NeatGraphStoreBase(ABC):
295
174
  """
296
175
 
297
176
  commit_counter = 0
298
- if verbose:
299
- logging.info(f"Committing total of {len(triples)} triples to knowledge graph!")
300
- total_number_of_triples = len(triples)
301
- number_of_uploaded_triples = 0
177
+ number_of_written_triples = 0
302
178
 
303
179
  def check_commit(force_commit: bool = False):
304
180
  """Commit nodes to the graph if batch counter is reached or if force_commit is True"""
305
181
  nonlocal commit_counter
306
- nonlocal number_of_uploaded_triples
182
+ nonlocal number_of_written_triples
307
183
  if force_commit:
308
- number_of_uploaded_triples += commit_counter
184
+ number_of_written_triples += commit_counter
309
185
  self.graph.commit()
310
- if verbose:
311
- logging.info(f"Committed {number_of_uploaded_triples} of {total_number_of_triples} triples")
312
186
  return
313
187
  commit_counter += 1
314
188
  if commit_counter >= batch_size:
315
- number_of_uploaded_triples += commit_counter
189
+ number_of_written_triples += commit_counter
316
190
  self.graph.commit()
317
- if verbose:
318
- logging.info(f"Committed {number_of_uploaded_triples} of {total_number_of_triples} triples")
319
191
  commit_counter = 0
320
192
 
321
193
  for triple in triples:
@@ -325,25 +197,10 @@ class NeatGraphStoreBase(ABC):
325
197
  check_commit(force_commit=True)
326
198
 
327
199
 
328
- class _DelayedQuery(Iterable):
329
- def __init__(self, graph_ref: Graph, query: str):
330
- self.graph_ref = graph_ref
331
- self.query = query
332
-
333
- def __iter__(self) -> Iterator[Triple]:
334
- start_time = time.perf_counter()
335
- result = self.graph_ref.query(self.query)
336
- stop_time = time.perf_counter()
337
- elapsed_time = stop_time - start_time
338
- prom_qsm.labels("query").observe(elapsed_time)
339
- prom_sq.labels("query").set(elapsed_time)
340
- return cast(Iterator[Triple], iter(result))
341
-
342
-
343
200
  class _Queries:
344
201
  """Helper class for storing standard queries for the graph store."""
345
202
 
346
- def __init__(self, store: NeatGraphStoreBase):
203
+ def __init__(self, store: NeatGraphStore):
347
204
  self.store = store
348
205
 
349
206
  def list_instances_ids_of_class(self, class_uri: URIRef, limit: int = -1) -> list[URIRef]:
@@ -359,7 +216,7 @@ class _Queries:
359
216
  query_statement = "SELECT DISTINCT ?subject WHERE { ?subject a <class> .} LIMIT X".replace(
360
217
  "class", class_uri
361
218
  ).replace("LIMIT X", "" if limit == -1 else f"LIMIT {limit}")
362
- return [cast(tuple, res)[0] for res in list(self.store.query(query_statement))]
219
+ return [cast(tuple, res)[0] for res in list(self.store.graph.query(query_statement))]
363
220
 
364
221
  def list_instances_of_type(self, class_uri: URIRef) -> list[ResultRow]:
365
222
  """Get all triples for instances of a given class
@@ -374,20 +231,27 @@ class _Queries:
374
231
  f"SELECT ?instance ?prop ?value "
375
232
  f"WHERE {{ ?instance rdf:type <{class_uri}> . ?instance ?prop ?value . }} order by ?instance "
376
233
  )
377
- logging.info(query)
234
+
378
235
  # Select queries gives an iterable of result rows
379
- return cast(list[ResultRow], list(self.store.query(query)))
236
+ return cast(list[ResultRow], list(self.store.graph.query(query)))
380
237
 
381
238
  def triples_of_type_instances(self, rdf_type: str) -> list[tuple[str, str, str]]:
382
239
  """Get all triples of a given type.
383
240
 
384
241
  This method assumes the graph has been transformed into the default namespace.
385
242
  """
386
- query = (
387
- f"SELECT ?instance ?prop ?value "
388
- f"WHERE {{ ?instance a <{self.store.namespace[rdf_type]}> . ?instance ?prop ?value . }} order by ?instance"
389
- )
390
- result = self.store.query(query)
391
243
 
392
- # We cannot include the RDF.type in case there is a neat:type property
393
- return [remove_namespace(*triple) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index]
244
+ if self.store.rules:
245
+ query = (
246
+ f"SELECT ?instance ?prop ?value "
247
+ f"WHERE {{ ?instance a <{self.store.rules.metadata.namespace[rdf_type]}> . ?instance ?prop ?value . }} "
248
+ "order by ?instance"
249
+ )
250
+
251
+ result = self.store.graph.query(query)
252
+
253
+ # We cannot include the RDF.type in case there is a neat:type property
254
+ return [remove_namespace(*triple) for triple in result if triple[1] != RDF.type] # type: ignore[misc, index]
255
+ else:
256
+ warnings.warn("No rules found for the graph store, returning empty list.", stacklevel=2)
257
+ return []
@@ -685,7 +685,7 @@ def to_edge(self, data_model: DMSSchemaComponents, add_class_prefix: bool = Fals
685
685
 
686
686
  edge = EdgeApply(
687
687
  space=data_model.views[view_id].space,
688
- external_id=f"{self.external_id}-{end_node_external_id}",
688
+ external_id=f"{self.external_id}-{edge_one_to_many}-{end_node_external_id}",
689
689
  type=(data_model.views[view_id].space, edge_type_id),
690
690
  start_node=(data_model.views[view_id].space, self.external_id),
691
691
  end_node=(data_model.views[view_id].space, end_node_external_id),
@@ -8,7 +8,7 @@ from rdflib import Literal as RdfLiteral
8
8
 
9
9
  import cognite.neat.rules.issues as issues
10
10
  from cognite.neat.constants import PREFIXES
11
- from cognite.neat.graph.stores import NeatGraphStoreBase
11
+ from cognite.neat.graph.stores import NeatGraphStore
12
12
  from cognite.neat.rules.importers._base import BaseImporter, Rules, _handle_issues
13
13
  from cognite.neat.rules.issues import IssueList
14
14
  from cognite.neat.rules.models import InformationRules, RoleTypes
@@ -47,7 +47,7 @@ class InferenceImporter(BaseImporter):
47
47
  self.max_number_of_instance = max_number_of_instance
48
48
 
49
49
  @classmethod
50
- def from_graph_store(cls, store: NeatGraphStoreBase, max_number_of_instance: int = -1):
50
+ def from_graph_store(cls, store: NeatGraphStore, max_number_of_instance: int = -1):
51
51
  issue_list = IssueList(title="Inferred from graph store")
52
52
 
53
53
  return cls(issue_list, store.graph, max_number_of_instance=max_number_of_instance)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cognite-neat
3
- Version: 0.79.0
3
+ Version: 0.80.1
4
4
  Summary: Knowledge graph transformation
5
5
  Home-page: https://cognite-neat.readthedocs-hosted.com/
6
6
  License: Apache-2.0
@@ -1,5 +1,5 @@
1
1
  cognite/neat/__init__.py,sha256=v-rRiDOgZ3sQSMQKq0vgUQZvpeOkoHFXissAx6Ktg84,61
2
- cognite/neat/_version.py,sha256=1nnRuZbUvt4h80F41M2EZqt3TsZYyhJ2C2rRdk9uzhA,23
2
+ cognite/neat/_version.py,sha256=MAtvieY6wfTxVdjRSj9aYId--4VDiGpJgHkgjPZrL6M,23
3
3
  cognite/neat/app/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  cognite/neat/app/api/asgi/metrics.py,sha256=nxFy7L5cChTI0a-zkCiJ59Aq8yLuIJp5c9Dg0wRXtV0,152
5
5
  cognite/neat/app/api/configuration.py,sha256=2U5M6M252swvQPQyooA1EBzFUZNtcTmuSaywfJDgckM,4232
@@ -43,7 +43,8 @@ cognite/neat/app/ui/neat-app/build/static/media/logo.8093b84df9ed36a174c629d6fe0
43
43
  cognite/neat/config.py,sha256=oBrWw-KEo0YMbfjVeW6A1FBW7HpP2Pq2ByIq2vlJ10M,6145
44
44
  cognite/neat/constants.py,sha256=GYVcrFrvqcznYgB_0jAyykIIiIamxyOpv4dTZWj4K4U,1300
45
45
  cognite/neat/exceptions.py,sha256=CM7aCvbek9klOgjTsJ9bfEA8t7KTAL6dc7Mviu4NvSI,4268
46
- cognite/neat/graph/__init__.py,sha256=31uTeejWOSd-I8iUG8GOZFhHZcQCsBitJ6X8vu2r1nU,73
46
+ cognite/neat/graph/__init__.py,sha256=J8JSJj3s4gFbuAexma__KGpBXPN8wuydPTKd6EwgKPA,65
47
+ cognite/neat/graph/_shared.py,sha256=9QRETdm7hvqIeiHv_n1xi1DUq91Nq7oRRpnPKE0Pnag,181
47
48
  cognite/neat/graph/_tracking/__init__.py,sha256=pYj7c-YAUIP4hvN-4mlWnwaeZFerzL9_gM-oZhex7cE,91
48
49
  cognite/neat/graph/_tracking/base.py,sha256=8JmaDhlFhSkdBe4SOvFnrdDvMmfTZkHhZxWWWTYkMOQ,820
49
50
  cognite/neat/graph/_tracking/log.py,sha256=dBSINd8Tn92hBl8APMD8r6j15g2SlaX1tsDLCmHvaU4,927
@@ -52,7 +53,7 @@ cognite/neat/graph/examples/Knowledge-Graph-Nordic44.xml,sha256=U2Ns-M4LRjT1fBkh
52
53
  cognite/neat/graph/examples/__init__.py,sha256=yAjHVY3b5jOjmbW-iLbhvu7BG014TpGi3K4igkDqW5I,368
53
54
  cognite/neat/graph/examples/skos-capturing-sheet-wind-topics.xlsx,sha256=CV_yK5ZSbYS_ktfIZUPD8Sevs47zpswLXQUDFkGE4Gw,45798
54
55
  cognite/neat/graph/exceptions.py,sha256=R6pyOH774n9w2x_X_nrUr8OMAdjJMf_XPIqAvxIQaWo,3401
55
- cognite/neat/graph/extractors/__init__.py,sha256=0Mv7iTBwOdMHgqkINh0V2hnDxeC9fIDpBmmnW5Q1lyQ,645
56
+ cognite/neat/graph/extractors/__init__.py,sha256=PRKYPCnxofQ3i_iiJ3xGjEligLgqAPDw2TSlcZt0MlU,947
56
57
  cognite/neat/graph/extractors/_base.py,sha256=TOXDnlqske8DgnJwA0THDVRgmR79Acjm56yF0E-2w7I,356
57
58
  cognite/neat/graph/extractors/_classic_cdf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
59
  cognite/neat/graph/extractors/_classic_cdf/_assets.py,sha256=S5QB_38ysVodGRMqr_SWYYaUtkUCS6a6L2b5D1T-888,3812
@@ -63,20 +64,17 @@ cognite/neat/graph/extractors/_classic_cdf/_relationships.py,sha256=5kClA5zBlhyP
63
64
  cognite/neat/graph/extractors/_classic_cdf/_sequences.py,sha256=ov-n8cBEC73AMO1xam2GUDHv-7SyOEWXWRxLXh9flyY,3298
64
65
  cognite/neat/graph/extractors/_classic_cdf/_timeseries.py,sha256=xlnJ4fKvCJawZO6l6EHpx36RRAafd3BdYWS0ajNnGVM,4449
65
66
  cognite/neat/graph/extractors/_mock_graph_generator.py,sha256=gziG2FFsLk-HmA9uxAeT9RCjVpFxjkCTLiC4tq2zgvw,14961
67
+ cognite/neat/graph/extractors/_rdf_file.py,sha256=w4-XgPgNsmZOkNxjO1ZQCcopTntmmtxfDBkQxn1se6E,463
66
68
  cognite/neat/graph/issues/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
67
69
  cognite/neat/graph/issues/loader.py,sha256=v8YDsehkUT1QUG61JM9BDV_lqowMUnDmGmbay0aFzN4,3085
68
70
  cognite/neat/graph/loaders/__init__.py,sha256=hHC9sfFfbnGSVFTYeuNTIEu4tdLSJ2mWV07fereLelo,125
69
- cognite/neat/graph/loaders/_base.py,sha256=VOCRIee9ms6FuBlT3mwBV_mQnI6bO53mrardqiMf-Hk,4045
70
- cognite/neat/graph/loaders/_rdf2dms.py,sha256=bVFLjukCwEUGVoyQ6YnmdRXV945fhX3SiHR6yHLXO2k,12873
71
+ cognite/neat/graph/loaders/_base.py,sha256=bdYC6CwsHVqnQa1QzOhL68qQhF1OtrsearqH6D-z3E4,4037
72
+ cognite/neat/graph/loaders/_rdf2dms.py,sha256=Tn7vy6XwXFXpVDn7uzfzgJMJapbPITerKaF5b5Y4ol4,12857
71
73
  cognite/neat/graph/models.py,sha256=AtLgZh2qyRP6NRetjQCy9qLMuTQB0CH52Zsev-qa2sk,149
72
- cognite/neat/graph/stores/__init__.py,sha256=ivvk7STSo-4wuP_CpizKUCPKmt_ufpNWRJUN9Bv5gdY,543
73
- cognite/neat/graph/stores/_base.py,sha256=ZeeHxPHV0TU5B3Ep2db6FOkB1VP1gbJzKRjcY46zqGg,14682
74
- cognite/neat/graph/stores/_graphdb_store.py,sha256=8QM8I4srDKNsR0PddN6htCYUhfkoqlyy-c232Os7C0A,1776
75
- cognite/neat/graph/stores/_memory_store.py,sha256=GQq19xiyAWU0WQU5txmWnLXBuyP6ywd8plR21UtD3Uw,1420
76
- cognite/neat/graph/stores/_oxigraph_store.py,sha256=Xj69oE4M-9aqd8bq5CpLCMAhwNjJQAP1AC7lxzDsCn0,5448
74
+ cognite/neat/graph/stores/__init__.py,sha256=G-VG_YwfRt1kuPao07PDJyZ3w_0-eguzLUM13n-Z_RA,64
75
+ cognite/neat/graph/stores/_base.py,sha256=DGmguO0qE5sLHgHG757ymP-cFtEimKvD57Irr3FH9yY,9106
77
76
  cognite/neat/graph/stores/_oxrdflib.py,sha256=A5zeRm5_e8ui_ihGpgstRDg_N7qcLZ3QZBRGrOXSGI0,9569
78
77
  cognite/neat/graph/stores/_provenance.py,sha256=Y20-I8dP3DwTQ1sdI_eC4va2Az2FpK0oZwdfJ5T-2wc,3279
79
- cognite/neat/graph/stores/_rdf_to_graph.py,sha256=1ezWHTPn9UkIsAlxZcYRlqWvj3ixlmB5GGG9NN0ls2Q,1244
80
78
  cognite/neat/issues.py,sha256=pxQfqfBseMDE8JM0iqZnkLXngeyeFfT0TFtu1UuAd4c,4629
81
79
  cognite/neat/legacy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
82
80
  cognite/neat/legacy/graph/__init__.py,sha256=31uTeejWOSd-I8iUG8GOZFhHZcQCsBitJ6X8vu2r1nU,73
@@ -136,7 +134,7 @@ cognite/neat/legacy/rules/exporters/_rules2dms.py,sha256=13CptTLvY9ghcrLPhumUOg6
136
134
  cognite/neat/legacy/rules/exporters/_rules2excel.py,sha256=ytHsqw2j26T9yLNZHuUSItV8Jp3AvvpIwX8D5-L9GO8,8312
137
135
  cognite/neat/legacy/rules/exporters/_rules2graphql.py,sha256=oXBU5z-qFyxG7MW83HYlW-hazhDDNAPKAbJJcsZfcU4,6251
138
136
  cognite/neat/legacy/rules/exporters/_rules2ontology.py,sha256=m6adoKOP5EVVEjFX4Qi9yw7UflrDRVgNiBxQ9QVgz6g,18458
139
- cognite/neat/legacy/rules/exporters/_rules2pydantic_models.py,sha256=S1Jy4DbrMa3e9YZBc8bWF3r6JWxidGyYNMNOUbrBXws,28805
137
+ cognite/neat/legacy/rules/exporters/_rules2pydantic_models.py,sha256=0Cn9juOgNPnmxLe7eUCUNjfaicLcQPpj_qA8HLFB3lc,28824
140
138
  cognite/neat/legacy/rules/exporters/_rules2rules.py,sha256=KlBm1hWkx4Ly5G-_gdcURUwADolMJFnueus02IW51uQ,3881
141
139
  cognite/neat/legacy/rules/exporters/_rules2triples.py,sha256=ItkLy6Rji4g5UqLtxaOeodGUvpQG-LVr_ss70PcCPZs,1085
142
140
  cognite/neat/legacy/rules/exporters/_validation.py,sha256=saDorwUqJ4Fo6yeCMSRH0Hp3AGCr-rdjb-sOGo91xL0,5767
@@ -195,7 +193,7 @@ cognite/neat/rules/importers/_dtdl2rules/_unit_lookup.py,sha256=wW4saKva61Q_i17g
195
193
  cognite/neat/rules/importers/_dtdl2rules/dtdl_converter.py,sha256=ysmWUxZ0npwrTB0uiH5jA0v37sfCwowGaYk17IyxPUU,12663
196
194
  cognite/neat/rules/importers/_dtdl2rules/dtdl_importer.py,sha256=QDyGt5YBaxzF4v_oCFSgKRSpwVdVruDU3-VW0DEiHbY,6718
197
195
  cognite/neat/rules/importers/_dtdl2rules/spec.py,sha256=tim_MfN1J0F3Oeqk3BMgIA82d_MZvhRuRMsLK3B4PYc,11897
198
- cognite/neat/rules/importers/_inference2rules.py,sha256=JsV3Ii2wmgRELtpV0GC4Y1KtjhyyGR0dtEFpBonHoA8,11213
196
+ cognite/neat/rules/importers/_inference2rules.py,sha256=JgXmhc_6ME9RrA0gVPn9WoVE7NvCrpjdS4_ELW-2e7g,11205
199
197
  cognite/neat/rules/importers/_owl2rules/__init__.py,sha256=tdGcrgtozdQyST-pTlxIa4cLBNTLvtk1nNYR4vOdFSw,63
200
198
  cognite/neat/rules/importers/_owl2rules/_owl2classes.py,sha256=QpTxvrTGczIa48X8lgXGnMN1AWPhHK0DR6uNq175xak,7357
201
199
  cognite/neat/rules/importers/_owl2rules/_owl2metadata.py,sha256=nwnUaBNAAYMoBre2UmsnkJXUuaqGEpR3U3txDrH2w6g,7527
@@ -294,8 +292,8 @@ cognite/neat/workflows/steps_registry.py,sha256=fkTX14ZA7_gkUYfWIlx7A1XbCidvqR23
294
292
  cognite/neat/workflows/tasks.py,sha256=dqlJwKAb0jlkl7abbY8RRz3m7MT4SK8-7cntMWkOYjw,788
295
293
  cognite/neat/workflows/triggers.py,sha256=_BLNplzoz0iic367u1mhHMHiUrCwP-SLK6_CZzfODX0,7071
296
294
  cognite/neat/workflows/utils.py,sha256=gKdy3RLG7ctRhbCRwaDIWpL9Mi98zm56-d4jfHDqP1E,453
297
- cognite_neat-0.79.0.dist-info/LICENSE,sha256=W8VmvFia4WHa3Gqxq1Ygrq85McUNqIGDVgtdvzT-XqA,11351
298
- cognite_neat-0.79.0.dist-info/METADATA,sha256=Ww02h9AuPpAD5i76P15Q0T3gYFtxd2crDdFGX0yxcBg,9298
299
- cognite_neat-0.79.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
300
- cognite_neat-0.79.0.dist-info/entry_points.txt,sha256=61FPqiWb25vbqB0KI7znG8nsg_ibLHBvTjYnkPvNFso,50
301
- cognite_neat-0.79.0.dist-info/RECORD,,
295
+ cognite_neat-0.80.1.dist-info/LICENSE,sha256=W8VmvFia4WHa3Gqxq1Ygrq85McUNqIGDVgtdvzT-XqA,11351
296
+ cognite_neat-0.80.1.dist-info/METADATA,sha256=3LEzcPmc4ZtAKTzsEads0Esq_lFvy43atltQ7byUr3E,9298
297
+ cognite_neat-0.80.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
298
+ cognite_neat-0.80.1.dist-info/entry_points.txt,sha256=61FPqiWb25vbqB0KI7znG8nsg_ibLHBvTjYnkPvNFso,50
299
+ cognite_neat-0.80.1.dist-info/RECORD,,
@@ -1,51 +0,0 @@
1
- import logging
2
-
3
- import requests
4
- from rdflib import Graph, Namespace
5
- from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
6
-
7
- from cognite.neat.constants import DEFAULT_NAMESPACE, PREFIXES
8
-
9
- from ._base import NeatGraphStoreBase
10
-
11
-
12
class GraphDBStore(NeatGraphStoreBase):
    """Graph store that keeps its triples in a GraphDB instance reached over SPARQL endpoints.

    Args:
        graph : Instance of rdflib.Graph class for graph storage
        base_prefix : Used as a base prefix for graph namespace, allowing querying graph data using a shortform of a URI
        namespace : Namespace (aka URI) used to resolve any relative URI in the graph
        prefixes : Dictionary of additional prefixes used and bound to the graph
    """

    rdf_store_type = "graphdb"

    # Upper bound, in seconds, on how long `drop` waits for the server before giving up.
    # Without a timeout `requests` waits indefinitely on an unresponsive server.
    _drop_timeout_seconds: float = 30.0

    def __init__(
        self,
        graph: Graph | None = None,
        base_prefix: str = "",  # usually empty
        namespace: Namespace = DEFAULT_NAMESPACE,
        prefixes: dict = PREFIXES,
    ):
        super().__init__(graph, base_prefix, namespace, prefixes)
        self.graph_db_rest_url: str = "http://localhost:7200"

    def _set_graph(self) -> None:
        """Create ``self.graph`` on top of a ``SPARQLUpdateStore``.

        NOTE(review): ``rdf_store_query_url``, ``rdf_store_update_url`` and ``returnFormat``
        are not assigned in this class; presumably they come from ``NeatGraphStoreBase`` - confirm.
        """
        logging.info("Initializing graph store with GraphDB")
        store = SPARQLUpdateStore(
            query_endpoint=self.rdf_store_query_url,
            update_endpoint=self.rdf_store_update_url,
            returnFormat=self.returnFormat,
            context_aware=False,
            postAsEncoded=False,
            autocommit=False,
        )
        self.graph = Graph(store=store)

    def drop(self) -> None:
        """Drops the graph.

        Sends an HTTP DELETE for the default graph to the store's query URL and logs the
        server's answer. A non-success status is logged as an error instead of being
        reported as a successful drop.

        Raises:
            requests.exceptions.RequestException: If the request cannot be completed,
                including when the server does not answer within the timeout.
        """
        r = requests.delete(
            f"{self.rdf_store_query_url}/rdf-graphs/service?default",
            timeout=self._drop_timeout_seconds,
        )
        if r.ok:
            logging.info(f"Dropped graph with state: {r.text}")
        else:
            logging.error(f"Failed to drop graph (HTTP {r.status_code}): {r.text}")
@@ -1,43 +0,0 @@
1
- import logging
2
-
3
- from rdflib import Graph, Namespace
4
-
5
- from cognite.neat.constants import DEFAULT_NAMESPACE, PREFIXES
6
-
7
- from ._base import NeatGraphStoreBase
8
-
9
-
10
class MemoryStore(NeatGraphStoreBase):
    """In-memory graph store built on a plain ``rdflib.Graph``.

    Args:
        graph : Instance of rdflib.Graph class for graph storage
        base_prefix : Used as a base prefix for graph namespace, allowing querying graph data using a shortform of a URI
        namespace : Namespace (aka URI) used to resolve any relative URI in the graph
        prefixes : Dictionary of additional prefixes used and bound to the graph
    """

    rdf_store_type: str = "memory"

    def __init__(
        self,
        graph: Graph | None = None,
        base_prefix: str = "",  # usually empty
        namespace: Namespace = DEFAULT_NAMESPACE,
        prefixes: dict = PREFIXES,
    ):
        # The constructor only forwards to the base class; it is spelled out here
        # so that this class carries its own argument documentation.
        super().__init__(graph, base_prefix, namespace, prefixes)

    def _set_graph(self):
        """Point ``self.graph`` at a fresh, empty in-memory ``rdflib.Graph``."""
        logging.info("Initializing graph in memory")
        self.graph = Graph()

    def drop(self):
        """Drops the graph."""
        # For the in-memory store the graph is reinitialized rather than discarded:
        # discarding it would lose the prefixes and bindings, which fails the workflow.
        self.reinitialize_graph()
@@ -1,147 +0,0 @@
1
- import logging
2
- import os
3
- import shutil
4
- from pathlib import Path
5
-
6
- from rdflib import Graph, Namespace
7
-
8
- from cognite.neat.constants import DEFAULT_NAMESPACE, PREFIXES
9
- from cognite.neat.utils.auxiliary import local_import
10
-
11
- from ._base import MIMETypes, NeatGraphStoreBase
12
-
13
-
14
class OxiGraphStore(NeatGraphStoreBase):
    """Graph store that keeps its triples in Oxigraph and exposes read/write helpers for them.

    Args:
        graph : Instance of rdflib.Graph class for graph storage
        base_prefix : Used as a base prefix for graph namespace, allowing querying graph data using a shortform of a URI
        namespace : Namespace (aka URI) used to resolve any relative URI in the graph
        prefixes : Dictionary of additional prefixes used and bound to the graph
    """

    rdf_store_type = "oxigraph"

    def __init__(
        self,
        graph: Graph | None = None,
        base_prefix: str = "",  # usually empty
        namespace: Namespace = DEFAULT_NAMESPACE,
        prefixes: dict = PREFIXES,
    ):
        super().__init__(graph, base_prefix, namespace, prefixes)

    def _set_graph(self) -> None:
        """Open the Oxigraph store and wrap it in ``self.graph``.

        Opening is attempted up to four times when the failure message mentions a lock;
        any other ``OSError``, or a lock error on the last attempt, is re-raised.
        """
        logging.info("Initializing Oxigraph store")
        # NOTE(review): presumably verifies that the optional pyoxigraph dependency
        # is installed before the real import below - confirm against `local_import`.
        local_import("pyoxigraph", "oxi")
        import pyoxigraph

        from cognite.neat.graph.stores import _oxrdflib

        # Adding support for both in-memory and file-based storage.
        # Store (Rust object) accepts only str as path and not Path.
        storage_path = str(self.internal_storage_dir) if self.internal_storage_dir else None

        max_attempts = 4
        for attempt in range(1, max_attempts + 1):
            try:
                oxstore = pyoxigraph.Store(path=storage_path)
                break
            except OSError as e:
                if "lock" not in str(e) or attempt == max_attempts:
                    raise
                # Lock originated from another instance of the store.
                # NOTE(review): the retry is immediate, there is no pause between attempts.
                logging.error("Error initializing Oxigraph store: %s", e)

        self.graph = Graph(store=_oxrdflib.OxigraphStore(store=oxstore))
        self.graph.default_union = True
        self.garbage_collector()

    def close(self):
        """Closes the graph."""
        if self.graph is not None:
            try:
                self.graph.store._inner.flush()  # type: ignore[attr-defined]
                self.graph.close(True)
            except Exception as e:
                # Closing is best effort; the failure is only recorded at debug level.
                logging.debug("Error closing graph: %s", e)

    def restart(self):
        """Restarts the graph"""
        self.close()
        self.reinitialize_graph()
        logging.info("GraphStore restarted")

    def import_from_file(
        self, graph_file: Path, mime_type: MIMETypes = "application/rdf+xml", add_base_iri: bool = True
    ) -> None:
        """Imports graph data from file.

        Args:
            graph_file : File path to file containing graph data
            mime_type : MIME type of the file, by default "application/rdf+xml"
            add_base_iri : Add base IRI to the graph, by default True
        """
        inner_store = self.graph.store._inner  # type: ignore[attr-defined]
        if add_base_iri:
            inner_store.bulk_load(str(graph_file), mime_type, base_iri=self.namespace)
        else:
            inner_store.bulk_load(str(graph_file), mime_type)
        inner_store.optimize()

    def drop(self):
        """Close the graph and schedule its storage directory for deletion.

        Any error is logged and swallowed, so dropping never raises.
        """
        try:
            self.close()
            # Due to the specifics of Oxigraph, storage directory cannot be deleted immediately
            # after closing the graph and creating a new one
            storage_dir = self.internal_storage_dir
            # `_set_graph` accepts a missing storage directory (in-memory store);
            # there is nothing on disk to remove in that case.
            if storage_dir is not None and storage_dir.exists():
                self.storage_dirs_to_delete.append(storage_dir)
            self.garbage_collector()

        except Exception as e:
            logging.error(f"Error dropping graph : {e}")

    def garbage_collector(self):
        """Garbage collection of the graph store.

        Tries to delete every directory queued in ``self.storage_dirs_to_delete``.
        Directories that cannot be removed yet stay queued for the next run, so one
        stubborn directory neither blocks the others nor aborts the caller.
        """
        still_pending = []
        for d in self.storage_dirs_to_delete:
            try:
                shutil.rmtree(d)
            except FileNotFoundError:
                # Already gone - nothing left to do for this entry.
                continue
            except OSError as e:
                logging.warning("Could not delete storage directory %s: %s", d, e)
                still_pending.append(d)
        self.storage_dirs_to_delete = still_pending

    def __del__(self):
        # `graph` may be missing altogether when construction failed part-way.
        graph = getattr(self, "graph", None)
        if graph is None:
            return
        if graph.store is not None:
            try:
                graph.store._inner.flush()
            except Exception:
                logging.debug("Error flushing graph")
        graph.close()
        # It requires more investigation os.remove(self.internal_storage_dir / "LOCK")

    def commit(self):
        """Commits the graph."""
        # Compare against None explicitly: rdflib graphs define a length, so a plain
        # truthiness test would skip the commit for an empty graph.
        if self.graph is not None:
            if self.graph.store is not None:
                logging.info("Committing graph - flushing and optimizing")
                self.graph.store._inner.flush()
                self.graph.store._inner.optimize()
            self.graph.commit()

    @staticmethod
    def drop_graph_store_storage(storage_path: Path | None) -> None:
        """Drop graph store storage on disk.

        Removes everything inside ``storage_path`` (files and sub-directories) while
        keeping the directory itself.

        Args:
            storage_path : Path to storage directory
        """
        if storage_path and storage_path.exists():
            for entry in storage_path.iterdir():
                if entry.is_dir() and not entry.is_symlink():
                    shutil.rmtree(entry)
                else:
                    entry.unlink()
            logging.info("Graph store dropped.")
        else:
            logging.info(f"Storage path {storage_path} does not exist. Skipping drop.")
@@ -1,40 +0,0 @@
1
- from pathlib import Path
2
-
3
- from rdflib import Graph, Namespace
4
-
5
- from cognite.neat.constants import PREFIXES
6
-
7
-
8
def rdf_file_to_graph(
    graph: Graph,
    filepath: Path,
    base_prefix: str | None = None,
    base_namespace: Namespace | None = None,
    prefixes: dict[str, Namespace] = PREFIXES,
) -> Graph:
    """Load RDF triples from a file, or from every file in a directory, into ``graph``.

    Args:
        graph: Graph instance to load RDF triples into.
        filepath: Path to an RDF file, or to a directory whose direct child files are parsed.
        base_prefix: base prefix for URIs. Defaults to None.
        base_namespace: base namespace for URIs. Defaults to None.
        prefixes: Dictionary of prefixes to bind to graph. Defaults to PREFIXES.

    Returns:
        The same graph instance, loaded with the parsed triples and with the prefixes bound.
    """
    # A single file is parsed as is; otherwise the path is listed as a directory
    # and only its direct child files are parsed (no recursion).
    if filepath.is_file():
        sources = iter((filepath,))
    else:
        sources = (child for child in filepath.iterdir() if child.is_file())

    for source in sources:
        graph.parse(source, publicID=base_namespace)

    # The base prefix is bound only when both the prefix and its namespace are given.
    if base_prefix and base_namespace:
        graph.bind(base_prefix, base_namespace)

    for prefix, namespace in (prefixes or {}).items():
        graph.bind(prefix, namespace)

    return graph