cognite-neat 0.74.0__py3-none-any.whl → 0.75.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (152) hide show
  1. cognite/neat/_version.py +1 -1
  2. cognite/neat/app/api/routers/core.py +1 -2
  3. cognite/neat/app/api/routers/data_exploration.py +1 -1
  4. cognite/neat/app/api/routers/rules.py +13 -7
  5. cognite/neat/constants.py +2 -2
  6. cognite/neat/graph/extractors/_mock_graph_generator.py +4 -4
  7. cognite/neat/graph/stores/_base.py +2 -23
  8. cognite/neat/legacy/graph/__init__.py +3 -0
  9. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44-dirty.xml +20182 -0
  10. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44.xml +20163 -0
  11. cognite/neat/legacy/graph/examples/__init__.py +7 -0
  12. cognite/neat/legacy/graph/examples/skos-capturing-sheet-wind-topics.xlsx +0 -0
  13. cognite/neat/legacy/graph/exceptions.py +91 -0
  14. cognite/neat/{graph/extractor → legacy/graph/extractors}/_base.py +1 -1
  15. cognite/neat/{graph/extractor → legacy/graph/extractors}/_dexpi.py +2 -2
  16. cognite/neat/{graph/extractor → legacy/graph/extractors}/_graph_capturing_sheet.py +6 -6
  17. cognite/neat/{graph/extractor → legacy/graph/extractors}/_mock_graph_generator.py +6 -5
  18. cognite/neat/{graph/loader → legacy/graph/loaders}/__init__.py +1 -1
  19. cognite/neat/{graph/loader → legacy/graph/loaders}/_asset_loader.py +3 -3
  20. cognite/neat/{graph/loader → legacy/graph/loaders}/_base.py +4 -4
  21. cognite/neat/{graph/loader → legacy/graph/loaders}/core/labels.py +2 -2
  22. cognite/neat/{graph/loader → legacy/graph/loaders}/core/models.py +1 -1
  23. cognite/neat/{graph/loader → legacy/graph/loaders}/core/rdf_to_assets.py +3 -3
  24. cognite/neat/{graph/loader → legacy/graph/loaders}/core/rdf_to_relationships.py +5 -5
  25. cognite/neat/{graph/loader → legacy/graph/loaders}/rdf_to_dms.py +5 -5
  26. cognite/neat/legacy/graph/models.py +6 -0
  27. cognite/neat/legacy/graph/stores/__init__.py +13 -0
  28. cognite/neat/legacy/graph/stores/_base.py +384 -0
  29. cognite/neat/legacy/graph/stores/_graphdb_store.py +51 -0
  30. cognite/neat/legacy/graph/stores/_memory_store.py +43 -0
  31. cognite/neat/legacy/graph/stores/_oxigraph_store.py +147 -0
  32. cognite/neat/legacy/graph/stores/_oxrdflib.py +247 -0
  33. cognite/neat/legacy/graph/stores/_rdf_to_graph.py +40 -0
  34. cognite/neat/{graph/transformation → legacy/graph/transformations}/entity_matcher.py +1 -1
  35. cognite/neat/{graph/transformation → legacy/graph/transformations}/query_generator/sparql.py +9 -7
  36. cognite/neat/{graph/transformation → legacy/graph/transformations}/transformer.py +12 -5
  37. cognite/neat/legacy/rules/__init__.py +0 -0
  38. cognite/neat/{rules → legacy/rules}/analysis.py +2 -2
  39. cognite/neat/legacy/rules/examples/__init__.py +18 -0
  40. cognite/neat/legacy/rules/examples/wind-energy.owl +1511 -0
  41. cognite/neat/legacy/rules/exceptions.py +2972 -0
  42. cognite/neat/{rules/exporter → legacy/rules/exporters}/_base.py +2 -2
  43. cognite/neat/{rules/exporter → legacy/rules/exporters}/_core/rules2labels.py +1 -1
  44. cognite/neat/{rules/exporter → legacy/rules/exporters}/_rules2dms.py +6 -6
  45. cognite/neat/{rules/exporter → legacy/rules/exporters}/_rules2excel.py +1 -1
  46. cognite/neat/{rules/exporter → legacy/rules/exporters}/_rules2graphql.py +5 -5
  47. cognite/neat/{rules/exporter → legacy/rules/exporters}/_rules2ontology.py +6 -6
  48. cognite/neat/{rules/exporter → legacy/rules/exporters}/_rules2pydantic_models.py +8 -8
  49. cognite/neat/{rules/exporter → legacy/rules/exporters}/_rules2rules.py +3 -2
  50. cognite/neat/{rules/exporter → legacy/rules/exporters}/_rules2triples.py +3 -2
  51. cognite/neat/{rules/exporter → legacy/rules/exporters}/_validation.py +2 -2
  52. cognite/neat/{rules/importer → legacy/rules/importers}/_base.py +2 -2
  53. cognite/neat/{rules/importer → legacy/rules/importers}/_dict2rules.py +1 -1
  54. cognite/neat/{rules/importer → legacy/rules/importers}/_dms2rules.py +2 -2
  55. cognite/neat/{rules/importer → legacy/rules/importers}/_graph2rules.py +5 -4
  56. cognite/neat/{rules/importer → legacy/rules/importers}/_owl2rules/_owl2metadata.py +11 -7
  57. cognite/neat/{rules/importer → legacy/rules/importers}/_owl2rules/_owl2rules.py +5 -5
  58. cognite/neat/{rules/importer → legacy/rules/importers}/_spreadsheet2rules.py +2 -2
  59. cognite/neat/{rules/importer → legacy/rules/importers}/_xsd2rules.py +1 -1
  60. cognite/neat/{rules/importer → legacy/rules/importers}/_yaml2rules.py +1 -1
  61. cognite/neat/{rules → legacy/rules}/models/raw_rules.py +4 -4
  62. cognite/neat/legacy/rules/models/rdfpath.py +238 -0
  63. cognite/neat/{rules → legacy/rules}/models/rules.py +4 -4
  64. cognite/neat/{rules → legacy/rules}/models/value_types.py +1 -1
  65. cognite/neat/rules/_shared.py +1 -1
  66. cognite/neat/rules/analysis/__init__.py +3 -0
  67. cognite/neat/rules/{_analysis → analysis}/_base.py +1 -1
  68. cognite/neat/rules/{_analysis → analysis}/_information_rules.py +3 -3
  69. cognite/neat/rules/examples/__init__.py +0 -9
  70. cognite/neat/rules/exporters/_base.py +1 -1
  71. cognite/neat/rules/exporters/_rules2dms.py +4 -4
  72. cognite/neat/rules/exporters/_rules2excel.py +2 -2
  73. cognite/neat/rules/exporters/_rules2ontology.py +4 -4
  74. cognite/neat/rules/exporters/_rules2yaml.py +1 -1
  75. cognite/neat/rules/exporters/_validation.py +2 -2
  76. cognite/neat/rules/importers/_base.py +1 -1
  77. cognite/neat/rules/importers/_dms2rules.py +10 -10
  78. cognite/neat/rules/importers/_dtdl2rules/dtdl_converter.py +2 -2
  79. cognite/neat/rules/importers/_dtdl2rules/dtdl_importer.py +3 -3
  80. cognite/neat/rules/importers/_dtdl2rules/spec.py +1 -1
  81. cognite/neat/rules/importers/_owl2rules/_owl2classes.py +1 -1
  82. cognite/neat/rules/importers/_owl2rules/_owl2metadata.py +6 -6
  83. cognite/neat/rules/importers/_owl2rules/_owl2properties.py +1 -1
  84. cognite/neat/rules/importers/_owl2rules/_owl2rules.py +2 -2
  85. cognite/neat/rules/importers/_spreadsheet2rules.py +2 -2
  86. cognite/neat/rules/importers/_yaml2rules.py +1 -1
  87. cognite/neat/rules/models/_entity.py +142 -0
  88. cognite/neat/rules/models/rdfpath.py +6 -8
  89. cognite/neat/rules/models/{_rules → rules}/__init__.py +5 -5
  90. cognite/neat/rules/models/{_rules/base.py → rules/_base.py} +1 -1
  91. cognite/neat/rules/models/{_rules/dms_architect_rules.py → rules/_dms_architect_rules.py} +5 -5
  92. cognite/neat/rules/models/{_rules/dms_schema.py → rules/_dms_schema.py} +1 -1
  93. cognite/neat/rules/models/{_rules/domain_rules.py → rules/_domain_rules.py} +2 -2
  94. cognite/neat/rules/models/{_rules/information_rules.py → rules/_information_rules.py} +15 -15
  95. cognite/neat/rules/models/{_rules → rules}/_types/_base.py +18 -134
  96. cognite/neat/workflows/steps/data_contracts.py +4 -4
  97. cognite/neat/workflows/steps/lib/graph_store.py +1 -1
  98. cognite/neat/workflows/steps/lib/rules_exporter.py +1 -1
  99. cognite/neat/workflows/steps/lib/rules_importer.py +2 -2
  100. cognite/neat/workflows/steps/lib/rules_validator.py +2 -2
  101. cognite/neat/workflows/steps/lib/v1/graph_contextualization.py +1 -1
  102. cognite/neat/workflows/steps/lib/v1/graph_extractor.py +5 -5
  103. cognite/neat/workflows/steps/lib/v1/graph_loader.py +7 -7
  104. cognite/neat/workflows/steps/lib/v1/graph_store.py +7 -1
  105. cognite/neat/workflows/steps/lib/v1/graph_transformer.py +2 -2
  106. cognite/neat/workflows/steps/lib/v1/rules_exporter.py +7 -7
  107. cognite/neat/workflows/steps/lib/v1/rules_importer.py +13 -13
  108. {cognite_neat-0.74.0.dist-info → cognite_neat-0.75.1.dist-info}/METADATA +1 -1
  109. cognite_neat-0.75.1.dist-info/RECORD +258 -0
  110. cognite_neat-0.74.0.dist-info/RECORD +0 -237
  111. /cognite/neat/{graph/loader/core → legacy}/__init__.py +0 -0
  112. /cognite/neat/{graph/extractor → legacy/graph/extractors}/__init__.py +0 -0
  113. /cognite/neat/{graph/loader → legacy/graph/loaders}/_exceptions.py +0 -0
  114. /cognite/neat/{graph/transformation → legacy/graph/loaders/core}/__init__.py +0 -0
  115. /cognite/neat/{graph/loader → legacy/graph/loaders}/validator.py +0 -0
  116. /cognite/neat/{rules/_analysis → legacy/graph/transformations}/__init__.py +0 -0
  117. /cognite/neat/{graph/transformation → legacy/graph/transformations}/query_generator/__init__.py +0 -0
  118. /cognite/neat/{rules → legacy/rules}/examples/Rules-Nordic44-to-TNT.xlsx +0 -0
  119. /cognite/neat/{rules → legacy/rules}/examples/Rules-Nordic44-to-graphql.xlsx +0 -0
  120. /cognite/neat/{rules → legacy/rules}/examples/power-grid-containers.yaml +0 -0
  121. /cognite/neat/{rules → legacy/rules}/examples/power-grid-example.xlsx +0 -0
  122. /cognite/neat/{rules → legacy/rules}/examples/power-grid-model.yaml +0 -0
  123. /cognite/neat/{rules → legacy/rules}/examples/rules-template.xlsx +0 -0
  124. /cognite/neat/{rules → legacy/rules}/examples/sheet2cdf-transformation-rules.xlsx +0 -0
  125. /cognite/neat/{rules → legacy/rules}/examples/skos-rules.xlsx +0 -0
  126. /cognite/neat/{rules → legacy/rules}/examples/source-to-solution-mapping-rules.xlsx +0 -0
  127. /cognite/neat/{rules/exporter → legacy/rules/exporters}/__init__.py +0 -0
  128. /cognite/neat/{rules/exporter → legacy/rules/exporters}/_core/__init__.py +0 -0
  129. /cognite/neat/{rules/importer → legacy/rules/importers}/__init__.py +0 -0
  130. /cognite/neat/{rules/importer → legacy/rules/importers}/_json2rules.py +0 -0
  131. /cognite/neat/{rules/importer → legacy/rules/importers}/_owl2rules/__init__.py +0 -0
  132. /cognite/neat/{rules/importer → legacy/rules/importers}/_owl2rules/_owl2classes.py +0 -0
  133. /cognite/neat/{rules/importer → legacy/rules/importers}/_owl2rules/_owl2properties.py +0 -0
  134. /cognite/neat/{rules → legacy/rules}/models/__init__.py +0 -0
  135. /cognite/neat/{rules → legacy/rules}/models/_base.py +0 -0
  136. /cognite/neat/{rules → legacy/rules}/models/tables.py +0 -0
  137. /cognite/neat/rules/models/{_rules → rules}/_types/__init__.py +0 -0
  138. /cognite/neat/rules/models/{_rules → rules}/_types/_field.py +0 -0
  139. /cognite/neat/rules/models/{_rules → rules}/_types/_value.py +0 -0
  140. /cognite/neat/workflows/examples/{Export DMS → Export_DMS}/workflow.yaml +0 -0
  141. /cognite/neat/workflows/examples/{Export Rules to Ontology → Export_Rules_to_Ontology}/workflow.yaml +0 -0
  142. /cognite/neat/workflows/examples/{Extract DEXPI Graph and Export Rules → Extract_DEXPI_Graph_and_Export_Rules}/workflow.yaml +0 -0
  143. /cognite/neat/workflows/examples/{Extract RDF Graph and Generate Assets → Extract_RDF_Graph_and_Generate_Assets}/workflow.yaml +0 -0
  144. /cognite/neat/workflows/examples/{Import DMS → Import_DMS}/workflow.yaml +0 -0
  145. /cognite/neat/workflows/examples/{Ontology to Data Model → Ontology_to_Data_Model}/workflow.yaml +0 -0
  146. /cognite/neat/workflows/examples/{Validate Rules → Validate_Rules}/workflow.yaml +0 -0
  147. /cognite/neat/workflows/examples/{Validate Solution Model → Validate_Solution_Model}/workflow.yaml +0 -0
  148. /cognite/neat/workflows/examples/{Visualize Data Model Using Mock Graph → Visualize_Data_Model_Using_Mock_Graph}/workflow.yaml +0 -0
  149. /cognite/neat/workflows/examples/{Visualize Semantic Data Model → Visualize_Semantic_Data_Model}/workflow.yaml +0 -0
  150. {cognite_neat-0.74.0.dist-info → cognite_neat-0.75.1.dist-info}/LICENSE +0 -0
  151. {cognite_neat-0.74.0.dist-info → cognite_neat-0.75.1.dist-info}/WHEEL +0 -0
  152. {cognite_neat-0.74.0.dist-info → cognite_neat-0.75.1.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,384 @@
1
+ import logging
2
+ import sys
3
+ import time
4
+ from abc import ABC, abstractmethod
5
+ from collections.abc import Iterable, Iterator
6
+ from pathlib import Path
7
+ from typing import Literal, TypeAlias, cast
8
+
9
+ import pandas as pd
10
+ from prometheus_client import Gauge, Summary
11
+ from rdflib import Graph, Namespace, URIRef
12
+ from rdflib.query import Result, ResultRow
13
+
14
+ from cognite.neat.constants import DEFAULT_NAMESPACE, PREFIXES
15
+ from cognite.neat.legacy.graph.models import Triple
16
+ from cognite.neat.legacy.graph.stores._rdf_to_graph import rdf_file_to_graph
17
+ from cognite.neat.legacy.rules.models.rules import Rules
18
+
19
+ if sys.version_info >= (3, 11):
20
+ from typing import Self
21
+ else:
22
+ from typing_extensions import Self
23
+
24
# Prometheus metrics recording how long SPARQL queries against the legacy graph store take.
# Both metrics are declared with a single label named "query".
prom_qsm = Summary(
    "store_query_time_summary_legacy",
    "Time spent processing queries",
    labelnames=["query"],
)
prom_sq = Gauge(
    "store_single_query_time_legacy",
    "Time spent processing a single query",
    labelnames=["query"],
)
26
+
27
+ MIMETypes: TypeAlias = Literal[
28
+ "application/rdf+xml", "text/turtle", "application/n-triple", "application/n-quads", "application/trig"
29
+ ]
30
+
31
+
32
class NeatGraphStoreBase(ABC):
    """Abstract base for graph stores that wrap an rdflib graph and expose methods to read/write its data.

    Subclasses must implement `_set_graph` (creates the backing graph) and `drop`.

    Args:
        graph : Instance of rdflib.Graph class for graph storage
        base_prefix : Used as a base prefix for graph namespace, allowing querying graph data using a shortform of a URI
        namespace : Namespace (aka URI) used to resolve any relative URI in the graph
        prefixes : Dictionary of additional prefixes bound to the graph. The dictionary is copied, so later
            calls to `upsert_prefixes` do not modify the object passed in.
    """

    rdf_store_type: str

    def __init__(
        self,
        graph: Graph | None = None,
        base_prefix: str = "",  # usually empty
        namespace: Namespace = DEFAULT_NAMESPACE,
        prefixes: dict = PREFIXES,
    ):
        # rdflib graphs implement __len__, so an empty graph is falsy. Compare against None
        # instead of relying on truthiness, otherwise a caller-supplied empty graph is discarded.
        self.graph = graph if graph is not None else Graph()
        self.base_prefix: str = base_prefix
        self.namespace: Namespace = namespace
        # Copy the mapping: upsert_prefixes() updates it in place, and without the copy that
        # update would leak into the shared PREFIXES default (or the caller's dictionary).
        self.prefixes: dict[str, Namespace] = dict(prefixes) if prefixes is not None else {}

        self.rdf_store_query_url: str | None = None
        self.rdf_store_update_url: str | None = None
        self.returnFormat: str | None = None
        self.df_cache: pd.DataFrame | None = None
        self.internal_storage_dir: Path | None = None
        self.graph_name: str | None = None
        self.internal_storage_dir_orig: Path | None = None
        self.storage_dirs_to_delete: list[Path] = []
        self.queries = _Queries(self)

    @classmethod
    def from_rules(cls, rules: Rules) -> Self:
        """
        Creates a new instance of NeatGraphStore from TransformationRules and runs the .init_graph() method on it.

        Args:
            rules: TransformationRules object containing information about the graph store.

        Returns:
            An instantiated instance of NeatGraphStore

        """
        namespace = DEFAULT_NAMESPACE if rules.metadata.namespace is None else rules.metadata.namespace
        store = cls(prefixes=rules.prefixes, namespace=namespace)
        store.init_graph(base_prefix=rules.metadata.prefix)
        return store

    @abstractmethod
    def _set_graph(self) -> None:
        """Creates the backing graph and assigns it to `self.graph`; called by `init_graph`."""
        raise NotImplementedError()

    def init_graph(
        self,
        rdf_store_query_url: str | None = None,
        rdf_store_update_url: str | None = None,
        graph_name: str | None = None,
        base_prefix: str | None = None,
        returnFormat: str = "csv",
        internal_storage_dir: Path | None = None,
    ):
        """Initializes the graph.

        Args:
            rdf_store_query_url : URL towards which SPARQL query is executed, by default None
            rdf_store_update_url : URL towards which SPARQL update is executed, by default None
            graph_name : Name of graph, by default None
            base_prefix : Base prefix for graph namespace to change if needed, by default None
            returnFormat : Transport format of graph data between, by default "csv"
            internal_storage_dir : Path to directory where internal storage is located,
                                   by default None (in-memory storage).

        !!! note "internal_storage_dir"
            Used only for Oxigraph
        """
        logging.info("Initializing NeatGraphStore")
        self.rdf_store_query_url = rdf_store_query_url
        self.rdf_store_update_url = rdf_store_update_url
        self.graph_name = graph_name
        self.returnFormat = returnFormat
        self.internal_storage_dir = Path(internal_storage_dir) if internal_storage_dir else None
        # Remember the first storage directory this store was initialized with.
        if self.internal_storage_dir_orig is None:
            self.internal_storage_dir_orig = self.internal_storage_dir

        self._set_graph()

        if self.prefixes:
            for prefix, namespace in self.prefixes.items():
                logging.info("Adding prefix %s with namespace %s", prefix, namespace)
                self.graph.bind(prefix, namespace)

        if base_prefix:
            self.base_prefix = base_prefix

        self.graph.bind(self.base_prefix, self.namespace)
        logging.info("Adding prefix %s with namespace %s", self.base_prefix, self.namespace)
        logging.info("Graph initialized")

    def reinitialize_graph(self):
        """Reinitialize the graph using the settings stored by the last `init_graph` call."""
        self.init_graph(
            self.rdf_store_query_url,
            self.rdf_store_update_url,
            self.graph_name,
            self.base_prefix,
            self.returnFormat,
            self.internal_storage_dir,
        )

    def upsert_prefixes(self, prefixes: dict[str, Namespace]) -> None:
        """Adds prefixes to the graph store and binds them to the graph."""
        self.prefixes.update(prefixes)
        for prefix, namespace in prefixes.items():
            logging.info("Adding prefix %s with namespace %s", prefix, namespace)
            self.graph.bind(prefix, namespace)

    def close(self) -> None:
        """Closes the graph."""
        # Can be overridden in subclasses
        return None

    def restart(self) -> None:
        """Restarts the graph"""
        # Can be overridden in subclasses
        return None

    def import_from_file(
        self, graph_file: Path, mime_type: MIMETypes = "application/rdf+xml", add_base_iri: bool = True
    ) -> None:
        """Imports graph data from file.

        Args:
            graph_file : File path to file containing graph data
            mime_type : MIME type of graph data, by default "application/rdf+xml".
                This base implementation does not forward it to the file reader.
            add_base_iri : Add base IRI to graph, by default True
        """
        if add_base_iri:
            self.graph = rdf_file_to_graph(
                self.graph, graph_file, base_namespace=self.namespace, prefixes=self.prefixes
            )
        else:
            self.graph = rdf_file_to_graph(self.graph, graph_file, prefixes=self.prefixes)
        return None

    def get_graph(self) -> Graph:
        """Returns the graph."""
        return self.graph

    def set_graph(self, graph: Graph):
        """Sets the graph."""
        self.graph = graph

    def query(self, query: str) -> Result:
        """Runs the SPARQL query against the graph, records its duration and returns the result."""
        start_time = time.perf_counter()
        result = self.graph.query(query)
        elapsed_time = time.perf_counter() - start_time
        prom_qsm.labels("query").observe(elapsed_time)
        prom_sq.labels("query").set(elapsed_time)
        return result

    def serialize(self, *args, **kwargs):
        """Serializes the graph; all arguments are forwarded to the graph's own `serialize`."""
        return self.graph.serialize(*args, **kwargs)

    def query_delayed(self, query) -> Iterable[Triple]:
        """Returns the result of the query, but does not execute it immediately.

        The query is not executed until the result is iterated over.

        Args:
            query: SPARQL query to execute

        Returns:
            An iterable of triples

        """
        return _DelayedQuery(self.graph, query)

    @abstractmethod
    def drop(self) -> None:
        """Drops the graph."""
        raise NotImplementedError()

    def garbage_collector(self) -> None:
        """Garbage collection of the graph store."""
        # Can be overridden in subclasses
        return None

    def query_to_dataframe(
        self,
        query: str,
        column_mapping: dict | None = None,
        save_to_cache: bool = False,
        index_column: str = "instance",
    ) -> pd.DataFrame:
        """Returns the result of the query as a dataframe.

        Args:
            query: SPARQL query to execute
            column_mapping: Columns name mapping, by default None
                (positions 0, 1, 2 become "instance", "property", "value")
            save_to_cache: Save result of query to cache, by default False
            index_column: Indexing column, by default "instance". Its values are converted to strings.

        Returns:
            Dataframe with result of query. When the query yields no rows, an empty dataframe
            with the mapped column names is returned.
        """

        if column_mapping is None:
            column_mapping = {0: "instance", 1: "property", 2: "value"}

        rows = list(self.graph.query(query, DEBUG=False))
        if rows:
            df_cache = pd.DataFrame(rows).rename(columns=column_mapping)
        else:
            # A frame built from zero rows has no positional columns to rename, so selecting
            # index_column would raise KeyError. Create the expected columns directly instead.
            df_cache = pd.DataFrame(columns=list(column_mapping.values()))
        df_cache[index_column] = df_cache[index_column].apply(str)
        if save_to_cache:
            self.df_cache = df_cache
        return df_cache

    def commit(self):
        """Commits the graph."""
        self.graph.commit()

    def get_df(self) -> pd.DataFrame:
        """Returns the cached dataframe.

        Raises:
            ValueError: If no dataframe has been cached yet.
        """
        if self.df_cache is None:
            raise ValueError("Cache is empty. Run query_to_dataframe() first with save_to_cache.")
        return self.df_cache

    def get_instance_properties_from_cache(self, instance_id: str) -> pd.DataFrame:
        """Returns the cached rows whose "instance" column equals `instance_id`.

        Raises:
            ValueError: If no dataframe has been cached yet.
        """
        if self.df_cache is None:
            raise ValueError("Cache is empty. Run query_to_dataframe() first with save_to_cache.")
        return self.df_cache.loc[self.df_cache["instance"] == instance_id]

    def print_triples(self):
        """Logs every triple of the graph at INFO level."""
        for subj, pred, obj in self.graph:
            logging.info(f"Triple: {subj} {pred} {obj}")

    def diagnostic_report(self):
        """Returns the dictionary representation of graph diagnostic data."""
        return {
            "rdf_store_type": self.rdf_store_type,
            "base_prefix": self.base_prefix,
            "namespace": self.namespace,
            "prefixes": self.prefixes,
            "internal_storage_dir": self.internal_storage_dir,
            "rdf_store_query_url": self.rdf_store_query_url,
            "rdf_store_update_url": self.rdf_store_update_url,
        }

    def add_triples(self, triples: list[Triple] | set[Triple], batch_size: int = 10_000, verbose: bool = False):
        """Adds triples to the graph store in batches.

        Args:
            triples: list of triples to be added to the graph store
            batch_size: Batch size of triples per commit, by default 10_000
            verbose: Verbose mode, by default False
        """
        total_number_of_triples = len(triples)
        if verbose:
            logging.info(f"Committing total of {total_number_of_triples} triples to knowledge graph!")

        number_of_uploaded_triples = 0
        pending = 0  # triples added since the last commit

        def commit_pending() -> None:
            """Commit everything added since the previous commit and update the progress counters."""
            nonlocal pending, number_of_uploaded_triples
            number_of_uploaded_triples += pending
            pending = 0
            self.graph.commit()
            if verbose:
                logging.info(f"Committed {number_of_uploaded_triples} of {total_number_of_triples} triples")

        for triple in triples:
            self.graph.add(triple)
            pending += 1
            if pending >= batch_size:
                commit_pending()

        # Final commit covers the last, possibly partial, batch.
        commit_pending()
331
+
332
+
333
class _DelayedQuery(Iterable):
    """Iterable that holds a graph and a SPARQL query and runs the query when iteration starts."""

    def __init__(self, graph_ref: Graph, query: str):
        self.query = query
        self.graph_ref = graph_ref

    def __iter__(self) -> Iterator[Triple]:
        """Execute the stored query, record how long the call took, and return an iterator over the result."""
        began = time.perf_counter()
        rows = self.graph_ref.query(self.query)
        duration = time.perf_counter() - began
        # Same metrics and label value as NeatGraphStoreBase.query uses.
        prom_qsm.labels("query").observe(duration)
        prom_sq.labels("query").set(duration)
        return cast(Iterator[Triple], iter(rows))
346
+
347
+
348
class _Queries:
    """Collection of standard SPARQL queries that are executed through a graph store."""

    def __init__(self, store: NeatGraphStoreBase):
        self.store = store

    def list_instances_ids_of_class(self, class_uri: URIRef, limit: int = -1) -> list[URIRef]:
        """Get instances ids for a given class

        Args:
            class_uri: Class for which instances are to be found
            limit: Max number of instances to return, by default -1 meaning all instances

        Returns:
            List of class instance URIs
        """
        # The template placeholders ("class" and "LIMIT X") are substituted in this order.
        template = "SELECT DISTINCT ?subject WHERE { ?subject a <class> .} LIMIT X"
        limit_clause = "" if limit == -1 else f"LIMIT {limit}"
        query_statement = template.replace("class", class_uri).replace("LIMIT X", limit_clause)

        rows = list(self.store.query(query_statement))
        # Each row carries a single binding (?subject); return just that value.
        return [cast(tuple, row)[0] for row in rows]

    def list_instances_of_type(self, class_uri: URIRef) -> list[ResultRow]:
        """Get all triples for instances of a given class

        Args:
            class_uri: Class for which instances are to be found

        Returns:
            List of triples for instances of the given class
        """
        select_clause = "SELECT ?instance ?prop ?value "
        where_clause = f"WHERE {{ ?instance rdf:type <{class_uri}> . ?instance ?prop ?value . }} order by ?instance "
        query = select_clause + where_clause
        logging.info(query)
        # A SELECT query yields an iterable of result rows; materialize it as a list.
        rows = list(self.store.query(query))
        return cast(list[ResultRow], rows)
@@ -0,0 +1,51 @@
1
+ import logging
2
+
3
+ import requests
4
+ from rdflib import Graph, Namespace
5
+ from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
6
+
7
+ from cognite.neat.constants import DEFAULT_NAMESPACE, PREFIXES
8
+
9
+ from ._base import NeatGraphStoreBase
10
+
11
+
12
class GraphDBStore(NeatGraphStoreBase):
    """Graph store that keeps the graph in a GraphDB instance, reached through SPARQL
    query/update endpoints, and provides methods to read/write the data it contains.


    Args:
        graph : Instance of rdflib.Graph class for graph storage
        base_prefix : Used as a base prefix for graph namespace, allowing querying graph data using a shortform of a URI
        namespace : Namespace (aka URI) used to resolve any relative URI in the graph
        prefixes : Dictionary of additional prefixes used and bounded to the graph
    """

    rdf_store_type = "graphdb"

    def __init__(
        self,
        graph: Graph | None = None,
        base_prefix: str = "",  # usually empty
        namespace: Namespace = DEFAULT_NAMESPACE,
        prefixes: dict = PREFIXES,
    ):
        super().__init__(graph=graph, base_prefix=base_prefix, namespace=namespace, prefixes=prefixes)
        # NOTE(review): not read anywhere in this class - confirm whether other code depends on it.
        self.graph_db_rest_url: str = "http://localhost:7200"

    def _set_graph(self) -> None:
        """Create an rdflib graph backed by a SPARQLUpdateStore pointing at the configured endpoints."""
        logging.info("Initializing graph store with GraphDB")
        sparql_store = SPARQLUpdateStore(
            query_endpoint=self.rdf_store_query_url,
            update_endpoint=self.rdf_store_update_url,
            returnFormat=self.returnFormat,
            context_aware=False,
            postAsEncoded=False,
            autocommit=False,
        )
        self.graph = Graph(store=sparql_store)

    def drop(self):
        """Drops the graph by sending an HTTP DELETE for the default graph and logging the response body."""
        # NOTE(review): the request is sent without a timeout - confirm whether that is intended.
        drop_url = f"{self.rdf_store_query_url}/rdf-graphs/service?default"
        response = requests.delete(drop_url)
        logging.info(f"Dropped graph with state: {response.text}")
@@ -0,0 +1,43 @@
1
+ import logging
2
+
3
+ from rdflib import Graph, Namespace
4
+
5
+ from cognite.neat.constants import DEFAULT_NAMESPACE, PREFIXES
6
+
7
+ from ._base import NeatGraphStoreBase
8
+
9
+
10
class MemoryStore(NeatGraphStoreBase):
    """Graph store that keeps the graph in memory using a plain rdflib graph and
    provides methods to read/write the data it contains.


    Args:
        graph : Instance of rdflib.Graph class for graph storage
        base_prefix : Used as a base prefix for graph namespace, allowing querying graph data using a shortform of a URI
        namespace : Namespace (aka URI) used to resolve any relative URI in the graph
        prefixes : Dictionary of additional prefixes used and bounded to the graph
    """

    rdf_store_type: str = "memory"

    def __init__(
        self,
        graph: Graph | None = None,
        base_prefix: str = "",  # usually empty
        namespace: Namespace = DEFAULT_NAMESPACE,
        prefixes: dict = PREFIXES,
    ):
        # The constructor only delegates; it is spelled out so the class carries its own docstring.
        super().__init__(graph=graph, base_prefix=base_prefix, namespace=namespace, prefixes=prefixes)

    def _set_graph(self):
        """Replace the current graph with a fresh, empty in-memory graph."""
        logging.info("Initializing graph in memory")
        self.graph = Graph()

    def drop(self):
        """Drops the graph."""
        # Reinitializing (rather than only swapping in an empty graph) re-applies the
        # prefixes and bindings; losing them would make the workflow fail.
        self.reinitialize_graph()
@@ -0,0 +1,147 @@
1
+ import logging
2
+ import os
3
+ import shutil
4
+ from pathlib import Path
5
+
6
+ from rdflib import Graph, Namespace
7
+
8
+ from cognite.neat.constants import DEFAULT_NAMESPACE, PREFIXES
9
+ from cognite.neat.utils.auxiliary import local_import
10
+
11
+ from ._base import MIMETypes, NeatGraphStoreBase
12
+
13
+
14
class OxiGraphStore(NeatGraphStoreBase):
    """OxiGraph is a class that stores the graph using OxiGraph and provides methods to read/write data it contains


    Args:
        graph : Instance of rdflib.Graph class for graph storage
        base_prefix : Used as a base prefix for graph namespace, allowing querying graph data using a shortform of a URI
        namespace : Namespace (aka URI) used to resolve any relative URI in the graph
        prefixes : Dictionary of additional prefixes used and bounded to the graph
    """

    rdf_store_type = "oxigraph"

    def __init__(
        self,
        graph: Graph | None = None,
        base_prefix: str = "",  # usually empty
        namespace: Namespace = DEFAULT_NAMESPACE,
        prefixes: dict = PREFIXES,
    ):
        super().__init__(graph, base_prefix, namespace, prefixes)

    def _set_graph(self) -> None:
        """Create the Oxigraph-backed graph, in memory or on disk depending on `internal_storage_dir`.

        Opening the store is attempted up to four times when the failure message mentions a lock;
        any other `OSError`, or a lock error on the last attempt, is re-raised.
        """
        logging.info("Initializing Oxigraph store")
        local_import("pyoxigraph", "oxi")
        import pyoxigraph

        # NOTE(review): this imports the non-legacy `_oxrdflib`; the release also adds
        # `cognite/neat/legacy/graph/stores/_oxrdflib.py` - confirm which one is intended.
        from cognite.neat.graph.stores import _oxrdflib

        # Store (Rust object) accepts only str as path and not Path; None selects in-memory storage.
        storage_path = str(self.internal_storage_dir) if self.internal_storage_dir else None

        max_attempts = 4
        for attempt in range(1, max_attempts + 1):
            try:
                oxstore = pyoxigraph.Store(path=storage_path)
                break
            except OSError as e:
                if "lock" not in str(e) or attempt == max_attempts:
                    raise
                # lock originated from another instance of the store; attempts run back-to-back
                logging.error("Error initializing Oxigraph store: %s", e)

        self.graph = Graph(store=_oxrdflib.OxigraphStore(store=oxstore))
        self.graph.default_union = True
        self.garbage_collector()

    def close(self):
        """Closes the graph after flushing the underlying store; failures are logged at DEBUG level."""
        if self.graph is not None:
            try:
                self.graph.store._inner.flush()  # type: ignore[attr-defined]
                self.graph.close(True)
            except Exception as e:
                logging.debug("Error closing graph: %s", e)

    def restart(self):
        """Restarts the graph"""
        self.close()
        self.reinitialize_graph()
        logging.info("GraphStore restarted")

    def import_from_file(
        self, graph_file: Path, mime_type: MIMETypes = "application/rdf+xml", add_base_iri: bool = True
    ) -> None:
        """Imports graph data from file using the store's bulk loader, then optimizes the store.

        Args:
            graph_file : File path to file containing graph data
            mime_type : MIME type of the file, by default "application/rdf+xml"
            add_base_iri : Add base IRI to the graph, by default True
        """
        if add_base_iri:
            self.graph.store._inner.bulk_load(  # type: ignore[attr-defined]
                str(graph_file), mime_type, base_iri=self.namespace
            )
        else:
            self.graph.store._inner.bulk_load(str(graph_file), mime_type)  # type: ignore[attr-defined]
        self.graph.store._inner.optimize()  # type: ignore[attr-defined]
        return None

    def drop(self):
        """Drops the graph: closes it and queues its storage directory (if any) for deletion.

        Errors are logged rather than raised.
        """
        try:
            self.close()
            # Due to the specifics of Oxigraph, storage directory cannot be deleted immediately
            # after closing the graph and creating a new one
            storage_dir = self.internal_storage_dir
            # An in-memory store has no storage directory (None); there is nothing to queue then.
            if storage_dir is not None and storage_dir.exists():
                self.storage_dirs_to_delete.append(storage_dir)
            self.garbage_collector()

        except Exception as e:
            logging.error(f"Error dropping graph : {e}")

    def garbage_collector(self):
        """Garbage collection of the graph store.

        Deletes every directory queued in `storage_dirs_to_delete`. Directories that cannot be
        deleted yet stay queued for the next run; directories that no longer exist are dropped
        from the queue.
        """
        still_pending: list[Path] = []
        for directory in self.storage_dirs_to_delete:
            try:
                shutil.rmtree(directory)
            except FileNotFoundError:
                # Already gone - nothing left to clean up for this entry.
                continue
            except OSError as e:
                logging.warning("Could not delete storage directory %s, will retry later: %s", directory, e)
                still_pending.append(directory)
        self.storage_dirs_to_delete = still_pending

    def __del__(self):
        # getattr guards against instances whose __init__ did not get as far as setting `graph`.
        graph = getattr(self, "graph", None)
        if graph is not None:
            if graph.store is not None:
                try:
                    graph.store._inner.flush()
                except Exception:
                    logging.debug("Error flushing graph")
            graph.close()
            # It requires more investigation os.remove(self.internal_storage_dir / "LOCK")

    def commit(self):
        """Commits the graph after flushing and optimizing the underlying store."""
        # Compare against None rather than using truthiness: rdflib graphs are falsy
        # when empty, which would silently skip the commit.
        if self.graph is not None:
            if self.graph.store is not None:
                logging.info("Committing graph - flushing and optimizing")
                self.graph.store._inner.flush()
                self.graph.store._inner.optimize()
            self.graph.commit()

    @staticmethod
    def drop_graph_store_storage(storage_path: Path | None) -> None:
        """Drop graph store storage on disk by unlinking every entry directly inside the directory.

        Args:
            storage_path : Path to storage directory
        """
        if storage_path and storage_path.exists():
            for f in os.listdir(storage_path):
                (storage_path / f).unlink()
            logging.info("Graph store dropped.")
        else:
            logging.info(f"Storage path {storage_path} does not exist. Skipping drop.")