cognite-neat 0.107.0__py3-none-any.whl → 0.109.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (69)
  1. cognite/neat/_constants.py +35 -1
  2. cognite/neat/_graph/_shared.py +4 -0
  3. cognite/neat/_graph/extractors/_classic_cdf/_base.py +115 -14
  4. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +87 -6
  5. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +48 -12
  6. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +19 -1
  7. cognite/neat/_graph/extractors/_dms.py +162 -47
  8. cognite/neat/_graph/extractors/_dms_graph.py +54 -4
  9. cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
  10. cognite/neat/_graph/extractors/_rdf_file.py +3 -2
  11. cognite/neat/_graph/loaders/__init__.py +1 -3
  12. cognite/neat/_graph/loaders/_rdf2dms.py +20 -10
  13. cognite/neat/_graph/queries/_base.py +144 -84
  14. cognite/neat/_graph/queries/_construct.py +1 -1
  15. cognite/neat/_graph/transformers/__init__.py +3 -1
  16. cognite/neat/_graph/transformers/_base.py +4 -4
  17. cognite/neat/_graph/transformers/_classic_cdf.py +13 -13
  18. cognite/neat/_graph/transformers/_prune_graph.py +3 -3
  19. cognite/neat/_graph/transformers/_rdfpath.py +3 -4
  20. cognite/neat/_graph/transformers/_value_type.py +71 -13
  21. cognite/neat/_issues/errors/__init__.py +2 -0
  22. cognite/neat/_issues/errors/_external.py +8 -0
  23. cognite/neat/_issues/errors/_resources.py +1 -1
  24. cognite/neat/_issues/warnings/__init__.py +0 -2
  25. cognite/neat/_issues/warnings/_models.py +1 -1
  26. cognite/neat/_issues/warnings/_properties.py +0 -8
  27. cognite/neat/_issues/warnings/_resources.py +1 -1
  28. cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
  29. cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
  30. cognite/neat/_rules/exporters/_rules2yaml.py +1 -1
  31. cognite/neat/_rules/importers/__init__.py +3 -1
  32. cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
  33. cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
  34. cognite/neat/_rules/importers/_rdf/_base.py +2 -2
  35. cognite/neat/_rules/importers/_rdf/_inference2rules.py +310 -26
  36. cognite/neat/_rules/models/_base_rules.py +22 -11
  37. cognite/neat/_rules/models/dms/_exporter.py +5 -4
  38. cognite/neat/_rules/models/dms/_rules.py +1 -8
  39. cognite/neat/_rules/models/dms/_rules_input.py +4 -0
  40. cognite/neat/_rules/models/information/_rules_input.py +5 -0
  41. cognite/neat/_rules/transformers/__init__.py +10 -3
  42. cognite/neat/_rules/transformers/_base.py +6 -1
  43. cognite/neat/_rules/transformers/_converters.py +530 -364
  44. cognite/neat/_rules/transformers/_mapping.py +4 -4
  45. cognite/neat/_session/_base.py +100 -47
  46. cognite/neat/_session/_create.py +133 -0
  47. cognite/neat/_session/_drop.py +60 -2
  48. cognite/neat/_session/_fix.py +28 -0
  49. cognite/neat/_session/_inspect.py +22 -7
  50. cognite/neat/_session/_mapping.py +8 -8
  51. cognite/neat/_session/_prepare.py +3 -247
  52. cognite/neat/_session/_read.py +138 -17
  53. cognite/neat/_session/_set.py +50 -1
  54. cognite/neat/_session/_show.py +16 -43
  55. cognite/neat/_session/_state.py +53 -52
  56. cognite/neat/_session/_to.py +11 -4
  57. cognite/neat/_session/_wizard.py +1 -1
  58. cognite/neat/_session/exceptions.py +8 -1
  59. cognite/neat/_store/_graph_store.py +301 -146
  60. cognite/neat/_store/_provenance.py +36 -20
  61. cognite/neat/_store/_rules_store.py +253 -267
  62. cognite/neat/_store/exceptions.py +40 -4
  63. cognite/neat/_utils/auth.py +5 -3
  64. cognite/neat/_version.py +1 -1
  65. {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/METADATA +1 -1
  66. {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/RECORD +69 -67
  67. {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/LICENSE +0 -0
  68. {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/WHEEL +0 -0
  69. {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/entry_points.txt +0 -0
@@ -8,23 +8,24 @@ from zipfile import ZipExtFile
8
8
 
9
9
  import pandas as pd
10
10
  from pandas import Index
11
- from rdflib import Dataset, Namespace, URIRef
11
+ from rdflib import Dataset, Graph, Namespace, URIRef
12
+ from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
12
13
  from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
13
14
 
14
- from cognite.neat._constants import DEFAULT_NAMESPACE
15
- from cognite.neat._graph._shared import rdflib_to_oxi_type
15
+ from cognite.neat._graph._shared import quad_formats, rdflib_to_oxi_type
16
16
  from cognite.neat._graph.extractors import RdfFileExtractor, TripleExtractors
17
17
  from cognite.neat._graph.queries import Queries
18
18
  from cognite.neat._graph.transformers import Transformers
19
19
  from cognite.neat._issues import IssueList, catch_issues
20
+ from cognite.neat._issues.errors import OxigraphStorageLockedError
20
21
  from cognite.neat._rules.analysis import InformationAnalysis
21
22
  from cognite.neat._rules.models import InformationRules
22
23
  from cognite.neat._rules.models.entities import ClassEntity
23
24
  from cognite.neat._shared import InstanceType, Triple
24
25
  from cognite.neat._utils.auxiliary import local_import
25
- from cognite.neat._utils.rdf_ import add_triples_in_batch
26
+ from cognite.neat._utils.rdf_ import add_triples_in_batch, remove_namespace_from_uri
26
27
 
27
- from ._provenance import Change, Provenance
28
+ from ._provenance import Change, Entity, Provenance
28
29
 
29
30
  if sys.version_info < (3, 11):
30
31
  from typing_extensions import Self
@@ -39,41 +40,49 @@ class NeatGraphStore:
39
40
  Args:
40
41
  graph : Instance of rdflib.Graph class for graph storage
41
42
  rules:
43
+
44
+ !!! note "Dataset"
45
+ The store leverages a RDF dataset which is defined as a collection of RDF graphs
46
+ where all but one are named graphs associated with URIRef (the graph name),
47
+ and the unnamed default graph which is in context of rdflib library has an
48
+ identifier URIRef('urn:x-rdflib:default').
42
49
  """
43
50
 
44
51
  rdf_store_type: str
45
52
 
46
53
  def __init__(
47
54
  self,
48
- graph: Dataset,
49
- rules: InformationRules | None = None,
55
+ dataset: Dataset,
56
+ default_named_graph: URIRef | None = None,
50
57
  ):
51
- self.rules: InformationRules | None = None
58
+ self.rules: dict[URIRef, InformationRules] = {}
59
+ self.base_namespace: dict[URIRef, Namespace] = {}
52
60
 
53
61
  _start = datetime.now(timezone.utc)
54
- self.graph = graph
55
- self.provenance = Provenance(
62
+ self.dataset = dataset
63
+ self.provenance = Provenance[Entity](
56
64
  [
57
65
  Change.record(
58
66
  activity=f"{type(self).__name__}.__init__",
59
67
  start=_start,
60
68
  end=datetime.now(timezone.utc),
61
- description=f"Initialize graph store as {type(self.graph.store).__name__}",
69
+ description=f"Initialize graph store as {type(self.dataset.store).__name__}",
62
70
  )
63
71
  ]
64
72
  )
65
73
 
66
- if rules:
67
- self.add_rules(rules)
68
- else:
69
- self.base_namespace = DEFAULT_NAMESPACE
74
+ self.default_named_graph = default_named_graph or DATASET_DEFAULT_GRAPH_ID
70
75
 
71
- self.queries = Queries(self.graph, self.rules)
76
+ self.queries = Queries(self.dataset, self.rules, self.default_named_graph)
77
+
78
+ def graph(self, named_graph: URIRef | None = None) -> Graph:
79
+ """Get named graph from the dataset to query over"""
80
+ return self.dataset.graph(named_graph or self.default_named_graph)
72
81
 
73
82
  @property
74
83
  def type_(self) -> str:
75
84
  "Return type of the graph store"
76
- return type(self.graph.store).__name__
85
+ return type(self.dataset.store).__name__
77
86
 
78
87
  # no destination
79
88
  @overload
@@ -91,54 +100,70 @@ class NeatGraphStore:
91
100
 
92
101
  Returns:
93
102
  Serialized graph store
103
+
104
+ !!! note "Trig Format"
105
+ Notice that instead of turtle format we are using trig format for serialization.
106
+ This is because trig format is a superset of turtle format and it allows us to
107
+ serialize named graphs as well. Allowing serialization of one or more named graphs
108
+ including the default graph.
94
109
  """
95
110
  if filepath:
96
- self.graph.serialize(
111
+ self.dataset.serialize(
97
112
  filepath,
98
- format="ox-trig" if self.type_ == "OxigraphStore" else "turtle",
113
+ format="ox-trig" if self.type_ == "OxigraphStore" else "trig",
99
114
  )
100
115
  return None
101
116
  else:
102
- return self.graph.serialize(format="ox-trig" if self.type_ == "OxigraphStore" else "turtle")
117
+ return self.dataset.serialize(format="ox-trig" if self.type_ == "OxigraphStore" else "trig")
118
+
119
+ def add_rules(self, rules: InformationRules, named_graph: URIRef | None = None) -> None:
120
+ """This method is used to add rules to a named graph stored in the graph store.
121
+
122
+ Args:
123
+ rules: InformationRules object containing rules to be added to the named graph
124
+ named_graph: URIRef of the named graph to store the rules in, by default None
125
+ rules will be added to the default graph
103
126
 
104
- def add_rules(self, rules: InformationRules) -> None:
105
- """This method is used to add rules to the graph store and it is the only correct
106
- way to add rules to the graph store, after the graph store has been initialized.
107
127
  """
108
128
 
109
- self.rules = rules
110
- self.base_namespace = self.rules.metadata.namespace
111
- self.queries = Queries(self.graph, self.rules)
112
- self.provenance.append(
113
- Change.record(
114
- activity=f"{type(self)}.rules",
115
- start=datetime.now(timezone.utc),
116
- end=datetime.now(timezone.utc),
117
- description=f"Added rules to graph store as {type(self.rules).__name__}",
129
+ named_graph = named_graph or self.default_named_graph
130
+
131
+ if named_graph in self.named_graphs:
132
+ # attaching appropriate namespace to the rules
133
+ # as well base_namespace
134
+ self.rules[named_graph] = rules
135
+ self.base_namespace[named_graph] = rules.metadata.namespace
136
+ self.queries = Queries(self.dataset, self.rules)
137
+ self.provenance.append(
138
+ Change.record(
139
+ activity=f"{type(self)}.rules",
140
+ start=datetime.now(timezone.utc),
141
+ end=datetime.now(timezone.utc),
142
+ description=f"Added {type(self.rules).__name__} to {named_graph} named graph",
143
+ )
118
144
  )
119
- )
120
145
 
121
- if self.rules.prefixes:
122
- self._upsert_prefixes(self.rules.prefixes)
146
+ if self.rules[named_graph].prefixes:
147
+ self._upsert_prefixes(self.rules[named_graph].prefixes, named_graph)
123
148
 
124
- def _upsert_prefixes(self, prefixes: dict[str, Namespace]) -> None:
149
+ def _upsert_prefixes(self, prefixes: dict[str, Namespace], named_graph: URIRef) -> None:
125
150
  """Adds prefixes to the graph store."""
126
151
  _start = datetime.now(timezone.utc)
127
152
  for prefix, namespace in prefixes.items():
128
- self.graph.bind(prefix, namespace)
153
+ self.graph(named_graph).bind(prefix, namespace)
129
154
 
130
155
  self.provenance.append(
131
156
  Change.record(
132
157
  activity=f"{type(self).__name__}._upsert_prefixes",
133
158
  start=_start,
134
159
  end=datetime.now(timezone.utc),
135
- description="Upsert prefixes to graph store",
160
+ description="Upsert prefixes to the name graph {named_graph}",
136
161
  )
137
162
  )
138
163
 
139
164
  @classmethod
140
- def from_memory_store(cls, rules: InformationRules | None = None) -> "Self":
141
- return cls(Dataset(), rules)
165
+ def from_memory_store(cls) -> "Self":
166
+ return cls(Dataset())
142
167
 
143
168
  @classmethod
144
169
  def from_sparql_store(
@@ -146,7 +171,6 @@ class NeatGraphStore:
146
171
  query_endpoint: str | None = None,
147
172
  update_endpoint: str | None = None,
148
173
  returnFormat: str = "csv",
149
- rules: InformationRules | None = None,
150
174
  ) -> "Self":
151
175
  store = SPARQLUpdateStore(
152
176
  query_endpoint=query_endpoint,
@@ -157,53 +181,75 @@ class NeatGraphStore:
157
181
  autocommit=False,
158
182
  )
159
183
  graph = Dataset(store=store)
160
- return cls(graph, rules)
184
+ return cls(graph)
161
185
 
162
186
  @classmethod
163
- def from_oxi_store(cls, storage_dir: Path | None = None, rules: InformationRules | None = None) -> "Self":
187
+ def from_oxi_remote_store(
188
+ cls,
189
+ remote_url: str,
190
+ autocommit: bool = False,
191
+ ) -> "Self":
192
+ """Creates a NeatGraphStore from a remote Oxigraph store SPARQL endpoint."""
193
+
194
+ return cls(
195
+ dataset=Dataset(
196
+ store=SPARQLUpdateStore(
197
+ query_endpoint=f"{remote_url}/query", update_endpoint=f"{remote_url}/query", autocommit=autocommit
198
+ ),
199
+ default_union=True,
200
+ )
201
+ )
202
+
203
+ @classmethod
204
+ def from_oxi_local_store(cls, storage_dir: Path | None = None) -> "Self":
164
205
  """Creates a NeatGraphStore from an Oxigraph store."""
165
206
  local_import("pyoxigraph", "oxi")
166
207
  local_import("oxrdflib", "oxi")
167
208
  import oxrdflib
168
209
  import pyoxigraph
169
210
 
170
- # Adding support for both oxigraph in-memory and file-based storage
171
- for i in range(4):
172
- try:
173
- oxi_store = pyoxigraph.Store(path=str(storage_dir) if storage_dir else None)
174
- break
175
- except OSError as e:
176
- if "lock" in str(e) and i < 3:
177
- continue
178
- raise e
179
- else:
180
- raise Exception("Error initializing Oxigraph store")
211
+ try:
212
+ oxi_store = pyoxigraph.Store(path=str(storage_dir) if storage_dir else None)
213
+ except OSError as e:
214
+ if "lock" in str(e):
215
+ raise OxigraphStorageLockedError(filepath=cast(Path, storage_dir)) from e
216
+ raise e
181
217
 
182
- graph = Dataset(
183
- store=oxrdflib.OxigraphStore(store=oxi_store),
218
+ return cls(
219
+ dataset=Dataset(
220
+ store=oxrdflib.OxigraphStore(store=oxi_store),
221
+ )
184
222
  )
185
223
 
186
- return cls(graph, rules)
187
-
188
- def write(self, extractor: TripleExtractors) -> IssueList:
224
+ def write(self, extractor: TripleExtractors, named_graph: URIRef | None = None) -> IssueList:
189
225
  last_change: Change | None = None
226
+ named_graph = named_graph or self.default_named_graph
190
227
  with catch_issues() as issue_list:
191
228
  _start = datetime.now(timezone.utc)
192
229
  success = True
193
230
 
194
231
  if isinstance(extractor, RdfFileExtractor) and not extractor.issue_list.has_errors:
195
- self._parse_file(extractor.filepath, cast(str, extractor.format), extractor.base_uri)
232
+ self._parse_file(
233
+ named_graph,
234
+ extractor.filepath,
235
+ cast(str, extractor.format),
236
+ extractor.base_uri,
237
+ )
196
238
  if isinstance(extractor.filepath, ZipExtFile):
197
239
  extractor.filepath.close()
240
+
198
241
  elif isinstance(extractor, RdfFileExtractor):
199
242
  success = False
200
243
  issue_text = "\n".join([issue.as_message() for issue in extractor.issue_list])
201
244
  warnings.warn(
202
- f"Cannot write to graph store with {type(extractor).__name__}, errors found in file:\n{issue_text}",
245
+ (
246
+ f"Cannot write to named graph {named_graph} with "
247
+ f"{type(extractor).__name__}, errors found in file:\n{issue_text}"
248
+ ),
203
249
  stacklevel=2,
204
250
  )
205
251
  else:
206
- self._add_triples(extractor.extract())
252
+ self._add_triples(extractor.extract(), named_graph=named_graph)
207
253
 
208
254
  if success:
209
255
  _end = datetime.now(timezone.utc)
@@ -218,7 +264,7 @@ class NeatGraphStore:
218
264
  activity=activity,
219
265
  start=_start,
220
266
  end=_end,
221
- description=f"Extracted triples to graph store using {type(extractor).__name__}",
267
+ description=f"Extracted triples to named graph {named_graph} using {type(extractor).__name__}",
222
268
  )
223
269
  self.provenance.append(last_change)
224
270
  if last_change:
@@ -226,17 +272,35 @@ class NeatGraphStore:
226
272
  return issue_list
227
273
 
228
274
  def _read_via_rules_linkage(
229
- self, class_neat_id: URIRef, property_link_pairs: dict[str, URIRef] | None
275
+ self,
276
+ class_neat_id: URIRef,
277
+ property_link_pairs: dict[str, URIRef] | None,
278
+ named_graph: URIRef | None = None,
230
279
  ) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
231
- if self.rules is None:
232
- warnings.warn("Rules not found in graph store! Aborting!", stacklevel=2)
280
+ named_graph = named_graph or self.default_named_graph
281
+
282
+ if named_graph not in self.named_graphs:
283
+ warnings.warn(
284
+ f"Named graph {named_graph} not found in graph store, cannot read",
285
+ stacklevel=2,
286
+ )
233
287
  return
288
+
289
+ if not self.rules or named_graph not in self.rules:
290
+ warnings.warn(
291
+ f"Rules for named graph {named_graph} not found in graph store!",
292
+ stacklevel=2,
293
+ )
294
+ return
295
+
234
296
  if self.multi_type_instances:
235
297
  warnings.warn(
236
298
  "Multi typed instances detected, issues with loading can occur!",
237
299
  stacklevel=2,
238
300
  )
239
- analysis = InformationAnalysis(self.rules)
301
+
302
+ analysis = InformationAnalysis(self.rules[named_graph])
303
+
240
304
  if cls := analysis.classes_by_neat_id.get(class_neat_id):
241
305
  if property_link_pairs:
242
306
  property_renaming_config = {
@@ -272,9 +336,22 @@ class NeatGraphStore:
272
336
  self,
273
337
  class_entity: ClassEntity,
274
338
  property_renaming_config: dict[URIRef, str] | None = None,
339
+ named_graph: URIRef | None = None,
275
340
  ) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
276
- if self.rules is None:
277
- warnings.warn("Rules not found in graph store!", stacklevel=2)
341
+ named_graph = named_graph or self.default_named_graph
342
+
343
+ if named_graph not in self.named_graphs:
344
+ warnings.warn(
345
+ f"Named graph {named_graph} not found in graph store, cannot read",
346
+ stacklevel=2,
347
+ )
348
+ return
349
+
350
+ if not self.rules or named_graph not in self.rules:
351
+ warnings.warn(
352
+ f"Rules for named graph {named_graph} not found in graph store!",
353
+ stacklevel=2,
354
+ )
278
355
  return
279
356
  if self.multi_type_instances:
280
357
  warnings.warn(
@@ -282,28 +359,28 @@ class NeatGraphStore:
282
359
  stacklevel=2,
283
360
  )
284
361
 
285
- if class_entity not in [definition.class_ for definition in self.rules.classes]:
362
+ if class_entity not in [definition.class_ for definition in self.rules[named_graph].classes]:
286
363
  warnings.warn("Desired type not found in graph!", stacklevel=2)
287
364
  return
288
365
 
289
- if not (class_uri := InformationAnalysis(self.rules).class_uri(class_entity)):
366
+ if not (class_uri := InformationAnalysis(self.rules[named_graph]).class_uri(class_entity)):
290
367
  warnings.warn(
291
368
  f"Class {class_entity.suffix} does not have namespace defined for prefix {class_entity.prefix} Rules!",
292
369
  stacklevel=2,
293
370
  )
294
371
  return
295
372
 
296
- has_hop_transformations = InformationAnalysis(self.rules).has_hop_transformations()
373
+ has_hop_transformations = InformationAnalysis(self.rules[named_graph]).has_hop_transformations()
297
374
  has_self_reference_transformations = InformationAnalysis(
298
- self.rules
375
+ self.rules[named_graph]
299
376
  ).has_self_reference_property_transformations()
300
377
  if has_hop_transformations or has_self_reference_transformations:
301
378
  msg = (
302
- f"Rules contain [{'Hop' if has_hop_transformations else '' }"
303
- f", {'SelfReferenceProperty' if has_self_reference_transformations else '' }]"
379
+ f"Rules contain [{'Hop' if has_hop_transformations else ''}"
380
+ f", {'SelfReferenceProperty' if has_self_reference_transformations else ''}]"
304
381
  " rdfpath."
305
- f" Run [{'ReduceHopTraversal' if has_hop_transformations else '' }"
306
- f", {'AddSelfReferenceProperty' if has_self_reference_transformations else '' }]"
382
+ f" Run [{'ReduceHopTraversal' if has_hop_transformations else ''}"
383
+ f", {'AddSelfReferenceProperty' if has_self_reference_transformations else ''}]"
307
384
  " transformer(s) first!"
308
385
  )
309
386
 
@@ -318,23 +395,19 @@ class NeatGraphStore:
318
395
 
319
396
  # get potential property renaming config
320
397
  property_renaming_config = property_renaming_config or InformationAnalysis(
321
- self.rules
398
+ self.rules[named_graph]
322
399
  ).define_property_renaming_config(class_entity)
323
400
 
324
- # get property types to guide process of removing or not namespaces from results
325
- property_types = InformationAnalysis(self.rules).property_types(class_entity)
326
401
  for instance_id in instance_ids:
327
402
  if res := self.queries.describe(
328
403
  instance_id=instance_id,
329
404
  instance_type=class_entity.suffix,
330
405
  property_renaming_config=property_renaming_config,
331
- property_types=property_types,
332
406
  ):
333
407
  yield res
334
408
 
335
409
  def read(
336
- self,
337
- class_: str,
410
+ self, class_: str, named_graph: URIRef | None = None
338
411
  ) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
339
412
  """Read instances for given class from the graph store.
340
413
 
@@ -343,9 +416,20 @@ class NeatGraphStore:
343
416
  the rules which are attached to the graph store.
344
417
 
345
418
  """
419
+ named_graph = named_graph or self.default_named_graph
346
420
 
347
- if not self.rules:
348
- warnings.warn("Rules not found in graph store!", stacklevel=2)
421
+ if named_graph not in self.named_graphs:
422
+ warnings.warn(
423
+ f"Named graph {named_graph} not found in graph store, cannot read",
424
+ stacklevel=2,
425
+ )
426
+ return
427
+
428
+ if not self.rules or named_graph not in self.rules:
429
+ warnings.warn(
430
+ f"Rules for named graph {named_graph} not found in graph store!",
431
+ stacklevel=2,
432
+ )
349
433
  return
350
434
  if self.multi_type_instances:
351
435
  warnings.warn(
@@ -353,15 +437,15 @@ class NeatGraphStore:
353
437
  stacklevel=2,
354
438
  )
355
439
 
356
- class_entity = ClassEntity(prefix=self.rules.metadata.prefix, suffix=class_)
440
+ class_entity = ClassEntity(prefix=self.rules[named_graph].metadata.prefix, suffix=class_)
357
441
 
358
- if class_entity not in [definition.class_ for definition in self.rules.classes]:
442
+ if class_entity not in [definition.class_ for definition in self.rules[named_graph].classes]:
359
443
  warnings.warn("Desired type not found in graph!", stacklevel=2)
360
444
  return
361
445
 
362
446
  yield from self._read_via_class_entity(class_entity)
363
447
 
364
- def count_of_id(self, neat_id: URIRef) -> int:
448
+ def count_of_id(self, neat_id: URIRef, named_graph: URIRef | None = None) -> int:
365
449
  """Count the number of instances of a given type
366
450
 
367
451
  Args:
@@ -370,18 +454,31 @@ class NeatGraphStore:
370
454
  Returns:
371
455
  Number of instances
372
456
  """
373
- if not self.rules:
374
- warnings.warn("Rules not found in graph store!", stacklevel=2)
457
+ named_graph = named_graph or self.default_named_graph
458
+
459
+ if named_graph not in self.named_graphs:
460
+ warnings.warn(
461
+ f"Named graph {named_graph} not found in graph store, cannot count",
462
+ stacklevel=2,
463
+ )
464
+ return 0
465
+
466
+ if not self.rules or named_graph not in self.rules:
467
+ warnings.warn(
468
+ f"Rules for named graph {named_graph} not found in graph store!",
469
+ stacklevel=2,
470
+ )
375
471
  return 0
376
472
 
377
473
  class_entity = next(
378
- (definition.class_ for definition in self.rules.classes if definition.neatId == neat_id), None
474
+ (definition.class_ for definition in self.rules[named_graph].classes if definition.neatId == neat_id),
475
+ None,
379
476
  )
380
477
  if not class_entity:
381
478
  warnings.warn("Desired type not found in graph!", stacklevel=2)
382
479
  return 0
383
480
 
384
- if not (class_uri := InformationAnalysis(self.rules).class_uri(class_entity)):
481
+ if not (class_uri := InformationAnalysis(self.rules[named_graph]).class_uri(class_entity)):
385
482
  warnings.warn(
386
483
  f"Class {class_entity.suffix} does not have namespace defined for prefix {class_entity.prefix} Rules!",
387
484
  stacklevel=2,
@@ -392,10 +489,11 @@ class NeatGraphStore:
392
489
 
393
490
  def count_of_type(self, class_uri: URIRef) -> int:
394
491
  query = f"SELECT (COUNT(?instance) AS ?instanceCount) WHERE {{ ?instance a <{class_uri}> }}"
395
- return int(next(iter(self.graph.query(query)))[0]) # type: ignore[arg-type, index]
492
+ return int(next(iter(self.dataset.query(query)))[0]) # type: ignore[arg-type, index]
396
493
 
397
494
  def _parse_file(
398
495
  self,
496
+ named_graph: URIRef,
399
497
  filepath: Path | ZipExtFile,
400
498
  format: str = "turtle",
401
499
  base_uri: URIRef | None = None,
@@ -403,6 +501,7 @@ class NeatGraphStore:
403
501
  """Imports graph data from file.
404
502
 
405
503
  Args:
504
+ named_graph : URIRef of the named graph to store the data in
406
505
  filepath : File path to file containing graph data, by default None
407
506
  format : rdflib format file containing RDF graph, by default "turtle"
408
507
  base_uri : base URI to add to graph in case of relative URIs, by default None
@@ -419,25 +518,35 @@ class NeatGraphStore:
419
518
  if self.type_ == "OxigraphStore":
420
519
  local_import("pyoxigraph", "oxi")
421
520
 
422
- # this is necessary to trigger rdflib oxigraph plugin
423
- self.graph.parse(
424
- filepath, # type: ignore[arg-type]
425
- format=rdflib_to_oxi_type(format),
426
- transactional=False,
427
- publicID=base_uri,
428
- )
429
- self.graph.store._store.optimize() # type: ignore[attr-defined]
521
+ if format in quad_formats():
522
+ self.dataset.parse(
523
+ filepath, # type: ignore[arg-type]
524
+ format=rdflib_to_oxi_type(format),
525
+ transactional=False,
526
+ publicID=base_uri,
527
+ )
528
+ else:
529
+ self.graph(named_graph).parse(
530
+ filepath, # type: ignore[arg-type]
531
+ format=rdflib_to_oxi_type(format),
532
+ transactional=False,
533
+ publicID=base_uri,
534
+ )
535
+ self.dataset.store._store.optimize() # type: ignore[attr-defined]
430
536
 
431
537
  # All other stores
432
538
  else:
433
- if isinstance(filepath, ZipExtFile) or filepath.is_file():
434
- self.graph.parse(filepath, publicID=base_uri) # type: ignore[arg-type]
539
+ if format in quad_formats():
540
+ self.dataset.parse(filepath, publicID=base_uri, format=format) # type: ignore[arg-type]
435
541
  else:
436
- for filename in filepath.iterdir():
437
- if filename.is_file():
438
- self.graph.parse(filename, publicID=base_uri)
542
+ self.graph(named_graph).parse(filepath, publicID=base_uri, format=format) # type: ignore[arg-type]
439
543
 
440
- def _add_triples(self, triples: Iterable[Triple], batch_size: int = 10_000):
544
+ def _add_triples(
545
+ self,
546
+ triples: Iterable[Triple],
547
+ named_graph: URIRef,
548
+ batch_size: int = 10_000,
549
+ ) -> None:
441
550
  """Adds triples to the graph store in batches.
442
551
 
443
552
  Args:
@@ -445,66 +554,103 @@ class NeatGraphStore:
445
554
  batch_size: Batch size of triples per commit, by default 10_000
446
555
  verbose: Verbose mode, by default False
447
556
  """
448
- add_triples_in_batch(self.graph, triples, batch_size)
557
+ add_triples_in_batch(self.graph(named_graph), triples, batch_size)
449
558
 
450
- def transform(self, transformer: Transformers) -> None:
559
+ def transform(self, transformer: Transformers, named_graph: URIRef | None = None) -> None:
451
560
  """Transforms the graph store using a transformer."""
452
561
 
453
- missing_changes = [
454
- change for change in transformer._need_changes if not self.provenance.activity_took_place(change)
455
- ]
456
- if self.provenance.activity_took_place(type(transformer).__name__) and transformer._use_only_once:
457
- warnings.warn(
458
- f"Cannot transform graph store with {type(transformer).__name__}, already applied",
459
- stacklevel=2,
460
- )
461
- elif missing_changes:
462
- warnings.warn(
463
- (
464
- f"Cannot transform graph store with {type(transformer).__name__}, "
465
- f"missing one or more required changes [{', '.join(missing_changes)}]"
466
- ),
467
- stacklevel=2,
468
- )
562
+ named_graph = named_graph or self.default_named_graph
563
+ if named_graph in self.named_graphs:
564
+ missing_changes = [
565
+ change for change in transformer._need_changes if not self.provenance.activity_took_place(change)
566
+ ]
567
+ if self.provenance.activity_took_place(type(transformer).__name__) and transformer._use_only_once:
568
+ warnings.warn(
569
+ f"Cannot transform graph store with {type(transformer).__name__}, already applied",
570
+ stacklevel=2,
571
+ )
572
+ elif missing_changes:
573
+ warnings.warn(
574
+ (
575
+ f"Cannot transform graph store with {type(transformer).__name__}, "
576
+ f"missing one or more required changes [{', '.join(missing_changes)}]"
577
+ ),
578
+ stacklevel=2,
579
+ )
469
580
 
470
- else:
471
- _start = datetime.now(timezone.utc)
472
- transformer.transform(self.graph)
473
- self.provenance.append(
474
- Change.record(
475
- activity=f"{type(transformer).__name__}",
476
- start=_start,
477
- end=datetime.now(timezone.utc),
478
- description=transformer.description,
581
+ else:
582
+ _start = datetime.now(timezone.utc)
583
+ transformer.transform(self.graph(named_graph))
584
+ self.provenance.append(
585
+ Change.record(
586
+ activity=f"{type(transformer).__name__}",
587
+ start=_start,
588
+ end=datetime.now(timezone.utc),
589
+ description=transformer.description,
590
+ )
479
591
  )
592
+
593
+ else:
594
+ warnings.warn(
595
+ f"Named graph {named_graph} not found in graph store, cannot transform",
596
+ stacklevel=2,
480
597
  )
481
598
 
482
599
  @property
483
- def summary(self) -> pd.DataFrame:
484
- return pd.DataFrame(self.queries.summarize_instances(), columns=["Type", "Occurrence"])
600
+ def summary(self) -> dict[URIRef, pd.DataFrame]:
601
+ return {
602
+ named_graph: pd.DataFrame(
603
+ self.queries.summarize_instances(named_graph),
604
+ columns=["Type", "Occurrence"],
605
+ )
606
+ for named_graph in self.named_graphs
607
+ }
485
608
 
486
609
  @property
487
- def multi_type_instances(self) -> dict[str, list[str]]:
488
- return self.queries.multi_type_instances()
610
+ def multi_type_instances(self) -> dict[URIRef, dict[str, list[str]]]:
611
+ return {named_graph: self.queries.multi_type_instances(named_graph) for named_graph in self.named_graphs}
489
612
 
490
613
  def _repr_html_(self) -> str:
491
614
  provenance = self.provenance._repr_html_()
492
- summary: pd.DataFrame = self.summary
615
+ summary: dict[URIRef, pd.DataFrame] = self.summary
616
+
617
+ def _short_name_of_graph(named_graph: URIRef) -> str:
618
+ return "default" if named_graph == self.default_named_graph else remove_namespace_from_uri(named_graph)
493
619
 
494
- if summary.empty:
620
+ if not summary:
495
621
  summary_text = "<br /><strong>Graph is empty</strong><br />"
496
622
  else:
623
+ all_types = set().union(
624
+ *[set(sub_summary.Type) for sub_summary in summary.values() if not sub_summary.empty]
625
+ )
626
+
497
627
  summary_text = (
498
628
  "<br /><strong>Overview</strong>:" # type: ignore
499
- f"<ul><li>{len(summary)} types</strong></li>"
500
- f"<li>{sum(summary['Occurrence'])} instances</strong></li></ul>"
501
- f"{cast(pd.DataFrame, self._shorten_summary(summary))._repr_html_()}" # type: ignore[operator]
629
+ f"<ul><li>{len(summary)} named graphs</strong></li>"
630
+ f"<li>Total of {len(all_types)} unique types</strong></li>"
502
631
  )
503
632
 
504
- if self.multi_type_instances:
505
- summary_text += "<br><strong>Multi value instances detected! Loading could have issues!</strong></br>" # type: ignore
633
+ for named_graph, table in summary.items():
634
+ summary_text += (
635
+ f"<li>{sum(table['Occurrence'])} instances in {_short_name_of_graph(named_graph)}"
636
+ " graph</strong></li>"
637
+ )
638
+
639
+ summary_text += "</ul>"
640
+ for named_graph, table in summary.items():
641
+ summary_text += (
642
+ f"<br /><strong>{_short_name_of_graph(named_graph)} graph</strong>:"
643
+ f"{cast(pd.DataFrame, self._shorten_summary(table))._repr_html_()}" # type: ignore[operator]
644
+ )
645
+
646
+ for named_graph, multi_value_instances in self.multi_type_instances.items():
647
+ if multi_value_instances:
648
+ summary_text += (
649
+ f"<br><strong>Multi value instances detected in {_short_name_of_graph(named_graph)}"
650
+ "graph! Loading could have issues!</strong></br>"
651
+ )
506
652
 
507
- return f"{summary_text}" f"{provenance}"
653
+ return f"{summary_text}{provenance}"
508
654
 
509
655
  def _shorten_summary(self, summary: pd.DataFrame) -> pd.DataFrame:
510
656
  """Shorten summary to top 5 types by occurrence."""
@@ -531,3 +677,12 @@ class NeatGraphStore:
531
677
  shorter_summary.index = cast(Index, indexes)
532
678
 
533
679
  return shorter_summary
680
+
681
+ @property
682
+ def named_graphs(self) -> list[URIRef]:
683
+ return [cast(URIRef, context.identifier) for context in self.dataset.contexts()]
684
+
685
+ @property
686
+ def empty(self) -> bool:
687
+ """Cheap way to check if the graph store is empty."""
688
+ return not self.queries.has_data()