cognite-neat 0.107.0__py3-none-any.whl → 0.108.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (52)
  1. cognite/neat/_constants.py +35 -1
  2. cognite/neat/_graph/_shared.py +4 -0
  3. cognite/neat/_graph/extractors/_classic_cdf/_base.py +115 -14
  4. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +83 -6
  5. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +48 -12
  6. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +19 -1
  7. cognite/neat/_graph/extractors/_dms.py +162 -47
  8. cognite/neat/_graph/extractors/_dms_graph.py +54 -4
  9. cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
  10. cognite/neat/_graph/extractors/_rdf_file.py +3 -2
  11. cognite/neat/_graph/loaders/__init__.py +1 -3
  12. cognite/neat/_graph/loaders/_rdf2dms.py +20 -10
  13. cognite/neat/_graph/queries/_base.py +140 -84
  14. cognite/neat/_graph/queries/_construct.py +1 -1
  15. cognite/neat/_graph/transformers/__init__.py +3 -1
  16. cognite/neat/_graph/transformers/_value_type.py +54 -3
  17. cognite/neat/_issues/errors/_resources.py +1 -1
  18. cognite/neat/_issues/warnings/__init__.py +0 -2
  19. cognite/neat/_issues/warnings/_models.py +1 -1
  20. cognite/neat/_issues/warnings/_properties.py +0 -8
  21. cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
  22. cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
  23. cognite/neat/_rules/importers/__init__.py +3 -1
  24. cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
  25. cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
  26. cognite/neat/_rules/importers/_rdf/_base.py +2 -2
  27. cognite/neat/_rules/importers/_rdf/_inference2rules.py +241 -18
  28. cognite/neat/_rules/models/_base_rules.py +13 -3
  29. cognite/neat/_rules/models/dms/_rules.py +1 -8
  30. cognite/neat/_rules/models/dms/_rules_input.py +4 -0
  31. cognite/neat/_rules/models/information/_rules_input.py +5 -0
  32. cognite/neat/_rules/transformers/__init__.py +6 -0
  33. cognite/neat/_rules/transformers/_converters.py +98 -7
  34. cognite/neat/_session/_base.py +55 -4
  35. cognite/neat/_session/_drop.py +5 -1
  36. cognite/neat/_session/_inspect.py +3 -2
  37. cognite/neat/_session/_read.py +61 -14
  38. cognite/neat/_session/_set.py +27 -0
  39. cognite/neat/_session/_show.py +4 -4
  40. cognite/neat/_session/_state.py +8 -4
  41. cognite/neat/_session/_to.py +4 -1
  42. cognite/neat/_session/_wizard.py +1 -1
  43. cognite/neat/_session/exceptions.py +2 -1
  44. cognite/neat/_store/_graph_store.py +287 -133
  45. cognite/neat/_store/_rules_store.py +108 -1
  46. cognite/neat/_utils/auth.py +1 -1
  47. cognite/neat/_version.py +1 -1
  48. {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/METADATA +1 -1
  49. {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/RECORD +52 -52
  50. {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/LICENSE +0 -0
  51. {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/WHEEL +0 -0
  52. {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/entry_points.txt +0 -0
@@ -8,11 +8,11 @@ from zipfile import ZipExtFile
8
8
 
9
9
  import pandas as pd
10
10
  from pandas import Index
11
- from rdflib import Dataset, Namespace, URIRef
11
+ from rdflib import Dataset, Graph, Namespace, URIRef
12
+ from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
12
13
  from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
13
14
 
14
- from cognite.neat._constants import DEFAULT_NAMESPACE
15
- from cognite.neat._graph._shared import rdflib_to_oxi_type
15
+ from cognite.neat._graph._shared import quad_formats, rdflib_to_oxi_type
16
16
  from cognite.neat._graph.extractors import RdfFileExtractor, TripleExtractors
17
17
  from cognite.neat._graph.queries import Queries
18
18
  from cognite.neat._graph.transformers import Transformers
@@ -22,7 +22,7 @@ from cognite.neat._rules.models import InformationRules
22
22
  from cognite.neat._rules.models.entities import ClassEntity
23
23
  from cognite.neat._shared import InstanceType, Triple
24
24
  from cognite.neat._utils.auxiliary import local_import
25
- from cognite.neat._utils.rdf_ import add_triples_in_batch
25
+ from cognite.neat._utils.rdf_ import add_triples_in_batch, remove_namespace_from_uri
26
26
 
27
27
  from ._provenance import Change, Provenance
28
28
 
@@ -39,41 +39,49 @@ class NeatGraphStore:
39
39
  Args:
40
40
  graph : Instance of rdflib.Graph class for graph storage
41
41
  rules:
42
+
43
+ !!! note "Dataset"
44
+ The store leverages a RDF dataset which is defined as a collection of RDF graphs
45
+ where all but one are named graphs associated with URIRef (the graph name),
46
+ and the unnamed default graph which is in context of rdflib library has an
47
+ identifier URIRef('urn:x-rdflib:default').
42
48
  """
43
49
 
44
50
  rdf_store_type: str
45
51
 
46
52
  def __init__(
47
53
  self,
48
- graph: Dataset,
49
- rules: InformationRules | None = None,
54
+ dataset: Dataset,
55
+ default_named_graph: URIRef | None = None,
50
56
  ):
51
- self.rules: InformationRules | None = None
57
+ self.rules: dict[URIRef, InformationRules] = {}
58
+ self.base_namespace: dict[URIRef, Namespace] = {}
52
59
 
53
60
  _start = datetime.now(timezone.utc)
54
- self.graph = graph
61
+ self.dataset = dataset
55
62
  self.provenance = Provenance(
56
63
  [
57
64
  Change.record(
58
65
  activity=f"{type(self).__name__}.__init__",
59
66
  start=_start,
60
67
  end=datetime.now(timezone.utc),
61
- description=f"Initialize graph store as {type(self.graph.store).__name__}",
68
+ description=f"Initialize graph store as {type(self.dataset.store).__name__}",
62
69
  )
63
70
  ]
64
71
  )
65
72
 
66
- if rules:
67
- self.add_rules(rules)
68
- else:
69
- self.base_namespace = DEFAULT_NAMESPACE
73
+ self.default_named_graph = default_named_graph or DATASET_DEFAULT_GRAPH_ID
70
74
 
71
- self.queries = Queries(self.graph, self.rules)
75
+ self.queries = Queries(self.dataset, self.rules, self.default_named_graph)
76
+
77
+ def graph(self, named_graph: URIRef | None = None) -> Graph:
78
+ """Get named graph from the dataset to query over"""
79
+ return self.dataset.graph(named_graph or self.default_named_graph)
72
80
 
73
81
  @property
74
82
  def type_(self) -> str:
75
83
  "Return type of the graph store"
76
- return type(self.graph.store).__name__
84
+ return type(self.dataset.store).__name__
77
85
 
78
86
  # no destination
79
87
  @overload
@@ -91,54 +99,70 @@ class NeatGraphStore:
91
99
 
92
100
  Returns:
93
101
  Serialized graph store
102
+
103
+ !!! note "Trig Format"
104
+ Notice that instead of turtle format we are using trig format for serialization.
105
+ This is because trig format is a superset of turtle format and it allows us to
106
+ serialize named graphs as well. Allowing serialization of one or more named graphs
107
+ including the default graph.
94
108
  """
95
109
  if filepath:
96
- self.graph.serialize(
110
+ self.dataset.serialize(
97
111
  filepath,
98
- format="ox-trig" if self.type_ == "OxigraphStore" else "turtle",
112
+ format="ox-trig" if self.type_ == "OxigraphStore" else "trig",
99
113
  )
100
114
  return None
101
115
  else:
102
- return self.graph.serialize(format="ox-trig" if self.type_ == "OxigraphStore" else "turtle")
116
+ return self.dataset.serialize(format="ox-trig" if self.type_ == "OxigraphStore" else "trig")
117
+
118
+ def add_rules(self, rules: InformationRules, named_graph: URIRef | None = None) -> None:
119
+ """This method is used to add rules to a named graph stored in the graph store.
120
+
121
+ Args:
122
+ rules: InformationRules object containing rules to be added to the named graph
123
+ named_graph: URIRef of the named graph to store the rules in, by default None
124
+ rules will be added to the default graph
103
125
 
104
- def add_rules(self, rules: InformationRules) -> None:
105
- """This method is used to add rules to the graph store and it is the only correct
106
- way to add rules to the graph store, after the graph store has been initialized.
107
126
  """
108
127
 
109
- self.rules = rules
110
- self.base_namespace = self.rules.metadata.namespace
111
- self.queries = Queries(self.graph, self.rules)
112
- self.provenance.append(
113
- Change.record(
114
- activity=f"{type(self)}.rules",
115
- start=datetime.now(timezone.utc),
116
- end=datetime.now(timezone.utc),
117
- description=f"Added rules to graph store as {type(self.rules).__name__}",
128
+ named_graph = named_graph or self.default_named_graph
129
+
130
+ if named_graph in self.named_graphs:
131
+ # attaching appropriate namespace to the rules
132
+ # as well base_namespace
133
+ self.rules[named_graph] = rules
134
+ self.base_namespace[named_graph] = rules.metadata.namespace
135
+ self.queries = Queries(self.dataset, self.rules)
136
+ self.provenance.append(
137
+ Change.record(
138
+ activity=f"{type(self)}.rules",
139
+ start=datetime.now(timezone.utc),
140
+ end=datetime.now(timezone.utc),
141
+ description=f"Added {type(self.rules).__name__} to {named_graph} named graph",
142
+ )
118
143
  )
119
- )
120
144
 
121
- if self.rules.prefixes:
122
- self._upsert_prefixes(self.rules.prefixes)
145
+ if self.rules[named_graph].prefixes:
146
+ self._upsert_prefixes(self.rules[named_graph].prefixes, named_graph)
123
147
 
124
- def _upsert_prefixes(self, prefixes: dict[str, Namespace]) -> None:
148
+ def _upsert_prefixes(self, prefixes: dict[str, Namespace], named_graph: URIRef) -> None:
125
149
  """Adds prefixes to the graph store."""
126
150
  _start = datetime.now(timezone.utc)
127
151
  for prefix, namespace in prefixes.items():
128
- self.graph.bind(prefix, namespace)
152
+ self.graph(named_graph).bind(prefix, namespace)
129
153
 
130
154
  self.provenance.append(
131
155
  Change.record(
132
156
  activity=f"{type(self).__name__}._upsert_prefixes",
133
157
  start=_start,
134
158
  end=datetime.now(timezone.utc),
135
- description="Upsert prefixes to graph store",
159
+ description="Upsert prefixes to the name graph {named_graph}",
136
160
  )
137
161
  )
138
162
 
139
163
  @classmethod
140
- def from_memory_store(cls, rules: InformationRules | None = None) -> "Self":
141
- return cls(Dataset(), rules)
164
+ def from_memory_store(cls) -> "Self":
165
+ return cls(Dataset())
142
166
 
143
167
  @classmethod
144
168
  def from_sparql_store(
@@ -146,7 +170,6 @@ class NeatGraphStore:
146
170
  query_endpoint: str | None = None,
147
171
  update_endpoint: str | None = None,
148
172
  returnFormat: str = "csv",
149
- rules: InformationRules | None = None,
150
173
  ) -> "Self":
151
174
  store = SPARQLUpdateStore(
152
175
  query_endpoint=query_endpoint,
@@ -157,10 +180,27 @@ class NeatGraphStore:
157
180
  autocommit=False,
158
181
  )
159
182
  graph = Dataset(store=store)
160
- return cls(graph, rules)
183
+ return cls(graph)
184
+
185
+ @classmethod
186
+ def from_oxi_remote_store(
187
+ cls,
188
+ remote_url: str,
189
+ autocommit: bool = False,
190
+ ) -> "Self":
191
+ """Creates a NeatGraphStore from a remote Oxigraph store SPARQL endpoint."""
192
+
193
+ return cls(
194
+ dataset=Dataset(
195
+ store=SPARQLUpdateStore(
196
+ query_endpoint=f"{remote_url}/query", update_endpoint=f"{remote_url}/query", autocommit=autocommit
197
+ ),
198
+ default_union=True,
199
+ )
200
+ )
161
201
 
162
202
  @classmethod
163
- def from_oxi_store(cls, storage_dir: Path | None = None, rules: InformationRules | None = None) -> "Self":
203
+ def from_oxi_local_store(cls, storage_dir: Path | None = None) -> "Self":
164
204
  """Creates a NeatGraphStore from an Oxigraph store."""
165
205
  local_import("pyoxigraph", "oxi")
166
206
  local_import("oxrdflib", "oxi")
@@ -179,31 +219,41 @@ class NeatGraphStore:
179
219
  else:
180
220
  raise Exception("Error initializing Oxigraph store")
181
221
 
182
- graph = Dataset(
183
- store=oxrdflib.OxigraphStore(store=oxi_store),
222
+ return cls(
223
+ dataset=Dataset(
224
+ store=oxrdflib.OxigraphStore(store=oxi_store),
225
+ )
184
226
  )
185
227
 
186
- return cls(graph, rules)
187
-
188
- def write(self, extractor: TripleExtractors) -> IssueList:
228
+ def write(self, extractor: TripleExtractors, named_graph: URIRef | None = None) -> IssueList:
189
229
  last_change: Change | None = None
230
+ named_graph = named_graph or self.default_named_graph
190
231
  with catch_issues() as issue_list:
191
232
  _start = datetime.now(timezone.utc)
192
233
  success = True
193
234
 
194
235
  if isinstance(extractor, RdfFileExtractor) and not extractor.issue_list.has_errors:
195
- self._parse_file(extractor.filepath, cast(str, extractor.format), extractor.base_uri)
236
+ self._parse_file(
237
+ named_graph,
238
+ extractor.filepath,
239
+ cast(str, extractor.format),
240
+ extractor.base_uri,
241
+ )
196
242
  if isinstance(extractor.filepath, ZipExtFile):
197
243
  extractor.filepath.close()
244
+
198
245
  elif isinstance(extractor, RdfFileExtractor):
199
246
  success = False
200
247
  issue_text = "\n".join([issue.as_message() for issue in extractor.issue_list])
201
248
  warnings.warn(
202
- f"Cannot write to graph store with {type(extractor).__name__}, errors found in file:\n{issue_text}",
249
+ (
250
+ f"Cannot write to named graph {named_graph} with "
251
+ f"{type(extractor).__name__}, errors found in file:\n{issue_text}"
252
+ ),
203
253
  stacklevel=2,
204
254
  )
205
255
  else:
206
- self._add_triples(extractor.extract())
256
+ self._add_triples(extractor.extract(), named_graph=named_graph)
207
257
 
208
258
  if success:
209
259
  _end = datetime.now(timezone.utc)
@@ -218,7 +268,7 @@ class NeatGraphStore:
218
268
  activity=activity,
219
269
  start=_start,
220
270
  end=_end,
221
- description=f"Extracted triples to graph store using {type(extractor).__name__}",
271
+ description=f"Extracted triples to named graph {named_graph} using {type(extractor).__name__}",
222
272
  )
223
273
  self.provenance.append(last_change)
224
274
  if last_change:
@@ -226,17 +276,35 @@ class NeatGraphStore:
226
276
  return issue_list
227
277
 
228
278
  def _read_via_rules_linkage(
229
- self, class_neat_id: URIRef, property_link_pairs: dict[str, URIRef] | None
279
+ self,
280
+ class_neat_id: URIRef,
281
+ property_link_pairs: dict[str, URIRef] | None,
282
+ named_graph: URIRef | None = None,
230
283
  ) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
231
- if self.rules is None:
232
- warnings.warn("Rules not found in graph store! Aborting!", stacklevel=2)
284
+ named_graph = named_graph or self.default_named_graph
285
+
286
+ if named_graph not in self.named_graphs:
287
+ warnings.warn(
288
+ f"Named graph {named_graph} not found in graph store, cannot read",
289
+ stacklevel=2,
290
+ )
291
+ return
292
+
293
+ if not self.rules or named_graph not in self.rules:
294
+ warnings.warn(
295
+ f"Rules for named graph {named_graph} not found in graph store!",
296
+ stacklevel=2,
297
+ )
233
298
  return
299
+
234
300
  if self.multi_type_instances:
235
301
  warnings.warn(
236
302
  "Multi typed instances detected, issues with loading can occur!",
237
303
  stacklevel=2,
238
304
  )
239
- analysis = InformationAnalysis(self.rules)
305
+
306
+ analysis = InformationAnalysis(self.rules[named_graph])
307
+
240
308
  if cls := analysis.classes_by_neat_id.get(class_neat_id):
241
309
  if property_link_pairs:
242
310
  property_renaming_config = {
@@ -272,9 +340,22 @@ class NeatGraphStore:
272
340
  self,
273
341
  class_entity: ClassEntity,
274
342
  property_renaming_config: dict[URIRef, str] | None = None,
343
+ named_graph: URIRef | None = None,
275
344
  ) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
276
- if self.rules is None:
277
- warnings.warn("Rules not found in graph store!", stacklevel=2)
345
+ named_graph = named_graph or self.default_named_graph
346
+
347
+ if named_graph not in self.named_graphs:
348
+ warnings.warn(
349
+ f"Named graph {named_graph} not found in graph store, cannot read",
350
+ stacklevel=2,
351
+ )
352
+ return
353
+
354
+ if not self.rules or named_graph not in self.rules:
355
+ warnings.warn(
356
+ f"Rules for named graph {named_graph} not found in graph store!",
357
+ stacklevel=2,
358
+ )
278
359
  return
279
360
  if self.multi_type_instances:
280
361
  warnings.warn(
@@ -282,28 +363,28 @@ class NeatGraphStore:
282
363
  stacklevel=2,
283
364
  )
284
365
 
285
- if class_entity not in [definition.class_ for definition in self.rules.classes]:
366
+ if class_entity not in [definition.class_ for definition in self.rules[named_graph].classes]:
286
367
  warnings.warn("Desired type not found in graph!", stacklevel=2)
287
368
  return
288
369
 
289
- if not (class_uri := InformationAnalysis(self.rules).class_uri(class_entity)):
370
+ if not (class_uri := InformationAnalysis(self.rules[named_graph]).class_uri(class_entity)):
290
371
  warnings.warn(
291
372
  f"Class {class_entity.suffix} does not have namespace defined for prefix {class_entity.prefix} Rules!",
292
373
  stacklevel=2,
293
374
  )
294
375
  return
295
376
 
296
- has_hop_transformations = InformationAnalysis(self.rules).has_hop_transformations()
377
+ has_hop_transformations = InformationAnalysis(self.rules[named_graph]).has_hop_transformations()
297
378
  has_self_reference_transformations = InformationAnalysis(
298
- self.rules
379
+ self.rules[named_graph]
299
380
  ).has_self_reference_property_transformations()
300
381
  if has_hop_transformations or has_self_reference_transformations:
301
382
  msg = (
302
- f"Rules contain [{'Hop' if has_hop_transformations else '' }"
303
- f", {'SelfReferenceProperty' if has_self_reference_transformations else '' }]"
383
+ f"Rules contain [{'Hop' if has_hop_transformations else ''}"
384
+ f", {'SelfReferenceProperty' if has_self_reference_transformations else ''}]"
304
385
  " rdfpath."
305
- f" Run [{'ReduceHopTraversal' if has_hop_transformations else '' }"
306
- f", {'AddSelfReferenceProperty' if has_self_reference_transformations else '' }]"
386
+ f" Run [{'ReduceHopTraversal' if has_hop_transformations else ''}"
387
+ f", {'AddSelfReferenceProperty' if has_self_reference_transformations else ''}]"
307
388
  " transformer(s) first!"
308
389
  )
309
390
 
@@ -318,23 +399,19 @@ class NeatGraphStore:
318
399
 
319
400
  # get potential property renaming config
320
401
  property_renaming_config = property_renaming_config or InformationAnalysis(
321
- self.rules
402
+ self.rules[named_graph]
322
403
  ).define_property_renaming_config(class_entity)
323
404
 
324
- # get property types to guide process of removing or not namespaces from results
325
- property_types = InformationAnalysis(self.rules).property_types(class_entity)
326
405
  for instance_id in instance_ids:
327
406
  if res := self.queries.describe(
328
407
  instance_id=instance_id,
329
408
  instance_type=class_entity.suffix,
330
409
  property_renaming_config=property_renaming_config,
331
- property_types=property_types,
332
410
  ):
333
411
  yield res
334
412
 
335
413
  def read(
336
- self,
337
- class_: str,
414
+ self, class_: str, named_graph: URIRef | None = None
338
415
  ) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
339
416
  """Read instances for given class from the graph store.
340
417
 
@@ -343,9 +420,20 @@ class NeatGraphStore:
343
420
  the rules which are attached to the graph store.
344
421
 
345
422
  """
423
+ named_graph = named_graph or self.default_named_graph
346
424
 
347
- if not self.rules:
348
- warnings.warn("Rules not found in graph store!", stacklevel=2)
425
+ if named_graph not in self.named_graphs:
426
+ warnings.warn(
427
+ f"Named graph {named_graph} not found in graph store, cannot read",
428
+ stacklevel=2,
429
+ )
430
+ return
431
+
432
+ if not self.rules or named_graph not in self.rules:
433
+ warnings.warn(
434
+ f"Rules for named graph {named_graph} not found in graph store!",
435
+ stacklevel=2,
436
+ )
349
437
  return
350
438
  if self.multi_type_instances:
351
439
  warnings.warn(
@@ -353,15 +441,15 @@ class NeatGraphStore:
353
441
  stacklevel=2,
354
442
  )
355
443
 
356
- class_entity = ClassEntity(prefix=self.rules.metadata.prefix, suffix=class_)
444
+ class_entity = ClassEntity(prefix=self.rules[named_graph].metadata.prefix, suffix=class_)
357
445
 
358
- if class_entity not in [definition.class_ for definition in self.rules.classes]:
446
+ if class_entity not in [definition.class_ for definition in self.rules[named_graph].classes]:
359
447
  warnings.warn("Desired type not found in graph!", stacklevel=2)
360
448
  return
361
449
 
362
450
  yield from self._read_via_class_entity(class_entity)
363
451
 
364
- def count_of_id(self, neat_id: URIRef) -> int:
452
+ def count_of_id(self, neat_id: URIRef, named_graph: URIRef | None = None) -> int:
365
453
  """Count the number of instances of a given type
366
454
 
367
455
  Args:
@@ -370,18 +458,31 @@ class NeatGraphStore:
370
458
  Returns:
371
459
  Number of instances
372
460
  """
373
- if not self.rules:
374
- warnings.warn("Rules not found in graph store!", stacklevel=2)
461
+ named_graph = named_graph or self.default_named_graph
462
+
463
+ if named_graph not in self.named_graphs:
464
+ warnings.warn(
465
+ f"Named graph {named_graph} not found in graph store, cannot count",
466
+ stacklevel=2,
467
+ )
468
+ return 0
469
+
470
+ if not self.rules or named_graph not in self.rules:
471
+ warnings.warn(
472
+ f"Rules for named graph {named_graph} not found in graph store!",
473
+ stacklevel=2,
474
+ )
375
475
  return 0
376
476
 
377
477
  class_entity = next(
378
- (definition.class_ for definition in self.rules.classes if definition.neatId == neat_id), None
478
+ (definition.class_ for definition in self.rules[named_graph].classes if definition.neatId == neat_id),
479
+ None,
379
480
  )
380
481
  if not class_entity:
381
482
  warnings.warn("Desired type not found in graph!", stacklevel=2)
382
483
  return 0
383
484
 
384
- if not (class_uri := InformationAnalysis(self.rules).class_uri(class_entity)):
485
+ if not (class_uri := InformationAnalysis(self.rules[named_graph]).class_uri(class_entity)):
385
486
  warnings.warn(
386
487
  f"Class {class_entity.suffix} does not have namespace defined for prefix {class_entity.prefix} Rules!",
387
488
  stacklevel=2,
@@ -392,10 +493,11 @@ class NeatGraphStore:
392
493
 
393
494
  def count_of_type(self, class_uri: URIRef) -> int:
394
495
  query = f"SELECT (COUNT(?instance) AS ?instanceCount) WHERE {{ ?instance a <{class_uri}> }}"
395
- return int(next(iter(self.graph.query(query)))[0]) # type: ignore[arg-type, index]
496
+ return int(next(iter(self.dataset.query(query)))[0]) # type: ignore[arg-type, index]
396
497
 
397
498
  def _parse_file(
398
499
  self,
500
+ named_graph: URIRef,
399
501
  filepath: Path | ZipExtFile,
400
502
  format: str = "turtle",
401
503
  base_uri: URIRef | None = None,
@@ -403,6 +505,7 @@ class NeatGraphStore:
403
505
  """Imports graph data from file.
404
506
 
405
507
  Args:
508
+ named_graph : URIRef of the named graph to store the data in
406
509
  filepath : File path to file containing graph data, by default None
407
510
  format : rdflib format file containing RDF graph, by default "turtle"
408
511
  base_uri : base URI to add to graph in case of relative URIs, by default None
@@ -419,25 +522,35 @@ class NeatGraphStore:
419
522
  if self.type_ == "OxigraphStore":
420
523
  local_import("pyoxigraph", "oxi")
421
524
 
422
- # this is necessary to trigger rdflib oxigraph plugin
423
- self.graph.parse(
424
- filepath, # type: ignore[arg-type]
425
- format=rdflib_to_oxi_type(format),
426
- transactional=False,
427
- publicID=base_uri,
428
- )
429
- self.graph.store._store.optimize() # type: ignore[attr-defined]
525
+ if format in quad_formats():
526
+ self.dataset.parse(
527
+ filepath, # type: ignore[arg-type]
528
+ format=rdflib_to_oxi_type(format),
529
+ transactional=False,
530
+ publicID=base_uri,
531
+ )
532
+ else:
533
+ self.graph(named_graph).parse(
534
+ filepath, # type: ignore[arg-type]
535
+ format=rdflib_to_oxi_type(format),
536
+ transactional=False,
537
+ publicID=base_uri,
538
+ )
539
+ self.dataset.store._store.optimize() # type: ignore[attr-defined]
430
540
 
431
541
  # All other stores
432
542
  else:
433
- if isinstance(filepath, ZipExtFile) or filepath.is_file():
434
- self.graph.parse(filepath, publicID=base_uri) # type: ignore[arg-type]
543
+ if format in quad_formats():
544
+ self.dataset.parse(filepath, publicID=base_uri, format=format) # type: ignore[arg-type]
435
545
  else:
436
- for filename in filepath.iterdir():
437
- if filename.is_file():
438
- self.graph.parse(filename, publicID=base_uri)
546
+ self.graph(named_graph).parse(filepath, publicID=base_uri, format=format) # type: ignore[arg-type]
439
547
 
440
- def _add_triples(self, triples: Iterable[Triple], batch_size: int = 10_000):
548
+ def _add_triples(
549
+ self,
550
+ triples: Iterable[Triple],
551
+ named_graph: URIRef,
552
+ batch_size: int = 10_000,
553
+ ) -> None:
441
554
  """Adds triples to the graph store in batches.
442
555
 
443
556
  Args:
@@ -445,66 +558,103 @@ class NeatGraphStore:
445
558
  batch_size: Batch size of triples per commit, by default 10_000
446
559
  verbose: Verbose mode, by default False
447
560
  """
448
- add_triples_in_batch(self.graph, triples, batch_size)
561
+ add_triples_in_batch(self.graph(named_graph), triples, batch_size)
449
562
 
450
- def transform(self, transformer: Transformers) -> None:
563
+ def transform(self, transformer: Transformers, named_graph: URIRef | None = None) -> None:
451
564
  """Transforms the graph store using a transformer."""
452
565
 
453
- missing_changes = [
454
- change for change in transformer._need_changes if not self.provenance.activity_took_place(change)
455
- ]
456
- if self.provenance.activity_took_place(type(transformer).__name__) and transformer._use_only_once:
457
- warnings.warn(
458
- f"Cannot transform graph store with {type(transformer).__name__}, already applied",
459
- stacklevel=2,
460
- )
461
- elif missing_changes:
462
- warnings.warn(
463
- (
464
- f"Cannot transform graph store with {type(transformer).__name__}, "
465
- f"missing one or more required changes [{', '.join(missing_changes)}]"
466
- ),
467
- stacklevel=2,
468
- )
566
+ named_graph = named_graph or self.default_named_graph
567
+ if named_graph in self.named_graphs:
568
+ missing_changes = [
569
+ change for change in transformer._need_changes if not self.provenance.activity_took_place(change)
570
+ ]
571
+ if self.provenance.activity_took_place(type(transformer).__name__) and transformer._use_only_once:
572
+ warnings.warn(
573
+ f"Cannot transform graph store with {type(transformer).__name__}, already applied",
574
+ stacklevel=2,
575
+ )
576
+ elif missing_changes:
577
+ warnings.warn(
578
+ (
579
+ f"Cannot transform graph store with {type(transformer).__name__}, "
580
+ f"missing one or more required changes [{', '.join(missing_changes)}]"
581
+ ),
582
+ stacklevel=2,
583
+ )
469
584
 
470
- else:
471
- _start = datetime.now(timezone.utc)
472
- transformer.transform(self.graph)
473
- self.provenance.append(
474
- Change.record(
475
- activity=f"{type(transformer).__name__}",
476
- start=_start,
477
- end=datetime.now(timezone.utc),
478
- description=transformer.description,
585
+ else:
586
+ _start = datetime.now(timezone.utc)
587
+ transformer.transform(self.graph(named_graph))
588
+ self.provenance.append(
589
+ Change.record(
590
+ activity=f"{type(transformer).__name__}",
591
+ start=_start,
592
+ end=datetime.now(timezone.utc),
593
+ description=transformer.description,
594
+ )
479
595
  )
596
+
597
+ else:
598
+ warnings.warn(
599
+ f"Named graph {named_graph} not found in graph store, cannot transform",
600
+ stacklevel=2,
480
601
  )
481
602
 
482
603
  @property
483
- def summary(self) -> pd.DataFrame:
484
- return pd.DataFrame(self.queries.summarize_instances(), columns=["Type", "Occurrence"])
604
+ def summary(self) -> dict[URIRef, pd.DataFrame]:
605
+ return {
606
+ named_graph: pd.DataFrame(
607
+ self.queries.summarize_instances(named_graph),
608
+ columns=["Type", "Occurrence"],
609
+ )
610
+ for named_graph in self.named_graphs
611
+ }
485
612
 
486
613
  @property
487
- def multi_type_instances(self) -> dict[str, list[str]]:
488
- return self.queries.multi_type_instances()
614
+ def multi_type_instances(self) -> dict[URIRef, dict[str, list[str]]]:
615
+ return {named_graph: self.queries.multi_type_instances(named_graph) for named_graph in self.named_graphs}
489
616
 
490
617
  def _repr_html_(self) -> str:
491
618
  provenance = self.provenance._repr_html_()
492
- summary: pd.DataFrame = self.summary
619
+ summary: dict[URIRef, pd.DataFrame] = self.summary
620
+
621
+ def _short_name_of_graph(named_graph: URIRef) -> str:
622
+ return "default" if named_graph == self.default_named_graph else remove_namespace_from_uri(named_graph)
493
623
 
494
- if summary.empty:
624
+ if not summary:
495
625
  summary_text = "<br /><strong>Graph is empty</strong><br />"
496
626
  else:
627
+ all_types = set().union(
628
+ *[set(sub_summary.Type) for sub_summary in summary.values() if not sub_summary.empty]
629
+ )
630
+
497
631
  summary_text = (
498
632
  "<br /><strong>Overview</strong>:" # type: ignore
499
- f"<ul><li>{len(summary)} types</strong></li>"
500
- f"<li>{sum(summary['Occurrence'])} instances</strong></li></ul>"
501
- f"{cast(pd.DataFrame, self._shorten_summary(summary))._repr_html_()}" # type: ignore[operator]
633
+ f"<ul><li>{len(summary)} named graphs</strong></li>"
634
+ f"<li>Total of {len(all_types)} unique types</strong></li>"
502
635
  )
503
636
 
504
- if self.multi_type_instances:
505
- summary_text += "<br><strong>Multi value instances detected! Loading could have issues!</strong></br>" # type: ignore
637
+ for named_graph, table in summary.items():
638
+ summary_text += (
639
+ f"<li>{sum(table['Occurrence'])} instances in {_short_name_of_graph(named_graph)}"
640
+ " graph</strong></li>"
641
+ )
642
+
643
+ summary_text += "</ul>"
644
+ for named_graph, table in summary.items():
645
+ summary_text += (
646
+ f"<br /><strong>{_short_name_of_graph(named_graph)} graph</strong>:"
647
+ f"{cast(pd.DataFrame, self._shorten_summary(table))._repr_html_()}" # type: ignore[operator]
648
+ )
506
649
 
507
- return f"{summary_text}" f"{provenance}"
650
+ for named_graph, multi_value_instances in self.multi_type_instances.items():
651
+ if multi_value_instances:
652
+ summary_text += (
653
+ f"<br><strong>Multi value instances detected in {_short_name_of_graph(named_graph)}"
654
+ "graph! Loading could have issues!</strong></br>"
655
+ )
656
+
657
+ return f"{summary_text}{provenance}"
508
658
 
509
659
  def _shorten_summary(self, summary: pd.DataFrame) -> pd.DataFrame:
510
660
  """Shorten summary to top 5 types by occurrence."""
@@ -531,3 +681,7 @@ class NeatGraphStore:
531
681
  shorter_summary.index = cast(Index, indexes)
532
682
 
533
683
  return shorter_summary
684
+
685
+ @property
686
+ def named_graphs(self) -> list[URIRef]:
687
+ return [cast(URIRef, context.identifier) for context in self.dataset.contexts()]