cognite-neat 0.106.0__py3-none-any.whl → 0.108.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cognite-neat might be problematic; consult the registry's advisory page for details.

Files changed (67)
  1. cognite/neat/_constants.py +35 -1
  2. cognite/neat/_graph/_shared.py +4 -0
  3. cognite/neat/_graph/extractors/__init__.py +5 -1
  4. cognite/neat/_graph/extractors/_base.py +32 -0
  5. cognite/neat/_graph/extractors/_classic_cdf/_base.py +128 -14
  6. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +156 -12
  7. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +50 -12
  8. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +26 -1
  9. cognite/neat/_graph/extractors/_dms.py +196 -47
  10. cognite/neat/_graph/extractors/_dms_graph.py +199 -0
  11. cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
  12. cognite/neat/_graph/extractors/_rdf_file.py +33 -5
  13. cognite/neat/_graph/loaders/__init__.py +1 -3
  14. cognite/neat/_graph/loaders/_rdf2dms.py +123 -19
  15. cognite/neat/_graph/queries/_base.py +140 -84
  16. cognite/neat/_graph/queries/_construct.py +2 -2
  17. cognite/neat/_graph/transformers/__init__.py +8 -1
  18. cognite/neat/_graph/transformers/_base.py +9 -1
  19. cognite/neat/_graph/transformers/_classic_cdf.py +90 -3
  20. cognite/neat/_graph/transformers/_rdfpath.py +3 -3
  21. cognite/neat/_graph/transformers/_value_type.py +106 -45
  22. cognite/neat/_issues/errors/_resources.py +1 -1
  23. cognite/neat/_issues/warnings/__init__.py +0 -2
  24. cognite/neat/_issues/warnings/_models.py +1 -1
  25. cognite/neat/_issues/warnings/_properties.py +0 -8
  26. cognite/neat/_rules/analysis/_base.py +1 -1
  27. cognite/neat/_rules/analysis/_information.py +14 -13
  28. cognite/neat/_rules/catalog/__init__.py +1 -0
  29. cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
  30. cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
  31. cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
  32. cognite/neat/_rules/importers/__init__.py +3 -1
  33. cognite/neat/_rules/importers/_dms2rules.py +7 -5
  34. cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
  35. cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
  36. cognite/neat/_rules/importers/_rdf/_base.py +2 -2
  37. cognite/neat/_rules/importers/_rdf/_inference2rules.py +242 -19
  38. cognite/neat/_rules/models/_base_rules.py +13 -15
  39. cognite/neat/_rules/models/_types.py +5 -0
  40. cognite/neat/_rules/models/dms/_rules.py +51 -10
  41. cognite/neat/_rules/models/dms/_rules_input.py +4 -0
  42. cognite/neat/_rules/models/information/_rules.py +48 -5
  43. cognite/neat/_rules/models/information/_rules_input.py +6 -1
  44. cognite/neat/_rules/models/mapping/_classic2core.py +4 -5
  45. cognite/neat/_rules/transformers/__init__.py +10 -0
  46. cognite/neat/_rules/transformers/_converters.py +300 -62
  47. cognite/neat/_session/_base.py +57 -10
  48. cognite/neat/_session/_drop.py +5 -1
  49. cognite/neat/_session/_inspect.py +3 -2
  50. cognite/neat/_session/_mapping.py +17 -6
  51. cognite/neat/_session/_prepare.py +0 -47
  52. cognite/neat/_session/_read.py +115 -10
  53. cognite/neat/_session/_set.py +27 -0
  54. cognite/neat/_session/_show.py +4 -4
  55. cognite/neat/_session/_state.py +12 -1
  56. cognite/neat/_session/_to.py +43 -2
  57. cognite/neat/_session/_wizard.py +1 -1
  58. cognite/neat/_session/exceptions.py +8 -3
  59. cognite/neat/_store/_graph_store.py +331 -136
  60. cognite/neat/_store/_rules_store.py +130 -1
  61. cognite/neat/_utils/auth.py +3 -1
  62. cognite/neat/_version.py +1 -1
  63. {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/METADATA +2 -2
  64. {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/RECORD +67 -65
  65. {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/WHEEL +1 -1
  66. {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/LICENSE +0 -0
  67. {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/entry_points.txt +0 -0
@@ -3,15 +3,16 @@ import warnings
3
3
  from collections.abc import Iterable
4
4
  from datetime import datetime, timezone
5
5
  from pathlib import Path
6
- from typing import cast
6
+ from typing import cast, overload
7
+ from zipfile import ZipExtFile
7
8
 
8
9
  import pandas as pd
9
10
  from pandas import Index
10
- from rdflib import Dataset, Namespace, URIRef
11
+ from rdflib import Dataset, Graph, Namespace, URIRef
12
+ from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
11
13
  from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
12
14
 
13
- from cognite.neat._constants import DEFAULT_NAMESPACE
14
- from cognite.neat._graph._shared import rdflib_to_oxi_type
15
+ from cognite.neat._graph._shared import quad_formats, rdflib_to_oxi_type
15
16
  from cognite.neat._graph.extractors import RdfFileExtractor, TripleExtractors
16
17
  from cognite.neat._graph.queries import Queries
17
18
  from cognite.neat._graph.transformers import Transformers
@@ -21,7 +22,7 @@ from cognite.neat._rules.models import InformationRules
21
22
  from cognite.neat._rules.models.entities import ClassEntity
22
23
  from cognite.neat._shared import InstanceType, Triple
23
24
  from cognite.neat._utils.auxiliary import local_import
24
- from cognite.neat._utils.rdf_ import add_triples_in_batch
25
+ from cognite.neat._utils.rdf_ import add_triples_in_batch, remove_namespace_from_uri
25
26
 
26
27
  from ._provenance import Change, Provenance
27
28
 
@@ -38,80 +39,130 @@ class NeatGraphStore:
38
39
  Args:
39
40
  graph : Instance of rdflib.Graph class for graph storage
40
41
  rules:
42
+
43
+ !!! note "Dataset"
44
+ The store leverages a RDF dataset which is defined as a collection of RDF graphs
45
+ where all but one are named graphs associated with URIRef (the graph name),
46
+ and the unnamed default graph which is in context of rdflib library has an
47
+ identifier URIRef('urn:x-rdflib:default').
41
48
  """
42
49
 
43
50
  rdf_store_type: str
44
51
 
45
52
    def __init__(
        self,
        dataset: Dataset,
        default_named_graph: URIRef | None = None,
    ):
        """Initialize the store around an rdflib Dataset.

        Args:
            dataset: rdflib Dataset backing the store (in-memory, SPARQL or Oxigraph).
            default_named_graph: graph used when callers pass no explicit named graph;
                falls back to rdflib's DATASET_DEFAULT_GRAPH_ID.
        """
        # Rules and base namespaces are tracked per named graph.
        self.rules: dict[URIRef, InformationRules] = {}
        self.base_namespace: dict[URIRef, Namespace] = {}

        _start = datetime.now(timezone.utc)
        self.dataset = dataset
        # Record store creation as the first provenance entry.
        self.provenance = Provenance(
            [
                Change.record(
                    activity=f"{type(self).__name__}.__init__",
                    start=_start,
                    end=datetime.now(timezone.utc),
                    description=f"Initialize graph store as {type(self.dataset.store).__name__}",
                )
            ]
        )

        self.default_named_graph = default_named_graph or DATASET_DEFAULT_GRAPH_ID

        # Queries is bound to the dataset, the per-graph rules and the default graph.
        self.queries = Queries(self.dataset, self.rules, self.default_named_graph)
76
+
77
+ def graph(self, named_graph: URIRef | None = None) -> Graph:
78
+ """Get named graph from the dataset to query over"""
79
+ return self.dataset.graph(named_graph or self.default_named_graph)
71
80
 
72
81
  @property
73
82
  def type_(self) -> str:
74
83
  "Return type of the graph store"
75
- return type(self.graph.store).__name__
84
+ return type(self.dataset.store).__name__
85
+
86
+ # no destination
87
+ @overload
88
+ def serialize(self, filepath: None = None) -> str: ...
89
+
90
+ # with destination
91
+ @overload
92
+ def serialize(self, filepath: Path) -> None: ...
93
+
94
+ def serialize(self, filepath: Path | None = None) -> None | str:
95
+ """Serialize the graph store to a file.
96
+
97
+ Args:
98
+ filepath: File path to serialize the graph store to
76
99
 
77
- def add_rules(self, rules: InformationRules) -> None:
78
- """This method is used to add rules to the graph store and it is the only correct
79
- way to add rules to the graph store, after the graph store has been initialized.
100
+ Returns:
101
+ Serialized graph store
102
+
103
+ !!! note "Trig Format"
104
+ Notice that instead of turtle format we are using trig format for serialization.
105
+ This is because trig format is a superset of turtle format and it allows us to
106
+ serialize named graphs as well. Allowing serialization of one or more named graphs
107
+ including the default graph.
80
108
  """
109
+ if filepath:
110
+ self.dataset.serialize(
111
+ filepath,
112
+ format="ox-trig" if self.type_ == "OxigraphStore" else "trig",
113
+ )
114
+ return None
115
+ else:
116
+ return self.dataset.serialize(format="ox-trig" if self.type_ == "OxigraphStore" else "trig")
81
117
 
82
- self.rules = rules
83
- self.base_namespace = self.rules.metadata.namespace
84
- self.queries = Queries(self.graph, self.rules)
85
- self.provenance.append(
86
- Change.record(
87
- activity=f"{type(self)}.rules",
88
- start=datetime.now(timezone.utc),
89
- end=datetime.now(timezone.utc),
90
- description=f"Added rules to graph store as {type(self.rules).__name__}",
118
+ def add_rules(self, rules: InformationRules, named_graph: URIRef | None = None) -> None:
119
+ """This method is used to add rules to a named graph stored in the graph store.
120
+
121
+ Args:
122
+ rules: InformationRules object containing rules to be added to the named graph
123
+ named_graph: URIRef of the named graph to store the rules in, by default None
124
+ rules will be added to the default graph
125
+
126
+ """
127
+
128
+ named_graph = named_graph or self.default_named_graph
129
+
130
+ if named_graph in self.named_graphs:
131
+ # attaching appropriate namespace to the rules
132
+ # as well base_namespace
133
+ self.rules[named_graph] = rules
134
+ self.base_namespace[named_graph] = rules.metadata.namespace
135
+ self.queries = Queries(self.dataset, self.rules)
136
+ self.provenance.append(
137
+ Change.record(
138
+ activity=f"{type(self)}.rules",
139
+ start=datetime.now(timezone.utc),
140
+ end=datetime.now(timezone.utc),
141
+ description=f"Added {type(self.rules).__name__} to {named_graph} named graph",
142
+ )
91
143
  )
92
- )
93
144
 
94
- if self.rules.prefixes:
95
- self._upsert_prefixes(self.rules.prefixes)
145
+ if self.rules[named_graph].prefixes:
146
+ self._upsert_prefixes(self.rules[named_graph].prefixes, named_graph)
96
147
 
97
- def _upsert_prefixes(self, prefixes: dict[str, Namespace]) -> None:
148
+ def _upsert_prefixes(self, prefixes: dict[str, Namespace], named_graph: URIRef) -> None:
98
149
  """Adds prefixes to the graph store."""
99
150
  _start = datetime.now(timezone.utc)
100
151
  for prefix, namespace in prefixes.items():
101
- self.graph.bind(prefix, namespace)
152
+ self.graph(named_graph).bind(prefix, namespace)
102
153
 
103
154
  self.provenance.append(
104
155
  Change.record(
105
156
  activity=f"{type(self).__name__}._upsert_prefixes",
106
157
  start=_start,
107
158
  end=datetime.now(timezone.utc),
108
- description="Upsert prefixes to graph store",
159
+ description="Upsert prefixes to the name graph {named_graph}",
109
160
  )
110
161
  )
111
162
 
112
163
  @classmethod
113
- def from_memory_store(cls, rules: InformationRules | None = None) -> "Self":
114
- return cls(Dataset(), rules)
164
+ def from_memory_store(cls) -> "Self":
165
+ return cls(Dataset())
115
166
 
116
167
  @classmethod
117
168
  def from_sparql_store(
@@ -119,7 +170,6 @@ class NeatGraphStore:
119
170
  query_endpoint: str | None = None,
120
171
  update_endpoint: str | None = None,
121
172
  returnFormat: str = "csv",
122
- rules: InformationRules | None = None,
123
173
  ) -> "Self":
124
174
  store = SPARQLUpdateStore(
125
175
  query_endpoint=query_endpoint,
@@ -130,10 +180,27 @@ class NeatGraphStore:
130
180
  autocommit=False,
131
181
  )
132
182
  graph = Dataset(store=store)
133
- return cls(graph, rules)
183
+ return cls(graph)
134
184
 
135
185
  @classmethod
136
- def from_oxi_store(cls, storage_dir: Path | None = None, rules: InformationRules | None = None) -> "Self":
186
+ def from_oxi_remote_store(
187
+ cls,
188
+ remote_url: str,
189
+ autocommit: bool = False,
190
+ ) -> "Self":
191
+ """Creates a NeatGraphStore from a remote Oxigraph store SPARQL endpoint."""
192
+
193
+ return cls(
194
+ dataset=Dataset(
195
+ store=SPARQLUpdateStore(
196
+ query_endpoint=f"{remote_url}/query", update_endpoint=f"{remote_url}/query", autocommit=autocommit
197
+ ),
198
+ default_union=True,
199
+ )
200
+ )
201
+
202
+ @classmethod
203
+ def from_oxi_local_store(cls, storage_dir: Path | None = None) -> "Self":
137
204
  """Creates a NeatGraphStore from an Oxigraph store."""
138
205
  local_import("pyoxigraph", "oxi")
139
206
  local_import("oxrdflib", "oxi")
@@ -152,29 +219,41 @@ class NeatGraphStore:
152
219
  else:
153
220
  raise Exception("Error initializing Oxigraph store")
154
221
 
155
- graph = Dataset(
156
- store=oxrdflib.OxigraphStore(store=oxi_store),
222
+ return cls(
223
+ dataset=Dataset(
224
+ store=oxrdflib.OxigraphStore(store=oxi_store),
225
+ )
157
226
  )
158
227
 
159
- return cls(graph, rules)
160
-
161
- def write(self, extractor: TripleExtractors) -> IssueList:
228
+ def write(self, extractor: TripleExtractors, named_graph: URIRef | None = None) -> IssueList:
162
229
  last_change: Change | None = None
230
+ named_graph = named_graph or self.default_named_graph
163
231
  with catch_issues() as issue_list:
164
232
  _start = datetime.now(timezone.utc)
165
233
  success = True
166
234
 
167
235
  if isinstance(extractor, RdfFileExtractor) and not extractor.issue_list.has_errors:
168
- self._parse_file(extractor.filepath, cast(str, extractor.format), extractor.base_uri)
236
+ self._parse_file(
237
+ named_graph,
238
+ extractor.filepath,
239
+ cast(str, extractor.format),
240
+ extractor.base_uri,
241
+ )
242
+ if isinstance(extractor.filepath, ZipExtFile):
243
+ extractor.filepath.close()
244
+
169
245
  elif isinstance(extractor, RdfFileExtractor):
170
246
  success = False
171
247
  issue_text = "\n".join([issue.as_message() for issue in extractor.issue_list])
172
248
  warnings.warn(
173
- f"Cannot write to graph store with {type(extractor).__name__}, errors found in file:\n{issue_text}",
249
+ (
250
+ f"Cannot write to named graph {named_graph} with "
251
+ f"{type(extractor).__name__}, errors found in file:\n{issue_text}"
252
+ ),
174
253
  stacklevel=2,
175
254
  )
176
255
  else:
177
- self._add_triples(extractor.extract())
256
+ self._add_triples(extractor.extract(), named_graph=named_graph)
178
257
 
179
258
  if success:
180
259
  _end = datetime.now(timezone.utc)
@@ -189,7 +268,7 @@ class NeatGraphStore:
189
268
  activity=activity,
190
269
  start=_start,
191
270
  end=_end,
192
- description=f"Extracted triples to graph store using {type(extractor).__name__}",
271
+ description=f"Extracted triples to named graph {named_graph} using {type(extractor).__name__}",
193
272
  )
194
273
  self.provenance.append(last_change)
195
274
  if last_change:
@@ -197,26 +276,56 @@ class NeatGraphStore:
197
276
  return issue_list
198
277
 
199
278
  def _read_via_rules_linkage(
200
- self, class_neat_id: URIRef, property_link_pairs: dict[str, URIRef] | None
279
+ self,
280
+ class_neat_id: URIRef,
281
+ property_link_pairs: dict[str, URIRef] | None,
282
+ named_graph: URIRef | None = None,
201
283
  ) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
202
- if self.rules is None:
203
- warnings.warn("Rules not found in graph store! Aborting!", stacklevel=2)
284
+ named_graph = named_graph or self.default_named_graph
285
+
286
+ if named_graph not in self.named_graphs:
287
+ warnings.warn(
288
+ f"Named graph {named_graph} not found in graph store, cannot read",
289
+ stacklevel=2,
290
+ )
204
291
  return
292
+
293
+ if not self.rules or named_graph not in self.rules:
294
+ warnings.warn(
295
+ f"Rules for named graph {named_graph} not found in graph store!",
296
+ stacklevel=2,
297
+ )
298
+ return
299
+
205
300
  if self.multi_type_instances:
206
301
  warnings.warn(
207
302
  "Multi typed instances detected, issues with loading can occur!",
208
303
  stacklevel=2,
209
304
  )
210
305
 
211
- if cls := InformationAnalysis(self.rules).classes_by_neat_id.get(class_neat_id):
306
+ analysis = InformationAnalysis(self.rules[named_graph])
307
+
308
+ if cls := analysis.classes_by_neat_id.get(class_neat_id):
212
309
  if property_link_pairs:
213
310
  property_renaming_config = {
214
311
  prop_uri: prop_name
215
312
  for prop_name, prop_neat_id in property_link_pairs.items()
216
- if (
217
- prop_uri := InformationAnalysis(self.rules).neat_id_to_transformation_property_uri(prop_neat_id)
218
- )
313
+ if (prop_uri := analysis.neat_id_to_instance_source_property_uri(prop_neat_id))
219
314
  }
315
+ if information_properties := analysis.classes_with_properties(consider_inheritance=True).get(
316
+ cls.class_
317
+ ):
318
+ for prop in information_properties:
319
+ if prop.neatId is None:
320
+ continue
321
+ # Include renaming done in the Information rules that are not present in the
322
+ # property_link_pairs. The use case for this renaming to startNode and endNode
323
+ # properties that are not part of DMSRules but will typically be present
324
+ # in the Information rules.
325
+ if (
326
+ uri := analysis.neat_id_to_instance_source_property_uri(prop.neatId)
327
+ ) and uri not in property_renaming_config:
328
+ property_renaming_config[uri] = prop.property_
220
329
 
221
330
  yield from self._read_via_class_entity(cls.class_, property_renaming_config)
222
331
  return
@@ -231,9 +340,22 @@ class NeatGraphStore:
231
340
  self,
232
341
  class_entity: ClassEntity,
233
342
  property_renaming_config: dict[URIRef, str] | None = None,
343
+ named_graph: URIRef | None = None,
234
344
  ) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
235
- if self.rules is None:
236
- warnings.warn("Rules not found in graph store!", stacklevel=2)
345
+ named_graph = named_graph or self.default_named_graph
346
+
347
+ if named_graph not in self.named_graphs:
348
+ warnings.warn(
349
+ f"Named graph {named_graph} not found in graph store, cannot read",
350
+ stacklevel=2,
351
+ )
352
+ return
353
+
354
+ if not self.rules or named_graph not in self.rules:
355
+ warnings.warn(
356
+ f"Rules for named graph {named_graph} not found in graph store!",
357
+ stacklevel=2,
358
+ )
237
359
  return
238
360
  if self.multi_type_instances:
239
361
  warnings.warn(
@@ -241,28 +363,28 @@ class NeatGraphStore:
241
363
  stacklevel=2,
242
364
  )
243
365
 
244
- if class_entity not in [definition.class_ for definition in self.rules.classes]:
366
+ if class_entity not in [definition.class_ for definition in self.rules[named_graph].classes]:
245
367
  warnings.warn("Desired type not found in graph!", stacklevel=2)
246
368
  return
247
369
 
248
- if not (class_uri := InformationAnalysis(self.rules).class_uri(class_entity)):
370
+ if not (class_uri := InformationAnalysis(self.rules[named_graph]).class_uri(class_entity)):
249
371
  warnings.warn(
250
372
  f"Class {class_entity.suffix} does not have namespace defined for prefix {class_entity.prefix} Rules!",
251
373
  stacklevel=2,
252
374
  )
253
375
  return
254
376
 
255
- has_hop_transformations = InformationAnalysis(self.rules).has_hop_transformations()
377
+ has_hop_transformations = InformationAnalysis(self.rules[named_graph]).has_hop_transformations()
256
378
  has_self_reference_transformations = InformationAnalysis(
257
- self.rules
379
+ self.rules[named_graph]
258
380
  ).has_self_reference_property_transformations()
259
381
  if has_hop_transformations or has_self_reference_transformations:
260
382
  msg = (
261
- f"Rules contain [{'Hop' if has_hop_transformations else '' }"
262
- f", {'SelfReferenceProperty' if has_self_reference_transformations else '' }]"
383
+ f"Rules contain [{'Hop' if has_hop_transformations else ''}"
384
+ f", {'SelfReferenceProperty' if has_self_reference_transformations else ''}]"
263
385
  " rdfpath."
264
- f" Run [{'ReduceHopTraversal' if has_hop_transformations else '' }"
265
- f", {'AddSelfReferenceProperty' if has_self_reference_transformations else '' }]"
386
+ f" Run [{'ReduceHopTraversal' if has_hop_transformations else ''}"
387
+ f", {'AddSelfReferenceProperty' if has_self_reference_transformations else ''}]"
266
388
  " transformer(s) first!"
267
389
  )
268
390
 
@@ -277,23 +399,19 @@ class NeatGraphStore:
277
399
 
278
400
  # get potential property renaming config
279
401
  property_renaming_config = property_renaming_config or InformationAnalysis(
280
- self.rules
402
+ self.rules[named_graph]
281
403
  ).define_property_renaming_config(class_entity)
282
404
 
283
- # get property types to guide process of removing or not namespaces from results
284
- property_types = InformationAnalysis(self.rules).property_types(class_entity)
285
405
  for instance_id in instance_ids:
286
406
  if res := self.queries.describe(
287
407
  instance_id=instance_id,
288
408
  instance_type=class_entity.suffix,
289
409
  property_renaming_config=property_renaming_config,
290
- property_types=property_types,
291
410
  ):
292
411
  yield res
293
412
 
294
413
  def read(
295
- self,
296
- class_: str,
414
+ self, class_: str, named_graph: URIRef | None = None
297
415
  ) -> Iterable[tuple[str, dict[str | InstanceType, list[str]]]]:
298
416
  """Read instances for given class from the graph store.
299
417
 
@@ -302,9 +420,20 @@ class NeatGraphStore:
302
420
  the rules which are attached to the graph store.
303
421
 
304
422
  """
423
+ named_graph = named_graph or self.default_named_graph
424
+
425
+ if named_graph not in self.named_graphs:
426
+ warnings.warn(
427
+ f"Named graph {named_graph} not found in graph store, cannot read",
428
+ stacklevel=2,
429
+ )
430
+ return
305
431
 
306
- if not self.rules:
307
- warnings.warn("Rules not found in graph store!", stacklevel=2)
432
+ if not self.rules or named_graph not in self.rules:
433
+ warnings.warn(
434
+ f"Rules for named graph {named_graph} not found in graph store!",
435
+ stacklevel=2,
436
+ )
308
437
  return
309
438
  if self.multi_type_instances:
310
439
  warnings.warn(
@@ -312,15 +441,15 @@ class NeatGraphStore:
312
441
  stacklevel=2,
313
442
  )
314
443
 
315
- class_entity = ClassEntity(prefix=self.rules.metadata.prefix, suffix=class_)
444
+ class_entity = ClassEntity(prefix=self.rules[named_graph].metadata.prefix, suffix=class_)
316
445
 
317
- if class_entity not in [definition.class_ for definition in self.rules.classes]:
446
+ if class_entity not in [definition.class_ for definition in self.rules[named_graph].classes]:
318
447
  warnings.warn("Desired type not found in graph!", stacklevel=2)
319
448
  return
320
449
 
321
450
  yield from self._read_via_class_entity(class_entity)
322
451
 
323
- def count_of_id(self, neat_id: URIRef) -> int:
452
+ def count_of_id(self, neat_id: URIRef, named_graph: URIRef | None = None) -> int:
324
453
  """Count the number of instances of a given type
325
454
 
326
455
  Args:
@@ -329,18 +458,31 @@ class NeatGraphStore:
329
458
  Returns:
330
459
  Number of instances
331
460
  """
332
- if not self.rules:
333
- warnings.warn("Rules not found in graph store!", stacklevel=2)
461
+ named_graph = named_graph or self.default_named_graph
462
+
463
+ if named_graph not in self.named_graphs:
464
+ warnings.warn(
465
+ f"Named graph {named_graph} not found in graph store, cannot count",
466
+ stacklevel=2,
467
+ )
468
+ return 0
469
+
470
+ if not self.rules or named_graph not in self.rules:
471
+ warnings.warn(
472
+ f"Rules for named graph {named_graph} not found in graph store!",
473
+ stacklevel=2,
474
+ )
334
475
  return 0
335
476
 
336
477
  class_entity = next(
337
- (definition.class_ for definition in self.rules.classes if definition.neatId == neat_id), None
478
+ (definition.class_ for definition in self.rules[named_graph].classes if definition.neatId == neat_id),
479
+ None,
338
480
  )
339
481
  if not class_entity:
340
482
  warnings.warn("Desired type not found in graph!", stacklevel=2)
341
483
  return 0
342
484
 
343
- if not (class_uri := InformationAnalysis(self.rules).class_uri(class_entity)):
485
+ if not (class_uri := InformationAnalysis(self.rules[named_graph]).class_uri(class_entity)):
344
486
  warnings.warn(
345
487
  f"Class {class_entity.suffix} does not have namespace defined for prefix {class_entity.prefix} Rules!",
346
488
  stacklevel=2,
@@ -351,17 +493,19 @@ class NeatGraphStore:
351
493
 
352
494
  def count_of_type(self, class_uri: URIRef) -> int:
353
495
  query = f"SELECT (COUNT(?instance) AS ?instanceCount) WHERE {{ ?instance a <{class_uri}> }}"
354
- return int(next(iter(self.graph.query(query)))[0]) # type: ignore[arg-type, index]
496
+ return int(next(iter(self.dataset.query(query)))[0]) # type: ignore[arg-type, index]
355
497
 
356
498
    def _parse_file(
        self,
        named_graph: URIRef,
        filepath: Path | ZipExtFile,
        format: str = "turtle",
        base_uri: URIRef | None = None,
    ) -> None:
        """Imports graph data from file.

        Args:
            named_graph : URIRef of the named graph to store the data in
            filepath : File path to file containing graph data, by default None
            format : rdflib format file containing RDF graph, by default "turtle"
            base_uri : base URI to add to graph in case of relative URIs, by default None

        Quad formats (e.g. trig, nquads) carry their own graph names and are
        parsed into the dataset directly; triple formats are parsed into the
        target named graph.
        """

        # Oxigraph store, do not want to type hint this as it is an optional dependency
        if self.type_ == "OxigraphStore":
            local_import("pyoxigraph", "oxi")

            if format in quad_formats():
                # Quads name their own graphs; parse into the whole dataset.
                self.dataset.parse(
                    filepath,  # type: ignore[arg-type]
                    format=rdflib_to_oxi_type(format),
                    transactional=False,
                    publicID=base_uri,
                )
            else:
                # Triples go into the requested named graph.
                self.graph(named_graph).parse(
                    filepath,  # type: ignore[arg-type]
                    format=rdflib_to_oxi_type(format),
                    transactional=False,
                    publicID=base_uri,
                )
            # Ask the underlying pyoxigraph store to optimize after a bulk load.
            self.dataset.store._store.optimize()  # type: ignore[attr-defined]

        # All other stores
        else:
            if format in quad_formats():
                self.dataset.parse(filepath, publicID=base_uri, format=format)  # type: ignore[arg-type]
            else:
                self.graph(named_graph).parse(filepath, publicID=base_uri, format=format)  # type: ignore[arg-type]
398
547
 
399
    def _add_triples(
        self,
        triples: Iterable[Triple],
        named_graph: URIRef,
        batch_size: int = 10_000,
    ) -> None:
        """Adds triples to the graph store in batches.

        Args:
            triples: Iterable of triples to add to the named graph
            named_graph: URIRef of the named graph to add the triples to
            batch_size: Batch size of triples per commit, by default 10_000
        """
        add_triples_in_batch(self.graph(named_graph), triples, batch_size)
408
562
 
409
- def transform(self, transformer: Transformers) -> None:
563
+ def transform(self, transformer: Transformers, named_graph: URIRef | None = None) -> None:
410
564
  """Transforms the graph store using a transformer."""
411
565
 
412
- missing_changes = [
413
- change for change in transformer._need_changes if not self.provenance.activity_took_place(change)
414
- ]
415
- if self.provenance.activity_took_place(type(transformer).__name__) and transformer._use_only_once:
416
- warnings.warn(
417
- f"Cannot transform graph store with {type(transformer).__name__}, already applied",
418
- stacklevel=2,
419
- )
420
- elif missing_changes:
421
- warnings.warn(
422
- (
423
- f"Cannot transform graph store with {type(transformer).__name__}, "
424
- f"missing one or more required changes [{', '.join(missing_changes)}]"
425
- ),
426
- stacklevel=2,
427
- )
566
+ named_graph = named_graph or self.default_named_graph
567
+ if named_graph in self.named_graphs:
568
+ missing_changes = [
569
+ change for change in transformer._need_changes if not self.provenance.activity_took_place(change)
570
+ ]
571
+ if self.provenance.activity_took_place(type(transformer).__name__) and transformer._use_only_once:
572
+ warnings.warn(
573
+ f"Cannot transform graph store with {type(transformer).__name__}, already applied",
574
+ stacklevel=2,
575
+ )
576
+ elif missing_changes:
577
+ warnings.warn(
578
+ (
579
+ f"Cannot transform graph store with {type(transformer).__name__}, "
580
+ f"missing one or more required changes [{', '.join(missing_changes)}]"
581
+ ),
582
+ stacklevel=2,
583
+ )
428
584
 
429
- else:
430
- _start = datetime.now(timezone.utc)
431
- transformer.transform(self.graph)
432
- self.provenance.append(
433
- Change.record(
434
- activity=f"{type(transformer).__name__}",
435
- start=_start,
436
- end=datetime.now(timezone.utc),
437
- description=transformer.description,
585
+ else:
586
+ _start = datetime.now(timezone.utc)
587
+ transformer.transform(self.graph(named_graph))
588
+ self.provenance.append(
589
+ Change.record(
590
+ activity=f"{type(transformer).__name__}",
591
+ start=_start,
592
+ end=datetime.now(timezone.utc),
593
+ description=transformer.description,
594
+ )
438
595
  )
596
+
597
+ else:
598
+ warnings.warn(
599
+ f"Named graph {named_graph} not found in graph store, cannot transform",
600
+ stacklevel=2,
439
601
  )
440
602
 
441
603
  @property
442
- def summary(self) -> pd.DataFrame:
443
- return pd.DataFrame(self.queries.summarize_instances(), columns=["Type", "Occurrence"])
604
+ def summary(self) -> dict[URIRef, pd.DataFrame]:
605
+ return {
606
+ named_graph: pd.DataFrame(
607
+ self.queries.summarize_instances(named_graph),
608
+ columns=["Type", "Occurrence"],
609
+ )
610
+ for named_graph in self.named_graphs
611
+ }
444
612
 
445
613
  @property
446
- def multi_type_instances(self) -> dict[str, list[str]]:
447
- return self.queries.multi_type_instances()
614
+ def multi_type_instances(self) -> dict[URIRef, dict[str, list[str]]]:
615
+ return {named_graph: self.queries.multi_type_instances(named_graph) for named_graph in self.named_graphs}
448
616
 
449
617
  def _repr_html_(self) -> str:
450
618
  provenance = self.provenance._repr_html_()
451
- summary: pd.DataFrame = self.summary
619
+ summary: dict[URIRef, pd.DataFrame] = self.summary
452
620
 
453
- if summary.empty:
621
+ def _short_name_of_graph(named_graph: URIRef) -> str:
622
+ return "default" if named_graph == self.default_named_graph else remove_namespace_from_uri(named_graph)
623
+
624
+ if not summary:
454
625
  summary_text = "<br /><strong>Graph is empty</strong><br />"
455
626
  else:
627
+ all_types = set().union(
628
+ *[set(sub_summary.Type) for sub_summary in summary.values() if not sub_summary.empty]
629
+ )
630
+
456
631
  summary_text = (
457
632
  "<br /><strong>Overview</strong>:" # type: ignore
458
- f"<ul><li>{len(summary)} types</strong></li>"
459
- f"<li>{sum(summary['Occurrence'])} instances</strong></li></ul>"
460
- f"{cast(pd.DataFrame, self._shorten_summary(summary))._repr_html_()}" # type: ignore[operator]
633
+ f"<ul><li>{len(summary)} named graphs</strong></li>"
634
+ f"<li>Total of {len(all_types)} unique types</strong></li>"
461
635
  )
462
636
 
463
- if self.multi_type_instances:
464
- summary_text += "<br><strong>Multi value instances detected! Loading could have issues!</strong></br>" # type: ignore
637
+ for named_graph, table in summary.items():
638
+ summary_text += (
639
+ f"<li>{sum(table['Occurrence'])} instances in {_short_name_of_graph(named_graph)}"
640
+ " graph</strong></li>"
641
+ )
642
+
643
+ summary_text += "</ul>"
644
+ for named_graph, table in summary.items():
645
+ summary_text += (
646
+ f"<br /><strong>{_short_name_of_graph(named_graph)} graph</strong>:"
647
+ f"{cast(pd.DataFrame, self._shorten_summary(table))._repr_html_()}" # type: ignore[operator]
648
+ )
465
649
 
466
- return f"{summary_text}" f"{provenance}"
650
+ for named_graph, multi_value_instances in self.multi_type_instances.items():
651
+ if multi_value_instances:
652
+ summary_text += (
653
+ f"<br><strong>Multi value instances detected in {_short_name_of_graph(named_graph)}"
654
+ "graph! Loading could have issues!</strong></br>"
655
+ )
656
+
657
+ return f"{summary_text}{provenance}"
467
658
 
468
659
  def _shorten_summary(self, summary: pd.DataFrame) -> pd.DataFrame:
469
660
  """Shorten summary to top 5 types by occurrence."""
@@ -490,3 +681,7 @@ class NeatGraphStore:
490
681
  shorter_summary.index = cast(Index, indexes)
491
682
 
492
683
  return shorter_summary
684
+
685
+ @property
686
+ def named_graphs(self) -> list[URIRef]:
687
+ return [cast(URIRef, context.identifier) for context in self.dataset.contexts()]