cognite-neat 0.87.4__py3-none-any.whl → 0.88.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (132)
  1. cognite/neat/_version.py +1 -1
  2. cognite/neat/app/api/data_classes/rest.py +0 -19
  3. cognite/neat/app/api/explorer.py +6 -4
  4. cognite/neat/app/api/routers/crud.py +11 -21
  5. cognite/neat/app/api/routers/workflows.py +24 -94
  6. cognite/neat/graph/extractors/_classic_cdf/_assets.py +8 -2
  7. cognite/neat/graph/extractors/_mock_graph_generator.py +2 -2
  8. cognite/neat/graph/loaders/_base.py +17 -12
  9. cognite/neat/graph/loaders/_rdf2asset.py +223 -58
  10. cognite/neat/graph/loaders/_rdf2dms.py +1 -1
  11. cognite/neat/graph/stores/_base.py +5 -0
  12. cognite/neat/rules/analysis/_asset.py +31 -1
  13. cognite/neat/rules/importers/_inference2rules.py +31 -35
  14. cognite/neat/rules/models/information/_rules.py +1 -1
  15. cognite/neat/workflows/steps/data_contracts.py +17 -43
  16. cognite/neat/workflows/steps/lib/current/graph_extractor.py +28 -24
  17. cognite/neat/workflows/steps/lib/current/graph_loader.py +4 -21
  18. cognite/neat/workflows/steps/lib/current/graph_store.py +18 -134
  19. cognite/neat/workflows/steps_registry.py +5 -7
  20. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/METADATA +1 -1
  21. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/RECORD +24 -132
  22. cognite/neat/app/api/routers/core.py +0 -91
  23. cognite/neat/app/api/routers/data_exploration.py +0 -336
  24. cognite/neat/app/api/routers/rules.py +0 -203
  25. cognite/neat/legacy/__init__.py +0 -0
  26. cognite/neat/legacy/graph/__init__.py +0 -3
  27. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44-dirty.xml +0 -20182
  28. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44.xml +0 -20163
  29. cognite/neat/legacy/graph/examples/__init__.py +0 -10
  30. cognite/neat/legacy/graph/examples/skos-capturing-sheet-wind-topics.xlsx +0 -0
  31. cognite/neat/legacy/graph/exceptions.py +0 -90
  32. cognite/neat/legacy/graph/extractors/__init__.py +0 -6
  33. cognite/neat/legacy/graph/extractors/_base.py +0 -14
  34. cognite/neat/legacy/graph/extractors/_dexpi.py +0 -44
  35. cognite/neat/legacy/graph/extractors/_graph_capturing_sheet.py +0 -403
  36. cognite/neat/legacy/graph/extractors/_mock_graph_generator.py +0 -361
  37. cognite/neat/legacy/graph/loaders/__init__.py +0 -23
  38. cognite/neat/legacy/graph/loaders/_asset_loader.py +0 -511
  39. cognite/neat/legacy/graph/loaders/_base.py +0 -67
  40. cognite/neat/legacy/graph/loaders/_exceptions.py +0 -85
  41. cognite/neat/legacy/graph/loaders/core/__init__.py +0 -0
  42. cognite/neat/legacy/graph/loaders/core/labels.py +0 -58
  43. cognite/neat/legacy/graph/loaders/core/models.py +0 -136
  44. cognite/neat/legacy/graph/loaders/core/rdf_to_assets.py +0 -1046
  45. cognite/neat/legacy/graph/loaders/core/rdf_to_relationships.py +0 -559
  46. cognite/neat/legacy/graph/loaders/rdf_to_dms.py +0 -309
  47. cognite/neat/legacy/graph/loaders/validator.py +0 -87
  48. cognite/neat/legacy/graph/models.py +0 -6
  49. cognite/neat/legacy/graph/stores/__init__.py +0 -13
  50. cognite/neat/legacy/graph/stores/_base.py +0 -400
  51. cognite/neat/legacy/graph/stores/_graphdb_store.py +0 -52
  52. cognite/neat/legacy/graph/stores/_memory_store.py +0 -43
  53. cognite/neat/legacy/graph/stores/_oxigraph_store.py +0 -151
  54. cognite/neat/legacy/graph/stores/_oxrdflib.py +0 -247
  55. cognite/neat/legacy/graph/stores/_rdf_to_graph.py +0 -42
  56. cognite/neat/legacy/graph/transformations/__init__.py +0 -0
  57. cognite/neat/legacy/graph/transformations/entity_matcher.py +0 -101
  58. cognite/neat/legacy/graph/transformations/query_generator/__init__.py +0 -3
  59. cognite/neat/legacy/graph/transformations/query_generator/sparql.py +0 -575
  60. cognite/neat/legacy/graph/transformations/transformer.py +0 -322
  61. cognite/neat/legacy/rules/__init__.py +0 -0
  62. cognite/neat/legacy/rules/analysis.py +0 -231
  63. cognite/neat/legacy/rules/examples/Rules-Nordic44-to-graphql.xlsx +0 -0
  64. cognite/neat/legacy/rules/examples/Rules-Nordic44.xlsx +0 -0
  65. cognite/neat/legacy/rules/examples/__init__.py +0 -18
  66. cognite/neat/legacy/rules/examples/power-grid-containers.yaml +0 -124
  67. cognite/neat/legacy/rules/examples/power-grid-example.xlsx +0 -0
  68. cognite/neat/legacy/rules/examples/power-grid-model.yaml +0 -224
  69. cognite/neat/legacy/rules/examples/rules-template.xlsx +0 -0
  70. cognite/neat/legacy/rules/examples/sheet2cdf-transformation-rules.xlsx +0 -0
  71. cognite/neat/legacy/rules/examples/skos-rules.xlsx +0 -0
  72. cognite/neat/legacy/rules/examples/source-to-solution-mapping-rules.xlsx +0 -0
  73. cognite/neat/legacy/rules/examples/wind-energy.owl +0 -1511
  74. cognite/neat/legacy/rules/exceptions.py +0 -2972
  75. cognite/neat/legacy/rules/exporters/__init__.py +0 -20
  76. cognite/neat/legacy/rules/exporters/_base.py +0 -45
  77. cognite/neat/legacy/rules/exporters/_core/__init__.py +0 -5
  78. cognite/neat/legacy/rules/exporters/_core/rules2labels.py +0 -24
  79. cognite/neat/legacy/rules/exporters/_rules2dms.py +0 -885
  80. cognite/neat/legacy/rules/exporters/_rules2excel.py +0 -213
  81. cognite/neat/legacy/rules/exporters/_rules2graphql.py +0 -183
  82. cognite/neat/legacy/rules/exporters/_rules2ontology.py +0 -524
  83. cognite/neat/legacy/rules/exporters/_rules2pydantic_models.py +0 -748
  84. cognite/neat/legacy/rules/exporters/_rules2rules.py +0 -105
  85. cognite/neat/legacy/rules/exporters/_rules2triples.py +0 -38
  86. cognite/neat/legacy/rules/exporters/_validation.py +0 -146
  87. cognite/neat/legacy/rules/importers/__init__.py +0 -22
  88. cognite/neat/legacy/rules/importers/_base.py +0 -66
  89. cognite/neat/legacy/rules/importers/_dict2rules.py +0 -158
  90. cognite/neat/legacy/rules/importers/_dms2rules.py +0 -194
  91. cognite/neat/legacy/rules/importers/_graph2rules.py +0 -308
  92. cognite/neat/legacy/rules/importers/_json2rules.py +0 -39
  93. cognite/neat/legacy/rules/importers/_owl2rules/__init__.py +0 -3
  94. cognite/neat/legacy/rules/importers/_owl2rules/_owl2classes.py +0 -239
  95. cognite/neat/legacy/rules/importers/_owl2rules/_owl2metadata.py +0 -260
  96. cognite/neat/legacy/rules/importers/_owl2rules/_owl2properties.py +0 -217
  97. cognite/neat/legacy/rules/importers/_owl2rules/_owl2rules.py +0 -290
  98. cognite/neat/legacy/rules/importers/_spreadsheet2rules.py +0 -45
  99. cognite/neat/legacy/rules/importers/_xsd2rules.py +0 -20
  100. cognite/neat/legacy/rules/importers/_yaml2rules.py +0 -39
  101. cognite/neat/legacy/rules/models/__init__.py +0 -5
  102. cognite/neat/legacy/rules/models/_base.py +0 -151
  103. cognite/neat/legacy/rules/models/raw_rules.py +0 -316
  104. cognite/neat/legacy/rules/models/rdfpath.py +0 -237
  105. cognite/neat/legacy/rules/models/rules.py +0 -1289
  106. cognite/neat/legacy/rules/models/tables.py +0 -9
  107. cognite/neat/legacy/rules/models/value_types.py +0 -118
  108. cognite/neat/legacy/workflows/examples/Export_DMS/workflow.yaml +0 -89
  109. cognite/neat/legacy/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
  110. cognite/neat/legacy/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
  111. cognite/neat/legacy/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  112. cognite/neat/legacy/workflows/examples/Import_DMS/workflow.yaml +0 -65
  113. cognite/neat/legacy/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
  114. cognite/neat/legacy/workflows/examples/Validate_Rules/workflow.yaml +0 -67
  115. cognite/neat/legacy/workflows/examples/Validate_Solution_Model/workflow.yaml +0 -64
  116. cognite/neat/legacy/workflows/examples/Visualize_Data_Model_Using_Mock_Graph/workflow.yaml +0 -95
  117. cognite/neat/legacy/workflows/examples/Visualize_Semantic_Data_Model/workflow.yaml +0 -111
  118. cognite/neat/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  119. cognite/neat/workflows/migration/__init__.py +0 -0
  120. cognite/neat/workflows/migration/steps.py +0 -91
  121. cognite/neat/workflows/migration/wf_manifests.py +0 -33
  122. cognite/neat/workflows/steps/lib/legacy/__init__.py +0 -7
  123. cognite/neat/workflows/steps/lib/legacy/graph_contextualization.py +0 -82
  124. cognite/neat/workflows/steps/lib/legacy/graph_extractor.py +0 -746
  125. cognite/neat/workflows/steps/lib/legacy/graph_loader.py +0 -606
  126. cognite/neat/workflows/steps/lib/legacy/graph_store.py +0 -307
  127. cognite/neat/workflows/steps/lib/legacy/graph_transformer.py +0 -58
  128. cognite/neat/workflows/steps/lib/legacy/rules_exporter.py +0 -511
  129. cognite/neat/workflows/steps/lib/legacy/rules_importer.py +0 -612
  130. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/LICENSE +0 -0
  131. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/WHEEL +0 -0
  132. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/entry_points.txt +0 -0
cognite/neat/workflows/steps/lib/legacy/graph_extractor.py (file removed in 0.88.0)
@@ -1,746 +0,0 @@
1
- import hashlib
2
- import json
3
- import logging
4
- import uuid
5
- import xml.etree.ElementTree as ET
6
- from pathlib import Path
7
- from typing import ClassVar, cast
8
-
9
- from rdflib import RDF, XSD, Literal, Namespace, URIRef
10
-
11
- from cognite.neat.constants import DEFAULT_NAMESPACE
12
- from cognite.neat.legacy.graph import extractors
13
- from cognite.neat.legacy.graph.extractors._mock_graph_generator import (
14
- generate_triples as generate_mock_triples,
15
- )
16
- from cognite.neat.legacy.rules.exporters._rules2triples import get_instances_as_triples
17
- from cognite.neat.utils.auxiliary import create_sha256_hash
18
- from cognite.neat.workflows._exceptions import StepNotInitialized
19
- from cognite.neat.workflows.model import FlowMessage, StepExecutionStatus
20
- from cognite.neat.workflows.steps.data_contracts import (
21
- RulesData,
22
- SolutionGraph,
23
- SourceGraph,
24
- )
25
- from cognite.neat.workflows.steps.step_model import Configurable, Step
26
-
27
- __all__ = [
28
- "ExtractGraphFromRdfFile",
29
- "ExtractGraphFromRulesInstanceSheet",
30
- "ExtractGraphFromGraphCapturingSheet",
31
- "ExtractGraphFromMockGraph",
32
- "ExtractGraphFromRulesDataModel",
33
- "ExtractGraphFromJsonFile",
34
- "ExtractGraphFromAvevaPiAssetFramework",
35
- "ExtractGraphFromDexpiFile",
36
- ]
37
-
38
- CATEGORY = __name__.split(".")[-1].replace("_", " ").title() + " [LEGACY]"
39
-
40
-
41
- class ExtractGraphFromRdfFile(Step):
42
- """
43
- This step extract instances from a file into the source graph. The file must be in RDF format.
44
- """
45
-
46
- description = "This step extract instances from a file into the source graph. The file must be in RDF format."
47
- version = "legacy"
48
- category = CATEGORY
49
- configurables: ClassVar[list[Configurable]] = [
50
- Configurable(
51
- name="file_path",
52
- value="source-graphs/source-graph-dump.xml",
53
- label="File name of source graph data dump in RDF format",
54
- ),
55
- Configurable(
56
- name="mime_type",
57
- value="application/rdf+xml",
58
- label="MIME type of file containing RDF graph",
59
- options=[
60
- "application/rdf+xml",
61
- "text/turtle",
62
- "application/n-triples",
63
- "application/n-quads",
64
- "application/trig",
65
- ],
66
- ),
67
- Configurable(
68
- name="add_base_iri",
69
- value="True",
70
- label="Whether to add base IRI to graph in case if entity ids are relative",
71
- options=["True", "False"],
72
- ),
73
- ]
74
-
75
- def run(self, source_graph: SourceGraph) -> FlowMessage: # type: ignore[override, syntax]
76
- if self.configs is None or self.data_store_path is None:
77
- raise StepNotInitialized(type(self).__name__)
78
- if source_graph.graph.rdf_store_type.lower() in ("memory", "oxigraph"):
79
- if source_file := self.configs["file_path"]:
80
- source_graph.graph.import_from_file(
81
- self.data_store_path / Path(source_file),
82
- mime_type=self.configs["mime_type"], # type: ignore[arg-type]
83
- add_base_iri=self.configs["add_base_iri"] == "True",
84
- )
85
- logging.info(f"Loaded {source_file} into source graph.")
86
- else:
87
- raise ValueError("You need a source_rdf_store.file specified for source_rdf_store.type=memory")
88
- else:
89
- raise NotImplementedError(f"Graph type {source_graph.graph.rdf_store_type} is not supported.")
90
-
91
- return FlowMessage(output_text="Instances loaded to source graph")
92
-
93
-
94
- class ExtractGraphFromDexpiFile(Step):
95
- """
96
- This step converts DEXPI P&ID (XML) into Knowledge Graph
97
- """
98
-
99
- description = "This step converts DEXPI P&ID (XML) into Knowledge Graph"
100
- version = "legacy"
101
- category = CATEGORY
102
- configurables: ClassVar[list[Configurable]] = [
103
- Configurable(
104
- name="file_path",
105
- value="source-graphs/dexpi-pid.xml",
106
- label="File path to DEXPI P&ID in XML format",
107
- ),
108
- Configurable(
109
- name="base_namespace",
110
- value="http://purl.org/cognite/neat#",
111
- label="Base namespace to be added to ids for all nodes found in P&ID",
112
- ),
113
- ]
114
-
115
- def run(self, source_graph: SourceGraph) -> FlowMessage: # type: ignore[override, syntax]
116
- if self.configs is None or self.data_store_path is None:
117
- raise StepNotInitialized(type(self).__name__)
118
-
119
- file_path = self.configs.get("file_path")
120
- base_namespace = self.configs.get("base_namespace", None)
121
-
122
- if file_path:
123
- triples = extractors.DexpiXML(self.data_store_path / Path(file_path), base_namespace).extract()
124
- source_graph.graph.add_triples(triples, verbose=True)
125
-
126
- logging.info(f"Loaded {file_path} into source graph.")
127
- else:
128
- raise ValueError("You need a source_rdf_store.file specified")
129
-
130
- return FlowMessage(output_text="Instances loaded to source graph")
131
-
132
-
133
- class ExtractGraphFromGraphCapturingSheet(Step):
134
- """
135
- This step extracts nodes and edges from graph capture spreadsheet and load them into graph
136
- """
137
-
138
- description = "This step extracts nodes and edges from graph capturing spreadsheet and load them into graph"
139
- version = "legacy"
140
- category = CATEGORY
141
- configurables: ClassVar[list[Configurable]] = [
142
- Configurable(
143
- name="file_path",
144
- value="source-graphs/graph_capture_sheet.xlsx",
145
- label="File path to Graph Capturing Sheet",
146
- ),
147
- Configurable(
148
- name="base_namespace",
149
- value="http://purl.org/cognite/neat#",
150
- label="Base namespace to be added to ids for all nodes extracted from graph capturing spreadsheet",
151
- ),
152
- Configurable(
153
- name="graph_name",
154
- value="solution",
155
- label="The name of target graph to load nodes and edge sto.",
156
- options=["source", "solution"],
157
- ),
158
- ]
159
-
160
- def run( # type: ignore[override, syntax]
161
- self, rules: RulesData, graph_store: SolutionGraph | SourceGraph
162
- ) -> FlowMessage:
163
- if self.configs is None or self.data_store_path is None:
164
- raise StepNotInitialized(type(self).__name__)
165
-
166
- file_path = self.configs.get("file_path")
167
-
168
- if file_path:
169
- logging.info(f"Processing graph capture sheet {self.data_store_path / Path(file_path)}")
170
-
171
- triples = extractors.GraphCapturingSheet(
172
- rules=rules.rules,
173
- filepath=self.data_store_path / Path(file_path),
174
- namespace=self.configs.get("base_namespace", None),
175
- use_source_ids=True,
176
- ).extract()
177
-
178
- else:
179
- raise ValueError("You need a source_rdf_store.file specified")
180
-
181
- if self.configs["graph_name"] == "solution":
182
- graph_store = cast(SolutionGraph, self.flow_context["SolutionGraph"])
183
- else:
184
- graph_store = cast(SourceGraph, self.flow_context["SourceGraph"])
185
-
186
- graph_store.graph.add_triples(triples, verbose=True) # type: ignore[arg-type]
187
- return FlowMessage(output_text="Graph capture sheet processed")
188
-
189
-
190
- class ExtractGraphFromMockGraph(Step):
191
- """
192
- This step generate mock graph based on the defined classes and target number of instances
193
- """
194
-
195
- description = "This step generate mock graph based on the defined classes and target number of instances"
196
- version = "legacy"
197
- category = CATEGORY
198
- configurables: ClassVar[list[Configurable]] = [
199
- Configurable(
200
- name="class_count",
201
- value='{"GeographicalRegion":5, "SubGeographicalRegion":10}',
202
- label="Target number of instances for each class",
203
- ),
204
- Configurable(
205
- name="graph_name",
206
- value="solution",
207
- label="The name of target graph.",
208
- options=["source", "solution"],
209
- ),
210
- ]
211
-
212
- def run( # type: ignore[override, syntax]
213
- self, transformation_rules: RulesData, graph_store: SolutionGraph | SourceGraph
214
- ) -> FlowMessage:
215
- if self.configs is None:
216
- raise StepNotInitialized(type(self).__name__)
217
- logging.info("Initiated generation of mock triples")
218
- try:
219
- class_count = json.loads(self.configs["class_count"])
220
- except Exception:
221
- return FlowMessage(
222
- error_text="Defected JSON stored in class_count",
223
- step_execution_status=StepExecutionStatus.ABORT_AND_FAIL,
224
- )
225
-
226
- if self.configs["graph_name"] == "solution":
227
- # Todo Anders: Why is the graph fetched from context when it is passed as an argument?
228
- graph_store = cast(SourceGraph | SolutionGraph, self.flow_context["SolutionGraph"])
229
- else:
230
- graph_store = cast(SourceGraph | SolutionGraph, self.flow_context["SourceGraph"])
231
-
232
- logging.info(class_count)
233
- logging.info(transformation_rules.rules.metadata.model_dump())
234
- try:
235
- triples = generate_mock_triples(transformation_rules=transformation_rules.rules, class_count=class_count)
236
- except Exception as e:
237
- return FlowMessage(
238
- error_text=f"Error: {e}",
239
- step_execution_status=StepExecutionStatus.ABORT_AND_FAIL,
240
- )
241
-
242
- logging.info("Adding mock triples to graph")
243
- graph_store.graph.add_triples(triples, verbose=True) # type: ignore[arg-type]
244
- return FlowMessage(output_text=f"Mock graph generated containing total of {len(triples)} triples")
245
-
246
-
247
- class ExtractGraphFromRulesInstanceSheet(Step):
248
- """
249
- This step extracts instances from Rules object and loads them into the graph
250
- """
251
-
252
- description = "This step extracts instances from Rules object and loads them into the graph."
253
- category = CATEGORY
254
- version = "legacy"
255
-
256
- configurables: ClassVar[list[Configurable]] = [
257
- Configurable(
258
- name="graph_name",
259
- value="solution",
260
- label="The name of target graph.",
261
- options=["source", "solution"],
262
- ),
263
- ]
264
-
265
- def run( # type: ignore[override, syntax]
266
- self, transformation_rules: RulesData, graph_store: SolutionGraph | SourceGraph
267
- ) -> FlowMessage:
268
- triples = get_instances_as_triples(transformation_rules.rules)
269
- instance_ids = {triple[0] for triple in triples}
270
- output_text = f"Extracted {len(instance_ids)} instances out of"
271
- output_text += f"Loaded {len(triples)} statements defining"
272
- output_text += f" {len(instance_ids)} instances"
273
-
274
- if self.configs["graph_name"] == "solution":
275
- graph_store = cast(SolutionGraph, self.flow_context["SolutionGraph"])
276
- else:
277
- graph_store = cast(SourceGraph, self.flow_context["SourceGraph"])
278
-
279
- try:
280
- graph_store.graph.add_triples(triples, verbose=True) # type: ignore[arg-type]
281
- except Exception as e:
282
- return FlowMessage(
283
- error_text=f"Error: {e}",
284
- step_execution_status=StepExecutionStatus.ABORT_AND_FAIL,
285
- )
286
-
287
- return FlowMessage(output_text=output_text)
288
-
289
-
290
- class ExtractGraphFromRulesDataModel(Step):
291
- """
292
- This step extracts data model from rules file and loads it into source graph
293
- """
294
-
295
- description = "This step extracts data model from rules file and loads it into source graph."
296
- category = CATEGORY
297
- version = "legacy"
298
-
299
- def run( # type: ignore[override, syntax]
300
- self, transformation_rules: RulesData, source_graph: SourceGraph
301
- ) -> FlowMessage:
302
- ns = DEFAULT_NAMESPACE
303
- classes = transformation_rules.rules.classes
304
- properties = transformation_rules.rules.properties
305
- counter = 0
306
- for class_name, class_def in classes.items():
307
- rdf_instance_id = URIRef(ns + "_" + class_def.class_id)
308
- source_graph.graph.graph.add((rdf_instance_id, URIRef(ns + "Name"), Literal(class_name)))
309
- source_graph.graph.graph.add((rdf_instance_id, RDF.type, URIRef(ns + class_def.class_id)))
310
- if class_def.parent_class:
311
- source_graph.graph.graph.add(
312
- (
313
- rdf_instance_id,
314
- URIRef(ns + "hasParent"),
315
- URIRef(ns + "_" + cast(str, class_def.parent_class)),
316
- )
317
- )
318
- counter += 1
319
-
320
- for _property_name, property_def in properties.items():
321
- rdf_instance_id = URIRef(ns + "_" + property_def.class_id)
322
- source_graph.graph.graph.add(
323
- (
324
- rdf_instance_id,
325
- URIRef(ns + property_def.property_id),
326
- Literal(property_def.expected_value_type),
327
- )
328
- )
329
- if property_def.expected_value_type.suffix not in (
330
- "string",
331
- "integer",
332
- "float",
333
- "boolean",
334
- ):
335
- source_graph.graph.graph.add(
336
- (
337
- rdf_instance_id,
338
- URIRef(ns + "connectedTo"),
339
- URIRef(ns + "_" + property_def.expected_value_type.suffix),
340
- )
341
- )
342
- counter += 1
343
-
344
- output_text = f"Loaded {counter} classes into source graph"
345
- return FlowMessage(output_text=output_text)
346
-
347
-
348
- class ExtractGraphFromJsonFile(Step):
349
- """
350
- This step extracts instances from json file and loads them into a graph store. Warning : the step is experimental
351
- """
352
-
353
- description = "This step extracts instances from json file and loads them into a graph store"
354
- category = CATEGORY
355
- version = "legacy"
356
- configurables: ClassVar[list[Configurable]] = [
357
- Configurable(
358
- name="file_name",
359
- value="data_dump.json",
360
- label="Full path to the file containing data dump in JSON format",
361
- ),
362
- Configurable(
363
- name="graph_name",
364
- value="solution",
365
- label="The name of target graph.",
366
- options=["source", "solution"],
367
- ),
368
- Configurable(
369
- name="object_id_generation_method",
370
- value="hash_of_json_element",
371
- label="Method to be used for generating object ids. \
372
- source_object_properties - takes multiple properties from the source object and concatenates them. \
373
- source_object_id_mapping - takes a single property from the \
374
- source object and maps it to a instance id. \
375
- The option should be used when source object already contains stable ids \
376
- hash_of_json_element - takes a hash of the JSON element.Very generic method but \
377
- can be slow working with big objects. \
378
- uuid - generates a random UUID, the option produces unstables ids . ",
379
- options=[
380
- "source_object_properties",
381
- "source_object_id_mapping",
382
- "hash_of_json_element",
383
- "uuid",
384
- ],
385
- ),
386
- Configurable(
387
- name="json_object_id_mapping",
388
- value="name",
389
- label="Comma separated list of object properties to be used for generating object ids. \
390
- Each property must be prefixed with the name of the object. For example: device:name,pump:id",
391
- ),
392
- Configurable(
393
- name="json_object_labels_mapping",
394
- value="",
395
- label="Comma separated list of object properties to be used for generating object labels. \
396
- Each property must be prefixed with the name of the object. For example: asset:name,asset:type",
397
- ),
398
- Configurable(
399
- name="namespace",
400
- value="http://purl.org/cognite/neat#",
401
- label="Namespace to be used for the generated objects.",
402
- ),
403
- Configurable(
404
- name="namespace_prefix",
405
- value="neat",
406
- label="The prefix to be used for the namespace.",
407
- ),
408
- ]
409
-
410
- def get_json_object_id(
411
- self,
412
- method,
413
- object_name: str,
414
- json_object: dict,
415
- parent_object_id: str,
416
- id_mapping: dict,
417
- ):
418
- if method == "source_object_properties":
419
- object_id = ""
420
- if object_name in id_mapping:
421
- for property_name in id_mapping[object_name]:
422
- object_id += property_name + json_object[property_name]
423
- elif method == "hash_of_json_element":
424
- flat_json_object = {}
425
- for key, value in json_object.items():
426
- if not isinstance(value, dict) and not isinstance(value, list):
427
- flat_json_object[key] = value
428
-
429
- object_id = json.dumps(flat_json_object, sort_keys=True)
430
- elif method == "uuid":
431
- return uuid.uuid4()
432
- elif method == "source_object_id_mapping":
433
- # don't hash existing valid ids
434
- try:
435
- return json_object[id_mapping[object_name][0]]
436
- except KeyError as e:
437
- # back to hashing
438
- logging.debug(f"Object {object_name} doesn't have a valid id.Error : {e}")
439
- object_id = self.get_json_object_id(
440
- "hash_of_json_element",
441
- object_name,
442
- json_object,
443
- parent_object_id,
444
- id_mapping,
445
- )
446
- else:
447
- raise ValueError(
448
- f"Unknown object_id_generation_method: {(self.configs or {}).get('object_id_generation_method')}"
449
- )
450
-
451
- return hashlib.sha256(object_id.encode()).hexdigest()
452
-
453
- def run(self, graph_store: SolutionGraph | SourceGraph) -> FlowMessage: # type: ignore[override, syntax]
454
- if self.configs is None or self.data_store_path is None:
455
- raise StepNotInitialized(type(self).__name__)
456
-
457
- # self.graph.bind
458
- if self.configs["graph_name"] == "solution":
459
- # Todo Anders: Why is the graph fetched from context when it is passed as an argument?
460
- graph_store = cast(SolutionGraph, self.flow_context["SolutionGraph"])
461
- else:
462
- graph_store = cast(SourceGraph, self.flow_context["SourceGraph"])
463
-
464
- ns = Namespace(self.configs["namespace"])
465
- graph_store.graph.graph.bind(self.configs["namespace_prefix"], ns)
466
-
467
- full_path = self.data_store_path / Path(self.configs["file_name"])
468
- logging.info(f"Loading data dump from {full_path}")
469
- with full_path.open() as f:
470
- json_data = json.load(f)
471
-
472
- graph = graph_store.graph
473
- nodes_counter = 0
474
- property_counter = 0
475
- labels_mapping: dict[str, str] = {}
476
- object_id_mapping: dict[str, list[str]] = {}
477
- if self.configs["json_object_labels_mapping"]:
478
- for label_mapping in self.configs["json_object_labels_mapping"].split(","):
479
- object_name, property_name = label_mapping.split(":")
480
- labels_mapping[object_name] = property_name
481
-
482
- if self.configs["json_object_id_mapping"]:
483
- for id_mapping in self.configs["json_object_id_mapping"].split(","):
484
- if ":" not in id_mapping:
485
- continue
486
- object_name, property_name = id_mapping.split(":")
487
- # if multiple ids are used for the same object ,the order of the properties is important
488
- if object_name in object_id_mapping:
489
- object_id_mapping[object_name].append(property_name)
490
- else:
491
- object_id_mapping[object_name] = [property_name]
492
-
493
- # Iterate through the JSON data and convert it to triples
494
- def convert_json_to_triples(
495
- data: dict,
496
- parent_node: URIRef,
497
- parent_object_id: str,
498
- parent_node_path: str,
499
- property_name=None,
500
- ):
501
- nonlocal nodes_counter, property_counter
502
- if isinstance(data, dict):
503
- if len(data) == 0:
504
- return
505
- if property_name is None:
506
- for key, value in data.items():
507
- convert_json_to_triples(value, parent_node, parent_object_id, parent_node_path, key)
508
- else:
509
- object_id = self.get_json_object_id(
510
- self.configs["object_id_generation_method"],
511
- property_name,
512
- data,
513
- parent_object_id,
514
- object_id_mapping,
515
- )
516
- new_node = URIRef(ns + object_id)
517
- graph.graph.add((new_node, RDF.type, URIRef(ns + property_name)))
518
- if labels_mapping and property_name in labels_mapping:
519
- graph.graph.add(
520
- (
521
- new_node,
522
- URIRef(ns + "label"),
523
- Literal(data[labels_mapping[property_name]]),
524
- )
525
- )
526
- else:
527
- graph.graph.add((new_node, URIRef(ns + "label"), Literal(property_name)))
528
- graph.graph.add((new_node, URIRef(ns + "parent"), parent_node))
529
- nodes_counter += 1
530
- for key, value in data.items():
531
- new_node_path = parent_node_path + "/" + key
532
- convert_json_to_triples(value, new_node, object_id, new_node_path, key)
533
- elif isinstance(data, list):
534
- if property_name is None:
535
- for key, value in data.items():
536
- convert_json_to_triples(value, parent_node, parent_object_id, parent_node_path, key)
537
- else:
538
- for item in data:
539
- convert_json_to_triples(
540
- item,
541
- parent_node,
542
- parent_object_id,
543
- parent_node_path,
544
- property_name,
545
- )
546
- else:
547
- # Convert scalar values to RDF literals
548
- if isinstance(data, bool):
549
- data = Literal(data, datatype=XSD.boolean)
550
- elif isinstance(data, int):
551
- data = Literal(data, datatype=XSD.integer)
552
- elif isinstance(data, float):
553
- data = Literal(data, datatype=XSD.float)
554
- elif isinstance(data, str):
555
- data = Literal(data, datatype=XSD.string)
556
- else:
557
- data = Literal(str(data))
558
- property_counter += 1
559
- graph.graph.add((parent_node, URIRef(ns + property_name), data))
560
-
561
- # Start conversion with a root node
562
- root_node = URIRef(ns + "root")
563
- graph.graph.add((root_node, URIRef(ns + "label"), Literal("root node")))
564
- graph.graph.add((root_node, RDF.type, URIRef(ns + "root_node_id")))
565
- convert_json_to_triples(json_data, root_node, "root", "root", None)
566
- return FlowMessage(
567
- output_text=f"Data from source file imported successfully. Imported {nodes_counter} objects \
568
- and {property_counter} properties ."
569
- )
570
-
571
-
572
class ExtractGraphFromAvevaPiAssetFramework(Step):
    """Extract instances from an Aveva PI AF XML dump and load them into a graph store.

    Warning: the step is experimental.

    The step parses the XML file named by the ``file_name`` configurable
    (resolved relative to ``data_store_path``), creates a root node from the
    ``root_node_*`` configurables and then walks the ``AFElement`` tree found
    at ``AFDatabase/AFElement``, adding one RDF instance per ``AFElement`` and
    per ``AFAttribute``. Each instance is linked to its parent through a
    ``hasParent`` triple.
    """

    description = "This step extracts instances from Aveva PI AF and loads them into a graph store"
    category = CATEGORY
    version = "legacy"
    configurables: ClassVar[list[Configurable]] = [
        Configurable(
            name="file_name",
            value="staging/pi_af_dump.xml",
            # Kept on a single line: a backslash continuation inside the
            # literal would make the next line's indentation part of the label.
            label="Full path to the file containing data dump in XML format",
        ),
        Configurable(
            name="graph_name",
            value="solution",
            label="The name of target graph.",
            options=["source", "solution"],
        ),
        Configurable(
            name="root_node_external_id",
            value="root",
            label="External id of the root node. The node will be created if it doesn't exist",
        ),
        Configurable(
            name="root_node_name",
            value="root",
            label="Name of the root node. The node will be created if it doesn't exist",
        ),
        Configurable(
            name="root_node_type",
            value="Asset",
            label="Type of the root node. The node will be created if it doesn't exist",
        ),
        Configurable(
            name="namespace",
            value="http://purl.org/cognite/neat#",
            label="Namespace to be used for the generated objects.",
        ),
        Configurable(
            name="namespace_prefix",
            value="neat",
            label="The prefix to be used for the namespace.",
        ),
    ]

    def add_root_asset_to_source_graph(self) -> str:
        """Add the configured root node to the graph and return its external id.

        Reads ``root_node_external_id``, ``root_node_name`` and
        ``root_node_type`` from ``self.configs`` and writes a ``Name`` triple
        and an ``RDF.type`` triple for the root node. ``self.ns`` and
        ``self.graph_store`` must already be set, which ``run`` does before
        calling this method.
        """
        root_external_id = self.configs["root_node_external_id"]
        root_name = self.configs["root_node_name"]
        root_asset_type = self.configs["root_node_type"]
        rdf_root_instance_id = URIRef(self.ns + root_external_id)
        self.graph_store.graph.add((rdf_root_instance_id, URIRef(self.ns + "Name"), Literal(root_name)))
        self.graph_store.graph.add((rdf_root_instance_id, RDF.type, URIRef(self.ns + root_asset_type)))
        return root_external_id

    def run(  # type: ignore[override, syntax]
        self, flow_msg: FlowMessage, graph_store: SolutionGraph | SourceGraph
    ) -> FlowMessage:
        """Parse the PI AF dump and load its elements and attributes into the graph.

        Args:
            flow_msg: Incoming flow message; not read by this step.
            graph_store: Graph passed in by the workflow; not read by this
                step, the graph is taken from ``self.flow_context`` instead.

        Returns:
            A ``FlowMessage`` reporting how many nested ``AFElement`` nodes
            were loaded, or a message routed to ``step_error_handler`` when no
            source file is configured or the dump has no
            ``AFDatabase/AFElement`` node.

        Raises:
            StepNotInitialized: If ``configs`` or ``data_store_path`` is unset.
        """
        if self.configs is None or self.data_store_path is None:
            raise StepNotInitialized(type(self).__name__)

        source_file = self.configs["file_name"]
        if not source_file:
            return FlowMessage(
                output_text="No source file specified",
                next_step_ids=["step_error_handler"],
            )
        source_pi_dump = Path(self.data_store_path) / source_file

        # Todo Anders: Why is the graph fetched from context when it is passed as an argument?
        context_key = "SolutionGraph" if self.configs["graph_name"] == "solution" else "SourceGraph"
        self.graph_store = cast(SourceGraph | SolutionGraph, self.flow_context[context_key]).graph

        # Parse and validate the dump before touching the graph, so that a
        # malformed file does not leave a dangling root node behind.
        root = ET.parse(source_pi_dump).getroot()
        root_af_element = root.find("AFDatabase/AFElement")
        logging.info(f"Found AFElement: {root_af_element}")
        if root_af_element is None:
            return FlowMessage(
                output_text="No AFDatabase/AFElement node found in the source file",
                next_step_ids=["step_error_handler"],
            )

        self.ns = Namespace(self.configs["namespace"])
        self.graph_store.graph.bind(self.configs["namespace_prefix"], self.ns)

        cdf_root_instance_id = self.add_root_asset_to_source_graph()
        counter = 0

        def add_parent_link(rdf_instance_id, parent_element_id: str | None) -> None:
            """Link an instance to its parent through ``hasParent`` when a parent id is given."""
            if parent_element_id:
                self.graph_store.graph.add(
                    (
                        rdf_instance_id,
                        URIRef(self.ns + "hasParent"),
                        URIRef(self.ns + parent_element_id),
                    )
                )

        def process_af_attribute(af_element, element_path: str, parent_element_id: str | None = None) -> None:
            """Add one AFAttribute, its nested AFAttributes and its scalar children to the graph."""
            # Spaces are replaced because the name becomes part of the type URI.
            name = af_element.find("Name").text.replace(" ", "_")
            new_element_path = element_path + "/" + name
            # The instance id is derived from the full path of the attribute.
            element_id = "_" + create_sha256_hash(new_element_path)
            rdf_instance_id = URIRef(self.ns + element_id)
            self.graph_store.graph.add((rdf_instance_id, URIRef(self.ns + "Name"), Literal(name)))
            self.graph_store.graph.add((rdf_instance_id, RDF.type, URIRef(self.ns + "Attribute" + name)))
            self.graph_store.graph.add((rdf_instance_id, URIRef(self.ns + "Path"), Literal(new_element_path)))
            add_parent_link(rdf_instance_id, parent_element_id)

            for child in af_element:
                if child.tag == "AFAttribute":
                    process_af_attribute(child, new_element_path, element_id)
                elif child.tag == "Name":
                    # Already stored above as the Name triple.
                    continue
                else:
                    # Best effort: a child that cannot be stored is logged and skipped.
                    try:
                        self.graph_store.graph.add(
                            (
                                rdf_instance_id,
                                URIRef(self.ns + child.tag),
                                Literal(child.text),
                            )
                        )
                    except Exception as e:
                        logging.error(f"Error parsing AFAttribute {name} : {e}")

        def process_af_element(af_element, element_path: str, parent_element_id: str | None = None) -> str:
            """Add one AFElement and, recursively, everything below it; return its element id."""
            nonlocal counter
            name = af_element.find("Name").text
            template = None
            new_element_path = element_path + "/" + name
            element_id = "_" + create_sha256_hash(new_element_path)
            rdf_instance_id = URIRef(self.ns + element_id)
            self.graph_store.graph.add((rdf_instance_id, URIRef(self.ns + "Name"), Literal(name)))
            self.graph_store.graph.add((rdf_instance_id, URIRef(self.ns + "Path"), Literal(new_element_path)))
            add_parent_link(rdf_instance_id, parent_element_id)

            # The branches are mutually exclusive. With independent ``if``
            # statements every Name, Template and AFAttribute child would also
            # reach the final ``else`` and be stored as a literal triple.
            for child in af_element:
                if child.tag == "Name":
                    continue
                elif child.tag == "Template":
                    template = child.text
                elif child.tag == "AFAttribute":
                    process_af_attribute(child, new_element_path, element_id)
                elif child.tag == "AFElement":
                    counter += 1
                    process_af_element(child, new_element_path, element_id)
                else:
                    self.graph_store.graph.add(
                        (
                            rdf_instance_id,
                            URIRef(self.ns + child.tag),
                            Literal(child.text),
                        )
                    )

            # The template, when present, becomes the RDF type of the element.
            type_name = template if template else "AFElement"
            self.graph_store.graph.add((rdf_instance_id, RDF.type, URIRef(self.ns + type_name)))

            return element_id

        process_af_element(root_af_element, "root", cdf_root_instance_id)
        self.graph_store.restart()  # restarting the graph to release the memory
        return FlowMessage(output_text=f" {counter} PI assets loaded into the graph")

    def convert_attribute(self, attribute):
        """Turn the ``{first}second`` part of ``attribute`` into ``first/second``.

        Only the text after the last ``{`` is considered. A value without
        ``{``, or without a ``}`` after the last ``{``, is returned unchanged
        instead of raising ``IndexError``.
        """
        if "{" not in attribute:
            return attribute
        attr_splitted = attribute.split("{")[-1].split("}")
        if len(attr_splitted) < 2:
            # No closing brace: there is nothing to convert.
            return attribute
        return attr_splitted[0] + "/" + attr_splitted[1]