cognite-neat 0.87.4__py3-none-any.whl → 0.88.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (132)
  1. cognite/neat/_version.py +1 -1
  2. cognite/neat/app/api/data_classes/rest.py +0 -19
  3. cognite/neat/app/api/explorer.py +6 -4
  4. cognite/neat/app/api/routers/crud.py +11 -21
  5. cognite/neat/app/api/routers/workflows.py +24 -94
  6. cognite/neat/graph/extractors/_classic_cdf/_assets.py +8 -2
  7. cognite/neat/graph/extractors/_mock_graph_generator.py +2 -2
  8. cognite/neat/graph/loaders/_base.py +17 -12
  9. cognite/neat/graph/loaders/_rdf2asset.py +223 -58
  10. cognite/neat/graph/loaders/_rdf2dms.py +1 -1
  11. cognite/neat/graph/stores/_base.py +5 -0
  12. cognite/neat/rules/analysis/_asset.py +31 -1
  13. cognite/neat/rules/importers/_inference2rules.py +31 -35
  14. cognite/neat/rules/models/information/_rules.py +1 -1
  15. cognite/neat/workflows/steps/data_contracts.py +17 -43
  16. cognite/neat/workflows/steps/lib/current/graph_extractor.py +28 -24
  17. cognite/neat/workflows/steps/lib/current/graph_loader.py +4 -21
  18. cognite/neat/workflows/steps/lib/current/graph_store.py +18 -134
  19. cognite/neat/workflows/steps_registry.py +5 -7
  20. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/METADATA +1 -1
  21. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/RECORD +24 -132
  22. cognite/neat/app/api/routers/core.py +0 -91
  23. cognite/neat/app/api/routers/data_exploration.py +0 -336
  24. cognite/neat/app/api/routers/rules.py +0 -203
  25. cognite/neat/legacy/__init__.py +0 -0
  26. cognite/neat/legacy/graph/__init__.py +0 -3
  27. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44-dirty.xml +0 -20182
  28. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44.xml +0 -20163
  29. cognite/neat/legacy/graph/examples/__init__.py +0 -10
  30. cognite/neat/legacy/graph/examples/skos-capturing-sheet-wind-topics.xlsx +0 -0
  31. cognite/neat/legacy/graph/exceptions.py +0 -90
  32. cognite/neat/legacy/graph/extractors/__init__.py +0 -6
  33. cognite/neat/legacy/graph/extractors/_base.py +0 -14
  34. cognite/neat/legacy/graph/extractors/_dexpi.py +0 -44
  35. cognite/neat/legacy/graph/extractors/_graph_capturing_sheet.py +0 -403
  36. cognite/neat/legacy/graph/extractors/_mock_graph_generator.py +0 -361
  37. cognite/neat/legacy/graph/loaders/__init__.py +0 -23
  38. cognite/neat/legacy/graph/loaders/_asset_loader.py +0 -511
  39. cognite/neat/legacy/graph/loaders/_base.py +0 -67
  40. cognite/neat/legacy/graph/loaders/_exceptions.py +0 -85
  41. cognite/neat/legacy/graph/loaders/core/__init__.py +0 -0
  42. cognite/neat/legacy/graph/loaders/core/labels.py +0 -58
  43. cognite/neat/legacy/graph/loaders/core/models.py +0 -136
  44. cognite/neat/legacy/graph/loaders/core/rdf_to_assets.py +0 -1046
  45. cognite/neat/legacy/graph/loaders/core/rdf_to_relationships.py +0 -559
  46. cognite/neat/legacy/graph/loaders/rdf_to_dms.py +0 -309
  47. cognite/neat/legacy/graph/loaders/validator.py +0 -87
  48. cognite/neat/legacy/graph/models.py +0 -6
  49. cognite/neat/legacy/graph/stores/__init__.py +0 -13
  50. cognite/neat/legacy/graph/stores/_base.py +0 -400
  51. cognite/neat/legacy/graph/stores/_graphdb_store.py +0 -52
  52. cognite/neat/legacy/graph/stores/_memory_store.py +0 -43
  53. cognite/neat/legacy/graph/stores/_oxigraph_store.py +0 -151
  54. cognite/neat/legacy/graph/stores/_oxrdflib.py +0 -247
  55. cognite/neat/legacy/graph/stores/_rdf_to_graph.py +0 -42
  56. cognite/neat/legacy/graph/transformations/__init__.py +0 -0
  57. cognite/neat/legacy/graph/transformations/entity_matcher.py +0 -101
  58. cognite/neat/legacy/graph/transformations/query_generator/__init__.py +0 -3
  59. cognite/neat/legacy/graph/transformations/query_generator/sparql.py +0 -575
  60. cognite/neat/legacy/graph/transformations/transformer.py +0 -322
  61. cognite/neat/legacy/rules/__init__.py +0 -0
  62. cognite/neat/legacy/rules/analysis.py +0 -231
  63. cognite/neat/legacy/rules/examples/Rules-Nordic44-to-graphql.xlsx +0 -0
  64. cognite/neat/legacy/rules/examples/Rules-Nordic44.xlsx +0 -0
  65. cognite/neat/legacy/rules/examples/__init__.py +0 -18
  66. cognite/neat/legacy/rules/examples/power-grid-containers.yaml +0 -124
  67. cognite/neat/legacy/rules/examples/power-grid-example.xlsx +0 -0
  68. cognite/neat/legacy/rules/examples/power-grid-model.yaml +0 -224
  69. cognite/neat/legacy/rules/examples/rules-template.xlsx +0 -0
  70. cognite/neat/legacy/rules/examples/sheet2cdf-transformation-rules.xlsx +0 -0
  71. cognite/neat/legacy/rules/examples/skos-rules.xlsx +0 -0
  72. cognite/neat/legacy/rules/examples/source-to-solution-mapping-rules.xlsx +0 -0
  73. cognite/neat/legacy/rules/examples/wind-energy.owl +0 -1511
  74. cognite/neat/legacy/rules/exceptions.py +0 -2972
  75. cognite/neat/legacy/rules/exporters/__init__.py +0 -20
  76. cognite/neat/legacy/rules/exporters/_base.py +0 -45
  77. cognite/neat/legacy/rules/exporters/_core/__init__.py +0 -5
  78. cognite/neat/legacy/rules/exporters/_core/rules2labels.py +0 -24
  79. cognite/neat/legacy/rules/exporters/_rules2dms.py +0 -885
  80. cognite/neat/legacy/rules/exporters/_rules2excel.py +0 -213
  81. cognite/neat/legacy/rules/exporters/_rules2graphql.py +0 -183
  82. cognite/neat/legacy/rules/exporters/_rules2ontology.py +0 -524
  83. cognite/neat/legacy/rules/exporters/_rules2pydantic_models.py +0 -748
  84. cognite/neat/legacy/rules/exporters/_rules2rules.py +0 -105
  85. cognite/neat/legacy/rules/exporters/_rules2triples.py +0 -38
  86. cognite/neat/legacy/rules/exporters/_validation.py +0 -146
  87. cognite/neat/legacy/rules/importers/__init__.py +0 -22
  88. cognite/neat/legacy/rules/importers/_base.py +0 -66
  89. cognite/neat/legacy/rules/importers/_dict2rules.py +0 -158
  90. cognite/neat/legacy/rules/importers/_dms2rules.py +0 -194
  91. cognite/neat/legacy/rules/importers/_graph2rules.py +0 -308
  92. cognite/neat/legacy/rules/importers/_json2rules.py +0 -39
  93. cognite/neat/legacy/rules/importers/_owl2rules/__init__.py +0 -3
  94. cognite/neat/legacy/rules/importers/_owl2rules/_owl2classes.py +0 -239
  95. cognite/neat/legacy/rules/importers/_owl2rules/_owl2metadata.py +0 -260
  96. cognite/neat/legacy/rules/importers/_owl2rules/_owl2properties.py +0 -217
  97. cognite/neat/legacy/rules/importers/_owl2rules/_owl2rules.py +0 -290
  98. cognite/neat/legacy/rules/importers/_spreadsheet2rules.py +0 -45
  99. cognite/neat/legacy/rules/importers/_xsd2rules.py +0 -20
  100. cognite/neat/legacy/rules/importers/_yaml2rules.py +0 -39
  101. cognite/neat/legacy/rules/models/__init__.py +0 -5
  102. cognite/neat/legacy/rules/models/_base.py +0 -151
  103. cognite/neat/legacy/rules/models/raw_rules.py +0 -316
  104. cognite/neat/legacy/rules/models/rdfpath.py +0 -237
  105. cognite/neat/legacy/rules/models/rules.py +0 -1289
  106. cognite/neat/legacy/rules/models/tables.py +0 -9
  107. cognite/neat/legacy/rules/models/value_types.py +0 -118
  108. cognite/neat/legacy/workflows/examples/Export_DMS/workflow.yaml +0 -89
  109. cognite/neat/legacy/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
  110. cognite/neat/legacy/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
  111. cognite/neat/legacy/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  112. cognite/neat/legacy/workflows/examples/Import_DMS/workflow.yaml +0 -65
  113. cognite/neat/legacy/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
  114. cognite/neat/legacy/workflows/examples/Validate_Rules/workflow.yaml +0 -67
  115. cognite/neat/legacy/workflows/examples/Validate_Solution_Model/workflow.yaml +0 -64
  116. cognite/neat/legacy/workflows/examples/Visualize_Data_Model_Using_Mock_Graph/workflow.yaml +0 -95
  117. cognite/neat/legacy/workflows/examples/Visualize_Semantic_Data_Model/workflow.yaml +0 -111
  118. cognite/neat/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  119. cognite/neat/workflows/migration/__init__.py +0 -0
  120. cognite/neat/workflows/migration/steps.py +0 -91
  121. cognite/neat/workflows/migration/wf_manifests.py +0 -33
  122. cognite/neat/workflows/steps/lib/legacy/__init__.py +0 -7
  123. cognite/neat/workflows/steps/lib/legacy/graph_contextualization.py +0 -82
  124. cognite/neat/workflows/steps/lib/legacy/graph_extractor.py +0 -746
  125. cognite/neat/workflows/steps/lib/legacy/graph_loader.py +0 -606
  126. cognite/neat/workflows/steps/lib/legacy/graph_store.py +0 -307
  127. cognite/neat/workflows/steps/lib/legacy/graph_transformer.py +0 -58
  128. cognite/neat/workflows/steps/lib/legacy/rules_exporter.py +0 -511
  129. cognite/neat/workflows/steps/lib/legacy/rules_importer.py +0 -612
  130. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/LICENSE +0 -0
  131. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/WHEEL +0 -0
  132. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/entry_points.txt +0 -0
@@ -1,606 +0,0 @@
1
- import logging
2
- import time
3
- from datetime import datetime
4
- from pathlib import Path
5
- from typing import Any, ClassVar, cast
6
-
7
- from cognite.client import CogniteClient
8
- from cognite.client.data_classes import Asset, AssetFilter
9
- from prometheus_client import Gauge
10
-
11
- from cognite.neat.legacy.graph import loaders as graph_loader
12
- from cognite.neat.legacy.graph.loaders import upload_labels
13
- from cognite.neat.legacy.graph.loaders.core.rdf_to_assets import (
14
- NeatMetadataKeys,
15
- categorize_assets,
16
- rdf2assets,
17
- remove_non_existing_labels,
18
- unique_asset_labels,
19
- upload_assets,
20
- )
21
- from cognite.neat.legacy.graph.loaders.core.rdf_to_relationships import (
22
- categorize_relationships,
23
- rdf2relationships,
24
- upload_relationships,
25
- )
26
- from cognite.neat.legacy.graph.loaders.rdf_to_dms import upload_edges, upload_nodes
27
- from cognite.neat.legacy.graph.loaders.validator import validate_asset_hierarchy
28
- from cognite.neat.legacy.rules.models.rdfpath import TransformationRuleType
29
- from cognite.neat.utils.auxiliary import generate_exception_report
30
- from cognite.neat.workflows._exceptions import StepFlowContextNotInitialized, StepNotInitialized
31
- from cognite.neat.workflows.model import FlowMessage, StepExecutionStatus
32
- from cognite.neat.workflows.steps.data_contracts import (
33
- CategorizedAssets,
34
- CategorizedRelationships,
35
- Edges,
36
- Nodes,
37
- RulesData,
38
- SolutionGraph,
39
- SourceGraph,
40
- )
41
- from cognite.neat.workflows.steps.step_model import Configurable, Step
42
-
43
# Names exported by this module: every workflow step class defined below.
__all__ = [
    "GenerateAssetsFromGraph",
    "GenerateRelationshipsFromGraph",
    "GenerateNodesAndEdgesFromGraph",
    "LoadLabelsToCDF",
    "LoadAssetsToCDF",
    "LoadRelationshipsToCDF",
    "LoadNodesToCDF",
    "LoadEdgesToCDF",
    "LoadGraphToRdfFile",
]

# Category assigned to every step class in this module via `category = CATEGORY`:
# the last component of the module's dotted name with underscores replaced by
# spaces, title-cased, followed by the " [LEGACY]" suffix.
CATEGORY = __name__.split(".")[-1].replace("_", " ").title() + " [LEGACY]"
56
-
57
-
58
class LoadLabelsToCDF(Step):
    """
    This step creates and loads default NEAT labels in CDF

    Configurables:
        data_set_id: CDF dataset id to which the labels will be added. It is
            converted with ``int()``, so it must be set to a numeric value
            (the default is an empty string, which ``int()`` rejects).
    """

    description = "This step creates default NEAT labels in CDF"
    category = CATEGORY
    version = "legacy"
    configurables: ClassVar[list[Configurable]] = [
        Configurable(name="data_set_id", value="", label=("CDF dataset id to which the labels will be added."))
    ]

    def run(self, rules: RulesData, cdf_client: CogniteClient) -> None:  # type: ignore[override, syntax]
        """Upload the labels derived from ``rules`` plus the two extra
        ``"non-historic"`` / ``"historic"`` labels through ``upload_labels``.

        Args:
            rules: Rules container; ``rules.rules`` is handed to ``upload_labels``.
            cdf_client: CDF client used for the upload.

        Raises:
            StepNotInitialized: If the step has no configuration.
            ValueError: If ``data_set_id`` is not an integer string.
        """
        # Same guard as the other steps in this module: fail with the
        # step-specific exception instead of a TypeError on ``None["data_set_id"]``.
        if self.configs is None:
            raise StepNotInitialized(type(self).__name__)

        upload_labels(
            cdf_client,
            rules.rules,
            data_set_id=int(self.configs["data_set_id"]),
            extra_labels=["non-historic", "historic"],
        )
77
-
78
-
79
class GenerateNodesAndEdgesFromGraph(Step):
    """
    The step generates nodes and edges from the graph

    The graph is looked up in the flow context (``"SolutionGraph"`` or
    ``"SourceGraph"``, selected by the ``graph_name`` configurable) and handed
    to ``graph_loader.DMSLoader`` together with the rules. Exceptions collected
    by the loader are written to a text report under
    ``<data_store_path>/reports`` and linked from the returned message.
    """

    description = "The step generates nodes and edges from the graph"
    category = CATEGORY

    configurables: ClassVar[list[Configurable]] = [
        Configurable(
            name="graph_name",
            value="source",
            options=["source", "solution"],
            label=("The name of the graph to be used for matching." " Supported options : source, solution"),
        ),
        Configurable(
            name="add_class_prefix",
            value="False",
            options=["True", "False"],
            label=("Whether to add class name as a prefix to external ids of instances or not"),
        ),
        Configurable(
            name="data_validation_error_handling_strategy",
            value="skip_and_report",
            options=["skip_and_report", "fail_and_report"],
            # Built by implicit concatenation rather than backslash continuation
            # inside the literal, so source indentation can never leak into the label.
            label=(
                "The strategy for handling data validation errors. Supported options: "
                "skip_and_report - failed instance (node or edge) will be skipped and reported , "
                "fail_and_report - failed instance (node or edge) will fail the workflow and report the error"
            ),
        ),
        Configurable(
            name="apply_basic_transformation",
            value="True",
            options=["True", "False"],
            label=("Whether to apply basic transformations rules (rdfpath) or not. Default is True."),
        ),
    ]

    def run(  # type: ignore[override, syntax]
        self, rules: RulesData, graph: SourceGraph | SolutionGraph
    ) -> (FlowMessage, Nodes, Edges):  # type: ignore[syntax]
        """Build nodes and edges from the configured graph.

        Args:
            rules: Rules container; ``rules.rules`` drives the loader.
            graph: Ignored in practice - it is replaced by the graph stored in
                the flow context under the configured name.

        Returns:
            ``(FlowMessage, Nodes, Edges)`` on success. When exceptions were
            collected and the strategy is ``fail_and_report``, a single
            ``FlowMessage`` with ``ABORT_AND_FAIL`` status is returned instead.

        Raises:
            StepNotInitialized: If configs or data store path are missing.
            StepFlowContextNotInitialized: If the flow context is missing.
        """
        if self.configs is None or self.data_store_path is None:
            raise StepNotInitialized(type(self).__name__)
        if self.flow_context is None:
            raise StepFlowContextNotInitialized(type(self).__name__)

        graph_name = self.configs["graph_name"] or "source"
        data_validation_error_handling_strategy = self.configs.get(
            "data_validation_error_handling_strategy", "skip_and_report"
        )

        # Todo Anders: Why is the graph fetched from context when it is passed as an argument?
        context_key = "SolutionGraph" if graph_name == "solution" else "SourceGraph"
        graph = cast(SourceGraph | SolutionGraph, self.flow_context[context_key])

        # Configurable values are the strings "True" / "False".
        add_class_prefix = self.configs["add_class_prefix"] == "True"
        apply_basic_transformation = self.configs.get("apply_basic_transformation", "True") == "True"

        if apply_basic_transformation:
            final_rules = rules.rules
        else:
            logging.debug("Basic transformation rules are not applied to the graph")
            # Work on a deep copy so the rules shared with other steps stay untouched,
            # then overwrite every property rule with a plain
            # "<prefix>:<class>(<prefix>:<property>)" rdfpath.
            final_rules = rules.rules.model_copy(deep=True)
            prefix = final_rules.metadata.prefix
            for rule in final_rules.properties.values():
                rule.rule_type = TransformationRuleType.rdfpath
                rule.rule = f"{prefix}:{rule.class_id}({prefix}:{rule.property_id})"

        loader = graph_loader.DMSLoader(final_rules, graph.graph, add_class_prefix=add_class_prefix)
        nodes, edges, exceptions = loader.as_nodes_and_edges(stop_on_exception=False)

        msg = f"Total count of: <ul><li>{len(nodes)} nodes</li><li>{len(edges)} edges</li></ul>"

        if exceptions:
            # Year-month-day order (was "%Y%d%m", i.e. year-day-month), so report
            # names are unambiguous and sort chronologically.
            file_name = f'nodes-and-edges-exceptions_{datetime.now().strftime("%Y%m%d%H%M")}.txt'
            exceptions_report_dir = self.data_store_path / "reports"
            exceptions_report_dir.mkdir(parents=True, exist_ok=True)
            exceptions_report_path = exceptions_report_dir / file_name

            exceptions_report_path.write_text(generate_exception_report(exceptions, "Errors"))
            # time.time() is appended as a query string; presumably a cache buster - confirm.
            msg += (
                f"<p>There is total of {len(exceptions)} exceptions</p>"
                f'<a href="/data/reports/{file_name}?{time.time()}" '
                f'target="_blank">Full error report </a>'
            )
            if data_validation_error_handling_strategy == "fail_and_report":
                # NOTE(review): this returns a bare FlowMessage while the signature
                # advertises a 3-tuple - kept as is; confirm the workflow engine accepts it.
                return FlowMessage(error_text=msg, step_execution_status=StepExecutionStatus.ABORT_AND_FAIL)

        return FlowMessage(output_text=msg), Nodes(nodes=nodes), Edges(edges=edges)
170
-
171
-
172
class LoadGraphToRdfFile(Step):
    """
    The step loads the graph to an RDF file

    The source or solution graph (selected by the ``graph_name`` configurable)
    is taken from the flow context and serialized in Turtle format to
    ``<data_store_path>/<rdf_file_path>``.
    """

    # Was a copy of the nodes-and-edges step description.
    description = "The step loads the graph to an RDF file"
    category = CATEGORY
    version = "legacy"
    configurables: ClassVar[list[Configurable]] = [
        Configurable(
            name="graph_name",
            value="source",
            options=["source", "solution"],
            label=("The name of the graph to be used for loading RDF File." " Supported options : source, solution"),
        ),
        Configurable(
            name="rdf_file_path",
            value="staging/graph_export.ttl",
            label=("Relative path for the RDF file storage, " "must end with .ttl !"),
        ),
    ]

    def run(  # type: ignore[override, syntax]
        self, graph: SourceGraph | SolutionGraph
    ) -> FlowMessage:  # type: ignore[syntax]
        """Serialize the configured graph and return a message linking to the file.

        Args:
            graph: Ignored in practice - it is replaced by the graph stored in
                the flow context under the configured name.

        Returns:
            A ``FlowMessage`` whose text contains a download link to the file.

        Raises:
            StepNotInitialized: If configs or data store path are missing.
            StepFlowContextNotInitialized: If the flow context is missing.
        """
        if self.configs is None or self.data_store_path is None:
            raise StepNotInitialized(type(self).__name__)
        # The graph is read from the flow context below, so guard it the same
        # way GenerateNodesAndEdgesFromGraph does.
        if self.flow_context is None:
            raise StepFlowContextNotInitialized(type(self).__name__)

        storage_path = self.data_store_path / Path(self.configs["rdf_file_path"])

        # Path of the file relative to the data store root, with forward slashes
        # for use in a URL. The previous `str(path).split("/data/")[1]` raised
        # IndexError with Windows separators and picked the wrong segment when
        # "/data/" occurred more than once in the path.
        # NOTE(review): assumes the data store root is what is served under
        # "/data" (the report link in GenerateNodesAndEdgesFromGraph relies on
        # the same mapping) - confirm.
        try:
            relative_graph_file_path = storage_path.relative_to(self.data_store_path).as_posix()
        except ValueError:
            # rdf_file_path was absolute and lies outside the data store.
            relative_graph_file_path = storage_path.as_posix().split("/data/", 1)[-1]

        graph_name = self.configs["graph_name"] or "source"

        # Todo Anders: Why is the graph fetched from context when it is passed as an argument?
        context_key = "SolutionGraph" if graph_name == "solution" else "SourceGraph"
        graph = cast(SourceGraph | SolutionGraph, self.flow_context[context_key])

        # Make sure the target directory exists before serializing into it.
        storage_path.parent.mkdir(parents=True, exist_ok=True)
        graph.graph.serialize(str(storage_path), format="turtle")

        # time.time() is appended as a query string; presumably a cache buster - confirm.
        output_text = (
            "<p></p>"
            "Graph loaded to RDF file can be downloaded here : "
            f'<a href="/data/{relative_graph_file_path}?{time.time()}" '
            f'target="_blank">{storage_path.stem}.ttl</a>'
        )

        return FlowMessage(output_text=output_text)
221
-
222
-
223
class LoadNodesToCDF(Step):
    """
    Workflow step that pushes previously generated nodes to CDF.
    """

    description = "This step uploads nodes to CDF"
    category = CATEGORY
    version = "legacy"

    def run(self, cdf_client: CogniteClient, nodes: Nodes) -> FlowMessage:  # type: ignore[override, syntax]
        """Upload ``nodes.nodes`` through ``upload_nodes`` and report the outcome.

        Nothing is uploaded when the container holds no nodes.
        """
        # Guard clause: nothing to do for an empty container.
        if not nodes.nodes:
            return FlowMessage(output_text="No nodes to upload!")

        upload_nodes(cdf_client, nodes.nodes, max_retries=2, retry_delay=4)
        return FlowMessage(output_text="CDF nodes uploaded successfully")
238
-
239
-
240
class LoadEdgesToCDF(Step):
    """
    Workflow step that pushes previously generated edges to CDF.
    """

    description = "This step uploads edges to CDF"
    category = CATEGORY
    version = "legacy"

    def run(self, cdf_client: CogniteClient, edges: Edges) -> FlowMessage:  # type: ignore[override, syntax]
        """Upload ``edges.edges`` through ``upload_edges`` and report the outcome.

        Nothing is uploaded when the container holds no edges.
        """
        # Guard clause: nothing to do for an empty container.
        if not edges.edges:
            return FlowMessage(output_text="No edges to upload!")

        upload_edges(cdf_client, edges.edges, max_retries=2, retry_delay=4)
        return FlowMessage(output_text="CDF edges uploaded successfully")
255
-
256
-
257
class GenerateAssetsFromGraph(Step):
    """
    The step generates assets from the graph, categorizes them and stores them in CategorizedAssets object

    Processing order:
      1. build asset dicts from the solution graph (``rdf2assets``),
      2. drop labels that do not exist in CDF,
      3. validate the hierarchy and handle orphan / circular assets according to
         the ``assets_cleanup_type`` configurable,
      4. re-validate and abort if problems remain,
      5. categorize the assets against CDF (``categorize_assets``).
    """

    description = (
        "The step generates assets from the graph ,categorizes them and stores them in CategorizedAssets object"
    )
    category = CATEGORY
    version = "legacy"
    configurables: ClassVar[list[Configurable]] = [
        Configurable(name="data_set_id", value="", label=("CDF dataset id to which the labels will be added.")),
        Configurable(
            name="asset_external_id_prefix",
            value="",
            label=("Prefix to be added to all asset external ids, default None."),
        ),
        Configurable(
            name="assets_cleanup_type",
            value="nothing",
            options=["nothing", "orphans", "circular", "full"],
            # Built by implicit concatenation rather than backslash continuation
            # inside the literal, so source indentation can never leak into the label.
            label=(
                "Configures asset cleanup process. Supported options: nothing - no cleanup, "
                "orphans - all orphan assets will be removed, circular - all circular assets will be removed , "
                "full - full cleanup , both orphans and circular assets will be removed. "
            ),
        ),
    ]

    @staticmethod
    def _abort(msg: str):
        """Log ``msg`` as an error and build the failing step result (abort message + empty assets)."""
        logging.error(msg)
        return FlowMessage(
            error_text=msg, step_execution_status=StepExecutionStatus.ABORT_AND_FAIL
        ), CategorizedAssets(assets={})

    @staticmethod
    def _remove_with_descendants(root_ids, rdf_asset_dicts, parent_children_map) -> None:
        """Remove every id in ``root_ids`` and all of its descendants from ``rdf_asset_dicts``.

        Iterative (explicit stack) instead of recursive so that deep hierarchies
        cannot hit the interpreter recursion limit; the ``visited`` set keeps the
        walk finite even if the parent/children map were to contain a loop.
        """
        pending = list(root_ids)
        visited: set = set()
        while pending:
            asset_id = pending.pop()
            if asset_id in visited:
                continue
            visited.add(asset_id)
            rdf_asset_dicts.pop(asset_id, None)
            if asset_id in parent_children_map:
                pending.extend(parent_children_map[asset_id])

    def run(  # type: ignore[override]
        self, rules: RulesData, cdf_client: CogniteClient, solution_graph: SolutionGraph
    ) -> (FlowMessage, CategorizedAssets):  # type: ignore[override, syntax]
        """Generate and categorize assets from ``solution_graph``.

        Args:
            rules: Rules container; ``rules.rules`` drives ``rdf2assets``.
            cdf_client: CDF client used for label checks, counts and categorization.
            solution_graph: Graph container; ``solution_graph.graph`` is the source.

        Returns:
            ``(FlowMessage, CategorizedAssets)``. On hierarchy problems that
            cannot be resolved the message carries ``ABORT_AND_FAIL`` and the
            assets container is empty.

        Raises:
            StepNotInitialized: If the step has no configuration.
            ValueError: If metrics are not configured, or ``data_set_id`` is
                not an integer string.
        """
        if self.configs is None:
            raise StepNotInitialized(type(self).__name__)
        asset_cleanup_type = self.configs.get("assets_cleanup_type", "nothing")
        data_set_id = int(self.configs["data_set_id"])
        asset_external_id_prefix = self.configs.get("asset_external_id_prefix", None)

        meta_keys = NeatMetadataKeys.load(self.configs)

        if self.metrics is None:
            raise ValueError(self._not_configured_message)
        prom_cdf_resource_stats = cast(
            Gauge,
            self.metrics.register_metric(
                "cdf_resources_stats",
                "CDF resource stats before and after running the workflow",
                m_type="gauge",
                metric_labels=["resource_type", "state"],
            ),
        )
        prom_data_issues_stats = cast(
            Gauge,
            self.metrics.register_metric(
                "data_issues_stats", "Data validation issues", m_type="gauge", metric_labels=["resource_type"]
            ),
        )

        rdf_asset_dicts = rdf2assets(
            solution_graph.graph,
            rules.rules,
            data_set_id=data_set_id,
            asset_external_id_prefix=asset_external_id_prefix,
            stop_on_exception=True,
            meta_keys=meta_keys,
        )

        # UPDATE: 2023-04-05 - correct aggregation of assets in CDF for specific dataset
        total_assets_before = cdf_client.assets.aggregate(filter=AssetFilter(data_set_ids=[{"id": data_set_id}]))[
            0
        ].count

        # Label validation: keep only labels that exist in CDF and log what was dropped.
        labels_before = unique_asset_labels(rdf_asset_dicts.values())
        logging.info(f"Assets have {len(labels_before)} unique labels: {', '.join(sorted(labels_before))}")

        rdf_asset_dicts = cast(dict[str, dict[str, Any]], remove_non_existing_labels(cdf_client, rdf_asset_dicts))

        labels_after = unique_asset_labels(rdf_asset_dicts.values())
        removed_labels = labels_before - labels_after
        logging.info(
            f"Removed {len(removed_labels)} labels as these do not exists in CDF. "
            f"Removed labels: {', '.join(sorted(removed_labels))}"
        )

        prom_cdf_resource_stats.labels(resource_type="asset", state="count_before_neat_update").set(total_assets_before)
        logging.info(f"Total count of assets in CDF before upload: {total_assets_before}")

        orphanage_asset_external_id = (
            f"{asset_external_id_prefix}orphanage-{data_set_id}"
            if asset_external_id_prefix
            else f"orphanage-{data_set_id}"
        )
        orphan_assets, circular_assets, parent_children_map = validate_asset_hierarchy(rdf_asset_dicts)

        # There could be assets already under a created orphanage asset. Include those in the orphan assets list
        if orphanage_asset_external_id in parent_children_map:
            orphan_assets.extend(parent_children_map[orphanage_asset_external_id])

        # Counts are captured now because both lists are re-assigned by the re-validation below.
        orphan_assets_count = len(orphan_assets)
        circular_assets_count = len(circular_assets)
        prom_data_issues_stats.labels(resource_type="circular_assets").set(circular_assets_count)
        prom_data_issues_stats.labels(resource_type="orphan_assets").set(orphan_assets_count)

        if orphan_assets:
            logging.error(f"Found orphaned assets: {', '.join(orphan_assets)}")

            if asset_cleanup_type in ["orphans", "full"]:
                logging.info("Removing orphaned assets and its children")

                # Make sure children, grand-children, great-grandchildren .... are deleted
                self._remove_with_descendants(orphan_assets, rdf_asset_dicts, parent_children_map)

                # delete the orphanage asset itself
                rdf_asset_dicts.pop(orphanage_asset_external_id, None)

            else:
                # Kill the process if you dont have orphanage asset in your asset hierarchy
                # and inform the user that it is missing !
                if orphanage_asset_external_id not in rdf_asset_dicts:
                    return self._abort(
                        f"You dont have Orphanage asset {orphanage_asset_external_id} in asset hierarchy!"
                    )

                logging.error("Orphaned assets will be assigned to 'Orphanage' root asset")

                for external_id in orphan_assets:
                    rdf_asset_dicts[external_id]["parent_external_id"] = orphanage_asset_external_id
        else:
            logging.info("No orphaned assets found, your assets look healthy !")

        if circular_assets:
            logging.error(f"Found circular dependencies: {circular_assets}")
            if asset_cleanup_type in ["circular", "full"]:
                logging.info("Removing circular assets")
                for circular_path in circular_assets:
                    # The last element of each reported path is the asset removed.
                    # pop() instead of del: the id may already be gone (e.g. it ends
                    # more than one reported path), which previously raised KeyError.
                    rdf_asset_dicts.pop(circular_path[-1], None)
        else:
            logging.info("No circular dependency among assets found, your assets hierarchy look healthy !")

        if orphan_assets or circular_assets:
            # Re-validate after the fixes above; anything still wrong is fatal.
            orphan_assets, circular_assets, _ = validate_asset_hierarchy(rdf_asset_dicts)
            if circular_assets:
                return self._abort(f"Found circular dependencies: {circular_assets!s}")
            elif orphan_assets:
                return self._abort(f"Not able to fix orphans: {', '.join(orphan_assets)}")
        else:
            logging.info("No circular dependency among assets found, your assets hierarchy look healthy !")

        categorized_assets, report = categorize_assets(
            cdf_client, rdf_asset_dicts, data_set_id=data_set_id, return_report=True
        )

        count_create_assets = len(categorized_assets["create"])
        count_update_assets = len(categorized_assets["update"])
        count_decommission_assets = len(categorized_assets["decommission"])
        count_resurrect_assets = len(categorized_assets["resurrect"])

        prom_cdf_resource_stats.labels(resource_type="asset", state="create").set(count_create_assets)
        prom_cdf_resource_stats.labels(resource_type="asset", state="update").set(count_update_assets)
        prom_cdf_resource_stats.labels(resource_type="asset", state="decommission").set(count_decommission_assets)
        prom_cdf_resource_stats.labels(resource_type="asset", state="resurrect").set(count_resurrect_assets)

        logging.info(f"Total count of assets to be created: {count_create_assets}")
        logging.info(f"Total count of assets to be updated: {count_update_assets}")
        logging.info(f"Total count of assets to be decommission: {count_decommission_assets}")
        logging.info(f"Total count of assets to be resurrect: {count_resurrect_assets}")

        msg = f"Total count of assets {len(rdf_asset_dicts)} of which:"
        msg += f"<p> {count_create_assets} to be created </p>"
        msg += f"<p> {count_update_assets} to be updated </p>"
        msg += f"<p> {count_decommission_assets} to be decommissioned </p>"
        msg += f"<p> {count_resurrect_assets} to be resurrected </p>"
        msg += f"<p> Found {orphan_assets_count} orphan assets and"
        msg += f" {circular_assets_count} circular assets </p>"
        if asset_cleanup_type != "nothing":
            msg += " <p> All circular and orphan assets were removed successfully </p>"
        # NOTE(review): "updates" is taken from the decommission report only - confirm this is intended.
        number_of_updates = len(report["decommission"])
        logging.info(f"Total number of updates: {number_of_updates}")

        return FlowMessage(output_text=msg), CategorizedAssets(assets=categorized_assets)
463
-
464
-
465
class LoadAssetsToCDF(Step):
    """
    Workflow step that pushes categorized assets to CDF and records the
    resulting asset count of the data set as a metric.
    """

    description = "This step uploads categorized assets to CDF"
    category = CATEGORY
    version = "legacy"

    def run(  # type: ignore[override]
        self, cdf_client: CogniteClient, categorized_assets: CategorizedAssets, flow_msg: FlowMessage
    ) -> FlowMessage:
        """Upload the assets, then poll CDF until the data set holds at least
        as many assets as were scheduled for creation.

        Raises:
            Exception: If ``flow_msg`` carries an ``"action"`` other than ``"approve"``.
            ValueError: If metrics are not configured.
        """
        # An explicit action in the incoming message must be an approval.
        has_action = flow_msg and flow_msg.payload and "action" in flow_msg.payload
        if has_action and flow_msg.payload["action"] != "approve":
            raise Exception("Update not approved")
        if self.metrics is None:
            raise ValueError(self._not_configured_message)

        resource_gauge = cast(
            Gauge,
            self.metrics.register_metric(
                "cdf_resources_stats",
                "CDF resource stats before and after running the workflow",
                m_type="gauge",
                metric_labels=["resource_type", "state"],
            ),
        )
        upload_assets(cdf_client, categorized_assets.assets, max_retries=2, retry_delay=4)
        expected_minimum = len(categorized_assets.assets["create"])

        # gets first asset available irrespective of its category
        sample = next((group[0] for group in categorized_assets.assets.values() if group), None)
        if not sample:
            return FlowMessage(output_text="No assets to upload!")

        data_set_id = cast(Asset, sample).data_set_id

        def asset_count():
            # Current number of assets CDF reports for the data set.
            return cdf_client.assets.aggregate(filter=AssetFilter(data_set_ids=[{"id": data_set_id}]))[0].count

        # Poll at most 1000 times, two seconds apart, until the count catches up.
        for _attempt in range(1000):
            total_assets_after = asset_count()
            if total_assets_after >= expected_minimum:
                break
            logging.info(f"Waiting for assets to be created, current count {total_assets_after}")
            time.sleep(2)

        # UPDATE: 2023-04-05 - correct aggregation of assets in CDF for specific dataset
        total_assets_after = asset_count()

        resource_gauge.labels(resource_type="asset", state="count_after_neat_update").set(total_assets_after)
        summary = f"Total count of assets in CDF after update: {total_assets_after}"
        logging.info(summary)
        del categorized_assets.assets  # free up memory after upload .
        return FlowMessage(output_text=summary)
522
-
523
-
524
class GenerateRelationshipsFromGraph(Step):
    """
    Workflow step that derives relationships from the solution graph,
    categorizes them against CDF and wraps them in a CategorizedRelationships object.
    """

    description = "This step generates relationships from the graph and saves them to CategorizedRelationships object"
    category = CATEGORY
    version = "legacy"
    configurables: ClassVar[list[Configurable]] = [
        Configurable(name="data_set_id", value="", label=("CDF dataset id to which the labels will be added.")),
        Configurable(
            name="relationship_external_id_prefix",
            value="",
            label=("Prefix to be added to all asset external ids, default None."),
        ),
    ]

    def run(  # type: ignore[override]
        self, rules: RulesData, cdf_client: CogniteClient, solution_graph: SolutionGraph
    ) -> (FlowMessage, CategorizedRelationships):  # type: ignore[arg-type, syntax]
        """Create and categorize relationships, publish their counts as gauge
        values and return a summary message with the categorized relationships.

        Raises:
            ValueError: If metrics are not configured (checked after
                categorization), or ``data_set_id`` is not an integer string.
        """
        # create and categorize relationships
        data_set_id = int(self.configs["data_set_id"])
        external_id_prefix = self.configs.get("relationship_external_id_prefix", None)

        rdf_relationships = rdf2relationships(
            solution_graph.graph,
            rules.rules,
            data_set_id=data_set_id,
            relationship_external_id_prefix=external_id_prefix,
        )
        categorized_relationships = categorize_relationships(cdf_client, rdf_relationships, data_set_id)

        # Gauge state name -> count, in the order the gauge values are set.
        counts = {
            "defined": len(rdf_relationships),
            "create": len(categorized_relationships["create"]),
            "decommission": len(categorized_relationships["decommission"]),
            "resurrect": len(categorized_relationships["resurrect"]),
        }

        if self.metrics is None:
            raise ValueError(self._not_configured_message)

        resource_gauge = cast(
            Gauge,
            self.metrics.register_metric(
                "cdf_resources_stats",
                "CDF resource stats before and after running the workflow",
                m_type="gauge",
                metric_labels=["resource_type", "state"],
            ),
        )
        for state, value in counts.items():
            resource_gauge.labels(resource_type="relationships", state=state).set(value)

        msg = (
            f"Total count of relationships {counts['defined']} of which:"
            f" {counts['create']} to be created"
            f", {counts['decommission']} to be decommissioned"
            f", {counts['resurrect']} to be resurrected"
        )

        return FlowMessage(output_text=msg), CategorizedRelationships(relationships=categorized_relationships)
591
-
592
-
593
class LoadRelationshipsToCDF(Step):
    """
    Workflow step that pushes categorized relationships to CDF.
    """

    description = "This step uploads relationships to CDF"
    category = CATEGORY
    version = "legacy"

    def run(  # type: ignore[override, syntax]
        self, client: CogniteClient, categorized_relationships: CategorizedRelationships
    ) -> FlowMessage:
        """Upload the relationships through ``upload_relationships`` and confirm."""
        relationships = categorized_relationships.relationships
        upload_relationships(client, relationships, max_retries=2, retry_delay=4)
        return FlowMessage(output_text="CDF relationships uploaded successfully")