cognite-neat 0.87.4__py3-none-any.whl → 0.88.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (132)
  1. cognite/neat/_version.py +1 -1
  2. cognite/neat/app/api/data_classes/rest.py +0 -19
  3. cognite/neat/app/api/explorer.py +6 -4
  4. cognite/neat/app/api/routers/crud.py +11 -21
  5. cognite/neat/app/api/routers/workflows.py +24 -94
  6. cognite/neat/graph/extractors/_classic_cdf/_assets.py +8 -2
  7. cognite/neat/graph/extractors/_mock_graph_generator.py +2 -2
  8. cognite/neat/graph/loaders/_base.py +17 -12
  9. cognite/neat/graph/loaders/_rdf2asset.py +223 -58
  10. cognite/neat/graph/loaders/_rdf2dms.py +1 -1
  11. cognite/neat/graph/stores/_base.py +5 -0
  12. cognite/neat/rules/analysis/_asset.py +31 -1
  13. cognite/neat/rules/importers/_inference2rules.py +31 -35
  14. cognite/neat/rules/models/information/_rules.py +1 -1
  15. cognite/neat/workflows/steps/data_contracts.py +17 -43
  16. cognite/neat/workflows/steps/lib/current/graph_extractor.py +28 -24
  17. cognite/neat/workflows/steps/lib/current/graph_loader.py +4 -21
  18. cognite/neat/workflows/steps/lib/current/graph_store.py +18 -134
  19. cognite/neat/workflows/steps_registry.py +5 -7
  20. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/METADATA +1 -1
  21. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/RECORD +24 -132
  22. cognite/neat/app/api/routers/core.py +0 -91
  23. cognite/neat/app/api/routers/data_exploration.py +0 -336
  24. cognite/neat/app/api/routers/rules.py +0 -203
  25. cognite/neat/legacy/__init__.py +0 -0
  26. cognite/neat/legacy/graph/__init__.py +0 -3
  27. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44-dirty.xml +0 -20182
  28. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44.xml +0 -20163
  29. cognite/neat/legacy/graph/examples/__init__.py +0 -10
  30. cognite/neat/legacy/graph/examples/skos-capturing-sheet-wind-topics.xlsx +0 -0
  31. cognite/neat/legacy/graph/exceptions.py +0 -90
  32. cognite/neat/legacy/graph/extractors/__init__.py +0 -6
  33. cognite/neat/legacy/graph/extractors/_base.py +0 -14
  34. cognite/neat/legacy/graph/extractors/_dexpi.py +0 -44
  35. cognite/neat/legacy/graph/extractors/_graph_capturing_sheet.py +0 -403
  36. cognite/neat/legacy/graph/extractors/_mock_graph_generator.py +0 -361
  37. cognite/neat/legacy/graph/loaders/__init__.py +0 -23
  38. cognite/neat/legacy/graph/loaders/_asset_loader.py +0 -511
  39. cognite/neat/legacy/graph/loaders/_base.py +0 -67
  40. cognite/neat/legacy/graph/loaders/_exceptions.py +0 -85
  41. cognite/neat/legacy/graph/loaders/core/__init__.py +0 -0
  42. cognite/neat/legacy/graph/loaders/core/labels.py +0 -58
  43. cognite/neat/legacy/graph/loaders/core/models.py +0 -136
  44. cognite/neat/legacy/graph/loaders/core/rdf_to_assets.py +0 -1046
  45. cognite/neat/legacy/graph/loaders/core/rdf_to_relationships.py +0 -559
  46. cognite/neat/legacy/graph/loaders/rdf_to_dms.py +0 -309
  47. cognite/neat/legacy/graph/loaders/validator.py +0 -87
  48. cognite/neat/legacy/graph/models.py +0 -6
  49. cognite/neat/legacy/graph/stores/__init__.py +0 -13
  50. cognite/neat/legacy/graph/stores/_base.py +0 -400
  51. cognite/neat/legacy/graph/stores/_graphdb_store.py +0 -52
  52. cognite/neat/legacy/graph/stores/_memory_store.py +0 -43
  53. cognite/neat/legacy/graph/stores/_oxigraph_store.py +0 -151
  54. cognite/neat/legacy/graph/stores/_oxrdflib.py +0 -247
  55. cognite/neat/legacy/graph/stores/_rdf_to_graph.py +0 -42
  56. cognite/neat/legacy/graph/transformations/__init__.py +0 -0
  57. cognite/neat/legacy/graph/transformations/entity_matcher.py +0 -101
  58. cognite/neat/legacy/graph/transformations/query_generator/__init__.py +0 -3
  59. cognite/neat/legacy/graph/transformations/query_generator/sparql.py +0 -575
  60. cognite/neat/legacy/graph/transformations/transformer.py +0 -322
  61. cognite/neat/legacy/rules/__init__.py +0 -0
  62. cognite/neat/legacy/rules/analysis.py +0 -231
  63. cognite/neat/legacy/rules/examples/Rules-Nordic44-to-graphql.xlsx +0 -0
  64. cognite/neat/legacy/rules/examples/Rules-Nordic44.xlsx +0 -0
  65. cognite/neat/legacy/rules/examples/__init__.py +0 -18
  66. cognite/neat/legacy/rules/examples/power-grid-containers.yaml +0 -124
  67. cognite/neat/legacy/rules/examples/power-grid-example.xlsx +0 -0
  68. cognite/neat/legacy/rules/examples/power-grid-model.yaml +0 -224
  69. cognite/neat/legacy/rules/examples/rules-template.xlsx +0 -0
  70. cognite/neat/legacy/rules/examples/sheet2cdf-transformation-rules.xlsx +0 -0
  71. cognite/neat/legacy/rules/examples/skos-rules.xlsx +0 -0
  72. cognite/neat/legacy/rules/examples/source-to-solution-mapping-rules.xlsx +0 -0
  73. cognite/neat/legacy/rules/examples/wind-energy.owl +0 -1511
  74. cognite/neat/legacy/rules/exceptions.py +0 -2972
  75. cognite/neat/legacy/rules/exporters/__init__.py +0 -20
  76. cognite/neat/legacy/rules/exporters/_base.py +0 -45
  77. cognite/neat/legacy/rules/exporters/_core/__init__.py +0 -5
  78. cognite/neat/legacy/rules/exporters/_core/rules2labels.py +0 -24
  79. cognite/neat/legacy/rules/exporters/_rules2dms.py +0 -885
  80. cognite/neat/legacy/rules/exporters/_rules2excel.py +0 -213
  81. cognite/neat/legacy/rules/exporters/_rules2graphql.py +0 -183
  82. cognite/neat/legacy/rules/exporters/_rules2ontology.py +0 -524
  83. cognite/neat/legacy/rules/exporters/_rules2pydantic_models.py +0 -748
  84. cognite/neat/legacy/rules/exporters/_rules2rules.py +0 -105
  85. cognite/neat/legacy/rules/exporters/_rules2triples.py +0 -38
  86. cognite/neat/legacy/rules/exporters/_validation.py +0 -146
  87. cognite/neat/legacy/rules/importers/__init__.py +0 -22
  88. cognite/neat/legacy/rules/importers/_base.py +0 -66
  89. cognite/neat/legacy/rules/importers/_dict2rules.py +0 -158
  90. cognite/neat/legacy/rules/importers/_dms2rules.py +0 -194
  91. cognite/neat/legacy/rules/importers/_graph2rules.py +0 -308
  92. cognite/neat/legacy/rules/importers/_json2rules.py +0 -39
  93. cognite/neat/legacy/rules/importers/_owl2rules/__init__.py +0 -3
  94. cognite/neat/legacy/rules/importers/_owl2rules/_owl2classes.py +0 -239
  95. cognite/neat/legacy/rules/importers/_owl2rules/_owl2metadata.py +0 -260
  96. cognite/neat/legacy/rules/importers/_owl2rules/_owl2properties.py +0 -217
  97. cognite/neat/legacy/rules/importers/_owl2rules/_owl2rules.py +0 -290
  98. cognite/neat/legacy/rules/importers/_spreadsheet2rules.py +0 -45
  99. cognite/neat/legacy/rules/importers/_xsd2rules.py +0 -20
  100. cognite/neat/legacy/rules/importers/_yaml2rules.py +0 -39
  101. cognite/neat/legacy/rules/models/__init__.py +0 -5
  102. cognite/neat/legacy/rules/models/_base.py +0 -151
  103. cognite/neat/legacy/rules/models/raw_rules.py +0 -316
  104. cognite/neat/legacy/rules/models/rdfpath.py +0 -237
  105. cognite/neat/legacy/rules/models/rules.py +0 -1289
  106. cognite/neat/legacy/rules/models/tables.py +0 -9
  107. cognite/neat/legacy/rules/models/value_types.py +0 -118
  108. cognite/neat/legacy/workflows/examples/Export_DMS/workflow.yaml +0 -89
  109. cognite/neat/legacy/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
  110. cognite/neat/legacy/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
  111. cognite/neat/legacy/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  112. cognite/neat/legacy/workflows/examples/Import_DMS/workflow.yaml +0 -65
  113. cognite/neat/legacy/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
  114. cognite/neat/legacy/workflows/examples/Validate_Rules/workflow.yaml +0 -67
  115. cognite/neat/legacy/workflows/examples/Validate_Solution_Model/workflow.yaml +0 -64
  116. cognite/neat/legacy/workflows/examples/Visualize_Data_Model_Using_Mock_Graph/workflow.yaml +0 -95
  117. cognite/neat/legacy/workflows/examples/Visualize_Semantic_Data_Model/workflow.yaml +0 -111
  118. cognite/neat/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  119. cognite/neat/workflows/migration/__init__.py +0 -0
  120. cognite/neat/workflows/migration/steps.py +0 -91
  121. cognite/neat/workflows/migration/wf_manifests.py +0 -33
  122. cognite/neat/workflows/steps/lib/legacy/__init__.py +0 -7
  123. cognite/neat/workflows/steps/lib/legacy/graph_contextualization.py +0 -82
  124. cognite/neat/workflows/steps/lib/legacy/graph_extractor.py +0 -746
  125. cognite/neat/workflows/steps/lib/legacy/graph_loader.py +0 -606
  126. cognite/neat/workflows/steps/lib/legacy/graph_store.py +0 -307
  127. cognite/neat/workflows/steps/lib/legacy/graph_transformer.py +0 -58
  128. cognite/neat/workflows/steps/lib/legacy/rules_exporter.py +0 -511
  129. cognite/neat/workflows/steps/lib/legacy/rules_importer.py +0 -612
  130. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/LICENSE +0 -0
  131. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/WHEEL +0 -0
  132. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/entry_points.txt +0 -0
@@ -1,1046 +0,0 @@
1
- import logging
2
- import sys
3
- from collections.abc import Iterable, Mapping, Sequence
4
- from dataclasses import dataclass, fields
5
- from datetime import datetime
6
- from typing import Any, Literal, TypeAlias, cast, overload
7
- from warnings import warn
8
-
9
- import numpy as np
10
- import pandas as pd
11
- from cognite.client import CogniteClient
12
- from cognite.client.data_classes import Asset, AssetHierarchy, AssetList, AssetUpdate
13
- from cognite.client.exceptions import CogniteDuplicatedError
14
- from deepdiff import DeepDiff # type: ignore[import]
15
- from rdflib import Graph
16
- from rdflib.term import URIRef
17
-
18
- from cognite.neat.legacy.graph.loaders.core.models import AssetTemplate
19
- from cognite.neat.legacy.graph.stores import NeatGraphStoreBase
20
- from cognite.neat.legacy.rules.models.rules import Property, Rules
21
- from cognite.neat.utils.auxiliary import retry_decorator
22
- from cognite.neat.utils.collection_ import chunker
23
- from cognite.neat.utils.rdf_ import remove_namespace_from_uri
24
- from cognite.neat.utils.time_ import datetime_utc_now
25
-
26
- if sys.version_info >= (3, 11):
27
- from datetime import UTC
28
- from typing import Self
29
- else:
30
- from datetime import timezone
31
-
32
- from typing_extensions import Self
33
-
34
- UTC = timezone.utc
35
-
36
- EXCLUDE_PATHS = [
37
- "root['labels']",
38
- "root['metadata']['create_time']",
39
- "root['metadata']['start_time']",
40
- "root['metadata']['update_time']",
41
- "root['metadata']['end_time']",
42
- "root['metadata']['resurrection_time']", # need to account for assets that are brought back to life
43
- ]
44
-
45
-
46
- @dataclass
47
- class NeatMetadataKeys:
48
- """Class holding mapping between NEAT metadata key names and their desired names in
49
- in CDF Asset metadata
50
-
51
- Args:
52
- start_time: Start time key name
53
- end_time: End time key name
54
- update_time: Update time key name
55
- resurrection_time: Resurrection time key name
56
- identifier: Identifier key name
57
- active: Active key name
58
- type: Type key name
59
- """
60
-
61
- start_time: str = "start_time"
62
- end_time: str = "end_time"
63
- update_time: str = "update_time"
64
- resurrection_time: str = "resurrection_time"
65
- identifier: str = "identifier"
66
- active: str = "active"
67
- type: str = "type"
68
-
69
- @classmethod
70
- def load(cls, data: dict) -> Self:
71
- cls_field_names = {f.name for f in fields(cls)}
72
- valid_keys = {}
73
- for key, value in data.items():
74
- if key in cls_field_names:
75
- valid_keys[key] = value
76
- else:
77
- logging.warning(f"Invalid key set {key}")
78
-
79
- return cls(**valid_keys)
80
-
81
- def as_aliases(self) -> dict[str, str]:
82
- return {str(field.default): getattr(self, field.name) for field in fields(self)}
83
-
84
-
85
- def _get_class_instance_ids(graph: Graph, class_uri: URIRef, limit: int = -1) -> list[URIRef]:
86
- """Get instances ids for a given class
87
-
88
- Args:
89
- graph: Graph containing class instances
90
- class_uri: Class for which instances are to be found
91
- limit: Max number of instances to return, by default -1 meaning all instances
92
-
93
- Returns:
94
- List of class instance URIs
95
- """
96
-
97
- query_statement = "SELECT DISTINCT ?subject WHERE { ?subject a <class> .} LIMIT X".replace(
98
- "class", class_uri
99
- ).replace("LIMIT X", "" if limit == -1 else f"LIMIT {limit}")
100
- logging.debug(f"Query statement: {query_statement}")
101
- return [cast(tuple, res)[0] for res in list(graph.query(query_statement))]
102
-
103
-
104
- def _get_class_instance(graph: Graph, instance: URIRef) -> list[tuple]:
105
- """Get instance by means of tuples containing property-value pairs
106
- Args:
107
- graph: Graph containing class instances
108
- instance: Instance URI
109
-
110
- Returns:
111
- list of property-value pairs for given instance
112
- """
113
-
114
- query_statement = "SELECT DISTINCT ?predicate ?object WHERE {<subject> ?predicate ?object .}".replace(
115
- "subject", instance
116
- )
117
- result = list(cast(tuple, graph.query(query_statement)))
118
-
119
- # Adds instance id for the sake of keep the chain of custody
120
- result += [(URIRef("http://purl.org/dc/terms/identifier"), instance)]
121
-
122
- return result
123
-
124
-
125
- def _get_class_property_pairs(transformation_rules: Rules) -> dict[str, list[Property]]:
126
- """Define classes in terms of their properties
127
-
128
- Args:
129
- transformation_rules : Instance of TransformationRules containing class and property definitions
130
-
131
- Returns:
132
- Dict containing keys as class ids and list of their properties
133
- """
134
-
135
- classes: dict[str, list[Property]] = {}
136
-
137
- for property_ in transformation_rules.properties.keys():
138
- class_ = transformation_rules.properties[property_].class_id
139
- if class_ in classes:
140
- classes[class_] += [transformation_rules.properties[property_]]
141
- else:
142
- classes[class_] = [transformation_rules.properties[property_]]
143
-
144
- return classes
145
-
146
-
147
- def _define_asset_class_mapping(transformation_rules: Rules) -> dict[str, dict[str, list]]:
148
- """Define mapping from class to asset properties
149
-
150
- Args:
151
- transformation_rules : Instance of TransformationRules containing class and property definitions
152
-
153
- Returns:
154
- Dict containing mapping from class to asset properties
155
- """
156
- solution2cdf_mapping_rules = _get_class_property_pairs(transformation_rules)
157
-
158
- asset_class_mapping: dict[str, dict[str, list]] = {}
159
-
160
- for class_, properties in solution2cdf_mapping_rules.items():
161
- asset_class_mapping[class_] = {
162
- "external_id": [],
163
- "name": [],
164
- "description": [],
165
- "parent_external_id": [],
166
- "metadata": [],
167
- }
168
-
169
- for property_ in properties:
170
- if "Asset" in property_.cdf_resource_type and property_.property_name != "*":
171
- for resource_type_property in property_.resource_type_property or []:
172
- if (
173
- resource_type_property in asset_class_mapping[class_]
174
- and property_.property_name not in asset_class_mapping[class_][resource_type_property]
175
- ):
176
- asset_class_mapping[class_][resource_type_property] += [property_.property_name]
177
-
178
- if property_.property_name not in asset_class_mapping[class_]["metadata"]:
179
- # Todo; Why Nikola? This adds for example name property to metadata? Isn't that
180
- # controlled by the resource_type_property? If you would like this behavior you
181
- # would set resource_type_property to ["metadata", "name"]?
182
- asset_class_mapping[class_]["metadata"] += [property_.property_name]
183
-
184
- return asset_class_mapping
185
-
186
-
187
- def _remap_class_properties(class_instance: dict, asset_class_mapping: dict) -> tuple[dict, set, set]:
188
- """Remaps original class instance properties to asset properties (e.g., external_id, name, description, metadata)
189
-
190
- Args:
191
- class_instance: Dictionary containing class instance properties and values
192
- originating from RDF stripped from namespaces
193
- asset_class_mapping: Property mapping from class to asset
194
-
195
- Returns:
196
- Remapped class instance, set of missing asset properties and set of missing asset metadata
197
- """
198
- # Make distinction between missing properties that map into Asset fields
199
- # and missing RDF properties that are defined by sheet
200
- instance_properties = list(class_instance.keys())
201
- missing_properties = set()
202
-
203
- for property_group, ordered_properties in asset_class_mapping.items():
204
- if property_group != "metadata" and ordered_properties:
205
- if matching_property := next((a for a in ordered_properties if a in instance_properties), None):
206
- class_instance[property_group] = class_instance[matching_property]
207
- else:
208
- missing_properties.add(property_group)
209
-
210
- missing_metadata = set(asset_class_mapping["metadata"]).difference(set(instance_properties))
211
-
212
- return class_instance, missing_properties, missing_metadata
213
-
214
-
215
- def _class2asset_instance(
216
- class_: str,
217
- class_instance: dict,
218
- asset_class_mapping: dict,
219
- data_set_id: int,
220
- meta_keys: NeatMetadataKeys,
221
- orphanage_asset_external_id: str | None = None,
222
- external_id_prefix: str | None = None,
223
- fallback_property: str = NeatMetadataKeys.identifier,
224
- empty_name_default: str = "Missing Name",
225
- add_missing_metadata: bool = True,
226
- ) -> dict[str, Any]:
227
- """Converts class instance to asset instance dictionary
228
-
229
- Args:
230
- class_: Class name which instance is being converted to asset instance
231
- class_instance: Dictionary containing class instance properties and values originating from RDF
232
- stripped from namespaces
233
- asset_class_mapping: Property mapping from class to asset
234
- data_set_id: data set id to which asset belongs
235
- orphanage_asset_id: Orphanage asset external id, by default None
236
- external_id_prefix: External id prefix to be added to any external id, by default None
237
- fallback_property: Property from class instance to be used as fallback in case of
238
- missing properties, by default "identifier"
239
-
240
-
241
- Returns:
242
- Asset instance dictionary
243
- """
244
-
245
- remapped_class_instance, missing_properties, missing_metadata = _remap_class_properties(
246
- class_instance, asset_class_mapping
247
- )
248
-
249
- # setting class instance type to class name
250
- remapped_class_instance[meta_keys.type] = class_
251
- # This will be a default case since we want to use original identifier as external_id
252
- # We are though dropping namespace from the original identifier (avoiding long-tail URIs)
253
-
254
- if "external_id" in missing_properties or asset_class_mapping["external_id"] == []:
255
- try:
256
- __extracted_from___class2asset_instance_49(
257
- remapped_class_instance, fallback_property, "external_id", class_
258
- )
259
- except Exception:
260
- __extracted_from___class2asset_instance_56(fallback_property, class_, remapped_class_instance)
261
- # This should not be the use case however to still have name of the object we are using
262
- # fallback property here as well (typically identifier)
263
- if "name" in missing_properties:
264
- try:
265
- __extracted_from___class2asset_instance_49(remapped_class_instance, fallback_property, "name", class_)
266
- except Exception:
267
- __extracted_from___class2asset_instance_56(fallback_property, class_, remapped_class_instance)
268
-
269
- # If object is expected to have parent, but parent is not provided, it is added to orphanage
270
- # This is typically sign of objects not following proposed ontology/data model/schema
271
- if "parent_external_id" in missing_properties and orphanage_asset_external_id:
272
- remapped_class_instance["parent_external_id"] = orphanage_asset_external_id
273
-
274
- if "name" in remapped_class_instance and remapped_class_instance["name"] == "":
275
- remapped_class_instance["name"] = empty_name_default
276
- # To maintain shape across of all assets of specific type we are adding missing metadata
277
- # keys as empty strings, this was request by a customer
278
- # Generally this is bad practice, but more of a workaround of their bad data
279
- if missing_metadata and add_missing_metadata:
280
- msg = f"Adding missing metadata keys with values set to empty string for {class_}"
281
- msg += f" instance <{remapped_class_instance['identifier']}>. "
282
- logging.debug(msg)
283
- for key in missing_metadata:
284
- if key not in remapped_class_instance.keys():
285
- remapped_class_instance[key] = ""
286
- logging.debug(f"\tKey {key} added to <{remapped_class_instance['identifier']}> metadata!")
287
-
288
- asset_instance = AssetTemplate(
289
- **remapped_class_instance, external_id_prefix=external_id_prefix, data_set_id=data_set_id
290
- )
291
- # Removing field external_id_prefix from asset instance dictionary as it is only
292
- # convenience field for external_id and parent_external_id update in AssetTemplate
293
- return asset_instance.model_dump(exclude={"external_id_prefix"})
294
-
295
-
296
- # TODO Rename this here and in `__class2asset_instance`
297
- def __extracted_from___class2asset_instance_49(remapped_class_instance, fallback_property, arg2, class_):
298
- remapped_class_instance[arg2] = remapped_class_instance[fallback_property]
299
- msg = f"Missing external_id for {class_} instance <{remapped_class_instance['identifier']}>. "
300
- msg += f"Using value <{remapped_class_instance[fallback_property]}> provided "
301
- msg += f"by property <{fallback_property}>!"
302
-
303
- logging.debug(msg)
304
-
305
-
306
- # TODO Rename this here and in `__class2asset_instance`
307
- def __extracted_from___class2asset_instance_56(fallback_property, class_, remapped_class_instance):
308
- msg = f"Fallback property <{fallback_property}> not found for {class_} "
309
- msg += f"instance <{remapped_class_instance['identifier']}>."
310
- logging.error(msg)
311
- raise ValueError(msg)
312
-
313
-
314
- def _list2dict(class_instance: list) -> dict[str, Any]:
315
- """Converting list of class instance properties and values to dictionary
316
-
317
- Args:
318
- class_instance: Class instance properties and values originating from RDF as list of tuples
319
-
320
- Returns:
321
- Class instance properties and values as dictionary
322
- """
323
-
324
- class_instance_dict: dict[str, Any] = {}
325
- for property_value_pair in class_instance:
326
- property_ = remove_namespace_from_uri(property_value_pair[0])
327
-
328
- # Remove namespace from URIRef values, otherwise convert Literal to string
329
- # ideally this should react upon property type provided in sheet
330
- # however Assets only support string values
331
- value = (
332
- remove_namespace_from_uri(property_value_pair[1])
333
- if isinstance(property_value_pair[1], URIRef)
334
- else str(property_value_pair[1])
335
- )
336
-
337
- if property_ in class_instance_dict and value not in class_instance_dict[property_]:
338
- class_instance_dict[property_] = (
339
- class_instance_dict[property_] + [value]
340
- if isinstance(class_instance_dict[property_], list)
341
- else [class_instance_dict[property_], value]
342
- )
343
- else:
344
- class_instance_dict[property_] = value
345
-
346
- return class_instance_dict
347
-
348
-
349
- def rdf2assets(
350
- graph_store: NeatGraphStoreBase,
351
- rules: Rules,
352
- data_set_id: int,
353
- stop_on_exception: bool = False,
354
- use_orphanage: bool = True,
355
- meta_keys: NeatMetadataKeys | None = None,
356
- asset_external_id_prefix: str | None = None,
357
- ) -> dict[str, dict[str, Any]]:
358
- """Creates assets from RDF graph
359
-
360
- Args:
361
- graph_store : Graph containing RDF data
362
- rules : Instance of TransformationRules class containing transformation rules
363
- data_set_id: data set id to which assets belong
364
- stop_on_exception : Whether to stop upon exception.
365
- use_orphanage : Whether to use an orphanage for assets without parent_external_id
366
- meta_keys : The names of neat metadat keys to use.
367
-
368
- Returns:
369
- Dictionary representations of assets by external id.
370
- """
371
- meta_keys = NeatMetadataKeys() if meta_keys is None else meta_keys
372
- if rules.metadata.namespace is None:
373
- raise ValueError("Namespace must be provided in transformation rules!")
374
- namespace = rules.metadata.namespace
375
-
376
- orphanage_asset_external_id = f"{asset_external_id_prefix or ''}orphanage-{data_set_id}"
377
-
378
- graph = graph_store.get_graph()
379
- # Step 1: Create rdf to asset property mapping
380
- logging.info("Generating rdf to asset property mapping")
381
- asset_class_mapping = _define_asset_class_mapping(rules)
382
-
383
- # Step 4: Get ids of classes
384
- logging.info("Get ids of instances of classes")
385
- assets: dict[str, dict[str, Any]] = {}
386
- class_ids = {class_: _get_class_instance_ids(graph, namespace[class_]) for class_ in asset_class_mapping}
387
- # Step 5: Create Assets based on class instances
388
- logging.info("Create Assets based on class instances")
389
- meta_keys_aliases = meta_keys.as_aliases()
390
- for class_ in asset_class_mapping:
391
- # TODO: Rename class_id to instance_id
392
- class_ns = namespace[class_]
393
- logging.debug(f"Processing class <{class_ns}> . Number of instances: {len(class_ids[class_])}")
394
- progress_counter = 0
395
- # loading all instances into cache
396
- try:
397
- query = (
398
- f"SELECT ?instance ?prop ?value "
399
- f"WHERE {{ ?instance rdf:type <{class_ns}> . ?instance ?prop ?value . }} order by ?instance "
400
- )
401
- logging.info(query)
402
- response_df = graph_store.query_to_dataframe(query)
403
- except Exception as e:
404
- logging.error(f"Error while loading instances of class <{class_ns}> into cache. Reason: {e}")
405
- if stop_on_exception:
406
- raise e
407
- continue
408
-
409
- grouped_df = response_df.groupby("instance")
410
-
411
- for instance_id, group_df in grouped_df:
412
- try:
413
- instance_property_values = group_df.filter(items=["property", "value"]).values.tolist()
414
- instance_property_values += [(URIRef("http://purl.org/dc/terms/identifier"), URIRef(str(instance_id)))]
415
-
416
- # this will strip namespace from property names and values
417
- class_instance = _list2dict(instance_property_values)
418
-
419
- # class instance is repaired and converted to asset dictionary
420
- asset = _class2asset_instance(
421
- class_,
422
- class_instance,
423
- asset_class_mapping[class_],
424
- data_set_id,
425
- meta_keys,
426
- orphanage_asset_external_id if use_orphanage else None, # we need only base external id
427
- asset_external_id_prefix or None,
428
- fallback_property=meta_keys.identifier,
429
- )
430
-
431
- # adding labels and timestamps
432
- asset["labels"] = [asset["metadata"][meta_keys.type], "non-historic"]
433
- now = str(datetime.now(UTC))
434
- asset["metadata"][meta_keys.start_time] = now
435
- asset["metadata"][meta_keys.update_time] = now
436
- asset["metadata"] = {meta_keys_aliases.get(k, k): v for k, v in asset["metadata"].items()}
437
-
438
- # log every 10000 assets
439
- if progress_counter % 10000 == 0:
440
- logging.info(" Next 10000 Assets processed")
441
-
442
- assets[asset["external_id"]] = asset
443
- progress_counter += 1
444
- except Exception as ValidationError:
445
- logging.error(
446
- f"Skipping class <{class_}> instance <{remove_namespace_from_uri(str(instance_id))}>, "
447
- f"reason:\n{ValidationError}\n"
448
- )
449
- if stop_on_exception:
450
- raise ValidationError
451
-
452
- logging.debug(f"Class <{class_}> processed")
453
-
454
- if orphanage_asset_external_id not in assets:
455
- logging.warning(f"Orphanage with external id {orphanage_asset_external_id} not found in asset hierarchy!")
456
- logging.warning(f"Adding default orphanage with external id {orphanage_asset_external_id}")
457
- assets[orphanage_asset_external_id] = _create_orphanage(orphanage_asset_external_id, data_set_id, meta_keys)
458
-
459
- logging.info("Assets dictionary created")
460
-
461
- return assets
462
-
463
-
464
- def rdf2asset_dictionary(
465
- graph_store: NeatGraphStoreBase,
466
- transformation_rules: Rules,
467
- stop_on_exception: bool = False,
468
- use_orphanage: bool = True,
469
- ) -> dict[str, dict[str, Any]]:
470
- warn("'rdf2asset_dictionary' is deprecated, please use 'rdf2assets' instead!", stacklevel=2)
471
- logging.warning("'rdf2asset_dictionary' is deprecated, please use 'rdf2assets' instead!")
472
- return rdf2assets(graph_store, transformation_rules, stop_on_exception, use_orphanage)
473
-
474
-
475
- def _create_orphanage(orphanage_external_id: str, dataset_id: int, meta_keys: NeatMetadataKeys) -> dict:
476
- now = str(datetime_utc_now())
477
- return {
478
- "external_id": orphanage_external_id,
479
- "name": "Orphanage",
480
- "parent_external_id": None,
481
- "description": "Used to store all assets which parent does not exist",
482
- "metadata": {
483
- meta_keys.type: "Orphanage",
484
- "cdfResourceType": "Asset",
485
- meta_keys.identifier: "orphanage",
486
- meta_keys.active: "true",
487
- meta_keys.start_time: now,
488
- meta_keys.update_time: now,
489
- },
490
- "data_set_id": dataset_id,
491
- "labels": ["Orphanage", "non-historic"],
492
- }
493
-
494
-
495
- def _asset2dict(asset: Asset) -> dict:
496
- """Return asset as dict representation
497
-
498
- Args:
499
- asset : Instance of Asset class
500
-
501
- Returns:
502
- Asset in dict representation
503
- """
504
-
505
- return {
506
- "external_id": asset.external_id,
507
- "name": asset.name,
508
- "description": asset.description,
509
- "parent_external_id": asset.parent_external_id,
510
- "data_set_id": asset.data_set_id,
511
- "metadata": asset.metadata,
512
- }
513
-
514
-
515
- def _flatten_labels(labels: list[dict[str, str]]) -> set[str]:
516
- """Flatten labels"""
517
- result = set()
518
- if labels is None:
519
- return result
520
- for label in labels:
521
- if "externalId" in label:
522
- result.add(label["externalId"])
523
- elif "external_id" in label:
524
- result.add(label["external_id"])
525
- else:
526
- logging.warning(f"Label {label} does not have externalId")
527
- return result
528
-
529
-
530
- def _is_historic(labels) -> bool:
531
- """Check if asset is historic"""
532
- return "historic" in labels
533
-
534
-
535
- def _categorize_cdf_assets(
536
- client: CogniteClient, data_set_id: int, partitions: int
537
- ) -> tuple[pd.DataFrame | None, dict[str, set]]:
538
- """Categorize CDF assets
539
-
540
- Args:
541
- client : Instance of CogniteClient
542
- data_set_id : Id of data set
543
- partitions : Number of partitions
544
-
545
- Returns:
546
- CDF assets as pandas dataframe and dictionary with categorized assets
547
- """
548
- cdf_assets = client.assets.list(data_set_ids=data_set_id, limit=-1, partitions=partitions)
549
-
550
- cdf_assets = remove_non_existing_labels(client, cdf_assets)
551
-
552
- cdf_asset_df = AssetList(resources=cdf_assets).to_pandas()
553
-
554
- logging.info(f"Number of assets in CDF {len(cdf_asset_df)} that have been fetched")
555
-
556
- if cdf_asset_df.empty:
557
- return None, {"non-historic": set(), "historic": set()}
558
- if "labels" not in cdf_asset_df:
559
- # Add empty list for labels column.
560
- cdf_asset_df["labels"] = np.empty((len(cdf_asset_df), 0)).tolist()
561
-
562
- cdf_columns = set(cdf_asset_df.columns)
563
- expected_columns = {"external_id", "labels", "parent_external_id", "data_set_id", "name", "description", "metadata"}
564
-
565
- cdf_asset_df = cdf_asset_df[list(expected_columns.intersection(cdf_columns))]
566
- cdf_asset_df = cdf_asset_df.where(pd.notnull(cdf_asset_df), None)
567
- cdf_asset_df["labels"] = cdf_asset_df["labels"].apply(_flatten_labels).values # type: ignore
568
- cdf_asset_df["is_historic"] = cdf_asset_df.labels.apply(_is_historic).values
569
-
570
- categorized_asset_ids = {
571
- "historic": set(cdf_asset_df[cdf_asset_df.is_historic].external_id.values),
572
- "non-historic": set(cdf_asset_df[~cdf_asset_df.is_historic].external_id.values),
573
- }
574
-
575
- cdf_asset_df.drop(["is_historic"], axis=1, inplace=True)
576
- msg = f"CDF assets categorized into {len(categorized_asset_ids['historic'])} historic"
577
- msg += f" and {len(categorized_asset_ids['non-historic'])} non-historic assets"
578
- logging.info(msg)
579
-
580
- return cdf_asset_df, categorized_asset_ids
581
-
582
-
583
- def order_assets(assets: dict[str, dict]) -> list[Asset]:
584
- """Order assets in a way that parent assets are created before child assets
585
-
586
- Args:
587
- assets : List of assets to be created
588
-
589
- Returns:
590
- Ordered list of assets
591
- """
592
- hierarchy = AssetHierarchy([Asset(**asset) for asset in assets.values()], ignore_orphans=True)
593
- insert_dct = hierarchy.groupby_parent_xid()
594
- subtree_count = hierarchy.count_subtree(insert_dct)
595
-
596
- hierarchy = None
597
-
598
- asset_creation_order = pd.DataFrame.from_dict(subtree_count, orient="index", columns=["order"]).sort_values(
599
- by="order", ascending=False
600
- )
601
- asset_creation_order["external_id"] = asset_creation_order.index
602
-
603
- hierarchy = AssetList([Asset(**asset) for asset in assets.values()]).to_pandas()
604
- hierarchy = hierarchy.where(pd.notnull(hierarchy), None)
605
- hierarchy = hierarchy.merge(asset_creation_order, left_on="external_id", right_on="external_id")
606
- hierarchy = hierarchy.sort_values(by="order", ascending=False)
607
- hierarchy.reset_index(drop=True, inplace=True)
608
- hierarchy.labels = hierarchy.labels.apply(_flatten_labels)
609
- hierarchy.drop(["order"], axis=1, inplace=True)
610
-
611
- return [Asset(**row.to_dict()) for _, row in hierarchy.iterrows()]
612
-
613
-
614
- def _assets_to_create(rdf_assets: dict, asset_ids: set) -> list[Asset]:
615
- """Return list of assets to be created
616
-
617
- Args:
618
- rdf_assets : Dictionary containing assets derived from knowledge graph (RDF)
619
- asset_ids : Set of asset ids to be created
620
-
621
- Returns:
622
- Ordered list of assets to be created
623
- """
624
- start_time = datetime_utc_now()
625
- if asset_ids:
626
- logging.info("Wrangling assets to be created into their final form")
627
- ordered_assets = order_assets({external_id: rdf_assets[external_id] for external_id in asset_ids})
628
-
629
- logging.info(f"Wrangling completed in {(datetime_utc_now() - start_time).seconds} seconds")
630
- return ordered_assets
631
- return []
632
-
633
-
634
- def _assets_to_update(
635
- rdf_assets: dict,
636
- cdf_assets: pd.DataFrame | None,
637
- asset_ids: set,
638
- meta_keys: NeatMetadataKeys,
639
- exclude_paths: list = EXCLUDE_PATHS,
640
- ) -> tuple[list[Asset], dict[str, dict]]:
641
- """Return list of assets to be updated
642
-
643
- Args:
644
- rdf_assets : Dictionary containing assets derived from knowledge graph (RDF)
645
- cdf_assets : Dataframe containing assets from CDF
646
- asset_ids : Candidate assets to be updated
647
- meta_keys : The neat meta data keys.
648
- exclude_paths : Paths not to be checked when diffing rdf and cdf assets, by default EXCLUDE_PATHS
649
-
650
- Returns:
651
- List of assets to be updated and detailed report of changes per asset
652
- """
653
-
654
- start_time = datetime_utc_now()
655
- assets = []
656
- report = {}
657
- if not asset_ids:
658
- return [], {}
659
- logging.info("Wrangling assets to be updated into their final form")
660
- if cdf_assets is None:
661
- cdf_asset_subset = {}
662
- else:
663
- cdf_asset_subset = {
664
- row["external_id"]: row
665
- for row in cdf_assets[cdf_assets["external_id"].isin(asset_ids)].to_dict(orient="records")
666
- }
667
- for external_id in asset_ids:
668
- cdf_asset = cdf_asset_subset[external_id]
669
- diffing_result = DeepDiff(cdf_asset, rdf_assets[external_id], exclude_paths=exclude_paths)
670
-
671
- if diffing_result and f"root['metadata']['{meta_keys.active}']" not in diffing_result.affected_paths:
672
- asset = Asset(**rdf_assets[external_id])
673
- if asset.metadata is None:
674
- asset.metadata = {}
675
- try:
676
- asset.metadata[meta_keys.start_time] = cdf_asset[external_id]["metadata"][meta_keys.start_time]
677
- except KeyError:
678
- asset.metadata[meta_keys.start_time] = str(datetime.now(UTC))
679
- asset.metadata[meta_keys.update_time] = str(datetime.now(UTC))
680
- assets.append(asset)
681
-
682
- report[external_id] = dict(diffing_result)
683
-
684
- logging.info(f"Wrangling of {len(assets)} completed in {(datetime_utc_now() - start_time).seconds} seconds")
685
- return assets, report
686
-
687
-
688
- def _assets_to_resurrect(
689
- rdf_assets: dict, cdf_assets: pd.DataFrame | None, asset_ids: set, meta_keys: NeatMetadataKeys
690
- ) -> list[Asset]:
691
- """Returns list of assets to be resurrected
692
-
693
- Args:
694
- rdf_assets : Dictionary containing assets derived from knowledge graph (RDF)
695
- cdf_assets : Dataframe containing assets from CDF
696
- asset_ids : Set of asset ids to be resurrected
697
-
698
- Returns:
699
- List of assets to be resurrected
700
- """
701
- start_time = datetime_utc_now()
702
- assets = []
703
- if not asset_ids:
704
- return []
705
- logging.info("Wrangling assets to be resurrected into their final form")
706
- if cdf_assets is None:
707
- cdf_asset_subset = {}
708
- else:
709
- cdf_asset_subset = {
710
- row["external_id"]: row
711
- for row in cdf_assets[cdf_assets["external_id"].isin(asset_ids)].to_dict(orient="records")
712
- }
713
- for external_id in asset_ids:
714
- cdf_asset = cdf_asset_subset[external_id]
715
-
716
- asset = Asset(**rdf_assets[external_id])
717
- if asset.metadata is None:
718
- asset.metadata = {}
719
- now = str(datetime.now(UTC))
720
- try:
721
- asset.metadata[meta_keys.start_time] = cdf_asset[external_id]["metadata"][meta_keys.start_time]
722
- except KeyError:
723
- asset.metadata[meta_keys.start_time] = now
724
- asset.metadata[meta_keys.update_time] = now
725
- asset.metadata[meta_keys.resurrection_time] = now
726
- assets.append(asset)
727
-
728
- logging.info(f"Wrangling of {len(assets)} completed in {(datetime_utc_now() - start_time).seconds} seconds")
729
- return assets
730
-
731
-
732
- def _assets_to_decommission(
733
- cdf_assets: pd.DataFrame | None, asset_ids: set[str], meta_keys: NeatMetadataKeys
734
- ) -> list[Asset]:
735
- start_time = datetime_utc_now()
736
-
737
- assets = []
738
- if not asset_ids:
739
- return []
740
- logging.info("Wrangling assets to be decommissioned into their final form")
741
- if cdf_assets is None:
742
- cdf_asset_subset: dict[str, dict] = {}
743
- else:
744
- cdf_asset_subset = {
745
- row["external_id"]: row
746
- for row in cdf_assets[cdf_assets["external_id"].isin(asset_ids)].to_dict(orient="records")
747
- }
748
-
749
- for external_id in asset_ids:
750
- cdf_asset = cdf_asset_subset[external_id]
751
-
752
- now = str(datetime.now(UTC))
753
- cdf_asset["metadata"][meta_keys.update_time] = now
754
- cdf_asset["metadata"].pop(meta_keys.resurrection_time, None)
755
- cdf_asset["metadata"][meta_keys.end_time] = now
756
- cdf_asset["metadata"][meta_keys.active] = "false"
757
- try:
758
- cdf_asset["labels"].remove("non-historic")
759
- except KeyError:
760
- logging.info(f"Asset {external_id} missed label 'non-historic'")
761
- cdf_asset["labels"].add("historic")
762
-
763
- assets.append(Asset(**cdf_asset))
764
-
765
- logging.info(f"Wrangling completed in {(datetime_utc_now() - start_time).seconds} seconds")
766
- return assets
767
-
768
-
769
- @overload
770
- def categorize_assets(
771
- client: CogniteClient,
772
- rdf_assets: dict,
773
- data_set_id: int,
774
- return_report: Literal[False] = False,
775
- partitions: int = 2,
776
- stop_on_exception: bool = False,
777
- meta_keys: NeatMetadataKeys | None = None,
778
- ) -> dict: ...
779
-
780
-
781
- @overload
782
- def categorize_assets(
783
- client: CogniteClient,
784
- rdf_assets: dict,
785
- data_set_id: int,
786
- return_report: Literal[True],
787
- partitions: int = 2,
788
- stop_on_exception: bool = False,
789
- meta_keys: NeatMetadataKeys | None = None,
790
- ) -> tuple[dict, dict]: ...
791
-
792
-
793
- def categorize_assets(
794
- client: CogniteClient,
795
- rdf_assets: dict,
796
- data_set_id: int,
797
- return_report: bool = False,
798
- partitions: int = 2,
799
- stop_on_exception: bool = False,
800
- meta_keys: NeatMetadataKeys | None = None,
801
- ) -> tuple[dict, dict] | dict:
802
- """Categorize assets on those that are to be created, updated and decommissioned
803
-
804
- Args:
805
- client : Instance of CogniteClient
806
- rdf_assets : Dictionary containing asset external_id - asset pairs
807
- data_set_id : Dataset id to which assets are to be/are stored
808
- partitions : Number of partitions to use when fetching assets from CDF, by default 2
809
- stop_on_exception : Whether to stop on exception or not, by default False
810
- return_report : Whether to report on the diffing results or not, by default False
811
- meta_keys : The metadata keys used by neat.
812
-
813
- Returns:
814
- dictionary containing asset category - list of asset pairs
815
- """
816
- meta_keys = NeatMetadataKeys() if meta_keys is None else meta_keys
817
-
818
- # TODO: Cache categorized assets somewhere instead of creating them
819
- cdf_assets, categorized_asset_ids = _categorize_cdf_assets(client, data_set_id, partitions)
820
-
821
- rdf_asset_ids = set(rdf_assets.keys())
822
-
823
- # ids to create
824
- create_ids = rdf_asset_ids.difference(
825
- categorized_asset_ids["historic"].union(categorized_asset_ids["non-historic"])
826
- )
827
-
828
- # ids potentially to update
829
- update_ids = rdf_asset_ids.intersection(categorized_asset_ids["non-historic"])
830
-
831
- # ids to decommission
832
- decommission_ids = categorized_asset_ids["non-historic"].difference(rdf_asset_ids)
833
-
834
- # ids to resurrect
835
- resurrect_ids = categorized_asset_ids["historic"].intersection(rdf_asset_ids)
836
-
837
- logging.info(f"Number of assets to create: { len(create_ids)}")
838
- logging.info(f"Number of assets to potentially update: { len(update_ids)}")
839
- logging.info(f"Number of assets to decommission: { len(decommission_ids)}")
840
- logging.info(f"Number of assets to resurrect: { len(resurrect_ids)}")
841
-
842
- categorized_assets_update, report_update = _assets_to_update(
843
- rdf_assets, cdf_assets, update_ids, meta_keys=meta_keys
844
- )
845
- report = {
846
- "create": create_ids,
847
- "resurrect": resurrect_ids,
848
- "decommission": decommission_ids,
849
- "update": report_update,
850
- }
851
- categorized_assets = {
852
- "create": _assets_to_create(rdf_assets, create_ids),
853
- "update": categorized_assets_update,
854
- "resurrect": _assets_to_resurrect(rdf_assets, cdf_assets, resurrect_ids, meta_keys),
855
- "decommission": _assets_to_decommission(cdf_assets, decommission_ids, meta_keys),
856
- }
857
-
858
- return (categorized_assets, report) if return_report else categorized_assets
859
-
860
-
861
- def _micro_batch_push(
862
- client: CogniteClient,
863
- assets: Sequence[Asset | AssetUpdate],
864
- batch_size: int = 1000,
865
- push_type: str = "update",
866
- message: str = "Updated",
867
- max_retries: int = 1,
868
- retry_delay: int = 5,
869
- ):
870
- """Updates assets in batches of 1000
871
-
872
- Args:
873
- client : CogniteClient
874
- Instance of CogniteClient
875
- assets : list
876
- List of assets to be created or updated
877
- batch_size : int, optional
878
- Size of batch, by default 1000
879
- push_type : str, optional
880
- Type of push, either "update" or "create", by default "update"
881
- message : str, optional
882
- Message to logged, by default "Updated"
883
- """
884
- total = len(assets)
885
- counter = 0
886
- if push_type not in ["update", "create"]:
887
- logging.info(f"push_type {push_type} not supported")
888
- raise ValueError(f"push_type {push_type} not supported")
889
- for batch in chunker(assets, batch_size):
890
- counter += len(batch)
891
- start_time = datetime_utc_now()
892
-
893
- @retry_decorator(max_retries=max_retries, retry_delay=retry_delay, component_name="microbatch-assets")
894
- def upsert_assets(batch):
895
- if push_type == "update":
896
- client.assets.update(batch)
897
- elif push_type == "create":
898
- client.assets.create_hierarchy(batch)
899
-
900
- try:
901
- upsert_assets(batch)
902
- except CogniteDuplicatedError:
903
- # this is handling of very rare case when some assets might be lost . Normally this should not happen.
904
- # Last attempt to recover
905
- client.assets.create_hierarchy(batch, upsert=True) # type: ignore[arg-type]
906
-
907
- delta_time = (datetime_utc_now() - start_time).seconds
908
-
909
- msg = f"{message} {counter} of {total} assets, batch processing time: {delta_time:.2f} "
910
- msg += f"seconds ETC: {delta_time * (total - counter) / (60*batch_size) :.2f} minutes"
911
- logging.info(msg)
912
-
913
-
914
- def upload_assets(
915
- client: CogniteClient,
916
- categorized_assets: Mapping[str, Sequence[Asset | AssetUpdate]],
917
- batch_size: int = 5000,
918
- max_retries: int = 1,
919
- retry_delay: int = 3,
920
- ):
921
- """Uploads categorized assets to CDF
922
-
923
- Args:
924
- client : CogniteClient
925
- Instance of CogniteClient
926
- categorized_assets : Dict[str, list]
927
- dictionary containing asset category - list of asset pairs
928
- batch_size : int, optional
929
- Size of batch, by default 5000
930
-
931
- !!! note "batch_size"
932
- If batch size is set to 1 or None, all assets will be pushed to CDF in one go.
933
- """
934
- if batch_size:
935
- logging.info(f"Uploading assets in batches of {batch_size}")
936
- if categorized_assets["create"]:
937
- _micro_batch_push(
938
- client,
939
- categorized_assets["create"],
940
- batch_size,
941
- push_type="create",
942
- message="Created",
943
- max_retries=max_retries,
944
- retry_delay=retry_delay,
945
- )
946
-
947
- if categorized_assets["update"]:
948
- _micro_batch_push(
949
- client,
950
- categorized_assets["update"],
951
- batch_size,
952
- message="Updated",
953
- max_retries=max_retries,
954
- retry_delay=retry_delay,
955
- )
956
-
957
- if categorized_assets["resurrect"]:
958
- _micro_batch_push(
959
- client,
960
- categorized_assets["resurrect"],
961
- batch_size,
962
- message="Resurrected",
963
- max_retries=max_retries,
964
- retry_delay=retry_delay,
965
- )
966
-
967
- if categorized_assets["decommission"]:
968
- _micro_batch_push(
969
- client,
970
- categorized_assets["decommission"],
971
- batch_size,
972
- message="Decommissioned",
973
- max_retries=max_retries,
974
- retry_delay=retry_delay,
975
- )
976
-
977
- else:
978
- logging.info("Batch size not set, pushing all assets to CDF in one go!")
979
-
980
- @retry_decorator(max_retries=max_retries, retry_delay=retry_delay, component_name="create-assets")
981
- def create_assets():
982
- if categorized_assets["create"]:
983
- try:
984
- client.assets.create_hierarchy(categorized_assets["create"])
985
- except CogniteDuplicatedError:
986
- client.assets.create_hierarchy(categorized_assets["create"], upsert=True)
987
-
988
- if categorized_assets["update"]:
989
- client.assets.create_hierarchy(categorized_assets["update"], upsert=True, upsert_mode="replace")
990
-
991
- if categorized_assets["resurrect"]:
992
- client.assets.create_hierarchy(categorized_assets["resurrect"], upsert=True, upsert_mode="replace")
993
-
994
- if categorized_assets["decommission"]:
995
- client.assets.create_hierarchy(categorized_assets["decommission"], upsert=True, upsert_mode="replace")
996
-
997
- create_assets()
998
-
999
-
1000
- AssetLike: TypeAlias = Asset | dict[str, Any]
1001
-
1002
-
1003
- @overload
1004
- def remove_non_existing_labels(client: CogniteClient, assets: Sequence[AssetLike]) -> Sequence[AssetLike]: ...
1005
-
1006
-
1007
- @overload
1008
- def remove_non_existing_labels(client: CogniteClient, assets: Mapping[str, AssetLike]) -> Mapping[str, AssetLike]: ...
1009
-
1010
-
1011
- def remove_non_existing_labels(
1012
- client: CogniteClient, assets: Sequence[AssetLike] | Mapping[str, AssetLike]
1013
- ) -> Sequence[AssetLike] | Mapping[str, AssetLike]:
1014
- cdf_labels = client.labels.list(limit=-1)
1015
- if not cdf_labels:
1016
- # No labels, nothing to check.
1017
- return assets
1018
-
1019
- available_labels = {label.external_id for label in cdf_labels}
1020
-
1021
- def clean_asset_labels(asset: Asset | dict[str, Any]) -> Asset | dict[str, Any]:
1022
- if isinstance(asset, Asset):
1023
- asset.labels = [label for label in (asset.labels or []) if label.external_id in available_labels] or None
1024
- elif isinstance(asset, dict) and "labels" in asset:
1025
- asset["labels"] = [label for label in asset["labels"] if label in available_labels]
1026
- return asset
1027
-
1028
- if isinstance(assets, Sequence):
1029
- return [clean_asset_labels(a) for a in assets]
1030
-
1031
- elif isinstance(assets, dict):
1032
- return {external_id: clean_asset_labels(a) for external_id, a in assets.items()}
1033
-
1034
- raise ValueError(f"Invalid format for Assets={type(assets)}")
1035
-
1036
-
1037
- def unique_asset_labels(assets: Iterable[Asset | dict[str, Any]]) -> set[str]:
1038
- labels: set[str] = set()
1039
- for asset in assets:
1040
- if isinstance(asset, Asset):
1041
- labels |= {label.external_id for label in (asset.labels or []) if label.external_id}
1042
- elif isinstance(asset, dict) and (asset_labels := asset.get("labels")):
1043
- labels |= set(asset_labels)
1044
- else:
1045
- raise ValueError(f"Unsupported {type(asset)}")
1046
- return labels