cognite-neat 0.87.4__py3-none-any.whl → 0.88.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (132) hide show
  1. cognite/neat/_version.py +1 -1
  2. cognite/neat/app/api/data_classes/rest.py +0 -19
  3. cognite/neat/app/api/explorer.py +6 -4
  4. cognite/neat/app/api/routers/crud.py +11 -21
  5. cognite/neat/app/api/routers/workflows.py +24 -94
  6. cognite/neat/graph/extractors/_classic_cdf/_assets.py +8 -2
  7. cognite/neat/graph/extractors/_mock_graph_generator.py +2 -2
  8. cognite/neat/graph/loaders/_base.py +17 -12
  9. cognite/neat/graph/loaders/_rdf2asset.py +223 -58
  10. cognite/neat/graph/loaders/_rdf2dms.py +1 -1
  11. cognite/neat/graph/stores/_base.py +5 -0
  12. cognite/neat/rules/analysis/_asset.py +31 -1
  13. cognite/neat/rules/importers/_inference2rules.py +31 -35
  14. cognite/neat/rules/models/information/_rules.py +1 -1
  15. cognite/neat/workflows/steps/data_contracts.py +17 -43
  16. cognite/neat/workflows/steps/lib/current/graph_extractor.py +28 -24
  17. cognite/neat/workflows/steps/lib/current/graph_loader.py +4 -21
  18. cognite/neat/workflows/steps/lib/current/graph_store.py +18 -134
  19. cognite/neat/workflows/steps_registry.py +5 -7
  20. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/METADATA +1 -1
  21. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/RECORD +24 -132
  22. cognite/neat/app/api/routers/core.py +0 -91
  23. cognite/neat/app/api/routers/data_exploration.py +0 -336
  24. cognite/neat/app/api/routers/rules.py +0 -203
  25. cognite/neat/legacy/__init__.py +0 -0
  26. cognite/neat/legacy/graph/__init__.py +0 -3
  27. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44-dirty.xml +0 -20182
  28. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44.xml +0 -20163
  29. cognite/neat/legacy/graph/examples/__init__.py +0 -10
  30. cognite/neat/legacy/graph/examples/skos-capturing-sheet-wind-topics.xlsx +0 -0
  31. cognite/neat/legacy/graph/exceptions.py +0 -90
  32. cognite/neat/legacy/graph/extractors/__init__.py +0 -6
  33. cognite/neat/legacy/graph/extractors/_base.py +0 -14
  34. cognite/neat/legacy/graph/extractors/_dexpi.py +0 -44
  35. cognite/neat/legacy/graph/extractors/_graph_capturing_sheet.py +0 -403
  36. cognite/neat/legacy/graph/extractors/_mock_graph_generator.py +0 -361
  37. cognite/neat/legacy/graph/loaders/__init__.py +0 -23
  38. cognite/neat/legacy/graph/loaders/_asset_loader.py +0 -511
  39. cognite/neat/legacy/graph/loaders/_base.py +0 -67
  40. cognite/neat/legacy/graph/loaders/_exceptions.py +0 -85
  41. cognite/neat/legacy/graph/loaders/core/__init__.py +0 -0
  42. cognite/neat/legacy/graph/loaders/core/labels.py +0 -58
  43. cognite/neat/legacy/graph/loaders/core/models.py +0 -136
  44. cognite/neat/legacy/graph/loaders/core/rdf_to_assets.py +0 -1046
  45. cognite/neat/legacy/graph/loaders/core/rdf_to_relationships.py +0 -559
  46. cognite/neat/legacy/graph/loaders/rdf_to_dms.py +0 -309
  47. cognite/neat/legacy/graph/loaders/validator.py +0 -87
  48. cognite/neat/legacy/graph/models.py +0 -6
  49. cognite/neat/legacy/graph/stores/__init__.py +0 -13
  50. cognite/neat/legacy/graph/stores/_base.py +0 -400
  51. cognite/neat/legacy/graph/stores/_graphdb_store.py +0 -52
  52. cognite/neat/legacy/graph/stores/_memory_store.py +0 -43
  53. cognite/neat/legacy/graph/stores/_oxigraph_store.py +0 -151
  54. cognite/neat/legacy/graph/stores/_oxrdflib.py +0 -247
  55. cognite/neat/legacy/graph/stores/_rdf_to_graph.py +0 -42
  56. cognite/neat/legacy/graph/transformations/__init__.py +0 -0
  57. cognite/neat/legacy/graph/transformations/entity_matcher.py +0 -101
  58. cognite/neat/legacy/graph/transformations/query_generator/__init__.py +0 -3
  59. cognite/neat/legacy/graph/transformations/query_generator/sparql.py +0 -575
  60. cognite/neat/legacy/graph/transformations/transformer.py +0 -322
  61. cognite/neat/legacy/rules/__init__.py +0 -0
  62. cognite/neat/legacy/rules/analysis.py +0 -231
  63. cognite/neat/legacy/rules/examples/Rules-Nordic44-to-graphql.xlsx +0 -0
  64. cognite/neat/legacy/rules/examples/Rules-Nordic44.xlsx +0 -0
  65. cognite/neat/legacy/rules/examples/__init__.py +0 -18
  66. cognite/neat/legacy/rules/examples/power-grid-containers.yaml +0 -124
  67. cognite/neat/legacy/rules/examples/power-grid-example.xlsx +0 -0
  68. cognite/neat/legacy/rules/examples/power-grid-model.yaml +0 -224
  69. cognite/neat/legacy/rules/examples/rules-template.xlsx +0 -0
  70. cognite/neat/legacy/rules/examples/sheet2cdf-transformation-rules.xlsx +0 -0
  71. cognite/neat/legacy/rules/examples/skos-rules.xlsx +0 -0
  72. cognite/neat/legacy/rules/examples/source-to-solution-mapping-rules.xlsx +0 -0
  73. cognite/neat/legacy/rules/examples/wind-energy.owl +0 -1511
  74. cognite/neat/legacy/rules/exceptions.py +0 -2972
  75. cognite/neat/legacy/rules/exporters/__init__.py +0 -20
  76. cognite/neat/legacy/rules/exporters/_base.py +0 -45
  77. cognite/neat/legacy/rules/exporters/_core/__init__.py +0 -5
  78. cognite/neat/legacy/rules/exporters/_core/rules2labels.py +0 -24
  79. cognite/neat/legacy/rules/exporters/_rules2dms.py +0 -885
  80. cognite/neat/legacy/rules/exporters/_rules2excel.py +0 -213
  81. cognite/neat/legacy/rules/exporters/_rules2graphql.py +0 -183
  82. cognite/neat/legacy/rules/exporters/_rules2ontology.py +0 -524
  83. cognite/neat/legacy/rules/exporters/_rules2pydantic_models.py +0 -748
  84. cognite/neat/legacy/rules/exporters/_rules2rules.py +0 -105
  85. cognite/neat/legacy/rules/exporters/_rules2triples.py +0 -38
  86. cognite/neat/legacy/rules/exporters/_validation.py +0 -146
  87. cognite/neat/legacy/rules/importers/__init__.py +0 -22
  88. cognite/neat/legacy/rules/importers/_base.py +0 -66
  89. cognite/neat/legacy/rules/importers/_dict2rules.py +0 -158
  90. cognite/neat/legacy/rules/importers/_dms2rules.py +0 -194
  91. cognite/neat/legacy/rules/importers/_graph2rules.py +0 -308
  92. cognite/neat/legacy/rules/importers/_json2rules.py +0 -39
  93. cognite/neat/legacy/rules/importers/_owl2rules/__init__.py +0 -3
  94. cognite/neat/legacy/rules/importers/_owl2rules/_owl2classes.py +0 -239
  95. cognite/neat/legacy/rules/importers/_owl2rules/_owl2metadata.py +0 -260
  96. cognite/neat/legacy/rules/importers/_owl2rules/_owl2properties.py +0 -217
  97. cognite/neat/legacy/rules/importers/_owl2rules/_owl2rules.py +0 -290
  98. cognite/neat/legacy/rules/importers/_spreadsheet2rules.py +0 -45
  99. cognite/neat/legacy/rules/importers/_xsd2rules.py +0 -20
  100. cognite/neat/legacy/rules/importers/_yaml2rules.py +0 -39
  101. cognite/neat/legacy/rules/models/__init__.py +0 -5
  102. cognite/neat/legacy/rules/models/_base.py +0 -151
  103. cognite/neat/legacy/rules/models/raw_rules.py +0 -316
  104. cognite/neat/legacy/rules/models/rdfpath.py +0 -237
  105. cognite/neat/legacy/rules/models/rules.py +0 -1289
  106. cognite/neat/legacy/rules/models/tables.py +0 -9
  107. cognite/neat/legacy/rules/models/value_types.py +0 -118
  108. cognite/neat/legacy/workflows/examples/Export_DMS/workflow.yaml +0 -89
  109. cognite/neat/legacy/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
  110. cognite/neat/legacy/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
  111. cognite/neat/legacy/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  112. cognite/neat/legacy/workflows/examples/Import_DMS/workflow.yaml +0 -65
  113. cognite/neat/legacy/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
  114. cognite/neat/legacy/workflows/examples/Validate_Rules/workflow.yaml +0 -67
  115. cognite/neat/legacy/workflows/examples/Validate_Solution_Model/workflow.yaml +0 -64
  116. cognite/neat/legacy/workflows/examples/Visualize_Data_Model_Using_Mock_Graph/workflow.yaml +0 -95
  117. cognite/neat/legacy/workflows/examples/Visualize_Semantic_Data_Model/workflow.yaml +0 -111
  118. cognite/neat/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  119. cognite/neat/workflows/migration/__init__.py +0 -0
  120. cognite/neat/workflows/migration/steps.py +0 -91
  121. cognite/neat/workflows/migration/wf_manifests.py +0 -33
  122. cognite/neat/workflows/steps/lib/legacy/__init__.py +0 -7
  123. cognite/neat/workflows/steps/lib/legacy/graph_contextualization.py +0 -82
  124. cognite/neat/workflows/steps/lib/legacy/graph_extractor.py +0 -746
  125. cognite/neat/workflows/steps/lib/legacy/graph_loader.py +0 -606
  126. cognite/neat/workflows/steps/lib/legacy/graph_store.py +0 -307
  127. cognite/neat/workflows/steps/lib/legacy/graph_transformer.py +0 -58
  128. cognite/neat/workflows/steps/lib/legacy/rules_exporter.py +0 -511
  129. cognite/neat/workflows/steps/lib/legacy/rules_importer.py +0 -612
  130. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/LICENSE +0 -0
  131. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/WHEEL +0 -0
  132. {cognite_neat-0.87.4.dist-info → cognite_neat-0.88.0.dist-info}/entry_points.txt +0 -0
@@ -1,559 +0,0 @@
1
- import logging
2
- import warnings
3
- from collections.abc import Collection
4
- from typing import Any, Literal, cast, overload
5
- from warnings import warn
6
-
7
- import pandas as pd
8
- from cognite.client import CogniteClient
9
- from cognite.client.data_classes import LabelFilter, Relationship, RelationshipUpdate
10
- from cognite.client.exceptions import CogniteDuplicatedError
11
-
12
- from cognite.neat.legacy.graph.exceptions import NamespaceRequired
13
- from cognite.neat.legacy.graph.loaders.core.models import RelationshipDefinition, RelationshipDefinitions
14
- from cognite.neat.legacy.graph.loaders.core.rdf_to_assets import _categorize_cdf_assets
15
- from cognite.neat.legacy.graph.stores import NeatGraphStoreBase
16
- from cognite.neat.legacy.rules.models.rules import Rules
17
- from cognite.neat.utils.auxiliary import retry_decorator
18
- from cognite.neat.utils.collection_ import chunker
19
- from cognite.neat.utils.rdf_ import remove_namespace_from_uri
20
- from cognite.neat.utils.time_ import datetime_utc_now, epoch_now_ms
21
-
22
-
23
def define_relationships(rules: Rules, data_set_id: int, stop_on_exception: bool = False) -> RelationshipDefinitions:
    """Collect relationship definitions from the properties of the transformation rules.

    Args:
        rules: Transformation rules which hold the data model
        data_set_id: CDF data set id stored on the returned definitions
        stop_on_exception: If True, a redefined relationship or the absence of any
            relationship raises ValueError; otherwise a warning is emitted and
            processing continues. Defaults to False.

    Returns:
        RelationshipDefinitions instance keyed by the row identifier of each property
        whose CDF resource type contains "Relationship". The mapping is empty when no
        such property exists.

    Raises:
        NamespaceRequired: If the rules metadata carries no namespace.
        ValueError: Only when stop_on_exception is True (see above).
    """
    definitions_by_row = {}
    if rules.metadata.namespace is None:
        raise NamespaceRequired("Load Relationships")
    namespace = rules.metadata.namespace
    prefix = rules.metadata.prefix

    # "class_id(property_id)" keys already seen; used to detect redefinitions
    seen_keys = set()

    for row, rule in rules.properties.items():
        if "Relationship" not in rule.cdf_resource_type:
            continue

        labels = {rule.class_id, rule.expected_value_type.suffix, "non-historic", rule.property_id}
        if rule.label:
            labels.add(rule.label)

        definition = RelationshipDefinition(
            source_class=rule.class_id,
            target_class=rule.expected_value_type.suffix,
            property_=rule.property_id,
            labels=list(labels),
            target_type=rule.target_type,
            source_type=rule.source_type,
            relationship_external_id_rule=rule.relationship_external_id_rule,
        )

        key = f"{rule.class_id}({rule.property_id})"
        if key not in seen_keys:
            definitions_by_row[row] = definition
            seen_keys.add(key)
            continue

        # The first definition wins; later ones are either fatal or skipped
        msg = f"Relationship {rule.property_id} redefined at {row} in transformation rules!"
        if stop_on_exception:
            logging.error(msg)
            raise ValueError(msg)
        msg += " Skipping redefinition!"
        warnings.warn(msg, stacklevel=2)
        logging.warning(msg)

    if not definitions_by_row:
        msg = "No relationship defined in transformation rule sheet!"
        if stop_on_exception:
            logging.error(msg)
            raise ValueError(msg)
        warnings.warn(msg, stacklevel=2)
        logging.warning(msg)

    return RelationshipDefinitions(
        data_set_id=data_set_id, prefix=prefix, namespace=namespace, relationships=definitions_by_row
    )
86
-
87
-
88
def rdf2relationships(
    graph_store: NeatGraphStoreBase,
    rules: Rules,
    data_set_id: int,
    relationship_external_id_prefix: str | None = None,
    stop_on_exception: bool = False,
) -> pd.DataFrame:
    """Converts RDF triples to relationships

    Args:
        graph_store: Graph store holding the RDF triples to query
        rules: Transformation rules which hold data model and relationship definitions
        data_set_id: CDF data set id written to every relationship row
        relationship_external_id_prefix: Optional prefix prepended to the source and
            target external ids
        stop_on_exception: If True, re-raise the first error hit while processing a
            relationship definition; otherwise log it and move on. Defaults to False.

    Returns:
        Dataframe holding relationships with the columns source_external_id,
        target_external_id, target_type, source_type, labels, external_id,
        data_set_id and start_time. The frame is empty (same columns) when no
        relationship could be generated.
    """

    # Step 1: Generate relationship definitions
    # data_set_id must be passed explicitly, otherwise stop_on_exception would be
    # bound to the data_set_id parameter and the flag itself would be lost.
    relationship_definitions = define_relationships(rules, data_set_id, stop_on_exception)

    # Step 2: Generate relationships
    relationship_dfs = []
    for id_, definition in relationship_definitions.relationships.items():
        try:
            logging.debug("Processing relationship: " + id_)
            query = _build_relationship_query(relationship_definitions.prefix, definition)

            logging.debug("Rel query: " + query)
            relationship_data_frame = pd.DataFrame(list(graph_store.query(query)))

            # No matching triples: the frame has no columns at all, so there is
            # nothing to rename or derive for this definition.
            if relationship_data_frame.empty:
                logging.debug("No triples found for relationship: " + id_)
                continue

            relationship_data_frame.rename(columns={0: "source_external_id", 1: "target_external_id"}, inplace=True)

            # removes namespace
            relationship_data_frame = relationship_data_frame.map(remove_namespace_from_uri)  # type: ignore[operator]

            # adding prefix
            if relationship_external_id_prefix:
                relationship_data_frame["source_external_id"] = (
                    relationship_external_id_prefix + relationship_data_frame["source_external_id"]
                )
                relationship_data_frame["target_external_id"] = (
                    relationship_external_id_prefix + relationship_data_frame["target_external_id"]
                )

            relationship_data_frame["target_type"] = definition.target_type
            relationship_data_frame["source_type"] = definition.source_type

            # every row gets the definition's labels (which include "non-historic",
            # i.e. relationships are active by default)
            relationship_data_frame["labels"] = [definition.labels] * len(relationship_data_frame)

            # set default external id
            relationship_data_frame["external_id"] = (
                relationship_data_frame["source_external_id"] + ":" + relationship_data_frame["target_external_id"]
            )
            relationship_data_frame["data_set_id"] = data_set_id
            relationship_dfs.append(relationship_data_frame)
        except Exception:
            logging.error("Error processing relationship: " + id_)
            if stop_on_exception:
                raise
            continue

    if not relationship_dfs:
        return pd.DataFrame(
            columns=[
                "source_external_id",
                "target_external_id",
                "target_type",
                "source_type",
                "labels",
                "external_id",
                "data_set_id",
                "start_time",
            ]
        )

    relationship_df = pd.concat(relationship_dfs)
    relationship_df.reset_index(inplace=True, drop=True)

    # Remove duplicate rows, if any. This should not happen, but it is better to be safe than sorry
    relationship_df.drop_duplicates(subset=["external_id"], inplace=True)
    relationship_df["start_time"] = epoch_now_ms()
    return relationship_df


def _build_relationship_query(prefix: str, definition: RelationshipDefinition) -> str:
    """Build the SPARQL query selecting source/target pairs for one relationship definition.

    All placeholders are filled in a single pass so that a substituted value can never
    be rewritten by a later substitution.
    """
    source_class = f"{prefix}:{definition.source_class}"
    target_class = f"{prefix}:{definition.target_class}"
    property_ = f"{prefix}:{definition.property_}"

    external_id_prop_name = definition.relationship_external_id_rule
    if external_id_prop_name:
        # Select the values of the configured property on both ends instead of the
        # node references themselves
        external_id_property = f"{prefix}:{external_id_prop_name}"
        return f"""
    SELECT ?source_id ?target_id
    WHERE {{
        ?source a {source_class} .
        ?source {property_} ?target .
        ?source {external_id_property} ?source_id .
        ?target a {target_class} .
        ?target {external_id_property} ?target_id .
    }}
    """

    return f"""
    SELECT ?source ?target
    WHERE {{
        ?source a {source_class} .
        ?target a {target_class} .
        ?source {property_} ?target
    }}"""
209
-
210
-
211
def rdf2relationship_data_frame(
    graph_store: NeatGraphStoreBase,
    transformation_rules: Rules,
    stop_on_exception: bool = False,
    data_set_id: int | None = None,
) -> pd.DataFrame:
    """Deprecated alias of 'rdf2relationships'.

    Args:
        graph_store: Graph store holding the RDF triples to query
        transformation_rules: Transformation rules which hold the relationship definitions
        stop_on_exception: Forwarded to 'rdf2relationships'. Defaults to False.
        data_set_id: CDF data set id forwarded to 'rdf2relationships'. It is required
            there, so it must be supplied here as well.

    Returns:
        Dataframe holding relationships, as produced by 'rdf2relationships'

    Raises:
        ValueError: If data_set_id is not given.
    """
    warn("'rdf2relationship_data_frame' is deprecated, please use 'rdf2relationships' instead!", stacklevel=2)
    logging.warning("'rdf2relationship_data_frame' is deprecated, please use 'rdf2relationships' instead!")

    # Previously stop_on_exception was passed positionally and ended up being used as
    # the data set id; fail loudly instead of writing a boolean as data_set_id.
    if data_set_id is None:
        raise ValueError("'rdf2relationship_data_frame' requires 'data_set_id' to generate relationships")

    return rdf2relationships(
        graph_store, transformation_rules, data_set_id, stop_on_exception=stop_on_exception
    )
217
-
218
-
219
- def _filter_relationship_xids(relationship_data_frame: pd.DataFrame, asset_xids: list | set) -> set:
220
- return set(
221
- relationship_data_frame[
222
- (relationship_data_frame["source_external_id"].isin(asset_xids))
223
- | (relationship_data_frame["target_external_id"].isin(asset_xids))
224
- ]["external_id"]
225
- )
226
-
227
-
228
def _categorize_rdf_relationship_xids(
    rdf_relationships: pd.DataFrame, categorized_asset_ids: dict
) -> dict[str, set[str]]:
    """Categorizes the external ids of the RDF relationships.

    Args:
        rdf_relationships: Dataframe holding relationships; the columns
            source_external_id, target_external_id and external_id are read
        categorized_asset_ids: Mapping with the keys "historic" and "non-historic",
            each holding a set of asset external ids

    Returns:
        Mapping with three disjoint sets of relationship external ids:
        "impossible" (an end refers to an asset in neither category), "historic"
        (an end refers to a historic asset) and "non-historic" (the remainder that
        refers to non-historic assets).
    """
    known_asset_ids = categorized_asset_ids["historic"].union(categorized_asset_ids["non-historic"])
    missing_asset_ids = (
        set(rdf_relationships.target_external_id)
        .union(rdf_relationships.source_external_id)
        .difference(known_asset_ids)
    )

    if missing_asset_ids:
        # Sentences are separated by a space so the log line stays readable
        msg = (
            f"Relationships are referring to these assets {missing_asset_ids}, which are missing in CDF. "
            "Relationships will not be created for assets that are missing in CDF. "
            "Please make sure that all assets are present in CDF before creating relationships."
        )
        logging.warning(msg)

    # First mask all relationships which contain assets that do not exist in CDF
    mask_impossible = _filter_relationship_xids(rdf_relationships, missing_asset_ids)

    # Then mask all relationships which contain assets that are historic while masking
    # all impossible relationships
    mask_historic = _filter_relationship_xids(rdf_relationships, categorized_asset_ids["historic"]).difference(
        mask_impossible
    )

    # Whatever touches a non-historic asset and was not claimed above is non-historic
    mask_non_historic = (
        _filter_relationship_xids(rdf_relationships, categorized_asset_ids["non-historic"])
        .difference(mask_historic)
        .difference(mask_impossible)
    )

    return {"impossible": mask_impossible, "historic": mask_historic, "non-historic": mask_non_historic}
261
-
262
-
263
- def _get_label_based_cdf_relationship_xids(client, data_set_id, labels, partitions) -> set:
264
- """Get external ids of relationships in CDF for a given data set filtered on labels"""
265
-
266
- labels = LabelFilter(contains_any=labels) if labels is not None else None
267
- relationship_data_frame = client.relationships.list(
268
- data_set_ids=data_set_id, limit=-1, labels=labels, partitions=partitions
269
- ).to_pandas()
270
- return set() if relationship_data_frame.empty else set(relationship_data_frame.external_id)
271
-
272
-
273
def _categorize_cdf_relationship_xids(client, data_set_id, partitions) -> dict[str, set]:
    """Fetch relationship external ids from CDF, keyed by the label "historic" / "non-historic"."""
    return {
        label: _get_label_based_cdf_relationship_xids(client, data_set_id, [label], partitions)
        for label in ("historic", "non-historic")
    }
278
-
279
-
280
def _relationship_to_create(relationships: pd.DataFrame) -> list[Relationship]:
    """Turn every row of the dataframe into a Relationship object.

    Each row is expanded as keyword arguments of Relationship, so the column names
    must match its constructor parameters. An empty frame yields an empty list.
    """
    started_at = datetime_utc_now()
    if relationships.empty:
        return []

    logging.info("Wrangling assets to be created into their final form")
    relationship_list = []
    for record in relationships.to_dict(orient="records"):
        relationship_list.append(Relationship(**cast(dict[str, Any], record)))

    elapsed_seconds = (datetime_utc_now() - started_at).seconds
    logging.info(f"Wrangling completed in {elapsed_seconds} seconds")
    return relationship_list
288
-
289
-
290
def _relationships_to_decommission(external_ids: Collection[str]) -> list[RelationshipUpdate]:
    """Build the updates that mark the given relationships as historic.

    Each update removes the "non-historic" label, adds the "historic" label and sets
    the end time to the current epoch time in milliseconds. An empty collection
    yields an empty list.
    """
    started_at = datetime_utc_now()
    if not external_ids:
        return []

    logging.info("Wrangling relationships to be decommissioned into their final form")

    updates = []
    for xid in external_ids:
        update = RelationshipUpdate(external_id=xid)

        # Flip the label from active to historic
        update.labels.remove("non-historic")
        update.labels.add(["historic"])

        # Close the relationship as of now
        update.end_time.set(epoch_now_ms())

        updates.append(update)

    elapsed_seconds = (datetime_utc_now() - started_at).seconds
    logging.info(f"Wrangling of {len(updates)} completed in {elapsed_seconds} seconds")
    return updates
314
-
315
-
316
def _relationships_to_resurrect(external_ids: Collection[str]) -> list[RelationshipUpdate]:
    """Build the updates that mark the given relationships as active again.

    Each update removes the "historic" label, adds the "non-historic" label and
    clears the end time. An empty collection yields an empty list.
    """
    started_at = datetime_utc_now()
    if not external_ids:
        return []

    logging.info("Wrangling relationships to be resurrected into their final form")

    updates = []
    for xid in external_ids:
        update = RelationshipUpdate(external_id=xid)

        # Flip the label from historic back to active
        update.labels.remove("historic")
        update.labels.add(["non-historic"])

        # An active relationship has no end time
        update.end_time.set(None)

        updates.append(update)

    elapsed_seconds = (datetime_utc_now() - started_at).seconds
    logging.info(f"Wrangling of {len(updates)} completed in {elapsed_seconds} seconds")
    return updates
340
-
341
-
342
@overload
def categorize_relationships(
    client: CogniteClient,
    rdf_relationships: pd.DataFrame,
    data_set_id: int,
    return_report: Literal[False] = False,
    partitions: int = 40,
) -> dict[str, list[Relationship] | list[RelationshipUpdate]]: ...


@overload
def categorize_relationships(
    client: CogniteClient,
    rdf_relationships: pd.DataFrame,
    data_set_id: int,
    return_report: Literal[True],
    partitions: int = 40,
) -> tuple[dict[str, list[Relationship] | list[RelationshipUpdate]], dict[str, set]]: ...


def categorize_relationships(
    client: CogniteClient,
    rdf_relationships: pd.DataFrame,
    data_set_id: int,
    return_report: bool = False,
    partitions: int = 40,
) -> (
    tuple[dict[str, list[Relationship] | list[RelationshipUpdate]], dict[str, set]]
    | dict[str, list[Relationship] | list[RelationshipUpdate]]
):
    """Split relationships into those to create, resurrect and decommission.

    Args:
        client : CogniteClient
        rdf_relationships : Dataframe holding relationships
        data_set_id : CDF data set id to which relationships are to be uploaded
        return_report : Whether to also return the external ids per category
        partitions : Number of partitions to use when querying CDF

    Returns:
        Mapping with the keys "create", "resurrect" and "decommission" holding the
        objects to push to CDF. When return_report is True, a tuple of that mapping
        and a second mapping with the same keys holding sets of external ids.
    """
    # TODO also figure out which relationships to be deleted

    _, categorized_asset_ids = _categorize_cdf_assets(client, data_set_id=data_set_id, partitions=partitions)
    rdf_by_category = _categorize_rdf_relationship_xids(rdf_relationships, categorized_asset_ids)
    cdf_by_category = _categorize_cdf_relationship_xids(client, data_set_id, partitions=partitions)

    rdf_historic = rdf_by_category["historic"]
    rdf_non_historic = rdf_by_category["non-historic"]
    cdf_historic = cdf_by_category["historic"]
    cdf_non_historic = cdf_by_category["non-historic"]

    cdf_relationships_all = cdf_historic | cdf_non_historic
    rdf_relationships_all = rdf_historic | rdf_non_historic

    # relationships to create
    # NonHistoric_rdf - (Historic_cdf U NonHistoric_cdf)
    create_xids = rdf_non_historic - cdf_relationships_all

    # relationships to decommission
    # (Historic_rdf ∩ NonHistoric_cdf) U (NonHistoric_cdf - All_rdf)
    decommission_xids = (rdf_historic & cdf_non_historic) | (cdf_non_historic - rdf_relationships_all)

    # relationships to resurrect
    # NonHistoric_rdf ∩ Historic_cdf
    resurrect_xids = rdf_non_historic & cdf_historic

    logging.info(f"Number of relationships to create: { len(create_xids)}")
    logging.info(f"Number of relationships to decommission: { len(decommission_xids)}")
    logging.info(f"Number of relationships to resurrect: { len(resurrect_xids)}")

    report = {"create": create_xids, "resurrect": resurrect_xids, "decommission": decommission_xids}

    rows_to_create = rdf_relationships[rdf_relationships.external_id.isin(create_xids)]
    categorized_relationships: dict[str, list[Relationship] | list[RelationshipUpdate]] = {
        "create": _relationship_to_create(rows_to_create),
        "resurrect": _relationships_to_resurrect(resurrect_xids),
        "decommission": _relationships_to_decommission(decommission_xids),
    }

    if return_report:
        return categorized_relationships, report
    return categorized_relationships
427
-
428
-
429
def _micro_batch_push(
    client: CogniteClient,
    relationships: list,
    batch_size: int = 1000,
    push_type: str = "update",
    message: str = "Updated",
    max_retries: int = 1,
    retry_delay: int = 5,
):
    """Creates or updates relationships in CDF in batches

    Args:
        client : CogniteClient
            Instance of CogniteClient
        relationships : list
            List of relationships to be created or updated
        batch_size : int, optional
            Size of batch, by default 1000
        push_type : str, optional
            Type of push, either "update" or "create", by default "update"
        message : str, optional
            Verb used in the progress log message, by default "Updated"
        max_retries : int, optional
            Passed to the retry decorator wrapping each batch push, by default 1
        retry_delay : int, optional
            Passed to the retry decorator wrapping each batch push, by default 5

    Raises:
        ValueError: If push_type is neither "update" nor "create".
        CogniteDuplicatedError: If an update batch reports duplicates; only create
            batches are recovered by creating the non-duplicated remainder.
    """
    if push_type not in ["update", "create"]:
        logging.error(f"push_type {push_type} not supported")
        raise ValueError(f"push_type {push_type} not supported")

    total = len(relationships)
    counter = 0

    # Defined once: the decorated pusher does not depend on the individual batch
    @retry_decorator(max_retries=max_retries, retry_delay=retry_delay, component_name="microbatch-relationships")
    def update_relationships(batch):
        if push_type == "update":
            client.relationships.update(batch)
        elif push_type == "create":
            client.relationships.create(batch)

    for batch in chunker(relationships, batch_size):
        counter += len(batch)
        start_time = datetime_utc_now()

        try:
            update_relationships(batch)
        except CogniteDuplicatedError as e:
            # Update objects cannot be sent to the create endpoint, so the recovery
            # below only makes sense for create batches.
            if push_type != "create":
                raise
            # This situation should not happen but if it does, we need to handle it
            exists = {d["externalId"] for d in e.duplicated}
            missing_relationships = [t for t in batch if t.external_id not in exists]
            if missing_relationships:
                client.relationships.create(missing_relationships)

        # total_seconds() keeps sub-second precision and does not wrap at one day
        delta_time = (datetime_utc_now() - start_time).total_seconds()

        msg = f"{message} {counter} of {total} relationships, batch processing time: {delta_time:.2f} "
        msg += f"seconds ETC: {delta_time * (total - counter) / (60*batch_size) :.2f} minutes"
        logging.info(msg)
482
-
483
-
484
def upload_relationships(
    client: CogniteClient,
    categorized_relationships: dict[str, list[Relationship] | list[RelationshipUpdate]],
    batch_size: int = 5000,
    max_retries: int = 1,
    retry_delay: int = 3,
):
    """Uploads categorized relationships to CDF

    Args:
        client: Instance of CogniteClient
        categorized_relationships: Categories of relationships to be uploaded; the keys
            "create", "resurrect" and "decommission" are read
        batch_size: Size of batch, by default 5000
        max_retries: Maximum times to retry the upload, by default 1
        retry_delay: Time delay before retrying the upload, by default 3

    !!! note "batch_size"
        If batch size is set to 0 or None, all relationships will be pushed to CDF in one go.
    """
    if batch_size:
        logging.info(f"Uploading relationships in batches of {batch_size}")
        if categorized_relationships["create"]:
            _micro_batch_push(
                client,
                categorized_relationships["create"],
                batch_size,
                push_type="create",
                message="Created",
                max_retries=max_retries,
                retry_delay=retry_delay,
            )

        if categorized_relationships["resurrect"]:
            _micro_batch_push(
                client,
                categorized_relationships["resurrect"],
                batch_size,
                message="Resurrected",
                max_retries=max_retries,
                retry_delay=retry_delay,
            )

        if categorized_relationships["decommission"]:
            _micro_batch_push(
                client,
                categorized_relationships["decommission"],
                batch_size,
                message="Decommissioned",
                max_retries=max_retries,
                retry_delay=retry_delay,
            )
        return

    logging.info("Batch size not set, pushing all relationships to CDF in one go!")

    # Create and update are retried separately so that a failing update never
    # triggers a second create of relationships that were already written.
    @retry_decorator(max_retries=max_retries, retry_delay=retry_delay, component_name="create-relationships")
    def create_relationships():
        if categorized_relationships["create"]:
            client.relationships.create(categorized_relationships["create"])

    @retry_decorator(max_retries=max_retries, retry_delay=retry_delay, component_name="update-relationships")
    def update_relationships():
        if categorized_relationships["resurrect"]:
            client.relationships.update(categorized_relationships["resurrect"])

        if categorized_relationships["decommission"]:
            client.relationships.update(categorized_relationships["decommission"])

    try:
        create_relationships()
    except CogniteDuplicatedError as e:
        # This situation should not happen, but if it does, the code attempts to handle it
        exists = {d["externalId"] for d in e.duplicated}
        missing_relationships = [
            t for t in cast(list[Relationship], categorized_relationships["create"]) if t.external_id not in exists
        ]
        if missing_relationships:
            client.relationships.create(missing_relationships)

    # Runs regardless of whether the create step needed the duplicate recovery above
    update_relationships()