cognite-neat 0.87.6__py3-none-any.whl → 0.88.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (171)
  1. cognite/neat/_version.py +1 -1
  2. cognite/neat/app/api/data_classes/rest.py +0 -19
  3. cognite/neat/app/api/explorer.py +6 -4
  4. cognite/neat/app/api/routers/configuration.py +1 -1
  5. cognite/neat/app/api/routers/crud.py +11 -21
  6. cognite/neat/app/api/routers/workflows.py +24 -94
  7. cognite/neat/app/ui/neat-app/build/asset-manifest.json +7 -7
  8. cognite/neat/app/ui/neat-app/build/index.html +1 -1
  9. cognite/neat/app/ui/neat-app/build/static/css/{main.38a62222.css → main.72e3d92e.css} +2 -2
  10. cognite/neat/app/ui/neat-app/build/static/css/main.72e3d92e.css.map +1 -0
  11. cognite/neat/app/ui/neat-app/build/static/js/main.5a52cf09.js +3 -0
  12. cognite/neat/app/ui/neat-app/build/static/js/{main.ec7f72e2.js.LICENSE.txt → main.5a52cf09.js.LICENSE.txt} +0 -9
  13. cognite/neat/app/ui/neat-app/build/static/js/main.5a52cf09.js.map +1 -0
  14. cognite/neat/config.py +44 -27
  15. cognite/neat/exceptions.py +6 -0
  16. cognite/neat/graph/extractors/_classic_cdf/_assets.py +21 -73
  17. cognite/neat/graph/extractors/_classic_cdf/_base.py +102 -0
  18. cognite/neat/graph/extractors/_classic_cdf/_events.py +46 -42
  19. cognite/neat/graph/extractors/_classic_cdf/_files.py +41 -45
  20. cognite/neat/graph/extractors/_classic_cdf/_labels.py +75 -52
  21. cognite/neat/graph/extractors/_classic_cdf/_relationships.py +49 -27
  22. cognite/neat/graph/extractors/_classic_cdf/_sequences.py +47 -50
  23. cognite/neat/graph/extractors/_classic_cdf/_timeseries.py +47 -49
  24. cognite/neat/graph/queries/_base.py +22 -29
  25. cognite/neat/graph/queries/_shared.py +1 -1
  26. cognite/neat/graph/stores/_base.py +24 -11
  27. cognite/neat/graph/transformers/_rdfpath.py +3 -2
  28. cognite/neat/issues.py +8 -0
  29. cognite/neat/rules/exporters/_rules2ontology.py +28 -20
  30. cognite/neat/rules/exporters/_validation.py +15 -21
  31. cognite/neat/rules/importers/_inference2rules.py +31 -35
  32. cognite/neat/rules/importers/_owl2rules/_owl2metadata.py +3 -7
  33. cognite/neat/rules/importers/_spreadsheet2rules.py +30 -27
  34. cognite/neat/rules/issues/dms.py +20 -0
  35. cognite/neat/rules/issues/importing.py +15 -0
  36. cognite/neat/rules/issues/ontology.py +298 -0
  37. cognite/neat/rules/issues/spreadsheet.py +48 -0
  38. cognite/neat/rules/issues/tables.py +72 -0
  39. cognite/neat/rules/models/_rdfpath.py +4 -4
  40. cognite/neat/rules/models/_types/_field.py +9 -19
  41. cognite/neat/rules/models/information/_rules.py +5 -4
  42. cognite/neat/utils/rdf_.py +17 -9
  43. cognite/neat/utils/regex_patterns.py +52 -0
  44. cognite/neat/workflows/steps/data_contracts.py +17 -43
  45. cognite/neat/workflows/steps/lib/current/graph_extractor.py +28 -24
  46. cognite/neat/workflows/steps/lib/current/graph_loader.py +4 -21
  47. cognite/neat/workflows/steps/lib/current/graph_store.py +18 -134
  48. cognite/neat/workflows/steps_registry.py +5 -7
  49. {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/METADATA +2 -6
  50. cognite_neat-0.88.1.dist-info/RECORD +209 -0
  51. cognite/neat/app/api/routers/core.py +0 -91
  52. cognite/neat/app/api/routers/data_exploration.py +0 -336
  53. cognite/neat/app/api/routers/rules.py +0 -203
  54. cognite/neat/app/ui/neat-app/build/static/css/main.38a62222.css.map +0 -1
  55. cognite/neat/app/ui/neat-app/build/static/js/main.ec7f72e2.js +0 -3
  56. cognite/neat/app/ui/neat-app/build/static/js/main.ec7f72e2.js.map +0 -1
  57. cognite/neat/graph/stores/_oxrdflib.py +0 -247
  58. cognite/neat/legacy/__init__.py +0 -0
  59. cognite/neat/legacy/graph/__init__.py +0 -3
  60. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44-dirty.xml +0 -20182
  61. cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44.xml +0 -20163
  62. cognite/neat/legacy/graph/examples/__init__.py +0 -10
  63. cognite/neat/legacy/graph/examples/skos-capturing-sheet-wind-topics.xlsx +0 -0
  64. cognite/neat/legacy/graph/exceptions.py +0 -90
  65. cognite/neat/legacy/graph/extractors/__init__.py +0 -6
  66. cognite/neat/legacy/graph/extractors/_base.py +0 -14
  67. cognite/neat/legacy/graph/extractors/_dexpi.py +0 -44
  68. cognite/neat/legacy/graph/extractors/_graph_capturing_sheet.py +0 -403
  69. cognite/neat/legacy/graph/extractors/_mock_graph_generator.py +0 -361
  70. cognite/neat/legacy/graph/loaders/__init__.py +0 -23
  71. cognite/neat/legacy/graph/loaders/_asset_loader.py +0 -511
  72. cognite/neat/legacy/graph/loaders/_base.py +0 -67
  73. cognite/neat/legacy/graph/loaders/_exceptions.py +0 -85
  74. cognite/neat/legacy/graph/loaders/core/__init__.py +0 -0
  75. cognite/neat/legacy/graph/loaders/core/labels.py +0 -58
  76. cognite/neat/legacy/graph/loaders/core/models.py +0 -136
  77. cognite/neat/legacy/graph/loaders/core/rdf_to_assets.py +0 -1046
  78. cognite/neat/legacy/graph/loaders/core/rdf_to_relationships.py +0 -559
  79. cognite/neat/legacy/graph/loaders/rdf_to_dms.py +0 -309
  80. cognite/neat/legacy/graph/loaders/validator.py +0 -87
  81. cognite/neat/legacy/graph/models.py +0 -6
  82. cognite/neat/legacy/graph/stores/__init__.py +0 -13
  83. cognite/neat/legacy/graph/stores/_base.py +0 -400
  84. cognite/neat/legacy/graph/stores/_graphdb_store.py +0 -52
  85. cognite/neat/legacy/graph/stores/_memory_store.py +0 -43
  86. cognite/neat/legacy/graph/stores/_oxigraph_store.py +0 -151
  87. cognite/neat/legacy/graph/stores/_oxrdflib.py +0 -247
  88. cognite/neat/legacy/graph/stores/_rdf_to_graph.py +0 -42
  89. cognite/neat/legacy/graph/transformations/__init__.py +0 -0
  90. cognite/neat/legacy/graph/transformations/entity_matcher.py +0 -101
  91. cognite/neat/legacy/graph/transformations/query_generator/__init__.py +0 -3
  92. cognite/neat/legacy/graph/transformations/query_generator/sparql.py +0 -575
  93. cognite/neat/legacy/graph/transformations/transformer.py +0 -322
  94. cognite/neat/legacy/rules/__init__.py +0 -0
  95. cognite/neat/legacy/rules/analysis.py +0 -231
  96. cognite/neat/legacy/rules/examples/Rules-Nordic44-to-graphql.xlsx +0 -0
  97. cognite/neat/legacy/rules/examples/Rules-Nordic44.xlsx +0 -0
  98. cognite/neat/legacy/rules/examples/__init__.py +0 -18
  99. cognite/neat/legacy/rules/examples/power-grid-containers.yaml +0 -124
  100. cognite/neat/legacy/rules/examples/power-grid-example.xlsx +0 -0
  101. cognite/neat/legacy/rules/examples/power-grid-model.yaml +0 -224
  102. cognite/neat/legacy/rules/examples/rules-template.xlsx +0 -0
  103. cognite/neat/legacy/rules/examples/sheet2cdf-transformation-rules.xlsx +0 -0
  104. cognite/neat/legacy/rules/examples/skos-rules.xlsx +0 -0
  105. cognite/neat/legacy/rules/examples/source-to-solution-mapping-rules.xlsx +0 -0
  106. cognite/neat/legacy/rules/examples/wind-energy.owl +0 -1511
  107. cognite/neat/legacy/rules/exceptions.py +0 -2972
  108. cognite/neat/legacy/rules/exporters/__init__.py +0 -20
  109. cognite/neat/legacy/rules/exporters/_base.py +0 -45
  110. cognite/neat/legacy/rules/exporters/_core/__init__.py +0 -5
  111. cognite/neat/legacy/rules/exporters/_core/rules2labels.py +0 -24
  112. cognite/neat/legacy/rules/exporters/_rules2dms.py +0 -885
  113. cognite/neat/legacy/rules/exporters/_rules2excel.py +0 -213
  114. cognite/neat/legacy/rules/exporters/_rules2graphql.py +0 -183
  115. cognite/neat/legacy/rules/exporters/_rules2ontology.py +0 -524
  116. cognite/neat/legacy/rules/exporters/_rules2pydantic_models.py +0 -748
  117. cognite/neat/legacy/rules/exporters/_rules2rules.py +0 -105
  118. cognite/neat/legacy/rules/exporters/_rules2triples.py +0 -38
  119. cognite/neat/legacy/rules/exporters/_validation.py +0 -146
  120. cognite/neat/legacy/rules/importers/__init__.py +0 -22
  121. cognite/neat/legacy/rules/importers/_base.py +0 -66
  122. cognite/neat/legacy/rules/importers/_dict2rules.py +0 -158
  123. cognite/neat/legacy/rules/importers/_dms2rules.py +0 -194
  124. cognite/neat/legacy/rules/importers/_graph2rules.py +0 -308
  125. cognite/neat/legacy/rules/importers/_json2rules.py +0 -39
  126. cognite/neat/legacy/rules/importers/_owl2rules/__init__.py +0 -3
  127. cognite/neat/legacy/rules/importers/_owl2rules/_owl2classes.py +0 -239
  128. cognite/neat/legacy/rules/importers/_owl2rules/_owl2metadata.py +0 -260
  129. cognite/neat/legacy/rules/importers/_owl2rules/_owl2properties.py +0 -217
  130. cognite/neat/legacy/rules/importers/_owl2rules/_owl2rules.py +0 -290
  131. cognite/neat/legacy/rules/importers/_spreadsheet2rules.py +0 -45
  132. cognite/neat/legacy/rules/importers/_xsd2rules.py +0 -20
  133. cognite/neat/legacy/rules/importers/_yaml2rules.py +0 -39
  134. cognite/neat/legacy/rules/models/__init__.py +0 -5
  135. cognite/neat/legacy/rules/models/_base.py +0 -151
  136. cognite/neat/legacy/rules/models/raw_rules.py +0 -316
  137. cognite/neat/legacy/rules/models/rdfpath.py +0 -237
  138. cognite/neat/legacy/rules/models/rules.py +0 -1289
  139. cognite/neat/legacy/rules/models/tables.py +0 -9
  140. cognite/neat/legacy/rules/models/value_types.py +0 -118
  141. cognite/neat/legacy/workflows/examples/Export_DMS/workflow.yaml +0 -89
  142. cognite/neat/legacy/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
  143. cognite/neat/legacy/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
  144. cognite/neat/legacy/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  145. cognite/neat/legacy/workflows/examples/Import_DMS/workflow.yaml +0 -65
  146. cognite/neat/legacy/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
  147. cognite/neat/legacy/workflows/examples/Validate_Rules/workflow.yaml +0 -67
  148. cognite/neat/legacy/workflows/examples/Validate_Solution_Model/workflow.yaml +0 -64
  149. cognite/neat/legacy/workflows/examples/Visualize_Data_Model_Using_Mock_Graph/workflow.yaml +0 -95
  150. cognite/neat/legacy/workflows/examples/Visualize_Semantic_Data_Model/workflow.yaml +0 -111
  151. cognite/neat/rules/exceptions.py +0 -2972
  152. cognite/neat/rules/models/_types/_base.py +0 -16
  153. cognite/neat/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
  154. cognite/neat/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
  155. cognite/neat/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
  156. cognite/neat/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
  157. cognite/neat/workflows/migration/__init__.py +0 -0
  158. cognite/neat/workflows/migration/steps.py +0 -91
  159. cognite/neat/workflows/migration/wf_manifests.py +0 -33
  160. cognite/neat/workflows/steps/lib/legacy/__init__.py +0 -7
  161. cognite/neat/workflows/steps/lib/legacy/graph_contextualization.py +0 -82
  162. cognite/neat/workflows/steps/lib/legacy/graph_extractor.py +0 -746
  163. cognite/neat/workflows/steps/lib/legacy/graph_loader.py +0 -606
  164. cognite/neat/workflows/steps/lib/legacy/graph_store.py +0 -307
  165. cognite/neat/workflows/steps/lib/legacy/graph_transformer.py +0 -58
  166. cognite/neat/workflows/steps/lib/legacy/rules_exporter.py +0 -511
  167. cognite/neat/workflows/steps/lib/legacy/rules_importer.py +0 -612
  168. cognite_neat-0.87.6.dist-info/RECORD +0 -319
  169. {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/LICENSE +0 -0
  170. {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/WHEEL +0 -0
  171. {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/entry_points.txt +0 -0
@@ -1,559 +0,0 @@
1
- import logging
2
- import warnings
3
- from collections.abc import Collection
4
- from typing import Any, Literal, cast, overload
5
- from warnings import warn
6
-
7
- import pandas as pd
8
- from cognite.client import CogniteClient
9
- from cognite.client.data_classes import LabelFilter, Relationship, RelationshipUpdate
10
- from cognite.client.exceptions import CogniteDuplicatedError
11
-
12
- from cognite.neat.legacy.graph.exceptions import NamespaceRequired
13
- from cognite.neat.legacy.graph.loaders.core.models import RelationshipDefinition, RelationshipDefinitions
14
- from cognite.neat.legacy.graph.loaders.core.rdf_to_assets import _categorize_cdf_assets
15
- from cognite.neat.legacy.graph.stores import NeatGraphStoreBase
16
- from cognite.neat.legacy.rules.models.rules import Rules
17
- from cognite.neat.utils.auxiliary import retry_decorator
18
- from cognite.neat.utils.collection_ import chunker
19
- from cognite.neat.utils.rdf_ import remove_namespace_from_uri
20
- from cognite.neat.utils.time_ import datetime_utc_now, epoch_now_ms
21
-
22
-
23
- def define_relationships(rules: Rules, data_set_id: int, stop_on_exception: bool = False) -> RelationshipDefinitions:
24
- """Define relationships from transformation rules
25
-
26
- Args:
27
- rules: Transformation rules which holds data model
28
- data_set_id: CDF data set id to which relationships belong to
29
- stop_on_exception: Whether to stop on exception or to continue. Defaults to False.
30
-
31
- Returns:
32
- RelationshipDefinitions instance holding relationship definitions extracted from transformation rules
33
- which are used to generate CDF relationships
34
- """
35
- relationships = {}
36
- if rules.metadata.namespace is None:
37
- raise NamespaceRequired("Load Relationships")
38
- namespace = rules.metadata.namespace
39
- prefix = rules.metadata.prefix
40
-
41
- # Unique ids used to check for redefinitions of relationships
42
- ids = set()
43
-
44
- for row, rule in rules.properties.items():
45
- if "Relationship" in rule.cdf_resource_type:
46
- label_set = {rule.class_id, rule.expected_value_type.suffix, "non-historic", rule.property_id}
47
- if rule.label:
48
- label_set.add(rule.label)
49
- relationship = RelationshipDefinition(
50
- source_class=rule.class_id,
51
- target_class=rule.expected_value_type.suffix,
52
- property_=rule.property_id,
53
- labels=list(label_set),
54
- target_type=rule.target_type,
55
- source_type=rule.source_type,
56
- relationship_external_id_rule=rule.relationship_external_id_rule,
57
- )
58
-
59
- id_ = f"{rule.class_id}({rule.property_id})"
60
- if id_ in ids:
61
- msg = f"Relationship {rule.property_id} redefined at {row} in transformation rules!"
62
- if stop_on_exception:
63
- logging.error(msg)
64
- raise ValueError(msg)
65
- else:
66
- msg += " Skipping redefinition!"
67
- warnings.warn(msg, stacklevel=2)
68
- logging.warning(msg)
69
- else:
70
- relationships[row] = relationship
71
- ids.add(id_)
72
-
73
- if relationships:
74
- return RelationshipDefinitions(
75
- data_set_id=data_set_id, prefix=prefix, namespace=namespace, relationships=relationships
76
- )
77
-
78
- msg = "No relationship defined in transformation rule sheet!"
79
- if stop_on_exception:
80
- logging.error(msg)
81
- raise ValueError(msg)
82
- else:
83
- warnings.warn(msg, stacklevel=2)
84
- logging.warning(msg)
85
- return RelationshipDefinitions(data_set_id=data_set_id, prefix=prefix, namespace=namespace, relationships={})
86
-
87
-
88
- def rdf2relationships(
89
- graph_store: NeatGraphStoreBase,
90
- rules: Rules,
91
- data_set_id: int,
92
- relationship_external_id_prefix: str | None = None,
93
- stop_on_exception: bool = False,
94
- ) -> pd.DataFrame:
95
- """Converts RDF triples to relationships
96
-
97
- Args:
98
- graph : Graph instance holding RDF triples
99
- rules : Transformation rules which holds data model and relationship definitions
100
-
101
- Returns:
102
- Dataframe holding relationships
103
- """
104
-
105
- # Step 1: Generate relationship definitions
106
- relationship_definitions = define_relationships(rules, stop_on_exception)
107
-
108
- # Step 2: Generation relationships
109
-
110
- query_statement_template_by_reference = """
111
- SELECT ?source ?target
112
- WHERE {
113
- ?source a prefix:source_class .
114
- ?target a prefix:target_class .
115
- ?source prefix:property_ ?target
116
- }"""
117
-
118
- query_statement_template_by_value = """
119
- SELECT ?source_id ?target_id
120
- WHERE {
121
- ?source a prefix:source_class .
122
- ?source prefix:property_ ?target .
123
- ?source prefix:source_ext_id_prop_name ?source_id .
124
- ?target a prefix:target_class .
125
- ?target prefix:target_ext_id_prop_name ?target_id .
126
- }
127
- """
128
-
129
- relationship_dfs = []
130
- for id_, definition in relationship_definitions.relationships.items():
131
- try:
132
- logging.debug("Processing relationship: " + id_)
133
- external_id_prop_name = definition.relationship_external_id_rule
134
- if external_id_prop_name:
135
- query = (
136
- query_statement_template_by_value.replace("prefix", relationship_definitions.prefix)
137
- .replace("source_ext_id_prop_name", external_id_prop_name)
138
- .replace("target_ext_id_prop_name", external_id_prop_name)
139
- .replace("source_class", definition.source_class)
140
- .replace("target_class", definition.target_class)
141
- .replace("property_", definition.property_)
142
- )
143
- else:
144
- query = (
145
- query_statement_template_by_reference.replace("prefix", relationship_definitions.prefix)
146
- .replace("source_class", definition.source_class)
147
- .replace("target_class", definition.target_class)
148
- .replace("property_", definition.property_)
149
- )
150
-
151
- logging.debug("Rel query: " + query)
152
- relationship_data_frame = pd.DataFrame(list(graph_store.query(query)))
153
- relationship_data_frame.rename(columns={0: "source_external_id", 1: "target_external_id"}, inplace=True)
154
-
155
- # removes namespace
156
- relationship_data_frame = relationship_data_frame.map(remove_namespace_from_uri) # type: ignore[operator]
157
-
158
- # adding prefix
159
- if relationship_external_id_prefix:
160
- relationship_data_frame["source_external_id"] = (
161
- relationship_external_id_prefix + relationship_data_frame["source_external_id"]
162
- )
163
- relationship_data_frame["target_external_id"] = (
164
- relationship_external_id_prefix + relationship_data_frame["target_external_id"]
165
- )
166
-
167
- relationship_data_frame["target_type"] = definition.target_type
168
- relationship_data_frame["source_type"] = definition.source_type
169
-
170
- # to make sure that by default we set Relationship to active, i.e. non-historic)
171
- relationship_data_frame["labels"] = [definition.labels] * len(relationship_data_frame)
172
-
173
- # set default external id
174
- relationship_data_frame["external_id"] = (
175
- relationship_data_frame["source_external_id"] + ":" + relationship_data_frame["target_external_id"]
176
- )
177
- relationship_data_frame["data_set_id"] = data_set_id
178
- relationship_dfs += [relationship_data_frame]
179
- except Exception as e:
180
- logging.error("Error processing relationship: " + id_)
181
- if stop_on_exception:
182
- raise e
183
- continue
184
-
185
- if relationship_dfs:
186
- relationship_df = pd.concat(relationship_dfs)
187
- relationship_df.reset_index(inplace=True, drop=True)
188
-
189
- # Remove duplicate rows, if any. This should not happen, but it is better to be safe than sorry
190
- relationship_df.drop_duplicates(subset=["external_id"], inplace=True)
191
-
192
- # Remove duplicate rows, if any. This should not happen, but it is better to be safe than sorry
193
- relationship_df.drop_duplicates(subset=["external_id"], inplace=True)
194
- relationship_df["start_time"] = len(relationship_df) * [epoch_now_ms()]
195
- return relationship_df
196
- else:
197
- return pd.DataFrame(
198
- columns=[
199
- "source_external_id",
200
- "target_external_id",
201
- "target_type",
202
- "source_type",
203
- "labels",
204
- "external_id",
205
- "data_set_id",
206
- "start_time",
207
- ]
208
- )
209
-
210
-
211
- def rdf2relationship_data_frame(
212
- graph_store: NeatGraphStoreBase, transformation_rules: Rules, stop_on_exception: bool = False
213
- ) -> pd.DataFrame:
214
- warn("'rdf2relationship_data_frame' is deprecated, please use 'rdf2relationships' instead!", stacklevel=2)
215
- logging.warning("'rdf2relationship_data_frame' is deprecated, please use 'rdf2relationships' instead!")
216
- return rdf2relationships(graph_store, transformation_rules, stop_on_exception)
217
-
218
-
219
- def _filter_relationship_xids(relationship_data_frame: pd.DataFrame, asset_xids: list | set) -> set:
220
- return set(
221
- relationship_data_frame[
222
- (relationship_data_frame["source_external_id"].isin(asset_xids))
223
- | (relationship_data_frame["target_external_id"].isin(asset_xids))
224
- ]["external_id"]
225
- )
226
-
227
-
228
- def _categorize_rdf_relationship_xids(
229
- rdf_relationships: pd.DataFrame, categorized_asset_ids: dict
230
- ) -> dict[str, set[str]]:
231
- """Categorizes the external ids of the RDF relationship."""
232
-
233
- missing_asset_ids = (
234
- set(rdf_relationships.target_external_id)
235
- .union(rdf_relationships.source_external_id)
236
- .difference(categorized_asset_ids["historic"].union(categorized_asset_ids["non-historic"]))
237
- )
238
-
239
- if missing_asset_ids:
240
- msg = f"Relationships are referring to these assets {missing_asset_ids}, which are missing in CDF."
241
- msg += "Relationships will not be created for assets that are missing in CDF."
242
- msg += "Please make sure that all assets are present in CDF before creating relationships."
243
- logging.warning(msg)
244
-
245
- # First mask all relationships which contain assets that do not exist in CDF
246
- mask_impossible = _filter_relationship_xids(rdf_relationships, missing_asset_ids)
247
-
248
- # Then mask all relationships which contain assets that are historic while masking
249
- # all impossible relationships
250
- mask_historic = _filter_relationship_xids(rdf_relationships, categorized_asset_ids["historic"]).difference(
251
- mask_impossible
252
- )
253
-
254
- mask_non_historic = (
255
- _filter_relationship_xids(rdf_relationships, categorized_asset_ids["non-historic"])
256
- .difference(mask_historic)
257
- .difference(mask_impossible)
258
- )
259
-
260
- return {"impossible": mask_impossible, "historic": mask_historic, "non-historic": mask_non_historic}
261
-
262
-
263
- def _get_label_based_cdf_relationship_xids(client, data_set_id, labels, partitions) -> set:
264
- """Get external ids of relationships in CDF for a given data set filtered on labels"""
265
-
266
- labels = LabelFilter(contains_any=labels) if labels is not None else None
267
- relationship_data_frame = client.relationships.list(
268
- data_set_ids=data_set_id, limit=-1, labels=labels, partitions=partitions
269
- ).to_pandas()
270
- return set() if relationship_data_frame.empty else set(relationship_data_frame.external_id)
271
-
272
-
273
- def _categorize_cdf_relationship_xids(client, data_set_id, partitions) -> dict[str, set]:
274
- return {
275
- "historic": _get_label_based_cdf_relationship_xids(client, data_set_id, ["historic"], partitions),
276
- "non-historic": _get_label_based_cdf_relationship_xids(client, data_set_id, ["non-historic"], partitions),
277
- }
278
-
279
-
280
- def _relationship_to_create(relationships: pd.DataFrame) -> list[Relationship]:
281
- start_time = datetime_utc_now()
282
- if relationships.empty:
283
- return []
284
- logging.info("Wrangling assets to be created into their final form")
285
- relationship_list = [Relationship(**cast(dict[str, Any], row)) for row in relationships.to_dict(orient="records")]
286
- logging.info(f"Wrangling completed in {(datetime_utc_now() - start_time).seconds} seconds")
287
- return relationship_list
288
-
289
-
290
- def _relationships_to_decommission(external_ids: Collection[str]) -> list[RelationshipUpdate]:
291
- start_time = datetime_utc_now()
292
- relationships = []
293
- if not external_ids:
294
- return []
295
-
296
- logging.info("Wrangling relationships to be decommissioned into their final form")
297
-
298
- for external_id in external_ids:
299
- # Create relationship update object instance
300
- relationship = RelationshipUpdate(external_id=external_id)
301
-
302
- # Remove "non-historic" label and add "historic" label
303
- relationship.labels.remove("non-historic")
304
- relationship.labels.add(["historic"])
305
-
306
- # Set end time of relationships
307
- relationship.end_time.set(epoch_now_ms())
308
-
309
- # Add relationship to list of relationship updates
310
- relationships += [relationship]
311
-
312
- logging.info(f"Wrangling of {len(relationships)} completed in {(datetime_utc_now() - start_time).seconds} seconds")
313
- return relationships
314
-
315
-
316
- def _relationships_to_resurrect(external_ids: Collection[str]) -> list[RelationshipUpdate]:
317
- start_time = datetime_utc_now()
318
- relationships = []
319
- if not external_ids:
320
- return []
321
-
322
- logging.info("Wrangling relationships to be resurrected into their final form")
323
-
324
- for external_id in external_ids:
325
- # Create relationship update object instance
326
- relationship = RelationshipUpdate(external_id=external_id)
327
-
328
- # Remove "non-historic" label and add "historic" label
329
- relationship.labels.remove("historic")
330
- relationship.labels.add(["non-historic"])
331
-
332
- # Set end time of relationships
333
- relationship.end_time.set(None)
334
-
335
- # Add relationship to list of relationship updates
336
- relationships += [relationship]
337
-
338
- logging.info(f"Wrangling of {len(relationships)} completed in {(datetime_utc_now() - start_time).seconds} seconds")
339
- return relationships
340
-
341
-
342
- @overload
343
- def categorize_relationships(
344
- client: CogniteClient,
345
- rdf_relationships: pd.DataFrame,
346
- data_set_id: int,
347
- return_report: Literal[False] = False,
348
- partitions: int = 40,
349
- ) -> dict[str, list[Relationship] | list[RelationshipUpdate]]: ...
350
-
351
-
352
- @overload
353
- def categorize_relationships(
354
- client: CogniteClient,
355
- rdf_relationships: pd.DataFrame,
356
- data_set_id: int,
357
- return_report: Literal[True],
358
- partitions: int = 40,
359
- ) -> tuple[dict[str, list[Relationship] | list[RelationshipUpdate]], dict[str, set]]: ...
360
-
361
-
362
- def categorize_relationships(
363
- client: CogniteClient,
364
- rdf_relationships: pd.DataFrame,
365
- data_set_id: int,
366
- return_report: bool = False,
367
- partitions: int = 40,
368
- ) -> (
369
- tuple[dict[str, list[Relationship] | list[RelationshipUpdate]], dict[str, set]]
370
- | dict[str, list[Relationship] | list[RelationshipUpdate]]
371
- ):
372
- """Categorize relationships on those that are to be created, decommissioned or resurrected
373
-
374
- Args:
375
- client : CogniteClient
376
- rdf_relationships : Dataframe holding relationships
377
- data_set_id : CDF data set id to which relationships are to be uploaded
378
- partitions : Number of partitions to use when querying CDF for relationships
379
- return_report : Whether to return report or not
380
-
381
- Returns:
382
- Categorized relationships to be created, decommissioned or resurrected
383
- """
384
- # TODO also figure out which relationships to be deleted
385
-
386
- _, categorized_asset_ids = _categorize_cdf_assets(client, data_set_id=data_set_id, partitions=partitions)
387
- categorized_rdf_relationships = _categorize_rdf_relationship_xids(rdf_relationships, categorized_asset_ids)
388
- categorized_cdf_relationships = _categorize_cdf_relationship_xids(client, data_set_id, partitions=partitions)
389
-
390
- cdf_relationships_all = categorized_cdf_relationships["historic"].union(
391
- categorized_cdf_relationships["non-historic"]
392
- )
393
- rdf_relationships_all = categorized_rdf_relationships["historic"].union(
394
- categorized_rdf_relationships["non-historic"]
395
- )
396
-
397
- # relationships to create
398
- # NonHistoric_rdf - (Historic_cdf U Non-historic_cdf)
399
- create_xids = categorized_rdf_relationships["non-historic"].difference(cdf_relationships_all)
400
-
401
- # relationships to decommission
402
- # rdf: Historic_rdf ∩ NonHistoric_cdf U (All_cdf - All_rdf)
403
- decommission_xids = (
404
- categorized_rdf_relationships["historic"]
405
- .intersection(categorized_cdf_relationships["non-historic"])
406
- .union(categorized_cdf_relationships["non-historic"].difference(rdf_relationships_all))
407
- )
408
-
409
- # relationships to resurrect
410
- # NonHistoric_rdf ∩ Historic_cdf
411
- resurrect_xids = categorized_rdf_relationships["non-historic"].intersection(
412
- categorized_cdf_relationships["historic"]
413
- )
414
-
415
- logging.info(f"Number of relationships to create: { len(create_xids)}")
416
- logging.info(f"Number of relationships to decommission: { len(decommission_xids)}")
417
- logging.info(f"Number of relationships to resurrect: { len(resurrect_xids)}")
418
-
419
- report = {"create": create_xids, "resurrect": resurrect_xids, "decommission": decommission_xids}
420
- categorized_relationships: dict[str, list[Relationship] | list[RelationshipUpdate]] = {
421
- "create": _relationship_to_create(rdf_relationships[rdf_relationships.external_id.isin(create_xids)]),
422
- "resurrect": _relationships_to_resurrect(resurrect_xids),
423
- "decommission": _relationships_to_decommission(decommission_xids),
424
- }
425
-
426
- return (categorized_relationships, report) if return_report else categorized_relationships
427
-
428
-
429
- def _micro_batch_push(
430
- client: CogniteClient,
431
- relationships: list,
432
- batch_size: int = 1000,
433
- push_type: str = "update",
434
- message: str = "Updated",
435
- max_retries: int = 1,
436
- retry_delay: int = 5,
437
- ):
438
- """Updates assets in batches of 1000
439
-
440
- Args:
441
- client : CogniteClient
442
- Instance of CogniteClient
443
- relationships : list
444
- List of relationships to be created or updated
445
- batch_size : int, optional
446
- Size of batch, by default 1000
447
- push_type : str, optional
448
- Type of push, either "update" or "create", by default "update"
449
- message : str, optional
450
- Message to logged, by default "Updated"
451
- """
452
- total = len(relationships)
453
- counter = 0
454
- if push_type not in ["update", "create"]:
455
- logging.info(f"push_type {push_type} not supported")
456
- raise ValueError(f"push_type {push_type} not supported")
457
-
458
- for batch in chunker(relationships, batch_size):
459
- counter += len(batch)
460
- start_time = datetime_utc_now()
461
-
462
- @retry_decorator(max_retries=max_retries, retry_delay=retry_delay, component_name="microbatch-relationships")
463
- def update_relationships(batch):
464
- if push_type == "update":
465
- client.relationships.update(batch)
466
- elif push_type == "create":
467
- client.relationships.create(batch)
468
-
469
- try:
470
- update_relationships(batch)
471
- except CogniteDuplicatedError as e:
472
- # This situation should not happen but if it does, we need to handle it
473
- exists = {d["externalId"] for d in e.duplicated}
474
- missing_relationships = [t for t in batch if t.external_id not in exists]
475
- client.relationships.create(missing_relationships)
476
-
477
- delta_time = (datetime_utc_now() - start_time).seconds
478
-
479
- msg = f"{message} {counter} of {total} relationships, batch processing time: {delta_time:.2f} "
480
- msg += f"seconds ETC: {delta_time * (total - counter) / (60*batch_size) :.2f} minutes"
481
- logging.info(msg)
482
-
483
-
484
- def upload_relationships(
485
- client: CogniteClient,
486
- categorized_relationships: dict[str, list[Relationship] | list[RelationshipUpdate]],
487
- batch_size: int = 5000,
488
- max_retries: int = 1,
489
- retry_delay: int = 3,
490
- ):
491
- """Uploads categorized relationships to CDF
492
-
493
- Args:
494
- client: Instance of CogniteClient
495
- categorized_relationships: Categories of relationships to be uploaded
496
- batch_size: Size of batch, by default 5000
497
- max_retries: Maximum times to retry the upload, by default 1
498
- retry_delay: Time delay before retrying the upload, by default 3
499
-
500
- !!! note "batch_size"
501
- If batch size is set to 1 or None, all relationships will be pushed to CDF in one go.
502
- """
503
- if batch_size:
504
- logging.info(f"Uploading relationships in batches of {batch_size}")
505
- if categorized_relationships["create"]:
506
- _micro_batch_push(
507
- client,
508
- categorized_relationships["create"],
509
- batch_size,
510
- push_type="create",
511
- message="Created",
512
- max_retries=max_retries,
513
- retry_delay=retry_delay,
514
- )
515
-
516
- if categorized_relationships["resurrect"]:
517
- _micro_batch_push(
518
- client,
519
- categorized_relationships["resurrect"],
520
- batch_size,
521
- message="Resurrected",
522
- max_retries=max_retries,
523
- retry_delay=retry_delay,
524
- )
525
-
526
- if categorized_relationships["decommission"]:
527
- _micro_batch_push(
528
- client,
529
- categorized_relationships["decommission"],
530
- batch_size,
531
- message="Decommissioned",
532
- max_retries=max_retries,
533
- retry_delay=retry_delay,
534
- )
535
-
536
- else:
537
- logging.info("Batch size not set, pushing all relationships to CDF in one go!")
538
-
539
- @retry_decorator(max_retries=max_retries, retry_delay=retry_delay, component_name="create-relationships")
540
- def create_relationships():
541
- if categorized_relationships["create"]:
542
- client.relationships.create(categorized_relationships["create"])
543
-
544
- if categorized_relationships["resurrect"]:
545
- client.relationships.update(categorized_relationships["resurrect"])
546
-
547
- if categorized_relationships["decommission"]:
548
- client.relationships.update(categorized_relationships["decommission"])
549
-
550
- try:
551
- create_relationships()
552
- except CogniteDuplicatedError as e:
553
- # This situation should not happen, but if it does, the code attempts to handle it
554
- exists = {d["externalId"] for d in e.duplicated}
555
- missing_relationships = [
556
- t for t in cast(list[Relationship], categorized_relationships["create"]) if t.external_id not in exists
557
- ]
558
-
559
- client.relationships.create(missing_relationships)