cognite-neat 0.87.6__py3-none-any.whl → 0.88.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_version.py +1 -1
- cognite/neat/app/api/data_classes/rest.py +0 -19
- cognite/neat/app/api/explorer.py +6 -4
- cognite/neat/app/api/routers/crud.py +11 -21
- cognite/neat/app/api/routers/workflows.py +24 -94
- cognite/neat/graph/stores/_base.py +5 -0
- cognite/neat/rules/importers/_inference2rules.py +31 -35
- cognite/neat/workflows/steps/data_contracts.py +17 -43
- cognite/neat/workflows/steps/lib/current/graph_extractor.py +28 -24
- cognite/neat/workflows/steps/lib/current/graph_loader.py +4 -21
- cognite/neat/workflows/steps/lib/current/graph_store.py +18 -134
- cognite/neat/workflows/steps_registry.py +5 -7
- {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.0.dist-info}/METADATA +1 -1
- {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.0.dist-info}/RECORD +17 -125
- cognite/neat/app/api/routers/core.py +0 -91
- cognite/neat/app/api/routers/data_exploration.py +0 -336
- cognite/neat/app/api/routers/rules.py +0 -203
- cognite/neat/legacy/__init__.py +0 -0
- cognite/neat/legacy/graph/__init__.py +0 -3
- cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44-dirty.xml +0 -20182
- cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44.xml +0 -20163
- cognite/neat/legacy/graph/examples/__init__.py +0 -10
- cognite/neat/legacy/graph/examples/skos-capturing-sheet-wind-topics.xlsx +0 -0
- cognite/neat/legacy/graph/exceptions.py +0 -90
- cognite/neat/legacy/graph/extractors/__init__.py +0 -6
- cognite/neat/legacy/graph/extractors/_base.py +0 -14
- cognite/neat/legacy/graph/extractors/_dexpi.py +0 -44
- cognite/neat/legacy/graph/extractors/_graph_capturing_sheet.py +0 -403
- cognite/neat/legacy/graph/extractors/_mock_graph_generator.py +0 -361
- cognite/neat/legacy/graph/loaders/__init__.py +0 -23
- cognite/neat/legacy/graph/loaders/_asset_loader.py +0 -511
- cognite/neat/legacy/graph/loaders/_base.py +0 -67
- cognite/neat/legacy/graph/loaders/_exceptions.py +0 -85
- cognite/neat/legacy/graph/loaders/core/__init__.py +0 -0
- cognite/neat/legacy/graph/loaders/core/labels.py +0 -58
- cognite/neat/legacy/graph/loaders/core/models.py +0 -136
- cognite/neat/legacy/graph/loaders/core/rdf_to_assets.py +0 -1046
- cognite/neat/legacy/graph/loaders/core/rdf_to_relationships.py +0 -559
- cognite/neat/legacy/graph/loaders/rdf_to_dms.py +0 -309
- cognite/neat/legacy/graph/loaders/validator.py +0 -87
- cognite/neat/legacy/graph/models.py +0 -6
- cognite/neat/legacy/graph/stores/__init__.py +0 -13
- cognite/neat/legacy/graph/stores/_base.py +0 -400
- cognite/neat/legacy/graph/stores/_graphdb_store.py +0 -52
- cognite/neat/legacy/graph/stores/_memory_store.py +0 -43
- cognite/neat/legacy/graph/stores/_oxigraph_store.py +0 -151
- cognite/neat/legacy/graph/stores/_oxrdflib.py +0 -247
- cognite/neat/legacy/graph/stores/_rdf_to_graph.py +0 -42
- cognite/neat/legacy/graph/transformations/__init__.py +0 -0
- cognite/neat/legacy/graph/transformations/entity_matcher.py +0 -101
- cognite/neat/legacy/graph/transformations/query_generator/__init__.py +0 -3
- cognite/neat/legacy/graph/transformations/query_generator/sparql.py +0 -575
- cognite/neat/legacy/graph/transformations/transformer.py +0 -322
- cognite/neat/legacy/rules/__init__.py +0 -0
- cognite/neat/legacy/rules/analysis.py +0 -231
- cognite/neat/legacy/rules/examples/Rules-Nordic44-to-graphql.xlsx +0 -0
- cognite/neat/legacy/rules/examples/Rules-Nordic44.xlsx +0 -0
- cognite/neat/legacy/rules/examples/__init__.py +0 -18
- cognite/neat/legacy/rules/examples/power-grid-containers.yaml +0 -124
- cognite/neat/legacy/rules/examples/power-grid-example.xlsx +0 -0
- cognite/neat/legacy/rules/examples/power-grid-model.yaml +0 -224
- cognite/neat/legacy/rules/examples/rules-template.xlsx +0 -0
- cognite/neat/legacy/rules/examples/sheet2cdf-transformation-rules.xlsx +0 -0
- cognite/neat/legacy/rules/examples/skos-rules.xlsx +0 -0
- cognite/neat/legacy/rules/examples/source-to-solution-mapping-rules.xlsx +0 -0
- cognite/neat/legacy/rules/examples/wind-energy.owl +0 -1511
- cognite/neat/legacy/rules/exceptions.py +0 -2972
- cognite/neat/legacy/rules/exporters/__init__.py +0 -20
- cognite/neat/legacy/rules/exporters/_base.py +0 -45
- cognite/neat/legacy/rules/exporters/_core/__init__.py +0 -5
- cognite/neat/legacy/rules/exporters/_core/rules2labels.py +0 -24
- cognite/neat/legacy/rules/exporters/_rules2dms.py +0 -885
- cognite/neat/legacy/rules/exporters/_rules2excel.py +0 -213
- cognite/neat/legacy/rules/exporters/_rules2graphql.py +0 -183
- cognite/neat/legacy/rules/exporters/_rules2ontology.py +0 -524
- cognite/neat/legacy/rules/exporters/_rules2pydantic_models.py +0 -748
- cognite/neat/legacy/rules/exporters/_rules2rules.py +0 -105
- cognite/neat/legacy/rules/exporters/_rules2triples.py +0 -38
- cognite/neat/legacy/rules/exporters/_validation.py +0 -146
- cognite/neat/legacy/rules/importers/__init__.py +0 -22
- cognite/neat/legacy/rules/importers/_base.py +0 -66
- cognite/neat/legacy/rules/importers/_dict2rules.py +0 -158
- cognite/neat/legacy/rules/importers/_dms2rules.py +0 -194
- cognite/neat/legacy/rules/importers/_graph2rules.py +0 -308
- cognite/neat/legacy/rules/importers/_json2rules.py +0 -39
- cognite/neat/legacy/rules/importers/_owl2rules/__init__.py +0 -3
- cognite/neat/legacy/rules/importers/_owl2rules/_owl2classes.py +0 -239
- cognite/neat/legacy/rules/importers/_owl2rules/_owl2metadata.py +0 -260
- cognite/neat/legacy/rules/importers/_owl2rules/_owl2properties.py +0 -217
- cognite/neat/legacy/rules/importers/_owl2rules/_owl2rules.py +0 -290
- cognite/neat/legacy/rules/importers/_spreadsheet2rules.py +0 -45
- cognite/neat/legacy/rules/importers/_xsd2rules.py +0 -20
- cognite/neat/legacy/rules/importers/_yaml2rules.py +0 -39
- cognite/neat/legacy/rules/models/__init__.py +0 -5
- cognite/neat/legacy/rules/models/_base.py +0 -151
- cognite/neat/legacy/rules/models/raw_rules.py +0 -316
- cognite/neat/legacy/rules/models/rdfpath.py +0 -237
- cognite/neat/legacy/rules/models/rules.py +0 -1289
- cognite/neat/legacy/rules/models/tables.py +0 -9
- cognite/neat/legacy/rules/models/value_types.py +0 -118
- cognite/neat/legacy/workflows/examples/Export_DMS/workflow.yaml +0 -89
- cognite/neat/legacy/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
- cognite/neat/legacy/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
- cognite/neat/legacy/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
- cognite/neat/legacy/workflows/examples/Import_DMS/workflow.yaml +0 -65
- cognite/neat/legacy/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
- cognite/neat/legacy/workflows/examples/Validate_Rules/workflow.yaml +0 -67
- cognite/neat/legacy/workflows/examples/Validate_Solution_Model/workflow.yaml +0 -64
- cognite/neat/legacy/workflows/examples/Visualize_Data_Model_Using_Mock_Graph/workflow.yaml +0 -95
- cognite/neat/legacy/workflows/examples/Visualize_Semantic_Data_Model/workflow.yaml +0 -111
- cognite/neat/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
- cognite/neat/workflows/migration/__init__.py +0 -0
- cognite/neat/workflows/migration/steps.py +0 -91
- cognite/neat/workflows/migration/wf_manifests.py +0 -33
- cognite/neat/workflows/steps/lib/legacy/__init__.py +0 -7
- cognite/neat/workflows/steps/lib/legacy/graph_contextualization.py +0 -82
- cognite/neat/workflows/steps/lib/legacy/graph_extractor.py +0 -746
- cognite/neat/workflows/steps/lib/legacy/graph_loader.py +0 -606
- cognite/neat/workflows/steps/lib/legacy/graph_store.py +0 -307
- cognite/neat/workflows/steps/lib/legacy/graph_transformer.py +0 -58
- cognite/neat/workflows/steps/lib/legacy/rules_exporter.py +0 -511
- cognite/neat/workflows/steps/lib/legacy/rules_importer.py +0 -612
- {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,559 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
import warnings
|
|
3
|
-
from collections.abc import Collection
|
|
4
|
-
from typing import Any, Literal, cast, overload
|
|
5
|
-
from warnings import warn
|
|
6
|
-
|
|
7
|
-
import pandas as pd
|
|
8
|
-
from cognite.client import CogniteClient
|
|
9
|
-
from cognite.client.data_classes import LabelFilter, Relationship, RelationshipUpdate
|
|
10
|
-
from cognite.client.exceptions import CogniteDuplicatedError
|
|
11
|
-
|
|
12
|
-
from cognite.neat.legacy.graph.exceptions import NamespaceRequired
|
|
13
|
-
from cognite.neat.legacy.graph.loaders.core.models import RelationshipDefinition, RelationshipDefinitions
|
|
14
|
-
from cognite.neat.legacy.graph.loaders.core.rdf_to_assets import _categorize_cdf_assets
|
|
15
|
-
from cognite.neat.legacy.graph.stores import NeatGraphStoreBase
|
|
16
|
-
from cognite.neat.legacy.rules.models.rules import Rules
|
|
17
|
-
from cognite.neat.utils.auxiliary import retry_decorator
|
|
18
|
-
from cognite.neat.utils.collection_ import chunker
|
|
19
|
-
from cognite.neat.utils.rdf_ import remove_namespace_from_uri
|
|
20
|
-
from cognite.neat.utils.time_ import datetime_utc_now, epoch_now_ms
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def define_relationships(rules: Rules, data_set_id: int, stop_on_exception: bool = False) -> RelationshipDefinitions:
    """Collect relationship definitions from the properties of the transformation rules.

    Every property whose ``cdf_resource_type`` contains ``"Relationship"`` produces one
    definition, stored under the row key of that property. A later property with the same
    ``class_id``/``property_id`` pair counts as a redefinition and is not stored.

    Args:
        rules: Transformation rules which hold the data model.
        data_set_id: CDF data set id to which the relationships belong.
        stop_on_exception: If True, raise on a redefinition or when no relationship is
            defined at all; otherwise warn, log and carry on. Defaults to False.

    Returns:
        RelationshipDefinitions instance holding the definitions found in the rules
        (with an empty ``relationships`` mapping when none were found).

    Raises:
        NamespaceRequired: If the rules metadata has no namespace.
        ValueError: If ``stop_on_exception`` is True and a relationship is redefined
            or no relationship is defined.
    """
    definitions = {}
    if rules.metadata.namespace is None:
        raise NamespaceRequired("Load Relationships")
    namespace = rules.metadata.namespace
    prefix = rules.metadata.prefix

    # "<class>(<property>)" keys already registered, used to spot redefinitions
    seen = set()

    for row, rule in rules.properties.items():
        if "Relationship" not in rule.cdf_resource_type:
            continue

        # "non-historic" marks the relationship as active by default
        labels = {rule.class_id, rule.expected_value_type.suffix, "non-historic", rule.property_id}
        if rule.label:
            labels.add(rule.label)

        candidate = RelationshipDefinition(
            source_class=rule.class_id,
            target_class=rule.expected_value_type.suffix,
            property_=rule.property_id,
            labels=list(labels),
            target_type=rule.target_type,
            source_type=rule.source_type,
            relationship_external_id_rule=rule.relationship_external_id_rule,
        )

        key = f"{rule.class_id}({rule.property_id})"
        if key not in seen:
            definitions[row] = candidate
            seen.add(key)
            continue

        msg = f"Relationship {rule.property_id} redefined at {row} in transformation rules!"
        if stop_on_exception:
            logging.error(msg)
            raise ValueError(msg)
        msg += " Skipping redefinition!"
        warnings.warn(msg, stacklevel=2)
        logging.warning(msg)

    if not definitions:
        msg = "No relationship defined in transformation rule sheet!"
        if stop_on_exception:
            logging.error(msg)
            raise ValueError(msg)
        warnings.warn(msg, stacklevel=2)
        logging.warning(msg)

    return RelationshipDefinitions(
        data_set_id=data_set_id, prefix=prefix, namespace=namespace, relationships=definitions
    )
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
def rdf2relationships(
    graph_store: NeatGraphStoreBase,
    rules: Rules,
    data_set_id: int,
    relationship_external_id_prefix: str | None = None,
    stop_on_exception: bool = False,
) -> pd.DataFrame:
    """Converts RDF triples to relationships

    Args:
        graph_store: Graph store which is queried (SPARQL) for the RDF triples
        rules: Transformation rules which hold the data model and relationship definitions
        data_set_id: CDF data set id written to every relationship row
        relationship_external_id_prefix: Optional prefix prepended to the source and target
            external ids (and therefore to the derived relationship external id)
        stop_on_exception: If True, re-raise the first error hit while processing a
            relationship definition; otherwise log it and continue with the next one

    Returns:
        Dataframe holding relationships with the columns ``source_external_id``,
        ``target_external_id``, ``target_type``, ``source_type``, ``labels``,
        ``external_id``, ``data_set_id`` and ``start_time``. The frame is empty (but has
        these columns) when no relationship could be generated.
    """
    # Step 1: Generate relationship definitions
    # data_set_id and stop_on_exception must both be forwarded; passing only
    # stop_on_exception would bind it to the data_set_id parameter.
    relationship_definitions = define_relationships(rules, data_set_id, stop_on_exception)

    # Step 2: Generate relationships
    # The templates are filled in by plain substring replacement of the placeholder words
    # (prefix, source_class, target_class, property_, ...).
    query_statement_template_by_reference = """
    SELECT ?source ?target
    WHERE {
        ?source a prefix:source_class .
        ?target a prefix:target_class .
        ?source prefix:property_ ?target
    }"""

    query_statement_template_by_value = """
    SELECT ?source_id ?target_id
    WHERE {
        ?source a prefix:source_class .
        ?source prefix:property_ ?target .
        ?source prefix:source_ext_id_prop_name ?source_id .
        ?target a prefix:target_class .
        ?target prefix:target_ext_id_prop_name ?target_id .
    }
    """

    relationship_dfs = []
    for id_, definition in relationship_definitions.relationships.items():
        try:
            logging.debug(f"Processing relationship: {id_}")
            external_id_prop_name = definition.relationship_external_id_rule
            if external_id_prop_name:
                # external ids are read from a property of the source and target instances
                query = (
                    query_statement_template_by_value.replace("prefix", relationship_definitions.prefix)
                    .replace("source_ext_id_prop_name", external_id_prop_name)
                    .replace("target_ext_id_prop_name", external_id_prop_name)
                    .replace("source_class", definition.source_class)
                    .replace("target_class", definition.target_class)
                    .replace("property_", definition.property_)
                )
            else:
                # external ids are derived from the instance URIs themselves
                query = (
                    query_statement_template_by_reference.replace("prefix", relationship_definitions.prefix)
                    .replace("source_class", definition.source_class)
                    .replace("target_class", definition.target_class)
                    .replace("property_", definition.property_)
                )

            logging.debug("Rel query: " + query)
            relationship_data_frame = pd.DataFrame(list(graph_store.query(query)))
            if relationship_data_frame.empty:
                # No matches: the frame has no columns, so there is nothing to wrangle
                logging.debug(f"No triples found for relationship: {id_}")
                continue
            relationship_data_frame.rename(columns={0: "source_external_id", 1: "target_external_id"}, inplace=True)

            # removes namespace
            relationship_data_frame = relationship_data_frame.map(remove_namespace_from_uri)  # type: ignore[operator]

            # adding prefix
            if relationship_external_id_prefix:
                relationship_data_frame["source_external_id"] = (
                    relationship_external_id_prefix + relationship_data_frame["source_external_id"]
                )
                relationship_data_frame["target_external_id"] = (
                    relationship_external_id_prefix + relationship_data_frame["target_external_id"]
                )

            relationship_data_frame["target_type"] = definition.target_type
            relationship_data_frame["source_type"] = definition.source_type

            # every row gets the definition's labels (which include "non-historic",
            # i.e. the relationship is active by default)
            relationship_data_frame["labels"] = [definition.labels] * len(relationship_data_frame)

            # set default external id
            relationship_data_frame["external_id"] = (
                relationship_data_frame["source_external_id"] + ":" + relationship_data_frame["target_external_id"]
            )
            relationship_data_frame["data_set_id"] = data_set_id
            relationship_dfs.append(relationship_data_frame)
        except Exception:
            logging.error(f"Error processing relationship: {id_}")
            if stop_on_exception:
                raise
            continue

    if not relationship_dfs:
        return pd.DataFrame(
            columns=[
                "source_external_id",
                "target_external_id",
                "target_type",
                "source_type",
                "labels",
                "external_id",
                "data_set_id",
                "start_time",
            ]
        )

    relationship_df = pd.concat(relationship_dfs)
    relationship_df.reset_index(inplace=True, drop=True)

    # Remove duplicate rows, if any. This should not happen, but it is better to be safe than sorry
    relationship_df.drop_duplicates(subset=["external_id"], inplace=True)
    relationship_df["start_time"] = epoch_now_ms()
    return relationship_df
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
def rdf2relationship_data_frame(
    graph_store: NeatGraphStoreBase,
    transformation_rules: Rules,
    stop_on_exception: bool = False,
    data_set_id: int | None = None,
) -> pd.DataFrame:
    """Deprecated alias of ``rdf2relationships``.

    Args:
        graph_store: Graph store which is queried for the RDF triples
        transformation_rules: Transformation rules holding the relationship definitions
        stop_on_exception: Forwarded to ``rdf2relationships``
        data_set_id: CDF data set id written to the relationships. ``rdf2relationships``
            requires it; when omitted, 0 is used as a placeholder and a warning is logged
            so that existing call sites keep working.

    Returns:
        Dataframe holding relationships, as returned by ``rdf2relationships``
    """
    warn("'rdf2relationship_data_frame' is deprecated, please use 'rdf2relationships' instead!", stacklevel=2)
    logging.warning("'rdf2relationship_data_frame' is deprecated, please use 'rdf2relationships' instead!")
    if data_set_id is None:
        logging.warning("No data_set_id given to 'rdf2relationship_data_frame', using placeholder 0")
        data_set_id = 0
    # Keyword arguments keep stop_on_exception out of the data_set_id slot.
    return rdf2relationships(
        graph_store, transformation_rules, data_set_id=data_set_id, stop_on_exception=stop_on_exception
    )
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
def _filter_relationship_xids(relationship_data_frame: pd.DataFrame, asset_xids: list | set) -> set:
|
|
220
|
-
return set(
|
|
221
|
-
relationship_data_frame[
|
|
222
|
-
(relationship_data_frame["source_external_id"].isin(asset_xids))
|
|
223
|
-
| (relationship_data_frame["target_external_id"].isin(asset_xids))
|
|
224
|
-
]["external_id"]
|
|
225
|
-
)
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
def _categorize_rdf_relationship_xids(
    rdf_relationships: pd.DataFrame, categorized_asset_ids: dict
) -> dict[str, set[str]]:
    """Categorizes the external ids of the RDF relationship.

    Args:
        rdf_relationships: Dataframe with ``source_external_id``, ``target_external_id``
            and ``external_id`` columns
        categorized_asset_ids: Mapping with the keys ``"historic"`` and ``"non-historic"``,
            each holding a set of asset external ids

    Returns:
        Mapping with three disjoint sets of relationship external ids:
        ``"impossible"`` (touches an asset in neither category), ``"historic"`` (touches a
        historic asset) and ``"non-historic"`` (the remaining relationships that touch a
        non-historic asset).
    """
    known_asset_ids = categorized_asset_ids["historic"].union(categorized_asset_ids["non-historic"])
    missing_asset_ids = (
        set(rdf_relationships.target_external_id)
        .union(rdf_relationships.source_external_id)
        .difference(known_asset_ids)
    )

    if missing_asset_ids:
        # Sentences are joined with a space so the logged warning stays readable
        msg = " ".join(
            (
                f"Relationships are referring to these assets {missing_asset_ids}, which are missing in CDF.",
                "Relationships will not be created for assets that are missing in CDF.",
                "Please make sure that all assets are present in CDF before creating relationships.",
            )
        )
        logging.warning(msg)

    # First mask all relationships which contain assets that do not exist in CDF
    mask_impossible = _filter_relationship_xids(rdf_relationships, missing_asset_ids)

    # Then mask all relationships which contain assets that are historic while masking
    # all impossible relationships
    mask_historic = _filter_relationship_xids(rdf_relationships, categorized_asset_ids["historic"]).difference(
        mask_impossible
    )

    # What is left and touches a non-historic asset is non-historic
    mask_non_historic = (
        _filter_relationship_xids(rdf_relationships, categorized_asset_ids["non-historic"])
        .difference(mask_historic)
        .difference(mask_impossible)
    )

    return {"impossible": mask_impossible, "historic": mask_historic, "non-historic": mask_non_historic}
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
def _get_label_based_cdf_relationship_xids(client, data_set_id, labels, partitions) -> set:
    """Return the external ids of the relationships CDF lists for a data set and label filter.

    When ``labels`` is None no label filter is applied; otherwise relationships carrying
    any of the given labels are requested.
    """
    label_filter = None
    if labels is not None:
        label_filter = LabelFilter(contains_any=labels)

    listed = client.relationships.list(
        data_set_ids=data_set_id, limit=-1, labels=label_filter, partitions=partitions
    )
    frame = listed.to_pandas()

    # An empty frame is short-circuited rather than read for its external_id column
    if frame.empty:
        return set()
    return set(frame.external_id)
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
def _categorize_cdf_relationship_xids(client, data_set_id, partitions) -> dict[str, set]:
    """Fetch the relationship external ids in CDF, split by "historic" / "non-historic" label."""
    return {
        label: _get_label_based_cdf_relationship_xids(client, data_set_id, [label], partitions)
        for label in ("historic", "non-historic")
    }
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
def _relationship_to_create(relationships: pd.DataFrame) -> list[Relationship]:
    """Turn every row of the dataframe into a ``Relationship`` object.

    Args:
        relationships: Dataframe whose columns are passed as keyword arguments to
            ``Relationship``, one object per row

    Returns:
        List of relationships to create; empty when the dataframe is empty
    """
    # Nothing to wrangle, so skip the timing and logging altogether
    if relationships.empty:
        return []

    start_time = datetime_utc_now()
    logging.info("Wrangling relationships to be created into their final form")
    relationship_list = [Relationship(**cast(dict[str, Any], row)) for row in relationships.to_dict(orient="records")]
    logging.info(f"Wrangling completed in {(datetime_utc_now() - start_time).seconds} seconds")
    return relationship_list
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
def _relationships_to_decommission(external_ids: Collection[str]) -> list[RelationshipUpdate]:
    """Build the updates that mark the given relationships as historic.

    Each update removes the "non-historic" label, adds the "historic" label and sets the
    end time to the current epoch time in milliseconds.
    """
    started_at = datetime_utc_now()
    updates = []
    if not external_ids:
        return []

    logging.info("Wrangling relationships to be decommissioned into their final form")

    for xid in external_ids:
        update = RelationshipUpdate(external_id=xid)

        # Swap the active label for the historic one
        update.labels.remove("non-historic")
        update.labels.add(["historic"])

        # The relationship ends now
        update.end_time.set(epoch_now_ms())

        updates.append(update)

    logging.info(f"Wrangling of {len(updates)} completed in {(datetime_utc_now() - started_at).seconds} seconds")
    return updates
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
def _relationships_to_resurrect(external_ids: Collection[str]) -> list[RelationshipUpdate]:
    """Build the updates that mark the given relationships as active again.

    Each update removes the "historic" label, adds the "non-historic" label and sets the
    end time to None.
    """
    started_at = datetime_utc_now()
    updates = []
    if not external_ids:
        return []

    logging.info("Wrangling relationships to be resurrected into their final form")

    for xid in external_ids:
        update = RelationshipUpdate(external_id=xid)

        # Swap the historic label for the active one
        update.labels.remove("historic")
        update.labels.add(["non-historic"])

        # Clear the end time
        update.end_time.set(None)

        updates.append(update)

    logging.info(f"Wrangling of {len(updates)} completed in {(datetime_utc_now() - started_at).seconds} seconds")
    return updates
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
@overload
def categorize_relationships(
    client: CogniteClient,
    rdf_relationships: pd.DataFrame,
    data_set_id: int,
    return_report: Literal[False] = False,
    partitions: int = 40,
) -> dict[str, list[Relationship] | list[RelationshipUpdate]]: ...


@overload
def categorize_relationships(
    client: CogniteClient,
    rdf_relationships: pd.DataFrame,
    data_set_id: int,
    return_report: Literal[True],
    partitions: int = 40,
) -> tuple[dict[str, list[Relationship] | list[RelationshipUpdate]], dict[str, set]]: ...


def categorize_relationships(
    client: CogniteClient,
    rdf_relationships: pd.DataFrame,
    data_set_id: int,
    return_report: bool = False,
    partitions: int = 40,
) -> (
    tuple[dict[str, list[Relationship] | list[RelationshipUpdate]], dict[str, set]]
    | dict[str, list[Relationship] | list[RelationshipUpdate]]
):
    """Split relationships into those to create, to resurrect and to decommission.

    Args:
        client: CogniteClient used to look up assets and relationships in CDF
        rdf_relationships: Dataframe holding the relationships derived from the graph
        data_set_id: CDF data set id to which relationships are to be uploaded
        return_report: Whether to also return the sets of external ids per category
        partitions: Number of partitions to use when querying CDF

    Returns:
        Mapping with the keys "create", "resurrect" and "decommission" holding the
        objects to push to CDF. When ``return_report`` is True, a tuple of that mapping
        and a mapping from the same keys to the sets of external ids.
    """
    # TODO also figure out which relationships to be deleted

    _, asset_ids_by_category = _categorize_cdf_assets(client, data_set_id=data_set_id, partitions=partitions)
    rdf_xids = _categorize_rdf_relationship_xids(rdf_relationships, asset_ids_by_category)
    cdf_xids = _categorize_cdf_relationship_xids(client, data_set_id, partitions=partitions)

    rdf_historic, rdf_active = rdf_xids["historic"], rdf_xids["non-historic"]
    cdf_historic, cdf_active = cdf_xids["historic"], cdf_xids["non-historic"]

    all_cdf = cdf_historic | cdf_active
    all_rdf = rdf_historic | rdf_active

    # create: active in the graph and not in CDF at all
    create_xids = rdf_active - all_cdf

    # decommission: active in CDF and either historic in the graph or gone from the graph
    decommission_xids = (rdf_historic & cdf_active) | (cdf_active - all_rdf)

    # resurrect: active in the graph but historic in CDF
    resurrect_xids = rdf_active & cdf_historic

    logging.info(f"Number of relationships to create: {len(create_xids)}")
    logging.info(f"Number of relationships to decommission: {len(decommission_xids)}")
    logging.info(f"Number of relationships to resurrect: {len(resurrect_xids)}")

    report = {"create": create_xids, "resurrect": resurrect_xids, "decommission": decommission_xids}

    rows_to_create = rdf_relationships[rdf_relationships.external_id.isin(create_xids)]
    categorized_relationships: dict[str, list[Relationship] | list[RelationshipUpdate]] = {
        "create": _relationship_to_create(rows_to_create),
        "resurrect": _relationships_to_resurrect(resurrect_xids),
        "decommission": _relationships_to_decommission(decommission_xids),
    }

    if return_report:
        return categorized_relationships, report
    return categorized_relationships
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
def _micro_batch_push(
    client: CogniteClient,
    relationships: list,
    batch_size: int = 1000,
    push_type: str = "update",
    message: str = "Updated",
    max_retries: int = 1,
    retry_delay: int = 5,
):
    """Creates or updates relationships in CDF batch by batch

    Args:
        client: Instance of CogniteClient
        relationships: List of relationships to be created or updated
        batch_size: Size of batch, by default 1000
        push_type: Type of push, either "update" or "create", by default "update"
        message: Verb used in the progress log line, by default "Updated"
        max_retries: Passed to the retry decorator wrapping each batch push, by default 1
        retry_delay: Passed to the retry decorator wrapping each batch push, by default 5

    Raises:
        ValueError: If ``push_type`` is neither "update" nor "create"
    """
    if push_type not in ("update", "create"):
        logging.error(f"push_type {push_type} not supported")
        raise ValueError(f"push_type {push_type} not supported")

    total = len(relationships)
    counter = 0

    # Built once: the wrapped function does not depend on the individual batch
    @retry_decorator(max_retries=max_retries, retry_delay=retry_delay, component_name="microbatch-relationships")
    def push_batch(batch):
        if push_type == "update":
            client.relationships.update(batch)
        else:
            client.relationships.create(batch)

    for batch in chunker(relationships, batch_size):
        counter += len(batch)
        start_time = datetime_utc_now()

        try:
            push_batch(batch)
        except CogniteDuplicatedError as e:
            # This situation should not happen but if it does, we need to handle it:
            # create only the relationships CDF did not report as duplicates
            exists = {d["externalId"] for d in e.duplicated}
            missing_relationships = [t for t in batch if t.external_id not in exists]
            if missing_relationships:
                client.relationships.create(missing_relationships)

        delta_time = (datetime_utc_now() - start_time).seconds

        # ETC extrapolates the duration of this batch to the items still to be pushed
        msg = f"{message} {counter} of {total} relationships, batch processing time: {delta_time:.2f} "
        msg += f"seconds ETC: {delta_time * (total - counter) / (60 * batch_size):.2f} minutes"
        logging.info(msg)
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
def upload_relationships(
    client: CogniteClient,
    categorized_relationships: dict[str, list[Relationship] | list[RelationshipUpdate]],
    batch_size: int = 5000,
    max_retries: int = 1,
    retry_delay: int = 3,
):
    """Uploads categorized relationships to CDF

    Args:
        client: Instance of CogniteClient
        categorized_relationships: Categories of relationships to be uploaded, with the
            keys "create", "resurrect" and "decommission"
        batch_size: Size of batch, by default 5000
        max_retries: Maximum times to retry the upload, by default 1
        retry_delay: Time delay before retrying the upload, by default 3

    !!! note "batch_size"
        If batch size is set to 0 or None, all relationships will be pushed to CDF in one go.
    """
    to_create = categorized_relationships["create"]
    to_resurrect = categorized_relationships["resurrect"]
    to_decommission = categorized_relationships["decommission"]

    if batch_size:
        logging.info(f"Uploading relationships in batches of {batch_size}")
        pushes = (
            (to_create, "create", "Created"),
            (to_resurrect, "update", "Resurrected"),
            (to_decommission, "update", "Decommissioned"),
        )
        for items, push_type, message in pushes:
            if items:
                _micro_batch_push(
                    client,
                    items,
                    batch_size,
                    push_type=push_type,
                    message=message,
                    max_retries=max_retries,
                    retry_delay=retry_delay,
                )
        return

    logging.info("Batch size not set, pushing all relationships to CDF in one go!")

    # Create and update are retried separately, so a retry of a failed update does not
    # try to create relationships that were already created.
    @retry_decorator(max_retries=max_retries, retry_delay=retry_delay, component_name="create-relationships")
    def create_relationships():
        if to_create:
            client.relationships.create(to_create)

    @retry_decorator(max_retries=max_retries, retry_delay=retry_delay, component_name="update-relationships")
    def update_relationships():
        if to_resurrect:
            client.relationships.update(to_resurrect)
        if to_decommission:
            client.relationships.update(to_decommission)

    try:
        create_relationships()
    except CogniteDuplicatedError as e:
        # This situation should not happen, but if it does, create only the relationships
        # that CDF did not report as duplicates
        exists = {d["externalId"] for d in e.duplicated}
        missing_relationships = [t for t in cast(list[Relationship], to_create) if t.external_id not in exists]
        if missing_relationships:
            client.relationships.create(missing_relationships)

    # Runs regardless of whether the duplicate handling above was needed
    update_relationships()
|