cognite-neat 0.87.6__py3-none-any.whl → 0.88.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_version.py +1 -1
- cognite/neat/app/api/data_classes/rest.py +0 -19
- cognite/neat/app/api/explorer.py +6 -4
- cognite/neat/app/api/routers/configuration.py +1 -1
- cognite/neat/app/api/routers/crud.py +11 -21
- cognite/neat/app/api/routers/workflows.py +24 -94
- cognite/neat/app/ui/neat-app/build/asset-manifest.json +7 -7
- cognite/neat/app/ui/neat-app/build/index.html +1 -1
- cognite/neat/app/ui/neat-app/build/static/css/{main.38a62222.css → main.72e3d92e.css} +2 -2
- cognite/neat/app/ui/neat-app/build/static/css/main.72e3d92e.css.map +1 -0
- cognite/neat/app/ui/neat-app/build/static/js/main.5a52cf09.js +3 -0
- cognite/neat/app/ui/neat-app/build/static/js/{main.ec7f72e2.js.LICENSE.txt → main.5a52cf09.js.LICENSE.txt} +0 -9
- cognite/neat/app/ui/neat-app/build/static/js/main.5a52cf09.js.map +1 -0
- cognite/neat/config.py +44 -27
- cognite/neat/exceptions.py +6 -0
- cognite/neat/graph/extractors/_classic_cdf/_assets.py +21 -73
- cognite/neat/graph/extractors/_classic_cdf/_base.py +102 -0
- cognite/neat/graph/extractors/_classic_cdf/_events.py +46 -42
- cognite/neat/graph/extractors/_classic_cdf/_files.py +41 -45
- cognite/neat/graph/extractors/_classic_cdf/_labels.py +75 -52
- cognite/neat/graph/extractors/_classic_cdf/_relationships.py +49 -27
- cognite/neat/graph/extractors/_classic_cdf/_sequences.py +47 -50
- cognite/neat/graph/extractors/_classic_cdf/_timeseries.py +47 -49
- cognite/neat/graph/queries/_base.py +22 -29
- cognite/neat/graph/queries/_shared.py +1 -1
- cognite/neat/graph/stores/_base.py +24 -11
- cognite/neat/graph/transformers/_rdfpath.py +3 -2
- cognite/neat/issues.py +8 -0
- cognite/neat/rules/exporters/_rules2ontology.py +28 -20
- cognite/neat/rules/exporters/_validation.py +15 -21
- cognite/neat/rules/importers/_inference2rules.py +31 -35
- cognite/neat/rules/importers/_owl2rules/_owl2metadata.py +3 -7
- cognite/neat/rules/importers/_spreadsheet2rules.py +30 -27
- cognite/neat/rules/issues/dms.py +20 -0
- cognite/neat/rules/issues/importing.py +15 -0
- cognite/neat/rules/issues/ontology.py +298 -0
- cognite/neat/rules/issues/spreadsheet.py +48 -0
- cognite/neat/rules/issues/tables.py +72 -0
- cognite/neat/rules/models/_rdfpath.py +4 -4
- cognite/neat/rules/models/_types/_field.py +9 -19
- cognite/neat/rules/models/information/_rules.py +5 -4
- cognite/neat/utils/rdf_.py +17 -9
- cognite/neat/utils/regex_patterns.py +52 -0
- cognite/neat/workflows/steps/data_contracts.py +17 -43
- cognite/neat/workflows/steps/lib/current/graph_extractor.py +28 -24
- cognite/neat/workflows/steps/lib/current/graph_loader.py +4 -21
- cognite/neat/workflows/steps/lib/current/graph_store.py +18 -134
- cognite/neat/workflows/steps_registry.py +5 -7
- {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/METADATA +2 -6
- cognite_neat-0.88.1.dist-info/RECORD +209 -0
- cognite/neat/app/api/routers/core.py +0 -91
- cognite/neat/app/api/routers/data_exploration.py +0 -336
- cognite/neat/app/api/routers/rules.py +0 -203
- cognite/neat/app/ui/neat-app/build/static/css/main.38a62222.css.map +0 -1
- cognite/neat/app/ui/neat-app/build/static/js/main.ec7f72e2.js +0 -3
- cognite/neat/app/ui/neat-app/build/static/js/main.ec7f72e2.js.map +0 -1
- cognite/neat/graph/stores/_oxrdflib.py +0 -247
- cognite/neat/legacy/__init__.py +0 -0
- cognite/neat/legacy/graph/__init__.py +0 -3
- cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44-dirty.xml +0 -20182
- cognite/neat/legacy/graph/examples/Knowledge-Graph-Nordic44.xml +0 -20163
- cognite/neat/legacy/graph/examples/__init__.py +0 -10
- cognite/neat/legacy/graph/examples/skos-capturing-sheet-wind-topics.xlsx +0 -0
- cognite/neat/legacy/graph/exceptions.py +0 -90
- cognite/neat/legacy/graph/extractors/__init__.py +0 -6
- cognite/neat/legacy/graph/extractors/_base.py +0 -14
- cognite/neat/legacy/graph/extractors/_dexpi.py +0 -44
- cognite/neat/legacy/graph/extractors/_graph_capturing_sheet.py +0 -403
- cognite/neat/legacy/graph/extractors/_mock_graph_generator.py +0 -361
- cognite/neat/legacy/graph/loaders/__init__.py +0 -23
- cognite/neat/legacy/graph/loaders/_asset_loader.py +0 -511
- cognite/neat/legacy/graph/loaders/_base.py +0 -67
- cognite/neat/legacy/graph/loaders/_exceptions.py +0 -85
- cognite/neat/legacy/graph/loaders/core/__init__.py +0 -0
- cognite/neat/legacy/graph/loaders/core/labels.py +0 -58
- cognite/neat/legacy/graph/loaders/core/models.py +0 -136
- cognite/neat/legacy/graph/loaders/core/rdf_to_assets.py +0 -1046
- cognite/neat/legacy/graph/loaders/core/rdf_to_relationships.py +0 -559
- cognite/neat/legacy/graph/loaders/rdf_to_dms.py +0 -309
- cognite/neat/legacy/graph/loaders/validator.py +0 -87
- cognite/neat/legacy/graph/models.py +0 -6
- cognite/neat/legacy/graph/stores/__init__.py +0 -13
- cognite/neat/legacy/graph/stores/_base.py +0 -400
- cognite/neat/legacy/graph/stores/_graphdb_store.py +0 -52
- cognite/neat/legacy/graph/stores/_memory_store.py +0 -43
- cognite/neat/legacy/graph/stores/_oxigraph_store.py +0 -151
- cognite/neat/legacy/graph/stores/_oxrdflib.py +0 -247
- cognite/neat/legacy/graph/stores/_rdf_to_graph.py +0 -42
- cognite/neat/legacy/graph/transformations/__init__.py +0 -0
- cognite/neat/legacy/graph/transformations/entity_matcher.py +0 -101
- cognite/neat/legacy/graph/transformations/query_generator/__init__.py +0 -3
- cognite/neat/legacy/graph/transformations/query_generator/sparql.py +0 -575
- cognite/neat/legacy/graph/transformations/transformer.py +0 -322
- cognite/neat/legacy/rules/__init__.py +0 -0
- cognite/neat/legacy/rules/analysis.py +0 -231
- cognite/neat/legacy/rules/examples/Rules-Nordic44-to-graphql.xlsx +0 -0
- cognite/neat/legacy/rules/examples/Rules-Nordic44.xlsx +0 -0
- cognite/neat/legacy/rules/examples/__init__.py +0 -18
- cognite/neat/legacy/rules/examples/power-grid-containers.yaml +0 -124
- cognite/neat/legacy/rules/examples/power-grid-example.xlsx +0 -0
- cognite/neat/legacy/rules/examples/power-grid-model.yaml +0 -224
- cognite/neat/legacy/rules/examples/rules-template.xlsx +0 -0
- cognite/neat/legacy/rules/examples/sheet2cdf-transformation-rules.xlsx +0 -0
- cognite/neat/legacy/rules/examples/skos-rules.xlsx +0 -0
- cognite/neat/legacy/rules/examples/source-to-solution-mapping-rules.xlsx +0 -0
- cognite/neat/legacy/rules/examples/wind-energy.owl +0 -1511
- cognite/neat/legacy/rules/exceptions.py +0 -2972
- cognite/neat/legacy/rules/exporters/__init__.py +0 -20
- cognite/neat/legacy/rules/exporters/_base.py +0 -45
- cognite/neat/legacy/rules/exporters/_core/__init__.py +0 -5
- cognite/neat/legacy/rules/exporters/_core/rules2labels.py +0 -24
- cognite/neat/legacy/rules/exporters/_rules2dms.py +0 -885
- cognite/neat/legacy/rules/exporters/_rules2excel.py +0 -213
- cognite/neat/legacy/rules/exporters/_rules2graphql.py +0 -183
- cognite/neat/legacy/rules/exporters/_rules2ontology.py +0 -524
- cognite/neat/legacy/rules/exporters/_rules2pydantic_models.py +0 -748
- cognite/neat/legacy/rules/exporters/_rules2rules.py +0 -105
- cognite/neat/legacy/rules/exporters/_rules2triples.py +0 -38
- cognite/neat/legacy/rules/exporters/_validation.py +0 -146
- cognite/neat/legacy/rules/importers/__init__.py +0 -22
- cognite/neat/legacy/rules/importers/_base.py +0 -66
- cognite/neat/legacy/rules/importers/_dict2rules.py +0 -158
- cognite/neat/legacy/rules/importers/_dms2rules.py +0 -194
- cognite/neat/legacy/rules/importers/_graph2rules.py +0 -308
- cognite/neat/legacy/rules/importers/_json2rules.py +0 -39
- cognite/neat/legacy/rules/importers/_owl2rules/__init__.py +0 -3
- cognite/neat/legacy/rules/importers/_owl2rules/_owl2classes.py +0 -239
- cognite/neat/legacy/rules/importers/_owl2rules/_owl2metadata.py +0 -260
- cognite/neat/legacy/rules/importers/_owl2rules/_owl2properties.py +0 -217
- cognite/neat/legacy/rules/importers/_owl2rules/_owl2rules.py +0 -290
- cognite/neat/legacy/rules/importers/_spreadsheet2rules.py +0 -45
- cognite/neat/legacy/rules/importers/_xsd2rules.py +0 -20
- cognite/neat/legacy/rules/importers/_yaml2rules.py +0 -39
- cognite/neat/legacy/rules/models/__init__.py +0 -5
- cognite/neat/legacy/rules/models/_base.py +0 -151
- cognite/neat/legacy/rules/models/raw_rules.py +0 -316
- cognite/neat/legacy/rules/models/rdfpath.py +0 -237
- cognite/neat/legacy/rules/models/rules.py +0 -1289
- cognite/neat/legacy/rules/models/tables.py +0 -9
- cognite/neat/legacy/rules/models/value_types.py +0 -118
- cognite/neat/legacy/workflows/examples/Export_DMS/workflow.yaml +0 -89
- cognite/neat/legacy/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
- cognite/neat/legacy/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
- cognite/neat/legacy/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
- cognite/neat/legacy/workflows/examples/Import_DMS/workflow.yaml +0 -65
- cognite/neat/legacy/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
- cognite/neat/legacy/workflows/examples/Validate_Rules/workflow.yaml +0 -67
- cognite/neat/legacy/workflows/examples/Validate_Solution_Model/workflow.yaml +0 -64
- cognite/neat/legacy/workflows/examples/Visualize_Data_Model_Using_Mock_Graph/workflow.yaml +0 -95
- cognite/neat/legacy/workflows/examples/Visualize_Semantic_Data_Model/workflow.yaml +0 -111
- cognite/neat/rules/exceptions.py +0 -2972
- cognite/neat/rules/models/_types/_base.py +0 -16
- cognite/neat/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
- cognite/neat/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
- cognite/neat/workflows/examples/Extract_RDF_Graph_and_Generate_Assets/workflow.yaml +0 -270
- cognite/neat/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
- cognite/neat/workflows/migration/__init__.py +0 -0
- cognite/neat/workflows/migration/steps.py +0 -91
- cognite/neat/workflows/migration/wf_manifests.py +0 -33
- cognite/neat/workflows/steps/lib/legacy/__init__.py +0 -7
- cognite/neat/workflows/steps/lib/legacy/graph_contextualization.py +0 -82
- cognite/neat/workflows/steps/lib/legacy/graph_extractor.py +0 -746
- cognite/neat/workflows/steps/lib/legacy/graph_loader.py +0 -606
- cognite/neat/workflows/steps/lib/legacy/graph_store.py +0 -307
- cognite/neat/workflows/steps/lib/legacy/graph_transformer.py +0 -58
- cognite/neat/workflows/steps/lib/legacy/rules_exporter.py +0 -511
- cognite/neat/workflows/steps/lib/legacy/rules_importer.py +0 -612
- cognite_neat-0.87.6.dist-info/RECORD +0 -319
- {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/LICENSE +0 -0
- {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/WHEEL +0 -0
- {cognite_neat-0.87.6.dist-info → cognite_neat-0.88.1.dist-info}/entry_points.txt +0 -0
|
@@ -1,309 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
from collections.abc import Iterable
|
|
3
|
-
from itertools import islice
|
|
4
|
-
from typing import Literal, cast, overload
|
|
5
|
-
|
|
6
|
-
from cognite.client import CogniteClient
|
|
7
|
-
from cognite.client.data_classes.data_modeling import EdgeApply, InstanceApply, NodeApply
|
|
8
|
-
from pydantic_core import ErrorDetails
|
|
9
|
-
|
|
10
|
-
from cognite.neat.exceptions import NeatException
|
|
11
|
-
from cognite.neat.legacy.graph.stores import NeatGraphStoreBase
|
|
12
|
-
from cognite.neat.legacy.graph.transformations.query_generator.sparql import triples2dictionary
|
|
13
|
-
from cognite.neat.legacy.rules.exporters._rules2dms import DMSSchemaComponents
|
|
14
|
-
from cognite.neat.legacy.rules.exporters._rules2pydantic_models import add_class_prefix_to_xid, rules_to_pydantic_models
|
|
15
|
-
from cognite.neat.legacy.rules.models.rules import Rules
|
|
16
|
-
from cognite.neat.utils.auxiliary import retry_decorator
|
|
17
|
-
from cognite.neat.utils.collection_ import chunker
|
|
18
|
-
from cognite.neat.utils.time_ import datetime_utc_now
|
|
19
|
-
|
|
20
|
-
from ._base import CogniteLoader
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class DMSLoader(CogniteLoader[InstanceApply]):
    """Loads a Neat Graph into CDF as nodes and edges.

    Args:
        rules: Rules object
        graph_store: Graph store
        add_class_prefix: Add class prefix to external_id. Defaults to False.

    """

    def __init__(self, rules: Rules, graph_store: NeatGraphStoreBase, add_class_prefix: bool = False):
        super().__init__(rules, graph_store)
        self.add_class_prefix = add_class_prefix

    @overload
    def load(self, stop_on_exception: Literal[True]) -> Iterable[InstanceApply]: ...

    @overload
    def load(self, stop_on_exception: Literal[False] = False) -> Iterable[InstanceApply | ErrorDetails]: ...

    def load(self, stop_on_exception: bool = False) -> Iterable[InstanceApply | ErrorDetails]:
        """Convert the graph into nodes and edges, class by class.

        Classes without a matching container in the generated data model are skipped.

        Args:
            stop_on_exception: If True, the first exception raised while converting an
                instance is re-raised. If False, it is yielded as an error record and
                processing continues with the next instance.

        Yields:
            Nodes and edges that pass external id validation, and error records for
            instances, nodes or edges that could not be converted or validated.

        Raises:
            ValueError: If the namespace is not defined in the rules metadata.
        """
        if self.rules.metadata.namespace is None:
            raise ValueError("Namespace is not defined in transformation rules metadata")

        data_model = DMSSchemaComponents.from_rules(self.rules)
        pydantic_models = rules_to_pydantic_models(self.rules)

        exclude = {
            class_name
            for class_name in self.rules.classes
            if f"{self.rules.space}:{class_name}" not in data_model.containers
        }

        for class_name, triples in self._iterate_class_triples(exclude_classes=exclude):
            logging.info(f"<DMSLoader> Processing class : {class_name}")
            for instance_dict in triples2dictionary(triples).values():
                # Looked up once with .get() so that a missing key cannot raise a
                # second exception from inside the error handling below.
                external_id = instance_dict.get("external_id")
                try:
                    instance = pydantic_models[class_name].from_dict(instance_dict)  # type: ignore[attr-defined]
                    if self.add_class_prefix:
                        instance.external_id = add_class_prefix_to_xid(
                            class_name=type(instance).__name__, external_id=instance.external_id
                        )
                    new_node = instance.to_node(data_model, self.add_class_prefix)  # type: ignore[attr-defined]
                    is_valid, reason = is_node_valid(new_node)
                    if not is_valid:
                        yield ErrorDetails(
                            input=external_id,
                            loc=tuple(["Nodes"]),
                            msg=f"Not valid node {new_node.external_id}. Reason: {reason}",
                            type="Node validation error",
                        )
                        # Edges of an invalid node are not emitted.
                        continue
                    yield new_node

                    for new_edge in instance.to_edge(data_model, self.add_class_prefix):
                        is_valid, reason = is_edge_valid(new_edge)
                        if is_valid:
                            yield new_edge
                        else:
                            yield ErrorDetails(
                                input=external_id,
                                loc=tuple(["Edges"]),
                                msg=f"Not valid edge {new_edge.external_id}. Reason: {reason}",
                                type="Edge validation error",
                            )

                except Exception as e:
                    logging.error(
                        f"Instance {external_id} of {class_name}"
                        f" cannot be resolved to nodes and edges. Reason: {e}"
                    )
                    if stop_on_exception:
                        raise

                    if isinstance(e, NeatException):
                        yield e.to_error_dict()
                    else:
                        yield ErrorDetails(
                            input=external_id,
                            loc=tuple(["rdf2nodes_and_edges"]),
                            msg=str(e),
                            type=(
                                f"Exception of type {type(e).__name__} occurred "
                                f"when processing instance of {class_name}"
                            ),
                        )

    def load_to_cdf(
        self, client: CogniteClient, batch_size: int | None = 1000, max_retries: int = 1, retry_delay: int = 3
    ) -> None:
        """Uploads nodes and edges to CDF

        Args:
            client: Instance of CogniteClient
            batch_size: Size of batch. Default to 1000.
            max_retries: Maximum times to retry the upload. Default to 1.
            retry_delay: Time delay before retrying the upload. Default to 3.

        !!! note "batch_size"
            If batch size is set to None, all nodes and edges will be pushed to CDF in one go.
        """
        if batch_size is None:
            logging.info("Batch size not set, pushing all nodes and edges to CDF in one go!")
            nodes, edges, errors = self.as_nodes_and_edges(stop_on_exception=False)
            if errors:
                logging.warning(f"Skipping {len(errors)} instances that could not be converted or validated")

            @retry_decorator(max_retries=max_retries, retry_delay=retry_delay, component_name="create-instances")
            def create_instances():
                client.data_modeling.instances.apply(
                    nodes=nodes, edges=edges, auto_create_start_nodes=True, auto_create_end_nodes=True
                )

            create_instances()
            return

        logging.info(f"Uploading nodes in batches of {batch_size}")
        for instances in _batched(self.load(stop_on_exception=False), batch_size):
            nodes = [instance for instance in instances if isinstance(instance, NodeApply)]
            edges = [instance for instance in instances if isinstance(instance, EdgeApply)]
            skipped = len(instances) - len(nodes) - len(edges)
            if skipped:
                logging.warning(f"Skipping {skipped} instances that could not be converted or validated")
            # Todo make _micro_batch_push handle both nodes and edges simultaneously
            # A batch may hold only nodes, only edges or only error records, so each
            # push is guarded: _micro_batch_push rejects lists it cannot classify.
            if nodes:
                _micro_batch_push(
                    client, nodes, batch_size, message="Upload", max_retries=max_retries, retry_delay=retry_delay
                )
            if edges:
                _micro_batch_push(
                    client, edges, batch_size, message="Upload", max_retries=max_retries, retry_delay=retry_delay
                )

    def as_nodes_and_edges(
        self, stop_on_exception: bool = False
    ) -> tuple[list[NodeApply], list[EdgeApply], list[ErrorDetails]]:
        """Materialize the output of `load` into separate lists.

        Args:
            stop_on_exception: Passed on to `load`.

        Returns:
            Tuple of (nodes, edges, error records).

        Raises:
            ValueError: If `load` yields an object that is not a node, an edge or a dict.
        """
        nodes = []
        edges = []
        exceptions: list[ErrorDetails] = []
        for instance in self.load(stop_on_exception):  # type: ignore[call-overload]
            if isinstance(instance, NodeApply):
                nodes.append(instance)
            elif isinstance(instance, EdgeApply):
                edges.append(instance)
            elif isinstance(instance, dict):
                # Error records arrive as plain dicts.
                exceptions.append(cast(ErrorDetails, instance))
            else:
                raise ValueError(f"Unknown instance type: {type(instance)}")
        return nodes, edges, exceptions
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
def _batched(iterable: Iterable, size: int):
|
|
175
|
-
"Batch data into lists of length n. The last batch may be shorter."
|
|
176
|
-
# batched('ABCDEFG', 3) --> ABC DEF G
|
|
177
|
-
it = iter(iterable)
|
|
178
|
-
while True:
|
|
179
|
-
batch = list(islice(it, size))
|
|
180
|
-
if not batch:
|
|
181
|
-
return
|
|
182
|
-
yield batch
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
def is_node_valid(node: NodeApply) -> tuple[bool, str]:
    """Check a node's external id and return (is_valid, reason)."""
    verdict = is_valid_external_id(node.external_id)
    return verdict
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
def is_edge_valid(edge: EdgeApply) -> tuple[bool, str]:
    """Check the external ids of an edge and of its start and end nodes.

    Returns:
        (False, reason) for the first external id that fails, otherwise (True, "").
    """
    candidates = (edge.external_id, edge.start_node.external_id, edge.end_node.external_id)
    for passed, why in map(is_valid_external_id, candidates):
        if passed:
            continue
        return False, why
    return True, ""
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
def is_valid_external_id(external_id: str) -> tuple[bool, str]:
|
|
198
|
-
if external_id is None or external_id == "" or len(external_id) >= 255:
|
|
199
|
-
return False, f"external_id {external_id} is empty of too long"
|
|
200
|
-
return True, ""
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
def upload_nodes(
    client: CogniteClient,
    nodes: list[NodeApply],
    batch_size: int | None = 1000,
    max_retries: int = 1,
    retry_delay: int = 3,
):
    """Uploads nodes to CDF

    Args:
        client: Instance of CogniteClient
        nodes: List of nodes to upload to CDF
        batch_size: Size of batch. Defaults to 1000.
        max_retries: Maximum times to retry the upload. Defaults to 1.
        retry_delay: Time delay before retrying the upload. Defaults to 3.

    !!! note "batch_size"
        If batch size is set to 0 or None, all nodes will be pushed to CDF in one go.
    """
    if not nodes:
        # Nothing to upload; _micro_batch_push cannot classify an empty list.
        logging.info("No nodes to upload")
        return

    if batch_size:
        logging.info(f"Uploading nodes in batches of {batch_size}")
        _micro_batch_push(client, nodes, batch_size, message="Upload", max_retries=max_retries, retry_delay=retry_delay)

    else:
        logging.info("Batch size not set, pushing all nodes to CDF in one go!")

        @retry_decorator(max_retries=max_retries, retry_delay=retry_delay, component_name="create-nodes")
        def create_nodes():
            client.data_modeling.instances.apply(nodes=nodes)

        create_nodes()
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
def upload_edges(
    client: CogniteClient,
    edges: list[EdgeApply],
    batch_size: int | None = 5000,
    max_retries: int = 1,
    retry_delay: int = 3,
):
    """Uploads edges to CDF

    Args:
        client: Instance of CogniteClient
        edges: List of edges to upload to CDF
        batch_size: Size of batch. Defaults to 5000.
        max_retries: Maximum times to retry the upload. Defaults to 1.
        retry_delay: Time delay before retrying the upload. Defaults to 3.

    !!! note "batch_size"
        If batch size is set to 0 or None, all edges will be pushed to CDF in one go.

    """
    if not edges:
        # Nothing to upload; _micro_batch_push cannot classify an empty list.
        logging.info("No edges to upload")
        return

    if batch_size:
        logging.info(f"Uploading edges in batches of {batch_size}")
        _micro_batch_push(client, edges, batch_size, message="Upload", max_retries=max_retries, retry_delay=retry_delay)

    else:
        logging.info("Batch size not set, pushing all edges to CDF in one go!")

        @retry_decorator(max_retries=max_retries, retry_delay=retry_delay, component_name="create-edges")
        def create_edges():
            # Missing start/end nodes are created automatically by the API call.
            client.data_modeling.instances.apply(edges=edges, auto_create_start_nodes=True, auto_create_end_nodes=True)

        create_edges()
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
def _micro_batch_push(
    client: CogniteClient,
    nodes_or_edges: list[NodeApply] | list[EdgeApply],
    batch_size: int = 1000,
    message: str = "Upload",
    max_retries: int = 1,
    retry_delay: int = 3,
):
    """Uploads nodes or edges in batches

    The kind of upload (nodes or edges) is decided from the first element of the list.

    Args:
        client: Instance of CogniteClient
        nodes_or_edges: List of nodes or edges
        batch_size: Size of batch. Defaults to 1000.
        message: Message to logged. Defaults to "Upload".
        max_retries: Maximum times to retry the upload. Defaults to 1.
        retry_delay: Time delay before retrying the upload. Defaults to 3.

    Raises:
        ValueError: If the first element is neither a NodeApply nor an EdgeApply.
    """
    if not nodes_or_edges:
        # An empty list carries no type information and there is nothing to push.
        return

    total = len(nodes_or_edges)
    counter = 0

    if isinstance(nodes_or_edges[0], NodeApply):
        push_type = "nodes"
    elif isinstance(nodes_or_edges[0], EdgeApply):
        push_type = "edges"
    else:
        raise ValueError("nodes_or_edges must be a list of NodeApply or EdgeApply objects")

    for batch in chunker(nodes_or_edges, batch_size):
        counter += len(batch)
        start_time = datetime_utc_now()

        @retry_decorator(max_retries=max_retries, retry_delay=retry_delay, component_name=f"microbatch-{push_type}")
        def upsert_nodes_or_edges(upload_batch):
            if push_type == "nodes":
                client.data_modeling.instances.apply(nodes=upload_batch)
            elif push_type == "edges":
                client.data_modeling.instances.apply(
                    edges=upload_batch, auto_create_start_nodes=True, auto_create_end_nodes=True
                )

        upsert_nodes_or_edges(batch)

        # total_seconds() keeps the fractional part; .seconds would truncate to
        # whole seconds and make the timing and the estimate below meaningless.
        delta_time = (datetime_utc_now() - start_time).total_seconds()

        msg = f"{message} {counter} of {total} {push_type}, batch processing time: {delta_time:.2f} "
        msg += f"seconds ETC: {delta_time * (total - counter) / (60*batch_size) :.2f} minutes"
        logging.info(msg)
|
|
@@ -1,87 +0,0 @@
|
|
|
1
|
-
"""Should contain methods to validate Graph Transformation Rules sheet,
|
|
2
|
-
as well as the App Data Model (RDF)
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import logging
|
|
6
|
-
from typing import Any
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def _find_circular_reference_path(
|
|
10
|
-
asset: dict[str, Any], assets: dict[str, dict[str, Any]], max_hierarchy_depth: int = 10000
|
|
11
|
-
) -> list:
|
|
12
|
-
original_external_id = asset.get("external_id", "")
|
|
13
|
-
circle: list[str] = [original_external_id]
|
|
14
|
-
parent_external_id = asset.get("parent_external_id")
|
|
15
|
-
if isinstance(parent_external_id, str):
|
|
16
|
-
ref = assets.get(parent_external_id)
|
|
17
|
-
else:
|
|
18
|
-
ref = None
|
|
19
|
-
|
|
20
|
-
hop = 0
|
|
21
|
-
while ref is not None and hop < max_hierarchy_depth:
|
|
22
|
-
hop += 1
|
|
23
|
-
if external_id := ref.get("external_id"):
|
|
24
|
-
circle.append(external_id)
|
|
25
|
-
if len(circle) != len(set(circle)):
|
|
26
|
-
msg = (
|
|
27
|
-
f"Found circular reference in asset hierarchy which starts with "
|
|
28
|
-
f"{original_external_id} and enters loop at {circle[-1]}. "
|
|
29
|
-
)
|
|
30
|
-
logging.error(msg)
|
|
31
|
-
return circle
|
|
32
|
-
if parent_external_id := ref.get("parent_external_id"):
|
|
33
|
-
ref = assets.get(parent_external_id)
|
|
34
|
-
else:
|
|
35
|
-
ref = None
|
|
36
|
-
|
|
37
|
-
if hop >= max_hierarchy_depth:
|
|
38
|
-
msg = (
|
|
39
|
-
f"Your asset hierarchy is too deep. Max depth is {max_hierarchy_depth}. "
|
|
40
|
-
"You probably have a circular reference."
|
|
41
|
-
)
|
|
42
|
-
logging.error(msg)
|
|
43
|
-
return circle
|
|
44
|
-
else:
|
|
45
|
-
return []
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def validate_asset_hierarchy(
    assets: dict[str, dict[str, Any]],
) -> tuple[list[str], list[list[str]], dict[str, list[str]]]:
    """Validates asset hierarchy and reports on orphan assets and circular dependency

    Args:
        assets : A dictionary of assets with external_id as key

    Returns:
        Tuple of three items: list of orphan assets external ids, list of circular
        paths of external ids, and a map from parent external id to the external
        ids of its children. If both lists are empty, the hierarchy is healthy.
    """
    orphan_assets: list[str] = []
    circular_reference_paths: list[list[str]] = []
    parent_children_map: dict[str, list[str]] = {}
    # Member sets of the paths already stored, so the same set of ids is kept once.
    seen_circles: set[frozenset[str]] = set()

    for asset in assets.values():
        parent_external_id = asset.get("parent_external_id")
        asset_external_id = asset.get("external_id")

        if asset_external_id and parent_external_id:
            parent_children_map.setdefault(parent_external_id, []).append(asset_external_id)

        if parent_external_id is not None and parent_external_id not in assets:
            msg = f"Found orphan asset {asset_external_id} with parent {parent_external_id} which does not exist."
            logging.error(msg)
            if asset_external_id:
                orphan_assets.append(asset_external_id)

        circular_reference_path = _find_circular_reference_path(asset, assets)
        if not circular_reference_path:
            continue

        # Save the circle only once, not once for every asset
        members = frozenset(circular_reference_path)
        if members in seen_circles:
            continue
        seen_circles.add(members)
        circular_reference_paths.append(circular_reference_path)

    return orphan_assets, circular_reference_paths, parent_children_map
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
from ._base import NeatGraphStoreBase
|
|
2
|
-
from ._graphdb_store import GraphDBStore
|
|
3
|
-
from ._memory_store import MemoryStore
|
|
4
|
-
from ._oxigraph_store import OxiGraphStore
|
|
5
|
-
|
|
6
|
-
# Registry of store classes keyed by their `rdf_store_type`, built from the
# direct subclasses of NeatGraphStoreBase known at import time.
STORE_BY_TYPE: dict[str, type[NeatGraphStoreBase]] = {
    store_cls.rdf_store_type: store_cls  # type: ignore[type-abstract]
    for store_cls in NeatGraphStoreBase.__subclasses__()
}

AVAILABLE_STORES = set(STORE_BY_TYPE)

__all__ = ["NeatGraphStoreBase", "MemoryStore", "OxiGraphStore", "GraphDBStore", "STORE_BY_TYPE", "AVAILABLE_STORES"]
|