cognite-neat 0.78.4__py3-none-any.whl → 0.79.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of cognite-neat might be problematic.

Changed files (+lines added, -lines removed):
- cognite/neat/_version.py +1 -1
- cognite/neat/graph/_tracking/__init__.py +4 -0
- cognite/neat/graph/_tracking/base.py +30 -0
- cognite/neat/graph/_tracking/log.py +27 -0
- cognite/neat/graph/extractors/__init__.py +17 -2
- cognite/neat/graph/extractors/_classic_cdf/{_asset_hierarchy.py → _assets.py} +29 -22
- cognite/neat/graph/extractors/_classic_cdf/_events.py +117 -0
- cognite/neat/graph/extractors/_classic_cdf/_files.py +131 -0
- cognite/neat/graph/extractors/_classic_cdf/_labels.py +72 -0
- cognite/neat/graph/extractors/_classic_cdf/_relationships.py +153 -0
- cognite/neat/graph/extractors/_classic_cdf/_sequences.py +92 -0
- cognite/neat/graph/extractors/_classic_cdf/_timeseries.py +118 -0
- cognite/neat/graph/issues/__init__.py +0 -0
- cognite/neat/graph/issues/loader.py +104 -0
- cognite/neat/graph/loaders/__init__.py +4 -0
- cognite/neat/graph/loaders/_base.py +109 -0
- cognite/neat/graph/loaders/_rdf2dms.py +280 -0
- cognite/neat/graph/stores/_base.py +34 -4
- cognite/neat/graph/stores/_provenance.py +99 -0
- cognite/neat/issues.py +150 -0
- cognite/neat/rules/exporters/_base.py +2 -3
- cognite/neat/rules/exporters/_rules2dms.py +5 -5
- cognite/neat/rules/importers/_base.py +1 -1
- cognite/neat/rules/issues/__init__.py +2 -3
- cognite/neat/rules/issues/base.py +9 -133
- cognite/neat/rules/issues/spreadsheet.py +3 -2
- cognite/neat/rules/models/_base.py +6 -0
- cognite/neat/rules/models/dms/_rules.py +3 -0
- cognite/neat/rules/models/dms/_schema.py +133 -3
- cognite/neat/rules/models/domain.py +3 -0
- cognite/neat/rules/models/information/_rules.py +4 -1
- cognite/neat/{rules/exporters/_models.py → utils/upload.py} +26 -6
- {cognite_neat-0.78.4.dist-info → cognite_neat-0.79.0.dist-info}/METADATA +2 -2
- {cognite_neat-0.78.4.dist-info → cognite_neat-0.79.0.dist-info}/RECORD +37 -21
- {cognite_neat-0.78.4.dist-info → cognite_neat-0.79.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.78.4.dist-info → cognite_neat-0.79.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.78.4.dist-info → cognite_neat-0.79.0.dist-info}/entry_points.txt +0 -0
cognite/neat/graph/loaders/_rdf2dms.py (new file, +280 lines):

```python
import itertools
import json
from collections import defaultdict
from collections.abc import Iterable, Sequence
from pathlib import Path
from typing import Any

import yaml
from cognite.client import CogniteClient
from cognite.client import data_modeling as dm
from cognite.client.data_classes.capabilities import Capability, DataModelInstancesAcl
from cognite.client.data_classes.data_modeling import ViewId
from cognite.client.data_classes.data_modeling.views import SingleEdgeConnection
from cognite.client.exceptions import CogniteAPIError
from pydantic import ValidationInfo, create_model, field_validator
from pydantic.main import Model

from cognite.neat.graph._tracking import LogTracker, Tracker
from cognite.neat.graph.issues import loader as loader_issues
from cognite.neat.graph.stores import NeatGraphStoreBase
from cognite.neat.issues import NeatIssue, NeatIssueList
from cognite.neat.rules.models import DMSRules
from cognite.neat.rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE
from cognite.neat.utils.upload import UploadDiffsID

from ._base import CDFLoader


class DMSLoader(CDFLoader[dm.InstanceApply]):
    def __init__(
        self,
        graph_store: NeatGraphStoreBase,
        data_model: dm.DataModel[dm.View] | None,
        instance_space: str,
        class_by_view_id: dict[ViewId, str] | None = None,
        creat_issues: Sequence[NeatIssue] | None = None,
        tracker: type[Tracker] | None = None,
    ):
        super().__init__(graph_store)
        self.data_model = data_model
        self.instance_space = instance_space
        self.class_by_view_id = class_by_view_id or {}
        self._issues = NeatIssueList[NeatIssue](creat_issues or [])
        self._tracker: type[Tracker] = tracker or LogTracker

    @classmethod
    def from_data_model_id(
        cls,
        client: CogniteClient,
        data_model_id: dm.DataModelId,
        graph_store: NeatGraphStoreBase,
        instance_space: str,
    ) -> "DMSLoader":
        issues: list[NeatIssue] = []
        data_model: dm.DataModel[dm.View] | None = None
        try:
            data_model = client.data_modeling.data_models.retrieve(data_model_id, inline_views=True).latest_version()
        except Exception as e:
            issues.append(loader_issues.MissingDataModelError(identifier=repr(data_model_id), reason=str(e)))

        return cls(graph_store, data_model, instance_space, {}, issues)

    @classmethod
    def from_rules(cls, rules: DMSRules, graph_store: NeatGraphStoreBase, instance_space: str) -> "DMSLoader":
        issues: list[NeatIssue] = []
        data_model: dm.DataModel[dm.View] | None = None
        try:
            data_model = rules.as_schema().as_read_model()
        except Exception as e:
            issues.append(
                loader_issues.FailedConvertError(
                    identifier=rules.metadata.as_identifier(), target_format="read DMS model", reason=str(e)
                )
            )
        return cls(graph_store, data_model, instance_space, {}, issues)

    def _load(self, stop_on_exception: bool = False) -> Iterable[dm.InstanceApply | NeatIssue]:
        if self._issues.has_errors and stop_on_exception:
            raise self._issues.as_exception()
        elif self._issues.has_errors:
            yield from self._issues
            return
        if not self.data_model:
            # There should already be an error in this case.
            return
        view_ids = [repr(v.as_id()) for v in self.data_model.views]
        tracker = self._tracker(type(self).__name__, view_ids, "views")
        for view in self.data_model.views:
            view_id = view.as_id()
            tracker.start(repr(view_id))
            pydantic_cls, edge_by_properties, issues = self._create_validation_classes(view)  # type: ignore[var-annotated]
            yield from issues
            tracker.issue(issues)
            class_name = self.class_by_view_id.get(view.as_id(), view.external_id)
            triples = self.graph_store.queries.triples_of_type_instances(class_name)
            for identifier, properties in _triples2dictionary(triples).items():
                try:
                    yield self._create_node(identifier, properties, pydantic_cls, view_id)
                except ValueError as e:
                    error = loader_issues.InvalidInstanceError(type_="node", identifier=identifier, reason=str(e))
                    tracker.issue(error)
                    if stop_on_exception:
                        raise error.as_exception() from e
                    yield error
                yield from self._create_edges(identifier, properties, edge_by_properties, tracker)
            tracker.finish(repr(view_id))

    def write_to_file(self, filepath: Path) -> None:
        if filepath.suffix not in [".json", ".yaml", ".yml"]:
            raise ValueError(f"File format {filepath.suffix} is not supported")
        dumped: dict[str, list] = {"nodes": [], "edges": [], "issues": []}
        for item in self.load(stop_on_exception=False):
            key = {
                dm.NodeApply: "nodes",
                dm.EdgeApply: "edges",
                NeatIssue: "issues",
            }.get(type(item))
            if key is None:
                # This should never happen, and is a bug in neat
                raise ValueError(f"Item {item} is not supported. This is a bug in neat please report it.")
            dumped[key].append(item.dump())
        with filepath.open("w", encoding=self._encoding, newline=self._new_line) as f:
            if filepath.suffix == ".json":
                json.dump(dumped, f, indent=2)
            else:
                yaml.safe_dump(dumped, f, sort_keys=False)

    def _create_validation_classes(
        self, view: dm.View
    ) -> tuple[type[Model], dict[str, dm.EdgeConnection], NeatIssueList]:
        issues = NeatIssueList[NeatIssue]()
        field_definitions: dict[str, tuple[type, Any]] = {}
        edge_by_property: dict[str, dm.EdgeConnection] = {}
        direct_relation_by_property: dict[str, dm.DirectRelation] = {}
        for prop_name, prop in view.properties.items():
            if isinstance(prop, dm.EdgeConnection):
                edge_by_property[prop_name] = prop
            if isinstance(prop, dm.MappedProperty):
                if isinstance(prop.type, dm.DirectRelation):
                    direct_relation_by_property[prop_name] = prop.type
                    python_type: Any = dict
                else:
                    data_type = _DATA_TYPE_BY_DMS_TYPE.get(prop.type._type)
                    if not data_type:
                        issues.append(
                            loader_issues.InvalidClassWarning(
                                class_name=repr(view.as_id()),
                                reason=f"Unknown data type for property {prop_name}: {prop.type._type}",
                            )
                        )
                        continue
                    python_type = data_type.python
                if prop.type.is_list:
                    python_type = list[python_type]
                default_value: Any = prop.default_value
                if prop.nullable:
                    python_type = python_type | None
                else:
                    default_value = ...

                field_definitions[prop_name] = (python_type, default_value)

        def parse_list(cls, value: Any, info: ValidationInfo) -> list[str]:
            if isinstance(value, list) and cls.model_fields[info.field_name].annotation is not list:
                if len(value) == 1:
                    return value[0]
                raise ValueError(f"Got multiple values for {info.field_name}: {value}")
            return value

        validators: dict[str, classmethod] = {"parse_list": field_validator("*", mode="before")(parse_list)}  # type: ignore[dict-item,arg-type]
        if direct_relation_by_property:

            def parse_direct_relation(cls, value: list, info: ValidationInfo) -> dict | list[dict]:
                # We validate above that we only get one value for single direct relations.
                if cls.model_fields[info.field_name].annotation is list:
                    return [{"space": self.instance_space, "externalId": v} for v in value]
                elif value:
                    return {"space": self.instance_space, "externalId": value[0]}
                return {}

            validators["parse_direct_relation"] = field_validator(*direct_relation_by_property.keys(), mode="before")(  # type: ignore[assignment]
                parse_direct_relation  # type: ignore[arg-type]
            )

        pydantic_cls = create_model(view.external_id, __validators__=validators, **field_definitions)  # type: ignore[arg-type, call-overload]
        return pydantic_cls, edge_by_property, issues

    def _create_node(
        self, identifier: str, properties: dict[str, list[str]], pydantic_cls: type[Model], view_id: dm.ViewId
    ) -> dm.InstanceApply:
        created = pydantic_cls.model_validate(properties)

        return dm.NodeApply(
            space=self.instance_space,
            external_id=identifier,
            # type=#RDF type
            sources=[dm.NodeOrEdgeData(source=view_id, properties=dict(created.model_dump().items()))],
        )

    def _create_edges(
        self,
        identifier: str,
        properties: dict[str, list[str]],
        edge_by_properties: dict[str, dm.EdgeConnection],
        tracker: Tracker,
    ) -> Iterable[dm.EdgeApply | NeatIssue]:
        for prop, values in properties.items():
            if prop not in edge_by_properties:
                continue
            edge = edge_by_properties[prop]
            if isinstance(edge, SingleEdgeConnection) and len(values) > 1:
                error = loader_issues.InvalidInstanceError(
                    type_="edge",
                    identifier=identifier,
                    reason=f"Multiple values for single edge {edge}. Expected only one.",
                )
                tracker.issue(error)
                yield error
            for target in values:
                yield dm.EdgeApply(
                    space=self.instance_space,
                    external_id=f"{identifier}.{prop}.{target}",
                    type=edge.type,
                    start_node=dm.DirectRelationReference(self.instance_space, identifier),
                    end_node=dm.DirectRelationReference(self.instance_space, target),
                )

    def _get_required_capabilities(self) -> list[Capability]:
        return [
            DataModelInstancesAcl(
                actions=[
                    DataModelInstancesAcl.Action.Write,
                    DataModelInstancesAcl.Action.Write_Properties,
                    DataModelInstancesAcl.Action.Read,
                ],
                scope=DataModelInstancesAcl.Scope.SpaceID([self.instance_space]),
            )
        ]

    def _upload_to_cdf(
        self,
        client: CogniteClient,
        items: list[dm.InstanceApply],
        return_diffs: bool,
        dry_run: bool,
        read_issues: NeatIssueList,
    ) -> UploadDiffsID:
        result = UploadDiffsID(name=type(self).__name__, issues=read_issues)
        try:
            nodes = [item for item in items if isinstance(item, dm.NodeApply)]
            edges = [item for item in items if isinstance(item, dm.EdgeApply)]
            upserted = client.data_modeling.instances.apply(
                nodes,
                edges,
                auto_create_end_nodes=True,
                auto_create_start_nodes=True,
                skip_on_version_conflict=True,
            )
        except CogniteAPIError as e:
            result.error_messages.append(str(e))
            result.failed.append([repr(instance.as_id()) for instance in items])  # type: ignore[arg-type, attr-defined]
        else:
            for instance in itertools.chain(upserted.nodes, upserted.edges):
                if instance.was_modified and instance.created_time == instance.last_updated_time:
                    result.created.append(repr(instance.as_id()))
                elif instance.was_modified:
                    result.changed.append(repr(instance.as_id()))
                else:
                    result.unchanged.append(repr(instance.as_id()))
        return result if return_diffs else result.as_upload_result_ids()  # type: ignore[return-value]


def _triples2dictionary(
    triples: Iterable[tuple[str, str, str]],
) -> dict[str, dict[str, list[str]]]:
    """Converts list of triples to dictionary"""
    values_by_property_by_identifier: dict[str, dict[str, list[str]]] = defaultdict(lambda: defaultdict(list))
    for id_, property_, value in triples:
        values_by_property_by_identifier[id_][property_].append(value)
    return values_by_property_by_identifier
```
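The new DMSLoader pulls typed instances out of the graph store, validates them against each view's schema with a dynamically created pydantic model, and emits NodeApply/EdgeApply objects (or issues). A minimal usage sketch follows; the data model and space identifiers are hypothetical, the import path is an assumption, and it presumes a configured CogniteClient and a populated graph store:

```python
from pathlib import Path

from cognite.client import CogniteClient
from cognite.client import data_modeling as dm

from cognite.neat.graph.loaders import DMSLoader  # import path is an assumption


def dump_instances(client: CogniteClient, store, out: Path) -> None:
    """Sketch: load instances for an existing data model and dump them to disk."""
    loader = DMSLoader.from_data_model_id(
        client=client,
        data_model_id=dm.DataModelId("my_space", "my_model", "1"),  # hypothetical id
        graph_store=store,  # any populated NeatGraphStoreBase subclass
        instance_space="my_instances",
    )
    # write_to_file accepts .json/.yaml/.yml and dumps nodes, edges, and issues.
    loader.write_to_file(out)
```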
cognite/neat/graph/stores/_base.py:

```diff
@@ -3,17 +3,22 @@ import sys
 import time
 from abc import ABC, abstractmethod
 from collections.abc import Iterable, Iterator
+from datetime import datetime
 from pathlib import Path
 from typing import Literal, TypeAlias, cast
 
 import pandas as pd
+import pytz
 from prometheus_client import Gauge, Summary
-from rdflib import Graph, Namespace, URIRef
+from rdflib import RDF, Graph, Namespace, URIRef
 from rdflib.query import Result, ResultRow
 
 from cognite.neat.constants import DEFAULT_NAMESPACE, PREFIXES
 from cognite.neat.graph.models import Triple
 from cognite.neat.graph.stores._rdf_to_graph import rdf_file_to_graph
+from cognite.neat.utils import remove_namespace
+
+from ._provenance import Change, Provenance
 
 if sys.version_info >= (3, 11):
     pass
@@ -48,6 +53,7 @@ class NeatGraphStoreBase(ABC):
         namespace: Namespace = DEFAULT_NAMESPACE,
         prefixes: dict = PREFIXES,
     ):
+        _start = datetime.now(pytz.utc)
         self.graph = graph or Graph()
         self.base_prefix: str = base_prefix
         self.namespace: Namespace = namespace
@@ -62,6 +68,16 @@ class NeatGraphStoreBase(ABC):
         self.internal_storage_dir_orig: Path | None = None
        self.storage_dirs_to_delete: list[Path] = []
         self.queries = _Queries(self)
+        self.provenance = Provenance(
+            [
+                Change.record(
+                    activity=f"{type(self).__name__}.__init__",
+                    start=_start,
+                    end=datetime.now(pytz.utc),
+                    description="Initialize graph store",
+                )
+            ]
+        )
 
     @abstractmethod
     def _set_graph(self) -> None:
@@ -109,9 +125,9 @@ class NeatGraphStoreBase(ABC):
 
         if base_prefix:
             self.base_prefix = base_prefix
-        ...
-        ...
-        ...
+        if self.base_prefix:
+            self.graph.bind(self.base_prefix, self.namespace)
+            logging.info("Adding prefix %s with namespace %s", self.base_prefix, self.namespace)
         logging.info("Graph initialized")
 
     def reinitialize_graph(self):
@@ -361,3 +377,17 @@ class _Queries:
         logging.info(query)
         # Select queries gives an iterable of result rows
         return cast(list[ResultRow], list(self.store.query(query)))
+
+    def triples_of_type_instances(self, rdf_type: str) -> list[tuple[str, str, str]]:
+        """Get all triples of a given type.
+
+        This method assumes the graph has been transformed into the default namespace.
+        """
+        query = (
+            f"SELECT ?instance ?prop ?value "
+            f"WHERE {{ ?instance a <{self.store.namespace[rdf_type]}> . ?instance ?prop ?value . }} order by ?instance"
+        )
+        result = self.store.query(query)
+
+        # We cannot include the RDF.type in case there is a neat:type property
+        return [remove_namespace(*triple) for triple in result if triple[1] != RDF.type]  # type: ignore[misc, index]
```

(The three removed lines in the `@@ -109,9 +125,9 @@` hunk are shown as `...` because their content was not captured in the extracted view.)
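The loader consumes the rows returned by `triples_of_type_instances` by grouping them per instance and then per property (this is the `_triples2dictionary` helper in the loader file above). A self-contained illustration of that grouping step, using made-up triples:

```python
from collections import defaultdict
from collections.abc import Iterable


def triples2dictionary(triples: Iterable[tuple[str, str, str]]) -> dict[str, dict[str, list[str]]]:
    """Group (instance, property, value) rows by instance, then by property."""
    grouped: dict[str, dict[str, list[str]]] = defaultdict(lambda: defaultdict(list))
    for identifier, prop, value in triples:
        grouped[identifier][prop].append(value)
    return grouped


rows = [
    ("pump-1", "name", "P-101"),       # hypothetical instance data
    ("pump-1", "isPartOf", "site-1"),
    ("pump-1", "isPartOf", "site-2"),  # multi-valued property becomes a list
]
print({k: dict(v) for k, v in triples2dictionary(rows).items()})
# {'pump-1': {'name': ['P-101'], 'isPartOf': ['site-1', 'site-2']}}
```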
cognite/neat/graph/stores/_provenance.py (new file, +99 lines):

```python
# we will use prov-o to represent the provenance of the neat graph store
# basically tracking changes that occur in the graph store
# prov-o use concepts of Agent, Activity and Entity to represent provenance
# where in case of neat we have:
# Agent: triples extractors, graph enhancers, contextualizers, etc.
# Activity: write triple, add connection, etc.
# Entity: neat graph store


import uuid
from collections import UserList
from collections.abc import Sequence
from dataclasses import dataclass
from datetime import datetime
from typing import TypeVar

from rdflib import PROV, RDF, Literal, URIRef

from cognite.neat.constants import DEFAULT_NAMESPACE


@dataclass(frozen=True)
class Agent:
    id_: URIRef = DEFAULT_NAMESPACE.agent
    acted_on_behalf_of: str = "NEAT"

    def as_triples(self):
        return [
            (self.id_, RDF.type, PROV[type(self).__name__]),
            (self.id_, PROV.actedOnBehalfOf, self.acted_on_behalf_of),
        ]


@dataclass(frozen=True)
class Activity:
    was_associated_with: Agent
    ended_at_time: datetime
    started_at_time: datetime
    used: str  # this would be set to for example Extractor, Enhancer, Contextualizer, etc.
    id_: URIRef = DEFAULT_NAMESPACE[f"activity-{uuid.uuid4()}"]

    def as_triples(self):
        return [
            (self.id_, RDF.type, PROV[type(self).__name__]),
            (self.id_, PROV.wasAssociatedWith, self.was_associated_with.id_),
            (self.id_, PROV.startedAtTime, Literal(self.started_at_time)),
            (self.id_, PROV.endedAtTime, Literal(self.ended_at_time)),
            (self.id_, PROV.used, self.used),
        ]


@dataclass(frozen=True)
class Entity:
    was_generated_by: Activity
    was_attributed_to: Agent
    id_: URIRef = DEFAULT_NAMESPACE["graph-store"]

    def as_triples(self):
        return [
            (self.id_, RDF.type, PROV[type(self).__name__]),
            (self.id_, PROV.wasGeneratedBy, self.was_generated_by.id_),
            (self.id_, PROV.wasAttributedTo, self.was_attributed_to.id_),
        ]


@dataclass(frozen=True)
class Change:
    agent: Agent
    activity: Activity
    entity: Entity
    description: str

    def as_triples(self):
        return self.agent.as_triples() + self.activity.as_triples() + self.entity.as_triples()

    @classmethod
    def record(cls, activity: str, start: datetime, end: datetime, description: str):
        """User friendly method to record a change that occurred in the graph store."""
        agent = Agent()
        activity = Activity(used=activity, was_associated_with=agent, started_at_time=start, ended_at_time=end)
        entity = Entity(was_generated_by=activity, was_attributed_to=agent)
        return cls(agent, activity, entity, description)


T_Change = TypeVar("T_Change", bound=Change)


class Provenance(UserList[T_Change]):
    def __init__(self, changes: Sequence[T_Change] | None = None):
        super().__init__(changes or [])

    def did_this_happen(self, this: str) -> bool:
        return any(change.description == this for change in self)

    def __delitem__(self, *args, **kwargs):
        raise TypeError("Cannot delete change from provenance")

    def __setitem__(self, *args, **kwargs):
        raise TypeError("Cannot modify change from provenance")
```
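This provenance log is what the graph store now populates in `__init__` (see the `_base.py` hunk above). A short sketch of recording and querying a change; the activity name is made up:

```python
from datetime import datetime, timezone

from cognite.neat.graph.stores._provenance import Change, Provenance

start = datetime.now(timezone.utc)
# ... run an extractor/enhancer that mutates the graph store ...
change = Change.record(
    activity="AssetsExtractor",  # hypothetical agent name
    start=start,
    end=datetime.now(timezone.utc),
    description="Extracted assets into graph store",
)

provenance = Provenance([change])
print(provenance.did_this_happen("Extracted assets into graph store"))  # True
# The log is append-only: __delitem__/__setitem__ raise TypeError.
```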
cognite/neat/issues.py (added, +150 lines):

```python
import sys
import warnings
from abc import ABC, abstractmethod
from collections import UserList
from collections.abc import Sequence
from dataclasses import dataclass
from functools import total_ordering
from typing import Any, ClassVar, TypeVar
from warnings import WarningMessage

import pandas as pd

if sys.version_info < (3, 11):
    from exceptiongroup import ExceptionGroup
    from typing_extensions import Self
else:
    from typing import Self


@total_ordering
@dataclass(frozen=True)
class NeatIssue(ABC):
    description: ClassVar[str]
    fix: ClassVar[str]

    def message(self) -> str:
        """Return a human-readable message for the issue.

        This is the default implementation, which returns the description.
        It is recommended to override this method in subclasses with a more
        specific message.
        """
        return self.description

    @abstractmethod
    def dump(self) -> dict[str, Any]:
        """Return a dictionary representation of the issue."""
        raise NotImplementedError()

    def __lt__(self, other: "NeatIssue") -> bool:
        if not isinstance(other, NeatIssue):
            return NotImplemented
        return (type(self).__name__, self.message()) < (type(other).__name__, other.message())

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, NeatIssue):
            return NotImplemented
        return (type(self).__name__, self.message()) == (type(other).__name__, other.message())


@dataclass(frozen=True)
class NeatError(NeatIssue, ABC):
    def dump(self) -> dict[str, Any]:
        return {"errorType": type(self).__name__}

    def as_exception(self) -> ValueError:
        return ValueError(self.message())


@dataclass(frozen=True)
class NeatWarning(NeatIssue, ABC, UserWarning):
    def dump(self) -> dict[str, Any]:
        return {"warningType": type(self).__name__}

    @classmethod
    def from_warning(cls, warning: WarningMessage) -> "NeatWarning":
        return DefaultWarning.from_warning_message(warning)


@dataclass(frozen=True)
class DefaultWarning(NeatWarning):
    description = "A warning was raised during validation."
    fix = "No fix is available."

    warning: str | Warning
    category: type[Warning]
    source: str | None = None

    def dump(self) -> dict[str, Any]:
        output = super().dump()
        output["msg"] = str(self.warning)
        output["category"] = self.category.__name__
        output["source"] = self.source
        return output

    @classmethod
    def from_warning_message(cls, warning: WarningMessage) -> NeatWarning:
        if isinstance(warning.message, NeatWarning):
            return warning.message

        return cls(
            warning=warning.message,
            category=warning.category,
            source=warning.source,
        )

    def message(self) -> str:
        return str(self.warning)


T_NeatIssue = TypeVar("T_NeatIssue", bound=NeatIssue)


class NeatIssueList(UserList[T_NeatIssue], ABC):
    def __init__(self, issues: Sequence[T_NeatIssue] | None = None, title: str | None = None):
        super().__init__(issues or [])
        self.title = title

    @property
    def errors(self) -> Self:
        return type(self)([issue for issue in self if isinstance(issue, NeatError)])  # type: ignore[misc]

    @property
    def has_errors(self) -> bool:
        return any(isinstance(issue, NeatError) for issue in self)

    @property
    def warnings(self) -> Self:
        return type(self)([issue for issue in self if isinstance(issue, NeatWarning)])  # type: ignore[misc]

    def as_errors(self) -> ExceptionGroup:
        return ExceptionGroup(
            "Operation failed",
            [ValueError(issue.message()) for issue in self if isinstance(issue, NeatError)],
        )

    def trigger_warnings(self) -> None:
        for warning in [issue for issue in self if isinstance(issue, NeatWarning)]:
            warnings.warn(warning, stacklevel=2)

    def to_pandas(self) -> pd.DataFrame:
        return pd.DataFrame([issue.dump() for issue in self])

    def _repr_html_(self) -> str | None:
        return self.to_pandas()._repr_html_()  # type: ignore[operator]

    def as_exception(self) -> "MultiValueError":
        return MultiValueError(self.errors)


class MultiValueError(ValueError):
    """This is a container for multiple errors.

    It is used in the pydantic field_validator/model_validator to collect multiple errors, which
    can then be caught in a try-except block and returned as an IssueList.

    """

    def __init__(self, errors: Sequence[T_NeatIssue]):
        self.errors = list(errors)
```
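Concrete issues subclass `NeatError` or `NeatWarning` and are collected in a `NeatIssueList`. A sketch of defining and collecting a custom error; the error type and the concrete list subclass are hypothetical, added only for illustration:

```python
from dataclasses import dataclass
from typing import Any

from cognite.neat.issues import NeatError, NeatIssueList


@dataclass(frozen=True)
class MissingSpaceError(NeatError):  # hypothetical error type
    description = "A referenced space does not exist."
    fix = "Create the space before loading instances."

    space: str

    def message(self) -> str:
        return f"Space {self.space!r} does not exist"

    def dump(self) -> dict[str, Any]:
        return {**super().dump(), "space": self.space}


class IssueList(NeatIssueList[NeatError]):
    """Concrete list type (NeatIssueList is declared as an ABC)."""


issues = IssueList([MissingSpaceError(space="my_space")])
print(issues.has_errors)   # True
print(issues.to_pandas())  # one row per issue, built from .dump()
# issues.as_exception() wraps the errors in a MultiValueError
```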
cognite/neat/rules/exporters/_base.py:

```diff
@@ -7,8 +7,7 @@ from cognite.client import CogniteClient
 
 from cognite.neat.rules._shared import Rules
 from cognite.neat.rules.models import DMSRules, InformationRules, RoleTypes
-
-from ._models import UploadResult
+from cognite.neat.utils.upload import UploadDiffsCount
 
 T_Export = TypeVar("T_Export")
 
@@ -38,5 +37,5 @@ class BaseExporter(ABC, Generic[T_Export]):
 
 class CDFExporter(BaseExporter[T_Export]):
     @abstractmethod
-    def export_to_cdf(self, rules: Rules, client: CogniteClient, dry_run: bool = False) -> Iterable[
+    def export_to_cdf(self, rules: Rules, client: CogniteClient, dry_run: bool = False) -> Iterable[UploadDiffsCount]:
        raise NotImplementedError
```
cognite/neat/rules/exporters/_rules2dms.py:

```diff
@@ -31,9 +31,9 @@ from cognite.neat.utils.cdf_loaders import (
     TransformationLoader,
     ViewLoader,
 )
+from cognite.neat.utils.upload import UploadDiffsCount
 
 from ._base import CDFExporter
-from ._models import UploadResult
 
 Component: TypeAlias = Literal["all", "spaces", "data_models", "views", "containers", "node_types"]
 
@@ -123,7 +123,7 @@ class DMSExporter(CDFExporter[DMSSchema]):
             raise ValueError(f"{type(rules).__name__} cannot be exported to DMS")
         return dms_rules.as_schema(include_pipeline=self.export_pipeline, instance_space=self.instance_space)
 
-    def delete_from_cdf(self, rules: Rules, client: CogniteClient, dry_run: bool = False) -> Iterable[
+    def delete_from_cdf(self, rules: Rules, client: CogniteClient, dry_run: bool = False) -> Iterable[UploadDiffsCount]:
        schema, to_export = self._prepare_schema_and_exporters(rules, client)
 
         # we need to reverse order in which we are picking up the items to delete
@@ -161,7 +161,7 @@ class DMSExporter(CDFExporter[DMSSchema]):
                 deleted -= failed_deleted
                 error_messages.append(f"Failed delete: {e.message}")
 
-            yield
+            yield UploadDiffsCount(
                 name=loader.resource_name,
                 deleted=deleted,
                 skipped=0,
@@ -169,7 +169,7 @@ class DMSExporter(CDFExporter[DMSSchema]):
                 error_messages=error_messages,
             )
 
-    def export_to_cdf(self, rules: Rules, client: CogniteClient, dry_run: bool = False) -> Iterable[
+    def export_to_cdf(self, rules: Rules, client: CogniteClient, dry_run: bool = False) -> Iterable[UploadDiffsCount]:
         schema, to_export = self._prepare_schema_and_exporters(rules, client)
 
         # The conversion from DMS to GraphQL does not seem to be triggered even if the views
@@ -250,7 +250,7 @@ class DMSExporter(CDFExporter[DMSSchema]):
                 changed -= failed_changed
                 error_messages.append(e.message)
 
-            yield
+            yield UploadDiffsCount(
                 name=loader.resource_name,
                 created=created,
                 changed=changed,
```
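With `UploadResult` replaced by `UploadDiffsCount` (moved to cognite/neat/utils/upload.py per the file list above), exporters now yield one counts object per resource kind. A consumption sketch; the exporter's constructor arguments and import path are assumptions:

```python
from cognite.client import CogniteClient

from cognite.neat.rules.exporters import DMSExporter  # import path is an assumption


def report_export(rules, client: CogniteClient) -> None:
    """Sketch: dry-run an export and print the per-resource upload counts."""
    exporter = DMSExporter()  # assuming default construction is valid
    for result in exporter.export_to_cdf(rules, client, dry_run=True):
        # Each UploadDiffsCount summarizes one resource kind (spaces, views, ...).
        print(result.name, result.created, result.changed, result.error_messages)
```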
cognite/neat/rules/importers/_base.py:

```diff
@@ -118,4 +118,4 @@ def _handle_issues(
         future_result._result = "success"
     finally:
         if warning_logger:
-            issues.extend([warning_cls.from_warning(warning) for warning in warning_logger])
+            issues.extend([warning_cls.from_warning(warning) for warning in warning_logger])  # type: ignore[misc]
```