cognite-neat 0.78.3__py3-none-any.whl → 0.78.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (38) hide show
  1. cognite/neat/_version.py +1 -1
  2. cognite/neat/graph/_tracking/__init__.py +4 -0
  3. cognite/neat/graph/_tracking/base.py +30 -0
  4. cognite/neat/graph/_tracking/log.py +27 -0
  5. cognite/neat/graph/extractors/__init__.py +19 -0
  6. cognite/neat/graph/extractors/_classic_cdf/__init__.py +0 -0
  7. cognite/neat/graph/extractors/_classic_cdf/_assets.py +107 -0
  8. cognite/neat/graph/extractors/_classic_cdf/_events.py +117 -0
  9. cognite/neat/graph/extractors/_classic_cdf/_files.py +131 -0
  10. cognite/neat/graph/extractors/_classic_cdf/_labels.py +72 -0
  11. cognite/neat/graph/extractors/_classic_cdf/_relationships.py +153 -0
  12. cognite/neat/graph/extractors/_classic_cdf/_sequences.py +92 -0
  13. cognite/neat/graph/extractors/_classic_cdf/_timeseries.py +118 -0
  14. cognite/neat/graph/issues/__init__.py +0 -0
  15. cognite/neat/graph/issues/loader.py +104 -0
  16. cognite/neat/graph/loaders/__init__.py +4 -0
  17. cognite/neat/graph/loaders/_base.py +109 -0
  18. cognite/neat/graph/loaders/_rdf2dms.py +280 -0
  19. cognite/neat/graph/stores/_base.py +19 -4
  20. cognite/neat/issues.py +150 -0
  21. cognite/neat/rules/exporters/_base.py +2 -3
  22. cognite/neat/rules/exporters/_rules2dms.py +5 -5
  23. cognite/neat/rules/importers/_base.py +1 -1
  24. cognite/neat/rules/issues/__init__.py +2 -3
  25. cognite/neat/rules/issues/base.py +9 -133
  26. cognite/neat/rules/issues/spreadsheet.py +3 -2
  27. cognite/neat/rules/models/_base.py +6 -0
  28. cognite/neat/rules/models/dms/_rules.py +3 -0
  29. cognite/neat/rules/models/dms/_schema.py +133 -3
  30. cognite/neat/rules/models/domain.py +3 -0
  31. cognite/neat/rules/models/information/_rules.py +4 -1
  32. cognite/neat/{rules/exporters/_models.py → utils/upload.py} +26 -6
  33. cognite/neat/utils/utils.py +24 -0
  34. {cognite_neat-0.78.3.dist-info → cognite_neat-0.78.5.dist-info}/METADATA +2 -2
  35. {cognite_neat-0.78.3.dist-info → cognite_neat-0.78.5.dist-info}/RECORD +38 -21
  36. {cognite_neat-0.78.3.dist-info → cognite_neat-0.78.5.dist-info}/LICENSE +0 -0
  37. {cognite_neat-0.78.3.dist-info → cognite_neat-0.78.5.dist-info}/WHEEL +0 -0
  38. {cognite_neat-0.78.3.dist-info → cognite_neat-0.78.5.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,92 @@
1
+ from collections.abc import Iterable
2
+ from datetime import datetime
3
+ from pathlib import Path
4
+ from typing import cast
5
+
6
+ import pytz
7
+ from cognite.client import CogniteClient
8
+ from cognite.client.data_classes import Sequence, SequenceList
9
+ from pydantic import AnyHttpUrl, ValidationError
10
+ from rdflib import RDF, Literal, Namespace, URIRef
11
+
12
+ from cognite.neat.constants import DEFAULT_NAMESPACE
13
+ from cognite.neat.graph.extractors._base import BaseExtractor
14
+ from cognite.neat.graph.models import Triple
15
+ from cognite.neat.utils.utils import string_to_ideal_type
16
+
17
+
18
class SequencesExtractor(BaseExtractor):
    """Extracts CDF Sequences as RDF triples in a given namespace."""

    def __init__(
        self,
        sequence: Iterable[Sequence],
        namespace: Namespace | None = None,
    ):
        self.namespace = namespace or DEFAULT_NAMESPACE
        self.sequence = sequence

    @classmethod
    def from_dataset(
        cls,
        client: CogniteClient,
        data_set_external_id: str,
        namespace: Namespace | None = None,
    ):
        """Create an extractor streaming sequences from a CDF data set."""
        sequences = client.sequences(data_set_external_ids=data_set_external_id)
        return cls(cast(Iterable[Sequence], sequences), namespace)

    @classmethod
    def from_file(cls, file_path: str, namespace: Namespace | None = None):
        """Create an extractor reading a serialized SequenceList from disk."""
        return cls(SequenceList.load(Path(file_path).read_text()), namespace)

    def extract(self) -> Iterable[Triple]:
        """Extract sequences as triples."""
        for item in self.sequence:
            yield from self._sequence2triples(item, self.namespace)

    @classmethod
    def _sequence2triples(cls, sequence: Sequence, namespace: Namespace) -> list[Triple]:
        """Convert a single Sequence into a list of RDF triples."""
        id_ = namespace[str(sequence.id)]

        # Every sequence instance is typed as namespace.Sequence.
        triples: list[Triple] = [(id_, RDF.type, namespace.Sequence)]
        add = triples.append

        # Plain attributes become literals; falsy values are skipped.
        if sequence.external_id:
            add((id_, namespace.external_id, Literal(sequence.external_id)))
        if sequence.name:
            add((id_, namespace.name, Literal(sequence.name)))

        # Metadata values are coerced to their "ideal" Python type; values that
        # parse as HTTP URLs become URIRefs, everything else becomes a Literal.
        for key, value in (sequence.metadata or {}).items():
            if not value:
                continue
            typed_value = string_to_ideal_type(value)
            try:
                add((id_, namespace[key], URIRef(str(AnyHttpUrl(typed_value)))))  # type: ignore
            except ValidationError:
                add((id_, namespace[key], Literal(typed_value)))

        if sequence.description:
            add((id_, namespace.description, Literal(sequence.description)))

        # Timestamps are epoch milliseconds in CDF; convert to UTC datetimes.
        if sequence.created_time:
            created = datetime.fromtimestamp(sequence.created_time / 1000, pytz.utc)
            add((id_, namespace.created_time, Literal(created)))
        if sequence.last_updated_time:
            updated = datetime.fromtimestamp(sequence.last_updated_time / 1000, pytz.utc)
            add((id_, namespace.last_updated_time, Literal(updated)))

        # References to other CDF resources point into the same namespace.
        if sequence.data_set_id:
            add((id_, namespace.data_set_id, namespace[str(sequence.data_set_id)]))
        if sequence.asset_id:
            add((id_, namespace.asset, namespace[str(sequence.asset_id)]))

        return triples
@@ -0,0 +1,118 @@
1
+ from collections.abc import Iterable
2
+ from datetime import datetime
3
+ from pathlib import Path
4
+ from typing import cast
5
+
6
+ import pytz
7
+ from cognite.client import CogniteClient
8
+ from cognite.client.data_classes import TimeSeries, TimeSeriesList
9
+ from pydantic import AnyHttpUrl, ValidationError
10
+ from rdflib import RDF, Literal, Namespace, URIRef
11
+
12
+ from cognite.neat.constants import DEFAULT_NAMESPACE
13
+ from cognite.neat.graph.extractors._base import BaseExtractor
14
+ from cognite.neat.graph.models import Triple
15
+ from cognite.neat.utils.utils import string_to_ideal_type
16
+
17
+
18
class TimeSeriesExtractor(BaseExtractor):
    """Extracts CDF TimeSeries as RDF triples in a given namespace."""

    def __init__(
        self,
        timeseries: Iterable[TimeSeries],
        namespace: Namespace | None = None,
    ):
        self.namespace = namespace or DEFAULT_NAMESPACE
        self.timeseries = timeseries

    @classmethod
    def from_dataset(
        cls,
        client: CogniteClient,
        data_set_external_id: str,
        namespace: Namespace | None = None,
    ):
        """Create an extractor streaming time series from a CDF data set."""
        return cls(
            cast(Iterable[TimeSeries], client.time_series(data_set_external_ids=data_set_external_id)), namespace
        )

    @classmethod
    def from_file(cls, file_path: str, namespace: Namespace | None = None):
        """Create an extractor reading a serialized TimeSeriesList from disk."""
        return cls(TimeSeriesList.load(Path(file_path).read_text()), namespace)

    def extract(self) -> Iterable[Triple]:
        """Extract timeseries as triples."""
        for timeseries in self.timeseries:
            yield from self._timeseries2triples(timeseries, self.namespace)

    @classmethod
    def _timeseries2triples(cls, timeseries: TimeSeries, namespace: Namespace) -> list[Triple]:
        """Convert a single TimeSeries into a list of RDF triples."""
        id_ = namespace[str(timeseries.id)]

        # Set rdf type
        triples: list[Triple] = [(id_, RDF.type, namespace.TimeSeries)]

        # Create attributes
        if timeseries.external_id:
            triples.append((id_, namespace.external_id, Literal(timeseries.external_id)))

        if timeseries.name:
            triples.append((id_, namespace.name, Literal(timeseries.name)))

        # Boolean flag: compare against None so an explicit False is still exported.
        if timeseries.is_string is not None:
            triples.append((id_, namespace.is_string, Literal(timeseries.is_string)))

        # Metadata values are coerced to their "ideal" Python type; values that
        # parse as HTTP URLs become URIRefs, everything else becomes a Literal.
        if timeseries.metadata:
            for key, value in timeseries.metadata.items():
                if value:
                    type_aware_value = string_to_ideal_type(value)
                    try:
                        triples.append((id_, namespace[key], URIRef(str(AnyHttpUrl(type_aware_value)))))  # type: ignore
                    except ValidationError:
                        triples.append((id_, namespace[key], Literal(type_aware_value)))

        if timeseries.unit:
            triples.append((id_, namespace.unit, Literal(timeseries.unit)))

        # Bug fix: the condition previously tested `namespace.is_step` (a URIRef,
        # always truthy) instead of the time series flag, which emitted a
        # Literal(None) triple whenever the flag was unset.
        if timeseries.is_step is not None:
            triples.append((id_, namespace.is_step, Literal(timeseries.is_step)))

        if timeseries.description:
            triples.append((id_, namespace.description, Literal(timeseries.description)))

        if timeseries.security_categories:
            for category in timeseries.security_categories:
                triples.append((id_, namespace.security_categories, Literal(category)))

        # Timestamps are epoch milliseconds in CDF; convert to UTC datetimes.
        if timeseries.created_time:
            triples.append(
                (id_, namespace.created_time, Literal(datetime.fromtimestamp(timeseries.created_time / 1000, pytz.utc)))
            )

        if timeseries.last_updated_time:
            triples.append(
                (
                    id_,
                    namespace.last_updated_time,
                    Literal(datetime.fromtimestamp(timeseries.last_updated_time / 1000, pytz.utc)),
                )
            )

        if timeseries.legacy_name:
            triples.append((id_, namespace.legacy_name, Literal(timeseries.legacy_name)))

        # Create connections
        if timeseries.unit_external_id:
            # try to create connection to QUDT unit catalog
            try:
                triples.append((id_, namespace.unit_external_id, URIRef(str(AnyHttpUrl(timeseries.unit_external_id)))))
            except ValidationError:
                triples.append((id_, namespace.unit_external_id, Literal(timeseries.unit_external_id)))

        if timeseries.data_set_id:
            # NOTE(review): predicate is `namespace.dataset` here, while the sequences
            # extractor uses `namespace.data_set_id` — confirm which is intended.
            triples.append((id_, namespace.dataset, namespace[str(timeseries.data_set_id)]))

        if timeseries.asset_id:
            triples.append((id_, namespace.asset, namespace[str(timeseries.asset_id)]))

        return triples
File without changes
@@ -0,0 +1,104 @@
1
+ from dataclasses import dataclass
2
+ from typing import Any
3
+
4
+ from cognite.neat.issues import NeatError, NeatWarning
5
+
6
# Public API of this module: loader-related errors and warnings.
__all__ = [
    "FailedAuthorizationError",
    "MissingDataModelError",
    "FailedConvertError",
    "InvalidClassWarning",
    "InvalidInstanceError",
]
13
+
14
+
15
@dataclass(frozen=True)
class FailedAuthorizationError(NeatError):
    """Raised when the client lacks the capabilities required for an action."""

    description = "Missing authorization for {action}: {reason}"

    action: str
    reason: str

    def message(self) -> str:
        """Human-readable message built from the description template."""
        return self.description.format(action=self.action, reason=self.reason)

    def dump(self) -> dict[str, Any]:
        """Serialize this error, extending the base-class payload."""
        output = super().dump()
        output.update(action=self.action, reason=self.reason)
        return output
30
+
31
+
32
@dataclass(frozen=True)
class MissingDataModelError(NeatError):
    """Raised when a referenced data model cannot be found."""

    description = "The data model with identifier {identifier} is missing: {reason}"
    fix = "Check the data model identifier and try again."

    identifier: str
    reason: str

    def message(self) -> str:
        """Human-readable message built from the description template."""
        return self.description.format(identifier=self.identifier, reason=self.reason)

    def dump(self) -> dict[str, Any]:
        """Serialize this error, extending the base-class payload."""
        output = super().dump()
        output.update(identifier=self.identifier, reason=self.reason)
        return output
48
+
49
+
50
@dataclass(frozen=True)
class FailedConvertError(NeatError):
    """Raised when rules cannot be converted to the requested target format."""

    description = "Failed to convert the {identifier} to {target_format}: {reason}"
    fix = "Check the error message and correct the rules."

    identifier: str
    target_format: str
    reason: str

    def message(self) -> str:
        """Human-readable message built from the description template."""
        return self.description.format(identifier=self.identifier, target_format=self.target_format, reason=self.reason)

    def dump(self) -> dict[str, Any]:
        """Serialize this error, extending the base-class payload."""
        # Key is camelCase ("targetFormat") in the serialized form.
        output = super().dump()
        output.update(identifier=self.identifier, targetFormat=self.target_format, reason=self.reason)
        return output
67
+
68
+
69
@dataclass(frozen=True)
class InvalidClassWarning(NeatWarning):
    """Warning issued when a class definition is invalid and gets skipped."""

    description = "The class {class_name} is invalid and will be skipped. {reason}"
    fix = "Check the error message and correct the class."

    class_name: str
    reason: str

    def message(self) -> str:
        """Human-readable message built from the description template."""
        return self.description.format(class_name=self.class_name, reason=self.reason)

    def dump(self) -> dict[str, Any]:
        """Serialize this warning, extending the base-class payload."""
        output = super().dump()
        output.update(class_name=self.class_name, reason=self.reason)
        return output
85
+
86
+
87
@dataclass(frozen=True)
class InvalidInstanceError(NeatError):
    """Raised when an instance is invalid and gets skipped."""

    description = "The {type_} with identifier {identifier} is invalid and will be skipped. {reason}"
    fix = "Check the error message and correct the instance."

    type_: str
    identifier: str
    reason: str

    def message(self) -> str:
        """Human-readable message built from the description template."""
        return self.description.format(type_=self.type_, identifier=self.identifier, reason=self.reason)

    def dump(self) -> dict[str, Any]:
        """Serialize this error, extending the base-class payload."""
        # The trailing-underscore field is serialized under the plain key "type".
        output = super().dump()
        output.update(type=self.type_, identifier=self.identifier, reason=self.reason)
        return output
@@ -0,0 +1,4 @@
1
from ._base import BaseLoader, CDFLoader
from ._rdf2dms import DMSLoader

# Public API of the loaders package.
__all__ = ["BaseLoader", "CDFLoader", "DMSLoader"]
@@ -0,0 +1,109 @@
1
+ from abc import ABC, abstractmethod
2
+ from collections.abc import Iterable
3
+ from pathlib import Path
4
+ from typing import ClassVar, Generic, Literal, TypeVar, overload
5
+
6
+ from cognite.client import CogniteClient
7
+ from cognite.client.data_classes.capabilities import Capability
8
+
9
+ from cognite.neat.graph import NeatGraphStoreBase
10
+ from cognite.neat.graph.issues.loader import FailedAuthorizationError
11
+ from cognite.neat.issues import NeatIssue, NeatIssueList
12
+ from cognite.neat.utils.upload import UploadDiffsID, UploadResultIDs
13
+
14
+ T_Output = TypeVar("T_Output")
15
+
16
+
17
class BaseLoader(ABC, Generic[T_Output]):
    """Abstract base for loaders that turn a graph store into output items."""

    # File-output conventions shared by all loaders.
    _new_line = "\n"
    _encoding = "utf-8"

    def __init__(self, graph_store: NeatGraphStoreBase):
        self.graph_store = graph_store

    @abstractmethod
    def write_to_file(self, filepath: Path) -> None:
        """Write the loaded output to *filepath*."""
        raise NotImplementedError

    def load(self, stop_on_exception: bool = False) -> Iterable[T_Output | NeatIssue]:
        """Load the graph with data, yielding outputs and any issues encountered."""
        return self._load(stop_on_exception)

    @abstractmethod
    def _load(self, stop_on_exception: bool = False) -> Iterable[T_Output | NeatIssue]:
        """Subclass hook implementing the actual load."""
        ...
36
+
37
+
38
class CDFLoader(BaseLoader[T_Output]):
    """Base for loaders that upload their output to CDF in batches.

    Subclasses provide the required capabilities and the actual upload call;
    this class handles capability verification, batching, and issue routing.
    """

    # Items are uploaded in batches of this size.
    _UPLOAD_BATCH_SIZE: ClassVar[int] = 1000

    @overload
    def load_into_cdf_iterable(
        self, client: CogniteClient, return_diffs: Literal[False] = False, dry_run: bool = False
    ) -> Iterable[UploadResultIDs]: ...

    @overload
    def load_into_cdf_iterable(
        self, client: CogniteClient, return_diffs: Literal[True], dry_run: bool = False
    ) -> Iterable[UploadDiffsID]: ...

    def load_into_cdf_iterable(
        self, client: CogniteClient, return_diffs: bool = False, dry_run: bool = False
    ) -> Iterable[UploadResultIDs] | Iterable[UploadDiffsID]:
        """Lazily upload to CDF, yielding one result per uploaded batch."""
        yield from self._load_into_cdf_iterable(client, return_diffs, dry_run)

    @overload
    def load_into_cdf(
        self, client: CogniteClient, return_diffs: Literal[False] = False, dry_run: bool = False
    ) -> list[UploadResultIDs]: ...

    @overload
    def load_into_cdf(
        self, client: CogniteClient, return_diffs: Literal[True], dry_run: bool = False
    ) -> list[UploadDiffsID]: ...

    def load_into_cdf(
        self, client: CogniteClient, return_diffs: bool = False, dry_run: bool = False
    ) -> list[UploadResultIDs] | list[UploadDiffsID]:
        """Eager variant of load_into_cdf_iterable: collect all batch results."""
        return list(self._load_into_cdf_iterable(client, return_diffs, dry_run))  # type: ignore[return-value]

    def _load_into_cdf_iterable(
        self, client: CogniteClient, return_diffs: bool = False, dry_run: bool = False
    ) -> Iterable[UploadResultIDs] | Iterable[UploadDiffsID]:
        """Verify capabilities, then load and upload items in batches.

        If any required capability is missing, a single result carrying a
        FailedAuthorizationError is yielded and no upload is attempted.
        """
        missing_capabilities = client.iam.verify_capabilities(self._get_required_capabilities())
        # Result type depends on whether the caller asked for diffs.
        result_cls = UploadDiffsID if return_diffs else UploadResultIDs
        if missing_capabilities:
            result = result_cls(name=type(self).__name__)
            result.issues.append(FailedAuthorizationError(action="Upload to CDF", reason=str(missing_capabilities)))
            yield result
            return

        issues = NeatIssueList[NeatIssue]()
        items: list[T_Output] = []
        # Route each loaded element: issues are collected, outputs are batched.
        for result in self.load(stop_on_exception=False):
            if isinstance(result, NeatIssue):
                issues.append(result)
            else:
                items.append(result)

            # Flush a full batch to CDF.
            # NOTE(review): `issues` is never cleared, so issues collected so far
            # are passed along with every subsequent batch — confirm this is intended.
            if len(items) >= self._UPLOAD_BATCH_SIZE:
                yield self._upload_to_cdf(client, items, return_diffs, dry_run, issues)
                items.clear()
        # Flush the final partial batch, if any.
        if items:
            yield self._upload_to_cdf(client, items, return_diffs, dry_run, issues)

    @abstractmethod
    def _get_required_capabilities(self) -> list[Capability]:
        """Return the CDF capabilities this loader needs to upload."""
        raise NotImplementedError

    @abstractmethod
    def _upload_to_cdf(
        self,
        client: CogniteClient,
        items: list[T_Output],
        return_diffs: bool,
        dry_run: bool,
        read_issues: NeatIssueList,
    ) -> UploadResultIDs | UploadDiffsID:
        """Upload one batch of items and report the outcome."""
        raise NotImplementedError