cognite-neat 0.78.4__py3-none-any.whl → 0.79.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic.
- cognite/neat/_version.py +1 -1
- cognite/neat/graph/_tracking/__init__.py +4 -0
- cognite/neat/graph/_tracking/base.py +30 -0
- cognite/neat/graph/_tracking/log.py +27 -0
- cognite/neat/graph/extractors/__init__.py +17 -2
- cognite/neat/graph/extractors/_classic_cdf/{_asset_hierarchy.py → _assets.py} +29 -22
- cognite/neat/graph/extractors/_classic_cdf/_events.py +117 -0
- cognite/neat/graph/extractors/_classic_cdf/_files.py +131 -0
- cognite/neat/graph/extractors/_classic_cdf/_labels.py +72 -0
- cognite/neat/graph/extractors/_classic_cdf/_relationships.py +153 -0
- cognite/neat/graph/extractors/_classic_cdf/_sequences.py +92 -0
- cognite/neat/graph/extractors/_classic_cdf/_timeseries.py +118 -0
- cognite/neat/graph/issues/__init__.py +0 -0
- cognite/neat/graph/issues/loader.py +104 -0
- cognite/neat/graph/loaders/__init__.py +4 -0
- cognite/neat/graph/loaders/_base.py +109 -0
- cognite/neat/graph/loaders/_rdf2dms.py +280 -0
- cognite/neat/graph/stores/_base.py +34 -4
- cognite/neat/graph/stores/_provenance.py +99 -0
- cognite/neat/issues.py +150 -0
- cognite/neat/rules/exporters/_base.py +2 -3
- cognite/neat/rules/exporters/_rules2dms.py +5 -5
- cognite/neat/rules/importers/_base.py +1 -1
- cognite/neat/rules/issues/__init__.py +2 -3
- cognite/neat/rules/issues/base.py +9 -133
- cognite/neat/rules/issues/spreadsheet.py +3 -2
- cognite/neat/rules/models/_base.py +6 -0
- cognite/neat/rules/models/dms/_rules.py +3 -0
- cognite/neat/rules/models/dms/_schema.py +133 -3
- cognite/neat/rules/models/domain.py +3 -0
- cognite/neat/rules/models/information/_rules.py +4 -1
- cognite/neat/{rules/exporters/_models.py → utils/upload.py} +26 -6
- {cognite_neat-0.78.4.dist-info → cognite_neat-0.79.0.dist-info}/METADATA +2 -2
- {cognite_neat-0.78.4.dist-info → cognite_neat-0.79.0.dist-info}/RECORD +37 -21
- {cognite_neat-0.78.4.dist-info → cognite_neat-0.79.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.78.4.dist-info → cognite_neat-0.79.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.78.4.dist-info → cognite_neat-0.79.0.dist-info}/entry_points.txt +0 -0
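Judging from the modules listed above and the code in the diffs below, this release replaces the single asset-hierarchy extractor with a family of classic-CDF extractors that share one interface: from_dataset()/from_file() constructors plus an extract() generator of RDF triples. The sketch below is a hypothetical usage example, not taken from the package documentation; only the class names and method signatures come from the diffs, while the client setup and the data set external id "my_data_set" are placeholder assumptions.

# Hypothetical usage of the new classic-CDF extractors; only the class names and
# method signatures are taken from the diffs below.
from cognite.client import CogniteClient

from cognite.neat.graph.extractors import EventsExtractor, RelationshipsExtractor

client = CogniteClient()  # assumes a client already configured for your CDF project

# "my_data_set" is a placeholder data set external id
events = EventsExtractor.from_dataset(client, data_set_external_id="my_data_set")
relationships = RelationshipsExtractor.from_dataset(client, data_set_external_id="my_data_set")

for extractor in (events, relationships):
    for subject, predicate, obj in extractor.extract():
        print(subject, predicate, obj)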
cognite/neat/_version.py CHANGED
@@ -1 +1 @@
-__version__ = "0.78.4"
+__version__ = "0.79.0"
cognite/neat/graph/_tracking/base.py
@@ -0,0 +1,30 @@
+from abc import ABC, abstractmethod
+from collections.abc import Sequence
+
+from cognite.neat.issues import NeatIssue
+
+
+class Tracker(ABC):
+    def __init__(self, name: str, units: list[str], unit_type: str) -> None:
+        self.name = name
+        self.units = units
+        self.unit_type = unit_type
+
+    @abstractmethod
+    def start(self, unit: str) -> None:
+        raise NotImplementedError()
+
+    @abstractmethod
+    def finish(self, unit: str) -> None:
+        raise NotImplementedError()
+
+    @abstractmethod
+    def _issue(self, issue: NeatIssue) -> None:
+        raise NotImplementedError()
+
+    def issue(self, issue: NeatIssue | Sequence[NeatIssue]) -> None:
+        if isinstance(issue, NeatIssue):
+            self._issue(issue)
+            return
+        for item in issue:
+            self._issue(item)
cognite/neat/graph/_tracking/log.py
@@ -0,0 +1,27 @@
+from logging import getLogger
+
+from cognite.neat.issues import NeatError, NeatIssue, NeatWarning
+
+from .base import Tracker
+
+
+class LogTracker(Tracker):
+    def __init__(self, name: str, units: list[str], unit_type: str) -> None:
+        super().__init__(name, units, unit_type)
+        self._logger = getLogger(__name__)
+        self._total_units = len(units)
+        self._count = 1
+        self._logger.info(f"Staring {self.name} and will process {len(units)} {unit_type}.")
+
+    def start(self, unit: str) -> None:
+        self._logger.info(f"Starting {unit} {self._count}/{self._total_units}.")
+        self._count += 1
+
+    def finish(self, unit: str) -> None:
+        self._logger.info(f"Finished {unit}.")
+
+    def _issue(self, issue: NeatIssue) -> None:
+        if isinstance(issue, NeatWarning):
+            self._logger.warning(issue)
+        elif isinstance(issue, NeatError):
+            self._logger.error(issue)
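The new _tracking package is a small progress- and issue-reporting helper: Tracker defines the interface and LogTracker routes everything to the standard logging module. Below is a minimal sketch of how it might be driven; only the constructor and method signatures come from the diff above, while the tracker name, the unit list, and the commented issue-collection step are illustrative assumptions.

# Illustrative driver for the Tracker API added above; the names and units are made up.
import logging

from cognite.neat.graph._tracking.log import LogTracker

logging.basicConfig(level=logging.INFO)  # so the tracker's log records are visible

extractor_names = ["assets", "relationships", "events"]
tracker = LogTracker("classic_cdf_extraction", units=extractor_names, unit_type="extractors")

for name in extractor_names:
    tracker.start(name)
    # ... run the extractor for `name`, collecting NeatIssue objects along the way ...
    # tracker.issue(collected_issues)  # accepts a single NeatIssue or a sequence of them
    tracker.finish(name)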
cognite/neat/graph/extractors/__init__.py
@@ -1,4 +1,19 @@
-from ._classic_cdf._asset_hierarchy import AssetHierarchyExtractor
+from ._classic_cdf._assets import AssetsExtractor
+from ._classic_cdf._events import EventsExtractor
+from ._classic_cdf._files import FilesExtractor
+from ._classic_cdf._labels import LabelsExtractor
+from ._classic_cdf._relationships import RelationshipsExtractor
+from ._classic_cdf._sequences import SequencesExtractor
+from ._classic_cdf._timeseries import TimeSeriesExtractor
 from ._mock_graph_generator import MockGraphGenerator
 
-__all__ = [
+__all__ = [
+    "AssetsExtractor",
+    "MockGraphGenerator",
+    "RelationshipsExtractor",
+    "TimeSeriesExtractor",
+    "SequencesExtractor",
+    "EventsExtractor",
+    "FilesExtractor",
+    "LabelsExtractor",
+]
cognite/neat/graph/extractors/_classic_cdf/{_asset_hierarchy.py → _assets.py}
@@ -2,6 +2,7 @@ from collections.abc import Iterable
 from datetime import datetime
 from pathlib import Path
 from typing import cast
+from urllib.parse import quote
 
 import pytz
 from cognite.client import CogniteClient
@@ -14,7 +15,7 @@ from cognite.neat.graph.models import Triple
 from cognite.neat.utils.utils import string_to_ideal_type
 
 
-class AssetHierarchyExtractor(BaseExtractor):
+class AssetsExtractor(BaseExtractor):
     def __init__(
         self,
         assets: Iterable[Asset],
@@ -48,53 +49,59 @@ class AssetHierarchyExtractor(BaseExtractor):
     @classmethod
     def _asset2triples(cls, asset: Asset, namespace: Namespace) -> list[Triple]:
         """Converts an asset to triples."""
-
+        id_ = namespace[str(asset.id)]
 
+        # Set rdf type
+        triples: list[Triple] = [(id_, RDF.type, namespace["Asset"])]
+
+        # Create attributes
         if asset.name:
-            triples.append((
+            triples.append((id_, namespace.name, Literal(asset.name)))
 
         if asset.description:
-            triples.append((
+            triples.append((id_, namespace.description, Literal(asset.description)))
 
         if asset.external_id:
-            triples.append((
+            triples.append((id_, namespace.external_id, Literal(asset.external_id)))
 
         if asset.source:
-            triples.append((
+            triples.append((id_, namespace.source, Literal(asset.source)))
 
         # properties ref creation and update
         triples.append(
             (
-
-                namespace
+                id_,
+                namespace.created_time,
                 Literal(datetime.fromtimestamp(asset.created_time / 1000, pytz.utc)),
             )
         )
         triples.append(
             (
-
-                namespace
+                id_,
+                namespace.last_updated_time,
                 Literal(datetime.fromtimestamp(asset.last_updated_time / 1000, pytz.utc)),
             )
         )
 
-        if asset.parent_id:
-            triples.append((namespace[str(asset.id)], namespace["parent"], namespace[str(asset.parent_id)]))
-
-        if asset.root_id:
-            triples.append((namespace[str(asset.id)], namespace["root"], namespace[str(asset.root_id)]))
-
-        if asset.data_set_id:
-            triples.append((namespace[str(asset.id)], namespace["dataset"], namespace[str(asset.data_set_id)]))
-
         if asset.labels:
             for label in asset.labels:
-                # external_id can create ill-formed URIs, so we
-
+                # external_id can create ill-formed URIs, so we create websafe URIs
+                # since labels do not have internal ids, we use the external_id as the id
+                triples.append((id_, namespace.label, namespace[quote(label.dump()["externalId"])]))
 
         if asset.metadata:
            for key, value in asset.metadata.items():
                 if value:
-                    triples.append((
+                    triples.append((id_, namespace[key], Literal(string_to_ideal_type(value))))
+
+        # Create connections:
+        if asset.parent_id:
+            triples.append((id_, namespace.parent, namespace[str(asset.parent_id)]))
+
+        if asset.root_id:
+            triples.append((id_, namespace.root, namespace[str(asset.root_id)]))
+
+        if asset.data_set_id:
+            triples.append((id_, namespace.dataset, namespace[str(asset.data_set_id)]))
 
         return triples
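To make the rename and refactor concrete, the sketch below feeds one in-memory Asset through the converter and prints the resulting triples. It is illustration only: it calls the private _asset2triples classmethod directly, the sample Asset values and the http://example.org/ namespace are made up, and the timestamps are epoch milliseconds because that is what the converter divides by 1000.

# Illustration only: the Asset values and the namespace below are placeholders.
from cognite.client.data_classes import Asset
from rdflib import Namespace

from cognite.neat.graph.extractors import AssetsExtractor

namespace = Namespace("http://example.org/")
asset = Asset(
    id=123,
    name="Pump 42",
    external_id="pump-42",
    parent_id=100,
    created_time=1_700_000_000_000,
    last_updated_time=1_700_000_000_000,
)

for subject, predicate, obj in AssetsExtractor._asset2triples(asset, namespace):
    print(subject, predicate, obj)
# Expected shape: an rdf:type triple pointing at the Asset class, literals for
# name/external_id and the two timestamps, and a parent edge to http://example.org/100.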
cognite/neat/graph/extractors/_classic_cdf/_events.py
@@ -0,0 +1,117 @@
+from collections.abc import Iterable
+from datetime import datetime
+from pathlib import Path
+from typing import cast
+
+import pytz
+from cognite.client import CogniteClient
+from cognite.client.data_classes import Event, EventList
+from pydantic import AnyHttpUrl, ValidationError
+from rdflib import RDF, Literal, Namespace, URIRef
+
+from cognite.neat.constants import DEFAULT_NAMESPACE
+from cognite.neat.graph.extractors._base import BaseExtractor
+from cognite.neat.graph.models import Triple
+from cognite.neat.utils.utils import string_to_ideal_type
+
+
+class EventsExtractor(BaseExtractor):
+    def __init__(
+        self,
+        events: Iterable[Event],
+        namespace: Namespace | None = None,
+    ):
+        self.namespace = namespace or DEFAULT_NAMESPACE
+        self.events = events
+
+    @classmethod
+    def from_dataset(
+        cls,
+        client: CogniteClient,
+        data_set_external_id: str,
+        namespace: Namespace | None = None,
+    ):
+        return cls(cast(Iterable[Event], client.events(data_set_external_ids=data_set_external_id)), namespace)
+
+    @classmethod
+    def from_file(cls, file_path: str, namespace: Namespace | None = None):
+        return cls(EventList.load(Path(file_path).read_text()), namespace)
+
+    def extract(self) -> Iterable[Triple]:
+        """Extract events as triples."""
+        for event in self.events:
+            yield from self._event2triples(event, self.namespace)
+
+    @classmethod
+    def _event2triples(cls, event: Event, namespace: Namespace) -> list[Triple]:
+        id_ = namespace[str(event.id)]
+
+        # Set rdf type
+        triples: list[Triple] = [(id_, RDF.type, namespace.Event)]
+
+        # Create attributes
+
+        if event.external_id:
+            triples.append((id_, namespace.external_id, Literal(event.external_id)))
+
+        if event.source:
+            triples.append((id_, namespace.type, Literal(event.source)))
+
+        if event.type:
+            triples.append((id_, namespace.type, Literal(event.type)))
+
+        if event.subtype:
+            triples.append((id_, namespace.subtype, Literal(event.subtype)))
+
+        if event.metadata:
+            for key, value in event.metadata.items():
+                if value:
+                    type_aware_value = string_to_ideal_type(value)
+                    try:
+                        triples.append((id_, namespace[key], URIRef(str(AnyHttpUrl(type_aware_value)))))  # type: ignore
+                    except ValidationError:
+                        triples.append((id_, namespace[key], Literal(type_aware_value)))
+
+        if event.description:
+            triples.append((id_, namespace.description, Literal(event.description)))
+
+        if event.created_time:
+            triples.append(
+                (id_, namespace.created_time, Literal(datetime.fromtimestamp(event.created_time / 1000, pytz.utc)))
+            )
+
+        if event.last_updated_time:
+            triples.append(
+                (
+                    id_,
+                    namespace.last_updated_time,
+                    Literal(datetime.fromtimestamp(event.last_updated_time / 1000, pytz.utc)),
+                )
+            )
+
+        if event.start_time:
+            triples.append(
+                (
+                    id_,
+                    namespace.start_time,
+                    Literal(datetime.fromtimestamp(event.start_time / 1000, pytz.utc)),
+                )
+            )
+
+        if event.end_time:
+            triples.append(
+                (
+                    id_,
+                    namespace.end_time,
+                    Literal(datetime.fromtimestamp(event.end_time / 1000, pytz.utc)),
+                )
+            )
+
+        if event.data_set_id:
+            triples.append((id_, namespace.data_set_id, namespace[str(event.data_set_id)]))
+
+        if event.asset_ids:
+            for asset_id in event.asset_ids:
+                triples.append((id_, namespace.asset, namespace[str(asset_id)]))
+
+        return triples
cognite/neat/graph/extractors/_classic_cdf/_files.py
@@ -0,0 +1,131 @@
+from collections.abc import Iterable
+from datetime import datetime
+from pathlib import Path
+from typing import cast
+from urllib.parse import quote
+
+import pytz
+from cognite.client import CogniteClient
+from cognite.client.data_classes import FileMetadata, FileMetadataList
+from pydantic import AnyHttpUrl, ValidationError
+from rdflib import RDF, Literal, Namespace, URIRef
+
+from cognite.neat.constants import DEFAULT_NAMESPACE
+from cognite.neat.graph.extractors._base import BaseExtractor
+from cognite.neat.graph.models import Triple
+from cognite.neat.utils.utils import string_to_ideal_type
+
+
+class FilesExtractor(BaseExtractor):
+    def __init__(
+        self,
+        events: Iterable[FileMetadata],
+        namespace: Namespace | None = None,
+    ):
+        self.namespace = namespace or DEFAULT_NAMESPACE
+        self.events = events
+
+    @classmethod
+    def from_dataset(
+        cls,
+        client: CogniteClient,
+        data_set_external_id: str,
+        namespace: Namespace | None = None,
+    ):
+        return cls(cast(Iterable[FileMetadata], client.files(data_set_external_ids=data_set_external_id)), namespace)
+
+    @classmethod
+    def from_file(cls, file_path: str, namespace: Namespace | None = None):
+        return cls(FileMetadataList.load(Path(file_path).read_text()), namespace)
+
+    def extract(self) -> Iterable[Triple]:
+        """Extract files metadata as triples."""
+        for event in self.events:
+            yield from self._file2triples(event, self.namespace)
+
+    @classmethod
+    def _file2triples(cls, file: FileMetadata, namespace: Namespace) -> list[Triple]:
+        id_ = namespace[str(file.id)]
+
+        # Set rdf type
+        triples: list[Triple] = [(id_, RDF.type, namespace.File)]
+
+        # Create attributes
+
+        if file.external_id:
+            triples.append((id_, namespace.external_id, Literal(file.external_id)))
+
+        if file.source:
+            triples.append((id_, namespace.type, Literal(file.source)))
+
+        if file.mime_type:
+            triples.append((id_, namespace.mime_type, Literal(file.mime_type)))
+
+        if file.uploaded:
+            triples.append((id_, namespace.uploaded, Literal(file.uploaded)))
+
+        if file.source:
+            triples.append((id_, namespace.source, Literal(file.source)))
+
+        if file.metadata:
+            for key, value in file.metadata.items():
+                if value:
+                    type_aware_value = string_to_ideal_type(value)
+                    try:
+                        triples.append((id_, namespace[key], URIRef(str(AnyHttpUrl(type_aware_value)))))  # type: ignore
+                    except ValidationError:
+                        triples.append((id_, namespace[key], Literal(type_aware_value)))
+
+        if file.source_created_time:
+            triples.append(
+                (
+                    id_,
+                    namespace.source_created_time,
+                    Literal(datetime.fromtimestamp(file.source_created_time / 1000, pytz.utc)),
+                )
+            )
+        if file.source_modified_time:
+            triples.append(
+                (
+                    id_,
+                    namespace.source_created_time,
+                    Literal(datetime.fromtimestamp(file.source_modified_time / 1000, pytz.utc)),
+                )
+            )
+        if file.uploaded_time:
+            triples.append(
+                (id_, namespace.uploaded_time, Literal(datetime.fromtimestamp(file.uploaded_time / 1000, pytz.utc)))
+            )
+
+        if file.created_time:
+            triples.append(
+                (id_, namespace.created_time, Literal(datetime.fromtimestamp(file.created_time / 1000, pytz.utc)))
+            )
+
+        if file.last_updated_time:
+            triples.append(
+                (
+                    id_,
+                    namespace.last_updated_time,
+                    Literal(datetime.fromtimestamp(file.last_updated_time / 1000, pytz.utc)),
+                )
+            )
+
+        if file.labels:
+            for label in file.labels:
+                # external_id can create ill-formed URIs, so we create websafe URIs
+                # since labels do not have internal ids, we use the external_id as the id
+                triples.append((id_, namespace.label, namespace[quote(label.dump()["externalId"])]))
+
+        if file.security_categories:
+            for category in file.security_categories:
+                triples.append((id_, namespace.security_categories, Literal(category)))
+
+        if file.data_set_id:
+            triples.append((id_, namespace.data_set_id, namespace[str(file.data_set_id)]))
+
+        if file.asset_ids:
+            for asset_id in file.asset_ids:
+                triples.append((id_, namespace.asset, namespace[str(asset_id)]))
+
+        return triples
cognite/neat/graph/extractors/_classic_cdf/_labels.py
@@ -0,0 +1,72 @@
+from collections.abc import Iterable
+from datetime import datetime
+from pathlib import Path
+from typing import cast
+from urllib.parse import quote
+
+import pytz
+from cognite.client import CogniteClient
+from cognite.client.data_classes import LabelDefinition, LabelDefinitionList
+from rdflib import RDF, Literal, Namespace
+
+from cognite.neat.constants import DEFAULT_NAMESPACE
+from cognite.neat.graph.extractors._base import BaseExtractor
+from cognite.neat.graph.models import Triple
+
+
+class LabelsExtractor(BaseExtractor):
+    def __init__(
+        self,
+        labels: Iterable[LabelDefinition],
+        namespace: Namespace | None = None,
+    ):
+        self.namespace = namespace or DEFAULT_NAMESPACE
+        self.labels = labels
+
+    @classmethod
+    def from_dataset(
+        cls,
+        client: CogniteClient,
+        data_set_external_id: str,
+        namespace: Namespace | None = None,
+    ):
+        return cls(
+            cast(Iterable[LabelDefinition], client.labels(data_set_external_ids=data_set_external_id)), namespace
+        )
+
+    @classmethod
+    def from_file(cls, file_path: str, namespace: Namespace | None = None):
+        return cls(LabelDefinitionList.load(Path(file_path).read_text()), namespace)
+
+    def extract(self) -> Iterable[Triple]:
+        """Extract labels as triples."""
+        for label in self.labels:
+            yield from self._labels2triples(label, self.namespace)
+
+    @classmethod
+    def _labels2triples(cls, label: LabelDefinition, namespace: Namespace) -> list[Triple]:
+        id_ = namespace[quote(cast(str, label.external_id))]
+
+        # Set rdf type
+        triples: list[Triple] = [(id_, RDF.type, namespace.Label)]
+
+        # Create attributes
+
+        if label.external_id:
+            triples.append((id_, namespace.external_id, Literal(label.external_id)))
+
+        if label.name:
+            triples.append((id_, namespace.name, Literal(label.name)))
+
+        if label.description:
+            triples.append((id_, namespace.description, Literal(label.description)))
+
+        if label.created_time:
+            triples.append(
+                (id_, namespace.created_time, Literal(datetime.fromtimestamp(label.created_time / 1000, pytz.utc)))
+            )
+
+        if label.data_set_id:
+            triples.append((id_, namespace.data_set_id, namespace[str(label.data_set_id)]))
+
+        return triples
cognite/neat/graph/extractors/_classic_cdf/_relationships.py
@@ -0,0 +1,153 @@
+import uuid
+from collections.abc import Iterable
+from datetime import datetime
+from pathlib import Path
+from typing import cast
+from urllib.parse import quote
+
+import pytz
+from cognite.client import CogniteClient
+from cognite.client.data_classes import Relationship, RelationshipList
+from rdflib import RDF, Literal, Namespace
+
+from cognite.neat.constants import DEFAULT_NAMESPACE
+from cognite.neat.graph.extractors._base import BaseExtractor
+from cognite.neat.graph.models import Triple
+
+
+class RelationshipsExtractor(BaseExtractor):
+    def __init__(
+        self,
+        relationships: Iterable[Relationship],
+        namespace: Namespace | None = None,
+    ):
+        self.namespace = namespace or DEFAULT_NAMESPACE
+        self.relationships = relationships
+
+    @classmethod
+    def from_dataset(
+        cls,
+        client: CogniteClient,
+        data_set_external_id: str,
+        namespace: Namespace | None = None,
+    ):
+        return cls(
+            cast(Iterable[Relationship], client.relationships(data_set_external_ids=data_set_external_id)), namespace
+        )
+
+    @classmethod
+    def from_file(cls, file_path: str, namespace: Namespace | None = None):
+        return cls(RelationshipList.load(Path(file_path).read_text()), namespace)
+
+    def extract(self) -> Iterable[Triple]:
+        """Extracts an asset with the given asset_id."""
+        for relationship in self.relationships:
+            yield from self._relationship2triples(relationship, self.namespace)
+
+    @classmethod
+    def _relationship2triples(cls, relationship: Relationship, namespace: Namespace) -> list[Triple]:
+        """Converts an asset to triples."""
+
+        # relationships do not have an internal id, so we generate one
+        id_ = namespace[str(uuid.uuid4())]
+
+        # Set rdf type
+        triples: list[Triple] = [(id_, RDF.type, namespace["Relationship"])]
+
+        # Set source and target types
+        if relationship.source_type:
+            triples.append(
+                (
+                    id_,
+                    namespace.source_type,
+                    namespace[relationship.source_type.title()],
+                )
+            )
+
+        if relationship.target_type:
+            triples.append(
+                (
+                    id_,
+                    namespace.target_type,
+                    namespace[relationship.target_type.title()],
+                )
+            )
+
+        # Create attributes
+        if relationship.external_id:
+            triples.append((id_, namespace.external_id, Literal(relationship.external_id)))
+
+        if relationship.source_external_id:
+            triples.append(
+                (
+                    id_,
+                    namespace.source_external_id,
+                    Literal(relationship.source_external_id),
+                )
+            )
+
+        if relationship.target_external_id:
+            triples.append(
+                (
+                    id_,
+                    namespace.target_external_id,
+                    Literal(relationship.target_external_id),
+                )
+            )
+
+        if relationship.start_time:
+            triples.append(
+                (
+                    id_,
+                    namespace.start_time,
+                    Literal(datetime.fromtimestamp(relationship.start_time / 1000, pytz.utc)),
+                )
+            )
+
+        if relationship.end_time:
+            triples.append(
+                (
+                    id_,
+                    namespace.end_time,
+                    Literal(datetime.fromtimestamp(relationship.end_time / 1000, pytz.utc)),
+                )
+            )
+
+        if relationship.created_time:
+            triples.append(
+                (
+                    id_,
+                    namespace.created_time,
+                    Literal(datetime.fromtimestamp(relationship.created_time / 1000, pytz.utc)),
+                )
+            )
+
+        if relationship.last_updated_time:
+            triples.append(
+                (
+                    id_,
+                    namespace.last_updated_time,
+                    Literal(datetime.fromtimestamp(relationship.last_updated_time / 1000, pytz.utc)),
+                )
+            )
+
+        if relationship.confidence:
+            triples.append(
+                (
+                    id_,
+                    namespace.confidence,
+                    Literal(relationship.confidence),
+                )
+            )
+
+        if relationship.labels:
+            for label in relationship.labels:
+                # external_id can create ill-formed URIs, so we create websafe URIs
+                # since labels do not have internal ids, we use the external_id as the id
+                triples.append((id_, namespace.label, namespace[quote(label.dump()["externalId"])]))
+
+        # Create connection
+        if relationship.data_set_id:
+            triples.append((id_, namespace.dataset, namespace[str(relationship.data_set_id)]))
+
+        return triples