cognite-neat 0.97.3__py3-none-any.whl → 0.99.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_client/__init__.py +4 -0
- cognite/neat/_client/_api/data_modeling_loaders.py +512 -0
- cognite/neat/_client/_api/schema.py +50 -0
- cognite/neat/_client/_api_client.py +17 -0
- cognite/neat/_client/data_classes/__init__.py +0 -0
- cognite/neat/{_utils/cdf/data_classes.py → _client/data_classes/data_modeling.py} +8 -135
- cognite/neat/{_rules/models/dms/_schema.py → _client/data_classes/schema.py} +32 -281
- cognite/neat/_graph/_shared.py +14 -15
- cognite/neat/_graph/extractors/_classic_cdf/_assets.py +14 -154
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +154 -7
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +23 -12
- cognite/neat/_graph/extractors/_classic_cdf/_data_sets.py +17 -92
- cognite/neat/_graph/extractors/_classic_cdf/_events.py +13 -162
- cognite/neat/_graph/extractors/_classic_cdf/_files.py +15 -179
- cognite/neat/_graph/extractors/_classic_cdf/_labels.py +32 -100
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +27 -178
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +14 -139
- cognite/neat/_graph/extractors/_classic_cdf/_timeseries.py +15 -173
- cognite/neat/_graph/extractors/_rdf_file.py +6 -7
- cognite/neat/_graph/loaders/__init__.py +1 -2
- cognite/neat/_graph/queries/_base.py +17 -1
- cognite/neat/_graph/transformers/_classic_cdf.py +50 -134
- cognite/neat/_graph/transformers/_prune_graph.py +1 -1
- cognite/neat/_graph/transformers/_rdfpath.py +1 -1
- cognite/neat/_issues/warnings/__init__.py +6 -0
- cognite/neat/_issues/warnings/_external.py +8 -0
- cognite/neat/_issues/warnings/_models.py +9 -0
- cognite/neat/_issues/warnings/_properties.py +16 -0
- cognite/neat/_rules/_constants.py +7 -6
- cognite/neat/_rules/_shared.py +3 -8
- cognite/neat/_rules/analysis/__init__.py +1 -2
- cognite/neat/_rules/analysis/_base.py +10 -27
- cognite/neat/_rules/analysis/_dms.py +4 -10
- cognite/neat/_rules/analysis/_information.py +2 -10
- cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
- cognite/neat/_rules/exporters/_base.py +3 -4
- cognite/neat/_rules/exporters/_rules2dms.py +29 -40
- cognite/neat/_rules/exporters/_rules2excel.py +15 -72
- cognite/neat/_rules/exporters/_rules2ontology.py +4 -4
- cognite/neat/_rules/importers/_base.py +3 -4
- cognite/neat/_rules/importers/_dms2rules.py +21 -45
- cognite/neat/_rules/importers/_dtdl2rules/dtdl_converter.py +1 -7
- cognite/neat/_rules/importers/_dtdl2rules/dtdl_importer.py +7 -10
- cognite/neat/_rules/importers/_rdf/_base.py +17 -29
- cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2classes.py +2 -2
- cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2metadata.py +5 -10
- cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2properties.py +1 -2
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +55 -51
- cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2classes.py +2 -2
- cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2metadata.py +5 -8
- cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2properties.py +1 -2
- cognite/neat/_rules/importers/_rdf/_shared.py +25 -140
- cognite/neat/_rules/importers/_spreadsheet2rules.py +10 -41
- cognite/neat/_rules/models/__init__.py +3 -17
- cognite/neat/_rules/models/_base_rules.py +118 -62
- cognite/neat/_rules/models/dms/__init__.py +2 -2
- cognite/neat/_rules/models/dms/_exporter.py +20 -178
- cognite/neat/_rules/models/dms/_rules.py +65 -128
- cognite/neat/_rules/models/dms/_rules_input.py +72 -56
- cognite/neat/_rules/models/dms/_validation.py +16 -109
- cognite/neat/_rules/models/entities/_single_value.py +32 -4
- cognite/neat/_rules/models/information/_rules.py +19 -122
- cognite/neat/_rules/models/information/_rules_input.py +32 -41
- cognite/neat/_rules/models/information/_validation.py +34 -102
- cognite/neat/_rules/models/mapping/__init__.py +2 -3
- cognite/neat/_rules/models/mapping/_classic2core.py +36 -146
- cognite/neat/_rules/models/mapping/_classic2core.yaml +339 -0
- cognite/neat/_rules/transformers/__init__.py +3 -6
- cognite/neat/_rules/transformers/_converters.py +128 -206
- cognite/neat/_rules/transformers/_mapping.py +105 -34
- cognite/neat/_rules/transformers/_verification.py +5 -16
- cognite/neat/_session/_base.py +83 -21
- cognite/neat/_session/_collector.py +126 -0
- cognite/neat/_session/_drop.py +35 -0
- cognite/neat/_session/_inspect.py +22 -10
- cognite/neat/_session/_mapping.py +39 -0
- cognite/neat/_session/_prepare.py +222 -27
- cognite/neat/_session/_read.py +109 -19
- cognite/neat/_session/_set.py +2 -2
- cognite/neat/_session/_show.py +11 -11
- cognite/neat/_session/_to.py +27 -14
- cognite/neat/_session/exceptions.py +20 -3
- cognite/neat/_store/_base.py +27 -24
- cognite/neat/_store/_provenance.py +2 -2
- cognite/neat/_utils/auxiliary.py +19 -0
- cognite/neat/_utils/rdf_.py +28 -1
- cognite/neat/_version.py +1 -1
- cognite/neat/_workflows/steps/data_contracts.py +2 -10
- cognite/neat/_workflows/steps/lib/current/rules_exporter.py +14 -49
- cognite/neat/_workflows/steps/lib/current/rules_importer.py +4 -1
- cognite/neat/_workflows/steps/lib/current/rules_validator.py +5 -9
- {cognite_neat-0.97.3.dist-info → cognite_neat-0.99.0.dist-info}/METADATA +4 -3
- {cognite_neat-0.97.3.dist-info → cognite_neat-0.99.0.dist-info}/RECORD +97 -100
- cognite/neat/_graph/loaders/_rdf2asset.py +0 -416
- cognite/neat/_rules/analysis/_asset.py +0 -173
- cognite/neat/_rules/models/asset/__init__.py +0 -13
- cognite/neat/_rules/models/asset/_rules.py +0 -109
- cognite/neat/_rules/models/asset/_rules_input.py +0 -101
- cognite/neat/_rules/models/asset/_validation.py +0 -45
- cognite/neat/_rules/models/domain.py +0 -136
- cognite/neat/_rules/models/mapping/_base.py +0 -131
- cognite/neat/_utils/cdf/loaders/__init__.py +0 -25
- cognite/neat/_utils/cdf/loaders/_base.py +0 -54
- cognite/neat/_utils/cdf/loaders/_data_modeling.py +0 -339
- cognite/neat/_utils/cdf/loaders/_ingestion.py +0 -167
- /cognite/neat/{_utils/cdf → _client/_api}/__init__.py +0 -0
- {cognite_neat-0.97.3.dist-info → cognite_neat-0.99.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.97.3.dist-info → cognite_neat-0.99.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.97.3.dist-info → cognite_neat-0.99.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,177 +1,37 @@
|
|
|
1
|
-
from collections.abc import Callable, Iterable, Set
|
|
2
|
-
from datetime import datetime, timezone
|
|
1
|
+
from collections.abc import Iterable
|
|
3
2
|
from pathlib import Path
|
|
4
3
|
from typing import cast
|
|
5
4
|
|
|
6
5
|
from cognite.client import CogniteClient
|
|
7
6
|
from cognite.client.data_classes import Asset, AssetFilter, AssetList
|
|
8
|
-
from rdflib import RDF, Literal, Namespace
|
|
9
7
|
|
|
10
|
-
from cognite.neat._shared import Triple
|
|
11
|
-
|
|
12
|
-
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
13
|
-
from ._labels import LabelsExtractor
|
|
8
|
+
from ._base import ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
14
9
|
|
|
15
10
|
|
|
16
11
|
class AssetsExtractor(ClassicCDFBaseExtractor[Asset]):
|
|
17
|
-
"""Extract data from Cognite Data Fusions Assets into Neat.
|
|
18
|
-
|
|
19
|
-
Args:
|
|
20
|
-
items (Iterable[Asset]): An iterable of assets.
|
|
21
|
-
namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
|
|
22
|
-
to_type (Callable[[Asset], str | None], optional): A function to convert an asset to a type. Defaults to None.
|
|
23
|
-
If None or if the function returns None, the asset will be set to the default type "Asset".
|
|
24
|
-
total (int, optional): The total number of assets to load. If passed, you will get a progress bar if rich
|
|
25
|
-
is installed. Defaults to None.
|
|
26
|
-
limit (int, optional): The maximal number of assets to load. Defaults to None. This is typically used for
|
|
27
|
-
testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
|
|
28
|
-
limit the extraction to 1000 assets to test the setup.
|
|
29
|
-
unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
|
|
30
|
-
a JSON string.
|
|
31
|
-
skip_metadata_values (set[str] | frozenset[str] | None, optional): A set of values to skip when unpacking
|
|
32
|
-
metadata. Defaults to frozenset({"nan", "null", "none", ""}).
|
|
33
|
-
"""
|
|
12
|
+
"""Extract data from Cognite Data Fusions Assets into Neat."""
|
|
34
13
|
|
|
35
14
|
_default_rdf_type = "Asset"
|
|
15
|
+
_instance_id_prefix = InstanceIdPrefix.asset
|
|
36
16
|
|
|
37
17
|
@classmethod
|
|
38
|
-
def from_dataset(
|
|
39
|
-
cls,
|
|
40
|
-
client: CogniteClient,
|
|
41
|
-
data_set_external_id: str,
|
|
42
|
-
namespace: Namespace | None = None,
|
|
43
|
-
to_type: Callable[[Asset], str | None] | None = None,
|
|
44
|
-
limit: int | None = None,
|
|
45
|
-
unpack_metadata: bool = True,
|
|
46
|
-
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
47
|
-
):
|
|
18
|
+
def _from_dataset(cls, client: CogniteClient, data_set_external_id: str) -> tuple[int | None, Iterable[Asset]]:
|
|
48
19
|
total = client.assets.aggregate_count(filter=AssetFilter(data_set_ids=[{"externalId": data_set_external_id}]))
|
|
49
|
-
|
|
50
|
-
return cls(
|
|
51
|
-
client.assets(data_set_external_ids=data_set_external_id),
|
|
52
|
-
namespace,
|
|
53
|
-
to_type,
|
|
54
|
-
total,
|
|
55
|
-
limit,
|
|
56
|
-
unpack_metadata=unpack_metadata,
|
|
57
|
-
skip_metadata_values=skip_metadata_values,
|
|
58
|
-
)
|
|
20
|
+
items = client.assets(data_set_external_ids=data_set_external_id)
|
|
21
|
+
return total, items
|
|
59
22
|
|
|
60
23
|
@classmethod
|
|
61
|
-
def from_hierarchy(
|
|
62
|
-
cls,
|
|
63
|
-
client: CogniteClient,
|
|
64
|
-
root_asset_external_id: str,
|
|
65
|
-
namespace: Namespace | None = None,
|
|
66
|
-
to_type: Callable[[Asset], str | None] | None = None,
|
|
67
|
-
limit: int | None = None,
|
|
68
|
-
unpack_metadata: bool = True,
|
|
69
|
-
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
70
|
-
):
|
|
24
|
+
def _from_hierarchy(cls, client: CogniteClient, root_asset_external_id: str) -> tuple[int | None, Iterable[Asset]]:
|
|
71
25
|
total = client.assets.aggregate_count(
|
|
72
26
|
filter=AssetFilter(asset_subtree_ids=[{"externalId": root_asset_external_id}])
|
|
73
27
|
)
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
Iterable[Asset],
|
|
78
|
-
client.assets(asset_subtree_external_ids=root_asset_external_id),
|
|
79
|
-
),
|
|
80
|
-
namespace,
|
|
81
|
-
to_type,
|
|
82
|
-
total,
|
|
83
|
-
limit,
|
|
84
|
-
unpack_metadata=unpack_metadata,
|
|
85
|
-
skip_metadata_values=skip_metadata_values,
|
|
28
|
+
items = cast(
|
|
29
|
+
Iterable[Asset],
|
|
30
|
+
client.assets(asset_subtree_external_ids=root_asset_external_id),
|
|
86
31
|
)
|
|
32
|
+
return total, items
|
|
87
33
|
|
|
88
34
|
@classmethod
|
|
89
|
-
def from_file(
|
|
90
|
-
cls,
|
|
91
|
-
file_path: str,
|
|
92
|
-
namespace: Namespace | None = None,
|
|
93
|
-
to_type: Callable[[Asset], str] | None = None,
|
|
94
|
-
limit: int | None = None,
|
|
95
|
-
unpack_metadata: bool = True,
|
|
96
|
-
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
97
|
-
):
|
|
35
|
+
def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[Asset]]:
|
|
98
36
|
assets = AssetList.load(Path(file_path).read_text())
|
|
99
|
-
return cls(
|
|
100
|
-
assets,
|
|
101
|
-
namespace,
|
|
102
|
-
to_type,
|
|
103
|
-
total=len(assets),
|
|
104
|
-
limit=limit,
|
|
105
|
-
unpack_metadata=unpack_metadata,
|
|
106
|
-
skip_metadata_values=skip_metadata_values,
|
|
107
|
-
)
|
|
108
|
-
|
|
109
|
-
def _item2triples(self, asset: Asset) -> list[Triple]:
|
|
110
|
-
"""Converts an asset to triples."""
|
|
111
|
-
id_ = self.namespace[f"{InstanceIdPrefix.asset}{asset.id}"]
|
|
112
|
-
|
|
113
|
-
type_ = self._get_rdf_type(asset)
|
|
114
|
-
|
|
115
|
-
triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
|
|
116
|
-
|
|
117
|
-
# Create attributes
|
|
118
|
-
if asset.name:
|
|
119
|
-
triples.append((id_, self.namespace.name, Literal(asset.name)))
|
|
120
|
-
|
|
121
|
-
if asset.description:
|
|
122
|
-
triples.append((id_, self.namespace.description, Literal(asset.description)))
|
|
123
|
-
|
|
124
|
-
if asset.external_id:
|
|
125
|
-
triples.append((id_, self.namespace.external_id, Literal(asset.external_id)))
|
|
126
|
-
|
|
127
|
-
if asset.source:
|
|
128
|
-
triples.append((id_, self.namespace.source, Literal(asset.source)))
|
|
129
|
-
|
|
130
|
-
# properties' ref creation and update
|
|
131
|
-
triples.append(
|
|
132
|
-
(
|
|
133
|
-
id_,
|
|
134
|
-
self.namespace.created_time,
|
|
135
|
-
Literal(datetime.fromtimestamp(asset.created_time / 1000, timezone.utc)),
|
|
136
|
-
)
|
|
137
|
-
)
|
|
138
|
-
triples.append(
|
|
139
|
-
(
|
|
140
|
-
id_,
|
|
141
|
-
self.namespace.last_updated_time,
|
|
142
|
-
Literal(datetime.fromtimestamp(asset.last_updated_time / 1000, timezone.utc)),
|
|
143
|
-
)
|
|
144
|
-
)
|
|
145
|
-
|
|
146
|
-
if asset.labels:
|
|
147
|
-
for label in asset.labels:
|
|
148
|
-
# external_id can create ill-formed URIs, so we create websafe URIs
|
|
149
|
-
# since labels do not have internal ids, we use the external_id as the id
|
|
150
|
-
triples.append(
|
|
151
|
-
(
|
|
152
|
-
id_,
|
|
153
|
-
self.namespace.label,
|
|
154
|
-
self.namespace[f"{InstanceIdPrefix.label}{LabelsExtractor._label_id(label)}"],
|
|
155
|
-
)
|
|
156
|
-
)
|
|
157
|
-
|
|
158
|
-
if asset.metadata:
|
|
159
|
-
triples.extend(self._metadata_to_triples(id_, asset.metadata))
|
|
160
|
-
|
|
161
|
-
# Create connections:
|
|
162
|
-
if asset.parent_id:
|
|
163
|
-
triples.append((id_, self.namespace.parent, self.namespace[f"{InstanceIdPrefix.asset}{asset.parent_id}"]))
|
|
164
|
-
|
|
165
|
-
if asset.root_id:
|
|
166
|
-
triples.append((id_, self.namespace.root, self.namespace[f"{InstanceIdPrefix.asset}{asset.root_id}"]))
|
|
167
|
-
|
|
168
|
-
if asset.data_set_id:
|
|
169
|
-
triples.append(
|
|
170
|
-
(
|
|
171
|
-
id_,
|
|
172
|
-
self.namespace.dataset,
|
|
173
|
-
self.namespace[f"{InstanceIdPrefix.data_set}{asset.data_set_id}"],
|
|
174
|
-
)
|
|
175
|
-
)
|
|
176
|
-
|
|
177
|
-
return triples
|
|
37
|
+
return len(assets), assets
|
|
@@ -2,18 +2,22 @@ import json
|
|
|
2
2
|
import re
|
|
3
3
|
import sys
|
|
4
4
|
from abc import ABC, abstractmethod
|
|
5
|
-
from collections.abc import Callable, Iterable, Set
|
|
6
|
-
from
|
|
5
|
+
from collections.abc import Callable, Iterable, Sequence, Set
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, Generic, TypeVar
|
|
7
9
|
|
|
8
|
-
from cognite.client
|
|
9
|
-
from
|
|
10
|
+
from cognite.client import CogniteClient
|
|
11
|
+
from cognite.client.data_classes._base import WriteableCogniteResource
|
|
12
|
+
from pydantic import AnyHttpUrl, ValidationError
|
|
13
|
+
from rdflib import RDF, XSD, Literal, Namespace, URIRef
|
|
10
14
|
|
|
11
15
|
from cognite.neat._constants import DEFAULT_NAMESPACE
|
|
12
16
|
from cognite.neat._graph.extractors._base import BaseExtractor
|
|
13
17
|
from cognite.neat._shared import Triple
|
|
14
18
|
from cognite.neat._utils.auxiliary import string_to_ideal_type
|
|
15
19
|
|
|
16
|
-
T_CogniteResource = TypeVar("T_CogniteResource", bound=
|
|
20
|
+
T_CogniteResource = TypeVar("T_CogniteResource", bound=WriteableCogniteResource)
|
|
17
21
|
|
|
18
22
|
DEFAULT_SKIP_METADATA_VALUES = frozenset({"nan", "null", "none", ""})
|
|
19
23
|
|
|
@@ -61,9 +65,13 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
|
|
|
61
65
|
a JSON string.
|
|
62
66
|
skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
|
|
63
67
|
values in this set will be skipped.
|
|
68
|
+
camel_case (bool, optional): Whether to use camelCase instead of snake_case for property names.
|
|
69
|
+
Defaults to True.
|
|
70
|
+
as_write (bool, optional): Whether to use the write/request format of the items. Defaults to False.
|
|
64
71
|
"""
|
|
65
72
|
|
|
66
73
|
_default_rdf_type: str
|
|
74
|
+
_instance_id_prefix: str
|
|
67
75
|
_SPACE_PATTERN = re.compile(r"\s+")
|
|
68
76
|
|
|
69
77
|
def __init__(
|
|
@@ -75,6 +83,8 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
|
|
|
75
83
|
limit: int | None = None,
|
|
76
84
|
unpack_metadata: bool = True,
|
|
77
85
|
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
86
|
+
camel_case: bool = True,
|
|
87
|
+
as_write: bool = False,
|
|
78
88
|
):
|
|
79
89
|
self.namespace = namespace or DEFAULT_NAMESPACE
|
|
80
90
|
self.items = items
|
|
@@ -83,6 +93,8 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
|
|
|
83
93
|
self.limit = min(limit, total) if limit and total else limit
|
|
84
94
|
self.unpack_metadata = unpack_metadata
|
|
85
95
|
self.skip_metadata_values = skip_metadata_values
|
|
96
|
+
self.camel_case = camel_case
|
|
97
|
+
self.as_write = as_write
|
|
86
98
|
|
|
87
99
|
def extract(self) -> Iterable[Triple]:
|
|
88
100
|
"""Extracts an asset with the given asset_id."""
|
|
@@ -104,9 +116,48 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
|
|
|
104
116
|
if self.limit and no >= self.limit:
|
|
105
117
|
break
|
|
106
118
|
|
|
107
|
-
@abstractmethod
|
|
108
119
|
def _item2triples(self, item: T_CogniteResource) -> list[Triple]:
|
|
109
|
-
|
|
120
|
+
id_value: str | None
|
|
121
|
+
if hasattr(item, "id"):
|
|
122
|
+
id_value = str(item.id)
|
|
123
|
+
else:
|
|
124
|
+
id_value = self._fallback_id(item)
|
|
125
|
+
if id_value is None:
|
|
126
|
+
return []
|
|
127
|
+
|
|
128
|
+
id_ = self.namespace[f"{self._instance_id_prefix}{id_value}"]
|
|
129
|
+
|
|
130
|
+
type_ = self._get_rdf_type(item)
|
|
131
|
+
|
|
132
|
+
# Set rdf type
|
|
133
|
+
triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
|
|
134
|
+
if self.as_write:
|
|
135
|
+
item = item.as_write()
|
|
136
|
+
dumped = item.dump(self.camel_case)
|
|
137
|
+
dumped.pop("id", None)
|
|
138
|
+
# We have parentId so we don't need parentExternalId
|
|
139
|
+
dumped.pop("parentExternalId", None)
|
|
140
|
+
if "metadata" in dumped:
|
|
141
|
+
triples.extend(self._metadata_to_triples(id_, dumped.pop("metadata")))
|
|
142
|
+
if "columns" in dumped:
|
|
143
|
+
columns = dumped.pop("columns")
|
|
144
|
+
triples.append(
|
|
145
|
+
(id_, self.namespace.columns, Literal(json.dumps({"columns": columns}), datatype=XSD._NS["json"]))
|
|
146
|
+
)
|
|
147
|
+
|
|
148
|
+
for key, value in dumped.items():
|
|
149
|
+
if value is None or value == []:
|
|
150
|
+
continue
|
|
151
|
+
values = value if isinstance(value, Sequence) and not isinstance(value, str) else [value]
|
|
152
|
+
for raw in values:
|
|
153
|
+
triples.append((id_, self.namespace[key], self._as_object(raw, key)))
|
|
154
|
+
return triples
|
|
155
|
+
|
|
156
|
+
def _fallback_id(self, item: T_CogniteResource) -> str | None:
|
|
157
|
+
raise AttributeError(
|
|
158
|
+
f"Item of type {type(item)} does not have an id attribute. "
|
|
159
|
+
f"Please implement the _fallback_id method in the extractor."
|
|
160
|
+
)
|
|
110
161
|
|
|
111
162
|
def _metadata_to_triples(self, id_: URIRef, metadata: dict[str, str]) -> Iterable[Triple]:
|
|
112
163
|
if self.unpack_metadata:
|
|
@@ -125,3 +176,99 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
|
|
|
125
176
|
if self.to_type:
|
|
126
177
|
type_ = self.to_type(item) or type_
|
|
127
178
|
return self._SPACE_PATTERN.sub("_", type_)
|
|
179
|
+
|
|
180
|
+
def _as_object(self, raw: Any, key: str) -> Literal | URIRef:
|
|
181
|
+
if key in {"data_set_id", "dataSetId"}:
|
|
182
|
+
return self.namespace[f"{InstanceIdPrefix.data_set}{raw}"]
|
|
183
|
+
elif key in {"assetId", "asset_id", "assetIds", "asset_ids", "parentId", "rootId", "parent_id", "root_id"}:
|
|
184
|
+
return self.namespace[f"{InstanceIdPrefix.asset}{raw}"]
|
|
185
|
+
elif key in {
|
|
186
|
+
"startTime",
|
|
187
|
+
"endTime",
|
|
188
|
+
"createdTime",
|
|
189
|
+
"lastUpdatedTime",
|
|
190
|
+
"start_time",
|
|
191
|
+
"end_time",
|
|
192
|
+
"created_time",
|
|
193
|
+
"last_updated_time",
|
|
194
|
+
} and isinstance(raw, int):
|
|
195
|
+
return Literal(datetime.fromtimestamp(raw / 1000, timezone.utc), datatype=XSD.dateTime)
|
|
196
|
+
elif key == "labels":
|
|
197
|
+
from ._labels import LabelsExtractor
|
|
198
|
+
|
|
199
|
+
return self.namespace[f"{InstanceIdPrefix.label}{LabelsExtractor._label_id(raw)}"]
|
|
200
|
+
elif key in {"sourceType", "targetType", "source_type", "target_type"} and isinstance(raw, str):
|
|
201
|
+
# Relationship types. Titled so they can be looked up.
|
|
202
|
+
return self.namespace[raw.title()]
|
|
203
|
+
elif key in {"unit_external_id", "unitExternalId"}:
|
|
204
|
+
try:
|
|
205
|
+
return URIRef(str(AnyHttpUrl(raw)))
|
|
206
|
+
except ValidationError:
|
|
207
|
+
...
|
|
208
|
+
return Literal(raw)
|
|
209
|
+
|
|
210
|
+
@classmethod
|
|
211
|
+
def from_dataset(
|
|
212
|
+
cls,
|
|
213
|
+
client: CogniteClient,
|
|
214
|
+
data_set_external_id: str,
|
|
215
|
+
namespace: Namespace | None = None,
|
|
216
|
+
to_type: Callable[[T_CogniteResource], str | None] | None = None,
|
|
217
|
+
limit: int | None = None,
|
|
218
|
+
unpack_metadata: bool = True,
|
|
219
|
+
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
220
|
+
camel_case: bool = True,
|
|
221
|
+
as_write: bool = False,
|
|
222
|
+
):
|
|
223
|
+
total, items = cls._from_dataset(client, data_set_external_id)
|
|
224
|
+
return cls(items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write)
|
|
225
|
+
|
|
226
|
+
@classmethod
|
|
227
|
+
@abstractmethod
|
|
228
|
+
def _from_dataset(
|
|
229
|
+
cls, client: CogniteClient, data_set_external_id: str
|
|
230
|
+
) -> tuple[int | None, Iterable[T_CogniteResource]]:
|
|
231
|
+
raise NotImplementedError
|
|
232
|
+
|
|
233
|
+
@classmethod
|
|
234
|
+
def from_hierarchy(
|
|
235
|
+
cls,
|
|
236
|
+
client: CogniteClient,
|
|
237
|
+
root_asset_external_id: str,
|
|
238
|
+
namespace: Namespace | None = None,
|
|
239
|
+
to_type: Callable[[T_CogniteResource], str | None] | None = None,
|
|
240
|
+
limit: int | None = None,
|
|
241
|
+
unpack_metadata: bool = True,
|
|
242
|
+
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
243
|
+
camel_case: bool = True,
|
|
244
|
+
as_write: bool = False,
|
|
245
|
+
):
|
|
246
|
+
total, items = cls._from_hierarchy(client, root_asset_external_id)
|
|
247
|
+
return cls(items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write)
|
|
248
|
+
|
|
249
|
+
@classmethod
|
|
250
|
+
@abstractmethod
|
|
251
|
+
def _from_hierarchy(
|
|
252
|
+
cls, client: CogniteClient, root_asset_external_id: str
|
|
253
|
+
) -> tuple[int | None, Iterable[T_CogniteResource]]:
|
|
254
|
+
raise NotImplementedError
|
|
255
|
+
|
|
256
|
+
@classmethod
|
|
257
|
+
def from_file(
|
|
258
|
+
cls,
|
|
259
|
+
file_path: str | Path,
|
|
260
|
+
namespace: Namespace | None = None,
|
|
261
|
+
to_type: Callable[[T_CogniteResource], str | None] | None = None,
|
|
262
|
+
limit: int | None = None,
|
|
263
|
+
unpack_metadata: bool = True,
|
|
264
|
+
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
265
|
+
camel_case: bool = True,
|
|
266
|
+
as_write: bool = False,
|
|
267
|
+
):
|
|
268
|
+
total, items = cls._from_file(file_path)
|
|
269
|
+
return cls(items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write)
|
|
270
|
+
|
|
271
|
+
@classmethod
|
|
272
|
+
@abstractmethod
|
|
273
|
+
def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[T_CogniteResource]]:
|
|
274
|
+
raise NotImplementedError
|
|
@@ -1,12 +1,15 @@
|
|
|
1
|
+
import warnings
|
|
1
2
|
from collections import defaultdict
|
|
2
3
|
from collections.abc import Iterable, Sequence
|
|
3
4
|
from typing import ClassVar, NamedTuple
|
|
4
5
|
|
|
5
6
|
from cognite.client import CogniteClient
|
|
7
|
+
from cognite.client.exceptions import CogniteAPIError
|
|
6
8
|
from rdflib import Namespace
|
|
7
9
|
|
|
8
10
|
from cognite.neat._constants import DEFAULT_NAMESPACE
|
|
9
11
|
from cognite.neat._graph.extractors._base import BaseExtractor
|
|
12
|
+
from cognite.neat._issues.warnings import AuthWarning
|
|
10
13
|
from cognite.neat._shared import Triple
|
|
11
14
|
from cognite.neat._utils.collection_ import chunker
|
|
12
15
|
from cognite.neat._utils.rdf_ import remove_namespace_from_uri
|
|
@@ -96,6 +99,7 @@ class ClassicGraphExtractor(BaseExtractor):
|
|
|
96
99
|
self._root_asset_external_id = root_asset_external_id
|
|
97
100
|
self._data_set_external_id = data_set_external_id
|
|
98
101
|
self._namespace = namespace or DEFAULT_NAMESPACE
|
|
102
|
+
self._extractor_args = dict(namespace=self._namespace, unpack_metadata=False, as_write=True, camel_case=True)
|
|
99
103
|
|
|
100
104
|
self._source_external_ids_by_type: dict[InstanceIdPrefix, set[str]] = defaultdict(set)
|
|
101
105
|
self._target_external_ids_by_type: dict[InstanceIdPrefix, set[str]] = defaultdict(set)
|
|
@@ -110,18 +114,25 @@ class ClassicGraphExtractor(BaseExtractor):
|
|
|
110
114
|
|
|
111
115
|
yield from self._extract_core_end_nodes()
|
|
112
116
|
|
|
113
|
-
|
|
114
|
-
|
|
117
|
+
try:
|
|
118
|
+
yield from self._extract_labels()
|
|
119
|
+
except CogniteAPIError as e:
|
|
120
|
+
warnings.warn(AuthWarning("extract labels", str(e)), stacklevel=2)
|
|
121
|
+
|
|
122
|
+
try:
|
|
123
|
+
yield from self._extract_data_sets()
|
|
124
|
+
except CogniteAPIError as e:
|
|
125
|
+
warnings.warn(AuthWarning("extract data sets", str(e)), stacklevel=2)
|
|
115
126
|
|
|
116
127
|
def _extract_core_start_nodes(self):
|
|
117
128
|
for core_node in self._classic_node_types:
|
|
118
129
|
if self._data_set_external_id:
|
|
119
130
|
extractor = core_node.extractor_cls.from_dataset(
|
|
120
|
-
self._client, self._data_set_external_id, self.
|
|
131
|
+
self._client, self._data_set_external_id, **self._extractor_args
|
|
121
132
|
)
|
|
122
133
|
elif self._root_asset_external_id:
|
|
123
134
|
extractor = core_node.extractor_cls.from_hierarchy(
|
|
124
|
-
self._client, self._root_asset_external_id, self.
|
|
135
|
+
self._client, self._root_asset_external_id, **self._extractor_args
|
|
125
136
|
)
|
|
126
137
|
else:
|
|
127
138
|
raise ValueError("Exactly one of data_set_external_id or root_asset_external_id must be set.")
|
|
@@ -135,7 +146,7 @@ class ClassicGraphExtractor(BaseExtractor):
|
|
|
135
146
|
relationship_iterator = self._client.relationships(
|
|
136
147
|
source_external_ids=list(chunk), source_types=[start_type]
|
|
137
148
|
)
|
|
138
|
-
extractor = RelationshipsExtractor(relationship_iterator, self.
|
|
149
|
+
extractor = RelationshipsExtractor(relationship_iterator, **self._extractor_args)
|
|
139
150
|
# This is a private attribute, but we need to set it to log the target nodes.
|
|
140
151
|
extractor._log_target_nodes = True
|
|
141
152
|
|
|
@@ -165,28 +176,28 @@ class ClassicGraphExtractor(BaseExtractor):
|
|
|
165
176
|
description=f"Extracting end nodes {core_node.resource_type.removesuffix('_')}",
|
|
166
177
|
):
|
|
167
178
|
resource_iterator = api.retrieve_multiple(external_ids=list(chunk), ignore_unknown_ids=True)
|
|
168
|
-
extractor = core_node.extractor_cls(resource_iterator, self.
|
|
179
|
+
extractor = core_node.extractor_cls(resource_iterator, **self._extractor_args)
|
|
169
180
|
yield from self._extract_with_logging_label_dataset(extractor)
|
|
170
181
|
|
|
171
182
|
def _extract_labels(self):
|
|
172
183
|
for chunk in self._chunk(list(self._labels), description="Extracting labels"):
|
|
173
184
|
label_iterator = self._client.labels.retrieve(external_id=list(chunk), ignore_unknown_ids=True)
|
|
174
|
-
yield from LabelsExtractor(label_iterator, self.
|
|
185
|
+
yield from LabelsExtractor(label_iterator, **self._extractor_args).extract()
|
|
175
186
|
|
|
176
187
|
def _extract_data_sets(self):
|
|
177
188
|
for chunk in self._chunk(list(self._data_set_ids), description="Extracting data sets"):
|
|
178
189
|
data_set_iterator = self._client.data_sets.retrieve_multiple(ids=list(chunk), ignore_unknown_ids=True)
|
|
179
|
-
yield from DataSetExtractor(data_set_iterator, self.
|
|
190
|
+
yield from DataSetExtractor(data_set_iterator, **self._extractor_args).extract()
|
|
180
191
|
|
|
181
192
|
def _extract_with_logging_label_dataset(
|
|
182
193
|
self, extractor: ClassicCDFBaseExtractor, resource_type: InstanceIdPrefix | None = None
|
|
183
194
|
) -> Iterable[Triple]:
|
|
184
195
|
for triple in extractor.extract():
|
|
185
|
-
if triple[1] == self._namespace.external_id and resource_type is not None:
|
|
196
|
+
if triple[1] == self._namespace.externalId and resource_type is not None:
|
|
186
197
|
self._source_external_ids_by_type[resource_type].add(remove_namespace_from_uri(triple[2]))
|
|
187
|
-
elif triple[1] == self._namespace.label:
|
|
198
|
+
elif triple[1] == self._namespace.labels:
|
|
188
199
|
self._labels.add(remove_namespace_from_uri(triple[2]).removeprefix(InstanceIdPrefix.label))
|
|
189
|
-
elif triple[1] == self._namespace.dataset:
|
|
200
|
+
elif triple[1] == self._namespace.datasetId:
|
|
190
201
|
self._data_set_ids.add(
|
|
191
202
|
int(remove_namespace_from_uri(triple[2]).removeprefix(InstanceIdPrefix.data_set))
|
|
192
203
|
)
|
|
@@ -202,7 +213,7 @@ class ClassicGraphExtractor(BaseExtractor):
|
|
|
202
213
|
else:
|
|
203
214
|
to_iterate = track(
|
|
204
215
|
to_iterate,
|
|
205
|
-
total=(len(items) //
|
|
216
|
+
total=(len(items) // 1_000) + 1,
|
|
206
217
|
description=description,
|
|
207
218
|
)
|
|
208
219
|
return to_iterate
|
|
@@ -1,110 +1,35 @@
|
|
|
1
|
-
from collections.abc import
|
|
2
|
-
from datetime import datetime, timezone
|
|
1
|
+
from collections.abc import Iterable
|
|
3
2
|
from pathlib import Path
|
|
4
3
|
|
|
5
4
|
from cognite.client import CogniteClient
|
|
6
5
|
from cognite.client.data_classes import DataSet, DataSetList
|
|
7
6
|
from cognite.client.utils.useful_types import SequenceNotStr
|
|
8
|
-
from rdflib import RDF, Literal, Namespace
|
|
9
7
|
|
|
10
|
-
from cognite.neat._shared import Triple
|
|
11
|
-
|
|
12
|
-
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
|
|
8
|
+
from ._base import ClassicCDFBaseExtractor, InstanceIdPrefix, T_CogniteResource
|
|
13
9
|
|
|
14
10
|
|
|
15
11
|
class DataSetExtractor(ClassicCDFBaseExtractor[DataSet]):
|
|
16
|
-
"""Extract DataSets from Cognite Data Fusions into Neat.
|
|
17
|
-
|
|
18
|
-
Args:
|
|
19
|
-
items (Iterable[Asset]): An iterable of assets.
|
|
20
|
-
namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
|
|
21
|
-
to_type (Callable[[Asset], str | None], optional): A function to convert an asset to a type. Defaults to None.
|
|
22
|
-
If None or if the function returns None, the asset will be set to the default type "Asset".
|
|
23
|
-
total (int, optional): The total number of assets to load. If passed, you will get a progress bar if rich
|
|
24
|
-
is installed. Defaults to None.
|
|
25
|
-
limit (int, optional): The maximal number of assets to load. Defaults to None. This is typically used for
|
|
26
|
-
testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
|
|
27
|
-
limit the extraction to 1000 assets to test the setup.
|
|
28
|
-
unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
|
|
29
|
-
a JSON string.
|
|
30
|
-
skip_metadata_values (set[str] | frozenset[str] | None, optional): A set of values to skip when unpacking
|
|
31
|
-
metadata. Defaults to frozenset({"nan", "null", "none", ""}).
|
|
32
|
-
"""
|
|
12
|
+
"""Extract DataSets from Cognite Data Fusions into Neat."""
|
|
33
13
|
|
|
34
14
|
_default_rdf_type = "DataSet"
|
|
15
|
+
_instance_id_prefix = InstanceIdPrefix.data_set
|
|
35
16
|
|
|
36
17
|
@classmethod
|
|
37
|
-
def from_dataset(
|
|
18
|
+
def _from_dataset(
|
|
38
19
|
cls,
|
|
39
20
|
client: CogniteClient,
|
|
40
|
-
data_set_external_id: SequenceNotStr[str],
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
):
|
|
45
|
-
return cls(
|
|
46
|
-
client.data_sets.retrieve_multiple(external_ids=data_set_external_id),
|
|
47
|
-
namespace=namespace,
|
|
48
|
-
total=len(data_set_external_id),
|
|
49
|
-
unpack_metadata=unpack_metadata,
|
|
50
|
-
skip_metadata_values=skip_metadata_values,
|
|
51
|
-
)
|
|
21
|
+
data_set_external_id: SequenceNotStr[str], # type: ignore[override]
|
|
22
|
+
) -> tuple[int | None, Iterable[DataSet]]:
|
|
23
|
+
items = client.data_sets.retrieve_multiple(external_ids=data_set_external_id)
|
|
24
|
+
return len(items), items
|
|
52
25
|
|
|
53
26
|
@classmethod
|
|
54
|
-
def from_file(
|
|
55
|
-
cls,
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
unpack_metadata: bool = True,
|
|
59
|
-
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
60
|
-
):
|
|
61
|
-
data_sets = DataSetList.load(Path(file_path).read_text())
|
|
62
|
-
return cls(
|
|
63
|
-
data_sets,
|
|
64
|
-
namespace=namespace,
|
|
65
|
-
total=len(data_sets),
|
|
66
|
-
unpack_metadata=unpack_metadata,
|
|
67
|
-
skip_metadata_values=skip_metadata_values,
|
|
68
|
-
)
|
|
69
|
-
|
|
70
|
-
def _item2triples(self, item: DataSet) -> list[Triple]:
|
|
71
|
-
"""Converts an asset to triples."""
|
|
72
|
-
id_ = self.namespace[f"{InstanceIdPrefix.data_set}{item.id}"]
|
|
73
|
-
|
|
74
|
-
type_ = self._get_rdf_type(item)
|
|
75
|
-
|
|
76
|
-
triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
|
|
77
|
-
|
|
78
|
-
# Create attributes
|
|
79
|
-
if item.name:
|
|
80
|
-
triples.append((id_, self.namespace.name, Literal(item.name)))
|
|
27
|
+
def _from_hierarchy(
|
|
28
|
+
cls, client: CogniteClient, root_asset_external_id: str
|
|
29
|
+
) -> tuple[int | None, Iterable[T_CogniteResource]]:
|
|
30
|
+
raise NotImplementedError("DataSets do not have a hierarchy.")
|
|
81
31
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
triples.append((id_, self.namespace.external_id, Literal(item.external_id)))
|
|
87
|
-
|
|
88
|
-
# properties' ref creation and update
|
|
89
|
-
triples.append(
|
|
90
|
-
(
|
|
91
|
-
id_,
|
|
92
|
-
self.namespace.created_time,
|
|
93
|
-
Literal(datetime.fromtimestamp(item.created_time / 1000, timezone.utc)),
|
|
94
|
-
)
|
|
95
|
-
)
|
|
96
|
-
triples.append(
|
|
97
|
-
(
|
|
98
|
-
id_,
|
|
99
|
-
self.namespace.last_updated_time,
|
|
100
|
-
Literal(datetime.fromtimestamp(item.last_updated_time / 1000, timezone.utc)),
|
|
101
|
-
)
|
|
102
|
-
)
|
|
103
|
-
|
|
104
|
-
if item.write_protected:
|
|
105
|
-
triples.append((id_, self.namespace.write_protected, Literal(item.write_protected)))
|
|
106
|
-
|
|
107
|
-
if item.metadata:
|
|
108
|
-
triples.extend(self._metadata_to_triples(id_, item.metadata))
|
|
109
|
-
|
|
110
|
-
return triples
|
|
32
|
+
@classmethod
|
|
33
|
+
def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[DataSet]]:
|
|
34
|
+
data_sets = DataSetList.load(Path(file_path).read_text())
|
|
35
|
+
return len(data_sets), data_sets
|