cognite-neat 0.88.0__py3-none-any.whl → 0.88.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_version.py +1 -1
- cognite/neat/app/api/routers/configuration.py +1 -1
- cognite/neat/app/ui/neat-app/build/asset-manifest.json +7 -7
- cognite/neat/app/ui/neat-app/build/index.html +1 -1
- cognite/neat/app/ui/neat-app/build/static/css/{main.38a62222.css → main.72e3d92e.css} +2 -2
- cognite/neat/app/ui/neat-app/build/static/css/main.72e3d92e.css.map +1 -0
- cognite/neat/app/ui/neat-app/build/static/js/main.5a52cf09.js +3 -0
- cognite/neat/app/ui/neat-app/build/static/js/{main.ec7f72e2.js.LICENSE.txt → main.5a52cf09.js.LICENSE.txt} +0 -9
- cognite/neat/app/ui/neat-app/build/static/js/main.5a52cf09.js.map +1 -0
- cognite/neat/config.py +44 -27
- cognite/neat/exceptions.py +8 -2
- cognite/neat/graph/extractors/_classic_cdf/_assets.py +21 -73
- cognite/neat/graph/extractors/_classic_cdf/_base.py +102 -0
- cognite/neat/graph/extractors/_classic_cdf/_events.py +46 -42
- cognite/neat/graph/extractors/_classic_cdf/_files.py +41 -45
- cognite/neat/graph/extractors/_classic_cdf/_labels.py +75 -52
- cognite/neat/graph/extractors/_classic_cdf/_relationships.py +49 -27
- cognite/neat/graph/extractors/_classic_cdf/_sequences.py +47 -50
- cognite/neat/graph/extractors/_classic_cdf/_timeseries.py +47 -49
- cognite/neat/graph/loaders/_base.py +4 -4
- cognite/neat/graph/loaders/_rdf2asset.py +12 -14
- cognite/neat/graph/loaders/_rdf2dms.py +14 -10
- cognite/neat/graph/queries/_base.py +22 -29
- cognite/neat/graph/queries/_shared.py +1 -1
- cognite/neat/graph/stores/_base.py +19 -11
- cognite/neat/graph/transformers/_rdfpath.py +3 -2
- cognite/neat/issues/__init__.py +16 -0
- cognite/neat/{issues.py → issues/_base.py} +78 -2
- cognite/neat/issues/errors/external.py +21 -0
- cognite/neat/issues/errors/properties.py +75 -0
- cognite/neat/issues/errors/resources.py +123 -0
- cognite/neat/issues/errors/schema.py +0 -0
- cognite/neat/{rules/issues → issues}/formatters.py +9 -9
- cognite/neat/issues/neat_warnings/__init__.py +2 -0
- cognite/neat/issues/neat_warnings/identifier.py +27 -0
- cognite/neat/issues/neat_warnings/models.py +22 -0
- cognite/neat/issues/neat_warnings/properties.py +77 -0
- cognite/neat/issues/neat_warnings/resources.py +125 -0
- cognite/neat/rules/exporters/_rules2dms.py +3 -2
- cognite/neat/rules/exporters/_rules2ontology.py +28 -20
- cognite/neat/rules/exporters/_validation.py +15 -21
- cognite/neat/rules/importers/__init__.py +7 -3
- cognite/neat/rules/importers/_base.py +3 -3
- cognite/neat/rules/importers/_dms2rules.py +39 -18
- cognite/neat/rules/importers/_dtdl2rules/dtdl_converter.py +44 -53
- cognite/neat/rules/importers/_dtdl2rules/dtdl_importer.py +6 -5
- cognite/neat/rules/importers/_rdf/__init__.py +0 -0
- cognite/neat/rules/importers/_rdf/_imf2rules/__init__.py +3 -0
- cognite/neat/rules/importers/_rdf/_imf2rules/_imf2classes.py +82 -0
- cognite/neat/rules/importers/_rdf/_imf2rules/_imf2metadata.py +34 -0
- cognite/neat/rules/importers/_rdf/_imf2rules/_imf2properties.py +123 -0
- cognite/neat/rules/importers/{_owl2rules/_owl2rules.py → _rdf/_imf2rules/_imf2rules.py} +15 -11
- cognite/neat/rules/importers/{_inference2rules.py → _rdf/_inference2rules.py} +1 -1
- cognite/neat/rules/importers/_rdf/_owl2rules/_owl2classes.py +57 -0
- cognite/neat/rules/importers/_rdf/_owl2rules/_owl2metadata.py +68 -0
- cognite/neat/rules/importers/_rdf/_owl2rules/_owl2properties.py +59 -0
- cognite/neat/rules/importers/_rdf/_owl2rules/_owl2rules.py +76 -0
- cognite/neat/rules/importers/_rdf/_shared.py +586 -0
- cognite/neat/rules/importers/_spreadsheet2rules.py +31 -28
- cognite/neat/rules/importers/_yaml2rules.py +2 -1
- cognite/neat/rules/issues/__init__.py +1 -5
- cognite/neat/rules/issues/base.py +2 -21
- cognite/neat/rules/issues/dms.py +20 -134
- cognite/neat/rules/issues/ontology.py +298 -0
- cognite/neat/rules/issues/spreadsheet.py +51 -3
- cognite/neat/rules/issues/tables.py +72 -0
- cognite/neat/rules/models/_rdfpath.py +4 -4
- cognite/neat/rules/models/_types/_field.py +14 -21
- cognite/neat/rules/models/asset/_validation.py +1 -1
- cognite/neat/rules/models/dms/_schema.py +53 -30
- cognite/neat/rules/models/dms/_validation.py +2 -2
- cognite/neat/rules/models/entities.py +3 -0
- cognite/neat/rules/models/information/_rules.py +5 -4
- cognite/neat/rules/models/information/_validation.py +1 -1
- cognite/neat/utils/rdf_.py +17 -9
- cognite/neat/utils/regex_patterns.py +52 -0
- cognite/neat/workflows/steps/lib/current/rules_importer.py +73 -1
- cognite/neat/workflows/steps/lib/current/rules_validator.py +19 -7
- {cognite_neat-0.88.0.dist-info → cognite_neat-0.88.2.dist-info}/METADATA +2 -6
- {cognite_neat-0.88.0.dist-info → cognite_neat-0.88.2.dist-info}/RECORD +85 -72
- cognite/neat/app/ui/neat-app/build/static/css/main.38a62222.css.map +0 -1
- cognite/neat/app/ui/neat-app/build/static/js/main.ec7f72e2.js +0 -3
- cognite/neat/app/ui/neat-app/build/static/js/main.ec7f72e2.js.map +0 -1
- cognite/neat/graph/issues/loader.py +0 -104
- cognite/neat/graph/stores/_oxrdflib.py +0 -247
- cognite/neat/rules/exceptions.py +0 -2972
- cognite/neat/rules/importers/_owl2rules/_owl2classes.py +0 -215
- cognite/neat/rules/importers/_owl2rules/_owl2metadata.py +0 -213
- cognite/neat/rules/importers/_owl2rules/_owl2properties.py +0 -203
- cognite/neat/rules/issues/importing.py +0 -408
- cognite/neat/rules/models/_types/_base.py +0 -16
- cognite/neat/workflows/examples/Export_Rules_to_Ontology/workflow.yaml +0 -152
- cognite/neat/workflows/examples/Extract_DEXPI_Graph_and_Export_Rules/workflow.yaml +0 -139
- cognite/neat/workflows/examples/Ontology_to_Data_Model/workflow.yaml +0 -116
- /cognite/neat/{graph/issues → issues/errors}/__init__.py +0 -0
- /cognite/neat/rules/importers/{_owl2rules → _rdf/_owl2rules}/__init__.py +0 -0
- {cognite_neat-0.88.0.dist-info → cognite_neat-0.88.2.dist-info}/LICENSE +0 -0
- {cognite_neat-0.88.0.dist-info → cognite_neat-0.88.2.dist-info}/WHEEL +0 -0
- {cognite_neat-0.88.0.dist-info → cognite_neat-0.88.2.dist-info}/entry_points.txt +0 -0
cognite/neat/config.py
CHANGED
|
@@ -7,7 +7,7 @@ from pathlib import Path
|
|
|
7
7
|
from typing import Any, Literal, cast
|
|
8
8
|
|
|
9
9
|
import yaml
|
|
10
|
-
from pydantic import BaseModel, Field,
|
|
10
|
+
from pydantic import BaseModel, Field, model_validator
|
|
11
11
|
from yaml import safe_load
|
|
12
12
|
|
|
13
13
|
from cognite.neat.constants import EXAMPLE_GRAPHS, EXAMPLE_RULES, EXAMPLE_WORKFLOWS
|
|
@@ -58,7 +58,8 @@ class Config(BaseModel, arbitrary_types_allowed=True):
|
|
|
58
58
|
log_level: Literal["ERROR", "WARNING", "INFO", "DEBUG"] = "INFO"
|
|
59
59
|
log_format: str = LOG_FORMAT
|
|
60
60
|
download_workflows_from_cdf: bool = Field(
|
|
61
|
-
default=False,
|
|
61
|
+
default=False,
|
|
62
|
+
description="Downloads all workflows from CDF automatically and stores them locally",
|
|
62
63
|
)
|
|
63
64
|
stop_on_error: bool = False
|
|
64
65
|
|
|
@@ -93,30 +94,43 @@ class Config(BaseModel, arbitrary_types_allowed=True):
|
|
|
93
94
|
)
|
|
94
95
|
return data
|
|
95
96
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
97
|
+
def as_legacy_config(
|
|
98
|
+
self,
|
|
99
|
+
) -> dict[str, Any]:
|
|
100
|
+
config: dict[str, Any] = {}
|
|
101
|
+
|
|
102
|
+
config["workflows_store_type"] = self.workflows_store_type
|
|
103
|
+
config["data_store_path"] = str(self.data_store_path)
|
|
104
|
+
config["workflows_downloader_filter"] = self.workflow_downloader_filter
|
|
105
|
+
|
|
106
|
+
config["cdf_client"] = {}
|
|
107
|
+
if self.cdf_auth_config.CDF_PROJECT not in {"Missing", "NOT SET"}:
|
|
108
|
+
config["cdf_client"]["project"] = self.cdf_auth_config.CDF_PROJECT
|
|
109
|
+
if self.cdf_auth_config.CDF_CLUSTER not in {"Missing", "NOT SET"}:
|
|
110
|
+
config["cdf_client"]["cluster"] = self.cdf_auth_config.CDF_CLUSTER
|
|
111
|
+
if self.cdf_auth_config.CDF_URL:
|
|
112
|
+
config["cdf_client"]["base_url"] = self.cdf_auth_config.CDF_URL
|
|
113
|
+
if self.cdf_auth_config.IDP_CLIENT_ID:
|
|
114
|
+
config["cdf_client"]["client_id"] = self.cdf_auth_config.IDP_CLIENT_ID
|
|
115
|
+
if self.cdf_auth_config.IDP_CLIENT_SECRET:
|
|
116
|
+
config["cdf_client"]["client_secret"] = self.cdf_auth_config.IDP_CLIENT_SECRET
|
|
117
|
+
if self.cdf_auth_config.IDP_TOKEN_URL:
|
|
118
|
+
config["cdf_client"]["token_url"] = self.cdf_auth_config.IDP_TOKEN_URL
|
|
119
|
+
if self.cdf_auth_config.IDP_SCOPES:
|
|
120
|
+
config["cdf_client"]["scopes"] = self.cdf_auth_config.idp_scopes
|
|
121
|
+
if self.cdf_auth_config.CDF_TIMEOUT:
|
|
122
|
+
config["cdf_client"]["timeout"] = self.cdf_auth_config.CDF_TIMEOUT
|
|
123
|
+
if self.cdf_auth_config.CDF_MAX_WORKERS:
|
|
124
|
+
config["cdf_client"]["max_workers"] = self.cdf_auth_config.CDF_MAX_WORKERS
|
|
125
|
+
|
|
126
|
+
config["cdf_default_dataset_id"] = self.cdf_default_dataset_id
|
|
127
|
+
config["load_examples"] = self.load_examples
|
|
128
|
+
config["log_level"] = self.log_level
|
|
129
|
+
config["log_format"] = self.log_format
|
|
130
|
+
config["download_workflows_from_cdf"] = self.download_workflows_from_cdf
|
|
131
|
+
config["stop_on_error"] = self.stop_on_error
|
|
132
|
+
|
|
133
|
+
return config
|
|
120
134
|
|
|
121
135
|
@property
|
|
122
136
|
def _dir_suffix(self) -> str:
|
|
@@ -191,7 +205,10 @@ class Config(BaseModel, arbitrary_types_allowed=True):
|
|
|
191
205
|
),
|
|
192
206
|
data_store_path=Path(os.environ.get("NEAT_DATA_PATH", "/app/data")),
|
|
193
207
|
cdf_default_dataset_id=int(os.environ.get("NEAT_CDF_DEFAULT_DATASET_ID", 6476640149881990)),
|
|
194
|
-
log_level=cast(
|
|
208
|
+
log_level=cast(
|
|
209
|
+
Literal["ERROR", "WARNING", "INFO", "DEBUG"],
|
|
210
|
+
os.environ.get("NEAT_LOG_LEVEL", "INFO"),
|
|
211
|
+
),
|
|
195
212
|
workflow_downloader_filter=workflow_downloader_filter,
|
|
196
213
|
load_examples=bool(os.environ.get("NEAT_LOAD_EXAMPLES", True) in ["True", "true", "1"]),
|
|
197
214
|
)
|
cognite/neat/exceptions.py
CHANGED
|
@@ -102,6 +102,12 @@ class InvalidWorkFlowError(NeatException):
|
|
|
102
102
|
return self.message
|
|
103
103
|
|
|
104
104
|
|
|
105
|
+
class NeatValueError(NeatException, ValueError): ...
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class NeatTypeError(NeatException, TypeError): ...
|
|
109
|
+
|
|
110
|
+
|
|
105
111
|
def wrangle_warnings(list_of_warnings: list[WarningMessage]) -> list[dict]:
|
|
106
112
|
warning_list: list[dict] = []
|
|
107
113
|
for warning in list_of_warnings:
|
|
@@ -115,12 +121,12 @@ def wrangle_warnings(list_of_warnings: list[WarningMessage]) -> list[dict]:
|
|
|
115
121
|
def _neat_warning_to_dict(warning: WarningMessage) -> dict:
|
|
116
122
|
category: Any = warning.category
|
|
117
123
|
return {
|
|
118
|
-
"type": category.
|
|
124
|
+
"type": category.resource_type,
|
|
119
125
|
"loc": (),
|
|
120
126
|
"msg": str(warning.message),
|
|
121
127
|
"input": None,
|
|
122
128
|
"ctx": dict(
|
|
123
|
-
type_=category.
|
|
129
|
+
type_=category.resource_type,
|
|
124
130
|
code=category.code,
|
|
125
131
|
description=category.description,
|
|
126
132
|
example=category.example,
|
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
import
|
|
2
|
-
import re
|
|
3
|
-
from collections.abc import Callable, Iterable
|
|
1
|
+
from collections.abc import Callable, Iterable, Set
|
|
4
2
|
from datetime import datetime, timezone
|
|
5
3
|
from pathlib import Path
|
|
6
4
|
from typing import cast
|
|
@@ -9,17 +7,17 @@ from cognite.client import CogniteClient
|
|
|
9
7
|
from cognite.client.data_classes import Asset, AssetFilter, AssetList
|
|
10
8
|
from rdflib import RDF, Literal, Namespace
|
|
11
9
|
|
|
12
|
-
from cognite.neat.constants import DEFAULT_NAMESPACE
|
|
13
|
-
from cognite.neat.graph.extractors._base import BaseExtractor
|
|
14
10
|
from cognite.neat.graph.models import Triple
|
|
15
|
-
from cognite.neat.utils.auxiliary import create_sha256_hash
|
|
11
|
+
from cognite.neat.utils.auxiliary import create_sha256_hash
|
|
16
12
|
|
|
13
|
+
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFExtractor
|
|
17
14
|
|
|
18
|
-
|
|
15
|
+
|
|
16
|
+
class AssetsExtractor(ClassicCDFExtractor[Asset]):
|
|
19
17
|
"""Extract data from Cognite Data Fusions Assets into Neat.
|
|
20
18
|
|
|
21
19
|
Args:
|
|
22
|
-
|
|
20
|
+
items (Iterable[Asset]): An iterable of assets.
|
|
23
21
|
namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
|
|
24
22
|
to_type (Callable[[Asset], str | None], optional): A function to convert an asset to a type. Defaults to None.
|
|
25
23
|
If None or if the function returns None, the asset will be set to the default type "Asset".
|
|
@@ -34,25 +32,7 @@ class AssetsExtractor(BaseExtractor):
|
|
|
34
32
|
metadata. Defaults to frozenset({"nan", "null", "none", ""}).
|
|
35
33
|
"""
|
|
36
34
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def __init__(
|
|
40
|
-
self,
|
|
41
|
-
assets: Iterable[Asset],
|
|
42
|
-
namespace: Namespace | None = None,
|
|
43
|
-
to_type: Callable[[Asset], str | None] | None = None,
|
|
44
|
-
total: int | None = None,
|
|
45
|
-
limit: int | None = None,
|
|
46
|
-
unpack_metadata: bool = True,
|
|
47
|
-
skip_metadata_values: set[str] | frozenset[str] | None = frozenset({"nan", "null", "none", ""}),
|
|
48
|
-
):
|
|
49
|
-
self.namespace = namespace or DEFAULT_NAMESPACE
|
|
50
|
-
self.assets = assets
|
|
51
|
-
self.to_type = to_type
|
|
52
|
-
self.total = total
|
|
53
|
-
self.limit = min(limit, total) if limit and total else limit
|
|
54
|
-
self.unpack_metadata = unpack_metadata
|
|
55
|
-
self.skip_metadata_values = skip_metadata_values
|
|
35
|
+
_default_rdf_type = "Asset"
|
|
56
36
|
|
|
57
37
|
@classmethod
|
|
58
38
|
def from_dataset(
|
|
@@ -63,19 +43,18 @@ class AssetsExtractor(BaseExtractor):
|
|
|
63
43
|
to_type: Callable[[Asset], str | None] | None = None,
|
|
64
44
|
limit: int | None = None,
|
|
65
45
|
unpack_metadata: bool = True,
|
|
46
|
+
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
66
47
|
):
|
|
67
48
|
total = client.assets.aggregate_count(filter=AssetFilter(data_set_ids=[{"externalId": data_set_external_id}]))
|
|
68
49
|
|
|
69
50
|
return cls(
|
|
70
|
-
|
|
71
|
-
Iterable[Asset],
|
|
72
|
-
client.assets(data_set_external_ids=data_set_external_id),
|
|
73
|
-
),
|
|
51
|
+
client.assets(data_set_external_ids=data_set_external_id),
|
|
74
52
|
namespace,
|
|
75
53
|
to_type,
|
|
76
54
|
total,
|
|
77
55
|
limit,
|
|
78
56
|
unpack_metadata=unpack_metadata,
|
|
57
|
+
skip_metadata_values=skip_metadata_values,
|
|
79
58
|
)
|
|
80
59
|
|
|
81
60
|
@classmethod
|
|
@@ -87,6 +66,7 @@ class AssetsExtractor(BaseExtractor):
|
|
|
87
66
|
to_type: Callable[[Asset], str | None] | None = None,
|
|
88
67
|
limit: int | None = None,
|
|
89
68
|
unpack_metadata: bool = True,
|
|
69
|
+
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
90
70
|
):
|
|
91
71
|
total = client.assets.aggregate_count(
|
|
92
72
|
filter=AssetFilter(asset_subtree_ids=[{"externalId": root_asset_external_id}])
|
|
@@ -102,6 +82,7 @@ class AssetsExtractor(BaseExtractor):
|
|
|
102
82
|
total,
|
|
103
83
|
limit,
|
|
104
84
|
unpack_metadata=unpack_metadata,
|
|
85
|
+
skip_metadata_values=skip_metadata_values,
|
|
105
86
|
)
|
|
106
87
|
|
|
107
88
|
@classmethod
|
|
@@ -112,44 +93,24 @@ class AssetsExtractor(BaseExtractor):
|
|
|
112
93
|
to_type: Callable[[Asset], str] | None = None,
|
|
113
94
|
limit: int | None = None,
|
|
114
95
|
unpack_metadata: bool = True,
|
|
96
|
+
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
115
97
|
):
|
|
98
|
+
assets = AssetList.load(Path(file_path).read_text())
|
|
116
99
|
return cls(
|
|
117
|
-
|
|
100
|
+
assets,
|
|
118
101
|
namespace,
|
|
119
102
|
to_type,
|
|
120
|
-
|
|
103
|
+
total=len(assets),
|
|
104
|
+
limit=limit,
|
|
121
105
|
unpack_metadata=unpack_metadata,
|
|
106
|
+
skip_metadata_values=skip_metadata_values,
|
|
122
107
|
)
|
|
123
108
|
|
|
124
|
-
def
|
|
125
|
-
"""Extracts an asset with the given asset_id."""
|
|
126
|
-
if self.total:
|
|
127
|
-
try:
|
|
128
|
-
from rich.progress import track
|
|
129
|
-
except ModuleNotFoundError:
|
|
130
|
-
to_iterate = self.assets
|
|
131
|
-
else:
|
|
132
|
-
to_iterate = track(
|
|
133
|
-
self.assets,
|
|
134
|
-
total=self.limit or self.total,
|
|
135
|
-
description="Extracting Assets",
|
|
136
|
-
)
|
|
137
|
-
else:
|
|
138
|
-
to_iterate = self.assets
|
|
139
|
-
for no, asset in enumerate(to_iterate):
|
|
140
|
-
yield from self._asset2triples(asset)
|
|
141
|
-
if self.limit and no >= self.limit:
|
|
142
|
-
break
|
|
143
|
-
|
|
144
|
-
def _asset2triples(self, asset: Asset) -> list[Triple]:
|
|
109
|
+
def _item2triples(self, asset: Asset) -> list[Triple]:
|
|
145
110
|
"""Converts an asset to triples."""
|
|
146
111
|
id_ = self.namespace[f"Asset_{asset.id}"]
|
|
147
112
|
|
|
148
|
-
|
|
149
|
-
type_ = "Asset"
|
|
150
|
-
if self.to_type:
|
|
151
|
-
type_ = self.to_type(asset) or type_
|
|
152
|
-
type_ = self._SPACE_PATTERN.sub("_", type_)
|
|
113
|
+
type_ = self._get_rdf_type(asset)
|
|
153
114
|
|
|
154
115
|
triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
|
|
155
116
|
|
|
@@ -195,20 +156,7 @@ class AssetsExtractor(BaseExtractor):
|
|
|
195
156
|
)
|
|
196
157
|
|
|
197
158
|
if asset.metadata:
|
|
198
|
-
|
|
199
|
-
for key, value in asset.metadata.items():
|
|
200
|
-
if value and (
|
|
201
|
-
self.skip_metadata_values is None or value.casefold() not in self.skip_metadata_values
|
|
202
|
-
):
|
|
203
|
-
triples.append(
|
|
204
|
-
(
|
|
205
|
-
id_,
|
|
206
|
-
self.namespace[key],
|
|
207
|
-
Literal(string_to_ideal_type(value)),
|
|
208
|
-
)
|
|
209
|
-
)
|
|
210
|
-
else:
|
|
211
|
-
triples.append((id_, self.namespace.metadata, Literal(json.dumps(asset.metadata))))
|
|
159
|
+
triples.extend(self._metadata_to_triples(id_, asset.metadata))
|
|
212
160
|
|
|
213
161
|
# Create connections:
|
|
214
162
|
if asset.parent_id:
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import re
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from collections.abc import Callable, Iterable, Set
|
|
5
|
+
from typing import Generic, TypeVar
|
|
6
|
+
|
|
7
|
+
from cognite.client.data_classes._base import CogniteResource
|
|
8
|
+
from rdflib import Literal, Namespace, URIRef
|
|
9
|
+
|
|
10
|
+
from cognite.neat.constants import DEFAULT_NAMESPACE
|
|
11
|
+
from cognite.neat.graph.extractors._base import BaseExtractor
|
|
12
|
+
from cognite.neat.graph.models import Triple
|
|
13
|
+
from cognite.neat.utils.auxiliary import string_to_ideal_type
|
|
14
|
+
|
|
15
|
+
T_CogniteResource = TypeVar("T_CogniteResource", bound=CogniteResource)
|
|
16
|
+
|
|
17
|
+
DEFAULT_SKIP_METADATA_VALUES = frozenset({"nan", "null", "none", ""})
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ClassicCDFExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
|
|
21
|
+
"""This is the Base Extractor for all classic CDF resources.
|
|
22
|
+
|
|
23
|
+
A classic resource is recognized in that it has a metadata attribute of type dict[str, str].
|
|
24
|
+
|
|
25
|
+
Args:
|
|
26
|
+
items (Iterable[T_CogniteResource]): An iterable of classic resource.
|
|
27
|
+
namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
|
|
28
|
+
to_type (Callable[[T_CogniteResource], str | None], optional): A function to convert an item to a type.
|
|
29
|
+
Defaults to None. If None or if the function returns None, the asset will be set to the default type.
|
|
30
|
+
total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
|
|
31
|
+
is installed. Defaults to None.
|
|
32
|
+
limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
|
|
33
|
+
testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
|
|
34
|
+
limit the extraction to 1000 assets to test the setup.
|
|
35
|
+
unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to True. If False, the metadata is yielded as
|
|
36
|
+
a JSON string.
|
|
37
|
+
skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
|
|
38
|
+
values in this set will be skipped.
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
_default_rdf_type: str
|
|
42
|
+
_SPACE_PATTERN = re.compile(r"\s+")
|
|
43
|
+
|
|
44
|
+
def __init__(
|
|
45
|
+
self,
|
|
46
|
+
items: Iterable[T_CogniteResource],
|
|
47
|
+
namespace: Namespace | None = None,
|
|
48
|
+
to_type: Callable[[T_CogniteResource], str | None] | None = None,
|
|
49
|
+
total: int | None = None,
|
|
50
|
+
limit: int | None = None,
|
|
51
|
+
unpack_metadata: bool = True,
|
|
52
|
+
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
53
|
+
):
|
|
54
|
+
self.namespace = namespace or DEFAULT_NAMESPACE
|
|
55
|
+
self.items = items
|
|
56
|
+
self.to_type = to_type
|
|
57
|
+
self.total = total
|
|
58
|
+
self.limit = min(limit, total) if limit and total else limit
|
|
59
|
+
self.unpack_metadata = unpack_metadata
|
|
60
|
+
self.skip_metadata_values = skip_metadata_values
|
|
61
|
+
|
|
62
|
+
def extract(self) -> Iterable[Triple]:
|
|
63
|
+
"""Extracts triples from the items, showing a progress bar if total is set and rich is installed."""
|
|
64
|
+
if self.total:
|
|
65
|
+
try:
|
|
66
|
+
from rich.progress import track
|
|
67
|
+
except ModuleNotFoundError:
|
|
68
|
+
to_iterate = self.items
|
|
69
|
+
else:
|
|
70
|
+
to_iterate = track(
|
|
71
|
+
self.items,
|
|
72
|
+
total=self.limit or self.total,
|
|
73
|
+
description=f"Extracting {type(self).__name__.removesuffix('Extractor')}",
|
|
74
|
+
)
|
|
75
|
+
else:
|
|
76
|
+
to_iterate = self.items
|
|
77
|
+
for no, asset in enumerate(to_iterate):
|
|
78
|
+
yield from self._item2triples(asset)
|
|
79
|
+
if self.limit and no >= self.limit:
|
|
80
|
+
break
|
|
81
|
+
|
|
82
|
+
@abstractmethod
|
|
83
|
+
def _item2triples(self, item: T_CogniteResource) -> list[Triple]:
|
|
84
|
+
raise NotImplementedError()
|
|
85
|
+
|
|
86
|
+
def _metadata_to_triples(self, id_: URIRef, metadata: dict[str, str]) -> Iterable[Triple]:
|
|
87
|
+
if self.unpack_metadata:
|
|
88
|
+
for key, value in metadata.items():
|
|
89
|
+
if value and (self.skip_metadata_values is None or value.casefold() not in self.skip_metadata_values):
|
|
90
|
+
yield (
|
|
91
|
+
id_,
|
|
92
|
+
self.namespace[key],
|
|
93
|
+
Literal(string_to_ideal_type(value)),
|
|
94
|
+
)
|
|
95
|
+
else:
|
|
96
|
+
yield id_, self.namespace.metadata, Literal(json.dumps(metadata))
|
|
97
|
+
|
|
98
|
+
def _get_rdf_type(self, item: T_CogniteResource) -> str:
|
|
99
|
+
type_ = self._default_rdf_type
|
|
100
|
+
if self.to_type:
|
|
101
|
+
type_ = self.to_type(item) or type_
|
|
102
|
+
return self._SPACE_PATTERN.sub("_", type_)
|
|
@@ -1,39 +1,36 @@
|
|
|
1
|
-
import
|
|
2
|
-
from collections.abc import Iterable
|
|
1
|
+
from collections.abc import Callable, Set
|
|
3
2
|
from datetime import datetime, timezone
|
|
4
3
|
from pathlib import Path
|
|
5
|
-
from typing import cast
|
|
6
4
|
|
|
7
5
|
from cognite.client import CogniteClient
|
|
8
|
-
from cognite.client.data_classes import Event, EventList
|
|
9
|
-
from
|
|
10
|
-
from rdflib import RDF, Literal, Namespace, URIRef
|
|
6
|
+
from cognite.client.data_classes import Event, EventFilter, EventList
|
|
7
|
+
from rdflib import RDF, Literal, Namespace
|
|
11
8
|
|
|
12
|
-
from cognite.neat.constants import DEFAULT_NAMESPACE
|
|
13
|
-
from cognite.neat.graph.extractors._base import BaseExtractor
|
|
14
9
|
from cognite.neat.graph.models import Triple
|
|
15
|
-
from cognite.neat.utils.auxiliary import string_to_ideal_type
|
|
16
10
|
|
|
11
|
+
from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFExtractor
|
|
17
12
|
|
|
18
|
-
|
|
13
|
+
|
|
14
|
+
class EventsExtractor(ClassicCDFExtractor[Event]):
|
|
19
15
|
"""Extract data from Cognite Data Fusions Events into Neat.
|
|
20
16
|
|
|
21
17
|
Args:
|
|
22
|
-
|
|
18
|
+
items (Iterable[Event]): An iterable of items.
|
|
23
19
|
namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
|
|
20
|
+
to_type (Callable[[Event], str | None], optional): A function to convert an item to a type.
|
|
21
|
+
Defaults to None. If None or if the function returns None, the asset will be set to the default type.
|
|
22
|
+
total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
|
|
23
|
+
is installed. Defaults to None.
|
|
24
|
+
limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
|
|
25
|
+
testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
|
|
26
|
+
limit the extraction to 1000 assets to test the setup.
|
|
24
27
|
unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to True. If False, the metadata is yielded as
|
|
25
28
|
a JSON string.
|
|
29
|
+
skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
|
|
30
|
+
values in this set will be skipped.
|
|
26
31
|
"""
|
|
27
32
|
|
|
28
|
-
|
|
29
|
-
self,
|
|
30
|
-
events: Iterable[Event],
|
|
31
|
-
namespace: Namespace | None = None,
|
|
32
|
-
unpack_metadata: bool = True,
|
|
33
|
-
):
|
|
34
|
-
self.namespace = namespace or DEFAULT_NAMESPACE
|
|
35
|
-
self.events = events
|
|
36
|
-
self.unpack_metadata = unpack_metadata
|
|
33
|
+
_default_rdf_type = "Event"
|
|
37
34
|
|
|
38
35
|
@classmethod
|
|
39
36
|
def from_dataset(
|
|
@@ -41,15 +38,21 @@ class EventsExtractor(BaseExtractor):
|
|
|
41
38
|
client: CogniteClient,
|
|
42
39
|
data_set_external_id: str,
|
|
43
40
|
namespace: Namespace | None = None,
|
|
41
|
+
to_type: Callable[[Event], str | None] | None = None,
|
|
42
|
+
limit: int | None = None,
|
|
44
43
|
unpack_metadata: bool = True,
|
|
44
|
+
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
45
45
|
):
|
|
46
|
+
total = client.events.aggregate_count(filter=EventFilter(data_set_ids=[{"externalId": data_set_external_id}]))
|
|
47
|
+
|
|
46
48
|
return cls(
|
|
47
|
-
|
|
48
|
-
Iterable[Event],
|
|
49
|
-
client.events(data_set_external_ids=data_set_external_id),
|
|
50
|
-
),
|
|
49
|
+
client.events(data_set_external_ids=data_set_external_id),
|
|
51
50
|
namespace,
|
|
52
|
-
|
|
51
|
+
to_type,
|
|
52
|
+
total=total,
|
|
53
|
+
limit=limit,
|
|
54
|
+
unpack_metadata=unpack_metadata,
|
|
55
|
+
skip_metadata_values=skip_metadata_values,
|
|
53
56
|
)
|
|
54
57
|
|
|
55
58
|
@classmethod
|
|
@@ -57,20 +60,30 @@ class EventsExtractor(BaseExtractor):
|
|
|
57
60
|
cls,
|
|
58
61
|
file_path: str,
|
|
59
62
|
namespace: Namespace | None = None,
|
|
63
|
+
to_type: Callable[[Event], str | None] | None = None,
|
|
64
|
+
limit: int | None = None,
|
|
60
65
|
unpack_metadata: bool = True,
|
|
66
|
+
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
61
67
|
):
|
|
62
|
-
|
|
68
|
+
events = EventList.load(Path(file_path).read_text())
|
|
63
69
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
70
|
+
return cls(
|
|
71
|
+
events,
|
|
72
|
+
namespace,
|
|
73
|
+
to_type,
|
|
74
|
+
total=len(events),
|
|
75
|
+
limit=limit,
|
|
76
|
+
unpack_metadata=unpack_metadata,
|
|
77
|
+
skip_metadata_values=skip_metadata_values,
|
|
78
|
+
)
|
|
68
79
|
|
|
69
|
-
def
|
|
80
|
+
def _item2triples(self, event: Event) -> list[Triple]:
|
|
70
81
|
id_ = self.namespace[f"Event_{event.id}"]
|
|
71
82
|
|
|
83
|
+
type_ = self._get_rdf_type(event)
|
|
84
|
+
|
|
72
85
|
# Set rdf type
|
|
73
|
-
triples: list[Triple] = [(id_, RDF.type, self.namespace
|
|
86
|
+
triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
|
|
74
87
|
|
|
75
88
|
# Create attributes
|
|
76
89
|
|
|
@@ -87,16 +100,7 @@ class EventsExtractor(BaseExtractor):
|
|
|
87
100
|
triples.append((id_, self.namespace.subtype, Literal(event.subtype)))
|
|
88
101
|
|
|
89
102
|
if event.metadata:
|
|
90
|
-
|
|
91
|
-
for key, value in event.metadata.items():
|
|
92
|
-
if value:
|
|
93
|
-
type_aware_value = string_to_ideal_type(value)
|
|
94
|
-
try:
|
|
95
|
-
triples.append((id_, self.namespace[key], URIRef(str(AnyHttpUrl(type_aware_value))))) # type: ignore
|
|
96
|
-
except ValidationError:
|
|
97
|
-
triples.append((id_, self.namespace[key], Literal(type_aware_value)))
|
|
98
|
-
else:
|
|
99
|
-
triples.append((id_, self.namespace.metadata, Literal(json.dumps(event.metadata))))
|
|
103
|
+
triples.extend(self._metadata_to_triples(id_, event.metadata))
|
|
100
104
|
|
|
101
105
|
if event.description:
|
|
102
106
|
triples.append((id_, self.namespace.description, Literal(event.description)))
|