cognite-neat 0.105.2__py3-none-any.whl → 0.106.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognite/neat/_config.py +6 -260
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +26 -13
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +4 -1
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +2 -2
- cognite/neat/_graph/loaders/_rdf2dms.py +7 -2
- cognite/neat/_graph/transformers/_base.py +4 -8
- cognite/neat/_graph/transformers/_classic_cdf.py +51 -41
- cognite/neat/_graph/transformers/_rdfpath.py +1 -1
- cognite/neat/_issues/warnings/_external.py +1 -1
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +4 -2
- cognite/neat/_rules/models/mapping/_classic2core.yaml +70 -58
- cognite/neat/_rules/transformers/_mapping.py +3 -2
- cognite/neat/_session/_base.py +6 -7
- cognite/neat/_session/_inspect.py +6 -2
- cognite/neat/_session/_mapping.py +6 -8
- cognite/neat/_session/_prepare.py +9 -10
- cognite/neat/_session/_read.py +35 -26
- cognite/neat/_session/_set.py +9 -0
- cognite/neat/_session/_state.py +3 -1
- cognite/neat/_session/_to.py +11 -13
- cognite/neat/_store/_graph_store.py +33 -28
- cognite/neat/_utils/collection_.py +32 -11
- cognite/neat/_version.py +1 -1
- {cognite_neat-0.105.2.dist-info → cognite_neat-0.106.0.dist-info}/METADATA +1 -7
- {cognite_neat-0.105.2.dist-info → cognite_neat-0.106.0.dist-info}/RECORD +28 -28
- {cognite_neat-0.105.2.dist-info → cognite_neat-0.106.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.105.2.dist-info → cognite_neat-0.106.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.105.2.dist-info → cognite_neat-0.106.0.dist-info}/entry_points.txt +0 -0
cognite/neat/_config.py
CHANGED
@@ -1,265 +1,11 @@
-import json
-import logging
-import os
-import shutil
-import sys
-from pathlib import Path
-from typing import Any, Literal, cast
+from typing import Literal

-import yaml
-from pydantic import BaseModel, Field, model_validator
-from yaml import safe_load
+from pydantic import BaseModel

-from cognite.neat._constants import EXAMPLE_GRAPHS, EXAMPLE_RULES, EXAMPLE_WORKFLOWS
-from cognite.neat._utils.auth import EnvironmentVariables

-if sys.version_info >= (3, 11):
-    from enum import StrEnum
-    from typing import Self
-else:
-    from backports.strenum import StrEnum
-    from typing_extensions import Self
+class NeatConfig(BaseModel, validate_assignment=True):
+    progress_bar: Literal["tqdm", "rich", "tqdm-notebook", "infer"] | None = "infer"
+    use_iterate_bar_threshold: int | None = 500

-LOG_FORMAT = "%(asctime)s.%(msecs)03d %(levelname)-8s %(message)s"
-LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

-
-class RulesStoreType(StrEnum):
-    """Rules Store type"""
-
-    CDF = "cdf"
-    FILE = "file"
-    URL = "url"
-    GOOGLE_SHEET = "google_sheet"
-
-
-class WorkflowsStoreType(StrEnum):
-    """Workflows Store type"""
-
-    CDF = "cdf"
-    FILE = "file"
-    URL = "url"
-
-
-class Config(BaseModel, arbitrary_types_allowed=True):
-    workflows_store_type: WorkflowsStoreType = WorkflowsStoreType.FILE
-    data_store_path: Path = Field(default_factory=lambda: Path.cwd() / "data")
-
-    workflow_downloader_filter: list[str] | None = Field(
-        description="List of workflow names+tags to filter on when downloading workflows from CDF. "
-        "Example name:workflow_name=version,tag:tag_name",
-        default=None,
-    )
-
-    cdf_auth_config: EnvironmentVariables = Field(default_factory=EnvironmentVariables.default)
-    cdf_default_dataset_id: int = 0
-    load_examples: bool = True
-
-    log_level: Literal["ERROR", "WARNING", "INFO", "DEBUG"] = "INFO"
-    log_format: str = LOG_FORMAT
-    download_workflows_from_cdf: bool = Field(
-        default=False,
-        description="Downloads all workflows from CDF automatically and stores them locally",
-    )
-    stop_on_error: bool = False
-
-    @model_validator(mode="before")
-    def backwards_compatible(cls, data: Any):
-        if not isinstance(data, dict):
-            return data
-        if "cdf_client" in data:
-            cdf_client = data["cdf_client"]
-            if isinstance(cdf_client, dict):
-                if "base_url" in cdf_client:
-                    base_url = cdf_client["base_url"]
-                    cluster = base_url.removeprefix("https://").removesuffix(".cognitedata.com")
-                else:
-                    base_url, cluster = "Missing", "Missing"
-                if "scopes" in cdf_client:
-                    scopes = cdf_client["scopes"]
-                    if isinstance(scopes, list):
-                        scopes = ",".join(scopes)
-                else:
-                    scopes = "Missing"
-                data["cdf_auth_config"] = EnvironmentVariables(
-                    CDF_PROJECT=cdf_client.get("project", "Missing"),
-                    CDF_CLUSTER=cluster,
-                    CDF_URL=base_url,
-                    IDP_CLIENT_ID=cdf_client.get("client_id", "Missing"),
-                    IDP_CLIENT_SECRET=cdf_client.get("client_secret", "Missing"),
-                    IDP_TOKEN_URL=cdf_client.get("token_url", "Missing"),
-                    IDP_SCOPES=scopes,
-                    CDF_TIMEOUT=int(cdf_client.get("timeout", 60)),
-                    CDF_MAX_WORKERS=int(cdf_client.get("max_workers", 3)),
-                )
-        return data
-
-    def as_legacy_config(
-        self,
-    ) -> dict[str, Any]:
-        config: dict[str, Any] = {}
-
-        config["workflows_store_type"] = self.workflows_store_type
-        config["data_store_path"] = str(self.data_store_path)
-        config["workflows_downloader_filter"] = self.workflow_downloader_filter
-
-        config["cdf_client"] = {}
-        if self.cdf_auth_config.CDF_PROJECT not in {"Missing", "NOT SET"}:
-            config["cdf_client"]["project"] = self.cdf_auth_config.CDF_PROJECT
-        if self.cdf_auth_config.CDF_CLUSTER not in {"Missing", "NOT SET"}:
-            config["cdf_client"]["cluster"] = self.cdf_auth_config.CDF_CLUSTER
-        if self.cdf_auth_config.CDF_URL:
-            config["cdf_client"]["base_url"] = self.cdf_auth_config.CDF_URL
-        if self.cdf_auth_config.IDP_CLIENT_ID:
-            config["cdf_client"]["client_id"] = self.cdf_auth_config.IDP_CLIENT_ID
-        if self.cdf_auth_config.IDP_CLIENT_SECRET:
-            config["cdf_client"]["client_secret"] = self.cdf_auth_config.IDP_CLIENT_SECRET
-        if self.cdf_auth_config.IDP_TOKEN_URL:
-            config["cdf_client"]["token_url"] = self.cdf_auth_config.IDP_TOKEN_URL
-        if self.cdf_auth_config.IDP_SCOPES:
-            config["cdf_client"]["scopes"] = self.cdf_auth_config.idp_scopes
-        if self.cdf_auth_config.CDF_TIMEOUT:
-            config["cdf_client"]["timeout"] = self.cdf_auth_config.CDF_TIMEOUT
-        if self.cdf_auth_config.CDF_MAX_WORKERS:
-            config["cdf_client"]["max_workers"] = self.cdf_auth_config.CDF_MAX_WORKERS
-
-        config["cdf_default_dataset_id"] = self.cdf_default_dataset_id
-        config["load_examples"] = self.load_examples
-        config["log_level"] = self.log_level
-        config["log_format"] = self.log_format
-        config["download_workflows_from_cdf"] = self.download_workflows_from_cdf
-        config["stop_on_error"] = self.stop_on_error
-
-        return config
-
-    @property
-    def _dir_suffix(self) -> str:
-        is_test_running = "pytest" in sys.modules
-        if is_test_running:
-            # Todo change the below to f"-{os.getpid()}" when all tests supports parallel execution.
-            return ""
-        return ""
-
-    @property
-    def rules_store_path(self) -> Path:
-        return self.data_store_path / f"rules{self._dir_suffix}"
-
-    @property
-    def workflows_store_path(self) -> Path:
-        return self.data_store_path / f"workflows{self._dir_suffix}"
-
-    @property
-    def source_graph_path(self) -> Path:
-        return self.data_store_path / f"source-graphs{self._dir_suffix}"
-
-    @property
-    def staging_path(self) -> Path:
-        return self.data_store_path / f"staging{self._dir_suffix}"
-
-    @classmethod
-    def from_yaml(cls, filepath: Path) -> Self:
-        return cls(**safe_load(filepath.read_text()))
-
-    def to_yaml(self, filepath: Path):
-        # Parse as json to avoid Path and Enum objects
-        dump = json.loads(self.model_dump_json())
-
-        with filepath.open("w") as f:
-            yaml.safe_dump(dump, f)
-
-    @classmethod
-    def from_env(cls) -> Self:
-        missing = "Missing"
-        # This is to be backwards compatible with the old config
-
-        base_url: str | None = None
-        if "NEAT_CDF_BASE_URL" in os.environ:
-            base_url = os.environ["NEAT_CDF_BASE_URL"]
-        if isinstance(base_url, str):
-            cluster = base_url.removeprefix("https://").removesuffix(".cognitedata.com")
-        else:
-            cluster = missing
-        variables = EnvironmentVariables(
-            CDF_PROJECT=os.environ.get("NEAT_CDF_PROJECT", missing),
-            CDF_CLUSTER=cluster,
-            CDF_URL=base_url,
-            IDP_CLIENT_ID=os.environ.get("NEAT_CDF_CLIENT_ID"),
-            IDP_CLIENT_SECRET=os.environ.get("NEAT_CDF_CLIENT_SECRET"),
-            IDP_TOKEN_URL=os.environ.get("NEAT_CDF_TOKEN_URL"),
-            IDP_SCOPES=os.environ.get("NEAT_CDF_SCOPES"),
-            CDF_TIMEOUT=int(os.environ["NEAT_CDF_CLIENT_TIMEOUT"] if "NEAT_CDF_CLIENT_TIMEOUT" in os.environ else 60),
-            CDF_MAX_WORKERS=int(
-                os.environ["NEAT_CDF_CLIENT_MAX_WORKERS"] if "NEAT_CDF_CLIENT_MAX_WORKERS" in os.environ else 3
-            ),
-        )
-
-        if workflow_downloader_filter_value := os.environ.get("NEAT_WORKFLOW_DOWNLOADER_FILTER", None):
-            workflow_downloader_filter = workflow_downloader_filter_value.split(",")
-        else:
-            workflow_downloader_filter = None
-
-        return cls(
-            cdf_auth_config=variables,
-            workflows_store_type=os.environ.get(  # type: ignore[arg-type]
-                "NEAT_WORKFLOWS_STORE_TYPE", WorkflowsStoreType.FILE
-            ),
-            data_store_path=Path(os.environ.get("NEAT_DATA_PATH", "_app/data")),
-            cdf_default_dataset_id=int(os.environ.get("NEAT_CDF_DEFAULT_DATASET_ID", 6476640149881990)),
-            log_level=cast(
-                Literal["ERROR", "WARNING", "INFO", "DEBUG"],
-                os.environ.get("NEAT_LOG_LEVEL", "INFO"),
-            ),
-            workflow_downloader_filter=workflow_downloader_filter,
-            load_examples=bool(os.environ.get("NEAT_LOAD_EXAMPLES", True) in ["True", "true", "1"]),
-        )
-
-
-def copy_examples_to_directory(config: Config):
-    """
-    Copier over all the examples to the target_data_directory,
-    without overwriting
-
-    Args:
-        target_data_dir : The target directory
-        suffix : The suffix to add to the directory names
-
-    """
-
-    print(f"Copying examples into {config.data_store_path}")
-    _copy_examples(EXAMPLE_RULES, config.rules_store_path)
-    _copy_examples(EXAMPLE_GRAPHS, config.source_graph_path)
-    _copy_examples(EXAMPLE_WORKFLOWS, config.workflows_store_path)
-    config.staging_path.mkdir(exist_ok=True, parents=True)
-
-
-def create_data_dir_structure(config: Config) -> None:
-    """
-    Create the data directory structure in empty directory
-
-    Args:
-        target_data_dir : The target directory
-        suffix : The suffix to add to the directory names
-
-    """
-    for path in (
-        config.rules_store_path,
-        config.source_graph_path,
-        config.staging_path,
-        config.workflows_store_path,
-    ):
-        path.mkdir(exist_ok=True, parents=True)
-
-
-def _copy_examples(source_dir: Path, target_dir: Path):
-    for current in source_dir.rglob("*"):
-        if current.is_dir():
-            continue
-        relative = current.relative_to(source_dir)
-        if not (target := target_dir / relative).exists():
-            target.parent.mkdir(exist_ok=True, parents=True)
-            shutil.copy2(current, target)
-
-
-def configure_logging(level: str = "DEBUG", log_format: str = LOG_FORMAT):
-    """Configure logging based on config."""
-    logging.basicConfig(format=log_format, level=logging.getLevelName(level), datefmt=LOG_DATE_FORMAT)
+GLOBAL_CONFIG = NeatConfig()
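The legacy workflow-era `Config` (YAML round-tripping, env-var loading, example copying, logging setup) is removed wholesale; what replaces it is a small assignment-validated settings object plus a module-level singleton. A minimal usage sketch, assuming only what the diff shows (the `GLOBAL_CONFIG` singleton and its two fields):

```python
from cognite.neat._config import GLOBAL_CONFIG

# Pick an explicit progress-bar backend instead of the default "infer".
GLOBAL_CONFIG.progress_bar = "rich"

# Raise the item-count threshold that gates the iteration bars (default 500).
GLOBAL_CONFIG.use_iterate_bar_threshold = 10_000

# Because NeatConfig is declared with validate_assignment=True, an invalid
# value fails immediately at assignment time:
# GLOBAL_CONFIG.progress_bar = "ascii"  # would raise pydantic.ValidationError
```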
cognite/neat/_graph/extractors/_classic_cdf/_base.py
CHANGED
@@ -1,6 +1,7 @@
 import json
 import re
 import sys
+import warnings
 from abc import ABC, abstractmethod
 from collections.abc import Callable, Iterable, Sequence, Set
 from datetime import datetime, timezone
@@ -9,13 +10,16 @@ from typing import Any, Generic, TypeVar

 from cognite.client import CogniteClient
 from cognite.client.data_classes._base import WriteableCogniteResource
+from cognite.client.exceptions import CogniteAPIError
 from pydantic import AnyHttpUrl, ValidationError
 from rdflib import RDF, XSD, Literal, Namespace, URIRef

 from cognite.neat._constants import DEFAULT_NAMESPACE
 from cognite.neat._graph.extractors._base import BaseExtractor
+from cognite.neat._issues.warnings import CDFAuthWarning
 from cognite.neat._shared import Triple
 from cognite.neat._utils.auxiliary import string_to_ideal_type
+from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold

 T_CogniteResource = TypeVar("T_CogniteResource", bound=WriteableCogniteResource)

@@ -98,17 +102,11 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):

     def extract(self) -> Iterable[Triple]:
         """Extracts an asset with the given asset_id."""
-
-
-
-
-
-        else:
-            to_iterate = track(
-                self.items,
-                total=self.limit or self.total,
-                description=f"Extracting {type(self).__name__.removesuffix('Extractor')}",
-            )
+
+        if self.total is not None and self.total > 0:
+            to_iterate = iterate_progress_bar_if_above_config_threshold(
+                self.items, self.total, f"Extracting {type(self).__name__.removesuffix('Extractor')}"
+            )
         else:
             to_iterate = self.items
         for no, asset in enumerate(to_iterate):
@@ -221,7 +219,7 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
         camel_case: bool = True,
         as_write: bool = False,
     ):
-        total, items = cls._from_dataset(client, data_set_external_id)
+        total, items = cls._handle_no_access(lambda: cls._from_dataset(client, data_set_external_id))
         return cls(items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write)

     @classmethod
@@ -244,7 +242,7 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
         camel_case: bool = True,
         as_write: bool = False,
     ):
-        total, items = cls._from_hierarchy(client, root_asset_external_id)
+        total, items = cls._handle_no_access(lambda: cls._from_hierarchy(client, root_asset_external_id))
         return cls(items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write)

     @classmethod
@@ -273,3 +271,18 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
     @abstractmethod
     def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[T_CogniteResource]]:
         raise NotImplementedError
+
+    @classmethod
+    def _handle_no_access(
+        cls, action: Callable[[], tuple[int | None, Iterable[T_CogniteResource]]]
+    ) -> tuple[int | None, Iterable[T_CogniteResource]]:
+        try:
+            return action()
+        except CogniteAPIError as e:
+            if e.code == 403:
+                warnings.warn(
+                    CDFAuthWarning(f"extract {cls.__name__.removesuffix('Extractor').casefold()}", str(e)), stacklevel=2
+                )
+                return 0, []
+            else:
+                raise e
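The new `_handle_no_access` hook changes the failure mode for all classic-CDF extractors: a 403 from CDF no longer raises but is downgraded to a `CDFAuthWarning`, and the extractor behaves as if the source were empty. A sketch of the observable effect; the `AssetsExtractor` subclass, the client setup, and the keyword name are illustrative, only `_handle_no_access` itself comes from this diff:

```python
import warnings

from cognite.client import CogniteClient
from cognite.neat._graph.extractors import AssetsExtractor  # assumed concrete subclass

client = CogniteClient()  # assumed to be configured elsewhere

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # If the credentials lack read access, the CogniteAPIError(code=403) is
    # swallowed and the extractor is constructed over an empty iterable.
    extractor = AssetsExtractor.from_dataset(client, "my_data_set")

    assert list(extractor.extract()) == []
    assert any("extract" in str(w.message) for w in caught)

# Any other CogniteAPIError (401, 5xx, ...) is still re-raised unchanged.
```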
cognite/neat/_graph/extractors/_classic_cdf/_classic.py
CHANGED
@@ -226,4 +226,7 @@ class ClassicGraphExtractor(BaseExtractor):
     @staticmethod
     def _chunk(items: Sequence, description: str) -> Iterable:
         to_iterate: Iterable = chunker(items, chunk_size=1000)
-
+        if items:
+            return iterate_progress_bar(to_iterate, (len(items) // 1_000) + 1, description)
+        else:
+            return to_iterate
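The empty-sequence guard avoids starting a progress bar with a zero total. The total passed to the bar is the chunk count under `chunk_size=1000`; a quick check of the arithmetic (note it over-counts by one when the length is an exact multiple of 1 000):

```python
# Progress-bar total used above: (len(items) // 1_000) + 1
for n_items in (1, 999, 1_000, 2_500):
    print(n_items, "->", (n_items // 1_000) + 1)
# 1 -> 1, 999 -> 1, 1000 -> 2, 2500 -> 3  (1000 items actually yield 1 chunk)
```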
cognite/neat/_graph/extractors/_classic_cdf/_sequences.py
CHANGED
@@ -73,7 +73,7 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
         as_write: bool = False,
         unpack_columns: bool = False,
     ):
-        total, items = cls._from_dataset(client, data_set_external_id)
+        total, items = cls._handle_no_access(lambda: cls._from_dataset(client, data_set_external_id))
         return cls(
             items,
             namespace,
@@ -101,7 +101,7 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
         as_write: bool = False,
         unpack_columns: bool = False,
     ):
-        total, items = cls._from_hierarchy(client, root_asset_external_id)
+        total, items = cls._handle_no_access(lambda: cls._from_hierarchy(client, root_asset_external_id))
         return cls(
             items,
             namespace,
cognite/neat/_graph/loaders/_rdf2dms.py
CHANGED
@@ -37,6 +37,7 @@ from cognite.neat._rules.models.entities._single_value import ViewEntity
 from cognite.neat._shared import InstanceType
 from cognite.neat._store import NeatGraphStore
 from cognite.neat._utils.auxiliary import create_sha256_hash
+from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
 from cognite.neat._utils.rdf_ import remove_namespace_from_uri
 from cognite.neat._utils.upload import UploadResult

@@ -157,7 +158,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
         view_ids.append(f"{view_id!r} (self)")

         tracker = self._tracker(type(self).__name__, view_ids, "views")
-        for view_id, (view,
+        for view_id, (view, instance_count) in view_and_count_by_id.items():
             pydantic_cls, edge_by_type, issues = self._create_validation_classes(view)  # type: ignore[var-annotated]
             yield from issues
             tracker.issue(issues)
@@ -194,7 +195,11 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
         # this assumes no changes in the suffix of view and class
         reader = self.graph_store.read(view.external_id)

-        for identifier, properties in reader:
+        instance_iterable = iterate_progress_bar_if_above_config_threshold(
+            reader, instance_count, f"Loading {track_id}"
+        )
+
+        for identifier, properties in instance_iterable:
             if skip_properties:
                 properties = {k: v for k, v in properties.items() if k not in skip_properties}
             try:
cognite/neat/_graph/transformers/_base.py
CHANGED
@@ -8,7 +8,7 @@ from rdflib.query import ResultRow

 from cognite.neat._issues.warnings import NeatValueWarning
 from cognite.neat._shared import Triple
-from cognite.neat._utils.collection_ import iterate_progress_bar
+from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
 from cognite.neat._utils.graph_transformations_report import GraphTransformationResult

 To_Add_Triples: TypeAlias = list[Triple]
@@ -42,7 +42,6 @@ class BaseTransformerStandardised(ABC):
     description: str
     _use_only_once: bool = False
     _need_changes: ClassVar[frozenset[str]] = frozenset()
-    _use_iterate_bar_threshold: int = 500

     @abstractmethod
     def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
@@ -99,12 +98,9 @@ class BaseTransformerStandardised(ABC):
             return outcome

         result_iterable = graph.query(self._iterate_query())
-        if iteration_count > self._use_iterate_bar_threshold:
-            result_iterable = iterate_progress_bar(
-                result_iterable,
-                total=iteration_count,
-                description=self.description,
-            )
+        result_iterable = iterate_progress_bar_if_above_config_threshold(
+            result_iterable, iteration_count, self.description
+        )

         for row in result_iterable:
             row = cast(ResultRow, row)
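The per-transformer `_use_iterate_bar_threshold` attribute is gone; the threshold now lives in `NeatConfig`, and the decision is centralized in `iterate_progress_bar_if_above_config_threshold`. Its body is not part of this diff, but from the call sites `(iterable, total, description)` and the config fields, a plausible sketch of the contract (a hypothetical reimplementation, not the shipped code):

```python
from collections.abc import Iterable
from typing import TypeVar

from cognite.neat._config import GLOBAL_CONFIG
from cognite.neat._utils.collection_ import iterate_progress_bar

T = TypeVar("T")


def iterate_progress_bar_if_above_config_threshold(
    iterable: Iterable[T], total: int, description: str
) -> Iterable[T]:
    """Hypothetical: wrap `iterable` in a progress bar only for large enough totals."""
    threshold = GLOBAL_CONFIG.use_iterate_bar_threshold
    if threshold is None or total < threshold:
        return iterable  # small jobs iterate silently
    return iterate_progress_bar(iterable, total, description)
```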
cognite/neat/_graph/transformers/_classic_cdf.py
CHANGED
@@ -230,7 +230,7 @@ class AssetEventConnector(BaseAssetConnector):


 # TODO: standardise
-class AssetRelationshipConnector(BaseTransformer):
+class AssetRelationshipConnector(BaseTransformerStandardised):
     description: str = "Connects assets via relationships"
     _use_only_once: bool = True
     _need_changes = frozenset(
@@ -248,6 +248,44 @@ class AssetRelationshipConnector(BaseTransformer):
         ?target <{asset_xid_property}> ?target_xid .
         ?target a <{asset_type}> .}}"""

+    def _count_query(self) -> str:
+        query = """SELECT (COUNT(?target) as ?count) WHERE {{
+        ?relationship a <{relationship_type}> .
+        ?relationship <{relationship_source_xid_prop}> ?source_xid .
+        ?source <{asset_xid_property}> ?source_xid .
+        ?source a <{asset_type}> .
+
+        ?relationship <{relationship_target_xid_prop}> ?target_xid .
+        ?target <{asset_xid_property}> ?target_xid .
+        ?target a <{asset_type}> .}}"""
+
+        return query.format(
+            relationship_type=self.relationship_type,
+            relationship_source_xid_prop=self.relationship_source_xid_prop,
+            relationship_target_xid_prop=self.relationship_target_xid_prop,
+            asset_xid_property=self.asset_xid_property,
+            asset_type=self.asset_type,
+        )
+
+    def _iterate_query(self) -> str:
+        query = """SELECT ?source ?relationship ?target WHERE {{
+        ?relationship a <{relationship_type}> .
+        ?relationship <{relationship_source_xid_prop}> ?source_xid .
+        ?source <{asset_xid_property}> ?source_xid .
+        ?source a <{asset_type}> .
+
+        ?relationship <{relationship_target_xid_prop}> ?target_xid .
+        ?target <{asset_xid_property}> ?target_xid .
+        ?target a <{asset_type}> .}}"""
+
+        return query.format(
+            relationship_type=self.relationship_type,
+            relationship_source_xid_prop=self.relationship_source_xid_prop,
+            relationship_target_xid_prop=self.relationship_target_xid_prop,
+            asset_xid_property=self.asset_xid_property,
+            asset_type=self.asset_type,
+        )
+
     def __init__(
         self,
         asset_type: URIRef | None = None,
@@ -262,48 +300,20 @@ class AssetRelationshipConnector(BaseTransformer):
         self.relationship_target_xid_prop = relationship_target_xid_prop or DEFAULT_NAMESPACE.targetExternalId
         self.asset_xid_property = asset_xid_property or DEFAULT_NAMESPACE.externalId

-    def
-
-
-    ):
-        relationship_id: URIRef = cast(tuple, relationship_id_result)[0]
-
-        if assets_id_res := list(
-            graph.query(
-                self._asset_template.format(
-                    relationship_id=relationship_id,
-                    asset_xid_property=self.asset_xid_property,
-                    relationship_source_xid_prop=self.relationship_source_xid_prop,
-                    relationship_target_xid_prop=self.relationship_target_xid_prop,
-                    asset_type=self.asset_type,
-                )
-            )
-        ):
-            # files can be connected to multiple assets in the graph
-            for source_asset_id, target_asset_id in cast(list[tuple], assets_id_res):
-                # create a relationship between the two assets
-                graph.add(
-                    (
-                        source_asset_id,
-                        DEFAULT_NAMESPACE.relationship,
-                        relationship_id,
-                    )
-                )
-                graph.add(
-                    (
-                        target_asset_id,
-                        DEFAULT_NAMESPACE.relationship,
-                        relationship_id,
-                    )
-                )
+    def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
+        row_output = RowTransformationOutput()
+        source, relationship, target = query_result_row

-
-
-
+        row_output.add_triples.append(cast(Triple, (source, DEFAULT_NAMESPACE.relationship, target)))
+        row_output.add_triples.append(cast(Triple, (relationship, DEFAULT_NAMESPACE.source, source)))
+        row_output.add_triples.append(cast(Triple, (relationship, DEFAULT_NAMESPACE.target, target)))

-
-
-
+        row_output.remove_triples.append(cast(Triple, (relationship, self.relationship_source_xid_prop, None)))
+        row_output.remove_triples.append(cast(Triple, (relationship, self.relationship_target_xid_prop, None)))
+
+        row_output.instances_modified_count += 2
+
+        return row_output


 # TODO: standardise
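The refactor shows the shape any standardised transformer now takes: a SPARQL `_count_query` to size the progress bar, an `_iterate_query` to stream rows, and a pure `operation` that maps one result row to triples to add and remove. A minimal hypothetical transformer following that contract; the `RowTransformationOutput` fields are as used above, the import paths are assumed from this diff's file layout, and the class itself is illustrative, not part of neat:

```python
from typing import cast

from rdflib import URIRef
from rdflib.query import ResultRow

from cognite.neat._graph.transformers._base import (
    BaseTransformerStandardised,
    RowTransformationOutput,
)
from cognite.neat._shared import Triple

EX = "http://example.org/"


class TagEveryAsset(BaseTransformerStandardised):
    """Hypothetical transformer: adds a marker triple to every asset instance."""

    description: str = "Tags every asset with a marker"
    _use_only_once: bool = True

    def _count_query(self) -> str:
        return f"SELECT (COUNT(?s) as ?count) WHERE {{ ?s a <{EX}Asset> . }}"

    def _iterate_query(self) -> str:
        return f"SELECT ?s WHERE {{ ?s a <{EX}Asset> . }}"

    def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
        row_output = RowTransformationOutput()
        (subject,) = query_result_row  # one binding per row from _iterate_query
        row_output.add_triples.append(cast(Triple, (subject, URIRef(EX + "tag"), URIRef(EX + "seen"))))
        row_output.instances_modified_count += 1
        return row_output
```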
cognite/neat/_graph/transformers/_rdfpath.py
CHANGED
@@ -59,7 +59,7 @@ class AddSelfReferenceProperty(BaseTransformer):

 class MakeConnectionOnExactMatch(BaseTransformerStandardised):
     description: str = "Adds property that contains id of reference to all references of given class in Rules"
-    _use_only_once: bool = True
+    _use_only_once: bool = False
     _need_changes = frozenset({})

     def __init__(
cognite/neat/_rules/importers/_rdf/_inference2rules.py
CHANGED
@@ -17,6 +17,7 @@ from cognite.neat._rules.models.information import (
 )
 from cognite.neat._store import NeatGraphStore
 from cognite.neat._store._provenance import INSTANCES_ENTITY
+from cognite.neat._utils.collection_ import iterate_progress_bar
 from cognite.neat._utils.rdf_ import remove_namespace_from_uri, uri_to_short_form

 from ._base import DEFAULT_NON_EXISTING_NODE_TYPE, BaseRDFImporter
@@ -27,7 +28,6 @@ ORDERED_CLASSES_QUERY = """SELECT ?class (count(?s) as ?instances )
 WHERE { ?s a ?class . }
 group by ?class order by DESC(?instances)"""

-
 INSTANCES_OF_CLASS_QUERY = """SELECT ?s ?propertyCount WHERE { ?s a <class> . BIND ('Unknown' as ?propertyCount) }"""


@@ -171,8 +171,10 @@ class InferenceImporter(BaseRDFImporter):
             INSTANCES_OF_CLASS_QUERY if self.max_number_of_instance == -1 else INSTANCES_OF_CLASS_RICHNESS_ORDERED_QUERY
         )

+        classes_iterable = iterate_progress_bar(classes.items(), len(classes), "Inferring classes")
+
         # Infers all the properties of the class
-        for class_id, class_definition in classes.items():
+        for class_id, class_definition in classes_iterable:
             for (  # type: ignore[misc]
                 instance,
                 _,