cognite-neat 0.105.1__py3-none-any.whl → 0.106.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognite/neat/_config.py +6 -260
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +26 -13
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +4 -1
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +2 -2
- cognite/neat/_graph/loaders/_rdf2dms.py +7 -2
- cognite/neat/_graph/transformers/_base.py +4 -8
- cognite/neat/_graph/transformers/_classic_cdf.py +164 -80
- cognite/neat/_graph/transformers/_rdfpath.py +1 -1
- cognite/neat/_issues/warnings/_external.py +1 -1
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +4 -2
- cognite/neat/_rules/models/mapping/_classic2core.yaml +70 -58
- cognite/neat/_rules/transformers/_mapping.py +3 -2
- cognite/neat/_session/_base.py +6 -7
- cognite/neat/_session/_inspect.py +6 -2
- cognite/neat/_session/_mapping.py +6 -8
- cognite/neat/_session/_prepare.py +9 -10
- cognite/neat/_session/_read.py +35 -26
- cognite/neat/_session/_set.py +9 -0
- cognite/neat/_session/_state.py +3 -1
- cognite/neat/_session/_to.py +11 -13
- cognite/neat/_store/_graph_store.py +33 -28
- cognite/neat/_utils/auth.py +35 -15
- cognite/neat/_utils/collection_.py +32 -11
- cognite/neat/_version.py +1 -1
- {cognite_neat-0.105.1.dist-info → cognite_neat-0.106.0.dist-info}/METADATA +1 -7
- {cognite_neat-0.105.1.dist-info → cognite_neat-0.106.0.dist-info}/RECORD +29 -29
- {cognite_neat-0.105.1.dist-info → cognite_neat-0.106.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.105.1.dist-info → cognite_neat-0.106.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.105.1.dist-info → cognite_neat-0.106.0.dist-info}/entry_points.txt +0 -0
cognite/neat/_config.py
CHANGED
@@ -1,265 +1,11 @@
-import json
-import logging
-import os
-import shutil
-import sys
-from pathlib import Path
-from typing import Any, Literal, cast
+from typing import Literal
 
-import yaml
-from pydantic import BaseModel, Field, model_validator
-from yaml import safe_load
+from pydantic import BaseModel
 
-from cognite.neat._constants import EXAMPLE_GRAPHS, EXAMPLE_RULES, EXAMPLE_WORKFLOWS
-from cognite.neat._utils.auth import EnvironmentVariables
 
-if sys.version_info >= (3, 11):
-    from enum import StrEnum
-    from typing import Self
-else:
-    from backports.strenum import StrEnum
-    from typing_extensions import Self
+class NeatConfig(BaseModel, validate_assignment=True):
+    progress_bar: Literal["tqdm", "rich", "tqdm-notebook", "infer"] | None = "infer"
+    use_iterate_bar_threshold: int | None = 500
 
-LOG_FORMAT = "%(asctime)s.%(msecs)03d %(levelname)-8s %(message)s"
-LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
 
-
-class RulesStoreType(StrEnum):
-    """Rules Store type"""
-
-    CDF = "cdf"
-    FILE = "file"
-    URL = "url"
-    GOOGLE_SHEET = "google_sheet"
-
-
-class WorkflowsStoreType(StrEnum):
-    """Workflows Store type"""
-
-    CDF = "cdf"
-    FILE = "file"
-    URL = "url"
-
-
-class Config(BaseModel, arbitrary_types_allowed=True):
-    workflows_store_type: WorkflowsStoreType = WorkflowsStoreType.FILE
-    data_store_path: Path = Field(default_factory=lambda: Path.cwd() / "data")
-
-    workflow_downloader_filter: list[str] | None = Field(
-        description="List of workflow names+tags to filter on when downloading workflows from CDF. "
-        "Example name:workflow_name=version,tag:tag_name",
-        default=None,
-    )
-
-    cdf_auth_config: EnvironmentVariables = Field(default_factory=EnvironmentVariables.default)
-    cdf_default_dataset_id: int = 0
-    load_examples: bool = True
-
-    log_level: Literal["ERROR", "WARNING", "INFO", "DEBUG"] = "INFO"
-    log_format: str = LOG_FORMAT
-    download_workflows_from_cdf: bool = Field(
-        default=False,
-        description="Downloads all workflows from CDF automatically and stores them locally",
-    )
-    stop_on_error: bool = False
-
-    @model_validator(mode="before")
-    def backwards_compatible(cls, data: Any):
-        if not isinstance(data, dict):
-            return data
-        if "cdf_client" in data:
-            cdf_client = data["cdf_client"]
-            if isinstance(cdf_client, dict):
-                if "base_url" in cdf_client:
-                    base_url = cdf_client["base_url"]
-                    cluster = base_url.removeprefix("https://").removesuffix(".cognitedata.com")
-                else:
-                    base_url, cluster = "Missing", "Missing"
-                if "scopes" in cdf_client:
-                    scopes = cdf_client["scopes"]
-                    if isinstance(scopes, list):
-                        scopes = ",".join(scopes)
-                else:
-                    scopes = "Missing"
-                data["cdf_auth_config"] = EnvironmentVariables(
-                    CDF_PROJECT=cdf_client.get("project", "Missing"),
-                    CDF_CLUSTER=cluster,
-                    CDF_URL=base_url,
-                    IDP_CLIENT_ID=cdf_client.get("client_id", "Missing"),
-                    IDP_CLIENT_SECRET=cdf_client.get("client_secret", "Missing"),
-                    IDP_TOKEN_URL=cdf_client.get("token_url", "Missing"),
-                    IDP_SCOPES=scopes,
-                    CDF_TIMEOUT=int(cdf_client.get("timeout", 60)),
-                    CDF_MAX_WORKERS=int(cdf_client.get("max_workers", 3)),
-                )
-        return data
-
-    def as_legacy_config(
-        self,
-    ) -> dict[str, Any]:
-        config: dict[str, Any] = {}
-
-        config["workflows_store_type"] = self.workflows_store_type
-        config["data_store_path"] = str(self.data_store_path)
-        config["workflows_downloader_filter"] = self.workflow_downloader_filter
-
-        config["cdf_client"] = {}
-        if self.cdf_auth_config.CDF_PROJECT not in {"Missing", "NOT SET"}:
-            config["cdf_client"]["project"] = self.cdf_auth_config.CDF_PROJECT
-        if self.cdf_auth_config.CDF_CLUSTER not in {"Missing", "NOT SET"}:
-            config["cdf_client"]["cluster"] = self.cdf_auth_config.CDF_CLUSTER
-        if self.cdf_auth_config.CDF_URL:
-            config["cdf_client"]["base_url"] = self.cdf_auth_config.CDF_URL
-        if self.cdf_auth_config.IDP_CLIENT_ID:
-            config["cdf_client"]["client_id"] = self.cdf_auth_config.IDP_CLIENT_ID
-        if self.cdf_auth_config.IDP_CLIENT_SECRET:
-            config["cdf_client"]["client_secret"] = self.cdf_auth_config.IDP_CLIENT_SECRET
-        if self.cdf_auth_config.IDP_TOKEN_URL:
-            config["cdf_client"]["token_url"] = self.cdf_auth_config.IDP_TOKEN_URL
-        if self.cdf_auth_config.IDP_SCOPES:
-            config["cdf_client"]["scopes"] = self.cdf_auth_config.idp_scopes
-        if self.cdf_auth_config.CDF_TIMEOUT:
-            config["cdf_client"]["timeout"] = self.cdf_auth_config.CDF_TIMEOUT
-        if self.cdf_auth_config.CDF_MAX_WORKERS:
-            config["cdf_client"]["max_workers"] = self.cdf_auth_config.CDF_MAX_WORKERS
-
-        config["cdf_default_dataset_id"] = self.cdf_default_dataset_id
-        config["load_examples"] = self.load_examples
-        config["log_level"] = self.log_level
-        config["log_format"] = self.log_format
-        config["download_workflows_from_cdf"] = self.download_workflows_from_cdf
-        config["stop_on_error"] = self.stop_on_error
-
-        return config
-
-    @property
-    def _dir_suffix(self) -> str:
-        is_test_running = "pytest" in sys.modules
-        if is_test_running:
-            # Todo change the below to f"-{os.getpid()}" when all tests supports parallel execution.
-            return ""
-        return ""
-
-    @property
-    def rules_store_path(self) -> Path:
-        return self.data_store_path / f"rules{self._dir_suffix}"
-
-    @property
-    def workflows_store_path(self) -> Path:
-        return self.data_store_path / f"workflows{self._dir_suffix}"
-
-    @property
-    def source_graph_path(self) -> Path:
-        return self.data_store_path / f"source-graphs{self._dir_suffix}"
-
-    @property
-    def staging_path(self) -> Path:
-        return self.data_store_path / f"staging{self._dir_suffix}"
-
-    @classmethod
-    def from_yaml(cls, filepath: Path) -> Self:
-        return cls(**safe_load(filepath.read_text()))
-
-    def to_yaml(self, filepath: Path):
-        # Parse as json to avoid Path and Enum objects
-        dump = json.loads(self.model_dump_json())
-
-        with filepath.open("w") as f:
-            yaml.safe_dump(dump, f)
-
-    @classmethod
-    def from_env(cls) -> Self:
-        missing = "Missing"
-        # This is to be backwards compatible with the old config
-
-        base_url: str | None = None
-        if "NEAT_CDF_BASE_URL" in os.environ:
-            base_url = os.environ["NEAT_CDF_BASE_URL"]
-        if isinstance(base_url, str):
-            cluster = base_url.removeprefix("https://").removesuffix(".cognitedata.com")
-        else:
-            cluster = missing
-        variables = EnvironmentVariables(
-            CDF_PROJECT=os.environ.get("NEAT_CDF_PROJECT", missing),
-            CDF_CLUSTER=cluster,
-            CDF_URL=base_url,
-            IDP_CLIENT_ID=os.environ.get("NEAT_CDF_CLIENT_ID"),
-            IDP_CLIENT_SECRET=os.environ.get("NEAT_CDF_CLIENT_SECRET"),
-            IDP_TOKEN_URL=os.environ.get("NEAT_CDF_TOKEN_URL"),
-            IDP_SCOPES=os.environ.get("NEAT_CDF_SCOPES"),
-            CDF_TIMEOUT=int(os.environ["NEAT_CDF_CLIENT_TIMEOUT"] if "NEAT_CDF_CLIENT_TIMEOUT" in os.environ else 60),
-            CDF_MAX_WORKERS=int(
-                os.environ["NEAT_CDF_CLIENT_MAX_WORKERS"] if "NEAT_CDF_CLIENT_MAX_WORKERS" in os.environ else 3
-            ),
-        )
-
-        if workflow_downloader_filter_value := os.environ.get("NEAT_WORKFLOW_DOWNLOADER_FILTER", None):
-            workflow_downloader_filter = workflow_downloader_filter_value.split(",")
-        else:
-            workflow_downloader_filter = None
-
-        return cls(
-            cdf_auth_config=variables,
-            workflows_store_type=os.environ.get(  # type: ignore[arg-type]
-                "NEAT_WORKFLOWS_STORE_TYPE", WorkflowsStoreType.FILE
-            ),
-            data_store_path=Path(os.environ.get("NEAT_DATA_PATH", "_app/data")),
-            cdf_default_dataset_id=int(os.environ.get("NEAT_CDF_DEFAULT_DATASET_ID", 6476640149881990)),
-            log_level=cast(
-                Literal["ERROR", "WARNING", "INFO", "DEBUG"],
-                os.environ.get("NEAT_LOG_LEVEL", "INFO"),
-            ),
-            workflow_downloader_filter=workflow_downloader_filter,
-            load_examples=bool(os.environ.get("NEAT_LOAD_EXAMPLES", True) in ["True", "true", "1"]),
-        )
-
-
-def copy_examples_to_directory(config: Config):
-    """
-    Copy over all the examples to the target data directory,
-    without overwriting
-
-    Args:
-        target_data_dir : The target directory
-        suffix : The suffix to add to the directory names
-
-    """
-
-    print(f"Copying examples into {config.data_store_path}")
-    _copy_examples(EXAMPLE_RULES, config.rules_store_path)
-    _copy_examples(EXAMPLE_GRAPHS, config.source_graph_path)
-    _copy_examples(EXAMPLE_WORKFLOWS, config.workflows_store_path)
-    config.staging_path.mkdir(exist_ok=True, parents=True)
-
-
-def create_data_dir_structure(config: Config) -> None:
-    """
-    Create the data directory structure in empty directory
-
-    Args:
-        target_data_dir : The target directory
-        suffix : The suffix to add to the directory names
-
-    """
-    for path in (
-        config.rules_store_path,
-        config.source_graph_path,
-        config.staging_path,
-        config.workflows_store_path,
-    ):
-        path.mkdir(exist_ok=True, parents=True)
-
-
-def _copy_examples(source_dir: Path, target_dir: Path):
-    for current in source_dir.rglob("*"):
-        if current.is_dir():
-            continue
-        relative = current.relative_to(source_dir)
-        if not (target := target_dir / relative).exists():
-            target.parent.mkdir(exist_ok=True, parents=True)
-            shutil.copy2(current, target)
-
-
-def configure_logging(level: str = "DEBUG", log_format: str = LOG_FORMAT):
-    """Configure logging based on config."""
-    logging.basicConfig(format=log_format, level=logging.getLevelName(level), datefmt=LOG_DATE_FORMAT)
+GLOBAL_CONFIG = NeatConfig()
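The new module-level `NeatConfig`/`GLOBAL_CONFIG` pair replaces the old workflow-oriented `Config` entirely. Both fields feed a single helper, `iterate_progress_bar_if_above_config_threshold`, in `cognite/neat/_utils/collection_.py`, which this release changes (+32 -11) but which is not shown in this excerpt. The following is a sketch of that helper inferred from its call sites in the hunks below; the exact threshold comparison and the delegation to `iterate_progress_bar` are assumptions:

```python
from collections.abc import Iterable
from typing import TypeVar

from cognite.neat._config import GLOBAL_CONFIG
from cognite.neat._utils.collection_ import iterate_progress_bar

T = TypeVar("T")


def iterate_progress_bar_if_above_config_threshold(
    iterable: Iterable[T], total: int, description: str
) -> Iterable[T]:
    # Skip the bar when the user disabled the threshold (None) or the
    # workload is too small to be worth rendering one.
    threshold = GLOBAL_CONFIG.use_iterate_bar_threshold
    if threshold is None or total < threshold:
        return iterable
    # Which bar is rendered (tqdm, rich, tqdm-notebook, or auto-detection
    # via "infer") is presumably governed by GLOBAL_CONFIG.progress_bar.
    return iterate_progress_bar(iterable, total, description)
```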
cognite/neat/_graph/extractors/_classic_cdf/_base.py
CHANGED
@@ -1,6 +1,7 @@
 import json
 import re
 import sys
+import warnings
 from abc import ABC, abstractmethod
 from collections.abc import Callable, Iterable, Sequence, Set
 from datetime import datetime, timezone
@@ -9,13 +10,16 @@ from typing import Any, Generic, TypeVar
 
 from cognite.client import CogniteClient
 from cognite.client.data_classes._base import WriteableCogniteResource
+from cognite.client.exceptions import CogniteAPIError
 from pydantic import AnyHttpUrl, ValidationError
 from rdflib import RDF, XSD, Literal, Namespace, URIRef
 
 from cognite.neat._constants import DEFAULT_NAMESPACE
 from cognite.neat._graph.extractors._base import BaseExtractor
+from cognite.neat._issues.warnings import CDFAuthWarning
 from cognite.neat._shared import Triple
 from cognite.neat._utils.auxiliary import string_to_ideal_type
+from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
 
 T_CogniteResource = TypeVar("T_CogniteResource", bound=WriteableCogniteResource)
 
@@ -98,17 +102,11 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
 
     def extract(self) -> Iterable[Triple]:
         """Extracts an asset with the given asset_id."""
-
-
-
-
-
-        else:
-            to_iterate = track(
-                self.items,
-                total=self.limit or self.total,
-                description=f"Extracting {type(self).__name__.removesuffix('Extractor')}",
-            )
+
+        if self.total is not None and self.total > 0:
+            to_iterate = iterate_progress_bar_if_above_config_threshold(
+                self.items, self.total, f"Extracting {type(self).__name__.removesuffix('Extractor')}"
+            )
         else:
             to_iterate = self.items
         for no, asset in enumerate(to_iterate):
@@ -221,7 +219,7 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
         camel_case: bool = True,
         as_write: bool = False,
     ):
-        total, items = cls._from_dataset(client, data_set_external_id)
+        total, items = cls._handle_no_access(lambda: cls._from_dataset(client, data_set_external_id))
         return cls(items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write)
 
     @classmethod
@@ -244,7 +242,7 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
         camel_case: bool = True,
         as_write: bool = False,
     ):
-        total, items = cls._from_hierarchy(client, root_asset_external_id)
+        total, items = cls._handle_no_access(lambda: cls._from_hierarchy(client, root_asset_external_id))
         return cls(items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write)
 
     @classmethod
@@ -273,3 +271,18 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
     @abstractmethod
     def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[T_CogniteResource]]:
         raise NotImplementedError
+
+    @classmethod
+    def _handle_no_access(
+        cls, action: Callable[[], tuple[int | None, Iterable[T_CogniteResource]]]
+    ) -> tuple[int | None, Iterable[T_CogniteResource]]:
+        try:
+            return action()
+        except CogniteAPIError as e:
+            if e.code == 403:
+                warnings.warn(
+                    CDFAuthWarning(f"extract {cls.__name__.removesuffix('Extractor').casefold()}", str(e)), stacklevel=2
+                )
+                return 0, []
+            else:
+                raise e
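The practical effect of `_handle_no_access`: when the credentials lack read access to a resource type, the extractor now degrades to an empty extraction plus a `CDFAuthWarning` instead of raising. A hedged sketch of what a caller observes; `AssetsExtractor` and the `from_dataset` classmethod name are assumed here as one concrete subclass of `ClassicCDFBaseExtractor`:

```python
import warnings

from cognite.client import CogniteClient

from cognite.neat._graph.extractors import AssetsExtractor  # subclass name assumed

client = CogniteClient()  # assumes a default client configuration is in place

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    extractor = AssetsExtractor.from_dataset(client, data_set_external_id="my_data_set")
    triples = list(extractor.extract())

# Without assetsAcl:READ, 0.106.0 yields (total=0, items=[]) plus one
# CDFAuthWarning rather than propagating CogniteAPIError(code=403).
print(len(triples), [type(w.message).__name__ for w in caught])
```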
cognite/neat/_graph/extractors/_classic_cdf/_classic.py
CHANGED
@@ -226,4 +226,7 @@ class ClassicGraphExtractor(BaseExtractor):
     @staticmethod
     def _chunk(items: Sequence, description: str) -> Iterable:
         to_iterate: Iterable = chunker(items, chunk_size=1000)
-        return iterate_progress_bar(to_iterate, (len(items) // 1_000) + 1, description)
+        if items:
+            return iterate_progress_bar(to_iterate, (len(items) // 1_000) + 1, description)
+        else:
+            return to_iterate
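For context, `chunker` batches the collected ids into groups of 1 000, so the bar total is `(len(items) // 1_000) + 1`, and the new `if items:` guard avoids rendering a one-chunk progress bar for an empty input. A minimal sketch of `chunker`, consistent with the call site above (the real implementation in `cognite/neat/_utils/collection_.py` is not shown in this excerpt):

```python
from collections.abc import Iterable, Sequence
from typing import TypeVar

T = TypeVar("T")


def chunker(items: Sequence[T], chunk_size: int) -> Iterable[Sequence[T]]:
    """Yield consecutive slices of at most `chunk_size` items."""
    for start in range(0, len(items), chunk_size):
        yield items[start : start + chunk_size]
```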
cognite/neat/_graph/extractors/_classic_cdf/_sequences.py
CHANGED
@@ -73,7 +73,7 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
         as_write: bool = False,
         unpack_columns: bool = False,
     ):
-        total, items = cls._from_dataset(client, data_set_external_id)
+        total, items = cls._handle_no_access(lambda: cls._from_dataset(client, data_set_external_id))
         return cls(
             items,
             namespace,
@@ -101,7 +101,7 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
         as_write: bool = False,
         unpack_columns: bool = False,
     ):
-        total, items = cls._from_hierarchy(client, root_asset_external_id)
+        total, items = cls._handle_no_access(lambda: cls._from_hierarchy(client, root_asset_external_id))
         return cls(
             items,
             namespace,
cognite/neat/_graph/loaders/_rdf2dms.py
CHANGED
@@ -37,6 +37,7 @@ from cognite.neat._rules.models.entities._single_value import ViewEntity
 from cognite.neat._shared import InstanceType
 from cognite.neat._store import NeatGraphStore
 from cognite.neat._utils.auxiliary import create_sha256_hash
+from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
 from cognite.neat._utils.rdf_ import remove_namespace_from_uri
 from cognite.neat._utils.upload import UploadResult
 
@@ -157,7 +158,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
             view_ids.append(f"{view_id!r} (self)")
 
         tracker = self._tracker(type(self).__name__, view_ids, "views")
-        for view_id, (view,
+        for view_id, (view, instance_count) in view_and_count_by_id.items():
             pydantic_cls, edge_by_type, issues = self._create_validation_classes(view)  # type: ignore[var-annotated]
             yield from issues
             tracker.issue(issues)
@@ -194,7 +195,11 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
         # this assumes no changes in the suffix of view and class
         reader = self.graph_store.read(view.external_id)
 
-        for identifier, properties in reader:
+        instance_iterable = iterate_progress_bar_if_above_config_threshold(
+            reader, instance_count, f"Loading {track_id}"
+        )
+
+        for identifier, properties in instance_iterable:
             if skip_properties:
                 properties = {k: v for k, v in properties.items() if k not in skip_properties}
             try:
cognite/neat/_graph/transformers/_base.py
CHANGED
@@ -8,7 +8,7 @@ from rdflib.query import ResultRow
 
 from cognite.neat._issues.warnings import NeatValueWarning
 from cognite.neat._shared import Triple
-from cognite.neat._utils.collection_ import iterate_progress_bar
+from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
 from cognite.neat._utils.graph_transformations_report import GraphTransformationResult
 
 To_Add_Triples: TypeAlias = list[Triple]
@@ -42,7 +42,6 @@ class BaseTransformerStandardised(ABC):
     description: str
     _use_only_once: bool = False
    _need_changes: ClassVar[frozenset[str]] = frozenset()
-    _use_iterate_bar_threshold: int = 500
 
     @abstractmethod
     def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
@@ -99,12 +98,9 @@ class BaseTransformerStandardised(ABC):
             return outcome
 
         result_iterable = graph.query(self._iterate_query())
-        if iteration_count > self._use_iterate_bar_threshold:
-            result_iterable = iterate_progress_bar(
-                result_iterable,
-                total=iteration_count,
-                description=self.description,
-            )
+        result_iterable = iterate_progress_bar_if_above_config_threshold(
+            result_iterable, iteration_count, self.description
+        )
 
         for row in result_iterable:
             row = cast(ResultRow, row)