cognite-neat 0.105.2__py3-none-any.whl → 0.106.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. cognite/neat/_config.py +6 -260
  2. cognite/neat/_graph/extractors/_classic_cdf/_base.py +26 -13
  3. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +4 -1
  4. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +2 -2
  5. cognite/neat/_graph/loaders/_rdf2dms.py +7 -2
  6. cognite/neat/_graph/transformers/_base.py +4 -8
  7. cognite/neat/_graph/transformers/_classic_cdf.py +51 -41
  8. cognite/neat/_graph/transformers/_rdfpath.py +1 -1
  9. cognite/neat/_issues/warnings/_external.py +1 -1
  10. cognite/neat/_rules/importers/_rdf/_inference2rules.py +4 -2
  11. cognite/neat/_rules/models/mapping/_classic2core.yaml +70 -58
  12. cognite/neat/_rules/transformers/_mapping.py +3 -2
  13. cognite/neat/_session/_base.py +6 -7
  14. cognite/neat/_session/_inspect.py +6 -2
  15. cognite/neat/_session/_mapping.py +6 -8
  16. cognite/neat/_session/_prepare.py +9 -10
  17. cognite/neat/_session/_read.py +35 -26
  18. cognite/neat/_session/_set.py +9 -0
  19. cognite/neat/_session/_state.py +3 -1
  20. cognite/neat/_session/_to.py +11 -13
  21. cognite/neat/_store/_graph_store.py +33 -28
  22. cognite/neat/_utils/collection_.py +32 -11
  23. cognite/neat/_version.py +1 -1
  24. {cognite_neat-0.105.2.dist-info → cognite_neat-0.106.0.dist-info}/METADATA +1 -7
  25. {cognite_neat-0.105.2.dist-info → cognite_neat-0.106.0.dist-info}/RECORD +28 -28
  26. {cognite_neat-0.105.2.dist-info → cognite_neat-0.106.0.dist-info}/LICENSE +0 -0
  27. {cognite_neat-0.105.2.dist-info → cognite_neat-0.106.0.dist-info}/WHEEL +0 -0
  28. {cognite_neat-0.105.2.dist-info → cognite_neat-0.106.0.dist-info}/entry_points.txt +0 -0
cognite/neat/_config.py CHANGED
@@ -1,265 +1,11 @@
- import json
- import logging
- import os
- import shutil
- import sys
- from pathlib import Path
- from typing import Any, Literal, cast
+ from typing import Literal

- import yaml
- from pydantic import BaseModel, Field, model_validator
- from yaml import safe_load
+ from pydantic import BaseModel

- from cognite.neat._constants import EXAMPLE_GRAPHS, EXAMPLE_RULES, EXAMPLE_WORKFLOWS
- from cognite.neat._utils.auth import EnvironmentVariables

- if sys.version_info >= (3, 11):
-     from enum import StrEnum
-     from typing import Self
- else:
-     from backports.strenum import StrEnum
-     from typing_extensions import Self
+ class NeatConfig(BaseModel, validate_assignment=True):
+     progress_bar: Literal["tqdm", "rich", "tqdm-notebook", "infer"] | None = "infer"
+     use_iterate_bar_threshold: int | None = 500

- LOG_FORMAT = "%(asctime)s.%(msecs)03d %(levelname)-8s %(message)s"
- LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

-
- class RulesStoreType(StrEnum):
-     """Rules Store type"""
-
-     CDF = "cdf"
-     FILE = "file"
-     URL = "url"
-     GOOGLE_SHEET = "google_sheet"
-
-
- class WorkflowsStoreType(StrEnum):
-     """Workflows Store type"""
-
-     CDF = "cdf"
-     FILE = "file"
-     URL = "url"
-
-
- class Config(BaseModel, arbitrary_types_allowed=True):
-     workflows_store_type: WorkflowsStoreType = WorkflowsStoreType.FILE
-     data_store_path: Path = Field(default_factory=lambda: Path.cwd() / "data")
-
-     workflow_downloader_filter: list[str] | None = Field(
-         description="List of workflow names+tags to filter on when downloading workflows from CDF. "
-         "Example name:workflow_name=version,tag:tag_name",
-         default=None,
-     )
-
-     cdf_auth_config: EnvironmentVariables = Field(default_factory=EnvironmentVariables.default)
-     cdf_default_dataset_id: int = 0
-     load_examples: bool = True
-
-     log_level: Literal["ERROR", "WARNING", "INFO", "DEBUG"] = "INFO"
-     log_format: str = LOG_FORMAT
-     download_workflows_from_cdf: bool = Field(
-         default=False,
-         description="Downloads all workflows from CDF automatically and stores them locally",
-     )
-     stop_on_error: bool = False
-
-     @model_validator(mode="before")
-     def backwards_compatible(cls, data: Any):
-         if not isinstance(data, dict):
-             return data
-         if "cdf_client" in data:
-             cdf_client = data["cdf_client"]
-             if isinstance(cdf_client, dict):
-                 if "base_url" in cdf_client:
-                     base_url = cdf_client["base_url"]
-                     cluster = base_url.removeprefix("https://").removesuffix(".cognitedata.com")
-                 else:
-                     base_url, cluster = "Missing", "Missing"
-                 if "scopes" in cdf_client:
-                     scopes = cdf_client["scopes"]
-                     if isinstance(scopes, list):
-                         scopes = ",".join(scopes)
-                 else:
-                     scopes = "Missing"
-                 data["cdf_auth_config"] = EnvironmentVariables(
-                     CDF_PROJECT=cdf_client.get("project", "Missing"),
-                     CDF_CLUSTER=cluster,
-                     CDF_URL=base_url,
-                     IDP_CLIENT_ID=cdf_client.get("client_id", "Missing"),
-                     IDP_CLIENT_SECRET=cdf_client.get("client_secret", "Missing"),
-                     IDP_TOKEN_URL=cdf_client.get("token_url", "Missing"),
-                     IDP_SCOPES=scopes,
-                     CDF_TIMEOUT=int(cdf_client.get("timeout", 60)),
-                     CDF_MAX_WORKERS=int(cdf_client.get("max_workers", 3)),
-                 )
-         return data
-
-     def as_legacy_config(
-         self,
-     ) -> dict[str, Any]:
-         config: dict[str, Any] = {}
-
-         config["workflows_store_type"] = self.workflows_store_type
-         config["data_store_path"] = str(self.data_store_path)
-         config["workflows_downloader_filter"] = self.workflow_downloader_filter
-
-         config["cdf_client"] = {}
-         if self.cdf_auth_config.CDF_PROJECT not in {"Missing", "NOT SET"}:
-             config["cdf_client"]["project"] = self.cdf_auth_config.CDF_PROJECT
-         if self.cdf_auth_config.CDF_CLUSTER not in {"Missing", "NOT SET"}:
-             config["cdf_client"]["cluster"] = self.cdf_auth_config.CDF_CLUSTER
-         if self.cdf_auth_config.CDF_URL:
-             config["cdf_client"]["base_url"] = self.cdf_auth_config.CDF_URL
-         if self.cdf_auth_config.IDP_CLIENT_ID:
-             config["cdf_client"]["client_id"] = self.cdf_auth_config.IDP_CLIENT_ID
-         if self.cdf_auth_config.IDP_CLIENT_SECRET:
-             config["cdf_client"]["client_secret"] = self.cdf_auth_config.IDP_CLIENT_SECRET
-         if self.cdf_auth_config.IDP_TOKEN_URL:
-             config["cdf_client"]["token_url"] = self.cdf_auth_config.IDP_TOKEN_URL
-         if self.cdf_auth_config.IDP_SCOPES:
-             config["cdf_client"]["scopes"] = self.cdf_auth_config.idp_scopes
-         if self.cdf_auth_config.CDF_TIMEOUT:
-             config["cdf_client"]["timeout"] = self.cdf_auth_config.CDF_TIMEOUT
-         if self.cdf_auth_config.CDF_MAX_WORKERS:
-             config["cdf_client"]["max_workers"] = self.cdf_auth_config.CDF_MAX_WORKERS
-
-         config["cdf_default_dataset_id"] = self.cdf_default_dataset_id
-         config["load_examples"] = self.load_examples
-         config["log_level"] = self.log_level
-         config["log_format"] = self.log_format
-         config["download_workflows_from_cdf"] = self.download_workflows_from_cdf
-         config["stop_on_error"] = self.stop_on_error
-
-         return config
-
-     @property
-     def _dir_suffix(self) -> str:
-         is_test_running = "pytest" in sys.modules
-         if is_test_running:
-             # Todo change the below to f"-{os.getpid()}" when all tests supports parallel execution.
-             return ""
-         return ""
-
-     @property
-     def rules_store_path(self) -> Path:
-         return self.data_store_path / f"rules{self._dir_suffix}"
-
-     @property
-     def workflows_store_path(self) -> Path:
-         return self.data_store_path / f"workflows{self._dir_suffix}"
-
-     @property
-     def source_graph_path(self) -> Path:
-         return self.data_store_path / f"source-graphs{self._dir_suffix}"
-
-     @property
-     def staging_path(self) -> Path:
-         return self.data_store_path / f"staging{self._dir_suffix}"
-
-     @classmethod
-     def from_yaml(cls, filepath: Path) -> Self:
-         return cls(**safe_load(filepath.read_text()))
-
-     def to_yaml(self, filepath: Path):
-         # Parse as json to avoid Path and Enum objects
-         dump = json.loads(self.model_dump_json())
-
-         with filepath.open("w") as f:
-             yaml.safe_dump(dump, f)
-
-     @classmethod
-     def from_env(cls) -> Self:
-         missing = "Missing"
-         # This is to be backwards compatible with the old config
-
-         base_url: str | None = None
-         if "NEAT_CDF_BASE_URL" in os.environ:
-             base_url = os.environ["NEAT_CDF_BASE_URL"]
-         if isinstance(base_url, str):
-             cluster = base_url.removeprefix("https://").removesuffix(".cognitedata.com")
-         else:
-             cluster = missing
-         variables = EnvironmentVariables(
-             CDF_PROJECT=os.environ.get("NEAT_CDF_PROJECT", missing),
-             CDF_CLUSTER=cluster,
-             CDF_URL=base_url,
-             IDP_CLIENT_ID=os.environ.get("NEAT_CDF_CLIENT_ID"),
-             IDP_CLIENT_SECRET=os.environ.get("NEAT_CDF_CLIENT_SECRET"),
-             IDP_TOKEN_URL=os.environ.get("NEAT_CDF_TOKEN_URL"),
-             IDP_SCOPES=os.environ.get("NEAT_CDF_SCOPES"),
-             CDF_TIMEOUT=int(os.environ["NEAT_CDF_CLIENT_TIMEOUT"] if "NEAT_CDF_CLIENT_TIMEOUT" in os.environ else 60),
-             CDF_MAX_WORKERS=int(
-                 os.environ["NEAT_CDF_CLIENT_MAX_WORKERS"] if "NEAT_CDF_CLIENT_MAX_WORKERS" in os.environ else 3
-             ),
-         )
-
-         if workflow_downloader_filter_value := os.environ.get("NEAT_WORKFLOW_DOWNLOADER_FILTER", None):
-             workflow_downloader_filter = workflow_downloader_filter_value.split(",")
-         else:
-             workflow_downloader_filter = None
-
-         return cls(
-             cdf_auth_config=variables,
-             workflows_store_type=os.environ.get(  # type: ignore[arg-type]
-                 "NEAT_WORKFLOWS_STORE_TYPE", WorkflowsStoreType.FILE
-             ),
-             data_store_path=Path(os.environ.get("NEAT_DATA_PATH", "_app/data")),
-             cdf_default_dataset_id=int(os.environ.get("NEAT_CDF_DEFAULT_DATASET_ID", 6476640149881990)),
-             log_level=cast(
-                 Literal["ERROR", "WARNING", "INFO", "DEBUG"],
-                 os.environ.get("NEAT_LOG_LEVEL", "INFO"),
-             ),
-             workflow_downloader_filter=workflow_downloader_filter,
-             load_examples=bool(os.environ.get("NEAT_LOAD_EXAMPLES", True) in ["True", "true", "1"]),
-         )
-
-
- def copy_examples_to_directory(config: Config):
-     """
-     Copier over all the examples to the target_data_directory,
-     without overwriting
-
-     Args:
-         target_data_dir : The target directory
-         suffix : The suffix to add to the directory names
-
-     """
-
-     print(f"Copying examples into {config.data_store_path}")
-     _copy_examples(EXAMPLE_RULES, config.rules_store_path)
-     _copy_examples(EXAMPLE_GRAPHS, config.source_graph_path)
-     _copy_examples(EXAMPLE_WORKFLOWS, config.workflows_store_path)
-     config.staging_path.mkdir(exist_ok=True, parents=True)
-
-
- def create_data_dir_structure(config: Config) -> None:
-     """
-     Create the data directory structure in empty directory
-
-     Args:
-         target_data_dir : The target directory
-         suffix : The suffix to add to the directory names
-
-     """
-     for path in (
-         config.rules_store_path,
-         config.source_graph_path,
-         config.staging_path,
-         config.workflows_store_path,
-     ):
-         path.mkdir(exist_ok=True, parents=True)
-
-
- def _copy_examples(source_dir: Path, target_dir: Path):
-     for current in source_dir.rglob("*"):
-         if current.is_dir():
-             continue
-         relative = current.relative_to(source_dir)
-         if not (target := target_dir / relative).exists():
-             target.parent.mkdir(exist_ok=True, parents=True)
-             shutil.copy2(current, target)
-
-
- def configure_logging(level: str = "DEBUG", log_format: str = LOG_FORMAT):
-     """Configure logging based on config."""
-     logging.basicConfig(format=log_format, level=logging.getLevelName(level), datefmt=LOG_DATE_FORMAT)
+ GLOBAL_CONFIG = NeatConfig()
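
Note: the rewrite above replaces the old file/env-driven `Config` (YAML round-tripping, legacy workflow settings, logging setup) with a two-field, in-memory `NeatConfig` whose module-level instance `GLOBAL_CONFIG` now drives progress-bar behaviour package-wide. A minimal usage sketch, assuming only what the new module shows (`validate_assignment=True` makes pydantic re-validate on attribute assignment; how `None` is interpreted is not visible in this diff):

    from cognite.neat._config import GLOBAL_CONFIG

    # Require more rows before a progress bar is shown.
    GLOBAL_CONFIG.use_iterate_bar_threshold = 10_000

    # Pick a progress-bar backend explicitly instead of "infer".
    GLOBAL_CONFIG.progress_bar = "tqdm"

    # validate_assignment=True means a bad value fails immediately:
    GLOBAL_CONFIG.progress_bar = "fancy"  # raises pydantic.ValidationError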
cognite/neat/_graph/extractors/_classic_cdf/_base.py CHANGED
@@ -1,6 +1,7 @@
  import json
  import re
  import sys
+ import warnings
  from abc import ABC, abstractmethod
  from collections.abc import Callable, Iterable, Sequence, Set
  from datetime import datetime, timezone
@@ -9,13 +10,16 @@ from typing import Any, Generic, TypeVar

  from cognite.client import CogniteClient
  from cognite.client.data_classes._base import WriteableCogniteResource
+ from cognite.client.exceptions import CogniteAPIError
  from pydantic import AnyHttpUrl, ValidationError
  from rdflib import RDF, XSD, Literal, Namespace, URIRef

  from cognite.neat._constants import DEFAULT_NAMESPACE
  from cognite.neat._graph.extractors._base import BaseExtractor
+ from cognite.neat._issues.warnings import CDFAuthWarning
  from cognite.neat._shared import Triple
  from cognite.neat._utils.auxiliary import string_to_ideal_type
+ from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold

  T_CogniteResource = TypeVar("T_CogniteResource", bound=WriteableCogniteResource)

@@ -98,17 +102,11 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):

      def extract(self) -> Iterable[Triple]:
          """Extracts an asset with the given asset_id."""
-         if self.total:
-             try:
-                 from rich.progress import track
-             except ModuleNotFoundError:
-                 to_iterate = self.items
-             else:
-                 to_iterate = track(
-                     self.items,
-                     total=self.limit or self.total,
-                     description=f"Extracting {type(self).__name__.removesuffix('Extractor')}",
-                 )
+
+         if self.total is not None and self.total > 0:
+             to_iterate = iterate_progress_bar_if_above_config_threshold(
+                 self.items, self.total, f"Extracting {type(self).__name__.removesuffix('Extractor')}"
+             )
          else:
              to_iterate = self.items
          for no, asset in enumerate(to_iterate):
@@ -221,7 +219,7 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
          camel_case: bool = True,
          as_write: bool = False,
      ):
-         total, items = cls._from_dataset(client, data_set_external_id)
+         total, items = cls._handle_no_access(lambda: cls._from_dataset(client, data_set_external_id))
          return cls(items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write)

      @classmethod
@@ -244,7 +242,7 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
          camel_case: bool = True,
          as_write: bool = False,
      ):
-         total, items = cls._from_hierarchy(client, root_asset_external_id)
+         total, items = cls._handle_no_access(lambda: cls._from_hierarchy(client, root_asset_external_id))
          return cls(items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write)

      @classmethod
@@ -273,3 +271,18 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
      @abstractmethod
      def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[T_CogniteResource]]:
          raise NotImplementedError
+
+     @classmethod
+     def _handle_no_access(
+         cls, action: Callable[[], tuple[int | None, Iterable[T_CogniteResource]]]
+     ) -> tuple[int | None, Iterable[T_CogniteResource]]:
+         try:
+             return action()
+         except CogniteAPIError as e:
+             if e.code == 403:
+                 warnings.warn(
+                     CDFAuthWarning(f"extract {cls.__name__.removesuffix('Extractor').casefold()}", str(e)), stacklevel=2
+                 )
+                 return 0, []
+             else:
+                 raise e
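
With `_handle_no_access`, the classic-CDF extractor constructors now downgrade a 403 from CDF into a `CDFAuthWarning` plus an empty extractor instead of letting `CogniteAPIError` propagate. A hedged sketch of the caller-side effect (the public classmethod name is not visible in this hunk, so `from_dataset` stands in for whichever constructor wraps `_from_dataset`; the import path is likewise assumed):

    import warnings

    from cognite.client import CogniteClient
    from cognite.neat._graph.extractors import SequencesExtractor  # path assumed

    client = CogniteClient()  # assumes credentials are configured elsewhere

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        extractor = SequencesExtractor.from_dataset(client, data_set_external_id="my_data_set")
        triples = list(extractor.extract())  # [] when access was denied

    # On a 403, the extractor is built with total=0 and no items, and `caught`
    # holds a CDFAuthWarning instead of the call raising CogniteAPIError.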
cognite/neat/_graph/extractors/_classic_cdf/_classic.py CHANGED
@@ -226,4 +226,7 @@ class ClassicGraphExtractor(BaseExtractor):
      @staticmethod
      def _chunk(items: Sequence, description: str) -> Iterable:
          to_iterate: Iterable = chunker(items, chunk_size=1000)
-         return iterate_progress_bar(to_iterate, (len(items) // 1_000) + 1, description)
+         if items:
+             return iterate_progress_bar(to_iterate, (len(items) // 1_000) + 1, description)
+         else:
+             return to_iterate
cognite/neat/_graph/extractors/_classic_cdf/_sequences.py CHANGED
@@ -73,7 +73,7 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
          as_write: bool = False,
          unpack_columns: bool = False,
      ):
-         total, items = cls._from_dataset(client, data_set_external_id)
+         total, items = cls._handle_no_access(lambda: cls._from_dataset(client, data_set_external_id))
          return cls(
              items,
              namespace,
@@ -101,7 +101,7 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
          as_write: bool = False,
          unpack_columns: bool = False,
      ):
-         total, items = cls._from_hierarchy(client, root_asset_external_id)
+         total, items = cls._handle_no_access(lambda: cls._from_hierarchy(client, root_asset_external_id))
          return cls(
              items,
              namespace,
cognite/neat/_graph/loaders/_rdf2dms.py CHANGED
@@ -37,6 +37,7 @@ from cognite.neat._rules.models.entities._single_value import ViewEntity
  from cognite.neat._shared import InstanceType
  from cognite.neat._store import NeatGraphStore
  from cognite.neat._utils.auxiliary import create_sha256_hash
+ from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
  from cognite.neat._utils.rdf_ import remove_namespace_from_uri
  from cognite.neat._utils.upload import UploadResult

@@ -157,7 +158,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
              view_ids.append(f"{view_id!r} (self)")

          tracker = self._tracker(type(self).__name__, view_ids, "views")
-         for view_id, (view, _) in view_and_count_by_id.items():
+         for view_id, (view, instance_count) in view_and_count_by_id.items():
              pydantic_cls, edge_by_type, issues = self._create_validation_classes(view)  # type: ignore[var-annotated]
              yield from issues
              tracker.issue(issues)
@@ -194,7 +195,11 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
              # this assumes no changes in the suffix of view and class
              reader = self.graph_store.read(view.external_id)

-             for identifier, properties in reader:
+             instance_iterable = iterate_progress_bar_if_above_config_threshold(
+                 reader, instance_count, f"Loading {track_id}"
+             )
+
+             for identifier, properties in instance_iterable:
                  if skip_properties:
                      properties = {k: v for k, v in properties.items() if k not in skip_properties}
                  try:
cognite/neat/_graph/transformers/_base.py CHANGED
@@ -8,7 +8,7 @@ from rdflib.query import ResultRow

  from cognite.neat._issues.warnings import NeatValueWarning
  from cognite.neat._shared import Triple
- from cognite.neat._utils.collection_ import iterate_progress_bar
+ from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
  from cognite.neat._utils.graph_transformations_report import GraphTransformationResult

  To_Add_Triples: TypeAlias = list[Triple]
@@ -42,7 +42,6 @@ class BaseTransformerStandardised(ABC):
      description: str
      _use_only_once: bool = False
      _need_changes: ClassVar[frozenset[str]] = frozenset()
-     _use_iterate_bar_threshold: int = 500

      @abstractmethod
      def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
@@ -99,12 +98,9 @@ class BaseTransformerStandardised(ABC):
              return outcome

          result_iterable = graph.query(self._iterate_query())
-         if iteration_count > self._use_iterate_bar_threshold:
-             result_iterable = iterate_progress_bar(  # type: ignore[misc, assignment]
-                 result_iterable,
-                 total=iteration_count,
-                 description=self.description,
-             )
+         result_iterable = iterate_progress_bar_if_above_config_threshold(
+             result_iterable, iteration_count, self.description
+         )

          for row in result_iterable:
              row = cast(ResultRow, row)
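
The per-transformer threshold removed here moves behind `iterate_progress_bar_if_above_config_threshold` in `cognite/neat/_utils/collection_.py` (+32 -11 in the file list, not shown in this diff). Judging from the call sites and the new `GLOBAL_CONFIG`, its shape is presumably close to this sketch:

    from collections.abc import Iterable
    from typing import TypeVar

    from cognite.neat._config import GLOBAL_CONFIG
    from cognite.neat._utils.collection_ import iterate_progress_bar

    T = TypeVar("T")

    def iterate_progress_bar_if_above_config_threshold(
        iterable: Iterable[T], total: int, description: str
    ) -> Iterable[T]:
        # Sketch: show a progress bar only when the item count exceeds the
        # globally configured threshold; otherwise iterate plainly.
        threshold = GLOBAL_CONFIG.use_iterate_bar_threshold
        if threshold is not None and total > threshold:
            return iterate_progress_bar(iterable, total, description)
        return iterable

In the real module this would sit next to `iterate_progress_bar` rather than import it, but the contract matches every call site in this diff: pass the iterable, its total, and a description, and let the global config decide.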
cognite/neat/_graph/transformers/_classic_cdf.py CHANGED
@@ -230,7 +230,7 @@ class AssetEventConnector(BaseAssetConnector):


  # TODO: standardise
- class AssetRelationshipConnector(BaseTransformer):
+ class AssetRelationshipConnector(BaseTransformerStandardised):
      description: str = "Connects assets via relationships"
      _use_only_once: bool = True
      _need_changes = frozenset(
@@ -248,6 +248,44 @@ class AssetRelationshipConnector(BaseTransformer):
          ?target <{asset_xid_property}> ?target_xid .
          ?target a <{asset_type}> .}}"""

+     def _count_query(self) -> str:
+         query = """SELECT (COUNT(?target) as ?count) WHERE {{
+             ?relationship a <{relationship_type}> .
+             ?relationship <{relationship_source_xid_prop}> ?source_xid .
+             ?source <{asset_xid_property}> ?source_xid .
+             ?source a <{asset_type}> .
+
+             ?relationship <{relationship_target_xid_prop}> ?target_xid .
+             ?target <{asset_xid_property}> ?target_xid .
+             ?target a <{asset_type}> .}}"""
+
+         return query.format(
+             relationship_type=self.relationship_type,
+             relationship_source_xid_prop=self.relationship_source_xid_prop,
+             relationship_target_xid_prop=self.relationship_target_xid_prop,
+             asset_xid_property=self.asset_xid_property,
+             asset_type=self.asset_type,
+         )
+
+     def _iterate_query(self) -> str:
+         query = """SELECT ?source ?relationship ?target WHERE {{
+             ?relationship a <{relationship_type}> .
+             ?relationship <{relationship_source_xid_prop}> ?source_xid .
+             ?source <{asset_xid_property}> ?source_xid .
+             ?source a <{asset_type}> .
+
+             ?relationship <{relationship_target_xid_prop}> ?target_xid .
+             ?target <{asset_xid_property}> ?target_xid .
+             ?target a <{asset_type}> .}}"""
+
+         return query.format(
+             relationship_type=self.relationship_type,
+             relationship_source_xid_prop=self.relationship_source_xid_prop,
+             relationship_target_xid_prop=self.relationship_target_xid_prop,
+             asset_xid_property=self.asset_xid_property,
+             asset_type=self.asset_type,
+         )
+
      def __init__(
          self,
          asset_type: URIRef | None = None,
@@ -262,48 +300,20 @@ class AssetRelationshipConnector(BaseTransformer):
          self.relationship_target_xid_prop = relationship_target_xid_prop or DEFAULT_NAMESPACE.targetExternalId
          self.asset_xid_property = asset_xid_property or DEFAULT_NAMESPACE.externalId

-     def transform(self, graph: Graph) -> None:
-         for relationship_id_result in graph.query(
-             f"SELECT DISTINCT ?relationship_id WHERE {{?relationship_id a <{self.relationship_type}>}}"
-         ):
-             relationship_id: URIRef = cast(tuple, relationship_id_result)[0]
-
-             if assets_id_res := list(
-                 graph.query(
-                     self._asset_template.format(
-                         relationship_id=relationship_id,
-                         asset_xid_property=self.asset_xid_property,
-                         relationship_source_xid_prop=self.relationship_source_xid_prop,
-                         relationship_target_xid_prop=self.relationship_target_xid_prop,
-                         asset_type=self.asset_type,
-                     )
-                 )
-             ):
-                 # files can be connected to multiple assets in the graph
-                 for source_asset_id, target_asset_id in cast(list[tuple], assets_id_res):
-                     # create a relationship between the two assets
-                     graph.add(
-                         (
-                             source_asset_id,
-                             DEFAULT_NAMESPACE.relationship,
-                             relationship_id,
-                         )
-                     )
-                     graph.add(
-                         (
-                             target_asset_id,
-                             DEFAULT_NAMESPACE.relationship,
-                             relationship_id,
-                         )
-                     )
+     def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
+         row_output = RowTransformationOutput()
+         source, relationship, target = query_result_row

-                     # add source and target to the relationship
-                     graph.add((relationship_id, DEFAULT_NAMESPACE.source, source_asset_id))
-                     graph.add((relationship_id, DEFAULT_NAMESPACE.target, target_asset_id))
+         row_output.add_triples.append(cast(Triple, (source, DEFAULT_NAMESPACE.relationship, target)))
+         row_output.add_triples.append(cast(Triple, (relationship, DEFAULT_NAMESPACE.source, source)))
+         row_output.add_triples.append(cast(Triple, (relationship, DEFAULT_NAMESPACE.target, target)))

-                     # remove properties that are not needed, specifically the external ids
-                     graph.remove((relationship_id, self.relationship_source_xid_prop, None))
-                     graph.remove((relationship_id, self.relationship_target_xid_prop, None))
+         row_output.remove_triples.append(cast(Triple, (relationship, self.relationship_source_xid_prop, None)))
+         row_output.remove_triples.append(cast(Triple, (relationship, self.relationship_target_xid_prop, None)))
+
+         row_output.instances_modified_count += 2
+
+         return row_output


  # TODO: standardise
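
Rewritten this way, `AssetRelationshipConnector` inherits the progress bar and the transformation report from `BaseTransformerStandardised` instead of walking the graph imperatively. The contract is small: a count query to size the progress bar, an iterate query, and a per-row `operation`. An illustrative skeleton under that contract (only the base-class names come from this diff; the transformer, namespace, and queries are made up):

    from typing import cast

    from rdflib import Namespace
    from rdflib.query import ResultRow

    from cognite.neat._graph.transformers._base import (  # path per the file list above
        BaseTransformerStandardised,
        RowTransformationOutput,
    )
    from cognite.neat._shared import Triple

    EX = Namespace("http://example.org/")  # hypothetical namespace

    class LabelCopier(BaseTransformerStandardised):  # hypothetical example
        description: str = "Copies rdfs:label onto ex:displayName"

        def _count_query(self) -> str:
            # Sizes the (threshold-gated) progress bar.
            return "SELECT (COUNT(?s) as ?count) WHERE { ?s rdfs:label ?label . }"

        def _iterate_query(self) -> str:
            return "SELECT ?s ?label WHERE { ?s rdfs:label ?label . }"

        def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
            row_output = RowTransformationOutput()
            subject, label = query_result_row
            row_output.add_triples.append(cast(Triple, (subject, EX.displayName, label)))
            row_output.instances_modified_count += 1
            return row_output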
cognite/neat/_graph/transformers/_rdfpath.py CHANGED
@@ -59,7 +59,7 @@ class AddSelfReferenceProperty(BaseTransformer):

  class MakeConnectionOnExactMatch(BaseTransformerStandardised):
      description: str = "Adds property that contains id of reference to all references of given class in Rules"
-     _use_only_once: bool = True
+     _use_only_once: bool = False
      _need_changes = frozenset({})

      def __init__(
cognite/neat/_issues/warnings/_external.py CHANGED
@@ -42,7 +42,7 @@ class FileItemNotSupportedWarning(NeatWarning):

  @dataclass(unsafe_hash=True)
  class CDFAuthWarning(NeatWarning):
-     """Failed to {action} due to {reason}"""
+     """Failed to {action}: {reason}"""

      action: str
      reason: str
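
A small wording fix with a practical motive: `_handle_no_access` above passes the stringified API error as `reason`, and "due to" read poorly in front of a full error message. Assuming `NeatWarning` subclasses render their docstring as a template over the dataclass fields (which the `{action}`/`{reason}` placeholders imply), the message now reads, for example:

    from cognite.neat._issues.warnings import CDFAuthWarning

    warning = CDFAuthWarning("extract sequences", "Insufficient access | code: 403")
    # Presumably renders as:
    #   "Failed to extract sequences: Insufficient access | code: 403"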
cognite/neat/_rules/importers/_rdf/_inference2rules.py CHANGED
@@ -17,6 +17,7 @@ from cognite.neat._rules.models.information import (
  )
  from cognite.neat._store import NeatGraphStore
  from cognite.neat._store._provenance import INSTANCES_ENTITY
+ from cognite.neat._utils.collection_ import iterate_progress_bar
  from cognite.neat._utils.rdf_ import remove_namespace_from_uri, uri_to_short_form

  from ._base import DEFAULT_NON_EXISTING_NODE_TYPE, BaseRDFImporter
@@ -27,7 +28,6 @@ ORDERED_CLASSES_QUERY = """SELECT ?class (count(?s) as ?instances )
  WHERE { ?s a ?class . }
  group by ?class order by DESC(?instances)"""

-
  INSTANCES_OF_CLASS_QUERY = """SELECT ?s ?propertyCount WHERE { ?s a <class> . BIND ('Unknown' as ?propertyCount) }"""


@@ -171,8 +171,10 @@ class InferenceImporter(BaseRDFImporter):
              INSTANCES_OF_CLASS_QUERY if self.max_number_of_instance == -1 else INSTANCES_OF_CLASS_RICHNESS_ORDERED_QUERY
          )

+         classes_iterable = iterate_progress_bar(classes.items(), len(classes), "Inferring classes")
+
          # Infers all the properties of the class
-         for class_id, class_definition in classes.items():
+         for class_id, class_definition in classes_iterable:
              for (  # type: ignore[misc]
                  instance,
                  _,