cognite-neat 0.106.0__py3-none-any.whl → 0.108.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67)
  1. cognite/neat/_constants.py +35 -1
  2. cognite/neat/_graph/_shared.py +4 -0
  3. cognite/neat/_graph/extractors/__init__.py +5 -1
  4. cognite/neat/_graph/extractors/_base.py +32 -0
  5. cognite/neat/_graph/extractors/_classic_cdf/_base.py +128 -14
  6. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +156 -12
  7. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +50 -12
  8. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +26 -1
  9. cognite/neat/_graph/extractors/_dms.py +196 -47
  10. cognite/neat/_graph/extractors/_dms_graph.py +199 -0
  11. cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
  12. cognite/neat/_graph/extractors/_rdf_file.py +33 -5
  13. cognite/neat/_graph/loaders/__init__.py +1 -3
  14. cognite/neat/_graph/loaders/_rdf2dms.py +123 -19
  15. cognite/neat/_graph/queries/_base.py +140 -84
  16. cognite/neat/_graph/queries/_construct.py +2 -2
  17. cognite/neat/_graph/transformers/__init__.py +8 -1
  18. cognite/neat/_graph/transformers/_base.py +9 -1
  19. cognite/neat/_graph/transformers/_classic_cdf.py +90 -3
  20. cognite/neat/_graph/transformers/_rdfpath.py +3 -3
  21. cognite/neat/_graph/transformers/_value_type.py +106 -45
  22. cognite/neat/_issues/errors/_resources.py +1 -1
  23. cognite/neat/_issues/warnings/__init__.py +0 -2
  24. cognite/neat/_issues/warnings/_models.py +1 -1
  25. cognite/neat/_issues/warnings/_properties.py +0 -8
  26. cognite/neat/_rules/analysis/_base.py +1 -1
  27. cognite/neat/_rules/analysis/_information.py +14 -13
  28. cognite/neat/_rules/catalog/__init__.py +1 -0
  29. cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
  30. cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
  31. cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
  32. cognite/neat/_rules/importers/__init__.py +3 -1
  33. cognite/neat/_rules/importers/_dms2rules.py +7 -5
  34. cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
  35. cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
  36. cognite/neat/_rules/importers/_rdf/_base.py +2 -2
  37. cognite/neat/_rules/importers/_rdf/_inference2rules.py +242 -19
  38. cognite/neat/_rules/models/_base_rules.py +13 -15
  39. cognite/neat/_rules/models/_types.py +5 -0
  40. cognite/neat/_rules/models/dms/_rules.py +51 -10
  41. cognite/neat/_rules/models/dms/_rules_input.py +4 -0
  42. cognite/neat/_rules/models/information/_rules.py +48 -5
  43. cognite/neat/_rules/models/information/_rules_input.py +6 -1
  44. cognite/neat/_rules/models/mapping/_classic2core.py +4 -5
  45. cognite/neat/_rules/transformers/__init__.py +10 -0
  46. cognite/neat/_rules/transformers/_converters.py +300 -62
  47. cognite/neat/_session/_base.py +57 -10
  48. cognite/neat/_session/_drop.py +5 -1
  49. cognite/neat/_session/_inspect.py +3 -2
  50. cognite/neat/_session/_mapping.py +17 -6
  51. cognite/neat/_session/_prepare.py +0 -47
  52. cognite/neat/_session/_read.py +115 -10
  53. cognite/neat/_session/_set.py +27 -0
  54. cognite/neat/_session/_show.py +4 -4
  55. cognite/neat/_session/_state.py +12 -1
  56. cognite/neat/_session/_to.py +43 -2
  57. cognite/neat/_session/_wizard.py +1 -1
  58. cognite/neat/_session/exceptions.py +8 -3
  59. cognite/neat/_store/_graph_store.py +331 -136
  60. cognite/neat/_store/_rules_store.py +130 -1
  61. cognite/neat/_utils/auth.py +3 -1
  62. cognite/neat/_version.py +1 -1
  63. {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/METADATA +2 -2
  64. {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/RECORD +67 -65
  65. {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/WHEEL +1 -1
  66. {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/LICENSE +0 -0
  67. {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/entry_points.txt +0 -0
cognite/neat/_graph/extractors/_classic_cdf/_relationships.py
@@ -1,11 +1,16 @@
+ import typing
+ import warnings
  from collections import defaultdict
  from collections.abc import Callable, Iterable, Set
  from pathlib import Path
+ from typing import Any

  from cognite.client import CogniteClient
  from cognite.client.data_classes import Relationship, RelationshipList
- from rdflib import Namespace
+ from rdflib import Namespace, URIRef

+ from cognite.neat._issues.warnings import NeatValueWarning
+ from cognite.neat._shared import Triple
  from cognite.neat._utils.auxiliary import create_sha256_hash

  from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix, T_CogniteResource
@@ -28,9 +33,18 @@ class RelationshipsExtractor(ClassicCDFBaseExtractor[Relationship]):
          skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
          camel_case: bool = True,
          as_write: bool = False,
+         prefix: str | None = None,
+         identifier: typing.Literal["id", "externalId"] = "id",
      ):
+         # This is used by the ClassicExtractor to log the target nodes, such
+         # that it can extract them.
+         # It is private to avoid exposing it to the user.
+         self._target_external_ids_by_type: dict[InstanceIdPrefix, set[str]] = defaultdict(set)
+         self._log_target_nodes = False
+         # Ensure that this becomes an iterator, even if it is a list.
+         to_iterate = (self._log_target_nodes_if_set(item) for item in items)
          super().__init__(
-             items,
+             to_iterate,
              namespace=namespace,
              to_type=to_type,
              total=total,
@@ -39,12 +53,40 @@ class RelationshipsExtractor(ClassicCDFBaseExtractor[Relationship]):
              skip_metadata_values=skip_metadata_values,
              camel_case=camel_case,
              as_write=as_write,
+             prefix=prefix,
+             identifier=identifier,
          )
-         # This is used by the ClassicExtractor to log the target nodes, such
-         # that it can extract them.
-         # It is private to avoid exposing it to the user.
-         self._log_target_nodes = False
-         self._target_external_ids_by_type: dict[InstanceIdPrefix, set[str]] = defaultdict(set)
+         self._uri_by_external_id_by_by_type: dict[InstanceIdPrefix, dict[str, URIRef]] = defaultdict(dict)
+         self._target_triples: list[tuple[URIRef, URIRef, str, str]] = []
+
+     def _log_target_nodes_if_set(self, item: Relationship) -> Relationship:
+         if not self._log_target_nodes:
+             return item
+         if item.target_type and item.target_external_id:
+             self._target_external_ids_by_type[InstanceIdPrefix.from_str(item.target_type)].add(item.target_external_id)
+         return item
+
+     def _item2triples_special_cases(self, id_: URIRef, dumped: dict[str, Any]) -> list[Triple]:
+         if self.identifier == "externalId":
+             return []
+         triples: list[Triple] = []
+         if (source_external_id := dumped.pop("sourceExternalId")) and "sourceType" in dumped:
+             source_type = dumped["sourceType"]
+             try:
+                 source_uri = self._uri_by_external_id_by_by_type[InstanceIdPrefix.from_str(source_type)][
+                     source_external_id
+                 ]
+             except KeyError:
+                 warnings.warn(
+                     NeatValueWarning(f"Missing externalId {source_external_id} for {source_type}"), stacklevel=2
+                 )
+             else:
+                 triples.append((id_, self.namespace["sourceExternalId"], source_uri))
+         if (target_external_id := dumped.pop("targetExternalId")) and "targetType" in dumped:
+             target_type = dumped["targetType"]
+             # We do not yet have the target nodes, so we log them for later extraction.
+             self._target_triples.append((id_, self.namespace["targetExternalId"], target_type, target_external_id))
+         return triples

      @classmethod
      def _from_dataset(
@@ -67,10 +109,6 @@ class RelationshipsExtractor(ClassicCDFBaseExtractor[Relationship]):
          return len(relationships), relationships

      def _fallback_id(self, item: Relationship) -> str | None:
-         if item.external_id and item.source_external_id and item.target_external_id:
-             if self._log_target_nodes and item.target_type and item.target_external_id:
-                 self._target_external_ids_by_type[InstanceIdPrefix.from_str(item.target_type)].add(
-                     item.target_external_id
-                 )
+         if item.external_id:
              return create_sha256_hash(item.external_id)
          return None
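Usage sketch (not part of the diff): the net effect of the RelationshipsExtractor changes is that target-node logging now happens lazily through a generator wrapped around items, and the new prefix/identifier arguments are forwarded to the base extractor. A minimal example, assuming RelationshipsExtractor is importable from cognite.neat._graph.extractors as in earlier releases; the namespace and identifiers below are illustrative, not package defaults:

from cognite.client.data_classes import Relationship
from rdflib import Namespace

from cognite.neat._graph.extractors import RelationshipsExtractor

relationships = [
    Relationship(
        external_id="pump42_to_site1",
        source_type="asset",
        source_external_id="pump42",
        target_type="asset",
        target_external_id="site1",
    )
]

extractor = RelationshipsExtractor(
    relationships,
    namespace=Namespace("http://example.org/classic/"),  # hypothetical namespace
    prefix="Classic",  # new in 0.108.0: optional prefix for the generated types
    identifier="id",  # new in 0.108.0: identify instances by "id" (default) or "externalId"
)
triples = list(extractor.extract())  # target nodes are only logged when the ClassicExtractor enables it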
cognite/neat/_graph/extractors/_classic_cdf/_sequences.py
@@ -1,5 +1,6 @@
  import itertools
  import json
+ import typing
  from collections.abc import Callable, Iterable, Set
  from pathlib import Path
  from typing import Any
@@ -52,10 +53,22 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
          skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
          camel_case: bool = True,
          as_write: bool = False,
+         prefix: str | None = None,
+         identifier: typing.Literal["id", "externalId"] = "id",
          unpack_columns: bool = False,
      ):
          super().__init__(
-             items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write
+             items,
+             namespace,
+             to_type,
+             total,
+             limit,
+             unpack_metadata,
+             skip_metadata_values,
+             camel_case,
+             as_write,
+             prefix,
+             identifier,
          )
          self.unpack_columns = unpack_columns

@@ -71,6 +84,8 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
          skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
          camel_case: bool = True,
          as_write: bool = False,
+         prefix: str | None = None,
+         identifier: typing.Literal["id", "externalId"] = "id",
          unpack_columns: bool = False,
      ):
          total, items = cls._handle_no_access(lambda: cls._from_dataset(client, data_set_external_id))
@@ -84,6 +99,8 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
              skip_metadata_values,
              camel_case,
              as_write,
+             prefix,
+             identifier,
              unpack_columns,
          )

@@ -99,6 +116,8 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
          skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
          camel_case: bool = True,
          as_write: bool = False,
+         prefix: str | None = None,
+         identifier: typing.Literal["id", "externalId"] = "id",
          unpack_columns: bool = False,
      ):
          total, items = cls._handle_no_access(lambda: cls._from_hierarchy(client, root_asset_external_id))
@@ -112,6 +131,8 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
              skip_metadata_values,
              camel_case,
              as_write,
+             prefix,
+             identifier,
              unpack_columns,
          )

@@ -126,6 +147,8 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
          skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
          camel_case: bool = True,
          as_write: bool = False,
+         prefix: str | None = None,
+         identifier: typing.Literal["id", "externalId"] = "id",
          unpack_columns: bool = False,
      ):
          total, items = cls._from_file(file_path)
@@ -139,6 +162,8 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
              skip_metadata_values,
              camel_case,
              as_write,
+             prefix,
+             identifier,
              unpack_columns,
          )

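Usage sketch (not part of the diff): every SequencesExtractor constructor now accepts prefix and identifier and forwards them positionally, so the argument order has to stay aligned with ClassicCDFBaseExtractor.__init__. A hedged example; the data set external id is a placeholder and the client is assumed to be configured by the environment:

from cognite.client import CogniteClient

from cognite.neat._graph.extractors import SequencesExtractor

client = CogniteClient()  # assumes ambient credentials/configuration

extractor = SequencesExtractor.from_dataset(
    client,
    data_set_external_id="my_data_set",  # hypothetical data set
    identifier="externalId",  # new in 0.108.0
    prefix="Classic",  # new in 0.108.0
    unpack_columns=True,
)
triples = list(extractor.extract())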
cognite/neat/_graph/extractors/_dms.py
@@ -1,44 +1,68 @@
- from collections.abc import Iterable, Iterator
+ import urllib.parse
+ from collections.abc import Iterable, Iterator, Set
+ from functools import cached_property
  from typing import cast

  from cognite.client import CogniteClient
  from cognite.client import data_modeling as dm
  from cognite.client.data_classes.data_modeling import DataModelIdentifier
  from cognite.client.data_classes.data_modeling.instances import Instance, PropertyValue
+ from cognite.client.utils.useful_types import SequenceNotStr
  from rdflib import RDF, XSD, Literal, Namespace, URIRef

- from cognite.neat._constants import DEFAULT_SPACE_URI
+ from cognite.neat._config import GLOBAL_CONFIG
+ from cognite.neat._constants import DEFAULT_SPACE_URI, is_readonly_property
  from cognite.neat._issues.errors import ResourceRetrievalError
  from cognite.neat._shared import Triple
+ from cognite.neat._utils.auxiliary import string_to_ideal_type
+ from cognite.neat._utils.collection_ import iterate_progress_bar

  from ._base import BaseExtractor

+ DEFAULT_EMPTY_VALUES = frozenset({"nan", "null", "none", "", " ", "nil", "n/a", "na", "unknown", "undefined"})
+

  class DMSExtractor(BaseExtractor):
      """Extract data from Cognite Data Fusion DMS instances into Neat.

      Args:
-         items: The items to extract.
-         total: The total number of items to extract. If provided, this will be used to estimate the progress.
+         total_instances_pair_by_view: A dictionary where the key is the view id and the value is a tuple with the total
+             number of instances and an iterable of instances.
          limit: The maximum number of items to extract.
          overwrite_namespace: If provided, this will overwrite the space of the extracted items.
+         unpack_json: If True, JSON objects will be unpacked into RDF literals.
+         empty_values: If unpack_json is True, when unpacking JSON objects, if a key has a value in this set, it will be
+             considered as an empty value and skipped.
+         str_to_ideal_type: If unpack_json is True, when unpacking JSON objects, if the value is a string, the extractor
+             will try to convert it to the ideal type.
      """

      def __init__(
          self,
-         items: Iterable[Instance],
-         total: int | None = None,
+         total_instances_pair_by_view: dict[dm.ViewId, tuple[int | None, Iterable[Instance]]],
          limit: int | None = None,
          overwrite_namespace: Namespace | None = None,
+         unpack_json: bool = False,
+         empty_values: Set[str] = DEFAULT_EMPTY_VALUES,
+         str_to_ideal_type: bool = False,
      ) -> None:
-         self.items = items
-         self.total = total
+         self.total_instances_pair_by_view = total_instances_pair_by_view
          self.limit = limit
          self.overwrite_namespace = overwrite_namespace
+         self.unpack_json = unpack_json
+         self.empty_values = empty_values
+         self.str_to_ideal_type = str_to_ideal_type

      @classmethod
      def from_data_model(
-         cls, client: CogniteClient, data_model: DataModelIdentifier, limit: int | None = None
+         cls,
+         client: CogniteClient,
+         data_model: DataModelIdentifier,
+         limit: int | None = None,
+         overwrite_namespace: Namespace | None = None,
+         instance_space: str | SequenceNotStr[str] | None = None,
+         unpack_json: bool = False,
+         str_to_ideal_type: bool = False,
      ) -> "DMSExtractor":
          """Create an extractor from a data model.

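Usage sketch (not part of the diff): with the constructor rebuilt around total_instances_pair_by_view, code that instantiates DMSExtractor directly now passes one (count, iterable) pair per view instead of a flat items iterable. A sketch of the new contract, assuming DMSExtractor is importable from cognite.neat._graph.extractors and the instances were fetched separately; the view id and list are illustrative:

from cognite.client import data_modeling as dm
from cognite.client.data_classes.data_modeling.instances import Instance

from cognite.neat._graph.extractors import DMSExtractor

view_id = dm.ViewId("my_space", "Pump", "v1")  # hypothetical view
instances: list[Instance] = []  # nodes/edges retrieved elsewhere

extractor = DMSExtractor(
    total_instances_pair_by_view={view_id: (len(instances), instances)},
    unpack_json=True,  # new in 0.108.0: flatten JSON properties into literals
    str_to_ideal_type=True,  # new in 0.108.0: best-effort typing of unpacked strings
)
triples = list(extractor.extract())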
@@ -46,28 +70,79 @@ class DMSExtractor(BaseExtractor):
              client: The Cognite client to use.
              data_model: The data model to extract.
              limit: The maximum number of instances to extract.
+             overwrite_namespace: If provided, this will overwrite the space of the extracted items.
+             instance_space: The space to extract instances from.
+             unpack_json: If True, JSON objects will be unpacked into RDF literals.
          """
          retrieved = client.data_modeling.data_models.retrieve(data_model, inline_views=True)
          if not retrieved:
              raise ResourceRetrievalError(dm.DataModelId.load(data_model), "data model", "Data Model is missing in CDF")
-         return cls.from_views(client, retrieved.latest_version().views, limit)
+         return cls.from_views(
+             client,
+             retrieved.latest_version().views,
+             limit,
+             overwrite_namespace,
+             instance_space,
+             unpack_json,
+             str_to_ideal_type,
+         )

      @classmethod
-     def from_views(cls, client: CogniteClient, views: Iterable[dm.View], limit: int | None = None) -> "DMSExtractor":
+     def from_views(
+         cls,
+         client: CogniteClient,
+         views: Iterable[dm.View],
+         limit: int | None = None,
+         overwrite_namespace: Namespace | None = None,
+         instance_space: str | SequenceNotStr[str] | None = None,
+         unpack_json: bool = False,
+         str_to_ideal_type: bool = False,
+     ) -> "DMSExtractor":
          """Create an extractor from a set of views.

          Args:
              client: The Cognite client to use.
              views: The views to extract.
              limit: The maximum number of instances to extract.
+             overwrite_namespace: If provided, this will overwrite the space of the extracted items.
+             instance_space: The space to extract instances from.
+             unpack_json: If True, JSON objects will be unpacked into RDF literals.
+             str_to_ideal_type: If True, when unpacking JSON objects, if the value is a string, the extractor will try to
+                 convert it to the ideal type.
          """
-         return cls(_InstanceIterator(client, views), total=None, limit=limit)
+         total_instances_pair_by_view: dict[dm.ViewId, tuple[int | None, Iterable[Instance]]] = {}
+         for view in views:
+             instance_iterator = _ViewInstanceIterator(client, view, instance_space)
+             total_instances_pair_by_view[view.as_id()] = (instance_iterator.count, instance_iterator)
+
+         return cls(
+             total_instances_pair_by_view=total_instances_pair_by_view,
+             limit=limit,
+             overwrite_namespace=overwrite_namespace,
+             unpack_json=unpack_json,
+             str_to_ideal_type=str_to_ideal_type,
+         )

      def extract(self) -> Iterable[Triple]:
-         for count, item in enumerate(self.items, 1):
-             if self.limit and count > self.limit:
-                 break
-             yield from self._extract_instance(item)
+         total_instances = sum(total for total, _ in self.total_instances_pair_by_view.values() if total is not None)
+         use_progress_bar = (
+             GLOBAL_CONFIG.use_iterate_bar_threshold and total_instances > GLOBAL_CONFIG.use_iterate_bar_threshold
+         )
+
+         for view_id, (total, instances) in self.total_instances_pair_by_view.items():
+             if total == 0:
+                 continue
+             if use_progress_bar and total is not None:
+                 instances = iterate_progress_bar(
+                     instances,
+                     total,
+                     f"Extracting instances from {view_id.space}:{view_id.external_id}(version={view_id.version})",
+                 )
+
+             for count, item in enumerate(instances, 1):
+                 if self.limit and count > self.limit:
+                     break
+                 yield from self._extract_instance(item)

      def _extract_instance(self, instance: Instance) -> Iterable[Triple]:
          if isinstance(instance, dm.Edge):
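Usage sketch (not part of the diff): from_data_model and from_views now pre-count instances per view, which lets extract() show a progress bar once the GLOBAL_CONFIG threshold is exceeded, and both can be restricted to one or more instance spaces. A hedged end-to-end example; the model and space identifiers are placeholders:

from cognite.client import CogniteClient

from cognite.neat._graph.extractors import DMSExtractor

client = CogniteClient()  # assumes ambient credentials/configuration

extractor = DMSExtractor.from_data_model(
    client,
    ("my_space", "MyDataModel", "v1"),  # DataModelIdentifier as (space, external_id, version)
    instance_space="my_instance_space",  # new in 0.108.0: only extract from this space
    unpack_json=True,
    str_to_ideal_type=True,
)
for subject, predicate, obj in extractor.extract():
    ...  # feed the triples into a graph store or another rdflib sink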
@@ -82,7 +157,6 @@ class DMSExtractor(BaseExtractor):
              # If the edge has properties, we create a node for the edge and connect it to the start and end nodes.
              id_ = self._as_uri_ref(instance)
              yield id_, RDF.type, self._as_uri_ref(instance.type)
-             yield id_, RDF.type, self._get_namespace(instance.space).Edge
              yield (
                  id_,
                  self._as_uri_ref(dm.DirectRelationReference(instance.space, "startNode")),
@@ -98,6 +172,9 @@ class DMSExtractor(BaseExtractor):
              id_ = self._as_uri_ref(instance)
              if instance.type:
                  type_ = self._as_uri_ref(cast(dm.DirectRelationReference, instance.type))
+             elif len(instance.properties) == 1:
+                 view_id = next(iter(instance.properties.keys()))
+                 type_ = self._get_namespace(view_id.space)[urllib.parse.quote(view_id.external_id)]
              else:
                  type_ = self._get_namespace(instance.space).Node

@@ -105,54 +182,126 @@ class DMSExtractor(BaseExtractor):
          else:
              raise NotImplementedError(f"Unknown instance type {type(instance)}")

+         if self.overwrite_namespace:
+             # If the namespace is overwritten, keep the original space as a property to avoid losing information.
+             yield id_, self._get_namespace(instance.space)["space"], Literal(instance.space)
+
          for view_id, properties in instance.properties.items():
              namespace = self._get_namespace(view_id.space)
              for key, value in properties.items():
-                 for object_ in self._get_objects(value):
-                     yield id_, namespace[key], object_
+                 for predicate_str, object_ in self._get_predicate_objects_pair(key, value):
+                     yield id_, namespace[urllib.parse.quote(predicate_str)], object_

-     def _get_objects(self, value: PropertyValue) -> Iterable[Literal | URIRef]:
+     def _get_predicate_objects_pair(self, key: str, value: PropertyValue) -> Iterable[tuple[str, Literal | URIRef]]:
          if isinstance(value, str | float | bool | int):
-             yield Literal(value)
+             yield key, Literal(value)
          elif isinstance(value, dict) and "space" in value and "externalId" in value:
-             yield self._as_uri_ref(dm.DirectRelationReference.load(value))
+             yield key, self._as_uri_ref(dm.DirectRelationReference.load(value))
+         elif isinstance(value, dict) and self.unpack_json:
+             for sub_key, sub_value in value.items():
+                 if isinstance(sub_value, str):
+                     if sub_value.casefold() in self.empty_values:
+                         continue
+                     if self.str_to_ideal_type:
+                         yield sub_key, Literal(string_to_ideal_type(sub_value))
+                     else:
+                         yield sub_key, Literal(sub_value)
+                 elif isinstance(sub_value, int | float | bool):
+                     yield sub_key, Literal(sub_value)
+                 elif isinstance(sub_value, dict):
+                     yield from self._get_predicate_objects_pair(f"{key}_{sub_key}", sub_value)
+                 elif isinstance(sub_value, list):
+                     for item in sub_value:
+                         yield from self._get_predicate_objects_pair(f"{key}_{sub_key}", item)
+                 else:
+                     yield sub_key, Literal(str(sub_value))
          elif isinstance(value, dict):
              # This object is a json object.
-             yield Literal(str(value), datatype=XSD._NS["json"])
+             yield key, Literal(str(value), datatype=XSD._NS["json"])
          elif isinstance(value, list):
              for item in value:
-                 yield from self._get_objects(item)
+                 yield from self._get_predicate_objects_pair(key, item)

      def _as_uri_ref(self, instance: Instance | dm.DirectRelationReference) -> URIRef:
-         return self._get_namespace(instance.space)[instance.external_id]
+         return self._get_namespace(instance.space)[urllib.parse.quote(instance.external_id)]

      def _get_namespace(self, space: str) -> Namespace:
          if self.overwrite_namespace:
              return self.overwrite_namespace
-         return Namespace(DEFAULT_SPACE_URI.format(space=space))
+         return Namespace(DEFAULT_SPACE_URI.format(space=urllib.parse.quote(space)))


- class _InstanceIterator(Iterator[Instance]):
-     def __init__(self, client: CogniteClient, views: Iterable[dm.View]):
+ class _ViewInstanceIterator(Iterable[Instance]):
+     def __init__(self, client: CogniteClient, view: dm.View, instance_space: str | SequenceNotStr[str] | None = None):
          self.client = client
-         self.views = views
+         self.view = view
+         self.instance_space = instance_space
+
+     @cached_property
+     def count(self) -> int:
+         node_count = edge_count = 0
+         if self.view.used_for in ("node", "all"):
+             node_count = int(
+                 self.client.data_modeling.instances.aggregate(
+                     view=self.view.as_id(),
+                     aggregates=dm.aggregations.Count("externalId"),
+                     instance_type="node",
+                     space=self.instance_space,
+                 ).value
+             )
+         if self.view.used_for in ("edge", "all"):
+             edge_count = int(
+                 self.client.data_modeling.instances.aggregate(
+                     view=self.view.as_id(),
+                     aggregates=dm.aggregations.Count("externalId"),
+                     instance_type="edge",
+                     space=self.instance_space,
+                 ).value
+             )
+         return node_count + edge_count

      def __iter__(self) -> Iterator[Instance]:
-         return self
-
-     def __next__(self) -> Instance: # type: ignore[misc]
-         for view in self.views:
-             # All nodes and edges with properties
-             yield from self.client.data_modeling.instances(chunk_size=None, instance_type="node", sources=[view])
-             yield from self.client.data_modeling.instances(chunk_size=None, instance_type="edge", sources=[view])
-
-             for prop in view.properties.values():
-                 if isinstance(prop, dm.EdgeConnection):
-                     # Get all edges with properties
-                     yield from self.client.data_modeling.instances(
-                         chunk_size=None,
-                         instance_type="edge",
-                         filter=dm.filters.Equals(
-                             ["edge", "type"], {"space": prop.type.space, "externalId": prop.type.external_id}
-                         ),
-                     )
+         view_id = self.view.as_id()
+         read_only_properties = {
+             prop_id
+             for prop_id, prop in self.view.properties.items()
+             if isinstance(prop, dm.MappedProperty)
+             and is_readonly_property(prop.container, prop.container_property_identifier)
+         }
+         # All nodes and edges with properties
+         if self.view.used_for in ("node", "all"):
+             node_iterable: Iterable[Instance] = self.client.data_modeling.instances(
+                 chunk_size=None, instance_type="node", sources=[view_id], space=self.instance_space
+             )
+             if read_only_properties:
+                 node_iterable = self._remove_read_only_properties(node_iterable, read_only_properties, view_id)
+             yield from node_iterable
+
+         if self.view.used_for in ("edge", "all"):
+             yield from self.client.data_modeling.instances(
+                 chunk_size=None, instance_type="edge", sources=[view_id], space=self.instance_space
+             )
+
+         for prop in self.view.properties.values():
+             if isinstance(prop, dm.EdgeConnection):
+                 if prop.edge_source:
+                     # All edges with properties are extracted from the edge source
+                     continue
+                 yield from self.client.data_modeling.instances(
+                     chunk_size=None,
+                     instance_type="edge",
+                     filter=dm.filters.Equals(
+                         ["edge", "type"], {"space": prop.type.space, "externalId": prop.type.external_id}
+                     ),
+                     space=self.instance_space,
+                 )
+
+     @staticmethod
+     def _remove_read_only_properties(
+         nodes: Iterable[Instance], read_only_properties: Set[str], view_id: dm.ViewId
+     ) -> Iterable[Instance]:
+         for node in nodes:
+             if properties := node.properties.get(view_id):
+                 for read_only in read_only_properties:
+                     properties.pop(read_only, None)
+             yield node
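Usage sketch (not part of the diff): the per-view counting that _ViewInstanceIterator.count introduces is plain SDK aggregation, one call per instance type, scoped to the requested space(s). The same pattern stands alone like this; identifiers are placeholders:

from cognite.client import CogniteClient
from cognite.client import data_modeling as dm

client = CogniteClient()  # assumes ambient credentials/configuration
view_id = dm.ViewId("my_space", "Pump", "v1")  # hypothetical view

node_count = int(
    client.data_modeling.instances.aggregate(
        view=view_id,
        aggregates=dm.aggregations.Count("externalId"),
        instance_type="node",
        space="my_instance_space",  # a space, a list of spaces, or None for all
    ).value
)
print(f"{view_id.external_id}: {node_count} nodes to extract")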