cognite-neat 0.100.0__py3-none-any.whl → 0.101.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release has been flagged as potentially problematic.


This version of cognite-neat might be problematic; consult the package registry's advisory page for more details.

@@ -3,7 +3,7 @@ from abc import ABC, abstractmethod
3
3
  from collections.abc import Callable, Collection, Iterable, Sequence
4
4
  from dataclasses import dataclass, field
5
5
  from graphlib import TopologicalSorter
6
- from typing import TYPE_CHECKING, Any, ClassVar, Generic, TypeVar, cast
6
+ from typing import TYPE_CHECKING, Any, ClassVar, Generic, Literal, TypeVar, cast
7
7
 
8
8
  from cognite.client.data_classes import filters
9
9
  from cognite.client.data_classes._base import (
@@ -363,9 +363,16 @@ class SpaceLoader(DataModelingLoader[str, SpaceApply, Space, SpaceApplyList, Spa
363
363
  return SpaceApplyList(schema.spaces.values())
364
364
 
365
365
  def has_data(self, item_id: str) -> bool:
366
- return bool(self._client.data_modeling.instances.list("node", limit=1, space=item_id)) or bool(
367
- self._client.data_modeling.instances.list("edge", limit=1, space=item_id)
368
- )
366
+ if self._client.data_modeling.instances.list("node", limit=1, space=item_id):
367
+ return True
368
+ if self._client.data_modeling.instances.list("edge", limit=1, space=item_id):
369
+ return True
370
+ # Need to check if there are any containers with data in the space. Typically,
371
+ # a schema space will not contain data, while it will have containers that have data in an instance space.
372
+ for container in self._client.data_modeling.containers(space=item_id, include_global=False):
373
+ if self._client.loaders.containers.has_data(container.as_id()):
374
+ return True
375
+ return False
369
376
 
370
377
 
371
378
  class ContainerLoader(DataModelingLoader[ContainerId, ContainerApply, Container, ContainerApplyList, ContainerList]):
@@ -490,10 +497,23 @@ class ContainerLoader(DataModelingLoader[ContainerId, ContainerApply, Container,
490
497
  return ContainerApplyList(schema.containers.values())
491
498
 
492
499
  def has_data(self, item_id: ContainerId) -> bool:
493
- has_data = filters.HasData(containers=[item_id])
494
- return bool(self._client.data_modeling.instances.list("node", limit=1, filter=has_data)) or bool(
495
- self._client.data_modeling.instances.list("edge", limit=1, filter=has_data)
496
- )
500
+ has_data_filter = filters.HasData(containers=[item_id])
501
+ has_data = False
502
+ instance_type: Literal["node", "edge"]
503
+ # Mypy does not understand that the instance type is Literal["node", "edge"]
504
+ for instance_type in ["node", "edge"]: # type: ignore[assignment]
505
+ try:
506
+ has_data = bool(
507
+ self._client.data_modeling.instances.list(instance_type, limit=1, filter=has_data_filter)
508
+ )
509
+ except CogniteAPIError as e:
510
+ if e.code != 400:
511
+ # If the container is used for nodes and we ask for edges, we get a 400 error. This
512
+ # means there is no edge data for this container.
513
+ raise
514
+ if has_data:
515
+ return True
516
+ return has_data
497
517
 
498
518
 
499
519
  class ViewLoader(DataModelingLoader[ViewId, ViewApply, View, ViewApplyList, ViewList]):
@@ -156,7 +156,6 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
156
156
 
157
157
  for identifier, properties in reader:
158
158
  try:
159
- print(view_id)
160
159
  yield self._create_node(identifier, properties, pydantic_cls, view_id)
161
160
  except ValueError as e:
162
161
  error_node = ResourceCreationError(identifier, "node", error=str(e))
@@ -7,6 +7,7 @@ from ._classic_cdf import (
7
7
  AssetTimeSeriesConnector,
8
8
  RelationshipAsEdgeTransformer,
9
9
  )
10
+ from ._prune_graph import AttachPropertyFromTargetToSource, PruneDanglingNodes
10
11
  from ._rdfpath import AddSelfReferenceProperty, MakeConnectionOnExactMatch
11
12
  from ._value_type import SplitMultiValueProperty
12
13
 
@@ -21,6 +22,8 @@ __all__ = [
21
22
  "SplitMultiValueProperty",
22
23
  "RelationshipAsEdgeTransformer",
23
24
  "MakeConnectionOnExactMatch",
25
+ "AttachPropertyFromTargetToSource",
26
+ "PruneDanglingNodes",
24
27
  ]
25
28
 
26
29
  Transformers = (
@@ -34,4 +37,6 @@ Transformers = (
34
37
  | SplitMultiValueProperty
35
38
  | RelationshipAsEdgeTransformer
36
39
  | MakeConnectionOnExactMatch
40
+ | AttachPropertyFromTargetToSource
41
+ | PruneDanglingNodes
37
42
  )
@@ -2,12 +2,12 @@ from rdflib import Namespace
2
2
 
3
3
  from cognite.neat._graph.extractors import IODDExtractor
4
4
 
5
- from ._prune_graph import PruneDanglingNodes, TwoHopFlattener
5
+ from ._prune_graph import AttachPropertyFromTargetToSource, PruneDanglingNodes
6
6
 
7
7
  IODD = Namespace("http://www.io-link.com/IODD/2010/10/")
8
8
 
9
9
 
10
- class IODDTwoHopFlattener(TwoHopFlattener):
10
+ class IODDAttachPropertyFromTargetToSource(AttachPropertyFromTargetToSource):
11
11
  _need_changes = frozenset(
12
12
  {
13
13
  str(IODDExtractor.__name__),
@@ -15,11 +15,16 @@ class IODDTwoHopFlattener(TwoHopFlattener):
15
15
  )
16
16
 
17
17
  def __init__(self):
18
- super().__init__(destination_node_type=IODD.TextObject, property_predicate=IODD.value, property_name="value")
18
+ super().__init__(
19
+ target_node_type=IODD.TextObject,
20
+ namespace=IODD,
21
+ target_property="value",
22
+ delete_target_node=True,
23
+ )
19
24
 
20
25
 
21
26
  class IODDPruneDanglingNodes(PruneDanglingNodes):
22
- _need_changes = frozenset({str(IODDExtractor.__name__), str(IODDTwoHopFlattener.__name__)})
27
+ _need_changes = frozenset({str(IODDExtractor.__name__), str(IODDAttachPropertyFromTargetToSource.__name__)})
23
28
 
24
29
  def __init__(self):
25
30
  super().__init__(node_prune_types=[IODD.TextObject])
@@ -1,84 +1,141 @@
1
1
  from rdflib import Graph, Namespace, URIRef
2
2
  from rdflib.query import ResultRow
3
- from rdflib.term import Identifier
3
+
4
+ from cognite.neat._utils.rdf_ import as_neat_compliant_uri
5
+ from cognite.neat._utils.text import sentence_or_string_to_camel
4
6
 
5
7
  from ._base import BaseTransformer
6
8
 
7
9
 
8
- # TODO: Handle the cse when value is None, which will not make the TextObject resolve
9
- class TwoHopFlattener(BaseTransformer):
10
+ class AttachPropertyFromTargetToSource(BaseTransformer):
10
11
  """
11
- Transformer that will flatten the distance between a source node, an intermediate connecting node, and a
12
- target property that is connected to the intermediate node.
13
- The transformation result is that the target property is attached directly to the source node, instead of having
14
- to go via the intermediate node.
15
- The user can also provide a flag to decide if the intermediate node should be removed from the graph or not
16
- after connecting the target property to the source node.
12
+ Transformer that considers a TargetNode and SourceNode relationship, to extract a property that is attached to
13
+ the TargetNode, and attaches it to the SourceNode instead, while also deleting the edge between
14
+ the SourceNode and TargetNode.
15
+ This means that you no longer have to go via the SourceNode to TargetNode to extract
16
+ the desired property from TargetNode, you can get it directly from the SourceNode instead.
17
+ Further, there are two ways of defining the predicate for the new property to attach to
18
+ the SourceNode. The predicate that is used will either be the old predicate between the SourceNode and TargetNode,
19
+ or, the TargetNode may hold a property with a value for the new predicate to use.
20
+ In this case, the user must specify the name of this predicate property connected to the TargetNode.
21
+ Consider the following example for illustration:
22
+
23
+ Ex. AttachPropertyFromTargetToSource
24
+ Graph before transformation:
25
+
26
+ :SourceNode a :SourceType .
27
+ :SourceNode :sourceProperty :TargetNode .
28
+
29
+ :TargetNode a :TargetType .
30
+ :TargetNode :propertyWhichValueWeWant 'Target Value' .
31
+ :TargetNode :propertyWhichValueWeMightWantAsNameForNewProperty 'PropertyName'
32
+
33
+ Use case A after transformation - attach new property to SourceNode using old predicate:
34
+
35
+ :SourceNode a :SourceType .
36
+ :SourceNode :sourceProperty 'Target Value' .
37
+
38
+ Use case B after transformation - extract new predicate from one of the properties of the TargetNode:
39
+
40
+ :SourceNode a :SourceType .
41
+ :SourceNode :PropertyName 'Target Value' .
17
42
 
18
- Ex. TwoHopFlattener:
19
43
 
20
- Graph before flattening (with deletion of intermediate node):
21
- node(A, rdf:type(Pump)) -(predicate("vendor"))>
22
- node(B, rdf:type(TextObject)) -(predicate("value"))> Literal("CompanyX")
44
+ The user can provide a flag to decide if the intermediate target node should be removed from the graph or not
45
+ after connecting the target property to the source node. The example illustrates this.
46
+ The default however is False.
23
47
 
24
- Graph after flattening nodes with destination_node_type = rdf:type(TextObject), property_predicate = :value,
25
- and property_name = "value":
48
+ If delete_target_node is not set, the expected number of triples after this transformation should be the same as
49
+ before the transformation.
26
50
 
27
- node(A, rdf:type(Pump)) -(predicate("vendor"))> Literal("CompanyX")
51
+ If delete_target_node is set, the expected number of triples should be:
52
+ #triples_before - #target_nodes * #target_nodes_properties
53
+
54
+ Number of triples after operation from above example: 5 - 1*3 = 2
28
55
 
29
56
  Args:
30
- destination_node_type: RDF.type of edge Node
31
- property_predicate: Predicate to use when resolving the value from the edge node
32
- property_name: name of the property that the intermediate node is pointing to
33
- delete_connecting_node: bool if the intermediate Node and Edge between source Node
34
- and target property should be deleted. Defaults to True.
57
+ target_node_type: RDF.type of edge Node
58
+ namespace: RDF Namespace to use when querying the graph
59
+ target_property: str with name of the property that holds the value attached to the intermediate node
60
+ target_property_holding_new_property_name: Optional str of the property name that holds
61
+ the new predicate to use when attaching the new property to the source node.
62
+ delete_target_node: bool if the intermediate Node and Edge between source Node
63
+ and target property should be deleted. Defaults to False.
35
64
  """
36
65
 
37
- description: str = "Prunes the graph of specified node types that do not have connections to other nodes."
38
- _query_template: str = """SELECT ?sourceNode ?property ?destinationNode ?value WHERE {{
39
- ?sourceNode ?property ?destinationNode .
40
- ?destinationNode a <{destination_node_type}> .
41
- ?destinationNode <{property_predicate}> ?{property_name} . }}"""
66
+ description: str = "Attaches a target property from a target node that is connected to a source node."
67
+
68
+ _query_template_use_case_a: str = """
69
+ SELECT ?sourceNode ?sourceProperty ?targetNode ?newSourceProperty ?newSourcePropertyValue WHERE {{
70
+ ?sourceNode ?sourceProperty ?targetNode .
71
+ BIND( <{target_property}> as ?newSourceProperty ) .
72
+ ?targetNode a <{target_node_type}> .
73
+ ?targetNode <{target_property}> ?newSourcePropertyValue . }}"""
74
+
75
+ _query_template_use_case_b: str = """
76
+ SELECT ?sourceNode ?sourceProperty ?targetNode ?newSourceProperty ?newSourcePropertyValue WHERE {{
77
+ ?sourceNode ?sourceProperty ?targetNode .
78
+ ?targetNode a <{target_node_type}> .
79
+ ?targetNode <{target_property_holding_new_property_name}> ?newSourceProperty .
80
+ ?targetNode <{target_property}> ?newSourcePropertyValue . }}"""
42
81
 
43
82
  def __init__(
44
83
  self,
45
- destination_node_type: URIRef,
46
- property_predicate: Namespace,
47
- property_name: str,
48
- delete_connecting_node: bool = True,
84
+ target_node_type: URIRef,
85
+ namespace: Namespace,
86
+ target_property: str,
87
+ target_property_holding_new_property_name: str | None = None,
88
+ delete_target_node: bool = False,
49
89
  ):
50
- self.destination_node_type = destination_node_type
51
- self.property_predicate = property_predicate
52
- self.property_name = property_name
53
- self.delete_connecting_node = delete_connecting_node
54
-
55
- def transform(self, graph: Graph) -> None:
56
- nodes_to_delete: list[Identifier] = []
57
-
58
- graph_traversals = list(
59
- graph.query(
60
- self._query_template.format(
61
- destination_node_type=self.destination_node_type,
62
- property_predicate=self.property_predicate,
63
- property_name=self.property_name,
64
- )
90
+ self.target_node_type = target_node_type
91
+ self.namespace = namespace
92
+ self.target_property = self.namespace[target_property]
93
+ self.delete_target_node = delete_target_node
94
+ self.target_property_holding_new_property_name = target_property_holding_new_property_name
95
+
96
+ def transform(self, graph) -> None:
97
+ nodes_to_delete: list[tuple] = []
98
+
99
+ if self.target_property_holding_new_property_name is not None:
100
+ target_property_holding_new_property_name = self.namespace[self.target_property_holding_new_property_name]
101
+ query = self._query_template_use_case_b.format(
102
+ target_node_type=self.target_node_type,
103
+ target_property_holding_new_property_name=target_property_holding_new_property_name,
104
+ target_property=self.target_property,
105
+ )
106
+ else:
107
+ query = self._query_template_use_case_a.format(
108
+ target_node_type=self.target_node_type,
109
+ target_property=self.target_property,
65
110
  )
66
- )
67
-
68
- for path in graph_traversals:
69
- if isinstance(path, ResultRow):
70
- source_node, predicate, destination_node, property_value = path.asdict().values()
71
-
72
- # Create new connection from source node to value
73
- graph.add((source_node, predicate, property_value))
74
- nodes_to_delete.append(destination_node)
75
-
76
- if self.delete_connecting_node:
77
- for node in nodes_to_delete:
78
- # Remove edge triples to node
79
- graph.remove((None, None, node))
80
- # Remove node triple
81
- graph.remove((node, None, None))
111
+
112
+ for (
113
+ source_node,
114
+ old_predicate,
115
+ target_node,
116
+ new_predicate_value,
117
+ new_property_value,
118
+ ) in graph.query(query):
119
+ if self.target_property_holding_new_property_name is not None:
120
+ # Ensure new predicate is URI compliant as we are creating a new predicate
121
+ new_predicate_value_string = sentence_or_string_to_camel(str(new_predicate_value))
122
+ predicate = as_neat_compliant_uri(self.namespace[new_predicate_value_string])
123
+ else:
124
+ predicate = old_predicate
125
+
126
+ # Create new connection from source node to value
127
+ graph.add((source_node, predicate, new_property_value))
128
+ # Remove old relationship between source node and destination node
129
+ graph.remove((source_node, old_predicate, target_node))
130
+
131
+ nodes_to_delete.append(target_node)
132
+
133
+ if self.delete_target_node:
134
+ for target_node in nodes_to_delete:
135
+ # Remove triples with edges to target_node
136
+ graph.remove((None, None, target_node))
137
+ # Remove target node triple and its properties
138
+ graph.remove((target_node, None, None))
82
139
 
83
140
 
84
141
  class PruneDanglingNodes(BaseTransformer):
@@ -12,6 +12,7 @@ from cognite.client.data_classes._base import (
12
12
  from cognite.client.data_classes.data_modeling import (
13
13
  DataModelApplyList,
14
14
  DataModelId,
15
+ SpaceApply,
15
16
  ViewApplyList,
16
17
  )
17
18
  from cognite.client.exceptions import CogniteAPIError
@@ -200,6 +201,32 @@ class DMSExporter(CDFExporter[DMSRules, DMSSchema]):
200
201
  loader.resource_name for loader, categorized in categorized_items_by_loader.items() if categorized.to_update
201
202
  )
202
203
 
204
+ deleted_by_name: dict[str, UploadResult] = {}
205
+ if not is_failing:
206
+ # Deletion is done in reverse order to take care of dependencies
207
+ for loader, items in reversed(categorized_items_by_loader.items()):
208
+ issue_list = IssueList()
209
+
210
+ if items.resource_name == client.loaders.data_models.resource_name:
211
+ warning_list = self._validate(list(items.item_ids()), client)
212
+ issue_list.extend(warning_list)
213
+
214
+ results = UploadResult(loader.resource_name, issues=issue_list) # type: ignore[var-annotated]
215
+ if dry_run:
216
+ results.deleted.update(items.to_delete_ids)
217
+ else:
218
+ if items.to_delete_ids:
219
+ try:
220
+ deleted = loader.delete(items.to_delete_ids)
221
+ except MultiCogniteAPIError as e:
222
+ results.deleted.update([loader.get_id(item) for item in e.success])
223
+ results.failed_deleted.update([loader.get_id(item) for item in e.failed])
224
+ for error in e.errors:
225
+ results.error_messages.append(f"Failed to delete {loader.resource_name}: {error!s}")
226
+ else:
227
+ results.deleted.update(deleted)
228
+ deleted_by_name[loader.resource_name] = results
229
+
203
230
  for loader, items in categorized_items_by_loader.items():
204
231
  issue_list = IssueList()
205
232
 
@@ -221,28 +248,21 @@ class DMSExporter(CDFExporter[DMSRules, DMSSchema]):
221
248
 
222
249
  results.unchanged.update(items.unchanged_ids)
223
250
  results.skipped.update(items.to_skip_ids)
251
+ if delete_results := deleted_by_name.get(loader.resource_name):
252
+ results.deleted.update(delete_results.deleted)
253
+ results.failed_deleted.update(delete_results.failed_deleted)
254
+ results.error_messages.extend(delete_results.error_messages)
255
+
224
256
  if dry_run:
225
257
  if self.existing in ["update", "force"]:
226
258
  # Assume all changed are successful
227
259
  results.changed.update(items.to_update_ids)
228
260
  elif self.existing == "skip":
229
261
  results.skipped.update(items.to_update_ids)
230
- results.deleted.update(items.to_delete_ids)
231
262
  results.created.update(items.to_create_ids)
232
263
  yield results
233
264
  continue
234
265
 
235
- if items.to_delete_ids:
236
- try:
237
- deleted = loader.delete(items.to_delete_ids)
238
- except MultiCogniteAPIError as e:
239
- results.deleted.update([loader.get_id(item) for item in e.success])
240
- results.failed_deleted.update([loader.get_id(item) for item in e.failed])
241
- for error in e.errors:
242
- results.error_messages.append(f"Failed to delete {loader.resource_name}: {error!s}")
243
- else:
244
- results.deleted.update(deleted)
245
-
246
266
  if items.to_create:
247
267
  try:
248
268
  created = loader.create(items.to_create)
@@ -304,12 +324,14 @@ class DMSExporter(CDFExporter[DMSRules, DMSSchema]):
304
324
  and not loader.in_space(item, self.include_space)
305
325
  ):
306
326
  continue
307
-
308
- cdf_item = cdf_item_by_id.get(loader.get_id(item))
327
+ item_id = loader.get_id(item)
328
+ cdf_item = cdf_item_by_id.get(item_id)
309
329
  if cdf_item is None:
310
330
  categorized.to_create.append(item)
311
- elif is_redeploying or self.existing == "recreate":
312
- if loader.has_data(cdf_item) and not self.drop_data:
331
+ elif (is_redeploying or self.existing == "recreate") and not isinstance(item, SpaceApply):
332
+ # Spaces are not deleted, instead they are updated. Deleting a space is an expensive operation
333
+ # and are seldom needed. If you need to delete the space, it should be done in a different operation.
334
+ if not self.drop_data and loader.has_data(item_id):
313
335
  categorized.to_skip.append(cdf_item)
314
336
  else:
315
337
  categorized.to_delete.append(cdf_item.as_write())
@@ -147,11 +147,13 @@ class BaseMetadata(SchemaModel):
147
147
  Metadata model for data model
148
148
  """
149
149
 
150
- role: ClassVar[RoleTypes]
151
- aspect: ClassVar[DataModelAspect]
152
- space: SpaceType = Field(alias="prefix")
153
- external_id: DataModelExternalIdType = Field(alias="externalId")
154
- version: VersionType
150
+ role: ClassVar[RoleTypes] = Field(description="Role of the person creating the data model")
151
+ aspect: ClassVar[DataModelAspect] = Field(description="Aspect of the data model")
152
+ space: SpaceType = Field(alias="prefix", description="The space where the data model is defined")
153
+ external_id: DataModelExternalIdType = Field(
154
+ alias="externalId", description="External identifier for the data model"
155
+ )
156
+ version: VersionType = Field(description="Version of the data model")
155
157
 
156
158
  name: str | None = Field(
157
159
  None,
@@ -160,21 +162,23 @@ class BaseMetadata(SchemaModel):
160
162
  max_length=255,
161
163
  )
162
164
 
163
- description: str | None = Field(None, min_length=1, max_length=1024)
165
+ description: str | None = Field(
166
+ None, min_length=1, max_length=1024, description="Short description of the data model"
167
+ )
164
168
 
165
169
  creator: StrListType = Field(
166
170
  description=(
167
- "List of contributors to the data model creation, "
171
+ "List of contributors (comma seperated) to the data model creation, "
168
172
  "typically information architects are considered as contributors."
169
173
  ),
170
174
  )
171
175
 
172
176
  created: datetime = Field(
173
- description=("Date of the data model creation"),
177
+ description="Date of the data model creation",
174
178
  )
175
179
 
176
180
  updated: datetime = Field(
177
- description=("Date of the data model update"),
181
+ description="Date of the data model update",
178
182
  )
179
183
 
180
184
  @field_validator("*", mode="before")