cognite-neat 0.99.1__py3-none-any.whl → 0.100.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (47)
  1. cognite/neat/_client/_api/data_modeling_loaders.py +403 -182
  2. cognite/neat/_client/data_classes/data_modeling.py +4 -0
  3. cognite/neat/_graph/extractors/_base.py +7 -0
  4. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +23 -13
  5. cognite/neat/_graph/loaders/_rdf2dms.py +50 -11
  6. cognite/neat/_graph/transformers/__init__.py +3 -3
  7. cognite/neat/_graph/transformers/_classic_cdf.py +120 -52
  8. cognite/neat/_issues/warnings/__init__.py +2 -0
  9. cognite/neat/_issues/warnings/_resources.py +15 -0
  10. cognite/neat/_rules/analysis/_base.py +15 -5
  11. cognite/neat/_rules/analysis/_dms.py +20 -0
  12. cognite/neat/_rules/analysis/_information.py +22 -0
  13. cognite/neat/_rules/exporters/_base.py +3 -5
  14. cognite/neat/_rules/exporters/_rules2dms.py +192 -200
  15. cognite/neat/_rules/importers/_rdf/_inference2rules.py +22 -5
  16. cognite/neat/_rules/models/_base_rules.py +19 -0
  17. cognite/neat/_rules/models/_types.py +5 -0
  18. cognite/neat/_rules/models/dms/_exporter.py +215 -93
  19. cognite/neat/_rules/models/dms/_rules.py +4 -4
  20. cognite/neat/_rules/models/dms/_rules_input.py +8 -3
  21. cognite/neat/_rules/models/dms/_validation.py +42 -11
  22. cognite/neat/_rules/models/entities/_multi_value.py +3 -0
  23. cognite/neat/_rules/models/information/_rules.py +17 -2
  24. cognite/neat/_rules/models/information/_rules_input.py +11 -2
  25. cognite/neat/_rules/models/information/_validation.py +99 -3
  26. cognite/neat/_rules/models/mapping/_classic2core.yaml +1 -1
  27. cognite/neat/_rules/transformers/__init__.py +2 -1
  28. cognite/neat/_rules/transformers/_converters.py +163 -61
  29. cognite/neat/_rules/transformers/_mapping.py +132 -2
  30. cognite/neat/_session/_base.py +42 -31
  31. cognite/neat/_session/_mapping.py +105 -5
  32. cognite/neat/_session/_prepare.py +43 -9
  33. cognite/neat/_session/_read.py +50 -4
  34. cognite/neat/_session/_set.py +1 -0
  35. cognite/neat/_session/_to.py +36 -13
  36. cognite/neat/_session/_wizard.py +5 -0
  37. cognite/neat/_session/engine/_interface.py +3 -2
  38. cognite/neat/_store/_base.py +79 -19
  39. cognite/neat/_utils/collection_.py +22 -0
  40. cognite/neat/_utils/rdf_.py +24 -0
  41. cognite/neat/_version.py +2 -2
  42. cognite/neat/_workflows/steps/lib/current/rules_exporter.py +3 -3
  43. {cognite_neat-0.99.1.dist-info → cognite_neat-0.100.1.dist-info}/METADATA +1 -1
  44. {cognite_neat-0.99.1.dist-info → cognite_neat-0.100.1.dist-info}/RECORD +47 -47
  45. {cognite_neat-0.99.1.dist-info → cognite_neat-0.100.1.dist-info}/LICENSE +0 -0
  46. {cognite_neat-0.99.1.dist-info → cognite_neat-0.100.1.dist-info}/WHEEL +0 -0
  47. {cognite_neat-0.99.1.dist-info → cognite_neat-0.100.1.dist-info}/entry_points.txt +0 -0
@@ -2,6 +2,8 @@ from abc import ABC, abstractmethod
2
2
  from collections.abc import Hashable, ItemsView, Iterable, Iterator, KeysView, Mapping, MutableMapping, ValuesView
3
3
  from typing import (
4
4
  Any,
5
+ Literal,
6
+ TypeAlias,
5
7
  TypeVar,
6
8
  cast,
7
9
  final,
@@ -24,6 +26,8 @@ from cognite.client.data_classes.data_modeling import (
24
26
  from cognite.client.utils._auxiliary import load_yaml_or_json
25
27
  from cognite.client.utils._pandas_helpers import convert_nullable_int_cols
26
28
 
29
+ Component: TypeAlias = Literal["spaces", "data_models", "views", "containers", "node_types"]
30
+
27
31
  T_ID = TypeVar("T_ID", bound=Hashable)
28
32
 
29
33
 
@@ -10,6 +10,13 @@ class BaseExtractor:
10
10
  extractors must implement.
11
11
  """
12
12
 
13
+ def _get_activity_names(self) -> list[str]:
14
+ """Returns the name of the activities that the extractor performs,
15
+ i.e., the actions that it performs when you call extract()."""
16
+ # This method can be overridden by subclasses that run multiple extractors,
17
+ # for example the ClassicGraphExtractor
18
+ return [type(self).__name__]
19
+
13
20
  @abstractmethod
14
21
  def extract(self) -> Iterable[Triple]:
15
22
  raise NotImplementedError()
@@ -11,7 +11,7 @@ from cognite.neat._constants import CLASSIC_CDF_NAMESPACE
11
11
  from cognite.neat._graph.extractors._base import BaseExtractor
12
12
  from cognite.neat._issues.warnings import CDFAuthWarning
13
13
  from cognite.neat._shared import Triple
14
- from cognite.neat._utils.collection_ import chunker
14
+ from cognite.neat._utils.collection_ import chunker, iterate_progress_bar
15
15
  from cognite.neat._utils.rdf_ import remove_namespace_from_uri
16
16
 
17
17
  from ._assets import AssetsExtractor
@@ -92,6 +92,7 @@ class ClassicGraphExtractor(BaseExtractor):
92
92
  data_set_external_id: str | None = None,
93
93
  root_asset_external_id: str | None = None,
94
94
  namespace: Namespace | None = None,
95
+ limit_per_type: int | None = None,
95
96
  ):
96
97
  self._client = client
97
98
  if sum([bool(data_set_external_id), bool(root_asset_external_id)]) != 1:
@@ -99,12 +100,27 @@ class ClassicGraphExtractor(BaseExtractor):
99
100
  self._root_asset_external_id = root_asset_external_id
100
101
  self._data_set_external_id = data_set_external_id
101
102
  self._namespace = namespace or CLASSIC_CDF_NAMESPACE
102
- self._extractor_args = dict(namespace=self._namespace, unpack_metadata=False, as_write=True, camel_case=True)
103
+ self._extractor_args = dict(
104
+ namespace=self._namespace, unpack_metadata=False, as_write=True, camel_case=True, limit=limit_per_type
105
+ )
106
+ self._limit_per_type = limit_per_type
103
107
 
104
108
  self._source_external_ids_by_type: dict[InstanceIdPrefix, set[str]] = defaultdict(set)
105
109
  self._target_external_ids_by_type: dict[InstanceIdPrefix, set[str]] = defaultdict(set)
106
110
  self._labels: set[str] = set()
107
111
  self._data_set_ids: set[int] = set()
112
+ self._extracted_labels = False
113
+ self._extracted_data_sets = False
114
+
115
+ def _get_activity_names(self) -> list[str]:
116
+ activities = [data_access_object.extractor_cls.__name__ for data_access_object in self._classic_node_types] + [
117
+ RelationshipsExtractor.__name__,
118
+ ]
119
+ if self._extracted_labels:
120
+ activities.append(LabelsExtractor.__name__)
121
+ if self._extracted_data_sets:
122
+ activities.append(DataSetExtractor.__name__)
123
+ return activities
108
124
 
109
125
  def extract(self) -> Iterable[Triple]:
110
126
  """Extracts all classic CDF Resources."""
@@ -118,11 +134,15 @@ class ClassicGraphExtractor(BaseExtractor):
118
134
  yield from self._extract_labels()
119
135
  except CogniteAPIError as e:
120
136
  warnings.warn(CDFAuthWarning("extract labels", str(e)), stacklevel=2)
137
+ else:
138
+ self._extracted_labels = True
121
139
 
122
140
  try:
123
141
  yield from self._extract_data_sets()
124
142
  except CogniteAPIError as e:
125
143
  warnings.warn(CDFAuthWarning("extract data sets", str(e)), stacklevel=2)
144
+ else:
145
+ self._extracted_data_sets = True
126
146
 
127
147
  def _extract_core_start_nodes(self):
128
148
  for core_node in self._classic_node_types:
@@ -206,14 +226,4 @@ class ClassicGraphExtractor(BaseExtractor):
206
226
  @staticmethod
207
227
  def _chunk(items: Sequence, description: str) -> Iterable:
208
228
  to_iterate: Iterable = chunker(items, chunk_size=1000)
209
- try:
210
- from rich.progress import track
211
- except ModuleNotFoundError:
212
- ...
213
- else:
214
- to_iterate = track(
215
- to_iterate,
216
- total=(len(items) // 1_000) + 1,
217
- description=description,
218
- )
219
- return to_iterate
229
+ return iterate_progress_bar(to_iterate, (len(items) // 1_000) + 1, description)
@@ -14,7 +14,7 @@ from cognite.client.data_classes.data_modeling.ids import InstanceId
14
14
  from cognite.client.data_classes.data_modeling.views import SingleEdgeConnection
15
15
  from cognite.client.exceptions import CogniteAPIError
16
16
  from pydantic import BaseModel, ValidationInfo, create_model, field_validator
17
- from rdflib import RDF
17
+ from rdflib import RDF, URIRef
18
18
 
19
19
  from cognite.neat._graph._tracking import LogTracker, Tracker
20
20
  from cognite.neat._issues import IssueList, NeatIssue, NeatIssueList
@@ -25,8 +25,10 @@ from cognite.neat._issues.errors import (
25
25
  ResourceRetrievalError,
26
26
  )
27
27
  from cognite.neat._issues.warnings import PropertyTypeNotSupportedWarning
28
+ from cognite.neat._rules.analysis._dms import DMSAnalysis
28
29
  from cognite.neat._rules.models import DMSRules
29
30
  from cognite.neat._rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE, Json
31
+ from cognite.neat._rules.models.entities._single_value import ViewEntity
30
32
  from cognite.neat._shared import InstanceType
31
33
  from cognite.neat._store import NeatGraphStore
32
34
  from cognite.neat._utils.auxiliary import create_sha256_hash
@@ -52,16 +54,18 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
52
54
  graph_store: NeatGraphStore,
53
55
  data_model: dm.DataModel[dm.View] | None,
54
56
  instance_space: str,
55
- class_by_view_id: dict[ViewId, str] | None = None,
57
+ class_neat_id_by_view_id: dict[ViewId, URIRef] | None = None,
56
58
  create_issues: Sequence[NeatIssue] | None = None,
57
59
  tracker: type[Tracker] | None = None,
60
+ rules: DMSRules | None = None,
58
61
  ):
59
62
  super().__init__(graph_store)
60
63
  self.data_model = data_model
61
64
  self.instance_space = instance_space
62
- self.class_by_view_id = class_by_view_id or {}
65
+ self.class_neat_id_by_view_id = class_neat_id_by_view_id or {}
63
66
  self._issues = IssueList(create_issues or [])
64
67
  self._tracker: type[Tracker] = tracker or LogTracker
68
+ self.rules = rules
65
69
 
66
70
  @classmethod
67
71
  def from_data_model_id(
@@ -95,7 +99,17 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
95
99
  reason=str(e),
96
100
  )
97
101
  )
98
- return cls(graph_store, data_model, instance_space, {}, issues)
102
+
103
+ class_neat_id_by_view_id = {view.view.as_id(): view.logical for view in rules.views if view.logical}
104
+
105
+ return cls(
106
+ graph_store,
107
+ data_model,
108
+ instance_space,
109
+ class_neat_id_by_view_id,
110
+ issues,
111
+ rules=rules,
112
+ )
99
113
 
100
114
  def _load(self, stop_on_exception: bool = False) -> Iterable[dm.InstanceApply | NeatIssue]:
101
115
  if self._issues.has_errors and stop_on_exception:
@@ -106,6 +120,13 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
106
120
  if not self.data_model:
107
121
  # There should already be an error in this case.
108
122
  return
123
+
124
+ views_with_linked_properties = (
125
+ DMSAnalysis(self.rules).views_with_properties_linked_to_classes(consider_inheritance=True)
126
+ if self.rules and self.rules.metadata.logical
127
+ else None
128
+ )
129
+
109
130
  view_ids = [repr(v.as_id()) for v in self.data_model.views]
110
131
  tracker = self._tracker(type(self).__name__, view_ids, "views")
111
132
  for view in self.data_model.views:
@@ -114,17 +135,35 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
114
135
  pydantic_cls, edge_by_type, issues = self._create_validation_classes(view) # type: ignore[var-annotated]
115
136
  yield from issues
116
137
  tracker.issue(issues)
117
- class_name = self.class_by_view_id.get(view.as_id(), view.external_id)
118
138
 
119
- for identifier, properties in self.graph_store.read(class_name):
139
+ # this assumes no changes in the suffix of view and class
140
+
141
+ if views_with_linked_properties:
142
+ # we need graceful exit if the view is not in the view_property_pairs
143
+ property_link_pairs = views_with_linked_properties.get(ViewEntity.from_id(view_id))
144
+
145
+ if class_neat_id := self.class_neat_id_by_view_id.get(view_id):
146
+ reader = self.graph_store._read_via_rules_linkage(class_neat_id, property_link_pairs)
147
+ else:
148
+ error_view = ResourceRetrievalError(view_id, "view", "View not linked to class")
149
+ tracker.issue(error_view)
150
+ if stop_on_exception:
151
+ raise error_view
152
+ yield error_view
153
+
154
+ else:
155
+ reader = self.graph_store.read(view.external_id)
156
+
157
+ for identifier, properties in reader:
120
158
  try:
159
+ print(view_id)
121
160
  yield self._create_node(identifier, properties, pydantic_cls, view_id)
122
161
  except ValueError as e:
123
- error = ResourceCreationError(identifier, "node", error=str(e))
124
- tracker.issue(error)
162
+ error_node = ResourceCreationError(identifier, "node", error=str(e))
163
+ tracker.issue(error_node)
125
164
  if stop_on_exception:
126
- raise error from e
127
- yield error
165
+ raise error_node from e
166
+ yield error_node
128
167
  yield from self._create_edges(identifier, properties, edge_by_type, tracker)
129
168
  tracker.finish(repr(view_id))
130
169
 
@@ -244,7 +283,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
244
283
  return dm.NodeApply(
245
284
  space=self.instance_space,
246
285
  external_id=identifier,
247
- type=dm.DirectRelationReference(view_id.space, type_) if type_ is not None else None,
286
+ type=(dm.DirectRelationReference(view_id.space, view_id.external_id) if type_ is not None else None),
248
287
  sources=[dm.NodeOrEdgeData(source=view_id, properties=dict(created.model_dump().items()))],
249
288
  )
250
289
 
@@ -5,7 +5,7 @@ from ._classic_cdf import (
5
5
  AssetRelationshipConnector,
6
6
  AssetSequenceConnector,
7
7
  AssetTimeSeriesConnector,
8
- RelationshipToSchemaTransformer,
8
+ RelationshipAsEdgeTransformer,
9
9
  )
10
10
  from ._rdfpath import AddSelfReferenceProperty, MakeConnectionOnExactMatch
11
11
  from ._value_type import SplitMultiValueProperty
@@ -19,7 +19,7 @@ __all__ = [
19
19
  "AssetRelationshipConnector",
20
20
  "AddSelfReferenceProperty",
21
21
  "SplitMultiValueProperty",
22
- "RelationshipToSchemaTransformer",
22
+ "RelationshipAsEdgeTransformer",
23
23
  "MakeConnectionOnExactMatch",
24
24
  ]
25
25
 
@@ -32,6 +32,6 @@ Transformers = (
32
32
  | AssetRelationshipConnector
33
33
  | AddSelfReferenceProperty
34
34
  | SplitMultiValueProperty
35
- | RelationshipToSchemaTransformer
35
+ | RelationshipAsEdgeTransformer
36
36
  | MakeConnectionOnExactMatch
37
37
  )
@@ -1,15 +1,22 @@
1
1
  import textwrap
2
2
  import warnings
3
3
  from abc import ABC
4
+ from collections.abc import Callable, Iterable
5
+ from functools import lru_cache
4
6
  from typing import cast
5
7
 
6
8
  from rdflib import RDF, Graph, Literal, Namespace, URIRef
7
- from rdflib.query import ResultRow
8
9
 
9
10
  from cognite.neat._constants import CLASSIC_CDF_NAMESPACE, DEFAULT_NAMESPACE
10
11
  from cognite.neat._graph import extractors
11
12
  from cognite.neat._issues.warnings import ResourceNotFoundWarning
12
- from cognite.neat._utils.rdf_ import Triple, add_triples_in_batch, remove_namespace_from_uri
13
+ from cognite.neat._utils.collection_ import iterate_progress_bar
14
+ from cognite.neat._utils.rdf_ import (
15
+ Triple,
16
+ add_triples_in_batch,
17
+ remove_instance_ids_in_batch,
18
+ remove_namespace_from_uri,
19
+ )
13
20
 
14
21
  from ._base import BaseTransformer
15
22
 
@@ -235,31 +242,40 @@ class AssetRelationshipConnector(BaseTransformer):
235
242
  graph.remove((relationship_id, self.relationship_target_xid_prop, None))
236
243
 
237
244
 
238
- class RelationshipToSchemaTransformer(BaseTransformer):
239
- """Replaces relationships with a schema.
245
+ class RelationshipAsEdgeTransformer(BaseTransformer):
246
+ """Converts relationships into edges in the graph.
240
247
 
241
- This transformer analyzes the relationships in the graph and modifies them to be part of the schema
242
- for Assets, Events, Files, Sequences, and TimeSeries. Relationships without any properties
243
- are replaced by a simple relationship between the source and target nodes. Relationships with
244
- properties are replaced by a schema that contains the properties as attributes.
248
+ This transformer converts relationships into edges in the graph. This is useful as the
249
+ edges will be picked up as part of the schema connected to Assets, Events, Files, Sequences,
250
+ and TimeSeries in the InferenceImporter.
245
251
 
246
252
  Args:
247
- limit: The minimum number of relationships that need to be present for it
248
- to be converted into a schema. Default is 1.
253
+ min_relationship_types: The minimum number of relationships of a given source/target type that must exist to convert those
254
+ relationships to edges. For example, if there are only 5 relationships between Assets and TimeSeries,
255
+ and min_relationship_types is 10, those relationships will not be converted to edges.
256
+ limit_per_type: The number of conversions to perform per relationship type. For example, if there are 10
257
+ relationships between Assets and TimeSeries, and limit_per_type is 1, only 1 of those relationships
258
+ will be converted to an edge. If None, all relationships will be converted.
249
259
 
250
260
  """
251
261
 
252
- def __init__(self, limit: int = 1, namespace: Namespace = CLASSIC_CDF_NAMESPACE) -> None:
253
- self._limit = limit
262
+ def __init__(
263
+ self,
264
+ min_relationship_types: int = 1,
265
+ limit_per_type: int | None = None,
266
+ namespace: Namespace = CLASSIC_CDF_NAMESPACE,
267
+ ) -> None:
268
+ self._min_relationship_types = min_relationship_types
269
+ self._limit_per_type = limit_per_type
254
270
  self._namespace = namespace
255
271
 
256
272
  _NOT_PROPERTIES: frozenset[str] = frozenset(
257
273
  {"sourceExternalId", "targetExternalId", "externalId", "sourceType", "targetType"}
258
274
  )
259
275
  _RELATIONSHIP_NODE_TYPES: tuple[str, ...] = tuple(["Asset", "Event", "File", "Sequence", "TimeSeries"])
260
- description = "Replaces relationships with a schema"
276
+ description = "Converts relationships to edge"
261
277
  _use_only_once: bool = True
262
- _need_changes = frozenset({str(extractors.RelationshipsExtractor.__name__)})
278
+ _need_changes = frozenset({extractors.RelationshipsExtractor.__name__})
263
279
 
264
280
  _count_by_source_target = """PREFIX classic: <{namespace}>
265
281
 
@@ -286,47 +302,99 @@ WHERE {{
286
302
  ?entity classic:externalId "{external_id}" .
287
303
  }}"""
288
304
 
305
+ @staticmethod
306
+ def create_lookup_entity_with_external_id(graph: Graph, namespace: Namespace) -> Callable[[str, str], URIRef]:
307
+ @lru_cache(maxsize=10_000)
308
+ def lookup_entity_with_external_id(entity_type: str, external_id: str) -> URIRef:
309
+ query = RelationshipAsEdgeTransformer._lookup_entity_query.format(
310
+ namespace=namespace, entity_type=entity_type, external_id=external_id
311
+ )
312
+ result = list(graph.query(query))
313
+ if len(result) == 1:
314
+ return cast(URIRef, result[0][0]) # type: ignore[index]
315
+ raise ValueError(f"Could not find entity with external_id {external_id} and type {entity_type}")
316
+
317
+ return lookup_entity_with_external_id
318
+
289
319
  def transform(self, graph: Graph) -> None:
320
+ lookup_entity_with_external_id = self.create_lookup_entity_with_external_id(graph, self._namespace)
290
321
  for source_type in self._RELATIONSHIP_NODE_TYPES:
291
322
  for target_type in self._RELATIONSHIP_NODE_TYPES:
292
323
  query = self._count_by_source_target.format(
293
324
  namespace=self._namespace, source_type=source_type, target_type=target_type
294
325
  )
295
- for instance_count in graph.query(query):
296
- if int(instance_count[0]) < self._limit: # type: ignore[index, arg-type]
326
+ for instance_count_res in graph.query(query):
327
+ instance_count = int(instance_count_res[0]) # type: ignore[index, arg-type]
328
+ if instance_count < self._min_relationship_types:
297
329
  continue
298
- query = self._instances.format(
299
- namespace=self._namespace, source_type=source_type, target_type=target_type
330
+ edge_triples = self._edge_triples(
331
+ graph, source_type, target_type, instance_count, lookup_entity_with_external_id
300
332
  )
301
- for result in graph.query(query):
302
- instance_id = cast(URIRef, result[0]) # type: ignore[index, misc]
303
- self._convert_relationship_to_schema(graph, instance_id, source_type, target_type)
333
+ add_triples_in_batch(graph, edge_triples)
304
334
 
305
- def _convert_relationship_to_schema(
306
- self, graph: Graph, instance_id: URIRef, source_type: str, target_type: str
307
- ) -> None:
308
- result = cast(list[ResultRow], list(graph.query(f"DESCRIBE <{instance_id}>")))
335
+ def _edge_triples(
336
+ self,
337
+ graph: Graph,
338
+ source_type: str,
339
+ target_type: str,
340
+ instance_count: int,
341
+ lookup_entity_with_external_id: Callable[[str, str], URIRef],
342
+ ) -> Iterable[Triple]:
343
+ query = self._instances.format(namespace=self._namespace, source_type=source_type, target_type=target_type)
344
+ total_instance_count = instance_count if self._limit_per_type is None else self._limit_per_type
345
+
346
+ converted_relationships: list[URIRef] = []
347
+ for no, result in enumerate(
348
+ iterate_progress_bar(graph.query(query), total=total_instance_count, description="Relationships to edges")
349
+ ):
350
+ if self._limit_per_type is not None and no >= self._limit_per_type:
351
+ break
352
+ relationship_id = cast(URIRef, result[0]) # type: ignore[index, misc]
353
+ yield from self._relationship_as_edge(
354
+ graph, relationship_id, source_type, target_type, lookup_entity_with_external_id
355
+ )
356
+ converted_relationships.append(relationship_id)
357
+
358
+ if len(converted_relationships) >= 1_000:
359
+ remove_instance_ids_in_batch(graph, converted_relationships)
360
+ converted_relationships = []
361
+
362
+ remove_instance_ids_in_batch(graph, converted_relationships)
363
+
364
+ def _relationship_as_edge(
365
+ self,
366
+ graph: Graph,
367
+ relationship_id: URIRef,
368
+ source_type: str,
369
+ target_type: str,
370
+ lookup_entity_with_external_id: Callable[[str, str], URIRef],
371
+ ) -> list[Triple]:
372
+ relationship_triples = cast(list[Triple], list(graph.query(f"DESCRIBE <{relationship_id}>")))
309
373
  object_by_predicates = cast(
310
- dict[str, URIRef | Literal], {remove_namespace_from_uri(row[1]): row[2] for row in result}
374
+ dict[str, URIRef | Literal], {remove_namespace_from_uri(row[1]): row[2] for row in relationship_triples}
311
375
  )
312
376
  source_external_id = cast(URIRef, object_by_predicates["sourceExternalId"])
313
377
  target_source_id = cast(URIRef, object_by_predicates["targetExternalId"])
314
378
  try:
315
- source_id = self._lookup_entity(graph, source_type, source_external_id)
379
+ source_id = lookup_entity_with_external_id(source_type, source_external_id)
316
380
  except ValueError:
317
- warnings.warn(ResourceNotFoundWarning(source_external_id, "class", str(instance_id), "class"), stacklevel=2)
318
- return None
381
+ warnings.warn(
382
+ ResourceNotFoundWarning(source_external_id, "class", str(relationship_id), "class"), stacklevel=2
383
+ )
384
+ return []
319
385
  try:
320
- target_id = self._lookup_entity(graph, target_type, target_source_id)
386
+ target_id = lookup_entity_with_external_id(target_type, target_source_id)
321
387
  except ValueError:
322
- warnings.warn(ResourceNotFoundWarning(target_source_id, "class", str(instance_id), "class"), stacklevel=2)
323
- return None
324
- external_id = str(object_by_predicates["externalId"])
388
+ warnings.warn(
389
+ ResourceNotFoundWarning(target_source_id, "class", str(relationship_id), "class"), stacklevel=2
390
+ )
391
+ return []
392
+ edge_id = str(object_by_predicates["externalId"])
325
393
  # If there is properties on the relationship, we create a new intermediate node
326
- self._create_node(graph, object_by_predicates, external_id, source_id, target_id, self._predicate(target_type))
327
-
328
- for triple in result:
329
- graph.remove(triple) # type: ignore[arg-type]
394
+ edge_type = self._namespace[f"{source_type}To{target_type}Edge"]
395
+ return self._create_edge(
396
+ object_by_predicates, edge_id, source_id, target_id, self._predicate(target_type), edge_type
397
+ )
330
398
 
331
399
  def _lookup_entity(self, graph: Graph, entity_type: str, external_id: str) -> URIRef:
332
400
  query = self._lookup_entity_query.format(
@@ -337,38 +405,38 @@ WHERE {{
337
405
  return cast(URIRef, result[0][0]) # type: ignore[index]
338
406
  raise ValueError(f"Could not find entity with external_id {external_id} and type {entity_type}")
339
407
 
340
- def _create_node(
408
+ def _create_edge(
341
409
  self,
342
- graph: Graph,
343
410
  objects_by_predicates: dict[str, URIRef | Literal],
344
411
  external_id: str,
345
412
  source_id: URIRef,
346
413
  target_id: URIRef,
347
414
  predicate: URIRef,
348
- ) -> None:
415
+ edge_type: URIRef,
416
+ ) -> list[Triple]:
349
417
  """Creates a new intermediate node for the relationship with properties."""
350
418
  # Create the entity with the properties
351
- instance_id = self._namespace[external_id]
352
- graph.add((instance_id, RDF.type, self._namespace["Edge"]))
419
+ edge_triples: list[Triple] = []
420
+ edge_id = self._namespace[external_id]
421
+
422
+ edge_triples.append((edge_id, RDF.type, edge_type))
353
423
  for prop_name, object_ in objects_by_predicates.items():
354
424
  if prop_name in self._NOT_PROPERTIES:
355
425
  continue
356
- graph.add((instance_id, self._namespace[prop_name], object_))
426
+ edge_triples.append((edge_id, self._namespace[prop_name], object_))
357
427
 
358
428
  # Target and Source IDs will always be a combination of Asset, Sequence, Event, TimeSeries, and File.
359
429
  # If we assume source ID is an asset and target ID is a time series, then
360
430
  # before we had relationship pointing to both: timeseries <- relationship -> asset
361
- # After, we want asset -> timeseries, and asset.edgeSource -> Edge
431
+ # After, we want asset <-> Edge -> TimeSeries
362
432
  # and the new edge will point to the asset and the timeseries through startNode and endNode
363
433
 
364
- # Link the two entities directly,
365
- graph.add((source_id, predicate, target_id))
366
- # Create the new edge
367
- graph.add((instance_id, self._namespace["startNode"], source_id))
368
- graph.add((instance_id, self._namespace["endNode"], target_id))
369
-
370
- # Link the source to the edge properties
371
- graph.add((source_id, self._namespace["edgeSource"], instance_id))
434
+ # Link the source to the new edge
435
+ edge_triples.append((source_id, predicate, edge_id))
436
+ # Link the edge to the source and target
437
+ edge_triples.append((edge_id, self._namespace["startNode"], source_id))
438
+ edge_triples.append((edge_id, self._namespace["endNode"], target_id))
439
+ return edge_triples
372
440
 
373
441
  def _predicate(self, target_type: str) -> URIRef:
374
442
  return self._namespace[f"relationship{target_type.capitalize()}"]
@@ -36,6 +36,7 @@ from ._properties import (
36
36
  from ._resources import (
37
37
  ResourceNeatWarning,
38
38
  ResourceNotFoundWarning,
39
+ ResourceRegexViolationWarning,
39
40
  ResourceRetrievalWarning,
40
41
  ResourcesDuplicatedWarning,
41
42
  ResourceTypeNotSupportedWarning,
@@ -64,6 +65,7 @@ __all__ = [
64
65
  "ResourceNotFoundWarning",
65
66
  "ResourceTypeNotSupportedWarning",
66
67
  "ResourceRetrievalWarning",
68
+ "ResourceRegexViolationWarning",
67
69
  "PrincipleOneModelOneSpaceWarning",
68
70
  "PrincipleMatchingSpaceAndVersionWarning",
69
71
  "PrincipleSolutionBuildsOnEnterpriseWarning",
@@ -13,6 +13,21 @@ class ResourceNeatWarning(NeatWarning, Generic[T_Identifier]):
13
13
  resource_type: ResourceType
14
14
 
15
15
 
16
+ @dataclass(unsafe_hash=True)
17
+ class ResourceRegexViolationWarning(ResourceNeatWarning):
18
+ """The {resource_type} with identifier {identifier} in the {location} is violating
19
+ the CDF regex {regex}. This will lead to errors when converting to DMS data model.
20
+ """
21
+
22
+ fix = (
23
+ "Either export the data model and make the necessary changes manually"
24
+ " or run prepare.cdf_compliant_external_ids."
25
+ )
26
+
27
+ location: str
28
+ regex: str
29
+
30
+
16
31
  @dataclass(unsafe_hash=True)
17
32
  class ResourceNotFoundWarning(ResourceNeatWarning, Generic[T_Identifier, T_ReferenceIdentifier]):
18
33
  """The {resource_type} with identifier {identifier} referred by {referred_type} {referred_by} does not exist.
@@ -4,23 +4,25 @@ from abc import ABC, abstractmethod
4
4
  from collections import defaultdict
5
5
  from collections.abc import Set
6
6
  from dataclasses import dataclass
7
- from typing import Generic, TypeVar
7
+ from typing import Generic, TypeVar, cast
8
8
 
9
9
  import pandas as pd
10
- from pydantic import BaseModel
10
+ from rdflib import URIRef
11
11
 
12
12
  from cognite.neat._rules.models._base_rules import BaseRules
13
13
  from cognite.neat._rules.models._rdfpath import RDFPath
14
+ from cognite.neat._rules.models.dms._rules import DMSProperty, DMSView
14
15
  from cognite.neat._rules.models.entities import (
15
16
  ClassEntity,
16
17
  Entity,
17
18
  )
18
19
  from cognite.neat._rules.models.information import InformationProperty
20
+ from cognite.neat._rules.models.information._rules import InformationClass
19
21
  from cognite.neat._utils.rdf_ import get_inheritance_path
20
22
 
21
23
  T_Rules = TypeVar("T_Rules", bound=BaseRules)
22
- T_Property = TypeVar("T_Property", bound=BaseModel)
23
- T_Class = TypeVar("T_Class", bound=BaseModel)
24
+ T_Property = TypeVar("T_Property", bound=InformationProperty | DMSProperty)
25
+ T_Class = TypeVar("T_Class", bound=InformationClass | DMSView)
24
26
  T_ClassEntity = TypeVar("T_ClassEntity", bound=Entity)
25
27
  T_PropertyEntity = TypeVar("T_PropertyEntity", bound=Entity | str)
26
28
 
@@ -108,6 +110,14 @@ class BaseAnalysis(ABC, Generic[T_Rules, T_Class, T_Property, T_ClassEntity, T_P
108
110
  def inherited_referred_classes(self) -> set[ClassEntity]:
109
111
  raise NotImplementedError
110
112
 
113
+ @property
114
+ def properties_by_neat_id(self) -> dict[URIRef, T_Property]:
115
+ return {cast(URIRef, prop.neatId): prop for prop in self._get_properties()}
116
+
117
+ @property
118
+ def classes_by_neat_id(self) -> dict[URIRef, T_Class]:
119
+ return {cast(URIRef, class_.neatId): class_ for class_ in self._get_classes()}
120
+
111
121
  # Todo Lru cache this method.
112
122
  def class_parent_pairs(self, allow_different_space: bool = False) -> dict[T_ClassEntity, list[T_ClassEntity]]:
113
123
  """This only returns class - parent pairs only if parent is in the same data model"""
@@ -176,7 +186,7 @@ class BaseAnalysis(ABC, Generic[T_Rules, T_Class, T_Property, T_ClassEntity, T_P
176
186
  # ParentClassEntity -> ClassEntity to match the type of class_property_pairs
177
187
  if parent in class_property_pairs:
178
188
  for property_ in class_property_pairs[parent]:
179
- property_ = property_.model_copy()
189
+ property_ = property_.model_copy() # type: ignore
180
190
 
181
191
  # This corresponds to importing properties from parent class
182
192
  # making sure that the property is attached to desired child class
@@ -1,3 +1,7 @@
1
+ from collections import defaultdict
2
+
3
+ from rdflib import URIRef
4
+
1
5
  from cognite.neat._constants import DMS_LISTABLE_PROPERTY_LIMIT
2
6
  from cognite.neat._rules.models.dms import DMSProperty, DMSRules, DMSView
3
7
  from cognite.neat._rules.models.entities import ViewEntity
@@ -35,3 +39,19 @@ class DMSAnalysis(BaseAnalysis[DMSRules, DMSView, DMSProperty, ViewEntity, str])
35
39
 
36
40
  def _get_prop_entity(self, property_: DMSProperty) -> str:
37
41
  return property_.view_property
42
+
43
+ def views_with_properties_linked_to_classes(
44
+ self,
45
+ consider_inheritance: bool = False,
46
+ allow_different_namespace: bool = False,
47
+ ) -> dict[ViewEntity, dict[str, URIRef]]:
48
+ view_property_pairs = self.classes_with_properties(consider_inheritance, allow_different_namespace)
49
+
50
+ view_and_properties_with_links: dict[ViewEntity, dict[str, URIRef]] = defaultdict(dict)
51
+
52
+ for view, properties in view_property_pairs.items():
53
+ view_and_properties_with_links[view] = {
54
+ prop.view_property: prop.logical for prop in properties if prop.logical
55
+ }
56
+
57
+ return view_and_properties_with_links