cognite-neat 0.105.2__py3-none-any.whl → 0.107.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (54) hide show
  1. cognite/neat/_config.py +6 -260
  2. cognite/neat/_graph/extractors/__init__.py +5 -1
  3. cognite/neat/_graph/extractors/_base.py +32 -0
  4. cognite/neat/_graph/extractors/_classic_cdf/_base.py +42 -16
  5. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +78 -8
  6. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +2 -0
  7. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +10 -3
  8. cognite/neat/_graph/extractors/_dms.py +48 -14
  9. cognite/neat/_graph/extractors/_dms_graph.py +149 -0
  10. cognite/neat/_graph/extractors/_rdf_file.py +32 -5
  11. cognite/neat/_graph/loaders/_rdf2dms.py +119 -20
  12. cognite/neat/_graph/queries/_construct.py +1 -1
  13. cognite/neat/_graph/transformers/__init__.py +5 -0
  14. cognite/neat/_graph/transformers/_base.py +13 -9
  15. cognite/neat/_graph/transformers/_classic_cdf.py +141 -44
  16. cognite/neat/_graph/transformers/_rdfpath.py +4 -4
  17. cognite/neat/_graph/transformers/_value_type.py +54 -44
  18. cognite/neat/_issues/warnings/_external.py +1 -1
  19. cognite/neat/_rules/analysis/_base.py +1 -1
  20. cognite/neat/_rules/analysis/_information.py +14 -13
  21. cognite/neat/_rules/catalog/__init__.py +1 -0
  22. cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
  23. cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
  24. cognite/neat/_rules/importers/_dms2rules.py +7 -5
  25. cognite/neat/_rules/importers/_rdf/_inference2rules.py +5 -3
  26. cognite/neat/_rules/models/_base_rules.py +0 -12
  27. cognite/neat/_rules/models/_types.py +5 -0
  28. cognite/neat/_rules/models/dms/_rules.py +50 -2
  29. cognite/neat/_rules/models/information/_rules.py +48 -5
  30. cognite/neat/_rules/models/information/_rules_input.py +1 -1
  31. cognite/neat/_rules/models/mapping/_classic2core.py +4 -5
  32. cognite/neat/_rules/models/mapping/_classic2core.yaml +70 -58
  33. cognite/neat/_rules/transformers/__init__.py +4 -0
  34. cognite/neat/_rules/transformers/_converters.py +209 -62
  35. cognite/neat/_rules/transformers/_mapping.py +3 -2
  36. cognite/neat/_session/_base.py +8 -13
  37. cognite/neat/_session/_inspect.py +6 -2
  38. cognite/neat/_session/_mapping.py +22 -13
  39. cognite/neat/_session/_prepare.py +9 -57
  40. cognite/neat/_session/_read.py +96 -29
  41. cognite/neat/_session/_set.py +9 -0
  42. cognite/neat/_session/_state.py +10 -1
  43. cognite/neat/_session/_to.py +51 -15
  44. cognite/neat/_session/exceptions.py +7 -3
  45. cognite/neat/_store/_graph_store.py +85 -39
  46. cognite/neat/_store/_rules_store.py +22 -0
  47. cognite/neat/_utils/auth.py +2 -0
  48. cognite/neat/_utils/collection_.py +32 -11
  49. cognite/neat/_version.py +1 -1
  50. {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/METADATA +2 -8
  51. {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/RECORD +54 -52
  52. {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/WHEEL +1 -1
  53. {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/LICENSE +0 -0
  54. {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/entry_points.txt +0 -0
@@ -1,3 +1,4 @@
1
+ from ._base import BaseTransformerStandardised
1
2
  from ._classic_cdf import (
2
3
  AddAssetDepth,
3
4
  AssetEventConnector,
@@ -5,6 +6,7 @@ from ._classic_cdf import (
5
6
  AssetRelationshipConnector,
6
7
  AssetSequenceConnector,
7
8
  AssetTimeSeriesConnector,
9
+ LookupRelationshipSourceTarget,
8
10
  RelationshipAsEdgeTransformer,
9
11
  )
10
12
  from ._prune_graph import (
@@ -29,6 +31,7 @@ __all__ = [
29
31
  "ConnectionToLiteral",
30
32
  "ConvertLiteral",
31
33
  "LiteralToEntity",
34
+ "LookupRelationshipSourceTarget",
32
35
  "MakeConnectionOnExactMatch",
33
36
  "PruneDanglingNodes",
34
37
  "PruneDeadEndEdges",
@@ -57,4 +60,6 @@ Transformers = (
57
60
  | ConvertLiteral
58
61
  | LiteralToEntity
59
62
  | ConnectionToLiteral
63
+ | BaseTransformerStandardised
64
+ | LookupRelationshipSourceTarget
60
65
  )
@@ -1,6 +1,7 @@
1
1
  import dataclasses
2
2
  import warnings
3
3
  from abc import ABC, abstractmethod
4
+ from collections.abc import Iterator
4
5
  from typing import ClassVar, TypeAlias, cast
5
6
 
6
7
  from rdflib import Graph
@@ -8,7 +9,7 @@ from rdflib.query import ResultRow
8
9
 
9
10
  from cognite.neat._issues.warnings import NeatValueWarning
10
11
  from cognite.neat._shared import Triple
11
- from cognite.neat._utils.collection_ import iterate_progress_bar
12
+ from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
12
13
  from cognite.neat._utils.graph_transformations_report import GraphTransformationResult
13
14
 
14
15
  To_Add_Triples: TypeAlias = list[Triple]
@@ -42,7 +43,6 @@ class BaseTransformerStandardised(ABC):
42
43
  description: str
43
44
  _use_only_once: bool = False
44
45
  _need_changes: ClassVar[frozenset[str]] = frozenset()
45
- _use_iterate_bar_threshold: int = 500
46
46
 
47
47
  @abstractmethod
48
48
  def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
@@ -66,9 +66,16 @@ class BaseTransformerStandardised(ABC):
66
66
  The query to use for extracting target triples from the graph and performing the transformation.
67
67
  Returns:
68
68
  A query string.
69
+
70
+ !!! note "Complex Queries"
71
+ In majority of cases the query should be a simple SELECT query. However, in case
72
+ when there is a need to have one or more sub iterators, one can overwrite the ._iterator() method
69
73
  """
70
74
  raise NotImplementedError()
71
75
 
76
+ def _iterator(self, graph: Graph) -> Iterator:
77
+ yield from graph.query(self._iterate_query())
78
+
72
79
  def _skip_count_query(self) -> str:
73
80
  """
74
81
  The query to use for extracting target triples from the graph and performing the transformation.
@@ -98,13 +105,10 @@ class BaseTransformerStandardised(ABC):
98
105
  if iteration_count == 0:
99
106
  return outcome
100
107
 
101
- result_iterable = graph.query(self._iterate_query())
102
- if iteration_count > self._use_iterate_bar_threshold:
103
- result_iterable = iterate_progress_bar( # type: ignore[misc, assignment]
104
- result_iterable,
105
- total=iteration_count,
106
- description=self.description,
107
- )
108
+ result_iterable = self._iterator(graph)
109
+ result_iterable = iterate_progress_bar_if_above_config_threshold(
110
+ result_iterable, iteration_count, self.description
111
+ )
108
112
 
109
113
  for row in result_iterable:
110
114
  row = cast(ResultRow, row)
@@ -1,6 +1,7 @@
1
+ import urllib.parse
1
2
  import warnings
2
3
  from abc import ABC
3
- from collections.abc import Callable, Iterable
4
+ from collections.abc import Callable, Iterable, Iterator
4
5
  from functools import lru_cache
5
6
  from typing import cast
6
7
 
@@ -9,6 +10,7 @@ from rdflib.query import ResultRow
9
10
 
10
11
  from cognite.neat._constants import CLASSIC_CDF_NAMESPACE, DEFAULT_NAMESPACE
11
12
  from cognite.neat._graph import extractors
13
+ from cognite.neat._issues.errors import NeatValueError
12
14
  from cognite.neat._issues.warnings import ResourceNotFoundWarning
13
15
  from cognite.neat._utils.collection_ import iterate_progress_bar
14
16
  from cognite.neat._utils.rdf_ import (
@@ -229,8 +231,7 @@ class AssetEventConnector(BaseAssetConnector):
229
231
  )
230
232
 
231
233
 
232
- # TODO: standardise
233
- class AssetRelationshipConnector(BaseTransformer):
234
+ class AssetRelationshipConnector(BaseTransformerStandardised):
234
235
  description: str = "Connects assets via relationships"
235
236
  _use_only_once: bool = True
236
237
  _need_changes = frozenset(
@@ -248,6 +249,44 @@ class AssetRelationshipConnector(BaseTransformer):
248
249
  ?target <{asset_xid_property}> ?target_xid .
249
250
  ?target a <{asset_type}> .}}"""
250
251
 
252
+ def _count_query(self) -> str:
253
+ query = """SELECT (COUNT(?target) as ?count) WHERE {{
254
+ ?relationship a <{relationship_type}> .
255
+ ?relationship <{relationship_source_xid_prop}> ?source_xid .
256
+ ?source <{asset_xid_property}> ?source_xid .
257
+ ?source a <{asset_type}> .
258
+
259
+ ?relationship <{relationship_target_xid_prop}> ?target_xid .
260
+ ?target <{asset_xid_property}> ?target_xid .
261
+ ?target a <{asset_type}> .}}"""
262
+
263
+ return query.format(
264
+ relationship_type=self.relationship_type,
265
+ relationship_source_xid_prop=self.relationship_source_xid_prop,
266
+ relationship_target_xid_prop=self.relationship_target_xid_prop,
267
+ asset_xid_property=self.asset_xid_property,
268
+ asset_type=self.asset_type,
269
+ )
270
+
271
+ def _iterate_query(self) -> str:
272
+ query = """SELECT ?source ?relationship ?target WHERE {{
273
+ ?relationship a <{relationship_type}> .
274
+ ?relationship <{relationship_source_xid_prop}> ?source_xid .
275
+ ?source <{asset_xid_property}> ?source_xid .
276
+ ?source a <{asset_type}> .
277
+
278
+ ?relationship <{relationship_target_xid_prop}> ?target_xid .
279
+ ?target <{asset_xid_property}> ?target_xid .
280
+ ?target a <{asset_type}> .}}"""
281
+
282
+ return query.format(
283
+ relationship_type=self.relationship_type,
284
+ relationship_source_xid_prop=self.relationship_source_xid_prop,
285
+ relationship_target_xid_prop=self.relationship_target_xid_prop,
286
+ asset_xid_property=self.asset_xid_property,
287
+ asset_type=self.asset_type,
288
+ )
289
+
251
290
  def __init__(
252
291
  self,
253
292
  asset_type: URIRef | None = None,
@@ -262,48 +301,20 @@ class AssetRelationshipConnector(BaseTransformer):
262
301
  self.relationship_target_xid_prop = relationship_target_xid_prop or DEFAULT_NAMESPACE.targetExternalId
263
302
  self.asset_xid_property = asset_xid_property or DEFAULT_NAMESPACE.externalId
264
303
 
265
- def transform(self, graph: Graph) -> None:
266
- for relationship_id_result in graph.query(
267
- f"SELECT DISTINCT ?relationship_id WHERE {{?relationship_id a <{self.relationship_type}>}}"
268
- ):
269
- relationship_id: URIRef = cast(tuple, relationship_id_result)[0]
270
-
271
- if assets_id_res := list(
272
- graph.query(
273
- self._asset_template.format(
274
- relationship_id=relationship_id,
275
- asset_xid_property=self.asset_xid_property,
276
- relationship_source_xid_prop=self.relationship_source_xid_prop,
277
- relationship_target_xid_prop=self.relationship_target_xid_prop,
278
- asset_type=self.asset_type,
279
- )
280
- )
281
- ):
282
- # files can be connected to multiple assets in the graph
283
- for source_asset_id, target_asset_id in cast(list[tuple], assets_id_res):
284
- # create a relationship between the two assets
285
- graph.add(
286
- (
287
- source_asset_id,
288
- DEFAULT_NAMESPACE.relationship,
289
- relationship_id,
290
- )
291
- )
292
- graph.add(
293
- (
294
- target_asset_id,
295
- DEFAULT_NAMESPACE.relationship,
296
- relationship_id,
297
- )
298
- )
304
+ def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
305
+ row_output = RowTransformationOutput()
306
+ source, relationship, target = query_result_row
299
307
 
300
- # add source and target to the relationship
301
- graph.add((relationship_id, DEFAULT_NAMESPACE.source, source_asset_id))
302
- graph.add((relationship_id, DEFAULT_NAMESPACE.target, target_asset_id))
308
+ row_output.add_triples.append(cast(Triple, (source, DEFAULT_NAMESPACE.relationship, target)))
309
+ row_output.add_triples.append(cast(Triple, (relationship, DEFAULT_NAMESPACE.source, source)))
310
+ row_output.add_triples.append(cast(Triple, (relationship, DEFAULT_NAMESPACE.target, target)))
303
311
 
304
- # remove properties that are not needed, specifically the external ids
305
- graph.remove((relationship_id, self.relationship_source_xid_prop, None))
306
- graph.remove((relationship_id, self.relationship_target_xid_prop, None))
312
+ row_output.remove_triples.append(cast(Triple, (relationship, self.relationship_source_xid_prop, None)))
313
+ row_output.remove_triples.append(cast(Triple, (relationship, self.relationship_target_xid_prop, None)))
314
+
315
+ row_output.instances_modified_count += 2
316
+
317
+ return row_output
307
318
 
308
319
 
309
320
  # TODO: standardise
@@ -455,7 +466,7 @@ WHERE {{
455
466
  ResourceNotFoundWarning(target_source_id, "class", str(relationship_id), "class"), stacklevel=2
456
467
  )
457
468
  return []
458
- edge_id = str(object_by_predicates["externalId"])
469
+ edge_id = urllib.parse.quote(str(object_by_predicates["externalId"]))
459
470
  # If there is properties on the relationship, we create a new intermediate node
460
471
  edge_type = self._namespace[f"{source_type}To{target_type}Edge"]
461
472
  return self._create_edge(
@@ -506,3 +517,89 @@ WHERE {{
506
517
 
507
518
  def _predicate(self, target_type: str) -> URIRef:
508
519
  return self._namespace[f"relationship{target_type.capitalize()}"]
520
+
521
+
522
+ class LookupRelationshipSourceTarget(BaseTransformerStandardised):
523
+ """When relationships are extracted, the source and target are extracted as literals. This transformers
524
+ lookup the externalID of the source and target and replaces the literals with the URIRef of the entity.
525
+ """
526
+
527
+ description = "Lookup relationships source and target externalId"
528
+ _use_only_once: bool = True
529
+ _need_changes = frozenset({extractors.RelationshipsExtractor.__name__})
530
+
531
+ _lookup_entity_query = """SELECT ?entity
532
+ WHERE {{
533
+ ?entity a <{entity_type}> .
534
+ ?entity <{namespace}externalId> "{external_id}" .
535
+ }}"""
536
+
537
+ def __init__(self, namespace: Namespace = CLASSIC_CDF_NAMESPACE, type_prefix: str | None = None) -> None:
538
+ self._namespace = namespace
539
+ self._type_prefix = type_prefix
540
+ self._lookup_entity: Callable[[URIRef, str], URIRef] | None = None
541
+
542
+ def _count_query(self) -> str:
543
+ return f"""SELECT (COUNT(?instance) AS ?instanceCount)
544
+ WHERE {{
545
+ ?instance a <{self._namespace}ClassicRelationship> .
546
+ }}"""
547
+
548
+ def _iterate_query(self) -> str:
549
+ return f"""SELECT ?instance ?source ?sourceType ?target ?targetType
550
+ WHERE {{
551
+ ?instance a <{self._namespace}ClassicRelationship> .
552
+ ?instance <{self._namespace}sourceExternalId> ?source .
553
+ ?instance <{self._namespace}targetExternalId> ?target .
554
+ ?instance <{self._namespace}sourceType> ?sourceType .
555
+ ?instance <{self._namespace}targetType> ?targetType
556
+ }}"""
557
+
558
+ def _iterator(self, graph: Graph) -> Iterator:
559
+ self._lookup_entity = self.create_lookup_entity_with_external_id(graph, self._namespace, self._type_prefix)
560
+ yield from graph.query(self._iterate_query())
561
+
562
+ def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
563
+ output = RowTransformationOutput()
564
+ instance, source, source_type, target, target_type = cast(
565
+ tuple[URIRef, Literal, URIRef, Literal, URIRef], query_result_row
566
+ )
567
+ if self._lookup_entity is None:
568
+ raise NeatValueError(f"{type(self)}: .operation() called before .transform()")
569
+ try:
570
+ source_id = self._lookup_entity(source_type, source.toPython())
571
+ except ValueError:
572
+ warnings.warn(ResourceNotFoundWarning(source, "class", str(instance), "class"), stacklevel=2)
573
+ return output
574
+
575
+ try:
576
+ target_id = self._lookup_entity(target_type, target.toPython())
577
+ except ValueError:
578
+ warnings.warn(ResourceNotFoundWarning(target, "class", str(instance), "class"), stacklevel=2)
579
+ return output
580
+
581
+ output.remove_triples.append((instance, self._namespace.sourceExternalId, source))
582
+ output.remove_triples.append((instance, self._namespace.targetExternalId, target))
583
+ output.add_triples.append((instance, self._namespace.sourceExternalId, source_id))
584
+ output.add_triples.append((instance, self._namespace.targetExternalId, target_id))
585
+ output.instances_modified_count += 1
586
+ return output
587
+
588
+ @staticmethod
589
+ def create_lookup_entity_with_external_id(
590
+ graph: Graph, namespace: Namespace, type_prefix: str | None
591
+ ) -> Callable[[URIRef, str], URIRef]:
592
+ @lru_cache(maxsize=10_000)
593
+ def lookup_entity_with_external_id(entity_type: URIRef, external_id: str) -> URIRef:
594
+ if type_prefix:
595
+ entity_type = namespace[type_prefix + remove_namespace_from_uri(entity_type)]
596
+
597
+ query = LookupRelationshipSourceTarget._lookup_entity_query.format(
598
+ namespace=namespace, entity_type=entity_type, external_id=external_id
599
+ )
600
+ result = list(graph.query(query))
601
+ if len(result) == 1:
602
+ return cast(URIRef, result[0][0]) # type: ignore[index]
603
+ raise ValueError(f"Could not find entity with external_id {external_id} and type {entity_type}")
604
+
605
+ return lookup_entity_with_external_id
@@ -35,8 +35,8 @@ class AddSelfReferenceProperty(BaseTransformer):
35
35
 
36
36
  def transform(self, graph: Graph) -> None:
37
37
  for property_ in self.properties:
38
- prefix = property_.transformation.traversal.class_.prefix
39
- suffix = property_.transformation.traversal.class_.suffix
38
+ prefix = property_.instance_source.traversal.class_.prefix
39
+ suffix = property_.instance_source.traversal.class_.suffix
40
40
 
41
41
  namespace = self.rules.prefixes[prefix] if prefix in self.rules.prefixes else self.rules.metadata.namespace
42
42
 
@@ -54,12 +54,12 @@ class AddSelfReferenceProperty(BaseTransformer):
54
54
  property_=f"{self.rules.metadata.prefix}:{property_.property_}",
55
55
  )
56
56
 
57
- property_.transformation = RDFPath(traversal=traversal)
57
+ property_.instance_source = RDFPath(traversal=traversal)
58
58
 
59
59
 
60
60
  class MakeConnectionOnExactMatch(BaseTransformerStandardised):
61
61
  description: str = "Adds property that contains id of reference to all references of given class in Rules"
62
- _use_only_once: bool = True
62
+ _use_only_once: bool = False
63
63
  _need_changes = frozenset({})
64
64
 
65
65
  def __init__(
@@ -1,23 +1,21 @@
1
1
  import warnings
2
- from collections.abc import Callable
2
+ from collections.abc import Callable, Iterator
3
3
  from typing import Any, cast
4
4
  from urllib.parse import quote
5
5
 
6
6
  import rdflib
7
- from rdflib import RDF, XSD, Graph, Namespace, URIRef
7
+ from rdflib import RDF, Namespace, URIRef
8
8
  from rdflib.query import ResultRow
9
9
 
10
10
  from cognite.neat._constants import UNKNOWN_TYPE
11
- from cognite.neat._graph.queries import Queries
12
11
  from cognite.neat._issues.warnings import PropertyDataTypeConversionWarning
13
12
  from cognite.neat._utils.auxiliary import string_to_ideal_type
14
- from cognite.neat._utils.rdf_ import get_namespace, remove_namespace_from_uri
13
+ from cognite.neat._utils.rdf_ import Triple, get_namespace, remove_namespace_from_uri
15
14
 
16
- from ._base import BaseTransformer, BaseTransformerStandardised, RowTransformationOutput
15
+ from ._base import BaseTransformerStandardised, RowTransformationOutput
17
16
 
18
17
 
19
- # TODO: Standardise
20
- class SplitMultiValueProperty(BaseTransformer):
18
+ class SplitMultiValueProperty(BaseTransformerStandardised):
21
19
  description: str = (
22
20
  "SplitMultiValueProperty is a transformer that splits a "
23
21
  "multi-value property into multiple single-value properties."
@@ -25,55 +23,67 @@ class SplitMultiValueProperty(BaseTransformer):
25
23
  _use_only_once: bool = True
26
24
  _need_changes = frozenset({})
27
25
 
28
- _object_property_template: str = """SELECT ?s ?o WHERE{{
26
+ def __init__(self, unknown_type: URIRef | None = None) -> None:
27
+ self.unknown_type = unknown_type or UNKNOWN_TYPE
29
28
 
30
- ?s a <{subject_uri}> .
31
- ?s <{property_uri}> ?o .
32
- ?o a <{object_uri}> .
29
+ def _iterate_query(self) -> str:
30
+ query = """SELECT ?subjectType ?property
31
+ (GROUP_CONCAT(DISTINCT STR(?valueType); SEPARATOR=",") AS ?valueTypes)
32
+
33
+ WHERE {{
34
+ ?s ?property ?o .
35
+ ?s a ?subjectType .
36
+ OPTIONAL {{ ?o a ?type }}
37
+
38
+ # Key part to determine value type: either object, data or unknown
39
+ BIND( IF(isLiteral(?o),DATATYPE(?o),
40
+ IF(BOUND(?type), ?type,
41
+ <{unknownType}>)) AS ?valueType)
42
+ }}
43
+
44
+ GROUP BY ?subjectType ?property
45
+ HAVING (COUNT(DISTINCT ?valueType) > 1)"""
46
+
47
+ return query.format(unknownType=self.unknown_type)
48
+
49
+ def _count_query(self) -> str:
50
+ query = """SELECT (COUNT(*) AS ?tripleCount)
51
+ WHERE {?s ?p ?o .}"""
52
+ return query
53
+
54
+ def _sub_iterate_query(self, type_: URIRef, property_: URIRef) -> str:
55
+ query = """ SELECT ?s ?p ?o ?valueType WHERE {{
56
+ ?s a <{subject_uri}> .
57
+ ?s <{property_uri}> ?o .
33
58
 
34
- }}"""
59
+ OPTIONAL {{ ?o a ?type }}
35
60
 
36
- _datatype_property_template: str = """SELECT ?s ?o WHERE {{
61
+ BIND(<{property_uri}> AS ?p)
37
62
 
38
- ?s a <{subject_uri}> .
39
- ?s <{property_uri}> ?o .
40
- FILTER (datatype(?o) = <{object_uri}>)
63
+ BIND(IF(isLiteral(?o), DATATYPE(?o),
64
+ IF(BOUND(?type),?type,
65
+ <{unknownType}>)) AS ?valueType)
41
66
 
42
- }}"""
67
+ }} """
43
68
 
44
- _unknown_property_template: str = """SELECT ?s ?o WHERE {{
69
+ return query.format(unknownType=self.unknown_type, subject_uri=type_, property_uri=property_)
45
70
 
46
- ?s a <{subject_uri}> .
47
- ?s <{property_uri}> ?o .
48
- FILTER NOT EXISTS {{ ?o a ?objectType }}
49
- }}"""
71
+ def _iterator(self, graph) -> Iterator:
72
+ for type_, property_, _ in graph.query(self._iterate_query()):
73
+ yield from graph.query(self._sub_iterate_query(type_, property_))
50
74
 
51
- def transform(self, graph: Graph) -> None:
52
- # handle multi value type object properties
53
- for subject_uri, property_uri, value_types in Queries(graph).multi_value_type_property():
54
- for value_type_uri in value_types:
55
- _args = {
56
- "subject_uri": subject_uri,
57
- "property_uri": property_uri,
58
- "object_uri": value_type_uri,
59
- }
75
+ def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
76
+ row_output = RowTransformationOutput()
77
+ subject, old_property, object, value_type = query_result_row
60
78
 
61
- # Case 1: Unknown value type
62
- if value_type_uri == UNKNOWN_TYPE:
63
- iterator = graph.query(self._unknown_property_template.format(**_args))
79
+ new_property = URIRef(f"{old_property}_{remove_namespace_from_uri(value_type)}")
64
80
 
65
- # Case 2: Datatype value type
66
- elif value_type_uri.startswith(str(XSD)):
67
- iterator = graph.query(self._datatype_property_template.format(**_args))
81
+ row_output.add_triples.append(cast(Triple, (subject, new_property, object)))
82
+ row_output.remove_triples.append(cast(Triple, (subject, old_property, object)))
68
83
 
69
- # Case 3: Object value type
70
- else:
71
- iterator = graph.query(self._object_property_template.format(**_args))
84
+ row_output.instances_modified_count += 1
72
85
 
73
- for s, o in iterator: # type: ignore [misc]
74
- graph.remove((s, property_uri, o))
75
- new_property = URIRef(f"{property_uri}_{remove_namespace_from_uri(value_type_uri)}")
76
- graph.add((s, new_property, o))
86
+ return row_output
77
87
 
78
88
 
79
89
  class ConvertLiteral(BaseTransformerStandardised):
@@ -42,7 +42,7 @@ class FileItemNotSupportedWarning(NeatWarning):
42
42
 
43
43
  @dataclass(unsafe_hash=True)
44
44
  class CDFAuthWarning(NeatWarning):
45
- """Failed to {action} due to {reason}"""
45
+ """Failed to {action}: {reason}"""
46
46
 
47
47
  action: str
48
48
  reason: str
@@ -251,7 +251,7 @@ class BaseAnalysis(ABC, Generic[T_Rules, T_Class, T_Property, T_ClassEntity, T_P
251
251
  if (
252
252
  only_rdfpath
253
253
  and isinstance(property_, InformationProperty)
254
- and isinstance(property_.transformation, RDFPath)
254
+ and isinstance(property_.instance_source, RDFPath)
255
255
  ) or not only_rdfpath:
256
256
  processed_properties[prop_entity] = property_
257
257
  class_property_pairs[class_] = processed_properties
@@ -55,12 +55,13 @@ class InformationAnalysis(BaseAnalysis[InformationRules, InformationClass, Infor
55
55
 
56
56
  def has_hop_transformations(self):
57
57
  return any(
58
- prop_.transformation and isinstance(prop_.transformation.traversal, Hop) for prop_ in self.rules.properties
58
+ prop_.instance_source and isinstance(prop_.instance_source.traversal, Hop)
59
+ for prop_ in self.rules.properties
59
60
  )
60
61
 
61
62
  def has_self_reference_property_transformations(self):
62
63
  return any(
63
- prop_.transformation and isinstance(prop_.transformation.traversal, SelfReferenceProperty)
64
+ prop_.instance_source and isinstance(prop_.instance_source.traversal, SelfReferenceProperty)
64
65
  for prop_ in self.rules.properties
65
66
  )
66
67
 
@@ -68,7 +69,7 @@ class InformationAnalysis(BaseAnalysis[InformationRules, InformationClass, Infor
68
69
  return [
69
70
  prop_
70
71
  for prop_ in self.rules.properties
71
- if prop_.transformation and isinstance(prop_.transformation.traversal, SelfReferenceProperty)
72
+ if prop_.instance_source and isinstance(prop_.instance_source.traversal, SelfReferenceProperty)
72
73
  ]
73
74
 
74
75
  def define_property_renaming_config(self, class_: ClassEntity) -> dict[str | URIRef, str]:
@@ -76,7 +77,7 @@ class InformationAnalysis(BaseAnalysis[InformationRules, InformationClass, Infor
76
77
 
77
78
  if definitions := self.class_property_pairs(only_rdfpath=True, consider_inheritance=True).get(class_, None):
78
79
  for property_id, definition in definitions.items():
79
- transformation = cast(RDFPath, definition.transformation)
80
+ transformation = cast(RDFPath, definition.instance_source)
80
81
 
81
82
  # use case we have a single property rdf path, and defined prefix
82
83
  # in either metadata or prefixes of rules
@@ -101,26 +102,26 @@ class InformationAnalysis(BaseAnalysis[InformationRules, InformationClass, Infor
101
102
 
102
103
  return property_renaming_configuration
103
104
 
104
- def neat_id_to_transformation_property_uri(self, property_neat_id: URIRef) -> URIRef | None:
105
+ def neat_id_to_instance_source_property_uri(self, property_neat_id: URIRef) -> URIRef | None:
105
106
  if (
106
107
  (property_ := self.properties_by_neat_id.get(property_neat_id))
107
- and property_.transformation
108
+ and property_.instance_source
108
109
  and isinstance(
109
- property_.transformation.traversal,
110
+ property_.instance_source.traversal,
110
111
  SingleProperty,
111
112
  )
112
113
  and (
113
- property_.transformation.traversal.property.prefix in self.rules.prefixes
114
- or property_.transformation.traversal.property.prefix == self.rules.metadata.prefix
114
+ property_.instance_source.traversal.property.prefix in self.rules.prefixes
115
+ or property_.instance_source.traversal.property.prefix == self.rules.metadata.prefix
115
116
  )
116
117
  ):
117
118
  namespace = (
118
119
  self.rules.metadata.namespace
119
- if property_.transformation.traversal.property.prefix == self.rules.metadata.prefix
120
- else self.rules.prefixes[property_.transformation.traversal.property.prefix]
120
+ if property_.instance_source.traversal.property.prefix == self.rules.metadata.prefix
121
+ else self.rules.prefixes[property_.instance_source.traversal.property.prefix]
121
122
  )
122
123
 
123
- return namespace[property_.transformation.traversal.property.suffix]
124
+ return namespace[property_.instance_source.traversal.property.suffix]
124
125
  return None
125
126
 
126
127
  def property_types(self, class_: ClassEntity) -> dict[str, EntityTypes]:
@@ -137,7 +138,7 @@ class InformationAnalysis(BaseAnalysis[InformationRules, InformationClass, Infor
137
138
  class_, None
138
139
  ):
139
140
  for property_ in class_property_pairs.values():
140
- classes.append(cast(RDFPath, property_.transformation).traversal.class_)
141
+ classes.append(cast(RDFPath, property_.instance_source).traversal.class_)
141
142
 
142
143
  return cast(ClassEntity, most_occurring_element(classes))
143
144
  else:
@@ -5,3 +5,4 @@ from pathlib import Path
5
5
  _CATALOG = Path(__file__).parent
6
6
  imf_attributes = _CATALOG / "info-rules-imf.xlsx"
7
7
  hello_world_pump = _CATALOG / "hello_world_pump.xlsx"
8
+ classic_model = _CATALOG / "classic_model.xlsx"
@@ -115,8 +115,8 @@ class DMSImporter(BaseImporter[DMSInputRules]):
115
115
  data_model_ids = [data_model_id]
116
116
  data_models = client.data_modeling.data_models.retrieve(data_model_ids, inline_views=True)
117
117
 
118
- user_models = cls._find_model_in_list(data_models, data_model_id)
119
- if len(user_models) == 0:
118
+ retrieved_models = cls._find_model_in_list(data_models, data_model_id)
119
+ if len(retrieved_models) == 0:
120
120
  return cls(
121
121
  DMSSchema(),
122
122
  [
@@ -127,16 +127,18 @@ class DMSImporter(BaseImporter[DMSInputRules]):
127
127
  )
128
128
  ],
129
129
  )
130
- user_model = user_models.latest_version()
130
+ return cls.from_data_model(client, retrieved_models.latest_version())
131
131
 
132
+ @classmethod
133
+ def from_data_model(cls, client: NeatClient, model: dm.DataModel[dm.View]) -> "DMSImporter":
132
134
  issue_list = IssueList()
133
135
  with _handle_issues(issue_list) as result:
134
- schema = NeatClient(client).schema.retrieve_data_model(user_model)
136
+ schema = client.schema.retrieve_data_model(model)
135
137
 
136
138
  if result.result == "failure" or issue_list.has_errors:
137
139
  return cls(DMSSchema(), issue_list)
138
140
 
139
- metadata = cls._create_metadata_from_model(user_model)
141
+ metadata = cls._create_metadata_from_model(model)
140
142
 
141
143
  return cls(
142
144
  schema,
@@ -17,6 +17,7 @@ from cognite.neat._rules.models.information import (
17
17
  )
18
18
  from cognite.neat._store import NeatGraphStore
19
19
  from cognite.neat._store._provenance import INSTANCES_ENTITY
20
+ from cognite.neat._utils.collection_ import iterate_progress_bar
20
21
  from cognite.neat._utils.rdf_ import remove_namespace_from_uri, uri_to_short_form
21
22
 
22
23
  from ._base import DEFAULT_NON_EXISTING_NODE_TYPE, BaseRDFImporter
@@ -27,7 +28,6 @@ ORDERED_CLASSES_QUERY = """SELECT ?class (count(?s) as ?instances )
27
28
  WHERE { ?s a ?class . }
28
29
  group by ?class order by DESC(?instances)"""
29
30
 
30
-
31
31
  INSTANCES_OF_CLASS_QUERY = """SELECT ?s ?propertyCount WHERE { ?s a <class> . BIND ('Unknown' as ?propertyCount) }"""
32
32
 
33
33
 
@@ -171,8 +171,10 @@ class InferenceImporter(BaseRDFImporter):
171
171
  INSTANCES_OF_CLASS_QUERY if self.max_number_of_instance == -1 else INSTANCES_OF_CLASS_RICHNESS_ORDERED_QUERY
172
172
  )
173
173
 
174
+ classes_iterable = iterate_progress_bar(classes.items(), len(classes), "Inferring classes")
175
+
174
176
  # Infers all the properties of the class
175
- for class_id, class_definition in classes.items():
177
+ for class_id, class_definition in classes_iterable:
176
178
  for ( # type: ignore[misc]
177
179
  instance,
178
180
  _,
@@ -231,7 +233,7 @@ class InferenceImporter(BaseRDFImporter):
231
233
  "property_": property_id,
232
234
  "max_count": cast(RdfLiteral, occurrence).value,
233
235
  "value_type": value_type_id,
234
- "transformation": (
236
+ "instance_source": (
235
237
  f"{uri_to_short_form(class_definition['uri'], prefixes)}"
236
238
  f"({uri_to_short_form(cast(URIRef, property_uri), prefixes)})"
237
239
  ),