cognite-neat 0.105.2__py3-none-any.whl → 0.107.0__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.

Files changed (54)
  1. cognite/neat/_config.py +6 -260
  2. cognite/neat/_graph/extractors/__init__.py +5 -1
  3. cognite/neat/_graph/extractors/_base.py +32 -0
  4. cognite/neat/_graph/extractors/_classic_cdf/_base.py +42 -16
  5. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +78 -8
  6. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +2 -0
  7. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +10 -3
  8. cognite/neat/_graph/extractors/_dms.py +48 -14
  9. cognite/neat/_graph/extractors/_dms_graph.py +149 -0
  10. cognite/neat/_graph/extractors/_rdf_file.py +32 -5
  11. cognite/neat/_graph/loaders/_rdf2dms.py +119 -20
  12. cognite/neat/_graph/queries/_construct.py +1 -1
  13. cognite/neat/_graph/transformers/__init__.py +5 -0
  14. cognite/neat/_graph/transformers/_base.py +13 -9
  15. cognite/neat/_graph/transformers/_classic_cdf.py +141 -44
  16. cognite/neat/_graph/transformers/_rdfpath.py +4 -4
  17. cognite/neat/_graph/transformers/_value_type.py +54 -44
  18. cognite/neat/_issues/warnings/_external.py +1 -1
  19. cognite/neat/_rules/analysis/_base.py +1 -1
  20. cognite/neat/_rules/analysis/_information.py +14 -13
  21. cognite/neat/_rules/catalog/__init__.py +1 -0
  22. cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
  23. cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
  24. cognite/neat/_rules/importers/_dms2rules.py +7 -5
  25. cognite/neat/_rules/importers/_rdf/_inference2rules.py +5 -3
  26. cognite/neat/_rules/models/_base_rules.py +0 -12
  27. cognite/neat/_rules/models/_types.py +5 -0
  28. cognite/neat/_rules/models/dms/_rules.py +50 -2
  29. cognite/neat/_rules/models/information/_rules.py +48 -5
  30. cognite/neat/_rules/models/information/_rules_input.py +1 -1
  31. cognite/neat/_rules/models/mapping/_classic2core.py +4 -5
  32. cognite/neat/_rules/models/mapping/_classic2core.yaml +70 -58
  33. cognite/neat/_rules/transformers/__init__.py +4 -0
  34. cognite/neat/_rules/transformers/_converters.py +209 -62
  35. cognite/neat/_rules/transformers/_mapping.py +3 -2
  36. cognite/neat/_session/_base.py +8 -13
  37. cognite/neat/_session/_inspect.py +6 -2
  38. cognite/neat/_session/_mapping.py +22 -13
  39. cognite/neat/_session/_prepare.py +9 -57
  40. cognite/neat/_session/_read.py +96 -29
  41. cognite/neat/_session/_set.py +9 -0
  42. cognite/neat/_session/_state.py +10 -1
  43. cognite/neat/_session/_to.py +51 -15
  44. cognite/neat/_session/exceptions.py +7 -3
  45. cognite/neat/_store/_graph_store.py +85 -39
  46. cognite/neat/_store/_rules_store.py +22 -0
  47. cognite/neat/_utils/auth.py +2 -0
  48. cognite/neat/_utils/collection_.py +32 -11
  49. cognite/neat/_version.py +1 -1
  50. {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/METADATA +2 -8
  51. {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/RECORD +54 -52
  52. {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/WHEEL +1 -1
  53. {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/LICENSE +0 -0
  54. {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/entry_points.txt +0 -0
cognite/neat/_graph/extractors/_classic_cdf/_sequences.py

@@ -52,10 +52,11 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
         skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
         camel_case: bool = True,
         as_write: bool = False,
+        prefix: str | None = None,
         unpack_columns: bool = False,
     ):
         super().__init__(
-            items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write
+            items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write, prefix
         )
         self.unpack_columns = unpack_columns

@@ -71,9 +72,10 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
         skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
         camel_case: bool = True,
         as_write: bool = False,
+        prefix: str | None = None,
         unpack_columns: bool = False,
     ):
-        total, items = cls._from_dataset(client, data_set_external_id)
+        total, items = cls._handle_no_access(lambda: cls._from_dataset(client, data_set_external_id))
         return cls(
             items,
             namespace,

@@ -84,6 +86,7 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
             skip_metadata_values,
             camel_case,
             as_write,
+            prefix,
             unpack_columns,
         )

@@ -99,9 +102,10 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
         skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
         camel_case: bool = True,
         as_write: bool = False,
+        prefix: str | None = None,
         unpack_columns: bool = False,
     ):
-        total, items = cls._from_hierarchy(client, root_asset_external_id)
+        total, items = cls._handle_no_access(lambda: cls._from_hierarchy(client, root_asset_external_id))
         return cls(
             items,
             namespace,

@@ -112,6 +116,7 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
             skip_metadata_values,
             camel_case,
             as_write,
+            prefix,
             unpack_columns,
         )

@@ -126,6 +131,7 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
         skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
         camel_case: bool = True,
         as_write: bool = False,
+        prefix: str | None = None,
         unpack_columns: bool = False,
     ):
         total, items = cls._from_file(file_path)

@@ -139,6 +145,7 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
             skip_metadata_values,
             camel_case,
             as_write,
+            prefix,
             unpack_columns,
         )
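The classic-CDF extractor factory methods all gain an optional prefix argument, and the CDF listing calls are now wrapped in _handle_no_access (defined in the shared _classic_cdf/_base.py, also changed in this release), presumably so that missing access is reported as an issue rather than an unhandled exception. A minimal usage sketch, assuming the public classmethod is named from_dataset and that SequencesExtractor is re-exported from cognite.neat._graph.extractors; the client setup and external ID are illustrative.

from cognite.client import CogniteClient
from cognite.neat._graph.extractors import SequencesExtractor

client = CogniteClient()  # assumes CDF credentials are configured in the environment

extractor = SequencesExtractor.from_dataset(
    client,
    data_set_external_id="my_data_set",  # illustrative external ID
    prefix="Classic",  # new in this release: passed through to the base extractor
)
for triple in extractor.extract():
    ...  # e.g. load the triples into a NeatGraphStore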
 
cognite/neat/_graph/extractors/_dms.py

@@ -1,3 +1,4 @@
+import urllib.parse
 from collections.abc import Iterable, Iterator
 from typing import cast

@@ -5,6 +6,7 @@ from cognite.client import CogniteClient
 from cognite.client import data_modeling as dm
 from cognite.client.data_classes.data_modeling import DataModelIdentifier
 from cognite.client.data_classes.data_modeling.instances import Instance, PropertyValue
+from cognite.client.utils.useful_types import SequenceNotStr
 from rdflib import RDF, XSD, Literal, Namespace, URIRef

 from cognite.neat._constants import DEFAULT_SPACE_URI

@@ -38,7 +40,12 @@ class DMSExtractor(BaseExtractor):

     @classmethod
     def from_data_model(
-        cls, client: CogniteClient, data_model: DataModelIdentifier, limit: int | None = None
+        cls,
+        client: CogniteClient,
+        data_model: DataModelIdentifier,
+        limit: int | None = None,
+        overwrite_namespace: Namespace | None = None,
+        instance_space: str | SequenceNotStr[str] | None = None,
     ) -> "DMSExtractor":
         """Create an extractor from a data model.

@@ -46,22 +53,38 @@
             client: The Cognite client to use.
             data_model: The data model to extract.
             limit: The maximum number of instances to extract.
+            overwrite_namespace: If provided, this will overwrite the space of the extracted items.
+            instance_space: The space to extract instances from.
         """
         retrieved = client.data_modeling.data_models.retrieve(data_model, inline_views=True)
         if not retrieved:
             raise ResourceRetrievalError(dm.DataModelId.load(data_model), "data model", "Data Model is missing in CDF")
-        return cls.from_views(client, retrieved.latest_version().views, limit)
+        return cls.from_views(client, retrieved.latest_version().views, limit, overwrite_namespace, instance_space)

     @classmethod
-    def from_views(cls, client: CogniteClient, views: Iterable[dm.View], limit: int | None = None) -> "DMSExtractor":
+    def from_views(
+        cls,
+        client: CogniteClient,
+        views: Iterable[dm.View],
+        limit: int | None = None,
+        overwrite_namespace: Namespace | None = None,
+        instance_space: str | SequenceNotStr[str] | None = None,
+    ) -> "DMSExtractor":
         """Create an extractor from a set of views.

         Args:
             client: The Cognite client to use.
             views: The views to extract.
             limit: The maximum number of instances to extract.
+            overwrite_namespace: If provided, this will overwrite the space of the extracted items.
+            instance_space: The space to extract instances from.
         """
-        return cls(_InstanceIterator(client, views), total=None, limit=limit)
+        return cls(
+            _InstanceIterator(client, views, instance_space),
+            total=None,
+            limit=limit,
+            overwrite_namespace=overwrite_namespace,
+        )

     def extract(self) -> Iterable[Triple]:
         for count, item in enumerate(self.items, 1):

@@ -105,6 +128,10 @@
         else:
             raise NotImplementedError(f"Unknown instance type {type(instance)}")

+        if self.overwrite_namespace:
+            # If the namespace is overwritten, keep the original space as a property to avoid losing information.
+            yield id_, self._get_namespace(instance.space)["space"], Literal(instance.space)
+
         for view_id, properties in instance.properties.items():
             namespace = self._get_namespace(view_id.space)
             for key, value in properties.items():

@@ -124,35 +151,42 @@
             yield from self._get_objects(item)

     def _as_uri_ref(self, instance: Instance | dm.DirectRelationReference) -> URIRef:
-        return self._get_namespace(instance.space)[instance.external_id]
+        return self._get_namespace(instance.space)[urllib.parse.quote(instance.external_id)]

     def _get_namespace(self, space: str) -> Namespace:
         if self.overwrite_namespace:
             return self.overwrite_namespace
-        return Namespace(DEFAULT_SPACE_URI.format(space=space))
+        return Namespace(DEFAULT_SPACE_URI.format(space=urllib.parse.quote(space)))


-class _InstanceIterator(Iterator[Instance]):
-    def __init__(self, client: CogniteClient, views: Iterable[dm.View]):
+class _InstanceIterator(Iterable[Instance]):
+    def __init__(
+        self, client: CogniteClient, views: Iterable[dm.View], instance_space: str | SequenceNotStr[str] | None = None
+    ):
         self.client = client
         self.views = views
+        self.instance_space = instance_space

     def __iter__(self) -> Iterator[Instance]:
-        return self
-
-    def __next__(self) -> Instance:  # type: ignore[misc]
         for view in self.views:
+            view_id = view.as_id()
             # All nodes and edges with properties
-            yield from self.client.data_modeling.instances(chunk_size=None, instance_type="node", sources=[view])
-            yield from self.client.data_modeling.instances(chunk_size=None, instance_type="edge", sources=[view])
+            if view.used_for in ("node", "all"):
+                yield from self.client.data_modeling.instances(
+                    chunk_size=None, instance_type="node", sources=[view_id], space=self.instance_space
+                )
+            if view.used_for in ("edge", "all"):
+                yield from self.client.data_modeling.instances(
+                    chunk_size=None, instance_type="edge", sources=[view_id], space=self.instance_space
+                )

             for prop in view.properties.values():
                 if isinstance(prop, dm.EdgeConnection):
-                    # Get all edges with properties
                     yield from self.client.data_modeling.instances(
                         chunk_size=None,
                         instance_type="edge",
                         filter=dm.filters.Equals(
                             ["edge", "type"], {"space": prop.type.space, "externalId": prop.type.external_id}
                         ),
+                        space=self.instance_space,
                     )
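The expanded from_data_model/from_views signatures allow rewriting the RDF namespace of extracted instances and restricting extraction to specific instance spaces. A hedged usage sketch; the data model identifier, namespace URI, and space names are invented, and the import path assumes DMSExtractor is exported from cognite.neat._graph.extractors.

from cognite.client import CogniteClient
from rdflib import Namespace

from cognite.neat._graph.extractors import DMSExtractor

client = CogniteClient()  # assumes CDF credentials are configured in the environment

extractor = DMSExtractor.from_data_model(
    client,
    data_model=("my_space", "MyDataModel", "v1"),  # illustrative identifier
    limit=1000,
    # New: place all extracted items under one namespace; the original space is
    # kept as a "space" property on each instance (see the hunk above).
    overwrite_namespace=Namespace("http://example.org/my-model/"),
    # New: only pull instances stored in these spaces.
    instance_space=["my_instance_space"],
)
triples = list(extractor.extract())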
cognite/neat/_graph/extractors/_dms_graph.py (new file)

@@ -0,0 +1,149 @@
+from collections.abc import Iterable, Sequence
+
+from cognite.client import data_modeling as dm
+from cognite.client.exceptions import CogniteAPIError
+from cognite.client.utils.useful_types import SequenceNotStr
+from rdflib import Namespace, URIRef
+
+from cognite.neat._client import NeatClient
+from cognite.neat._constants import DEFAULT_NAMESPACE
+from cognite.neat._issues import IssueList, NeatIssue, catch_warnings
+from cognite.neat._issues.warnings import CDFAuthWarning, ResourceNotFoundWarning, ResourceRetrievalWarning
+from cognite.neat._rules.importers import DMSImporter
+from cognite.neat._rules.models import DMSRules, InformationRules
+from cognite.neat._rules.transformers import DMSToInformation, VerifyDMSRules
+from cognite.neat._shared import Triple
+
+from ._base import KnowledgeGraphExtractor
+from ._dms import DMSExtractor
+
+
+class DMSGraphExtractor(KnowledgeGraphExtractor):
+    def __init__(
+        self,
+        data_model: dm.DataModel[dm.View],
+        client: NeatClient,
+        namespace: Namespace = DEFAULT_NAMESPACE,
+        issues: Sequence[NeatIssue] | None = None,
+        instance_space: str | SequenceNotStr[str] | None = None,
+    ) -> None:
+        self._client = client
+        self._data_model = data_model
+        self._namespace = namespace or DEFAULT_NAMESPACE
+        self._issues = IssueList(issues)
+        self._instance_space = instance_space
+
+        self._views: list[dm.View] | None = None
+        self._information_rules: InformationRules | None = None
+        self._dms_rules: DMSRules | None = None
+
+    @classmethod
+    def from_data_model_id(
+        cls,
+        data_model_id: dm.DataModelIdentifier,
+        client: NeatClient,
+        namespace: Namespace = DEFAULT_NAMESPACE,
+        instance_space: str | SequenceNotStr[str] | None = None,
+    ) -> "DMSGraphExtractor":
+        issues: list[NeatIssue] = []
+        try:
+            data_model = client.data_modeling.data_models.retrieve(data_model_id, inline_views=True)
+        except CogniteAPIError as e:
+            issues.append(CDFAuthWarning("retrieving data model", str(e)))
+            return cls(
+                cls._create_empty_model(dm.DataModelId.load(data_model_id)), client, namespace, issues, instance_space
+            )
+        if not data_model:
+            issues.append(ResourceRetrievalWarning(frozenset({data_model_id}), "data model"))
+            return cls(
+                cls._create_empty_model(dm.DataModelId.load(data_model_id)), client, namespace, issues, instance_space
+            )
+        return cls(data_model.latest_version(), client, namespace, issues, instance_space)
+
+    @classmethod
+    def _create_empty_model(cls, data_model_id: dm.DataModelId) -> dm.DataModel:
+        return dm.DataModel(
+            data_model_id.space,
+            data_model_id.external_id,
+            data_model_id.version or "MISSING",
+            is_global=False,
+            last_updated_time=0,
+            created_time=0,
+            description=None,
+            name=None,
+            views=[],
+        )
+
+    @property
+    def _model_views(self) -> list[dm.View]:
+        if self._views is None:
+            self._views = self._get_views()
+        return self._views
+
+    @property
+    def description(self) -> str:
+        return "Extracts a data model with nodes and edges."
+
+    @property
+    def source_uri(self) -> URIRef:
+        space, external_id, version = self._data_model.as_id().as_tuple()
+        return DEFAULT_NAMESPACE[f"{self._client.config.project}/{space}/{external_id}/{version}"]
+
+    def extract(self) -> Iterable[Triple]:
+        """Extracts the knowledge graph from the data model."""
+        views = self._model_views
+        yield from DMSExtractor.from_views(
+            self._client,
+            views,
+            overwrite_namespace=self._namespace,
+            instance_space=self._instance_space,
+        ).extract()
+
+    def _get_views(self) -> list[dm.View]:
+        view_by_id: dict[dm.ViewId, dm.View] = {}
+        if view_ids := [view_id for view_id in self._data_model.views if isinstance(view_id, dm.ViewId)]:
+            try:
+                # MyPy does not understand the isinstance check above.
+                retrieved = self._client.data_modeling.views.retrieve(ids=view_ids)  # type: ignore[arg-type]
+            except CogniteAPIError as e:
+                self._issues.append(CDFAuthWarning("retrieving views", str(e)))
+            else:
+                view_by_id.update({view.as_id(): view for view in retrieved})
+        views: list[dm.View] = []
+        data_model_id = self._data_model.as_id()
+        for dm_view in self._data_model.views:
+            if isinstance(dm_view, dm.View):
+                views.append(dm_view)
+            elif isinstance(dm_view, dm.ViewId):
+                if view := view_by_id.get(dm_view):
+                    views.append(view)
+                else:
+                    self._issues.append(ResourceNotFoundWarning(dm_view, "view", data_model_id, "data model"))
+        return views
+
+    def get_information_rules(self) -> InformationRules:
+        """Returns the information rules that the extractor uses."""
+        if self._information_rules is None:
+            self._information_rules, self._dms_rules = self._create_rules()
+        return self._information_rules
+
+    def get_dms_rules(self) -> DMSRules:
+        """Returns the DMS rules that the extractor uses."""
+        if self._dms_rules is None:
+            self._information_rules, self._dms_rules = self._create_rules()
+        return self._dms_rules
+
+    def get_issues(self) -> IssueList:
+        """Returns the issues that occurred during the extraction."""
+        return self._issues
+
+    def _create_rules(self) -> tuple[InformationRules, DMSRules]:
+        # The DMS and Information rules must be created together to link them property.
+        importer = DMSImporter.from_data_model(self._client, self._data_model)
+        unverified_dms = importer.to_rules()
+        with catch_warnings() as issues:
+            # Any errors occur will be raised and caught outside the extractor.
+            verified_dms = VerifyDMSRules(client=self._client).transform(unverified_dms)
+            information_rules = DMSToInformation(self._namespace).transform(verified_dms)
+        self._issues.extend(issues)
+        return information_rules, verified_dms
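The new DMSGraphExtractor ties the pieces above together: it retrieves a data model, extracts its instances as triples through DMSExtractor, and derives DMS and information rules from the same model. A sketch of how it might be used; the identifiers are invented, the NeatClient construction is assumed to work like a CogniteClient with configured credentials, and the import path assumes the class is exported from cognite.neat._graph.extractors (that package's __init__.py also changes in this release).

from cognite.neat._client import NeatClient
from cognite.neat._graph.extractors import DMSGraphExtractor

client = NeatClient()  # assumed to pick up CDF credentials like a CogniteClient

extractor = DMSGraphExtractor.from_data_model_id(
    ("my_space", "MyDataModel", "v1"),  # illustrative data model id
    client,
    instance_space="my_instance_space",
)

triples = list(extractor.extract())            # nodes and edges as RDF triples
dms_rules = extractor.get_dms_rules()          # DMS rules derived from the model
info_rules = extractor.get_information_rules() # matching information rules
issues = extractor.get_issues()                # warnings collected along the way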
cognite/neat/_graph/extractors/_rdf_file.py

@@ -1,6 +1,7 @@
+import zipfile
 from collections.abc import Iterable
 from pathlib import Path
-from typing import get_args
+from typing import cast, get_args

 from rdflib import URIRef
 from rdflib.util import guess_format

@@ -10,6 +11,7 @@ from cognite.neat._graph._shared import RDFTypes
 from cognite.neat._graph.extractors._base import BaseExtractor
 from cognite.neat._issues._base import IssueList
 from cognite.neat._issues.errors import FileNotFoundNeatError, FileTypeUnexpectedError
+from cognite.neat._issues.errors._general import NeatValueError
 from cognite.neat._shared import Triple

@@ -24,25 +26,50 @@ class RdfFileExtractor(BaseExtractor):

     def __init__(
         self,
-        filepath: Path,
+        filepath: Path | zipfile.ZipExtFile,
         base_uri: URIRef = DEFAULT_BASE_URI,
         issue_list: IssueList | None = None,
     ):
         self.issue_list = issue_list or IssueList(title=f"{filepath.name}")
         self.base_uri = base_uri
         self.filepath = filepath
-        self.format = guess_format(str(self.filepath))

-        if not self.filepath.exists():
+        self.format = guess_format(str(self.filepath) if isinstance(self.filepath, Path) else self.filepath.name)
+
+        print(self.format)
+        if isinstance(self.filepath, Path) and not self.filepath.exists():
             self.issue_list.append(FileNotFoundNeatError(self.filepath))

         if not self.format:
             self.issue_list.append(
                 FileTypeUnexpectedError(
-                    self.filepath,
+                    (self.filepath if isinstance(self.filepath, Path) else Path(self.filepath.name)),
                     frozenset(get_args(RDFTypes)),
                 )
             )

     def extract(self) -> Iterable[Triple]:
         raise NotImplementedError()
+
+    @classmethod
+    def from_zip(
+        cls,
+        filepath: Path,
+        filename: str = "neat-session/instances/instances.ttl",
+        base_uri: URIRef = DEFAULT_BASE_URI,
+        issue_list: IssueList | None = None,
+    ):
+        if not filepath.exists():
+            raise FileNotFoundNeatError(filepath)
+        if filepath.suffix not in {".zip"}:
+            raise NeatValueError("Expected a zip file, got {filepath.suffix}")
+
+        with zipfile.ZipFile(filepath, "r") as zip_ref:
+            for file_info in zip_ref.infolist():
+                if file_info.filename == filename:
+                    # We need to open the file in the zip file, and close it upon
+                    # triple extraction ...
+                    file = zip_ref.open(file_info)
+                    return cls(cast(zipfile.ZipExtFile, file), base_uri, issue_list)
+
+        raise NeatValueError(f"Cannot extract {filename} from zip file {filepath}")
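from_zip makes it possible to read instances straight out of a saved neat-session zip package. A minimal construction sketch; the zip path is illustrative, and note that RdfFileExtractor.extract itself raises NotImplementedError, so the actual parsing is done by the format-specific subclass that neat's read APIs instantiate.

from pathlib import Path

from cognite.neat._graph.extractors import RdfFileExtractor

# Opens the member "neat-session/instances/instances.ttl" (the default) inside the zip.
# The returned extractor wraps a ZipExtFile that stays open until the triples are read,
# as the comment inside from_zip notes.
extractor = RdfFileExtractor.from_zip(Path("my-neat-session.zip"))  # illustrative path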
cognite/neat/_graph/loaders/_rdf2dms.py

@@ -32,11 +32,12 @@ from cognite.neat._issues.errors import (
 from cognite.neat._issues.warnings import PropertyDirectRelationLimitWarning, PropertyTypeNotSupportedWarning
 from cognite.neat._rules.analysis._dms import DMSAnalysis
 from cognite.neat._rules.models import DMSRules
-from cognite.neat._rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE, Json
+from cognite.neat._rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE, Json, String
 from cognite.neat._rules.models.entities._single_value import ViewEntity
 from cognite.neat._shared import InstanceType
 from cognite.neat._store import NeatGraphStore
 from cognite.neat._utils.auxiliary import create_sha256_hash
+from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
 from cognite.neat._utils.rdf_ import remove_namespace_from_uri
 from cognite.neat._utils.upload import UploadResult

@@ -157,8 +158,8 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
             view_ids.append(f"{view_id!r} (self)")

         tracker = self._tracker(type(self).__name__, view_ids, "views")
-        for view_id, (view, _) in view_and_count_by_id.items():
-            pydantic_cls, edge_by_type, issues = self._create_validation_classes(view)  # type: ignore[var-annotated]
+        for view_id, (view, instance_count) in view_and_count_by_id.items():
+            pydantic_cls, edge_by_type, edge_by_prop_id, issues = self._create_validation_classes(view)  # type: ignore[var-annotated]
             yield from issues
             tracker.issue(issues)

@@ -194,21 +195,66 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
             # this assumes no changes in the suffix of view and class
             reader = self.graph_store.read(view.external_id)

-            for identifier, properties in reader:
+            instance_iterable = iterate_progress_bar_if_above_config_threshold(
+                reader, instance_count, f"Loading {track_id}"
+            )
+
+            for identifier, properties in instance_iterable:
+                start_node, end_node = self._pop_start_end_node(properties)
+                is_edge = start_node and end_node
+                if (is_edge and view.used_for == "node") or (not is_edge and view.used_for == "edge"):
+                    instance_type = "edge" if is_edge else "node"
+                    creation_error = ResourceCreationError(
+                        identifier,
+                        instance_type,
+                        error=f"{instance_type.capitalize()} found in {view.used_for} view",
+                    )
+                    tracker.issue(creation_error)
+                    if stop_on_exception:
+                        raise creation_error
+                    yield creation_error
+                    continue
+
                 if skip_properties:
                     properties = {k: v for k, v in properties.items() if k not in skip_properties}
-                try:
-                    yield self._create_node(identifier, properties, pydantic_cls, view_id)
-                except ValueError as e:
-                    error_node = ResourceCreationError(identifier, "node", error=str(e))
-                    tracker.issue(error_node)
-                    if stop_on_exception:
-                        raise error_node from e
-                    yield error_node
-                yield from self._create_edges(identifier, properties, edge_by_type, tracker)
+
+                if start_node and end_node:
+                    # Is an edge
+                    try:
+                        yield self._create_edge_with_properties(
+                            identifier, properties, start_node, end_node, pydantic_cls, view_id
+                        )
+                    except ValueError as e:
+                        error_edge = ResourceCreationError(identifier, "edge", error=str(e))
+                        tracker.issue(error_edge)
+                        if stop_on_exception:
+                            raise error_edge from e
+                        yield error_edge
+                else:
+                    try:
+                        yield self._create_node(identifier, properties, pydantic_cls, view_id)
+                    except ValueError as e:
+                        error_node = ResourceCreationError(identifier, "node", error=str(e))
+                        tracker.issue(error_node)
+                        if stop_on_exception:
+                            raise error_node from e
+                        yield error_node
+                    yield from self._create_edges_without_properties(
+                        identifier, properties, edge_by_type, edge_by_prop_id, tracker
+                    )
             tracker.finish(track_id)
             yield _END_OF_CLASS

+    @staticmethod
+    def _pop_start_end_node(properties: dict[str | InstanceType, list[str]]) -> tuple[str | None, str | None]:
+        start_node = properties.pop("startNode", [None])[0]
+        if not start_node:
+            start_node = properties.pop("start_node", [None])[0]
+        end_node = properties.pop("endNode", [None])[0]
+        if not end_node:
+            end_node = properties.pop("end_node", [None])[0]
+        return start_node, end_node
+
     def write_to_file(self, filepath: Path) -> None:
         if filepath.suffix not in [".json", ".yaml", ".yml"]:
             raise ValueError(f"File format {filepath.suffix} is not supported")
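The loader now wraps the instance reader in iterate_progress_bar_if_above_config_threshold, so large views show progress while small ones stay quiet. That helper lives in cognite/neat/_utils/collection_.py (also changed in this release) and is not shown in this diff; below is only a rough sketch of the idea, with an assumed threshold constant and an assumed tqdm backend.

from collections.abc import Iterable, Iterator
from typing import TypeVar

T = TypeVar("T")

ASSUMED_THRESHOLD = 1_000  # placeholder; the real threshold comes from neat's config


def iterate_with_optional_progress(items: Iterable[T], total: int, description: str) -> Iterator[T]:
    # Yield items unchanged, adding a progress bar only for large collections.
    if total < ASSUMED_THRESHOLD:
        yield from items
        return
    try:
        from tqdm import tqdm  # assumed backend; neat may use something else
    except ImportError:
        yield from items
        return
    yield from tqdm(items, total=total, desc=description)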
@@ -293,17 +339,30 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):

     def _create_validation_classes(
         self, view: dm.View
-    ) -> tuple[type[BaseModel], dict[str, tuple[str, dm.EdgeConnection]], NeatIssueList]:
+    ) -> tuple[
+        type[BaseModel],
+        dict[str, tuple[str, dm.EdgeConnection]],
+        dict[str, tuple[str, dm.EdgeConnection]],
+        NeatIssueList,
+    ]:
         issues = IssueList()
         field_definitions: dict[str, tuple[type, Any]] = {}
-        edge_by_property: dict[str, tuple[str, dm.EdgeConnection]] = {}
+        edge_by_type: dict[str, tuple[str, dm.EdgeConnection]] = {}
+        edge_by_prop_id: dict[str, tuple[str, dm.EdgeConnection]] = {}
         validators: dict[str, classmethod] = {}
         direct_relation_by_property: dict[str, dm.DirectRelation] = {}
         unit_properties: list[str] = []
         json_fields: list[str] = []
+        text_fields: list[str] = []
         for prop_id, prop in view.properties.items():
             if isinstance(prop, dm.EdgeConnection):
-                edge_by_property[prop_id] = prop_id, prop
+                if prop.edge_source:
+                    # Edges with properties are created separately
+                    continue
+
+                edge_by_type[prop.type.external_id] = prop_id, prop
+                edge_by_prop_id[prop_id] = prop_id, prop
+
             if isinstance(prop, dm.MappedProperty):
                 if is_readonly_property(prop.container, prop.container_property_identifier):
                     continue

@@ -329,6 +388,8 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):

                 if data_type == Json:
                     json_fields.append(prop_id)
+                elif data_type == String:
+                    text_fields.append(prop_id)
                 python_type = data_type.python
                 if isinstance(prop.type, ListablePropertyType) and prop.type.is_list:
                     python_type = list[python_type]

@@ -364,11 +425,20 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
             else:
                 raise ValueError(f"Expect valid JSON string or dict for {info.field_name}: {value}")

+        def parse_text(cls, value: Any, info: ValidationInfo) -> Any:
+            if isinstance(value, list):
+                return [remove_namespace_from_uri(v) for v in value]
+            else:
+                return remove_namespace_from_uri(value)
+
         if json_fields:
             validators["parse_json_string"] = field_validator(*json_fields, mode="before")(parse_json_string)  # type: ignore[assignment, arg-type]

         validators["parse_list"] = field_validator("*", mode="before")(parse_list)  # type: ignore[assignment, arg-type]

+        if text_fields:
+            validators["parse_text"] = field_validator(*text_fields, mode="before")(parse_text)  # type: ignore[assignment, arg-type]
+
         if direct_relation_by_property:

             def parse_direct_relation(cls, value: list, info: ValidationInfo) -> dict | list[dict]:

@@ -409,7 +479,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
             )

         pydantic_cls = create_model(view.external_id, __validators__=validators, **field_definitions)  # type: ignore[arg-type, call-overload]
-        return pydantic_cls, edge_by_property, issues
+        return pydantic_cls, edge_by_type, edge_by_prop_id, issues

     def _create_node(
         self,

@@ -430,17 +500,46 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
             ],
         )

-    def _create_edges(
+    def _create_edge_with_properties(
+        self,
+        identifier: str,
+        properties: dict[str | InstanceType, list[str]],
+        start_node: str,
+        end_node: str,
+        pydantic_cls: type[BaseModel],
+        view_id: dm.ViewId,
+    ) -> dm.EdgeApply:
+        type_ = properties.pop(RDF.type, [None])[0]
+        created = pydantic_cls.model_validate(properties)
+        if type_ is None:
+            raise ValueError(f"Missing type for edge {identifier}")
+
+        return dm.EdgeApply(
+            space=self.instance_space,
+            external_id=identifier,
+            type=dm.DirectRelationReference(view_id.space, view_id.external_id),
+            start_node=dm.DirectRelationReference(self.instance_space, start_node),
+            end_node=dm.DirectRelationReference(self.instance_space, end_node),
+            sources=[
+                dm.NodeOrEdgeData(source=view_id, properties=dict(created.model_dump(exclude_unset=True).items()))
+            ],
+        )
+
+    def _create_edges_without_properties(
         self,
         identifier: str,
         properties: dict[str, list[str]],
         edge_by_type: dict[str, tuple[str, dm.EdgeConnection]],
+        edge_by_prop_id: dict[str, tuple[str, dm.EdgeConnection]],
         tracker: Tracker,
     ) -> Iterable[dm.EdgeApply | NeatIssue]:
         for predicate, values in properties.items():
-            if predicate not in edge_by_type:
+            if predicate in edge_by_type:
+                prop_id, edge = edge_by_type[predicate]
+            elif predicate in edge_by_prop_id:
+                prop_id, edge = edge_by_prop_id[predicate]
+            else:
                 continue
-            prop_id, edge = edge_by_type[predicate]
             if isinstance(edge, SingleEdgeConnection) and len(values) > 1:
                 error = ResourceDuplicatedError(
                     resource_type="edge",
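To make the new node/edge split concrete: the loader decides whether a set of extracted properties describes an edge by looking for start and end node references, accepting both camelCase and snake_case keys and popping them so they are not written back as ordinary properties. A standalone sketch mirroring _pop_start_end_node above; the sample data is invented.

def pop_start_end_node(properties: dict[str, list[str]]) -> tuple[str | None, str | None]:
    # Mirrors DMSLoader._pop_start_end_node: check camelCase first, then snake_case,
    # removing the keys so they are not sent along as regular properties.
    start_node = properties.pop("startNode", [None])[0] or properties.pop("start_node", [None])[0]
    end_node = properties.pop("endNode", [None])[0] or properties.pop("end_node", [None])[0]
    return start_node, end_node


edge_props = {"startNode": ["pump_1"], "endNode": ["site_7"], "flowRate": ["42"]}  # invented sample
node_props = {"name": ["pump_1"]}

print(pop_start_end_node(edge_props))  # ('pump_1', 'site_7'); only flowRate remains
print(pop_start_end_node(node_props))  # (None, None) -> treated as a node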
cognite/neat/_graph/queries/_construct.py

@@ -106,7 +106,7 @@ def to_construct_triples(
     non_inherited_starting_rdf_types = []

     for transformation in transformations:
-        traversal = cast(RDFPath, transformation.transformation).traversal
+        traversal = cast(RDFPath, transformation.instance_source).traversal

         # keeping track of starting rdf types of non-inherited transformations/properties
         if isinstance(traversal, Traversal) and not transformation.inherited: