cognite-neat 0.104.0__py3-none-any.whl → 0.105.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (143) hide show
  1. cognite/neat/_client/_api/data_modeling_loaders.py +83 -23
  2. cognite/neat/_client/_api/schema.py +2 -1
  3. cognite/neat/_client/data_classes/neat_sequence.py +261 -0
  4. cognite/neat/_client/data_classes/schema.py +5 -1
  5. cognite/neat/_client/testing.py +33 -0
  6. cognite/neat/_constants.py +57 -0
  7. cognite/neat/_graph/extractors/_classic_cdf/_base.py +6 -5
  8. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +225 -11
  9. cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
  10. cognite/neat/_graph/loaders/_rdf2dms.py +31 -5
  11. cognite/neat/_graph/transformers/__init__.py +3 -1
  12. cognite/neat/_graph/transformers/_classic_cdf.py +39 -51
  13. cognite/neat/_graph/transformers/_rdfpath.py +14 -15
  14. cognite/neat/_graph/transformers/_value_type.py +72 -0
  15. cognite/neat/_issues/__init__.py +0 -2
  16. cognite/neat/_issues/_base.py +19 -35
  17. cognite/neat/_issues/warnings/__init__.py +6 -1
  18. cognite/neat/_issues/warnings/_general.py +7 -0
  19. cognite/neat/_issues/warnings/_properties.py +11 -0
  20. cognite/neat/_issues/warnings/_resources.py +11 -0
  21. cognite/neat/_rules/exporters/_rules2dms.py +35 -1
  22. cognite/neat/_rules/exporters/_rules2excel.py +2 -2
  23. cognite/neat/_rules/importers/_dms2rules.py +66 -55
  24. cognite/neat/_rules/models/_base_rules.py +4 -1
  25. cognite/neat/_rules/models/entities/_wrapped.py +10 -5
  26. cognite/neat/_rules/models/mapping/_classic2core.yaml +239 -38
  27. cognite/neat/_rules/transformers/__init__.py +8 -2
  28. cognite/neat/_rules/transformers/_converters.py +271 -188
  29. cognite/neat/_rules/transformers/_mapping.py +75 -59
  30. cognite/neat/_rules/transformers/_verification.py +2 -3
  31. cognite/neat/_session/_inspect.py +3 -1
  32. cognite/neat/_session/_prepare.py +112 -24
  33. cognite/neat/_session/_read.py +33 -70
  34. cognite/neat/_session/_state.py +2 -2
  35. cognite/neat/_session/_to.py +2 -2
  36. cognite/neat/_store/_rules_store.py +4 -8
  37. cognite/neat/_utils/reader/_base.py +27 -0
  38. cognite/neat/_version.py +1 -1
  39. {cognite_neat-0.104.0.dist-info → cognite_neat-0.105.1.dist-info}/METADATA +4 -3
  40. cognite_neat-0.105.1.dist-info/RECORD +179 -0
  41. {cognite_neat-0.104.0.dist-info → cognite_neat-0.105.1.dist-info}/WHEEL +1 -1
  42. cognite/neat/_app/api/__init__.py +0 -0
  43. cognite/neat/_app/api/asgi/metrics.py +0 -4
  44. cognite/neat/_app/api/configuration.py +0 -98
  45. cognite/neat/_app/api/context_manager/__init__.py +0 -3
  46. cognite/neat/_app/api/context_manager/manager.py +0 -16
  47. cognite/neat/_app/api/data_classes/__init__.py +0 -0
  48. cognite/neat/_app/api/data_classes/rest.py +0 -59
  49. cognite/neat/_app/api/explorer.py +0 -66
  50. cognite/neat/_app/api/routers/configuration.py +0 -25
  51. cognite/neat/_app/api/routers/crud.py +0 -102
  52. cognite/neat/_app/api/routers/metrics.py +0 -10
  53. cognite/neat/_app/api/routers/workflows.py +0 -224
  54. cognite/neat/_app/api/utils/__init__.py +0 -0
  55. cognite/neat/_app/api/utils/data_mapping.py +0 -17
  56. cognite/neat/_app/api/utils/logging.py +0 -26
  57. cognite/neat/_app/api/utils/query_templates.py +0 -92
  58. cognite/neat/_app/main.py +0 -17
  59. cognite/neat/_app/monitoring/__init__.py +0 -0
  60. cognite/neat/_app/monitoring/metrics.py +0 -69
  61. cognite/neat/_app/ui/index.html +0 -1
  62. cognite/neat/_app/ui/neat-app/.gitignore +0 -23
  63. cognite/neat/_app/ui/neat-app/README.md +0 -70
  64. cognite/neat/_app/ui/neat-app/build/asset-manifest.json +0 -14
  65. cognite/neat/_app/ui/neat-app/build/favicon.ico +0 -0
  66. cognite/neat/_app/ui/neat-app/build/img/architect-icon.svg +0 -116
  67. cognite/neat/_app/ui/neat-app/build/img/developer-icon.svg +0 -112
  68. cognite/neat/_app/ui/neat-app/build/img/sme-icon.svg +0 -34
  69. cognite/neat/_app/ui/neat-app/build/index.html +0 -1
  70. cognite/neat/_app/ui/neat-app/build/logo192.png +0 -0
  71. cognite/neat/_app/ui/neat-app/build/manifest.json +0 -25
  72. cognite/neat/_app/ui/neat-app/build/robots.txt +0 -3
  73. cognite/neat/_app/ui/neat-app/build/static/css/main.72e3d92e.css +0 -2
  74. cognite/neat/_app/ui/neat-app/build/static/css/main.72e3d92e.css.map +0 -1
  75. cognite/neat/_app/ui/neat-app/build/static/js/main.5a52cf09.js +0 -3
  76. cognite/neat/_app/ui/neat-app/build/static/js/main.5a52cf09.js.LICENSE.txt +0 -88
  77. cognite/neat/_app/ui/neat-app/build/static/js/main.5a52cf09.js.map +0 -1
  78. cognite/neat/_app/ui/neat-app/build/static/media/logo.8093b84df9ed36a174c629d6fe0b730d.svg +0 -1
  79. cognite/neat/_app/ui/neat-app/package-lock.json +0 -18306
  80. cognite/neat/_app/ui/neat-app/package.json +0 -62
  81. cognite/neat/_app/ui/neat-app/public/favicon.ico +0 -0
  82. cognite/neat/_app/ui/neat-app/public/img/architect-icon.svg +0 -116
  83. cognite/neat/_app/ui/neat-app/public/img/developer-icon.svg +0 -112
  84. cognite/neat/_app/ui/neat-app/public/img/sme-icon.svg +0 -34
  85. cognite/neat/_app/ui/neat-app/public/index.html +0 -43
  86. cognite/neat/_app/ui/neat-app/public/logo192.png +0 -0
  87. cognite/neat/_app/ui/neat-app/public/manifest.json +0 -25
  88. cognite/neat/_app/ui/neat-app/public/robots.txt +0 -3
  89. cognite/neat/_app/ui/neat-app/src/App.css +0 -38
  90. cognite/neat/_app/ui/neat-app/src/App.js +0 -17
  91. cognite/neat/_app/ui/neat-app/src/App.test.js +0 -8
  92. cognite/neat/_app/ui/neat-app/src/MainContainer.tsx +0 -70
  93. cognite/neat/_app/ui/neat-app/src/components/JsonViewer.tsx +0 -43
  94. cognite/neat/_app/ui/neat-app/src/components/LocalUploader.tsx +0 -124
  95. cognite/neat/_app/ui/neat-app/src/components/OverviewComponentEditorDialog.tsx +0 -63
  96. cognite/neat/_app/ui/neat-app/src/components/StepEditorDialog.tsx +0 -511
  97. cognite/neat/_app/ui/neat-app/src/components/TabPanel.tsx +0 -36
  98. cognite/neat/_app/ui/neat-app/src/components/Utils.tsx +0 -56
  99. cognite/neat/_app/ui/neat-app/src/components/WorkflowDeleteDialog.tsx +0 -60
  100. cognite/neat/_app/ui/neat-app/src/components/WorkflowExecutionReport.tsx +0 -112
  101. cognite/neat/_app/ui/neat-app/src/components/WorkflowImportExportDialog.tsx +0 -67
  102. cognite/neat/_app/ui/neat-app/src/components/WorkflowMetadataDialog.tsx +0 -79
  103. cognite/neat/_app/ui/neat-app/src/index.css +0 -13
  104. cognite/neat/_app/ui/neat-app/src/index.js +0 -13
  105. cognite/neat/_app/ui/neat-app/src/logo.svg +0 -1
  106. cognite/neat/_app/ui/neat-app/src/reportWebVitals.js +0 -13
  107. cognite/neat/_app/ui/neat-app/src/setupTests.js +0 -5
  108. cognite/neat/_app/ui/neat-app/src/types/WorkflowTypes.ts +0 -388
  109. cognite/neat/_app/ui/neat-app/src/views/AboutView.tsx +0 -61
  110. cognite/neat/_app/ui/neat-app/src/views/ConfigView.tsx +0 -184
  111. cognite/neat/_app/ui/neat-app/src/views/GlobalConfigView.tsx +0 -180
  112. cognite/neat/_app/ui/neat-app/src/views/WorkflowView.tsx +0 -570
  113. cognite/neat/_app/ui/neat-app/tsconfig.json +0 -27
  114. cognite/neat/_workflows/__init__.py +0 -17
  115. cognite/neat/_workflows/base.py +0 -590
  116. cognite/neat/_workflows/cdf_store.py +0 -393
  117. cognite/neat/_workflows/examples/Export_DMS/workflow.yaml +0 -89
  118. cognite/neat/_workflows/examples/Export_Semantic_Data_Model/workflow.yaml +0 -66
  119. cognite/neat/_workflows/examples/Import_DMS/workflow.yaml +0 -65
  120. cognite/neat/_workflows/examples/Validate_Rules/workflow.yaml +0 -67
  121. cognite/neat/_workflows/examples/Validate_Solution_Model/workflow.yaml +0 -64
  122. cognite/neat/_workflows/manager.py +0 -292
  123. cognite/neat/_workflows/model.py +0 -203
  124. cognite/neat/_workflows/steps/__init__.py +0 -0
  125. cognite/neat/_workflows/steps/data_contracts.py +0 -109
  126. cognite/neat/_workflows/steps/lib/__init__.py +0 -0
  127. cognite/neat/_workflows/steps/lib/current/__init__.py +0 -6
  128. cognite/neat/_workflows/steps/lib/current/graph_extractor.py +0 -100
  129. cognite/neat/_workflows/steps/lib/current/graph_loader.py +0 -51
  130. cognite/neat/_workflows/steps/lib/current/graph_store.py +0 -48
  131. cognite/neat/_workflows/steps/lib/current/rules_exporter.py +0 -537
  132. cognite/neat/_workflows/steps/lib/current/rules_importer.py +0 -323
  133. cognite/neat/_workflows/steps/lib/current/rules_validator.py +0 -106
  134. cognite/neat/_workflows/steps/lib/io/__init__.py +0 -1
  135. cognite/neat/_workflows/steps/lib/io/io_steps.py +0 -393
  136. cognite/neat/_workflows/steps/step_model.py +0 -79
  137. cognite/neat/_workflows/steps_registry.py +0 -218
  138. cognite/neat/_workflows/tasks.py +0 -18
  139. cognite/neat/_workflows/triggers.py +0 -169
  140. cognite/neat/_workflows/utils.py +0 -19
  141. cognite_neat-0.104.0.dist-info/RECORD +0 -276
  142. {cognite_neat-0.104.0.dist-info → cognite_neat-0.105.1.dist-info}/LICENSE +0 -0
  143. {cognite_neat-0.104.0.dist-info → cognite_neat-0.105.1.dist-info}/entry_points.txt +0 -0
@@ -1,37 +1,251 @@
1
- from collections.abc import Iterable
1
+ import itertools
2
+ import json
3
+ from collections.abc import Callable, Iterable, Set
2
4
  from pathlib import Path
5
+ from typing import Any
3
6
 
4
7
  from cognite.client import CogniteClient
5
- from cognite.client.data_classes import Sequence, SequenceFilter, SequenceList
8
+ from cognite.client.data_classes import Sequence, SequenceFilter
9
+ from rdflib import RDF, XSD, Literal, Namespace, URIRef
6
10
 
7
- from ._base import ClassicCDFBaseExtractor, InstanceIdPrefix
11
+ from cognite.neat._client.data_classes.neat_sequence import NeatSequence, NeatSequenceList
12
+ from cognite.neat._shared import Triple
8
13
 
14
+ from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
9
15
 
10
- class SequencesExtractor(ClassicCDFBaseExtractor[Sequence]):
11
- """Extract data from Cognite Data Fusions Sequences into Neat."""
16
+
17
+ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
18
+ """Extract data from Cognite Data Fusions Sequences into Neat.
19
+
20
+ Args:
21
+ items (Iterable[T_CogniteResource]): An iterable of classic resource.
22
+ namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
23
+ to_type (Callable[[T_CogniteResource], str | None], optional): A function to convert an item to a type.
24
+ Defaults to None. If None or if the function returns None, the asset will be set to the default type.
25
+ total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
26
+ is installed. Defaults to None.
27
+ limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
28
+ testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
29
+ limit the extraction to 1000 assets to test the setup.
30
+ unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
31
+ a JSON string.
32
+ skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
33
+ values in this set will be skipped.
34
+ camel_case (bool, optional): Whether to use camelCase instead of snake_case for property names.
35
+ Defaults to True.
36
+ as_write (bool, optional): Whether to use the write/request format of the items. Defaults to False.
37
+ unpack_columns (bool, optional): Whether to unpack columns. Defaults to False.
38
+ """
12
39
 
13
40
  _default_rdf_type = "Sequence"
41
+ _column_rdf_type = "ColumnClass"
14
42
  _instance_id_prefix = InstanceIdPrefix.sequence
15
43
 
44
+ def __init__(
45
+ self,
46
+ items: Iterable[NeatSequence],
47
+ namespace: Namespace | None = None,
48
+ to_type: Callable[[NeatSequence], str | None] | None = None,
49
+ total: int | None = None,
50
+ limit: int | None = None,
51
+ unpack_metadata: bool = True,
52
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
53
+ camel_case: bool = True,
54
+ as_write: bool = False,
55
+ unpack_columns: bool = False,
56
+ ):
57
+ super().__init__(
58
+ items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write
59
+ )
60
+ self.unpack_columns = unpack_columns
61
+
16
62
  @classmethod
17
- def _from_dataset(cls, client: CogniteClient, data_set_external_id: str) -> tuple[int | None, Iterable[Sequence]]:
63
+ def from_dataset(
64
+ cls,
65
+ client: CogniteClient,
66
+ data_set_external_id: str,
67
+ namespace: Namespace | None = None,
68
+ to_type: Callable[[NeatSequence], str | None] | None = None,
69
+ limit: int | None = None,
70
+ unpack_metadata: bool = True,
71
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
72
+ camel_case: bool = True,
73
+ as_write: bool = False,
74
+ unpack_columns: bool = False,
75
+ ):
76
+ total, items = cls._from_dataset(client, data_set_external_id)
77
+ return cls(
78
+ items,
79
+ namespace,
80
+ to_type,
81
+ total,
82
+ limit,
83
+ unpack_metadata,
84
+ skip_metadata_values,
85
+ camel_case,
86
+ as_write,
87
+ unpack_columns,
88
+ )
89
+
90
+ @classmethod
91
+ def from_hierarchy(
92
+ cls,
93
+ client: CogniteClient,
94
+ root_asset_external_id: str,
95
+ namespace: Namespace | None = None,
96
+ to_type: Callable[[NeatSequence], str | None] | None = None,
97
+ limit: int | None = None,
98
+ unpack_metadata: bool = True,
99
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
100
+ camel_case: bool = True,
101
+ as_write: bool = False,
102
+ unpack_columns: bool = False,
103
+ ):
104
+ total, items = cls._from_hierarchy(client, root_asset_external_id)
105
+ return cls(
106
+ items,
107
+ namespace,
108
+ to_type,
109
+ total,
110
+ limit,
111
+ unpack_metadata,
112
+ skip_metadata_values,
113
+ camel_case,
114
+ as_write,
115
+ unpack_columns,
116
+ )
117
+
118
+ @classmethod
119
+ def from_file(
120
+ cls,
121
+ file_path: str | Path,
122
+ namespace: Namespace | None = None,
123
+ to_type: Callable[[NeatSequence], str | None] | None = None,
124
+ limit: int | None = None,
125
+ unpack_metadata: bool = True,
126
+ skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
127
+ camel_case: bool = True,
128
+ as_write: bool = False,
129
+ unpack_columns: bool = False,
130
+ ):
131
+ total, items = cls._from_file(file_path)
132
+ return cls(
133
+ items,
134
+ namespace,
135
+ to_type,
136
+ total,
137
+ limit,
138
+ unpack_metadata,
139
+ skip_metadata_values,
140
+ camel_case,
141
+ as_write,
142
+ unpack_columns,
143
+ )
144
+
145
+ @classmethod
146
+ def _from_dataset(
147
+ cls, client: CogniteClient, data_set_external_id: str
148
+ ) -> tuple[int | None, Iterable[NeatSequence]]:
18
149
  total = client.sequences.aggregate_count(
19
150
  filter=SequenceFilter(data_set_ids=[{"externalId": data_set_external_id}])
20
151
  )
21
152
  items = client.sequences(data_set_external_ids=data_set_external_id)
22
- return total, items
153
+ return total, cls._lookup_rows(items, client)
23
154
 
24
155
  @classmethod
25
156
  def _from_hierarchy(
26
157
  cls, client: CogniteClient, root_asset_external_id: str
27
- ) -> tuple[int | None, Iterable[Sequence]]:
158
+ ) -> tuple[int | None, Iterable[NeatSequence]]:
28
159
  total = client.sequences.aggregate_count(
29
160
  filter=SequenceFilter(asset_subtree_ids=[{"externalId": root_asset_external_id}])
30
161
  )
31
162
  items = client.sequences(asset_subtree_external_ids=[root_asset_external_id])
32
- return total, items
163
+ return total, cls._lookup_rows(items, client)
33
164
 
34
165
  @classmethod
35
- def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[Sequence]]:
36
- sequences = SequenceList.load(Path(file_path).read_text())
166
+ def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[NeatSequence]]:
167
+ sequences = NeatSequenceList.load(Path(file_path).read_text())
37
168
  return len(sequences), sequences
169
+
170
+ @classmethod
171
+ def _lookup_rows(cls, sequence_iterable: Iterable[Sequence], client: CogniteClient) -> Iterable[NeatSequence]:
172
+ iterator = iter(sequence_iterable)
173
+ for sequences in iter(lambda: list(itertools.islice(iterator, client.config.max_workers)), []):
174
+ # The PySDK uses max_workers to limit the number of requests made in parallel.
175
+ # We can only get one set of sequence rows per request, so we chunk the sequences up into groups of
176
+ # max_workers and then make a request to get all the rows for those sequences in one go.
177
+ sequence_list = list(sequences)
178
+ row_list = client.sequences.rows.retrieve(id=[seq.id for seq in sequence_list])
179
+ rows_by_sequence_id = {row.id: row.rows for row in row_list}
180
+ for seq in sequence_list:
181
+ yield NeatSequence.from_cognite_sequence(seq, rows_by_sequence_id.get(seq.id))
182
+
183
+ def _item2triples_special_cases(self, id_: URIRef, dumped: dict[str, Any]) -> list[Triple]:
184
+ """For sequences, columns and rows are special cases.'"""
185
+ if self.unpack_columns:
186
+ return self._unpack_columns(id_, dumped)
187
+ else:
188
+ return self._default_columns_and_rows(id_, dumped)
189
+
190
+ def _default_columns_and_rows(self, id_: URIRef, dumped: dict[str, Any]) -> list[Triple]:
191
+ triples: list[Triple] = []
192
+ if "columns" in dumped:
193
+ columns = dumped.pop("columns")
194
+ triples.extend(
195
+ [
196
+ (
197
+ id_,
198
+ self.namespace.columns,
199
+ # Rows have a rowNumber, so we introduce colNumber here to be consistent.
200
+ Literal(json.dumps({"colNumber": no, **col}), datatype=XSD._NS["json"]),
201
+ )
202
+ for no, col in enumerate(columns, 1)
203
+ ]
204
+ )
205
+ if "rows" in dumped:
206
+ rows = dumped.pop("rows")
207
+ triples.extend(
208
+ [(id_, self.namespace.rows, Literal(json.dumps(row), datatype=XSD._NS["json"])) for row in rows]
209
+ )
210
+ return triples
211
+
212
+ def _unpack_columns(self, id_: URIRef, dumped: dict[str, Any]) -> list[Triple]:
213
+ triples: list[Triple] = []
214
+ columnValueTypes: list[str] = []
215
+ column_order: list[str] = []
216
+ if columns := dumped.pop("columns", None):
217
+ for col in columns:
218
+ external_id = col.pop("externalId")
219
+ column_order.append(external_id)
220
+ value_type = col.pop("valueType")
221
+ columnValueTypes.append(value_type)
222
+
223
+ col_id = self.namespace[f"Column_{external_id}"]
224
+ triples.append((id_, self.namespace[external_id], col_id))
225
+ type_ = self.namespace[self._column_rdf_type]
226
+ triples.append((col_id, RDF.type, type_))
227
+ if metadata := col.pop("metadata", None):
228
+ triples.extend(self._metadata_to_triples(col_id, metadata))
229
+ # Should only be name and description left in col
230
+ for key, value in col.items():
231
+ if value is None:
232
+ continue
233
+ triples.append((col_id, self.namespace[key], Literal(value, datatype=XSD.string)))
234
+
235
+ triples.append(
236
+ (id_, self.namespace.columnOrder, Literal(json.dumps(column_order), datatype=XSD._NS["json"]))
237
+ )
238
+ triples.append(
239
+ (id_, self.namespace.columnValueTypes, Literal(json.dumps(columnValueTypes), datatype=XSD._NS["json"]))
240
+ )
241
+ if rows := dumped.pop("rows", None):
242
+ values_by_column: list[list[Any]] = [[] for _ in column_order]
243
+ for row in rows:
244
+ for i, value in enumerate(row["values"]):
245
+ values_by_column[i].append(value)
246
+ for col_name, values in zip(column_order, values_by_column, strict=False):
247
+ triples.append(
248
+ (id_, self.namespace[f"{col_name}Values"], Literal(json.dumps(values), datatype=XSD._NS["json"]))
249
+ )
250
+
251
+ return triples
@@ -183,7 +183,7 @@ def _get_generation_order(
183
183
  parent_col: str = "source_class",
184
184
  child_col: str = "target_class",
185
185
  ) -> dict:
186
- parent_child_list: list[list[str]] = class_linkage[[parent_col, child_col]].values.tolist()
186
+ parent_child_list: list[list[str]] = class_linkage[[parent_col, child_col]].values.tolist() # type: ignore[assignment]
187
187
  # Build a directed graph and a list of all names that have no parent
188
188
  graph: dict[str, set] = {name: set() for tup in parent_child_list for name in tup}
189
189
  has_parent: dict[str, bool] = {name: False for tup in parent_child_list for name in tup}
@@ -1,10 +1,11 @@
1
1
  import itertools
2
2
  import json
3
+ import warnings
3
4
  from collections import defaultdict
4
5
  from collections.abc import Iterable, Sequence
5
6
  from graphlib import TopologicalSorter
6
7
  from pathlib import Path
7
- from typing import Any, get_args
8
+ from typing import Any, cast, get_args
8
9
 
9
10
  import yaml
10
11
  from cognite.client import CogniteClient
@@ -19,6 +20,7 @@ from pydantic import BaseModel, ValidationInfo, create_model, field_validator
19
20
  from rdflib import RDF, URIRef
20
21
 
21
22
  from cognite.neat._client import NeatClient
23
+ from cognite.neat._constants import DMS_DIRECT_RELATION_LIST_LIMIT, is_readonly_property
22
24
  from cognite.neat._graph._tracking import LogTracker, Tracker
23
25
  from cognite.neat._issues import IssueList, NeatIssue, NeatIssueList
24
26
  from cognite.neat._issues.errors import (
@@ -27,7 +29,7 @@ from cognite.neat._issues.errors import (
27
29
  ResourceDuplicatedError,
28
30
  ResourceRetrievalError,
29
31
  )
30
- from cognite.neat._issues.warnings import PropertyTypeNotSupportedWarning
32
+ from cognite.neat._issues.warnings import PropertyDirectRelationLimitWarning, PropertyTypeNotSupportedWarning
31
33
  from cognite.neat._rules.analysis._dms import DMSAnalysis
32
34
  from cognite.neat._rules.models import DMSRules
33
35
  from cognite.neat._rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE, Json
@@ -303,6 +305,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
303
305
  if isinstance(prop, dm.EdgeConnection):
304
306
  edge_by_property[prop_id] = prop_id, prop
305
307
  if isinstance(prop, dm.MappedProperty):
308
+ if is_readonly_property(prop.container, prop.container_property_identifier):
309
+ continue
310
+
306
311
  if isinstance(prop.type, dm.DirectRelation):
307
312
  if prop.container == dm.ContainerId("cdf_cdm", "CogniteTimeSeries") and prop_id == "unit":
308
313
  unit_properties.append(prop_id)
@@ -343,9 +348,14 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
343
348
 
344
349
  return value
345
350
 
346
- def parse_json_string(cls, value: Any, info: ValidationInfo) -> dict:
351
+ def parse_json_string(cls, value: Any, info: ValidationInfo) -> dict | list:
347
352
  if isinstance(value, dict):
348
353
  return value
354
+ elif isinstance(value, list):
355
+ try:
356
+ return [json.loads(v) if isinstance(v, str) else v for v in value]
357
+ except json.JSONDecodeError as error:
358
+ raise ValueError(f"Not valid JSON string for {info.field_name}: {value}, error {error}") from error
349
359
  elif isinstance(value, str):
350
360
  try:
351
361
  return json.loads(value)
@@ -364,7 +374,21 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
364
374
  def parse_direct_relation(cls, value: list, info: ValidationInfo) -> dict | list[dict]:
365
375
  # We validate above that we only get one value for single direct relations.
366
376
  if list.__name__ in _get_field_value_types(cls, info):
367
- return [{"space": self.instance_space, "externalId": remove_namespace_from_uri(v)} for v in value]
377
+ result = [{"space": self.instance_space, "externalId": remove_namespace_from_uri(v)} for v in value]
378
+ if len(result) <= DMS_DIRECT_RELATION_LIST_LIMIT:
379
+ return result
380
+ warnings.warn(
381
+ PropertyDirectRelationLimitWarning(
382
+ identifier="unknown",
383
+ resource_type="view property",
384
+ property_name=cast(str, cls.model_fields[info.field_name].alias or info.field_name),
385
+ limit=DMS_DIRECT_RELATION_LIST_LIMIT,
386
+ ),
387
+ stacklevel=2,
388
+ )
389
+ # To get deterministic results, we sort by space and externalId
390
+ result.sort(key=lambda x: (x["space"], x["externalId"]))
391
+ return result[:DMS_DIRECT_RELATION_LIST_LIMIT]
368
392
  elif value:
369
393
  return {"space": self.instance_space, "externalId": remove_namespace_from_uri(value[0])}
370
394
  return {}
@@ -401,7 +425,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
401
425
  space=self.instance_space,
402
426
  external_id=identifier,
403
427
  type=(dm.DirectRelationReference(view_id.space, view_id.external_id) if type_ is not None else None),
404
- sources=[dm.NodeOrEdgeData(source=view_id, properties=dict(created.model_dump().items()))],
428
+ sources=[
429
+ dm.NodeOrEdgeData(source=view_id, properties=dict(created.model_dump(exclude_unset=True).items()))
430
+ ],
405
431
  )
406
432
 
407
433
  def _create_edges(
@@ -15,7 +15,7 @@ from ._prune_graph import (
15
15
  PruneTypes,
16
16
  )
17
17
  from ._rdfpath import AddSelfReferenceProperty, MakeConnectionOnExactMatch
18
- from ._value_type import ConvertLiteral, LiteralToEntity, SplitMultiValueProperty
18
+ from ._value_type import ConnectionToLiteral, ConvertLiteral, LiteralToEntity, SplitMultiValueProperty
19
19
 
20
20
  __all__ = [
21
21
  "AddAssetDepth",
@@ -26,6 +26,7 @@ __all__ = [
26
26
  "AssetSequenceConnector",
27
27
  "AssetTimeSeriesConnector",
28
28
  "AttachPropertyFromTargetToSource",
29
+ "ConnectionToLiteral",
29
30
  "ConvertLiteral",
30
31
  "LiteralToEntity",
31
32
  "MakeConnectionOnExactMatch",
@@ -55,4 +56,5 @@ Transformers = (
55
56
  | PruneInstancesOfUnknownType
56
57
  | ConvertLiteral
57
58
  | LiteralToEntity
59
+ | ConnectionToLiteral
58
60
  )
@@ -6,6 +6,7 @@ from functools import lru_cache
6
6
  from typing import cast
7
7
 
8
8
  from rdflib import RDF, Graph, Literal, Namespace, URIRef
9
+ from rdflib.query import ResultRow
9
10
 
10
11
  from cognite.neat._constants import CLASSIC_CDF_NAMESPACE, DEFAULT_NAMESPACE
11
12
  from cognite.neat._graph import extractors
@@ -18,71 +19,57 @@ from cognite.neat._utils.rdf_ import (
18
19
  remove_namespace_from_uri,
19
20
  )
20
21
 
21
- from ._base import BaseTransformer
22
+ from ._base import BaseTransformer, BaseTransformerStandardised, RowTransformationOutput
22
23
 
23
24
 
24
- # TODO: standardise
25
- class AddAssetDepth(BaseTransformer):
26
- description: str = "Adds depth of asset in the asset hierarchy to the graph"
25
+ class AddAssetDepth(BaseTransformerStandardised):
26
+ description: str = "Adds depth of asset in the asset hierarchy and optionally types asset based on depth"
27
27
  _use_only_once: bool = True
28
28
  _need_changes = frozenset({str(extractors.AssetsExtractor.__name__)})
29
29
 
30
- _parent_template: str = """SELECT ?child ?parent WHERE {{
31
- <{asset_id}> <{parent_prop}> ?child .
32
- OPTIONAL{{?child <{parent_prop}>+ ?parent .}}}}"""
33
-
34
- _root_template: str = """SELECT ?root WHERE {{
35
- <{asset_id}> <{root_prop}> ?root .}}"""
36
-
37
30
  def __init__(
38
31
  self,
39
32
  asset_type: URIRef | None = None,
40
- root_prop: URIRef | None = None,
41
33
  parent_prop: URIRef | None = None,
42
34
  depth_typing: dict[int, str] | None = None,
43
35
  ):
44
36
  self.asset_type = asset_type or DEFAULT_NAMESPACE.Asset
45
- self.root_prop = root_prop or DEFAULT_NAMESPACE.rootId
46
37
  self.parent_prop = parent_prop or DEFAULT_NAMESPACE.parentId
47
38
  self.depth_typing = depth_typing
48
39
 
49
- def transform(self, graph: Graph) -> None:
50
- """Adds depth of asset in the asset hierarchy to the graph."""
51
- for result in graph.query(f"SELECT DISTINCT ?asset_id WHERE {{?asset_id a <{self.asset_type}>}}"):
52
- asset_id = cast(tuple, result)[0]
53
- if depth := self.get_depth(graph, asset_id, self.root_prop, self.parent_prop):
54
- graph.add((asset_id, DEFAULT_NAMESPACE.depth, Literal(depth)))
55
-
56
- if self.depth_typing and (type_ := self.depth_typing.get(depth, None)):
57
- # remove existing type
58
- graph.remove((asset_id, RDF.type, None))
59
-
60
- # add new type
61
- graph.add((asset_id, RDF.type, DEFAULT_NAMESPACE[type_]))
62
-
63
- @classmethod
64
- def get_depth(
65
- cls,
66
- graph: Graph,
67
- asset_id: URIRef,
68
- root_prop: URIRef,
69
- parent_prop: URIRef,
70
- ) -> int | None:
71
- """Get asset depth in the asset hierarchy."""
72
-
73
- # Handles non-root assets
74
- if result := list(graph.query(cls._parent_template.format(asset_id=asset_id, parent_prop=parent_prop))):
75
- return len(cast(list[tuple], result)) + 2 if cast(list[tuple], result)[0][1] else 2
76
-
77
- # Handles root assets
78
- elif (
79
- (result := list(graph.query(cls._root_template.format(asset_id=asset_id, root_prop=root_prop))))
80
- and len(cast(list[tuple], result)) == 1
81
- and cast(list[tuple], result)[0][0] == asset_id
82
- ):
83
- return 1
84
- else:
85
- return None
40
+ def _iterate_query(self) -> str:
41
+ query = """SELECT ?asset (IF(?isRoot, 0, COUNT(?parent)) AS ?parentCount)
42
+ WHERE {{
43
+ ?asset a <{asset_type}> .
44
+ OPTIONAL {{ ?asset <{parent_prop}>+ ?parent . }}
45
+ BIND(IF(BOUND(?parent), false, true) AS ?isRoot)}}
46
+ GROUP BY ?asset ?isRoot
47
+ ORDER BY DESC(?parentCount)"""
48
+
49
+ return query.format(
50
+ asset_type=self.asset_type,
51
+ parent_prop=self.parent_prop,
52
+ )
53
+
54
+ def _count_query(self) -> str:
55
+ query = """SELECT (COUNT(?asset) as ?count)
56
+ WHERE {{ ?asset a <{asset_type}> . }}"""
57
+
58
+ return query.format(asset_type=self.asset_type)
59
+
60
+ def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
61
+ row_output = RowTransformationOutput()
62
+ subject, object = query_result_row
63
+
64
+ row_output.add_triples.append(cast(Triple, (subject, DEFAULT_NAMESPACE.depth, object)))
65
+
66
+ if self.depth_typing and (type_ := self.depth_typing.get(int(object), None)):
67
+ row_output.remove_triples.append(cast(Triple, (subject, RDF.type, self.asset_type)))
68
+ row_output.add_triples.append(cast(Triple, (subject, RDF.type, DEFAULT_NAMESPACE[type_])))
69
+
70
+ row_output.instances_modified_count += 1
71
+
72
+ return row_output
86
73
 
87
74
 
88
75
  # TODO: standardise
@@ -375,7 +362,8 @@ WHERE {{
375
362
  ) -> list[Triple]:
376
363
  relationship_triples = cast(list[Triple], list(graph.query(f"DESCRIBE <{relationship_id}>")))
377
364
  object_by_predicates = cast(
378
- dict[str, URIRef | Literal], {remove_namespace_from_uri(row[1]): row[2] for row in relationship_triples}
365
+ dict[str, URIRef | Literal],
366
+ {remove_namespace_from_uri(row[1]): row[2] for row in relationship_triples if row[1] != RDF.type},
379
367
  )
380
368
  source_external_id = cast(URIRef, object_by_predicates["sourceExternalId"])
381
369
  target_source_id = cast(URIRef, object_by_predicates["targetExternalId"])
@@ -1,15 +1,14 @@
1
1
  from typing import cast
2
2
  from urllib.parse import quote
3
3
 
4
- from rdflib import Graph, URIRef
4
+ from rdflib import Graph, Namespace, URIRef
5
5
  from rdflib.query import ResultRow
6
6
 
7
- from cognite.neat._constants import DEFAULT_NAMESPACE
8
7
  from cognite.neat._rules.analysis import InformationAnalysis
9
8
  from cognite.neat._rules.models._rdfpath import RDFPath, SingleProperty
10
9
  from cognite.neat._rules.models.information import InformationRules
11
10
  from cognite.neat._shared import Triple
12
- from cognite.neat._utils.rdf_ import remove_namespace_from_uri
11
+ from cognite.neat._utils.rdf_ import get_namespace, remove_namespace_from_uri
13
12
 
14
13
  from ._base import BaseTransformer, BaseTransformerStandardised, RowTransformationOutput
15
14
 
@@ -76,11 +75,11 @@ class MakeConnectionOnExactMatch(BaseTransformerStandardised):
76
75
  self.subject_predicate = subject_predicate
77
76
  self.object_type = object_type
78
77
  self.object_predicate = object_predicate
79
-
78
+ subject_namespace = Namespace(get_namespace(subject_type))
80
79
  self.connection = (
81
- DEFAULT_NAMESPACE[quote(connection.strip())]
80
+ subject_namespace[quote(connection.strip())]
82
81
  if isinstance(connection, str)
83
- else connection or DEFAULT_NAMESPACE[remove_namespace_from_uri(self.object_type).lower()]
82
+ else connection or subject_namespace[remove_namespace_from_uri(self.object_type).lower()]
84
83
  )
85
84
 
86
85
  self.limit = limit
@@ -88,10 +87,10 @@ class MakeConnectionOnExactMatch(BaseTransformerStandardised):
88
87
  def _iterate_query(self) -> str:
89
88
  query = """SELECT DISTINCT ?subject ?object
90
89
  WHERE {{
91
- ?subject a <{subject_type}> .
92
- ?subject <{subject_predicate}> ?value .
93
- ?object <{object_predicate}> ?value .
94
- ?object a <{object_type}> .
90
+ ?subject a <{subject_type}> ;
91
+ <{subject_predicate}> ?value .
92
+ ?object a <{object_type}> ;
93
+ <{object_predicate}> ?value .
95
94
  }}"""
96
95
 
97
96
  if self.limit and isinstance(self.limit, int) and self.limit > 0:
@@ -105,12 +104,12 @@ class MakeConnectionOnExactMatch(BaseTransformerStandardised):
105
104
  )
106
105
 
107
106
  def _count_query(self) -> str:
108
- query = """SELECT (COUNT(DISTINCT (?subject ?object)) as ?count)
107
+ query = """SELECT (COUNT(DISTINCT ?subject) as ?count)
109
108
  WHERE {{
110
- ?subject a <{subject_type}> .
111
- ?subject <{subject_predicate}> ?value .
112
- ?object <{object_predicate}> ?value .
113
- ?object a <{object_type}> .
109
+ ?subject a <{subject_type}> ;
110
+ <{subject_predicate}> ?value .
111
+ ?object a <{object_type}> ;
112
+ <{object_predicate}> ?value .
114
113
  }}"""
115
114
 
116
115
  if self.limit and isinstance(self.limit, int) and self.limit > 0:
@@ -223,3 +223,75 @@ class LiteralToEntity(BaseTransformerStandardised):
223
223
  row_output.instances_modified_count += 1 # we modify the old entity
224
224
 
225
225
  return row_output
226
+
227
+
228
class ConnectionToLiteral(BaseTransformerStandardised):
    """Replace IRI-valued objects of a predicate with literal values.

    For every triple ``(instance, subject_predicate, object)`` whose object is
    an IRI, the triple is removed and a new triple with the same subject and
    predicate is added whose object is a literal built from the IRI with its
    namespace removed.

    Args:
        subject_type: When given, only instances that are ``a <subject_type>``
            are considered. When ``None``, every instance that has
            ``subject_predicate`` is considered.
        subject_predicate: The predicate whose IRI objects are converted.
    """

    description = "Converts an entity connection to a literal value"

    def __init__(self, subject_type: URIRef | None, subject_predicate: URIRef) -> None:
        self.subject_type = subject_type
        self.subject_predicate = subject_predicate

    def _build_query(self, projection: str, object_filter: str) -> str:
        """Assemble the SPARQL query shared by the iterate/count/skip-count queries.

        Args:
            projection: The SELECT clause content, e.g. ``?instance ?object``.
            object_filter: Name of the SPARQL test applied to ``?object``
                (``isIRI`` or ``isLiteral``).

        Returns:
            The query string, restricted to ``subject_type`` when it is set.
        """
        where_lines = []
        # The type restriction is optional: without it the predicate alone selects the rows.
        if self.subject_type is not None:
            where_lines.append(f"  ?instance a <{self.subject_type}> .")
        where_lines.append(f"  ?instance <{self.subject_predicate}> ?object")
        where_lines.append(f"  FILTER({object_filter}(?object))")

        return "\n".join([f"SELECT {projection}", "WHERE {", *where_lines, "}"])

    def _iterate_query(self) -> str:
        """Return the query yielding ``(instance, object)`` rows still to be converted."""
        return self._build_query("?instance ?object", "isIRI")

    def _skip_count_query(self) -> str:
        """Return the query counting objects that are already literals."""
        return self._build_query("(COUNT(?object) AS ?objectCount)", "isLiteral")

    def _count_query(self) -> str:
        """Return the query counting objects that are IRIs and will be converted."""
        return self._build_query("(COUNT(?object) AS ?objectCount)", "isIRI")

    def operation(self, query_result_row: ResultRow) -> RowTransformationOutput:
        """Swap one IRI-valued triple for its literal counterpart.

        Args:
            query_result_row: A row produced by ``_iterate_query``, unpacked as
                ``(instance, object_entity)``.

        Returns:
            The triples to add and remove for this row, with the modified
            instance count incremented by one.
        """
        row_output = RowTransformationOutput()

        instance, object_entity = cast(tuple[URIRef, URIRef], query_result_row)
        # NOTE(review): presumably this leaves only the local identifier of the IRI;
        # confirm against remove_namespace_from_uri.
        value = remove_namespace_from_uri(object_entity)

        row_output.add_triples.append((instance, self.subject_predicate, rdflib.Literal(value)))
        row_output.remove_triples.append((instance, self.subject_predicate, object_entity))
        row_output.instances_modified_count += 1

        return row_output
@@ -3,7 +3,6 @@ as some helper classes to handle them like NeatIssueList"""
3
3
 
4
4
  from ._base import (
5
5
  DefaultWarning,
6
- FutureResult,
7
6
  IssueList,
8
7
  MultiValueError,
9
8
  NeatError,
@@ -16,7 +15,6 @@ from ._base import (
16
15
 
17
16
  __all__ = [
18
17
  "DefaultWarning",
19
- "FutureResult",
20
18
  "IssueList",
21
19
  "MultiValueError",
22
20
  "NeatError",