cognite-neat 0.76.2__py3-none-any.whl → 0.76.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release has been flagged as potentially problematic.


This version of cognite-neat has been flagged as potentially problematic; consult the package registry's advisory page for details.

cognite/neat/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.76.2"
1
+ __version__ = "0.76.3"
@@ -13,8 +13,10 @@ from openpyxl.worksheet.worksheet import Worksheet
13
13
 
14
14
  from cognite.neat.rules._shared import Rules
15
15
  from cognite.neat.rules.models import (
16
+ DataModelType,
16
17
  DMSRules,
17
18
  DomainRules,
19
+ ExtensionCategory,
18
20
  InformationRules,
19
21
  RoleTypes,
20
22
  SchemaCompleteness,
@@ -35,11 +37,18 @@ class ExcelExporter(BaseExporter[Workbook]):
35
37
  new_model_id: The new model ID to use for the exported spreadsheet. This is only applicable if the input
36
38
  rules have 'is_reference' set. If provided, the model ID will be used to automatically create the
37
39
  new metadata sheet in the Excel file.
38
- is_reference: If True, the rules are considered to be a reference model. The exported Excel file will
39
- then contain empty sheets for the main rules and this data model will be dumped to the reference sheets.
40
- This is useful when you are building a solution model based on an Enterprise model, then the
41
- Enterprise model will serve as the reference model. It is also useful when you are extending an existing
42
- model, then the existing model will serve as the reference model. Defaults to False.
40
+ dump_as: This determines how the rules are written to the Excel file. An Excel file has up to three sets of
41
+ sheets: user, last, and reference. The user sheets are used for inputting rules from a user. The last sheets
42
+ are used for the last version of the same model as the user, while the reference sheets are used for
43
+ the model the user is building on. The options are:
44
+ * "user": The rules are written to the user sheets. This is used when you want to modify the rules
45
+ directly and potentially change the model. This is useful when you have imported the data model
46
+ from outside CDF and you want to modify it before you write it to CDF.
47
+ * "last": The rules are written to the last sheets. This is used when you want to extend the rules,
48
+ but have validation that you are not breaking the existing model. This is used when you want to
49
+ change a model that has already been published to CDF and that model is in production.
50
+ * "reference": The rules are written to the reference sheets. This is typically used when you want to build
51
+ a new solution on top of an enterprise model.
43
52
 
44
53
  The following styles are available:
45
54
 
@@ -51,7 +60,7 @@ class ExcelExporter(BaseExporter[Workbook]):
51
60
  """
52
61
 
53
62
  Style = Literal["none", "minimal", "default", "maximal"]
54
-
63
+ DumpOptions = Literal["user", "last", "reference"]
55
64
  _main_header_by_sheet_name: ClassVar[dict[str, str]] = {
56
65
  "Properties": "Definition of Properties per Class",
57
66
  "Classes": "Definition of Classes",
@@ -59,21 +68,24 @@ class ExcelExporter(BaseExporter[Workbook]):
59
68
  "Containers": "Definition of Containers",
60
69
  }
61
70
  style_options = get_args(Style)
71
+ dump_options = get_args(DumpOptions)
62
72
 
63
73
  def __init__(
64
74
  self,
65
75
  styling: Style = "default",
66
76
  output_role: RoleTypes | None = None,
67
77
  new_model_id: tuple[str, str, str] | None = None,
68
- is_reference: bool = False,
78
+ dump_as: DumpOptions = "user",
69
79
  ):
70
80
  if styling not in self.style_options:
71
81
  raise ValueError(f"Invalid styling: {styling}. Valid options are {self.style_options}")
82
+ if dump_as not in self.dump_options:
83
+ raise ValueError(f"Invalid dump_as: {dump_as}. Valid options are {self.dump_options}")
72
84
  self.styling = styling
73
85
  self._styling_level = self.style_options.index(styling)
74
86
  self.output_role = output_role
75
87
  self.new_model_id = new_model_id
76
- self.is_reference = is_reference
88
+ self.dump_as = dump_as
77
89
 
78
90
  def export_to_file(self, rules: Rules, filepath: Path) -> None:
79
91
  """Exports transformation rules to excel file."""
@@ -90,41 +102,48 @@ class ExcelExporter(BaseExporter[Workbook]):
90
102
  # Remove default sheet named "Sheet"
91
103
  workbook.remove(workbook["Sheet"])
92
104
 
93
- dumped_rules: dict[str, Any]
105
+ dumped_user_rules: dict[str, Any]
106
+ dumped_last_rules: dict[str, Any] | None = None
94
107
  dumped_reference_rules: dict[str, Any] | None = None
95
- if self.is_reference:
108
+ if self.dump_as != "user":
96
109
  # Writes empty reference sheets
97
- dumped_rules = {
110
+ dumped_user_rules = {
98
111
  "Metadata": self._create_metadata_sheet_user_rules(rules),
99
112
  }
100
- dumped_rules["Metadata"]["role"] = (
101
- self.output_role and self.output_role.value
102
- ) or rules.metadata.role.value
103
- dumped_reference_rules = rules.reference_self().model_dump(by_alias=True)
113
+
114
+ if self.dump_as == "last":
115
+ dumped_last_rules = rules.model_dump(by_alias=True)
116
+ if rules.reference:
117
+ dumped_reference_rules = rules.reference.model_dump(by_alias=True)
118
+ elif self.dump_as == "reference":
119
+ dumped_reference_rules = rules.reference_self().model_dump(by_alias=True)
104
120
  else:
105
- dumped_rules = rules.model_dump(by_alias=True)
121
+ dumped_user_rules = rules.model_dump(by_alias=True)
122
+ if rules.last:
123
+ dumped_last_rules = rules.last.model_dump(by_alias=True)
106
124
  if rules.reference:
107
125
  dumped_reference_rules = rules.reference.model_dump(by_alias=True)
108
126
 
109
- self._write_metadata_sheet(workbook, dumped_rules["Metadata"])
110
- self._write_sheets(workbook, dumped_rules, rules)
127
+ self._write_metadata_sheet(workbook, dumped_user_rules["Metadata"])
128
+ self._write_sheets(workbook, dumped_user_rules, rules)
129
+ if dumped_last_rules:
130
+ self._write_sheets(workbook, dumped_last_rules, rules, sheet_prefix="Last")
131
+
111
132
  if dumped_reference_rules:
112
- self._write_sheets(workbook, dumped_reference_rules, rules, is_reference=True)
113
- self._write_metadata_sheet(workbook, dumped_reference_rules["Metadata"], is_reference=True)
133
+ prefix = "Ref"
134
+ self._write_sheets(workbook, dumped_reference_rules, rules, sheet_prefix=prefix)
135
+ self._write_metadata_sheet(workbook, dumped_reference_rules["Metadata"], sheet_prefix=prefix)
114
136
 
115
137
  if self._styling_level > 0:
116
138
  self._adjust_column_widths(workbook)
117
139
 
118
140
  return workbook
119
141
 
120
- def _write_sheets(self, workbook: Workbook, dumped_rules: dict[str, Any], rules: Rules, is_reference: bool = False):
142
+ def _write_sheets(self, workbook: Workbook, dumped_rules: dict[str, Any], rules: Rules, sheet_prefix: str = ""):
121
143
  for sheet_name, headers in rules.headers_by_sheet(by_alias=True).items():
122
144
  if sheet_name in ("Metadata", "prefixes", "Reference", "Last"):
123
145
  continue
124
- if is_reference:
125
- sheet = workbook.create_sheet(f"Ref{sheet_name}")
126
- else:
127
- sheet = workbook.create_sheet(sheet_name)
146
+ sheet = workbook.create_sheet(f"{sheet_prefix}{sheet_name}")
128
147
 
129
148
  main_header = self._main_header_by_sheet_name[sheet_name]
130
149
  sheet.append([main_header] + [""] * (len(headers) - 1))
@@ -170,17 +189,14 @@ class ExcelExporter(BaseExporter[Workbook]):
170
189
  for cell in sheet["2"]:
171
190
  cell.font = Font(bold=True, size=14)
172
191
 
173
- def _write_metadata_sheet(self, workbook: Workbook, metadata: dict[str, Any], is_reference: bool = False) -> None:
192
+ def _write_metadata_sheet(self, workbook: Workbook, metadata: dict[str, Any], sheet_prefix: str = "") -> None:
174
193
  # Excel does not support timezone in datetime strings
175
194
  if isinstance(metadata.get("created"), datetime):
176
195
  metadata["created"] = metadata["created"].replace(tzinfo=None)
177
196
  if isinstance(metadata.get("updated"), datetime):
178
197
  metadata["updated"] = metadata["updated"].replace(tzinfo=None)
179
198
 
180
- if is_reference:
181
- metadata_sheet = workbook.create_sheet("RefMetadata")
182
- else:
183
- metadata_sheet = workbook.create_sheet("Metadata")
199
+ metadata_sheet = workbook.create_sheet(f"{sheet_prefix}Metadata")
184
200
  for key, value in metadata.items():
185
201
  metadata_sheet.append([key, value])
186
202
 
@@ -241,25 +257,29 @@ class ExcelExporter(BaseExporter[Workbook]):
241
257
  # Excel does not support timezone in datetime strings
242
258
  now_iso = datetime.now().replace(tzinfo=None).isoformat()
243
259
  is_info = isinstance(rules, InformationRules)
244
- is_dms = not is_info
245
- is_extension = self.new_model_id is not None
246
- is_solution = is_extension and self.new_model_id != existing_model_id
260
+ is_dms = isinstance(rules, DMSRules)
261
+ is_extension = self.new_model_id is not None or rules.reference is not None
262
+ is_solution = rules.metadata.data_model_type == DataModelType.solution
247
263
 
248
- if is_solution:
264
+ if is_solution and self.new_model_id:
249
265
  metadata["prefix" if is_info else "space"] = self.new_model_id[0] # type: ignore[index]
250
266
  metadata["title" if is_info else "externalId"] = self.new_model_id[1] # type: ignore[index]
251
267
  metadata["version"] = self.new_model_id[2] # type: ignore[index]
268
+ elif is_solution and self.dump_as == "reference" and rules.reference:
269
+ metadata["prefix" if is_info else "space"] = "YOUR_PREFIX"
270
+ metadata["title" if is_info else "externalId"] = "YOUR_TITLE"
271
+ metadata["version"] = "1"
252
272
  else:
253
273
  metadata["prefix" if is_info else "space"] = existing_model_id[0]
254
274
  metadata["title" if is_info else "externalId"] = existing_model_id[1]
255
275
  metadata["version"] = existing_model_id[2]
256
276
 
257
- if is_solution and is_info:
277
+ if is_solution and is_info and self.new_model_id:
258
278
  metadata["namespace"] = f"http://purl.org/{self.new_model_id[0]}/" # type: ignore[index]
259
279
  elif is_info:
260
280
  metadata["namespace"] = existing_metadata["namespace"]
261
281
 
262
- if is_solution and is_dms:
282
+ if is_solution and is_dms and self.new_model_id:
263
283
  metadata["name"] = self.new_model_id[1] # type: ignore[index]
264
284
 
265
285
  if is_solution:
@@ -285,6 +305,11 @@ class ExcelExporter(BaseExporter[Workbook]):
285
305
  else:
286
306
  metadata["schema"] = SchemaCompleteness.complete.value
287
307
 
288
- metadata["extension"] = "addition"
308
+ if is_solution:
309
+ metadata["dataModelType"] = DataModelType.solution.value
310
+ else:
311
+ metadata["dataModelType"] = DataModelType.enterprise.value
289
312
 
313
+ metadata["extension"] = ExtensionCategory.addition.value
314
+ metadata["role"] = (self.output_role and self.output_role.value) or rules.metadata.role.value
290
315
  return metadata
@@ -87,71 +87,95 @@ class MetadataRaw(UserDict):
87
87
  class ReadResult:
88
88
  sheets: dict[str, dict | list]
89
89
  read_info_by_sheet: dict[str, SpreadsheetRead]
90
- role: RoleTypes
91
- schema: SchemaCompleteness | None
90
+ metadata: MetadataRaw
91
+
92
+ @property
93
+ def role(self) -> RoleTypes:
94
+ return self.metadata.role
95
+
96
+ @property
97
+ def schema(self) -> SchemaCompleteness | None:
98
+ return self.metadata.schema
92
99
 
93
100
 
94
101
  class SpreadsheetReader:
95
- def __init__(self, issue_list: IssueList, is_reference: bool = False):
102
+ def __init__(
103
+ self,
104
+ issue_list: IssueList,
105
+ required: bool = True,
106
+ metadata: MetadataRaw | None = None,
107
+ sheet_prefix: Literal["", "Last", "Ref"] = "",
108
+ ):
96
109
  self.issue_list = issue_list
97
- self._is_reference = is_reference
110
+ self.required = required
111
+ self.metadata = metadata
112
+ self._sheet_prefix = sheet_prefix
98
113
 
99
114
  @property
100
115
  def metadata_sheet_name(self) -> str:
101
- metadata_name = "Metadata"
102
- return self.to_reference_sheet(metadata_name) if self._is_reference else metadata_name
116
+ return f"{self._sheet_prefix}Metadata"
103
117
 
104
118
  def sheet_names(self, role: RoleTypes) -> set[str]:
105
119
  names = MANDATORY_SHEETS_BY_ROLE[role]
106
- return {self.to_reference_sheet(sheet_name) for sheet_name in names} if self._is_reference else names
107
-
108
- @classmethod
109
- def to_reference_sheet(cls, sheet_name: str) -> str:
110
- return f"Ref{sheet_name}"
120
+ return {f"{self._sheet_prefix}{sheet_name}" for sheet_name in names if sheet_name != "Metadata"}
111
121
 
112
122
  def read(self, filepath: Path) -> None | ReadResult:
113
123
  with pd.ExcelFile(filepath) as excel_file:
114
- if self.metadata_sheet_name not in excel_file.sheet_names:
124
+ metadata: MetadataRaw | None
125
+ if self.metadata is not None:
126
+ metadata = self.metadata
127
+ else:
128
+ metadata = self._read_metadata(excel_file, filepath)
129
+ if metadata is None:
130
+ # The reading of metadata failed, so we can't continue
131
+ return None
132
+
133
+ sheets, read_info_by_sheet = self._read_sheets(excel_file, metadata.role)
134
+ if sheets is None or self.issue_list.has_errors:
135
+ return None
136
+ sheets["Metadata"] = dict(metadata)
137
+
138
+ return ReadResult(sheets, read_info_by_sheet, metadata)
139
+
140
+ def _read_metadata(self, excel_file: ExcelFile, filepath: Path) -> MetadataRaw | None:
141
+ if self.metadata_sheet_name not in excel_file.sheet_names:
142
+ if self.required:
115
143
  self.issue_list.append(
116
144
  issues.spreadsheet_file.MetadataSheetMissingOrFailedError(
117
145
  filepath, sheet_name=self.metadata_sheet_name
118
146
  )
119
147
  )
120
- return None
121
-
122
- metadata = MetadataRaw.from_excel(excel_file, self.metadata_sheet_name)
148
+ return None
123
149
 
124
- if not metadata.is_valid(self.issue_list, filepath):
125
- return None
150
+ metadata = MetadataRaw.from_excel(excel_file, self.metadata_sheet_name)
126
151
 
127
- sheets, read_info_by_sheet = self._read_sheets(metadata, excel_file)
128
- if sheets is None or self.issue_list.has_errors:
129
- return None
130
-
131
- return ReadResult(sheets, read_info_by_sheet, metadata.role, metadata.schema)
152
+ if not metadata.is_valid(self.issue_list, filepath):
153
+ return None
154
+ return metadata
132
155
 
133
156
  def _read_sheets(
134
- self, metadata: MetadataRaw, excel_file: ExcelFile
157
+ self, excel_file: ExcelFile, read_role: RoleTypes
135
158
  ) -> tuple[dict[str, dict | list] | None, dict[str, SpreadsheetRead]]:
136
159
  read_info_by_sheet: dict[str, SpreadsheetRead] = defaultdict(SpreadsheetRead)
137
160
 
138
- sheets: dict[str, dict | list] = {"Metadata": dict(metadata)}
161
+ sheets: dict[str, dict | list] = {}
139
162
 
140
- expected_sheet_names = self.sheet_names(metadata.role)
163
+ expected_sheet_names = self.sheet_names(read_role)
141
164
 
142
165
  if missing_sheets := expected_sheet_names.difference(set(excel_file.sheet_names)):
143
- self.issue_list.append(
144
- issues.spreadsheet_file.SheetMissingError(cast(Path, excel_file.io), list(missing_sheets))
145
- )
166
+ if self.required:
167
+ self.issue_list.append(
168
+ issues.spreadsheet_file.SheetMissingError(cast(Path, excel_file.io), list(missing_sheets))
169
+ )
146
170
  return None, read_info_by_sheet
147
171
 
148
172
  for source_sheet_name, target_sheet_name, headers_input in SOURCE_SHEET__TARGET_FIELD__HEADERS:
149
- source_sheet_name = self.to_reference_sheet(source_sheet_name) if self._is_reference else source_sheet_name
173
+ source_sheet_name = f"{self._sheet_prefix}{source_sheet_name}"
150
174
 
151
175
  if source_sheet_name not in excel_file.sheet_names:
152
176
  continue
153
177
  if isinstance(headers_input, dict):
154
- headers = headers_input[metadata.role]
178
+ headers = headers_input[read_role]
155
179
  else:
156
180
  headers = headers_input
157
181
 
@@ -188,42 +212,37 @@ class ExcelImporter(BaseImporter):
188
212
  issue_list.append(issues.spreadsheet_file.SpreadsheetNotFoundError(self.filepath))
189
213
  return self._return_or_raise(issue_list, errors)
190
214
 
191
- user_result = SpreadsheetReader(issue_list, is_reference=False).read(self.filepath)
192
- if user_result is None or issue_list.has_errors:
215
+ user_read = SpreadsheetReader(issue_list).read(self.filepath)
216
+ if user_read is None or issue_list.has_errors:
193
217
  return self._return_or_raise(issue_list, errors)
194
218
 
195
- reference_result: ReadResult | None = None
196
- if (
197
- user_result
198
- and user_result.role != RoleTypes.domain_expert
199
- and user_result.schema == SchemaCompleteness.extended
200
- ):
201
- reference_result = SpreadsheetReader(issue_list, is_reference=True).read(self.filepath)
219
+ last_read: ReadResult | None = None
220
+ reference_read: ReadResult | None = None
221
+ if user_read.schema == SchemaCompleteness.extended:
222
+ # Last does not have its own metadata sheet. It is the same as the user's metadata sheet.
223
+ last_read = SpreadsheetReader(
224
+ issue_list, required=False, metadata=user_read.metadata, sheet_prefix="Last"
225
+ ).read(self.filepath)
226
+ reference_read = SpreadsheetReader(issue_list, sheet_prefix="Ref").read(self.filepath)
202
227
  if issue_list.has_errors:
203
228
  return self._return_or_raise(issue_list, errors)
204
229
 
205
- if user_result and reference_result and user_result.role != reference_result.role:
230
+ if reference_read and user_read.role != reference_read.role:
206
231
  issue_list.append(issues.spreadsheet_file.RoleMismatchError(self.filepath))
207
232
  return self._return_or_raise(issue_list, errors)
208
233
 
209
- if user_result and reference_result:
210
- user_result.sheets["reference"] = reference_result.sheets
211
- sheets = user_result.sheets
212
- original_role = user_result.role
213
- read_info_by_sheet = user_result.read_info_by_sheet
214
- read_info_by_sheet.update(reference_result.read_info_by_sheet)
215
- elif user_result:
216
- sheets = user_result.sheets
217
- original_role = user_result.role
218
- read_info_by_sheet = user_result.read_info_by_sheet
219
- elif reference_result:
220
- sheets = reference_result.sheets
221
- original_role = reference_result.role
222
- read_info_by_sheet = reference_result.read_info_by_sheet
223
- else:
224
- raise ValueError(
225
- "No rules were generated. This should have been caught earlier. " f"Bug in {type(self).__name__}."
226
- )
234
+ sheets = user_read.sheets
235
+ original_role = user_read.role
236
+ read_info_by_sheet = user_read.read_info_by_sheet
237
+ if last_read:
238
+ sheets["last"] = last_read.sheets
239
+ read_info_by_sheet.update(last_read.read_info_by_sheet)
240
+ if reference_read:
241
+ # The last rules will also be validated against the reference rules
242
+ sheets["last"]["reference"] = reference_read.sheets # type: ignore[call-overload]
243
+ if reference_read:
244
+ sheets["reference"] = reference_read.sheets
245
+ read_info_by_sheet.update(reference_read.read_info_by_sheet)
227
246
 
228
247
  rules_cls = RULES_PER_ROLE[original_role]
229
248
  with _handle_issues(
@@ -9,6 +9,7 @@ from .base import NeatValidationError, ValidationWarning
9
9
  __all__ = [
10
10
  "DMSSchemaError",
11
11
  "DMSSchemaWarning",
12
+ "IncompleteSchemaError",
12
13
  "MissingSpaceError",
13
14
  "MissingContainerError",
14
15
  "MissingContainerPropertyError",
@@ -19,12 +20,14 @@ __all__ = [
19
20
  "DirectRelationMissingSourceWarning",
20
21
  "ViewModelVersionNotMatchingWarning",
21
22
  "ViewModelSpaceNotMatchingWarning",
23
+ "ViewMapsToTooManyContainersWarning",
22
24
  "DuplicatedViewInDataModelError",
23
25
  "ContainerPropertyUsedMultipleTimesError",
24
26
  "EmptyContainerWarning",
25
27
  "UnsupportedConnectionWarning",
26
28
  "MultipleReferenceWarning",
27
29
  "HasDataFilterOnNoPropertiesViewWarning",
30
+ "HasDataFilterAppliedToTooManyContainersWarning",
28
31
  "ReverseRelationMissingOtherSideWarning",
29
32
  "NodeTypeFilterOnParentViewWarning",
30
33
  "ChangingContainerError",
@@ -40,6 +43,24 @@ class DMSSchemaError(NeatValidationError, ABC): ...
40
43
  class DMSSchemaWarning(ValidationWarning, ABC): ...
41
44
 
42
45
 
46
+ @dataclass(frozen=True)
47
+ class IncompleteSchemaError(DMSSchemaError):
48
+ description = "This error is raised when the schema is claimed to be complete but missing some components"
49
+ fix = "Either provide the missing components or change the schema to partial"
50
+ missing_component: dm.ContainerId | dm.ViewId
51
+
52
+ def message(self) -> str:
53
+ return (
54
+ "The data model schema is set to be complete, however, "
55
+ f"the referred component {self.missing_component} is not preset."
56
+ )
57
+
58
+ def dump(self) -> dict[str, Any]:
59
+ output = super().dump()
60
+ output["missing_component"] = self.missing_component
61
+ return output
62
+
63
+
43
64
  @dataclass(frozen=True)
44
65
  class MissingSpaceError(DMSSchemaError):
45
66
  description = "The spaced referred to by the Container/View/Node/Edge/DataModel does not exist"
@@ -250,6 +271,28 @@ class ViewModelSpaceNotMatchingWarning(DMSSchemaWarning):
250
271
  return output
251
272
 
252
273
 
274
+ @dataclass(frozen=True)
275
+ class ViewMapsToTooManyContainersWarning(DMSSchemaWarning):
276
+ description = "The view maps to more than 10 containers which impacts read/write performance of data model"
277
+ fix = "Try to have as few containers as possible to which the view maps to"
278
+ error_name: ClassVar[str] = "ViewMapsToTooManyContainers"
279
+ view_id: dm.ViewId
280
+ container_ids: set[dm.ContainerId]
281
+
282
+ def message(self) -> str:
283
+ return (
284
+ f"The view {self.view_id} maps to total of {len(self.container_ids)},."
285
+ "Mapping to more than 10 containers is not recommended and can lead to poor performances."
286
+ "Re-iterate the data model design to reduce the number of containers to which the view maps to."
287
+ )
288
+
289
+ def dump(self) -> dict[str, Any]:
290
+ output = super().dump()
291
+ output["view_id"] = self.view_id.dump()
292
+ output["container_ids"] = [container_id.dump() for container_id in self.container_ids]
293
+ return output
294
+
295
+
253
296
  @dataclass(frozen=True)
254
297
  class ContainerPropertyUsedMultipleTimesError(DMSSchemaError):
255
298
  description = "The container property is used multiple times by the same view property"
@@ -442,6 +485,28 @@ class HasDataFilterOnNoPropertiesViewWarning(DMSSchemaWarning):
442
485
  return output
443
486
 
444
487
 
488
+ @dataclass(frozen=True)
489
+ class HasDataFilterAppliedToTooManyContainersWarning(DMSSchemaWarning):
490
+ description = "The view filter hasData applied to more than 10 containers this will cause DMS API Error"
491
+ fix = "Do not map to more than 10 containers, alternatively override the filter by using rawFilter"
492
+ error_name: ClassVar[str] = "HasDataFilterAppliedToTooManyContainers"
493
+ view_id: dm.ViewId
494
+ container_ids: set[dm.ContainerId]
495
+
496
+ def message(self) -> str:
497
+ return (
498
+ f"The view {self.view_id} HasData filter applied to total of {len(self.container_ids)},."
499
+ "Applying HasData filter to more than 10 containers is not recommended and can lead to DMS API error."
500
+ "Re-iterate the data model design to reduce the number of containers to which the view maps to."
501
+ )
502
+
503
+ def dump(self) -> dict[str, Any]:
504
+ output = super().dump()
505
+ output["view_id"] = self.view_id.dump()
506
+ output["container_ids"] = [container_id.dump() for container_id in self.container_ids]
507
+ return output
508
+
509
+
445
510
  @dataclass(frozen=True)
446
511
  class NodeTypeFilterOnParentViewWarning(DMSSchemaWarning):
447
512
  description = (
@@ -303,6 +303,7 @@ class DMSRulesInput:
303
303
  properties: Sequence[DMSPropertyInput]
304
304
  views: Sequence[DMSViewInput]
305
305
  containers: Sequence[DMSContainerInput] | None = None
306
+ last: "DMSRulesInput | DMSRules | None" = None
306
307
  reference: "DMSRulesInput | DMSRules | None" = None
307
308
 
308
309
  @classmethod
@@ -323,6 +324,7 @@ class DMSRulesInput:
323
324
  properties=DMSPropertyInput.load(data.get("properties")), # type: ignore[arg-type]
324
325
  views=DMSViewInput.load(data.get("views")), # type: ignore[arg-type]
325
326
  containers=DMSContainerInput.load(data.get("containers")) or [],
327
+ last=DMSRulesInput.load(data.get("last")),
326
328
  reference=DMSRulesInput.load(data.get("reference")),
327
329
  )
328
330
 
@@ -338,12 +340,19 @@ class DMSRulesInput:
338
340
  elif isinstance(self.reference, DMSRules):
339
341
  # We need to load through the DMSRulesInput to set the correct default space and version
340
342
  reference = DMSRulesInput.load(self.reference.model_dump()).dump()
343
+ last: dict[str, Any] | None = None
344
+ if isinstance(self.last, DMSRulesInput):
345
+ last = self.last.dump()
346
+ elif isinstance(self.last, DMSRules):
347
+ # We need to load through the DMSRulesInput to set the correct default space and version
348
+ last = DMSRulesInput.load(self.last.model_dump()).dump()
341
349
 
342
350
  return dict(
343
351
  Metadata=self.metadata.dump(),
344
352
  Properties=[prop.dump(default_space, default_version) for prop in self.properties],
345
353
  Views=[view.dump(default_space, default_version) for view in self.views],
346
354
  Containers=[container.dump(default_space) for container in self.containers or []] or None,
355
+ Last=last,
347
356
  Reference=reference,
348
357
  )
349
358
 
@@ -20,6 +20,7 @@ from cognite.neat.rules.issues.dms import (
20
20
  DirectRelationMissingSourceWarning,
21
21
  DMSSchemaError,
22
22
  DuplicatedViewInDataModelError,
23
+ IncompleteSchemaError,
23
24
  MissingContainerError,
24
25
  MissingContainerPropertyError,
25
26
  MissingEdgeViewError,
@@ -32,6 +33,7 @@ from cognite.neat.rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE
32
33
  from cognite.neat.utils.cdf_loaders import ViewLoader
33
34
  from cognite.neat.utils.cdf_loaders.data_classes import RawTableWrite, RawTableWriteList
34
35
  from cognite.neat.utils.text import to_camel
36
+ from cognite.neat.utils.utils import get_inheritance_path
35
37
 
36
38
  if sys.version_info >= (3, 11):
37
39
  from typing import Self
@@ -60,6 +62,30 @@ class DMSSchema:
60
62
  "node": "node_types",
61
63
  }
62
64
 
65
+ def _get_mapped_container_from_view(self, view_id: dm.ViewId) -> set[dm.ContainerId]:
66
+ # index all views, including ones from reference
67
+ indexed_views = {
68
+ **{view.as_id(): view for view in self.views},
69
+ **({view.as_id(): view for view in self.reference.views} if self.reference else {}),
70
+ }
71
+
72
+ if view_id not in indexed_views:
73
+ raise ValueError(f"View {view_id} not found")
74
+
75
+ indexed_implemented_views = {id_: view.implements for id_, view in indexed_views.items()}
76
+ view_inheritance = get_inheritance_path(view_id, indexed_implemented_views)
77
+
78
+ directly_referenced_containers = indexed_views[view_id].referenced_containers()
79
+ inherited_referenced_containers = set()
80
+
81
+ for view_id in view_inheritance:
82
+ if implemented_view := indexed_views.get(view_id):
83
+ inherited_referenced_containers |= implemented_view.referenced_containers()
84
+ else:
85
+ raise IncompleteSchemaError(missing_component=view_id).as_exception()
86
+
87
+ return directly_referenced_containers | inherited_referenced_containers
88
+
63
89
  @classmethod
64
90
  def from_model_id(cls, client: CogniteClient, data_model_id: dm.DataModelIdentifier) -> "DMSSchema":
65
91
  data_models = client.data_modeling.data_models.retrieve(data_model_id, inline_views=True)
@@ -77,10 +77,10 @@ class _DMSRulesSerializer:
77
77
  dumped[self.prop_name]["data"], key=lambda p: (p[self.prop_view], p[self.prop_view_property])
78
78
  )
79
79
  dumped[self.view_name] = sorted(dumped[self.view_name]["data"], key=lambda v: v[self.view_view])
80
- if self.container_name in dumped:
81
- dumped[self.container_name] = sorted(
82
- dumped[self.container_name]["data"], key=lambda c: c[self.container_container]
83
- )
80
+ if container_data := dumped.get(self.container_name):
81
+ dumped[self.container_name] = sorted(container_data["data"], key=lambda c: c[self.container_container])
82
+ else:
83
+ dumped.pop(self.container_name, None)
84
84
 
85
85
  for prop in dumped[self.prop_name]:
86
86
  for field_name in self.properties_fields:
@@ -105,7 +105,7 @@ class _DMSRulesSerializer:
105
105
  for field in self.exclude_views:
106
106
  view.pop(field, None)
107
107
 
108
- for container in dumped[self.container_name]:
108
+ for container in dumped.get(self.container_name, []):
109
109
  for field_name in self.containers_fields:
110
110
  if value := container.get(field_name):
111
111
  container[field_name] = value.removeprefix(self.default_space)
@@ -1,6 +1,8 @@
1
1
  from collections import defaultdict
2
2
  from typing import Any
3
3
 
4
+ from cognite.client import data_modeling as dm
5
+
4
6
  from cognite.neat.rules import issues
5
7
  from cognite.neat.rules.issues import IssueList
6
8
  from cognite.neat.rules.models._base import ExtensionCategory, SchemaCompleteness
@@ -27,6 +29,7 @@ class DMSPostValidation:
27
29
  self._referenced_views_and_containers_are_existing()
28
30
  self._validate_extension()
29
31
  self._validate_schema()
32
+ self._validate_performance()
30
33
  return self.issue_list
31
34
 
32
35
  def _consistent_container_properties(self) -> None:
@@ -208,6 +211,36 @@ class DMSPostValidation:
208
211
  )
209
212
  )
210
213
 
214
+ def _validate_performance(self) -> None:
215
+ # we can only validate performance on complete schemas due to the need
216
+ # to access all the container mappings
217
+ if self.metadata.schema_ is not SchemaCompleteness.complete:
218
+ return None
219
+
220
+ dms_schema = self.rules.as_schema()
221
+
222
+ for view in dms_schema.views:
223
+ mapped_containers = dms_schema._get_mapped_container_from_view(view.as_id())
224
+
225
+ if mapped_containers and len(mapped_containers) > 10:
226
+ self.issue_list.append(
227
+ issues.dms.ViewMapsToTooManyContainersWarning(
228
+ view_id=view.as_id(),
229
+ container_ids=mapped_containers,
230
+ )
231
+ )
232
+ if (
233
+ view.filter
234
+ and isinstance(view.filter, dm.filters.HasData)
235
+ and len(view.filter.dump()["hasData"]) > 10
236
+ ):
237
+ self.issue_list.append(
238
+ issues.dms.HasDataFilterAppliedToTooManyContainersWarning(
239
+ view_id=view.as_id(),
240
+ container_ids=mapped_containers,
241
+ )
242
+ )
243
+
211
244
  @staticmethod
212
245
  def _changed_attributes_and_properties(
213
246
  new_dumped: dict[str, Any], existing_dumped: dict[str, Any]
@@ -55,6 +55,7 @@ class DomainRules(RuleModel):
55
55
  metadata: DomainMetadata = Field(alias="Metadata")
56
56
  properties: SheetList[DomainProperty] = Field(alias="Properties")
57
57
  classes: SheetList[DomainClass] | None = Field(None, alias="Classes")
58
+ last: "DomainRules | None" = Field(None, alias="Last")
58
59
  reference: "DomainRules | None" = Field(None, alias="Reference")
59
60
 
60
61
  @model_serializer(mode="plain", when_used="always")
@@ -4,6 +4,7 @@ from datetime import datetime
4
4
  from typing import Literal
5
5
 
6
6
  from cognite.neat.rules.models._base import (
7
+ DataModelType,
7
8
  SheetList,
8
9
  )
9
10
  from cognite.neat.rules.models.data_types import DataType
@@ -45,6 +46,7 @@ class _InformationRulesConverter:
45
46
  metadata = DMSMetadata(
46
47
  schema_=info_metadata.schema_,
47
48
  space=space,
49
+ data_model_type=DataModelType.solution if self.information.reference else DataModelType.enterprise,
48
50
  version=info_metadata.version,
49
51
  external_id=info_metadata.name.replace(" ", "_").lower(),
50
52
  creator=info_metadata.creator,
@@ -292,7 +292,8 @@ class RulesToExcel(Step):
292
292
  if role != "input" and role is not None:
293
293
  output_role = RoleTypes[role]
294
294
 
295
- excel_exporter = exporters.ExcelExporter(styling=styling, output_role=output_role, is_reference=is_reference)
295
+ dump_as = "reference" if is_reference else "user"
296
+ excel_exporter = exporters.ExcelExporter(styling=styling, output_role=output_role, dump_as=dump_as) # type: ignore[arg-type]
296
297
 
297
298
  rule_instance: Rules
298
299
  if rules.domain:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cognite-neat
3
- Version: 0.76.2
3
+ Version: 0.76.3
4
4
  Summary: Knowledge graph transformation
5
5
  Home-page: https://cognite-neat.readthedocs-hosted.com/
6
6
  License: Apache-2.0
@@ -1,5 +1,5 @@
1
1
  cognite/neat/__init__.py,sha256=v-rRiDOgZ3sQSMQKq0vgUQZvpeOkoHFXissAx6Ktg84,61
2
- cognite/neat/_version.py,sha256=HtluIH-Z1E-exBrYUiTjSYnxRCglf6JdDjRnCX5A30k,23
2
+ cognite/neat/_version.py,sha256=OtWfvEnAHBJj0F1t4KUE2UmEyH4tIaTqHJzrcWyW6tI,23
3
3
  cognite/neat/app/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  cognite/neat/app/api/asgi/metrics.py,sha256=nxFy7L5cChTI0a-zkCiJ59Aq8yLuIJp5c9Dg0wRXtV0,152
5
5
  cognite/neat/app/api/configuration.py,sha256=2U5M6M252swvQPQyooA1EBzFUZNtcTmuSaywfJDgckM,4232
@@ -166,7 +166,7 @@ cognite/neat/rules/exporters/__init__.py,sha256=Gn3CjkVKHJF9Po1ZPH4wAJ-sRW9up7b2
166
166
  cognite/neat/rules/exporters/_base.py,sha256=m63iw8xjlZbZAxGL8mn7pjGf1pW3rVv8C20_RSiu4t0,1511
167
167
  cognite/neat/rules/exporters/_models.py,sha256=vRd0P_YsrZ1eaAGGHfdTeFunaqHdaa0ZtnWiVZBR1nc,1976
168
168
  cognite/neat/rules/exporters/_rules2dms.py,sha256=BNznUtTdJ__M10I7QQf3_zdIQTET8SGFvHv5a-5louM,13529
169
- cognite/neat/rules/exporters/_rules2excel.py,sha256=zjGmUxQmsnHCEmSM5KvGQCFapdaDCCMJQRaLkVsI8mM,13034
169
+ cognite/neat/rules/exporters/_rules2excel.py,sha256=K3D_AC6UZ-cG9ZFkqFvuDiMTdBC9ZUW9_IkkY9KsYW0,14934
170
170
  cognite/neat/rules/exporters/_rules2ontology.py,sha256=NWS3cn2927LQqW_PdQ-92OLIlmIKGNk7xh5yOMAyj94,20120
171
171
  cognite/neat/rules/exporters/_rules2yaml.py,sha256=sOSdnTJ5mXuyAJECdNnNsX6oLvgETptkpgPUQbK0n2w,3026
172
172
  cognite/neat/rules/exporters/_validation.py,sha256=OlKIyf4nhSDehJwFHDQ8Zdf6HpNfW7dSe2s67eywHu4,4078
@@ -183,11 +183,11 @@ cognite/neat/rules/importers/_owl2rules/_owl2classes.py,sha256=LInFeBq-NbBIuMEAw
183
183
  cognite/neat/rules/importers/_owl2rules/_owl2metadata.py,sha256=NdPN0dBB0NYkAcfC0yrYdIrGfdPbl5gfeGnSV3EtUPM,7786
184
184
  cognite/neat/rules/importers/_owl2rules/_owl2properties.py,sha256=BLptGmH-Aa5gZu0hDIxSZTrn9GmB2FicWgRYoETLSnQ,7437
185
185
  cognite/neat/rules/importers/_owl2rules/_owl2rules.py,sha256=H2Vv56hXGFnq_b0obWGWr5ErDFcoWpT8G2uy89100cU,6925
186
- cognite/neat/rules/importers/_spreadsheet2rules.py,sha256=eCDxFImXn_tQagjET_fxC9wriQmrWqRhghqilPfDXPI,11353
186
+ cognite/neat/rules/importers/_spreadsheet2rules.py,sha256=dKTue97yZlPQagegfdyOWthDQ6X07hfz18KLXZlUPtA,11882
187
187
  cognite/neat/rules/importers/_yaml2rules.py,sha256=F0uksSz1A3po5OlRM2152_w5j8D9oYTLB9NFTkSMlWI,4275
188
188
  cognite/neat/rules/issues/__init__.py,sha256=Ms6jgCxCezc5IgTOwCFtXQPtoVFfOvdcXj84_rs917I,563
189
189
  cognite/neat/rules/issues/base.py,sha256=i2aTC-wq3UVW2bj_7wKeuhYxCpMD06Bd9-m00bWcTBs,6438
190
- cognite/neat/rules/issues/dms.py,sha256=0xBTYa3b3CPFjsdfKv59l3yX8DLTYB1Jo0LWDhfG6oQ,19178
190
+ cognite/neat/rules/issues/dms.py,sha256=7UyVCqLfky6-KR9-pGNDhJhhMpAxuvOXj2FG-OR5kSA,22035
191
191
  cognite/neat/rules/issues/fileread.py,sha256=ao199mtvhPSW0IA8ZQZ0RzuLIIipYtL0jp6fLqxb4_c,5748
192
192
  cognite/neat/rules/issues/formatters.py,sha256=_ag2bJ9hncOj8pAGJvTTEPs9kTtxbD7vkqvS9Zcnizc,3385
193
193
  cognite/neat/rules/issues/importing.py,sha256=p90847g_TbUAXMyxalcEaWXaPygJoSE5E85dQO4syoo,12288
@@ -204,14 +204,14 @@ cognite/neat/rules/models/dms/__init__.py,sha256=Wzyqzz2ZIjpUbDg04CMuuIAw-f2A02D
204
204
  cognite/neat/rules/models/dms/_converter.py,sha256=x3u3jLnkknozoXXoAXXOWFHCsppqUwSvWv9wMOJ2F1Y,5706
205
205
  cognite/neat/rules/models/dms/_exporter.py,sha256=9EVToiib2QSxEXOzP7-xSlitpUlWSZTHar1HLIrtqxI,18908
206
206
  cognite/neat/rules/models/dms/_rules.py,sha256=CesPmLT4rC7rB_Q895nIV34lwfg3RC9NoU1wsz5l-hA,15599
207
- cognite/neat/rules/models/dms/_rules_input.py,sha256=WcdL_ztq1fAd8dv0Ob6X865pMMc_T1TVd4k2-kwlE2c,13147
208
- cognite/neat/rules/models/dms/_schema.py,sha256=-sMP5_2_DjY_QwdNpDWvSErEorTtZqUa7dATz0S9mZs,36236
209
- cognite/neat/rules/models/dms/_serializer.py,sha256=MYPpkbuor75PoY6kIk6O4elFqnKU8_0ON39nMtkG3dU,6619
210
- cognite/neat/rules/models/dms/_validation.py,sha256=k1awA0nMpv3G1dyWQd3q1GX1RBjJgEzN_G-uVlomaUY,12960
211
- cognite/neat/rules/models/domain.py,sha256=2S74P9YPPtb6myx8wg3-el9jrEBMH9AOBg9dAfwzlh4,2934
207
+ cognite/neat/rules/models/dms/_rules_input.py,sha256=qfInh3JYf7XGEghxPOtIj7GY0f5_aVvnYeUBmfGV9mk,13620
208
+ cognite/neat/rules/models/dms/_schema.py,sha256=cUAkIp7TCYVCshLZwQJcJMj_BduGSI3Y4_LwVxHUUUM,37463
209
+ cognite/neat/rules/models/dms/_serializer.py,sha256=Zulj__rnaVNtrbGJPkn4dYMfMXWYyRmtNPR2Yb5zYW0,6668
210
+ cognite/neat/rules/models/dms/_validation.py,sha256=QaNBqKi9YGqkYw_cwO_FcH3vRI2dXAORC6fjqV3D9HM,14290
211
+ cognite/neat/rules/models/domain.py,sha256=13OhG-XavE5ipU2ICaYaUhz60volkuVfbJrsp0PhaUU,2993
212
212
  cognite/neat/rules/models/entities.py,sha256=iBG84Jr1qQ7PvkMJUJzJ1oWApeONb1IACixdJSztUhk,16395
213
213
  cognite/neat/rules/models/information/__init__.py,sha256=KvbYxVk38qReGbGTrU_Y3P3Gz6Bfghk5lHSKs8DlTOI,195
214
- cognite/neat/rules/models/information/_converter.py,sha256=cNWMBTsJ412M9MW2ytcafDKuWYZc_xcjwcsUag0Fs54,7833
214
+ cognite/neat/rules/models/information/_converter.py,sha256=jzaIk7Q2CeU3TIGULEINwUNNyhWu-VdOW646EjH_FrI,7964
215
215
  cognite/neat/rules/models/information/_rules.py,sha256=YE7X8MsPQv-AVtl4vYtQW99moT45sYk2dI2DDS1YRO0,15546
216
216
  cognite/neat/rules/models/wrapped_entities.py,sha256=c5GkzOrYrE6SSRzIS2r8OAjhwxXpOoAO1WGc8kwiPPo,6154
217
217
  cognite/neat/utils/__init__.py,sha256=l5Nyqhqo25bcQXCOb_lk01cr-UXsG8cczz_y_I0u6bg,68
@@ -253,7 +253,7 @@ cognite/neat/workflows/steps/lib/current/__init__.py,sha256=c22IznGdCSNCpXCi_yon
253
253
  cognite/neat/workflows/steps/lib/current/graph_extractor.py,sha256=vW9UpJScx5dFVCSairpOdWRdBdLpkCt2kNh6litbF0o,5161
254
254
  cognite/neat/workflows/steps/lib/current/graph_loader.py,sha256=HfGg1HRZhbV58TFu89FTjKeUxGsbCYLeFJIQFDN_pQM,2341
255
255
  cognite/neat/workflows/steps/lib/current/graph_store.py,sha256=r7VTxdaz8jJQU7FJbnRDMxvEYbSAZFNMABhPyfNwiFk,6295
256
- cognite/neat/workflows/steps/lib/current/rules_exporter.py,sha256=Xe0b-ngLvfkc_uxvoG-BOzIuYdUup-QfSoJR31cIOwE,22824
256
+ cognite/neat/workflows/steps/lib/current/rules_exporter.py,sha256=wUQAZXWBCqWXe0241QSREtnNTii_tSmOkeiSPwNQRjk,22898
257
257
  cognite/neat/workflows/steps/lib/current/rules_importer.py,sha256=yDq06cvxLvEpSnTXTjwhxDie_MzHa3wO1A4cbKnrH6c,10338
258
258
  cognite/neat/workflows/steps/lib/current/rules_validator.py,sha256=fDRQiRHN9Cuph38-WruK0T1UG5H448S_GsbzdOpi0h4,4729
259
259
  cognite/neat/workflows/steps/lib/io/__init__.py,sha256=k7IPbIq3ey19oRc5sA_15F99-O6dxzqbm1LihGRRo5A,32
@@ -271,8 +271,8 @@ cognite/neat/workflows/steps_registry.py,sha256=fkTX14ZA7_gkUYfWIlx7A1XbCidvqR23
271
271
  cognite/neat/workflows/tasks.py,sha256=dqlJwKAb0jlkl7abbY8RRz3m7MT4SK8-7cntMWkOYjw,788
272
272
  cognite/neat/workflows/triggers.py,sha256=_BLNplzoz0iic367u1mhHMHiUrCwP-SLK6_CZzfODX0,7071
273
273
  cognite/neat/workflows/utils.py,sha256=gKdy3RLG7ctRhbCRwaDIWpL9Mi98zm56-d4jfHDqP1E,453
274
- cognite_neat-0.76.2.dist-info/LICENSE,sha256=W8VmvFia4WHa3Gqxq1Ygrq85McUNqIGDVgtdvzT-XqA,11351
275
- cognite_neat-0.76.2.dist-info/METADATA,sha256=0DLiYko7Fua8EdVbh9D7gvftXhrk9TyltRtIs_HLvkY,9316
276
- cognite_neat-0.76.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
277
- cognite_neat-0.76.2.dist-info/entry_points.txt,sha256=61FPqiWb25vbqB0KI7znG8nsg_ibLHBvTjYnkPvNFso,50
278
- cognite_neat-0.76.2.dist-info/RECORD,,
274
+ cognite_neat-0.76.3.dist-info/LICENSE,sha256=W8VmvFia4WHa3Gqxq1Ygrq85McUNqIGDVgtdvzT-XqA,11351
275
+ cognite_neat-0.76.3.dist-info/METADATA,sha256=N-fbQdrmuZ820x4fcdayOruzEbkDoCD1TU3scIJhWxo,9316
276
+ cognite_neat-0.76.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
277
+ cognite_neat-0.76.3.dist-info/entry_points.txt,sha256=61FPqiWb25vbqB0KI7znG8nsg_ibLHBvTjYnkPvNFso,50
278
+ cognite_neat-0.76.3.dist-info/RECORD,,