cognite-neat 0.109.4__py3-none-any.whl → 0.111.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (88)
  1. cognite/neat/_alpha.py +8 -0
  2. cognite/neat/_client/_api/schema.py +43 -1
  3. cognite/neat/_client/data_classes/schema.py +4 -4
  4. cognite/neat/_constants.py +15 -1
  5. cognite/neat/_graph/extractors/__init__.py +4 -0
  6. cognite/neat/_graph/extractors/_classic_cdf/_base.py +8 -16
  7. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +48 -19
  8. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +23 -17
  9. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +15 -17
  10. cognite/neat/_graph/extractors/_dict.py +102 -0
  11. cognite/neat/_graph/extractors/_dms.py +27 -40
  12. cognite/neat/_graph/extractors/_dms_graph.py +30 -3
  13. cognite/neat/_graph/extractors/_iodd.py +3 -3
  14. cognite/neat/_graph/extractors/_mock_graph_generator.py +9 -7
  15. cognite/neat/_graph/extractors/_raw.py +67 -0
  16. cognite/neat/_graph/loaders/_base.py +20 -4
  17. cognite/neat/_graph/loaders/_rdf2dms.py +476 -383
  18. cognite/neat/_graph/queries/_base.py +163 -133
  19. cognite/neat/_graph/transformers/__init__.py +1 -3
  20. cognite/neat/_graph/transformers/_classic_cdf.py +6 -22
  21. cognite/neat/_graph/transformers/_rdfpath.py +2 -49
  22. cognite/neat/_issues/__init__.py +1 -6
  23. cognite/neat/_issues/_base.py +21 -252
  24. cognite/neat/_issues/_contextmanagers.py +46 -0
  25. cognite/neat/_issues/_factory.py +69 -0
  26. cognite/neat/_issues/errors/__init__.py +20 -4
  27. cognite/neat/_issues/errors/_external.py +7 -0
  28. cognite/neat/_issues/errors/_wrapper.py +81 -3
  29. cognite/neat/_issues/formatters.py +4 -4
  30. cognite/neat/_issues/warnings/__init__.py +3 -2
  31. cognite/neat/_issues/warnings/_properties.py +8 -0
  32. cognite/neat/_issues/warnings/user_modeling.py +12 -0
  33. cognite/neat/_rules/_constants.py +12 -0
  34. cognite/neat/_rules/_shared.py +3 -2
  35. cognite/neat/_rules/analysis/__init__.py +2 -3
  36. cognite/neat/_rules/analysis/_base.py +430 -259
  37. cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
  38. cognite/neat/_rules/exporters/_rules2excel.py +3 -9
  39. cognite/neat/_rules/exporters/_rules2instance_template.py +2 -2
  40. cognite/neat/_rules/exporters/_rules2ontology.py +5 -4
  41. cognite/neat/_rules/importers/_base.py +2 -47
  42. cognite/neat/_rules/importers/_dms2rules.py +7 -10
  43. cognite/neat/_rules/importers/_dtdl2rules/dtdl_importer.py +2 -2
  44. cognite/neat/_rules/importers/_rdf/_inference2rules.py +66 -26
  45. cognite/neat/_rules/importers/_rdf/_shared.py +1 -1
  46. cognite/neat/_rules/importers/_spreadsheet2rules.py +12 -9
  47. cognite/neat/_rules/models/_base_rules.py +0 -2
  48. cognite/neat/_rules/models/data_types.py +7 -0
  49. cognite/neat/_rules/models/dms/_exporter.py +9 -8
  50. cognite/neat/_rules/models/dms/_rules.py +29 -2
  51. cognite/neat/_rules/models/dms/_rules_input.py +9 -1
  52. cognite/neat/_rules/models/dms/_validation.py +115 -5
  53. cognite/neat/_rules/models/entities/_loaders.py +1 -1
  54. cognite/neat/_rules/models/entities/_multi_value.py +2 -2
  55. cognite/neat/_rules/models/entities/_single_value.py +8 -3
  56. cognite/neat/_rules/models/entities/_wrapped.py +2 -2
  57. cognite/neat/_rules/models/information/_rules.py +18 -17
  58. cognite/neat/_rules/models/information/_rules_input.py +3 -1
  59. cognite/neat/_rules/models/information/_validation.py +66 -17
  60. cognite/neat/_rules/transformers/__init__.py +8 -2
  61. cognite/neat/_rules/transformers/_converters.py +234 -44
  62. cognite/neat/_rules/transformers/_verification.py +5 -10
  63. cognite/neat/_session/_base.py +6 -4
  64. cognite/neat/_session/_explore.py +39 -0
  65. cognite/neat/_session/_inspect.py +25 -6
  66. cognite/neat/_session/_prepare.py +12 -0
  67. cognite/neat/_session/_read.py +88 -20
  68. cognite/neat/_session/_set.py +7 -1
  69. cognite/neat/_session/_show.py +11 -123
  70. cognite/neat/_session/_state.py +6 -2
  71. cognite/neat/_session/_subset.py +64 -0
  72. cognite/neat/_session/_to.py +177 -19
  73. cognite/neat/_store/_graph_store.py +9 -246
  74. cognite/neat/_utils/rdf_.py +36 -5
  75. cognite/neat/_utils/spreadsheet.py +44 -1
  76. cognite/neat/_utils/text.py +124 -37
  77. cognite/neat/_utils/upload.py +2 -0
  78. cognite/neat/_version.py +2 -2
  79. {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/METADATA +1 -1
  80. {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/RECORD +83 -82
  81. {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/WHEEL +1 -1
  82. cognite/neat/_graph/queries/_construct.py +0 -187
  83. cognite/neat/_graph/queries/_shared.py +0 -173
  84. cognite/neat/_rules/analysis/_dms.py +0 -57
  85. cognite/neat/_rules/analysis/_information.py +0 -249
  86. cognite/neat/_rules/models/_rdfpath.py +0 -372
  87. {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/LICENSE +0 -0
  88. {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/entry_points.txt +0 -0
@@ -17,7 +17,7 @@ from cognite.neat._client import NeatClient
17
17
  from cognite.neat._client.data_classes.data_modeling import ViewApplyDict
18
18
  from cognite.neat._client.data_classes.schema import DMSSchema
19
19
  from cognite.neat._constants import COGNITE_MODELS, DMS_CONTAINER_PROPERTY_SIZE_LIMIT, DMS_VIEW_CONTAINER_SIZE_LIMIT
20
- from cognite.neat._issues import IssueList, NeatError, NeatIssueList
20
+ from cognite.neat._issues import IssueList, NeatError
21
21
  from cognite.neat._issues.errors import (
22
22
  CDFMissingClientError,
23
23
  PropertyDefinitionDuplicatedError,
@@ -27,10 +27,12 @@ from cognite.neat._issues.errors import (
27
27
  ResourceNotFoundError,
28
28
  ReversedConnectionNotFeasibleError,
29
29
  )
30
+ from cognite.neat._issues.errors._external import CDFMissingResourcesError
30
31
  from cognite.neat._issues.warnings import (
31
32
  NotSupportedHasDataFilterLimitWarning,
32
33
  NotSupportedViewContainerLimitWarning,
33
34
  UndefinedViewWarning,
35
+ user_modeling,
34
36
  )
35
37
  from cognite.neat._issues.warnings.user_modeling import (
36
38
  ContainerPropertyLimitWarning,
@@ -42,6 +44,8 @@ from cognite.neat._rules.models.entities import ContainerEntity, RawFilter
42
44
  from cognite.neat._rules.models.entities._single_value import (
43
45
  ViewEntity,
44
46
  )
47
+ from cognite.neat._utils.spreadsheet import SpreadsheetRead
48
+ from cognite.neat._utils.text import humanize_collection
45
49
 
46
50
  from ._rules import DMSProperty, DMSRules
47
51
 
@@ -54,13 +58,19 @@ class DMSValidation:
54
58
  # For example, changing the filter is allowed, but changing the properties is not.
55
59
  changeable_view_attributes: ClassVar[set[str]] = {"filter"}
56
60
 
57
- def __init__(self, rules: DMSRules, client: NeatClient | None = None) -> None:
61
+ def __init__(
62
+ self,
63
+ rules: DMSRules,
64
+ client: NeatClient | None = None,
65
+ read_info_by_spreadsheet: dict[str, SpreadsheetRead] | None = None,
66
+ ) -> None:
58
67
  self._rules = rules
59
68
  self._client = client
60
69
  self._metadata = rules.metadata
61
70
  self._properties = rules.properties
62
71
  self._containers = rules.containers
63
72
  self._views = rules.views
73
+ self._read_info_by_spreadsheet = read_info_by_spreadsheet or {}
64
74
 
65
75
  def imported_views_and_containers_ids(
66
76
  self, include_views_with_no_properties: bool = True
@@ -87,7 +97,7 @@ class DMSValidation:
87
97
 
88
98
  return imported_views, imported_containers
89
99
 
90
- def validate(self) -> NeatIssueList:
100
+ def validate(self) -> IssueList:
91
101
  imported_views, imported_containers = self.imported_views_and_containers_ids(
92
102
  include_views_with_no_properties=False
93
103
  )
@@ -106,6 +116,14 @@ class DMSValidation:
106
116
  list(imported_containers), include_connected=True
107
117
  )
108
118
 
119
+ missing_views = {view.as_id() for view in imported_views} - {view.as_id() for view in referenced_views}
120
+ missing_containers = {container.as_id() for container in imported_containers} - {
121
+ container.as_id() for container in referenced_containers
122
+ }
123
+
124
+ if missing_views or missing_containers:
125
+ raise CDFMissingResourcesError(resources=f"{missing_views.union(missing_containers)}")
126
+
109
127
  # Setup data structures for validation
110
128
  dms_schema = self._rules.as_schema()
111
129
  ref_view_by_id = {view.as_id(): view for view in referenced_views}
@@ -123,6 +141,10 @@ class DMSValidation:
123
141
  parents_view_ids_by_child_id = self._parent_view_ids_by_child_id(all_views_by_id)
124
142
 
125
143
  issue_list = IssueList()
144
+
145
+ # Validated for duplicated resource
146
+ issue_list.extend(self._duplicated_resources())
147
+
126
148
  # Neat DMS classes Validation
127
149
  # These are errors that can only happen due to the format of the Neat DMS classes
128
150
  issue_list.extend(self._validate_raw_filter())
@@ -141,6 +163,91 @@ class DMSValidation:
141
163
  )
142
164
  issue_list.extend(self._validate_schema(dms_schema, all_views_by_id, all_containers_by_id))
143
165
  issue_list.extend(self._validate_referenced_container_limits(dms_schema.views, view_properties_by_id))
166
+ issue_list.extend(self._same_space_views_and_data_model())
167
+ return issue_list
168
+
169
+ def _same_space_views_and_data_model(self) -> IssueList:
170
+ issue_list = IssueList()
171
+
172
+ schema = self._rules.as_schema(remove_cdf_spaces=True)
173
+
174
+ if schema.data_model and schema.views:
175
+ data_model_space = schema.data_model.space
176
+ views_spaces = {view.space for view in schema.views.values()}
177
+
178
+ if data_model_space not in views_spaces:
179
+ issue_list.append(
180
+ user_modeling.ViewsAndDataModelNotInSameSpaceWarning(
181
+ data_model_space=data_model_space,
182
+ views_spaces=humanize_collection(views_spaces),
183
+ )
184
+ )
185
+
186
+ return issue_list
187
+
188
+ def _duplicated_resources(self) -> IssueList:
189
+ issue_list = IssueList()
190
+
191
+ properties_sheet = self._read_info_by_spreadsheet.get("Properties")
192
+ views_sheet = self._read_info_by_spreadsheet.get("Views")
193
+ containers_sheet = self._read_info_by_spreadsheet.get("Containers")
194
+
195
+ visited = defaultdict(list)
196
+ for row_no, property_ in enumerate(self._properties):
197
+ visited[property_._identifier()].append(
198
+ properties_sheet.adjusted_row_number(row_no) if properties_sheet else row_no + 1
199
+ )
200
+
201
+ for identifier, rows in visited.items():
202
+ if len(rows) == 1:
203
+ continue
204
+ issue_list.append(
205
+ ResourceDuplicatedError(
206
+ identifier[1],
207
+ "property",
208
+ (
209
+ f"the Properties sheet at row {humanize_collection(rows)} "
210
+ "if data model is read from a spreadsheet."
211
+ ),
212
+ )
213
+ )
214
+
215
+ visited = defaultdict(list)
216
+ for row_no, view in enumerate(self._views):
217
+ visited[view._identifier()].append(views_sheet.adjusted_row_number(row_no) if views_sheet else row_no + 1)
218
+
219
+ for identifier, rows in visited.items():
220
+ if len(rows) == 1:
221
+ continue
222
+ issue_list.append(
223
+ ResourceDuplicatedError(
224
+ identifier[0],
225
+ "view",
226
+ (f"the Views sheet at row {humanize_collection(rows)} if data model is read from a spreadsheet."),
227
+ )
228
+ )
229
+
230
+ if self._containers:
231
+ visited = defaultdict(list)
232
+ for row_no, container in enumerate(self._containers):
233
+ visited[container._identifier()].append(
234
+ containers_sheet.adjusted_row_number(row_no) if containers_sheet else row_no + 1
235
+ )
236
+
237
+ for identifier, rows in visited.items():
238
+ if len(rows) == 1:
239
+ continue
240
+ issue_list.append(
241
+ ResourceDuplicatedError(
242
+ identifier[0],
243
+ "container",
244
+ (
245
+ f"the Containers sheet at row {humanize_collection(rows)} "
246
+ "if data model is read from a spreadsheet."
247
+ ),
248
+ )
249
+ )
250
+
144
251
  return issue_list
145
252
 
146
253
  @staticmethod
@@ -212,13 +319,16 @@ class DMSValidation:
212
319
  for prop_no, prop in enumerate(self._properties):
213
320
  if prop.container and prop.container_property:
214
321
  container_properties_by_id[(prop.container, prop.container_property)].append((prop_no, prop))
215
-
322
+ properties_sheet = self._read_info_by_spreadsheet.get("Properties")
216
323
  errors = IssueList()
217
324
  for (container, prop_name), properties in container_properties_by_id.items():
218
325
  if len(properties) == 1:
219
326
  continue
220
327
  container_id = container.as_id()
328
+
221
329
  row_numbers = {prop_no for prop_no, _ in properties}
330
+ if properties_sheet:
331
+ row_numbers = {properties_sheet.adjusted_row_number(row_no) for row_no in row_numbers}
222
332
  value_types = {prop.value_type for _, prop in properties if prop.value_type}
223
333
  # The container type 'direct' is an exception. On a container the type direct can point to any
224
334
  # node. The value type is typically set on the view.
@@ -585,7 +695,7 @@ class DMSValidation:
585
695
  ResourceDuplicatedError(
586
696
  view_id,
587
697
  "view",
588
- repr(model.as_id()),
698
+ f"DMS {model.as_id()!r}",
589
699
  )
590
700
  )
591
701
 
@@ -23,7 +23,7 @@ def load_value_type(
23
23
  elif isinstance(raw, str):
24
24
  # property holding xsd data type
25
25
  # check if it is multi value type
26
- if "|" in raw:
26
+ if "," in raw:
27
27
  value_type = MultiValueTypeInfo.load(raw)
28
28
  value_type.set_default_prefix(default_prefix)
29
29
  return value_type
@@ -18,7 +18,7 @@ class MultiValueTypeInfo(BaseModel):
18
18
  types: list[DataType | ClassEntity]
19
19
 
20
20
  def __str__(self) -> str:
21
- return " | ".join([str(t) for t in self.types])
21
+ return ", ".join([str(t) for t in self.types])
22
22
 
23
23
  @model_serializer(when_used="unless-none", return_type=str)
24
24
  def as_str(self) -> str:
@@ -52,7 +52,7 @@ class MultiValueTypeInfo(BaseModel):
52
52
 
53
53
  @classmethod
54
54
  def _parse(cls, raw: str) -> dict:
55
- if not (types := [type_.strip() for type_ in raw.split("|")]):
55
+ if not (types := [type_.strip() for type_ in raw.split(",")]):
56
56
  return {"types": [UnknownEntity()]}
57
57
  else:
58
58
  return {
@@ -35,6 +35,7 @@ else:
35
35
  from cognite.neat._rules._constants import (
36
36
  ENTITY_PATTERN,
37
37
  SPLIT_ON_COMMA_PATTERN,
38
+ SPLIT_ON_EDGE_ENTITY_ARGS_PATTERN,
38
39
  SPLIT_ON_EQUAL_PATTERN,
39
40
  EntityTypes,
40
41
  )
@@ -135,9 +136,13 @@ class Entity(BaseModel, extra="ignore"):
135
136
  if content is None:
136
137
  return dict(prefix=prefix, suffix=suffix)
137
138
  try:
138
- extra_args = dict(
139
- SPLIT_ON_EQUAL_PATTERN.split(pair.strip()) for pair in SPLIT_ON_COMMA_PATTERN.split(content)
140
- )
139
+ if cls == EdgeEntity:
140
+ matches = SPLIT_ON_EDGE_ENTITY_ARGS_PATTERN.findall(content)
141
+ extra_args = {key: value for key, value in matches}
142
+ else:
143
+ extra_args = dict(
144
+ SPLIT_ON_EQUAL_PATTERN.split(pair.strip()) for pair in SPLIT_ON_COMMA_PATTERN.split(content)
145
+ )
141
146
  except ValueError:
142
147
  raise NeatValueError(f"Invalid {cls.type_.value} entity: {raw!r}") from None
143
148
  expected_args = {
@@ -127,8 +127,8 @@ class DMSFilter(WrappedEntity):
127
127
  if isinstance(entry, dict) and "space" in entry and "externalId" in entry
128
128
  ]
129
129
  )
130
-
131
- raise ValueError(f"Cannot convert {filter._filter_name} to {cls.__name__}")
130
+ # fall back to raw filter to preserve the information
131
+ return RawFilter(filter=json.dumps(dumped))
132
132
 
133
133
 
134
134
  class NodeTypeFilter(DMSFilter):
@@ -8,7 +8,7 @@ from pydantic_core.core_schema import SerializationInfo
8
8
  from rdflib import Namespace, URIRef
9
9
 
10
10
  from cognite.neat._constants import get_default_prefixes_and_namespaces
11
- from cognite.neat._issues.errors import NeatValueError, PropertyDefinitionError
11
+ from cognite.neat._issues.errors import PropertyDefinitionError
12
12
  from cognite.neat._rules._constants import EntityTypes
13
13
  from cognite.neat._rules.models._base_rules import (
14
14
  BaseMetadata,
@@ -18,11 +18,6 @@ from cognite.neat._rules.models._base_rules import (
18
18
  SheetList,
19
19
  SheetRow,
20
20
  )
21
- from cognite.neat._rules.models._rdfpath import (
22
- RDFPath,
23
- TransformationRuleType,
24
- parse_rule,
25
- )
26
21
  from cognite.neat._rules.models._types import (
27
22
  ClassEntityType,
28
23
  InformationPropertyType,
@@ -78,7 +73,11 @@ class InformationClass(SheetRow):
78
73
  default=None,
79
74
  description="List of classes (comma separated) that the current class implements (parents).",
80
75
  )
81
-
76
+ instance_source: URIRefType | None = Field(
77
+ alias="Instance Source",
78
+ default=None,
79
+ description="The link to to the rdf.type that have the instances for this class.",
80
+ )
82
81
  physical: URIRefType | None = Field(
83
82
  None,
84
83
  description="Link to the class representation in the physical data model aspect",
@@ -153,11 +152,10 @@ class InformationProperty(SheetRow):
153
152
  "which means that the property can hold any number of values (listable).",
154
153
  )
155
154
  default: Any | None = Field(alias="Default", default=None, description="Default value of the property.")
156
- instance_source: RDFPath | None = Field(
155
+ instance_source: list[URIRefType] | None = Field(
157
156
  alias="Instance Source",
158
157
  default=None,
159
- description="The link to to the instance property for the model. "
160
- "The rule is provided in a RDFPath query syntax which is converted to downstream solution query (e.g. SPARQL).",
158
+ description="The URIRef(s) in the graph to get the value of the property.",
161
159
  )
162
160
  inherited: bool = Field(
163
161
  default=False,
@@ -182,13 +180,10 @@ class InformationProperty(SheetRow):
182
180
  return value
183
181
 
184
182
  @field_validator("instance_source", mode="before")
185
- def generate_rdfpath(cls, value: str | RDFPath | None) -> RDFPath | None:
186
- if value is None or isinstance(value, RDFPath):
187
- return value
188
- elif isinstance(value, str):
189
- return parse_rule(value, TransformationRuleType.rdfpath)
190
- else:
191
- raise NeatValueError(f"Invalid RDF Path: {value!s}")
183
+ def split_on_comma(cls, value: Any) -> Any:
184
+ if isinstance(value, str):
185
+ return [v.strip() for v in value.split(",")]
186
+ return value
192
187
 
193
188
  @model_validator(mode="after")
194
189
  def set_type_for_default(self):
@@ -214,6 +209,12 @@ class InformationProperty(SheetRow):
214
209
  ) from None
215
210
  return self
216
211
 
212
+ @field_serializer("instance_source", when_used="unless-none")
213
+ def serialize_instance_source(self, value: list[URIRefType] | None) -> str | None:
214
+ if value is None:
215
+ return None
216
+ return ",".join(str(v) for v in value)
217
+
217
218
  @field_serializer("max_count", when_used="json-unless-none")
218
219
  def serialize_max_count(self, value: int | float | None) -> int | float | None | str:
219
220
  if isinstance(value, float) and math.isinf(value):
@@ -84,7 +84,7 @@ class InformationInputProperty(InputComponent[InformationProperty]):
84
84
  min_count: int | None = None
85
85
  max_count: int | float | None = None
86
86
  default: Any | None = None
87
- instance_source: str | None = None
87
+ instance_source: str | list[str] | None = None
88
88
  # Only used internally
89
89
  inherited: bool = False
90
90
  neatId: str | URIRef | None = None
@@ -110,6 +110,7 @@ class InformationInputClass(InputComponent[InformationClass]):
110
110
  name: str | None = None
111
111
  description: str | None = None
112
112
  implements: str | list[ClassEntity] | None = None
113
+ instance_source: str | None = None
113
114
  neatId: str | URIRef | None = None
114
115
  # linking
115
116
  physical: str | URIRef | None = None
@@ -127,6 +128,7 @@ class InformationInputClass(InputComponent[InformationClass]):
127
128
  output = super().dump()
128
129
  parent: list[ClassEntity] | None = None
129
130
  if isinstance(self.implements, str):
131
+ self.implements = self.implements.strip()
130
132
  parent = [ClassEntity.load(parent, prefix=default_prefix) for parent in self.implements.split(",")]
131
133
  elif isinstance(self.implements, list):
132
134
  parent = [ClassEntity.load(parent_, prefix=default_prefix) for parent_ in self.implements]
@@ -1,9 +1,12 @@
1
1
  import itertools
2
- from collections import Counter
2
+ from collections import Counter, defaultdict
3
3
 
4
4
  from cognite.neat._issues import IssueList
5
5
  from cognite.neat._issues.errors import NeatValueError
6
- from cognite.neat._issues.errors._resources import ResourceNotDefinedError
6
+ from cognite.neat._issues.errors._resources import (
7
+ ResourceDuplicatedError,
8
+ ResourceNotDefinedError,
9
+ )
7
10
  from cognite.neat._issues.warnings._models import UndefinedClassWarning
8
11
  from cognite.neat._issues.warnings._resources import (
9
12
  ResourceNotDefinedWarning,
@@ -12,6 +15,8 @@ from cognite.neat._issues.warnings._resources import (
12
15
  from cognite.neat._rules._constants import PATTERNS, EntityTypes
13
16
  from cognite.neat._rules.models.entities import ClassEntity, UnknownEntity
14
17
  from cognite.neat._rules.models.entities._multi_value import MultiValueTypeInfo
18
+ from cognite.neat._utils.spreadsheet import SpreadsheetRead
19
+ from cognite.neat._utils.text import humanize_collection
15
20
 
16
21
  from ._rules import InformationRules
17
22
 
@@ -20,14 +25,16 @@ class InformationValidation:
20
25
  """This class does all the validation of the Information rules that have dependencies
21
26
  between components."""
22
27
 
23
- def __init__(self, rules: InformationRules):
28
+ def __init__(self, rules: InformationRules, read_info_by_spreadsheet: dict[str, SpreadsheetRead] | None = None):
24
29
  self.rules = rules
25
- self.metadata = rules.metadata
26
- self.properties = rules.properties
27
- self.classes = rules.classes
30
+ self._read_info_by_spreadsheet = read_info_by_spreadsheet or {}
31
+ self._metadata = rules.metadata
32
+ self._properties = rules.properties
33
+ self._classes = rules.classes
28
34
  self.issue_list = IssueList()
29
35
 
30
36
  def validate(self) -> IssueList:
37
+ self._duplicated_resources()
31
38
  self._namespaces_reassigned()
32
39
  self._classes_without_properties()
33
40
  self._undefined_classes()
@@ -38,9 +45,51 @@ class InformationValidation:
38
45
 
39
46
  return self.issue_list
40
47
 
48
+ def _duplicated_resources(self) -> None:
49
+ properties_sheet = self._read_info_by_spreadsheet.get("Properties")
50
+ classes_sheet = self._read_info_by_spreadsheet.get("Classes")
51
+
52
+ visited = defaultdict(list)
53
+ for row_no, property_ in enumerate(self._properties):
54
+ visited[property_._identifier()].append(
55
+ properties_sheet.adjusted_row_number(row_no) if properties_sheet else row_no + 1
56
+ )
57
+
58
+ for identifier, rows in visited.items():
59
+ if len(rows) == 1:
60
+ continue
61
+ self.issue_list.append(
62
+ ResourceDuplicatedError(
63
+ identifier[1],
64
+ "property",
65
+ (
66
+ "the Properties sheet at row "
67
+ f"{humanize_collection(rows)}"
68
+ " if data model is read from a spreadsheet."
69
+ ),
70
+ )
71
+ )
72
+
73
+ visited = defaultdict(list)
74
+ for row_no, class_ in enumerate(self._classes):
75
+ visited[class_._identifier()].append(
76
+ classes_sheet.adjusted_row_number(row_no) if classes_sheet else row_no + 1
77
+ )
78
+
79
+ for identifier, rows in visited.items():
80
+ if len(rows) == 1:
81
+ continue
82
+ self.issue_list.append(
83
+ ResourceDuplicatedError(
84
+ identifier[0],
85
+ "class",
86
+ (f"the Classes sheet at row {humanize_collection(rows)} if data model is read from a spreadsheet."),
87
+ )
88
+ )
89
+
41
90
  def _classes_without_properties(self) -> None:
42
- defined_classes = {class_.class_ for class_ in self.classes}
43
- referred_classes = {property_.class_ for property_ in self.properties}
91
+ defined_classes = {class_.class_ for class_ in self._classes}
92
+ referred_classes = {property_.class_ for property_ in self._properties}
44
93
  class_parent_pairs = self._class_parent_pairs()
45
94
 
46
95
  if classes_without_properties := defined_classes.difference(referred_classes):
@@ -48,7 +97,7 @@ class InformationValidation:
48
97
  # USE CASE: class has no direct properties and no parents with properties
49
98
  # and it is a class in the prefix of data model, as long as it is in the
50
99
  # same prefix, meaning same space
51
- if not class_parent_pairs[class_] and class_.prefix == self.metadata.prefix:
100
+ if not class_parent_pairs[class_] and class_.prefix == self._metadata.prefix:
52
101
  self.issue_list.append(
53
102
  ResourceNotDefinedWarning(
54
103
  resource_type="class",
@@ -58,8 +107,8 @@ class InformationValidation:
58
107
  )
59
108
 
60
109
  def _undefined_classes(self) -> None:
61
- defined_classes = {class_.class_ for class_ in self.classes}
62
- referred_classes = {property_.class_ for property_ in self.properties}
110
+ defined_classes = {class_.class_ for class_ in self._classes}
111
+ referred_classes = {property_.class_ for property_ in self._properties}
63
112
 
64
113
  if undefined_classes := referred_classes.difference(defined_classes):
65
114
  for class_ in undefined_classes:
@@ -79,7 +128,7 @@ class InformationValidation:
79
128
 
80
129
  if undefined_parents := parents.difference(classes):
81
130
  for parent in undefined_parents:
82
- if parent.prefix != self.metadata.prefix:
131
+ if parent.prefix != self._metadata.prefix:
83
132
  self.issue_list.append(UndefinedClassWarning(class_id=str(parent)))
84
133
  else:
85
134
  self.issue_list.append(
@@ -92,8 +141,8 @@ class InformationValidation:
92
141
 
93
142
  def _referenced_classes_exist(self) -> None:
94
143
  # needs to be complete for this validation to pass
95
- defined_classes = {class_.class_ for class_ in self.classes}
96
- classes_with_explicit_properties = {property_.class_ for property_ in self.properties}
144
+ defined_classes = {class_.class_ for class_ in self._classes}
145
+ classes_with_explicit_properties = {property_.class_ for property_ in self._properties}
97
146
 
98
147
  # USE CASE: models are complete
99
148
  if missing_classes := classes_with_explicit_properties.difference(defined_classes):
@@ -108,7 +157,7 @@ class InformationValidation:
108
157
 
109
158
  def _referenced_value_types_exist(self) -> None:
110
159
  # adding UnknownEntity to the set of defined classes to handle the case where a property references an unknown
111
- defined_classes = {class_.class_ for class_ in self.classes} | {UnknownEntity()}
160
+ defined_classes = {class_.class_ for class_ in self._classes} | {UnknownEntity()}
112
161
  referred_object_types = {
113
162
  property_.value_type
114
163
  for property_ in self.rules.properties
@@ -129,7 +178,7 @@ class InformationValidation:
129
178
  def _regex_compliance_with_dms(self) -> None:
130
179
  """Check regex compliance with DMS of properties, classes and value types."""
131
180
 
132
- for prop_ in self.properties:
181
+ for prop_ in self._properties:
133
182
  if not PATTERNS.dms_property_id_compliance.match(prop_.property_):
134
183
  self.issue_list.append(
135
184
  ResourceRegexViolationWarning(
@@ -179,7 +228,7 @@ class InformationValidation:
179
228
  )
180
229
  )
181
230
 
182
- for class_ in self.classes:
231
+ for class_ in self._classes:
183
232
  if not PATTERNS.view_id_compliance.match(class_.class_.suffix):
184
233
  self.issue_list.append(
185
234
  ResourceRegexViolationWarning(
@@ -14,11 +14,14 @@ from ._converters import (
14
14
  PrefixEntities,
15
15
  SetIDDMSModel,
16
16
  StandardizeNaming,
17
+ StandardizeSpaceAndVersion,
18
+ SubsetDMSRules,
19
+ SubsetInformationRules,
17
20
  ToCompliantEntities,
18
21
  ToDataProductModel,
22
+ ToDMSCompliantEntities,
19
23
  ToEnterpriseModel,
20
24
  ToExtensionModel,
21
- ToInformationCompliantEntities,
22
25
  ToSolutionModel,
23
26
  )
24
27
  from ._mapping import AsParentPropertyId, MapOneToOne, RuleMapper
@@ -43,11 +46,14 @@ __all__ = [
43
46
  "RulesTransformer",
44
47
  "SetIDDMSModel",
45
48
  "StandardizeNaming",
49
+ "StandardizeSpaceAndVersion",
50
+ "SubsetDMSRules",
51
+ "SubsetInformationRules",
46
52
  "ToCompliantEntities",
53
+ "ToDMSCompliantEntities",
47
54
  "ToDataProductModel",
48
55
  "ToEnterpriseModel",
49
56
  "ToExtensionModel",
50
- "ToInformationCompliantEntities",
51
57
  "ToSolutionModel",
52
58
  "VerifiedRulesTransformer",
53
59
  "VerifyAnyRules",