cognite-neat 0.90.2__py3-none-any.whl → 0.91.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (30)
  1. cognite/neat/_version.py +1 -1
  2. cognite/neat/graph/extractors/__init__.py +3 -0
  3. cognite/neat/graph/extractors/_dexpi.py +4 -4
  4. cognite/neat/graph/extractors/_iodd.py +160 -0
  5. cognite/neat/issues/_base.py +6 -2
  6. cognite/neat/rules/exporters/_rules2excel.py +3 -3
  7. cognite/neat/rules/exporters/_rules2yaml.py +5 -1
  8. cognite/neat/rules/models/__init__.py +2 -2
  9. cognite/neat/rules/models/_base_input.py +2 -2
  10. cognite/neat/rules/models/_base_rules.py +142 -142
  11. cognite/neat/rules/models/asset/_rules.py +1 -34
  12. cognite/neat/rules/models/dms/_rules.py +127 -46
  13. cognite/neat/rules/models/dms/_validation.py +2 -2
  14. cognite/neat/rules/models/domain.py +16 -19
  15. cognite/neat/rules/models/entities/_single_value.py +25 -11
  16. cognite/neat/rules/models/entities/_types.py +0 -10
  17. cognite/neat/rules/models/information/_rules.py +68 -43
  18. cognite/neat/rules/models/information/_validation.py +5 -5
  19. cognite/neat/rules/transformers/_converters.py +6 -8
  20. cognite/neat/rules/transformers/_pipelines.py +8 -4
  21. cognite/neat/store/_base.py +1 -1
  22. cognite/neat/utils/xml_.py +27 -12
  23. {cognite_neat-0.90.2.dist-info → cognite_neat-0.91.0.dist-info}/METADATA +1 -1
  24. {cognite_neat-0.90.2.dist-info → cognite_neat-0.91.0.dist-info}/RECORD +27 -29
  25. cognite/neat/rules/models/asset/_serializer.py +0 -73
  26. cognite/neat/rules/models/dms/_serializer.py +0 -157
  27. cognite/neat/rules/models/information/_serializer.py +0 -73
  28. {cognite_neat-0.90.2.dist-info → cognite_neat-0.91.0.dist-info}/LICENSE +0 -0
  29. {cognite_neat-0.90.2.dist-info → cognite_neat-0.91.0.dist-info}/WHEEL +0 -0
  30. {cognite_neat-0.90.2.dist-info → cognite_neat-0.91.0.dist-info}/entry_points.txt +0 -0
@@ -4,13 +4,11 @@ its sub-models and validators.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- import math
8
7
  import sys
9
8
  import types
10
9
  from abc import ABC, abstractmethod
11
- from collections.abc import Callable, Iterator
12
- from functools import wraps
13
- from typing import Annotated, Any, ClassVar, Generic, Literal, TypeVar
10
+ from collections.abc import Callable, Hashable, Iterator, MutableSequence, Sequence
11
+ from typing import Annotated, Any, ClassVar, Literal, SupportsIndex, TypeVar, get_args, get_origin, overload
14
12
 
15
13
  import pandas as pd
16
14
  from pydantic import (
@@ -18,79 +16,25 @@ from pydantic import (
18
16
  BeforeValidator,
19
17
  ConfigDict,
20
18
  Field,
19
+ GetCoreSchemaHandler,
21
20
  PlainSerializer,
22
21
  field_validator,
23
22
  model_serializer,
24
- model_validator,
25
23
  )
26
- from pydantic.fields import FieldInfo
27
24
  from pydantic.main import IncEx
25
+ from pydantic_core import core_schema
28
26
 
29
27
  if sys.version_info >= (3, 11):
30
28
  from enum import StrEnum
29
+ from typing import Self
31
30
  else:
32
31
  from backports.strenum import StrEnum
32
+ from typing_extensions import Self
33
33
 
34
34
 
35
35
  METADATA_VALUE_MAX_LENGTH = 5120
36
36
 
37
37
 
38
- def replace_nan_floats_with_default(values: dict, model_fields: dict[str, FieldInfo]) -> dict:
39
- output = {}
40
- for field_name, value in values.items():
41
- is_nan_float = isinstance(value, float) and math.isnan(value)
42
- if not is_nan_float:
43
- output[field_name] = value
44
- continue
45
- if field_name in model_fields:
46
- output[field_name] = model_fields[field_name].default
47
- else:
48
- # field_name may be an alias
49
- source_name = next((name for name, field in model_fields.items() if field.alias == field_name), None)
50
- if source_name:
51
- output[field_name] = model_fields[source_name].default
52
- else:
53
- # Just pass it through if it is not an alias.
54
- output[field_name] = value
55
- return output
56
-
57
-
58
- def skip_field_validator(validators_field):
59
- def decorator(func):
60
- @wraps(func)
61
- def wrapper(cls, value, values):
62
- if isinstance(values, dict):
63
- to_skip = values.get(validators_field, set())
64
- else:
65
- try:
66
- to_skip = values.data.get(validators_field, set())
67
- except Exception:
68
- to_skip = set()
69
-
70
- if "all" in to_skip or func.__name__ in to_skip:
71
- return value
72
- return func(cls, value, values)
73
-
74
- return wrapper
75
-
76
- return decorator
77
-
78
-
79
- def skip_model_validator(validators_field):
80
- def decorator(func):
81
- @wraps(func)
82
- def wrapper(self):
83
- to_skip = getattr(self, validators_field, set())
84
- if "all" in to_skip or func.__name__ in to_skip:
85
- return self
86
-
87
- return func(self)
88
-
89
- return wrapper
90
-
91
- return decorator
92
-
93
-
94
38
  def _get_required_fields(model: type[BaseModel], use_alias: bool = False) -> set[str]:
95
39
  """Get required fields from a pydantic model.
96
40
 
@@ -148,7 +92,7 @@ class MatchType(StrEnum):
148
92
  partial = "partial"
149
93
 
150
94
 
151
- class RuleModel(BaseModel):
95
+ class SchemaModel(BaseModel):
152
96
  model_config: ClassVar[ConfigDict] = ConfigDict(
153
97
  populate_by_name=True,
154
98
  str_strip_whitespace=True,
@@ -164,50 +108,8 @@ class RuleModel(BaseModel):
164
108
  """Returns a set of mandatory fields for the model."""
165
109
  return _get_required_fields(cls, use_alias)
166
110
 
167
- @classmethod
168
- def sheets(cls, by_alias: bool = False) -> list[str]:
169
- """Returns a list of sheet names for the model."""
170
- return [
171
- (field.alias or field_name) if by_alias else field_name
172
- for field_name, field in cls.model_fields.items()
173
- if field_name != "validators_to_skip"
174
- ]
175
111
 
176
- @classmethod
177
- def headers_by_sheet(cls, by_alias: bool = False) -> dict[str, list[str]]:
178
- """Returns a list of headers for the model."""
179
- headers_by_sheet: dict[str, list[str]] = {}
180
- for field_name, field in cls.model_fields.items():
181
- if field_name == "validators_to_skip":
182
- continue
183
- sheet_name = (field.alias or field_name) if by_alias else field_name
184
- annotation = field.annotation
185
-
186
- if isinstance(annotation, types.UnionType):
187
- annotation = annotation.__args__[0]
188
-
189
- try:
190
- if isinstance(annotation, type) and issubclass(annotation, SheetList):
191
- # We know that this is a SheetList, so we can safely access the annotation
192
- # which is the concrete type of the SheetEntity.
193
- model_fields = annotation.model_fields["data"].annotation.__args__[0].model_fields # type: ignore[union-attr]
194
- elif isinstance(annotation, type) and issubclass(annotation, BaseModel):
195
- model_fields = annotation.model_fields
196
- else:
197
- model_fields = {}
198
- except TypeError:
199
- # Python 3.10 raises TypeError: issubclass() arg 1 must be a class
200
- # when calling issubclass(annotation, SheetList) with the dict annotation
201
- model_fields = {}
202
- headers_by_sheet[sheet_name] = [
203
- (field.alias or field_name) if by_alias else field_name
204
- for field_name, field in model_fields.items()
205
- if field_name != "validators_to_skip"
206
- ]
207
- return headers_by_sheet
208
-
209
-
210
- class BaseMetadata(RuleModel):
112
+ class BaseMetadata(SchemaModel):
211
113
  """
212
114
  Metadata model for data model
213
115
  """
@@ -242,13 +144,13 @@ class BaseMetadata(RuleModel):
242
144
  raise NotImplementedError()
243
145
 
244
146
 
245
- class BaseRules(RuleModel, ABC):
147
+ class BaseRules(SchemaModel, ABC):
246
148
  """
247
149
  Rules is a core concept in `neat`. This represents fusion of data model
248
150
  definitions and (optionally) the transformation rules used to transform the data/graph
249
151
  from the source representation to the target representation defined by the data model.
250
- The rules are defined in a Excel sheet and then parsed into a `Rules` object. The
251
- `Rules` object is then used to generate data model and the`RDF` graph made of data
152
+ The rules are defined in an Excel sheet and then parsed into a `Rules` object. The
153
+ `Rules` object is then used to generate data model and the `RDF` graph made of data
252
154
  model instances.
253
155
 
254
156
  Args:
@@ -257,70 +159,158 @@ class BaseRules(RuleModel, ABC):
257
159
  """
258
160
 
259
161
  metadata: BaseMetadata
162
+ reference: Self | None = Field(None, alias="Reference")
163
+
164
+ @classmethod
165
+ def headers_by_sheet(cls, by_alias: bool = False) -> dict[str, list[str]]:
166
+ """Returns a list of headers for the model, typically used by ExcelExporter"""
167
+ headers_by_sheet: dict[str, list[str]] = {}
168
+ for field_name, field in cls.model_fields.items():
169
+ if field_name == "validators_to_skip":
170
+ continue
171
+ sheet_name = (field.alias or field_name) if by_alias else field_name
172
+ annotation = field.annotation
173
+
174
+ if isinstance(annotation, types.UnionType):
175
+ annotation = annotation.__args__[0]
176
+
177
+ try:
178
+ if isinstance(annotation, types.GenericAlias) and get_origin(annotation) is SheetList:
179
+ # We know that this is a SheetList, so we can safely access the annotation
180
+ # which is the concrete type of the SheetEntity.
181
+ model_fields = get_args(annotation)[0].model_fields # type: ignore[union-attr]
182
+ elif isinstance(annotation, type) and issubclass(annotation, BaseModel):
183
+ model_fields = annotation.model_fields
184
+ else:
185
+ model_fields = {}
186
+ except TypeError:
187
+ # Python 3.10 raises TypeError: issubclass() arg 1 must be a class
188
+ # when calling issubclass(annotation, BaseModel) with the dict annotation
189
+ model_fields = {}
190
+ headers_by_sheet[sheet_name] = [
191
+ (field.alias or field_name) if by_alias else field_name
192
+ for field_name, field in model_fields.items()
193
+ if field_name != "validators_to_skip" and not field.exclude
194
+ ]
195
+ return headers_by_sheet
260
196
 
261
197
  def dump(
262
198
  self,
199
+ entities_exclude_defaults: bool = True,
200
+ as_reference: bool = False,
263
201
  mode: Literal["python", "json"] = "python",
264
202
  by_alias: bool = False,
265
203
  exclude: IncEx = None,
266
204
  exclude_none: bool = False,
267
205
  exclude_unset: bool = False,
268
206
  exclude_defaults: bool = False,
269
- as_reference: bool = False,
270
207
  ) -> dict[str, Any]:
271
208
  """Dump the model to a dictionary.
272
209
 
273
210
  This is used in the Exporters to dump rules in the required format.
211
+
212
+ Args:
213
+ entities_exclude_defaults: Whether to exclude default prefix (and version) for entities.
214
+ For example, given a class that is dumped as 'my_prefix:MyClass', if the prefix for the rules
215
+ set in metadata.prefix = 'my_prefix', then this class will be dumped as 'MyClass' when this flag is set.
216
+ Defaults to True.
217
+ as_reference (bool, optional): Whether to dump as reference. For Information and DMS rules, this will
218
+ set the reference column/field to the reference of that entity. This is used in the ExcelExporter
219
+ to dump a reference model.
220
+ mode: The mode in which `to_python` should run.
221
+ If mode is 'json', the output will only contain JSON serializable types.
222
+ If mode is 'python', the output may contain non-JSON-serializable Python objects.
223
+ by_alias: Whether to use the field's alias in the dictionary key if defined.
224
+ exclude: A set of fields to exclude from the output.
225
+ exclude_none: Whether to exclude fields that have a value of `None`.
226
+ exclude_unset: Whether to exclude fields that have not been explicitly set.
227
+ exclude_defaults: Whether to exclude fields that are set to their default value.
274
228
  """
275
- return self.model_dump(
229
+ for field_name in self.model_fields.keys():
230
+ value = getattr(self, field_name)
231
+ # Ensure deterministic order of properties, classes, views, and so on
232
+ if isinstance(value, SheetList):
233
+ value.sort(key=lambda x: x._identifier())
234
+
235
+ context: dict[str, Any] = {"as_reference": as_reference}
236
+ if entities_exclude_defaults:
237
+ context["metadata"] = self.metadata
238
+
239
+ exclude_input: IncEx
240
+ if self.reference is None:
241
+ exclude_input = exclude
242
+ else:
243
+ # If the rules have a reference, we dump that separately with the as_reference flag set to True.
244
+ # We don't want to include the reference in the main dump, so we exclude it here.
245
+ # This is to include whatever is in the exclude set from the user.
246
+ if isinstance(exclude, dict):
247
+ exclude_input = exclude.copy()
248
+ exclude_input["reference"] = {"__all__"} # type: ignore[index]
249
+ elif isinstance(exclude, set):
250
+ exclude_input = exclude.copy()
251
+ exclude_input.add("reference") # type: ignore[arg-type]
252
+ else:
253
+ exclude_input = {"reference"}
254
+
255
+ output = self.model_dump(
276
256
  mode=mode,
277
257
  by_alias=by_alias,
278
- exclude=exclude,
258
+ exclude=exclude_input,
279
259
  exclude_none=exclude_none,
280
260
  exclude_unset=exclude_unset,
281
261
  exclude_defaults=exclude_defaults,
262
+ context=context,
282
263
  )
283
-
284
-
285
- # An sheet entity is either a class or a property.
286
- class SheetEntity(RuleModel):
264
+ is_reference_user_excluded = isinstance(exclude, dict | set) and "reference" in exclude
265
+ if self.reference is not None and not is_reference_user_excluded:
266
+ # If the rules have a reference, we dump that separately with the as_reference flag set to True.
267
+ # Unless the user has explicitly excluded the reference.
268
+ output["Reference" if by_alias else "reference"] = self.reference.dump(
269
+ mode=mode,
270
+ by_alias=by_alias,
271
+ exclude=exclude,
272
+ exclude_none=exclude_none,
273
+ exclude_unset=exclude_unset,
274
+ exclude_defaults=exclude_defaults,
275
+ entities_exclude_defaults=entities_exclude_defaults,
276
+ as_reference=True,
277
+ )
278
+ return output
279
+
280
+
281
+ class SheetRow(SchemaModel):
287
282
  @field_validator("*", mode="before")
288
283
  def strip_string(cls, value: Any) -> Any:
289
284
  if isinstance(value, str):
290
285
  return value.strip()
291
286
  return value
292
287
 
288
+ @abstractmethod
289
+ def _identifier(self) -> tuple[Hashable, ...]:
290
+ raise NotImplementedError()
293
291
 
294
- T_Entity = TypeVar("T_Entity", bound=SheetEntity)
295
-
296
-
297
- class SheetList(BaseModel, Generic[T_Entity]):
298
- data: list[T_Entity] = Field(default_factory=list)
299
-
300
- @model_validator(mode="before")
301
- def from_list_format(cls, values: Any) -> Any:
302
- if isinstance(values, list):
303
- return {"data": values}
304
- return values
305
292
 
306
- def __contains__(self, item: str) -> bool:
307
- return item in self.data
293
+ T_SheetRow = TypeVar("T_SheetRow", bound=SheetRow)
308
294
 
309
- def __len__(self) -> int:
310
- return len(self.data)
311
295
 
312
- def __iter__(self) -> Iterator[T_Entity]: # type: ignore[override]
313
- return iter(self.data)
296
+ class SheetList(list, MutableSequence[T_SheetRow]):
297
+ @classmethod
298
+ def __get_pydantic_core_schema__(cls, source: Any, handler: GetCoreSchemaHandler) -> core_schema.CoreSchema:
299
+ if args := get_args(source):
300
+ item_type = args[0]
301
+ else:
302
+ # Someone use SheetList without specifying the type
303
+ raise TypeError("SheetList must be used with a type argument, e.g., SheetList[InformationProperty]")
314
304
 
315
- def append(self, value: T_Entity) -> None:
316
- self.data.append(value)
305
+ instance_schema = core_schema.is_instance_schema(cls)
306
+ sequence_row_schema = handler.generate_schema(Sequence[item_type]) # type: ignore[valid-type]
317
307
 
318
- def extend(self, values: list[T_Entity]) -> None:
319
- self.data.extend(values)
308
+ non_instance_schema = core_schema.no_info_after_validator_function(SheetList, sequence_row_schema)
309
+ return core_schema.union_schema([instance_schema, non_instance_schema])
320
310
 
321
311
  def to_pandas(self, drop_na_columns: bool = True, include: list[str] | None = None) -> pd.DataFrame:
322
312
  """Converts ResourceDict to pandas DataFrame."""
323
- df = pd.DataFrame([entity.model_dump() for entity in self.data])
313
+ df = pd.DataFrame([entity.model_dump() for entity in self])
324
314
  if drop_na_columns:
325
315
  df = df.dropna(axis=1, how="all")
326
316
  if include is not None:
@@ -331,10 +321,20 @@ class SheetList(BaseModel, Generic[T_Entity]):
331
321
  """Returns HTML representation of ResourceDict."""
332
322
  return self.to_pandas(drop_na_columns=True)._repr_html_() # type: ignore[operator]
333
323
 
334
- @classmethod
335
- def mandatory_fields(cls, use_alias=False) -> set[str]:
336
- """Returns a set of mandatory fields for the model."""
337
- return _get_required_fields(cls, use_alias)
324
+ # Implemented to get correct type hints
325
+ def __iter__(self) -> Iterator[T_SheetRow]:
326
+ return super().__iter__()
327
+
328
+ @overload
329
+ def __getitem__(self, index: SupportsIndex) -> T_SheetRow: ...
330
+
331
+ @overload
332
+ def __getitem__(self, index: slice) -> SheetList[T_SheetRow]: ...
333
+
334
+ def __getitem__(self, index: SupportsIndex | slice, /) -> T_SheetRow | SheetList[T_SheetRow]:
335
+ if isinstance(index, slice):
336
+ return SheetList[T_SheetRow](super().__getitem__(index))
337
+ return super().__getitem__(index)
338
338
 
339
339
 
340
340
  ExtensionCategoryType = Annotated[
@@ -1,8 +1,7 @@
1
1
  import sys
2
- from typing import Any, ClassVar, Literal, cast
2
+ from typing import Any, ClassVar, cast
3
3
 
4
4
  from pydantic import Field, field_validator, model_validator
5
- from pydantic.main import IncEx
6
5
  from rdflib import Namespace
7
6
 
8
7
  from cognite.neat.constants import get_default_prefixes
@@ -104,35 +103,3 @@ class AssetRules(BaseRules):
104
103
  if issue_list.has_errors:
105
104
  raise issue_list.as_exception()
106
105
  return self
107
-
108
- def dump(
109
- self,
110
- mode: Literal["python", "json"] = "python",
111
- by_alias: bool = False,
112
- exclude: IncEx = None,
113
- exclude_none: bool = False,
114
- exclude_unset: bool = False,
115
- exclude_defaults: bool = False,
116
- as_reference: bool = False,
117
- ) -> dict[str, Any]:
118
- from ._serializer import _AssetRulesSerializer
119
-
120
- dumped = self.model_dump(
121
- mode=mode,
122
- by_alias=by_alias,
123
- exclude=exclude,
124
- exclude_none=exclude_none,
125
- exclude_unset=exclude_unset,
126
- exclude_defaults=exclude_defaults,
127
- )
128
- prefix = self.metadata.prefix
129
- serializer = _AssetRulesSerializer(by_alias, prefix)
130
- cleaned = serializer.clean(dumped, as_reference)
131
- last = "Last" if by_alias else "last"
132
- if last_dump := cleaned.get(last):
133
- cleaned[last] = serializer.clean(last_dump, False)
134
- reference = "Reference" if by_alias else "reference"
135
- if self.reference and (ref_dump := cleaned.get(reference)):
136
- prefix = self.reference.metadata.prefix
137
- cleaned[reference] = _AssetRulesSerializer(by_alias, prefix).clean(ref_dump, True)
138
- return cleaned