cognite-neat 0.100.1__py3-none-any.whl → 0.102.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (32)
  1. cognite/neat/_constants.py +5 -1
  2. cognite/neat/_graph/loaders/_rdf2dms.py +1 -2
  3. cognite/neat/_graph/queries/_base.py +22 -2
  4. cognite/neat/_graph/queries/_shared.py +4 -4
  5. cognite/neat/_graph/transformers/__init__.py +17 -0
  6. cognite/neat/_graph/transformers/_base.py +1 -1
  7. cognite/neat/_graph/transformers/_iodd.py +9 -4
  8. cognite/neat/_graph/transformers/_prune_graph.py +196 -65
  9. cognite/neat/_rules/exporters/_rules2dms.py +35 -13
  10. cognite/neat/_rules/exporters/_rules2excel.py +7 -2
  11. cognite/neat/_rules/importers/_dms2rules.py +51 -19
  12. cognite/neat/_rules/importers/_rdf/_base.py +2 -2
  13. cognite/neat/_rules/models/_base_rules.py +13 -9
  14. cognite/neat/_rules/models/dms/_rules.py +111 -39
  15. cognite/neat/_rules/models/information/_rules.py +52 -19
  16. cognite/neat/_session/_base.py +18 -0
  17. cognite/neat/_session/_prepare.py +85 -2
  18. cognite/neat/_session/_read.py +3 -3
  19. cognite/neat/_session/_to.py +1 -1
  20. cognite/neat/_session/engine/_load.py +3 -1
  21. cognite/neat/_store/_base.py +23 -2
  22. cognite/neat/_utils/auth.py +6 -4
  23. cognite/neat/_utils/reader/__init__.py +2 -2
  24. cognite/neat/_utils/reader/_base.py +40 -35
  25. cognite/neat/_utils/text.py +12 -0
  26. cognite/neat/_version.py +2 -2
  27. cognite_neat-0.102.0.dist-info/METADATA +113 -0
  28. {cognite_neat-0.100.1.dist-info → cognite_neat-0.102.0.dist-info}/RECORD +31 -31
  29. cognite_neat-0.100.1.dist-info/METADATA +0 -215
  30. {cognite_neat-0.100.1.dist-info → cognite_neat-0.102.0.dist-info}/LICENSE +0 -0
  31. {cognite_neat-0.100.1.dist-info → cognite_neat-0.102.0.dist-info}/WHEEL +0 -0
  32. {cognite_neat-0.100.1.dist-info → cognite_neat-0.102.0.dist-info}/entry_points.txt +0 -0
@@ -1,4 +1,4 @@
1
- from collections import Counter
1
+ from collections import Counter, defaultdict
2
2
  from collections.abc import Collection, Iterable, Sequence
3
3
  from datetime import datetime, timezone
4
4
  from pathlib import Path
@@ -222,12 +222,18 @@ class DMSImporter(BaseImporter[DMSInputRules]):
222
222
  schema: DMSSchema,
223
223
  metadata: DMSInputMetadata | None = None,
224
224
  ) -> DMSInputRules:
225
+ enum_by_container_property = self._create_enum_collections(schema.containers.values())
226
+ enum_collection_by_container_property = {
227
+ key: enum_list[0].collection for key, enum_list in enum_by_container_property.items() if enum_list
228
+ }
229
+
225
230
  properties: list[DMSInputProperty] = []
226
231
  for view_id, view in schema.views.items():
227
232
  view_entity = ViewEntity.from_id(view_id)
228
- class_entity = view_entity.as_class()
229
233
  for prop_id, prop in (view.properties or {}).items():
230
- dms_property = self._create_dms_property(prop_id, prop, view_entity, class_entity)
234
+ dms_property = self._create_dms_property(
235
+ prop_id, prop, view_entity, enum_collection_by_container_property
236
+ )
231
237
  if dms_property is not None:
232
238
  properties.append(dms_property)
233
239
 
@@ -237,8 +243,6 @@ class DMSImporter(BaseImporter[DMSInputRules]):
237
243
 
238
244
  metadata = metadata or DMSInputMetadata.from_data_model(data_model)
239
245
 
240
- enum = self._create_enum_collections(schema.containers.values())
241
-
242
246
  return DMSInputRules(
243
247
  metadata=metadata,
244
248
  properties=properties,
@@ -248,7 +252,7 @@ class DMSImporter(BaseImporter[DMSInputRules]):
248
252
  for view_id, view in schema.views.items()
249
253
  ],
250
254
  nodes=[DMSInputNode.from_node_type(node_type) for node_type in schema.node_types.values()],
251
- enum=enum,
255
+ enum=[enum for enum_list in enum_by_container_property.values() for enum in enum_list] or None,
252
256
  )
253
257
 
254
258
  @classmethod
@@ -267,7 +271,11 @@ class DMSImporter(BaseImporter[DMSInputRules]):
267
271
  )
268
272
 
269
273
  def _create_dms_property(
270
- self, prop_id: str, prop: ViewPropertyApply, view_entity: ViewEntity, class_entity: ClassEntity
274
+ self,
275
+ prop_id: str,
276
+ prop: ViewPropertyApply,
277
+ view_entity: ViewEntity,
278
+ enum_collection_by_container_property: dict[tuple[dm.ContainerId, str], str],
271
279
  ) -> DMSInputProperty | None:
272
280
  if isinstance(prop, dm.MappedPropertyApply) and prop.container not in self._all_containers_by_id:
273
281
  self.issue_list.append(
@@ -300,7 +308,7 @@ class DMSImporter(BaseImporter[DMSInputRules]):
300
308
  )
301
309
  return None
302
310
 
303
- value_type = self._get_value_type(prop, view_entity, prop_id)
311
+ value_type = self._get_value_type(prop, view_entity, prop_id, enum_collection_by_container_property)
304
312
  if value_type is None:
305
313
  return None
306
314
 
@@ -347,7 +355,11 @@ class DMSImporter(BaseImporter[DMSInputRules]):
347
355
  return None
348
356
 
349
357
  def _get_value_type(
350
- self, prop: ViewPropertyApply, view_entity: ViewEntity, prop_id
358
+ self,
359
+ prop: ViewPropertyApply,
360
+ view_entity: ViewEntity,
361
+ prop_id: str,
362
+ enum_collection_by_container_property: dict[tuple[dm.ContainerId, str], str],
351
363
  ) -> DataType | ViewEntity | DMSUnknownEntity | None:
352
364
  if isinstance(
353
365
  prop,
@@ -367,7 +379,16 @@ class DMSImporter(BaseImporter[DMSInputRules]):
367
379
  elif isinstance(container_prop.type, PropertyTypeWithUnit) and container_prop.type.unit:
368
380
  return DataType.load(f"{container_prop.type._type}(unit={container_prop.type.unit.external_id})")
369
381
  elif isinstance(container_prop.type, DMSEnum):
370
- return Enum(collection=ClassEntity(suffix=prop_id), unknownValue=container_prop.type.unknown_value)
382
+ collection = enum_collection_by_container_property.get(
383
+ (prop.container, prop.container_property_identifier)
384
+ )
385
+ if collection is None:
386
+ # This should never happen
387
+ raise ValueError(
388
+ f"BUG in Neat: Enum for {prop.container}.{prop.container_property_identifier} not found."
389
+ )
390
+
391
+ return Enum(collection=ClassEntity(suffix=collection), unknownValue=container_prop.type.unknown_value)
371
392
  else:
372
393
  return DataType.load(container_prop.type._type)
373
394
  else:
@@ -477,15 +498,26 @@ class DMSImporter(BaseImporter[DMSInputRules]):
477
498
  return candidates[0]
478
499
 
479
500
  @staticmethod
480
- def _create_enum_collections(containers: Collection[dm.ContainerApply]) -> list[DMSInputEnum] | None:
481
- enum_collections: list[DMSInputEnum] = []
501
+ def _create_enum_collections(
502
+ containers: Collection[dm.ContainerApply],
503
+ ) -> dict[tuple[dm.ContainerId, str], list[DMSInputEnum]]:
504
+ enum_by_container_property: dict[tuple[dm.ContainerId, str], list[DMSInputEnum]] = defaultdict(list)
505
+
506
+ is_external_id_unique = len({container.external_id for container in containers}) == len(containers)
507
+
482
508
  for container in containers:
509
+ container_id = container.as_id()
483
510
  for prop_id, prop in container.properties.items():
484
- if isinstance(prop.type, DMSEnum):
485
- for identifier, value in prop.type.values.items():
486
- enum_collections.append(
487
- DMSInputEnum(
488
- collection=prop_id, value=identifier, name=value.name, description=value.description
489
- )
511
+ if not isinstance(prop.type, DMSEnum):
512
+ continue
513
+ if is_external_id_unique:
514
+ collection = f"{container.external_id}.{prop_id}"
515
+ else:
516
+ collection = f"{container.space}:{container.external_id}.{prop_id}"
517
+ for identifier, value in prop.type.values.items():
518
+ enum_by_container_property[(container_id, prop_id)].append(
519
+ DMSInputEnum(
520
+ collection=collection, value=identifier, name=value.name, description=value.description
490
521
  )
491
- return enum_collections
522
+ )
523
+ return enum_by_container_property
@@ -4,7 +4,7 @@ from pathlib import Path
4
4
  from cognite.client import data_modeling as dm
5
5
  from rdflib import Graph, Namespace, URIRef
6
6
 
7
- from cognite.neat._constants import get_default_prefixes
7
+ from cognite.neat._constants import get_default_prefixes_and_namespaces
8
8
  from cognite.neat._issues import IssueList
9
9
  from cognite.neat._issues.errors import FileReadError
10
10
  from cognite.neat._issues.errors._general import NeatValueError
@@ -97,7 +97,7 @@ class BaseRDFImporter(BaseImporter[InformationInputRules]):
97
97
  issue_list.append(FileReadError(filepath, str(e)))
98
98
 
99
99
  # bind key namespaces
100
- for prefix, namespace in get_default_prefixes().items():
100
+ for prefix, namespace in get_default_prefixes_and_namespaces().items():
101
101
  graph.bind(prefix, namespace)
102
102
 
103
103
  return cls(
@@ -147,11 +147,13 @@ class BaseMetadata(SchemaModel):
147
147
  Metadata model for data model
148
148
  """
149
149
 
150
- role: ClassVar[RoleTypes]
151
- aspect: ClassVar[DataModelAspect]
152
- space: SpaceType = Field(alias="prefix")
153
- external_id: DataModelExternalIdType = Field(alias="externalId")
154
- version: VersionType
150
+ role: ClassVar[RoleTypes] = Field(description="Role of the person creating the data model")
151
+ aspect: ClassVar[DataModelAspect] = Field(description="Aspect of the data model")
152
+ space: SpaceType = Field(alias="prefix", description="The space where the data model is defined")
153
+ external_id: DataModelExternalIdType = Field(
154
+ alias="externalId", description="External identifier for the data model"
155
+ )
156
+ version: VersionType = Field(description="Version of the data model")
155
157
 
156
158
  name: str | None = Field(
157
159
  None,
@@ -160,21 +162,23 @@ class BaseMetadata(SchemaModel):
160
162
  max_length=255,
161
163
  )
162
164
 
163
- description: str | None = Field(None, min_length=1, max_length=1024)
165
+ description: str | None = Field(
166
+ None, min_length=1, max_length=1024, description="Short description of the data model"
167
+ )
164
168
 
165
169
  creator: StrListType = Field(
166
170
  description=(
167
- "List of contributors to the data model creation, "
171
+ "List of contributors (comma seperated) to the data model creation, "
168
172
  "typically information architects are considered as contributors."
169
173
  ),
170
174
  )
171
175
 
172
176
  created: datetime = Field(
173
- description=("Date of the data model creation"),
177
+ description="Date of the data model creation",
174
178
  )
175
179
 
176
180
  updated: datetime = Field(
177
- description=("Date of the data model update"),
181
+ description="Date of the data model update",
178
182
  )
179
183
 
180
184
  @field_validator("*", mode="before")
@@ -94,20 +94,60 @@ def _metadata(context: Any) -> DMSMetadata | None:
94
94
 
95
95
 
96
96
  class DMSProperty(SheetRow):
97
- view: ViewEntityType = Field(alias="View")
98
- view_property: DmsPropertyType = Field(alias="View Property")
99
- name: str | None = Field(alias="Name", default=None)
100
- description: str | None = Field(alias="Description", default=None)
101
- connection: Literal["direct"] | ReverseConnectionEntity | EdgeEntity | None = Field(None, alias="Connection")
102
- value_type: DataType | ViewEntity | DMSUnknownEntity = Field(alias="Value Type")
103
- nullable: bool | None = Field(default=None, alias="Nullable")
104
- immutable: bool | None = Field(default=None, alias="Immutable")
105
- is_list: bool | None = Field(default=None, alias="Is List")
106
- default: str | int | dict | None = Field(None, alias="Default")
107
- container: ContainerEntityType | None = Field(None, alias="Container")
108
- container_property: DmsPropertyType | None = Field(None, alias="Container Property")
109
- index: StrListType | None = Field(None, alias="Index")
110
- constraint: StrListType | None = Field(None, alias="Constraint")
97
+ view: ViewEntityType = Field(alias="View", description="The property identifier.")
98
+ view_property: DmsPropertyType = Field(alias="View Property", description="The ViewId this property belongs to")
99
+ name: str | None = Field(alias="Name", default=None, description="Human readable name of the property")
100
+ description: str | None = Field(alias="Description", default=None, description="Short description of the property")
101
+ connection: Literal["direct"] | ReverseConnectionEntity | EdgeEntity | None = Field(
102
+ None,
103
+ alias="Connection",
104
+ description="nly applies to connection between views. "
105
+ "It specify how the connection should be implemented in CDF.",
106
+ )
107
+ value_type: DataType | ViewEntity | DMSUnknownEntity = Field(
108
+ alias="Value Type",
109
+ description="Value type that the property can hold. "
110
+ "It takes either subset of CDF primitive types or a View id",
111
+ )
112
+ nullable: bool | None = Field(
113
+ default=None,
114
+ alias="Nullable",
115
+ description="Used to indicate whether the property is required or not. Only applies to primitive type.",
116
+ )
117
+ immutable: bool | None = Field(
118
+ default=None,
119
+ alias="Immutable",
120
+ description="sed to indicate whether the property is can only be set once. Only applies to primitive type.",
121
+ )
122
+ is_list: bool | None = Field(
123
+ default=None,
124
+ alias="Is List",
125
+ description="Used to indicate whether the property holds single or multiple values (list). "
126
+ "Only applies to primitive types.",
127
+ )
128
+ default: str | int | dict | None = Field(
129
+ None, alias="Default", description="Specifies default value for the property."
130
+ )
131
+ container: ContainerEntityType | None = Field(
132
+ None,
133
+ alias="Container",
134
+ description="Specifies container where the property is stored. Only applies to primitive type.",
135
+ )
136
+ container_property: DmsPropertyType | None = Field(
137
+ None,
138
+ alias="Container Property",
139
+ description="Specifies property in the container where the property is stored. Only applies to primitive type.",
140
+ )
141
+ index: StrListType | None = Field(
142
+ None,
143
+ alias="Index",
144
+ description="The names of the indexes (comma separated) that should be created for the property.",
145
+ )
146
+ constraint: StrListType | None = Field(
147
+ None,
148
+ alias="Constraint",
149
+ description="The names of the uniquness (comma separated) that should be created for the property.",
150
+ )
111
151
  logical: URIRefType | None = Field(
112
152
  None,
113
153
  alias="Logical",
@@ -192,11 +232,21 @@ class DMSProperty(SheetRow):
192
232
 
193
233
 
194
234
  class DMSContainer(SheetRow):
195
- container: ContainerEntityType = Field(alias="Container")
196
- name: str | None = Field(alias="Name", default=None)
197
- description: str | None = Field(alias="Description", default=None)
198
- constraint: ContainerEntityList | None = Field(None, alias="Constraint")
199
- used_for: Literal["node", "edge", "all"] | None = Field("all", alias="Used For")
235
+ container: ContainerEntityType = Field(
236
+ alias="Container", description="Container id, strongly advised to PascalCase usage."
237
+ )
238
+ name: str | None = Field(
239
+ alias="Name", default=None, description="Human readable name of the container being defined."
240
+ )
241
+ description: str | None = Field(
242
+ alias="Description", default=None, description="Short description of the node being defined."
243
+ )
244
+ constraint: ContainerEntityList | None = Field(
245
+ None, alias="Constraint", description="List of required (comma separated) constraints for the container"
246
+ )
247
+ used_for: Literal["node", "edge", "all"] | None = Field(
248
+ "all", alias="Used For", description=" Whether the container is used for nodes, edges or all."
249
+ )
200
250
 
201
251
  def _identifier(self) -> tuple[Hashable, ...]:
202
252
  return (self.container,)
@@ -240,12 +290,22 @@ class DMSContainer(SheetRow):
240
290
 
241
291
 
242
292
  class DMSView(SheetRow):
243
- view: ViewEntityType = Field(alias="View")
244
- name: str | None = Field(alias="Name", default=None)
245
- description: str | None = Field(alias="Description", default=None)
246
- implements: ViewEntityList | None = Field(None, alias="Implements")
247
- filter_: HasDataFilter | NodeTypeFilter | RawFilter | None = Field(None, alias="Filter")
248
- in_model: bool = Field(True, alias="In Model")
293
+ view: ViewEntityType = Field(alias="View", description="View id, strongly advised to PascalCase usage.")
294
+ name: str | None = Field(alias="Name", default=None, description="Human readable name of the view being defined.")
295
+ description: str | None = Field(
296
+ alias="Description", default=None, description="Short description of the view being defined "
297
+ )
298
+ implements: ViewEntityList | None = Field(
299
+ None,
300
+ alias="Implements",
301
+ description="List of parent view ids (comma separated) which the view being defined implements.",
302
+ )
303
+ filter_: HasDataFilter | NodeTypeFilter | RawFilter | None = Field(
304
+ None, alias="Filter", description="Explicitly define the filter for the view."
305
+ )
306
+ in_model: bool = Field(
307
+ True, alias="In Model", description="Indicates whether the view being defined is a part of the data model."
308
+ )
249
309
  logical: URIRefType | None = Field(
250
310
  None,
251
311
  alias="Logical",
@@ -292,10 +352,14 @@ class DMSView(SheetRow):
292
352
 
293
353
 
294
354
  class DMSNode(SheetRow):
295
- node: DMSNodeEntity = Field(alias="Node")
296
- usage: Literal["type", "collection"] = Field(alias="Usage")
297
- name: str | None = Field(alias="Name", default=None)
298
- description: str | None = Field(alias="Description", default=None)
355
+ node: DMSNodeEntity = Field(alias="Node", description="The type definition of the node.")
356
+ usage: Literal["type", "collection"] = Field(
357
+ alias="Usage", description="What the usage of the node is in the data model."
358
+ )
359
+ name: str | None = Field(alias="Name", default=None, description="Human readable name of the node being defined.")
360
+ description: str | None = Field(
361
+ alias="Description", default=None, description="Short description of the node being defined."
362
+ )
299
363
 
300
364
  def _identifier(self) -> tuple[Hashable, ...]:
301
365
  return (self.node,)
@@ -316,10 +380,10 @@ class DMSNode(SheetRow):
316
380
 
317
381
 
318
382
  class DMSEnum(SheetRow):
319
- collection: ClassEntityType = Field(alias="Collection")
320
- value: str = Field(alias="Value")
321
- name: str | None = Field(alias="Name", default=None)
322
- description: str | None = Field(alias="Description", default=None)
383
+ collection: ClassEntityType = Field(alias="Collection", description="The collection this enum belongs to.")
384
+ value: str = Field(alias="Value", description="The value of the enum.")
385
+ name: str | None = Field(alias="Name", default=None, description="Human readable name of the enum.")
386
+ description: str | None = Field(alias="Description", default=None, description="Short description of the enum.")
323
387
 
324
388
  def _identifier(self) -> tuple[Hashable, ...]:
325
389
  return self.collection, self.value
@@ -332,12 +396,20 @@ class DMSEnum(SheetRow):
332
396
 
333
397
 
334
398
  class DMSRules(BaseRules):
335
- metadata: DMSMetadata = Field(alias="Metadata")
336
- properties: SheetList[DMSProperty] = Field(alias="Properties")
337
- views: SheetList[DMSView] = Field(alias="Views")
338
- containers: SheetList[DMSContainer] | None = Field(None, alias="Containers")
339
- enum: SheetList[DMSEnum] | None = Field(None, alias="Enum")
340
- nodes: SheetList[DMSNode] | None = Field(None, alias="Nodes")
399
+ metadata: DMSMetadata = Field(alias="Metadata", description="Contains information about the data model.")
400
+ properties: SheetList[DMSProperty] = Field(
401
+ alias="Properties", description="Contains the properties of the data model."
402
+ )
403
+ views: SheetList[DMSView] = Field(alias="Views", description="Contains the views of the data model.")
404
+ containers: SheetList[DMSContainer] | None = Field(
405
+ None,
406
+ alias="Containers",
407
+ description="Contains the definition containers that are the physical storage of the data model.",
408
+ )
409
+ enum: SheetList[DMSEnum] | None = Field(None, alias="Enum", description="Contains the definition of enum values.")
410
+ nodes: SheetList[DMSNode] | None = Field(
411
+ None, alias="Nodes", description="Contains the definition of the node types."
412
+ )
341
413
 
342
414
  @field_validator("views")
343
415
  def matching_version_and_space(cls, value: SheetList[DMSView], info: ValidationInfo) -> SheetList[DMSView]:
@@ -7,7 +7,7 @@ from pydantic import Field, field_serializer, field_validator, model_validator
7
7
  from pydantic_core.core_schema import SerializationInfo
8
8
  from rdflib import Namespace, URIRef
9
9
 
10
- from cognite.neat._constants import get_default_prefixes
10
+ from cognite.neat._constants import get_default_prefixes_and_namespaces
11
11
  from cognite.neat._issues.errors import NeatValueError, PropertyDefinitionError
12
12
  from cognite.neat._rules._constants import EntityTypes
13
13
  from cognite.neat._rules.models._base_rules import (
@@ -68,10 +68,16 @@ class InformationClass(SheetRow):
68
68
  implements: Which classes the current class implements.
69
69
  """
70
70
 
71
- class_: ClassEntityType = Field(alias="Class")
72
- name: str | None = Field(alias="Name", default=None)
73
- description: str | None = Field(alias="Description", default=None)
74
- implements: ClassEntityList | None = Field(alias="Implements", default=None)
71
+ class_: ClassEntityType = Field(
72
+ alias="Class", description="Class id being defined, use strongly advise `PascalCase` usage."
73
+ )
74
+ name: str | None = Field(alias="Name", default=None, description="Human readable name of the class.")
75
+ description: str | None = Field(alias="Description", default=None, description="Short description of the class.")
76
+ implements: ClassEntityList | None = Field(
77
+ alias="Implements",
78
+ default=None,
79
+ description="List of classes (comma separated) that the current class implements (parents).",
80
+ )
75
81
 
76
82
  physical: URIRefType | None = Field(
77
83
  None,
@@ -119,17 +125,40 @@ class InformationProperty(SheetRow):
119
125
  knowledge graph. Defaults to None (no transformation)
120
126
  """
121
127
 
122
- class_: ClassEntityType = Field(alias="Class")
123
- property_: InformationPropertyType = Field(alias="Property")
124
- name: str | None = Field(alias="Name", default=None)
125
- description: str | None = Field(alias="Description", default=None)
128
+ class_: ClassEntityType = Field(
129
+ alias="Class", description="Class id that the property is defined for, strongly advise `PascalCase` usage."
130
+ )
131
+ property_: InformationPropertyType = Field(
132
+ alias="Property", description="Property id, strongly advised to `camelCase` usage."
133
+ )
134
+ name: str | None = Field(alias="Name", default=None, description="Human readable name of the property.")
135
+ description: str | None = Field(alias="Description", default=None, description="Short description of the property.")
126
136
  value_type: DataType | ClassEntityType | MultiValueTypeType | UnknownEntity = Field(
127
- alias="Value Type", union_mode="left_to_right"
137
+ alias="Value Type",
138
+ union_mode="left_to_right",
139
+ description="Value type that the property can hold. It takes either subset of XSD type or a class defined.",
140
+ )
141
+ min_count: int | None = Field(
142
+ alias="Min Count",
143
+ default=None,
144
+ description="Minimum number of values that the property can hold. "
145
+ "If no value is provided, the default value is `0`, "
146
+ "which means that the property is optional.",
147
+ )
148
+ max_count: int | float | None = Field(
149
+ alias="Max Count",
150
+ default=None,
151
+ description="Maximum number of values that the property can hold. "
152
+ "If no value is provided, the default value is `inf`, "
153
+ "which means that the property can hold any number of values (listable).",
154
+ )
155
+ default: Any | None = Field(alias="Default", default=None, description="Default value of the property.")
156
+ transformation: RDFPath | None = Field(
157
+ alias="Transformation",
158
+ default=None,
159
+ description="The rule that is used to populate the data model. "
160
+ "The rule is provided in a RDFPath query syntax which is converted to downstream solution query (e.g. SPARQL).",
128
161
  )
129
- min_count: int | None = Field(alias="Min Count", default=None)
130
- max_count: int | float | None = Field(alias="Max Count", default=None)
131
- default: Any | None = Field(alias="Default", default=None)
132
- transformation: RDFPath | None = Field(alias="Transformation", default=None)
133
162
  inherited: bool = Field(
134
163
  default=False,
135
164
  exclude=True,
@@ -221,17 +250,21 @@ class InformationProperty(SheetRow):
221
250
 
222
251
 
223
252
  class InformationRules(BaseRules):
224
- metadata: InformationMetadata = Field(alias="Metadata")
225
- properties: SheetList[InformationProperty] = Field(alias="Properties")
226
- classes: SheetList[InformationClass] = Field(alias="Classes")
227
- prefixes: dict[str, Namespace] = Field(default_factory=get_default_prefixes, alias="Prefixes")
253
+ metadata: InformationMetadata = Field(alias="Metadata", description="Metadata for the logical data model")
254
+ properties: SheetList[InformationProperty] = Field(alias="Properties", description="List of properties")
255
+ classes: SheetList[InformationClass] = Field(alias="Classes", description="List of classes")
256
+ prefixes: dict[str, Namespace] = Field(
257
+ alias="Prefixes",
258
+ default_factory=get_default_prefixes_and_namespaces,
259
+ description="the definition of the prefixes that are used in the semantic data model",
260
+ )
228
261
 
229
262
  @field_validator("prefixes", mode="before")
230
263
  def parse_str(cls, values: Any) -> Any:
231
264
  if isinstance(values, dict):
232
265
  return {key: Namespace(value) if isinstance(value, str) else value for key, value in values.items()}
233
266
  elif values is None:
234
- values = get_default_prefixes()
267
+ values = get_default_prefixes_and_namespaces()
235
268
  return values
236
269
 
237
270
  def as_dms_rules(self) -> "DMSRules":
@@ -38,6 +38,24 @@ from .exceptions import NeatSessionError, session_class_wrapper
38
38
 
39
39
  @session_class_wrapper
40
40
  class NeatSession:
41
+ """Creates a new NeatSession.
42
+
43
+ This is the main entry point for using Neat. It provides access to the different APIs that can be used to read,
44
+ write, and manipulate data and data models.
45
+
46
+ Args:
47
+ client: The CogniteClient to use for reading and writing data.
48
+ storage: The storage type to use for storing data and data models. Can be either "memory" or "oxigraph".
49
+ In "memory" mode works well for small data sets and when only working with data models. It is works
50
+ well for all notebook environments. In "oxigraph" mode, the data is stored in an Oxigraph database. This
51
+ is more performant for larger data sets and when working with data. Note that this option requires
52
+ additional dependencies to be installed and is not available in CDF Notebooks.
53
+ verbose: Whether to print information about the operations being performed.
54
+ load_engine: Whether to load the Neat Engine. Can be "newest", "cache", or "skip". "newest" will always
55
+ check for the newest version of the engine. "cache" will load the engine if it has been downloaded before.
56
+ "skip" will not load the engine.
57
+ """
58
+
41
59
  def __init__(
42
60
  self,
43
61
  client: CogniteClient | None = None,
@@ -7,8 +7,18 @@ from cognite.client.data_classes.data_modeling import DataModelIdentifier
7
7
  from rdflib import URIRef
8
8
 
9
9
  from cognite.neat._client import NeatClient
10
- from cognite.neat._constants import DEFAULT_NAMESPACE
11
- from cognite.neat._graph.transformers import RelationshipAsEdgeTransformer
10
+ from cognite.neat._constants import (
11
+ DEFAULT_NAMESPACE,
12
+ get_default_prefixes_and_namespaces,
13
+ )
14
+ from cognite.neat._graph.transformers import (
15
+ AttachPropertyFromTargetToSource,
16
+ PruneDeadEndEdges,
17
+ PruneInstancesOfUnknownType,
18
+ PruneTypes,
19
+ RelationshipAsEdgeTransformer,
20
+ Transformers,
21
+ )
12
22
  from cognite.neat._graph.transformers._rdfpath import MakeConnectionOnExactMatch
13
23
  from cognite.neat._rules._shared import InputRules, ReadRules
14
24
  from cognite.neat._rules.importers import DMSImporter
@@ -50,6 +60,79 @@ class InstancePrepareAPI:
50
60
  self._state = state
51
61
  self._verbose = verbose
52
62
 
63
+ def dexpi(self) -> None:
64
+ """Prepares extracted DEXPI graph for further usage in CDF
65
+
66
+ This method bundles several graph transformers which:
67
+ - attach values of generic attributes to nodes
68
+ - create associations between nodes
69
+ - remove unused generic attributes
70
+ - remove associations between nodes that do not exist in the extracted graph
71
+ - remove edges to nodes that do not exist in the extracted graph
72
+
73
+ and therefore safeguard CDF from a bad graph
74
+ """
75
+
76
+ DEXPI = get_default_prefixes_and_namespaces()["dexpi"]
77
+
78
+ transformers = [
79
+ # Remove any instance which type is unknown
80
+ PruneInstancesOfUnknownType(),
81
+ # Directly connect generic attributes
82
+ AttachPropertyFromTargetToSource(
83
+ target_property=DEXPI.Value,
84
+ target_property_holding_new_property=DEXPI.Name,
85
+ target_node_type=DEXPI.GenericAttribute,
86
+ delete_target_node=True,
87
+ ),
88
+ # Directly connect associations
89
+ AttachPropertyFromTargetToSource(
90
+ target_property=DEXPI.ItemID,
91
+ target_property_holding_new_property=DEXPI.Type,
92
+ target_node_type=DEXPI.Association,
93
+ delete_target_node=True,
94
+ ),
95
+ # Remove unused generic attributes and associations
96
+ PruneTypes([DEXPI.GenericAttribute, DEXPI.Association]),
97
+ # Remove edges to nodes that do not exist in the extracted graph
98
+ PruneDeadEndEdges(),
99
+ ]
100
+
101
+ for transformer in transformers:
102
+ self._state.instances.store.transform(cast(Transformers, transformer))
103
+
104
+ def aml(self) -> None:
105
+ """Prepares extracted AutomationML graph for further usage in CDF
106
+
107
+ This method bundles several graph transformers which:
108
+ - attach values of attributes to nodes
109
+ - remove unused attributes
110
+ - remove edges to nodes that do not exist in the extracted graph
111
+
112
+ and therefore safeguard CDF from a bad graph
113
+ """
114
+
115
+ AML = get_default_prefixes_and_namespaces()["aml"]
116
+
117
+ transformers = [
118
+ # Remove any instance which type is unknown
119
+ PruneInstancesOfUnknownType(),
120
+ # Directly connect generic attributes
121
+ AttachPropertyFromTargetToSource(
122
+ target_property=AML.Value,
123
+ target_property_holding_new_property=AML.Name,
124
+ target_node_type=AML.Attribute,
125
+ delete_target_node=True,
126
+ ),
127
+ # Prune unused attributes
128
+ PruneTypes([AML.Attribute]),
129
+ # # Remove edges to nodes that do not exist in the extracted graph
130
+ PruneDeadEndEdges(),
131
+ ]
132
+
133
+ for transformer in transformers:
134
+ self._state.instances.store.transform(cast(Transformers, transformer))
135
+
53
136
  def make_connection_on_exact_match(
54
137
  self,
55
138
  source: tuple[str, str],
@@ -17,7 +17,7 @@ from cognite.neat._rules.importers import BaseImporter
17
17
  from cognite.neat._store._provenance import Activity as ProvenanceActivity
18
18
  from cognite.neat._store._provenance import Change
19
19
  from cognite.neat._store._provenance import Entity as ProvenanceEntity
20
- from cognite.neat._utils.reader import GitHubReader, NeatReader, PathReader
20
+ from cognite.neat._utils.reader import GitHubReader, HttpFileReader, NeatReader, PathReader
21
21
 
22
22
  from ._state import SessionState
23
23
  from ._wizard import NeatObjectType, RDFFileType, XMLFileType, object_wizard, rdf_dm_wizard, xml_format_wizard
@@ -244,9 +244,9 @@ class YamlReadAPI(BaseReadAPI):
244
244
  class CSVReadAPI(BaseReadAPI):
245
245
  def __call__(self, io: Any, type: str, primary_key: str) -> None:
246
246
  reader = NeatReader.create(io)
247
- if isinstance(reader, GitHubReader):
247
+ if isinstance(reader, HttpFileReader):
248
248
  path = Path(tempfile.gettempdir()).resolve() / reader.name
249
- path.write_text(reader.read_text())
249
+ path.write_text(reader.read_text(), encoding="utf-8", newline="\n")
250
250
  elif isinstance(reader, PathReader):
251
251
  path = reader.path
252
252
  else:
@@ -150,7 +150,7 @@ class CDFToAPI:
150
150
  - "skip": If any component already exists, it will be skipped.
151
151
  - "update": If any component already exists, it will be updated.
152
152
  - "force": If any component already exists, and the update fails, it will be deleted and recreated.
153
- - "recreate": All components will be deleted and recreated.
153
+ - "recreate": All components will be deleted and recreated. The exception is spaces, which will be updated.
154
154
 
155
155
  """
156
156