cognite-neat 0.107.0__py3-none-any.whl → 0.108.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (52) hide show
  1. cognite/neat/_constants.py +35 -1
  2. cognite/neat/_graph/_shared.py +4 -0
  3. cognite/neat/_graph/extractors/_classic_cdf/_base.py +115 -14
  4. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +83 -6
  5. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +48 -12
  6. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +19 -1
  7. cognite/neat/_graph/extractors/_dms.py +162 -47
  8. cognite/neat/_graph/extractors/_dms_graph.py +54 -4
  9. cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
  10. cognite/neat/_graph/extractors/_rdf_file.py +3 -2
  11. cognite/neat/_graph/loaders/__init__.py +1 -3
  12. cognite/neat/_graph/loaders/_rdf2dms.py +20 -10
  13. cognite/neat/_graph/queries/_base.py +140 -84
  14. cognite/neat/_graph/queries/_construct.py +1 -1
  15. cognite/neat/_graph/transformers/__init__.py +3 -1
  16. cognite/neat/_graph/transformers/_value_type.py +54 -3
  17. cognite/neat/_issues/errors/_resources.py +1 -1
  18. cognite/neat/_issues/warnings/__init__.py +0 -2
  19. cognite/neat/_issues/warnings/_models.py +1 -1
  20. cognite/neat/_issues/warnings/_properties.py +0 -8
  21. cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
  22. cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
  23. cognite/neat/_rules/importers/__init__.py +3 -1
  24. cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
  25. cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
  26. cognite/neat/_rules/importers/_rdf/_base.py +2 -2
  27. cognite/neat/_rules/importers/_rdf/_inference2rules.py +241 -18
  28. cognite/neat/_rules/models/_base_rules.py +13 -3
  29. cognite/neat/_rules/models/dms/_rules.py +1 -8
  30. cognite/neat/_rules/models/dms/_rules_input.py +4 -0
  31. cognite/neat/_rules/models/information/_rules_input.py +5 -0
  32. cognite/neat/_rules/transformers/__init__.py +6 -0
  33. cognite/neat/_rules/transformers/_converters.py +98 -7
  34. cognite/neat/_session/_base.py +55 -4
  35. cognite/neat/_session/_drop.py +5 -1
  36. cognite/neat/_session/_inspect.py +3 -2
  37. cognite/neat/_session/_read.py +61 -14
  38. cognite/neat/_session/_set.py +27 -0
  39. cognite/neat/_session/_show.py +4 -4
  40. cognite/neat/_session/_state.py +8 -4
  41. cognite/neat/_session/_to.py +4 -1
  42. cognite/neat/_session/_wizard.py +1 -1
  43. cognite/neat/_session/exceptions.py +2 -1
  44. cognite/neat/_store/_graph_store.py +287 -133
  45. cognite/neat/_store/_rules_store.py +108 -1
  46. cognite/neat/_utils/auth.py +1 -1
  47. cognite/neat/_version.py +1 -1
  48. {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/METADATA +1 -1
  49. {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/RECORD +52 -52
  50. {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/LICENSE +0 -0
  51. {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/WHEEL +0 -0
  52. {cognite_neat-0.107.0.dist-info → cognite_neat-0.108.0.dist-info}/entry_points.txt +0 -0
@@ -302,8 +302,7 @@ class Interface(DTDLBase):
302
302
  spec_version = frozenset(["2", "3"])
303
303
  default_context: ClassVar[IRI] = Field(
304
304
  "dtmi:dtdl:context;3",
305
- description="This can be set directly on the class to change the "
306
- "default context used when parsing a document.",
305
+ description="This can be set directly on the class to change the default context used when parsing a document.",
307
306
  )
308
307
  id_: DTMI = Field(alias="@id") # type: ignore[assignment]
309
308
  context: IRI | None = Field(alias="@context")
@@ -1,5 +1,5 @@
1
1
  from ._imf2rules import IMFImporter
2
- from ._inference2rules import InferenceImporter
2
+ from ._inference2rules import InferenceImporter, SubclassInferenceImporter
3
3
  from ._owl2rules import OWLImporter
4
4
 
5
- __all__ = ["IMFImporter", "InferenceImporter", "OWLImporter"]
5
+ __all__ = ["IMFImporter", "InferenceImporter", "OWLImporter", "SubclassInferenceImporter"]
@@ -72,7 +72,7 @@ class BaseRDFImporter(BaseImporter[InformationInputRules]):
72
72
  ):
73
73
  return cls(
74
74
  IssueList(title=f"{cls.__name__} issues"),
75
- store.graph,
75
+ store.dataset,
76
76
  data_model_id=data_model_id,
77
77
  max_number_of_instance=max_number_of_instance,
78
78
  non_existing_node_type=non_existing_node_type,
@@ -140,7 +140,7 @@ class BaseRDFImporter(BaseImporter[InformationInputRules]):
140
140
  prefixes: Dict of prefixes and namespaces
141
141
  """
142
142
  if Namespace(get_namespace(URI)) not in prefixes.values():
143
- prefixes[f"prefix_{len(prefixes)+1}"] = Namespace(get_namespace(URI))
143
+ prefixes[f"prefix_{len(prefixes) + 1}"] = Namespace(get_namespace(URI))
144
144
 
145
145
  @property
146
146
  def _metadata(self) -> dict:
@@ -1,18 +1,25 @@
1
+ import itertools
1
2
  from collections import Counter, defaultdict
2
- from collections.abc import Mapping
3
+ from collections.abc import Iterable, Mapping
4
+ from dataclasses import dataclass
3
5
  from datetime import datetime, timezone
4
6
  from pathlib import Path
5
- from typing import ClassVar, cast
7
+ from typing import Any, ClassVar, cast
6
8
 
7
9
  from cognite.client import data_modeling as dm
8
- from rdflib import RDF, Namespace, URIRef
10
+ from rdflib import RDF, Graph, Namespace, URIRef
9
11
  from rdflib import Literal as RdfLiteral
10
12
 
11
- from cognite.neat._issues.warnings import PropertySkippedWarning, PropertyValueTypeUndefinedWarning
12
- from cognite.neat._rules.models import data_types
13
+ from cognite.neat._constants import NEAT
14
+ from cognite.neat._issues import IssueList
15
+ from cognite.neat._issues.warnings import PropertyValueTypeUndefinedWarning
16
+ from cognite.neat._rules.analysis import InformationAnalysis
17
+ from cognite.neat._rules.models import InformationRules, data_types
13
18
  from cognite.neat._rules.models.data_types import AnyURI
14
19
  from cognite.neat._rules.models.entities._single_value import UnknownEntity
15
20
  from cognite.neat._rules.models.information import (
21
+ InformationInputClass,
22
+ InformationInputProperty,
16
23
  InformationMetadata,
17
24
  )
18
25
  from cognite.neat._store import NeatGraphStore
@@ -157,7 +164,7 @@ class InferenceImporter(BaseRDFImporter):
157
164
  for class_uri, no_instances in self.graph.query(ORDERED_CLASSES_QUERY): # type: ignore[misc]
158
165
  if (class_id := remove_namespace_from_uri(cast(URIRef, class_uri))) in classes:
159
166
  # handles cases when class id is already present in classes
160
- class_id = f"{class_id}_{len(classes)+1}"
167
+ class_id = f"{class_id}_{len(classes) + 1}"
161
168
 
162
169
  classes[class_id] = {
163
170
  "class_": class_id,
@@ -190,18 +197,6 @@ class InferenceImporter(BaseRDFImporter):
190
197
  if property_uri == RDF.type:
191
198
  continue
192
199
  property_id = remove_namespace_from_uri(property_uri)
193
- if property_id in {"external_id", "externalId"}:
194
- skip_issue = PropertySkippedWarning(
195
- resource_type="Property",
196
- identifier=f"{class_id}:{property_id}",
197
- property_name=property_id,
198
- reason="External ID is assumed to be the unique identifier of the instance "
199
- "and is not part of the data model schema.",
200
- )
201
- if skip_issue not in self.issue_list:
202
- self.issue_list.append(skip_issue)
203
- continue
204
-
205
200
  self._add_uri_namespace_to_prefixes(cast(URIRef, property_uri), prefixes)
206
201
 
207
202
  if isinstance(data_type_uri, URIRef):
@@ -292,3 +287,231 @@ class InferenceImporter(BaseRDFImporter):
292
287
  @property
293
288
  def source_uri(self) -> URIRef:
294
289
  return INSTANCES_ENTITY.id_
290
+
291
+
292
@dataclass
class _ReadProperties:
    """Internal helper: one property observation read from the graph.

    Each instance is one result row of the property query that
    SubclassInferenceImporter runs for a (class, subclass) pair, extended with
    the occurrence and instance counts looked up for that pair.
    """

    # RDF.type of the instances the property was read from.
    class_uri: URIRef
    # NEAT.type of the same instances.
    subclass_uri: URIRef
    # Predicate of the observed triples.
    property_uri: URIRef
    # datatype() of the observed value; expected to be None for non-literal values.
    data_type: URIRef | None
    # rdf:type of the observed value when the value is itself a typed node, else None.
    object_type: URIRef | None
    # Largest number of values a single instance holds for this property.
    max_occurrence: int
    # Number of instances in the (class, subclass) pair.
    instance_count: int
302
+
303
+
304
class SubclassInferenceImporter(BaseRDFImporter):
    """Infer subclasses from a triple store.

    Assumes that the graph already is connected to a schema. The classes should
    match the RDF.type of the instances in the graph, while the subclasses should
    match the NEAT.type of the instances in the graph.

    ClassVars:
        overwrite_data_types: Mapping of data types to be overwritten. The InferenceImporter will overwrite
            32-bit integer and 32-bit float data types to 64-bit integer and 64-bit float data types

    Args:
        issue_list: Issue list to store issues
        graph: Knowledge graph
        rules: Existing information rules. Their metadata identifies the resulting data model, and
            (class, property) pairs they already define are not inferred again.
        max_number_of_instance: Maximum number of instances to be used in inference
        non_existing_node_type: Forwarded unchanged to BaseRDFImporter.
    """

    overwrite_data_types: ClassVar[Mapping[URIRef, URIRef]] = {
        data_types.Integer.as_xml_uri_ref(): data_types.Long.as_xml_uri_ref(),
        data_types.Float.as_xml_uri_ref(): data_types.Double.as_xml_uri_ref(),
    }

    def __init__(
        self,
        issue_list: IssueList,
        graph: Graph,
        rules: InformationRules,
        max_number_of_instance: int,
        non_existing_node_type: UnknownEntity | AnyURI = DEFAULT_NON_EXISTING_NODE_TYPE,
    ) -> None:
        super().__init__(
            issue_list,
            graph,
            rules.metadata.as_data_model_id().as_tuple(),  # type: ignore[arg-type]
            max_number_of_instance,
            non_existing_node_type,
            language="en",
        )
        self._rules = rules

    # Every (RDF.type, NEAT.type) pair with its instance count, most populated first.
    _ordered_subclass_query = f"""SELECT DISTINCT ?class ?subclass (count(?s) as ?instances )
        WHERE {{ ?s a ?class . ?s <{NEAT.type}> ?subclass }}
        group by ?class ?subclass order by DESC(?instances)"""

    # Template (str.format): properties, literal datatypes and object types seen on one pair.
    _properties_query = """SELECT DISTINCT ?property ?dataType ?objectType
        WHERE {{
            ?s a <{type}> .
            ?s <{neat_type}> <{subtype}> .
            ?s ?property ?value .
            BIND(datatype(?value) AS ?dataType) .
            OPTIONAL {{?value rdf:type ?objectType}}
        }}"""

    # Template (str.format): largest number of values one instance holds for a property.
    _max_occurrence_query = """SELECT (MAX(?count) AS ?maxCount)
        WHERE {{
            {{
                SELECT ?subject (COUNT(?object) AS ?count)
                WHERE {{
                    ?subject a <{type}> .
                    ?subject <{neat_type}> <{subtype}> .
                    ?subject <{property}> ?object .
                }}
                GROUP BY ?subject
            }}
        }}"""

    def _to_rules_components(
        self,
    ) -> dict:
        """Build the classes, properties and prefixes inferred from the graph.

        A property observed on every subclass of a class is attached to the class
        itself; any other property is attached to the subclass it was seen on.

        Returns:
            Dict with the keys "metadata", "classes", "properties" and "prefixes".
        """
        existing_classes = {class_.class_.suffix: class_ for class_ in self._rules.classes}
        prefixes = self._rules.prefixes.copy()

        # Group explicitly instead of itertools.groupby: the rows arrive ordered by
        # instance count, so the rows of one class are not guaranteed to be adjacent.
        # A dict keeps first-seen order and still merges non-adjacent rows.
        read_properties_by_class: dict[URIRef, list[_ReadProperties]] = defaultdict(list)
        for read_property in self._read_class_properties_from_graph():
            read_properties_by_class[read_property.class_uri].append(read_property)

        classes: list[InformationInputClass] = []
        properties: list[InformationInputProperty] = []
        for class_uri, class_read_properties in read_properties_by_class.items():
            properties_by_subclass_by_property = self._get_properties_by_subclass_by_property(class_read_properties)

            # Never empty here: every class in the mapping has at least one row.
            shared_property_uris = set.intersection(
                *[set(properties_by_property) for properties_by_property in properties_by_subclass_by_property.values()]
            )
            class_suffix = remove_namespace_from_uri(class_uri)
            self._add_uri_namespace_to_prefixes(class_uri, prefixes)
            if class_suffix not in existing_classes:
                classes.append(InformationInputClass(class_=class_suffix))
            else:
                classes.append(InformationInputClass.load(existing_classes[class_suffix].model_dump()))

            shared_properties: dict[URIRef, list[_ReadProperties]] = defaultdict(list)
            for subclass_uri, properties_by_property_uri in properties_by_subclass_by_property.items():
                subclass_suffix = remove_namespace_from_uri(subclass_uri)
                self._add_uri_namespace_to_prefixes(subclass_uri, prefixes)
                if subclass_suffix not in existing_classes:
                    classes.append(InformationInputClass(class_=subclass_suffix, implements=class_suffix))
                else:
                    classes.append(InformationInputClass.load(existing_classes[subclass_suffix].model_dump()))
                for property_uri, read_properties in properties_by_property_uri.items():
                    if property_uri in shared_property_uris:
                        # Collected across all subclasses and emitted once on the parent class below.
                        shared_properties[property_uri].extend(read_properties)
                        continue
                    properties.append(
                        self._create_property(read_properties, subclass_suffix, class_uri, property_uri, prefixes)
                    )
            for property_uri, read_properties in shared_properties.items():
                properties.append(
                    self._create_property(read_properties, class_suffix, class_uri, property_uri, prefixes)
                )

        return {
            "metadata": self._rules.metadata.model_dump(),
            "classes": [cls.dump(self._rules.metadata.prefix) for cls in classes],
            "properties": [prop.dump(self._rules.metadata.prefix) for prop in properties],
            # Return the extended copy: it holds the namespaces registered above, which the
            # short forms written to instance_source refer to.
            "prefixes": prefixes,
        }

    @staticmethod
    def _get_properties_by_subclass_by_property(
        class_properties_iterable: Iterable[_ReadProperties],
    ) -> dict[URIRef, dict[URIRef, list[_ReadProperties]]]:
        """Index the read properties of one class by subclass URI, then by property URI.

        Rows are accumulated, so the result does not depend on the rows of a
        subclass being adjacent in the input.
        """
        properties_by_subclass_by_property: dict[URIRef, dict[URIRef, list[_ReadProperties]]] = {}
        for read_prop in class_properties_iterable:
            if read_prop.subclass_uri not in properties_by_subclass_by_property:
                properties_by_subclass_by_property[read_prop.subclass_uri] = defaultdict(list)
            properties_by_subclass_by_property[read_prop.subclass_uri][read_prop.property_uri].append(read_prop)
        return properties_by_subclass_by_property

    def _read_class_properties_from_graph(self) -> list[_ReadProperties]:
        """Query the graph for the properties used by each (class, subclass) pair.

        RDF.type and NEAT.type are skipped, as is any property the existing rules
        already define for the class (inherited properties included).

        Returns:
            One entry per result row of the property query, i.e. per distinct
            (property, data type, object type) combination of each pair.
        """
        # Instance count of every (class, subclass) pair found in the graph.
        count_by_class_subclass_pair: dict[tuple[URIRef, URIRef], int] = {}
        for pair_row in self.graph.query(self._ordered_subclass_query):
            class_uri, subclass_uri, instance_count_literal = cast(tuple[URIRef, URIRef, RdfLiteral], pair_row)
            count_by_class_subclass_pair[(class_uri, subclass_uri)] = instance_count_literal.toPython()

        analysis = InformationAnalysis(self._rules)
        existing_class_properties = {
            (class_entity.suffix, property_id)
            for class_entity, class_properties in analysis.class_property_pairs(consider_inheritance=True).items()
            for property_id in class_properties
        }

        properties_by_class_by_subclass: list[_ReadProperties] = []
        for (class_uri, subclass_uri), instance_count in count_by_class_subclass_pair.items():
            property_query = self._properties_query.format(type=class_uri, subtype=subclass_uri, neat_type=NEAT.type)
            class_suffix = remove_namespace_from_uri(class_uri)
            for property_row in self.graph.query(property_query):
                property_uri, data_type_uri, object_type_uri = cast(tuple[URIRef, URIRef, URIRef], property_row)
                if property_uri == RDF.type or property_uri == NEAT.type:
                    continue
                property_str = remove_namespace_from_uri(property_uri)
                if (class_suffix, property_str) in existing_class_properties:
                    continue
                occurrence_query = self._max_occurrence_query.format(
                    type=class_uri, subtype=subclass_uri, property=property_uri, neat_type=NEAT.type
                )
                max_occurrence = 1  # default value
                # Tolerate an empty result or an unbound aggregate instead of raising.
                occurrence_row = next(iter(self.graph.query(occurrence_query)), None)
                if occurrence_row and occurrence_row[0] is not None:
                    max_occurrence = int(cast(RdfLiteral, occurrence_row[0]).toPython())
                properties_by_class_by_subclass.append(
                    _ReadProperties(
                        class_uri=class_uri,
                        subclass_uri=subclass_uri,
                        property_uri=property_uri,
                        data_type=data_type_uri,
                        object_type=object_type_uri,
                        max_occurrence=max_occurrence,
                        instance_count=instance_count,
                    )
                )
        return properties_by_class_by_subclass

    def _create_property(
        self,
        read_properties: list[_ReadProperties],
        class_suffix: str,
        class_uri: URIRef,
        property_uri: URIRef,
        prefixes: dict[str, Namespace],
    ) -> InformationInputProperty:
        """Create the input property for one property URI.

        Args:
            read_properties: All observations of the property; must not be empty.
            class_suffix: Class (or subclass) the property is attached to.
            class_uri: URI of the class the instances were read from; used for instance_source.
            property_uri: URI of the property.
            prefixes: Prefix map; extended in place with any namespace not yet registered.
        """
        value_type = self._get_value_type(read_properties, prefixes)
        property_name = remove_namespace_from_uri(property_uri)
        self._add_uri_namespace_to_prefixes(property_uri, prefixes)

        return InformationInputProperty(
            class_=class_suffix,
            property_=property_name,
            # A property shared by several subclasses must admit the largest count seen on
            # any of them, not just the count of whichever subclass happened to come first.
            max_count=max(prop.max_occurrence for prop in read_properties),
            value_type=value_type,
            instance_source=(f"{uri_to_short_form(class_uri, prefixes)}({uri_to_short_form(property_uri, prefixes)})"),
        )

    def _get_value_type(
        self, read_properties: list[_ReadProperties], prefixes: dict[str, Namespace]
    ) -> str | UnknownEntity:
        """Return the value type of a property from its observed data and object types.

        Returns:
            UnknownEntity when no type was observed, otherwise the observed type
            names joined with " | " (a single name when there is only one).
        """
        value_types = {prop.data_type for prop in read_properties if prop.data_type} | {
            prop.object_type for prop in read_properties if prop.object_type
        }
        if not value_types:
            return UnknownEntity()
        # Sets iterate in hash order; sort so that both the generated prefix names and
        # the joined value type are reproducible between runs.
        ordered_value_types = sorted(value_types, key=str)
        for uri_ref in ordered_value_types:
            self._add_uri_namespace_to_prefixes(uri_ref, prefixes)
        return " | ".join(sorted(remove_namespace_from_uri(uri_ref) for uri_ref in ordered_value_types))
@@ -21,6 +21,7 @@ from typing import (
21
21
  )
22
22
 
23
23
  import pandas as pd
24
+ from cognite.client import data_modeling as dm
24
25
  from pydantic import (
25
26
  BaseModel,
26
27
  BeforeValidator,
@@ -180,6 +181,12 @@ class BaseMetadata(SchemaModel):
180
181
  description="Date of the data model update",
181
182
  )
182
183
 
184
+ source_id: URIRefType | None = Field(
185
+ None,
186
+ description="Id of source that produced this rules",
187
+ alias="sourceId",
188
+ )
189
+
183
190
  @field_validator("*", mode="before")
184
191
  def strip_string(cls, value: Any) -> Any:
185
192
  if isinstance(value, str):
@@ -213,9 +220,6 @@ class BaseMetadata(SchemaModel):
213
220
  def prefix(self) -> str:
214
221
  return self.space
215
222
 
216
- def as_identifier(self) -> str:
217
- return f"{self.prefix}:{self.external_id}"
218
-
219
223
  def get_prefix(self) -> str:
220
224
  return self.prefix
221
225
 
@@ -234,6 +238,12 @@ class BaseMetadata(SchemaModel):
234
238
  """Namespace for the data model used for the entities in the data model."""
235
239
  return Namespace(f"{self.identifier}/")
236
240
 
241
+ def as_data_model_id(self) -> dm.DataModelId:
242
+ return dm.DataModelId(space=self.space, external_id=self.external_id, version=self.version)
243
+
244
+ def as_identifier(self) -> str:
245
+ return repr(self.as_data_model_id())
246
+
237
247
 
238
248
  class BaseRules(SchemaModel, ABC):
239
249
  """
@@ -64,9 +64,6 @@ class DMSMetadata(BaseMetadata):
64
64
  space=self.space,
65
65
  )
66
66
 
67
- def as_data_model_id(self) -> dm.DataModelId:
68
- return dm.DataModelId(space=self.space, external_id=self.external_id, version=self.version)
69
-
70
67
  def as_data_model(self) -> dm.DataModelApply:
71
68
  suffix = f"Creator: {', '.join(self.creator)}"
72
69
  if self.description:
@@ -83,9 +80,6 @@ class DMSMetadata(BaseMetadata):
83
80
  views=[],
84
81
  )
85
82
 
86
- def as_identifier(self) -> str:
87
- return repr(self.as_data_model_id())
88
-
89
83
  def get_prefix(self) -> str:
90
84
  return self.space
91
85
 
@@ -109,8 +103,7 @@ class DMSProperty(SheetRow):
109
103
  )
110
104
  value_type: DataType | ViewEntity | DMSUnknownEntity = Field(
111
105
  alias="Value Type",
112
- description="Value type that the property can hold. "
113
- "It takes either subset of CDF primitive types or a View id",
106
+ description="Value type that the property can hold. It takes either subset of CDF primitive types or a View id",
114
107
  )
115
108
  nullable: bool | None = Field(
116
109
  default=None,
@@ -37,6 +37,7 @@ class DMSInputMetadata(InputComponent[DMSMetadata]):
37
37
  created: datetime | str | None = None
38
38
  updated: datetime | str | None = None
39
39
  logical: str | URIRef | None = None
40
+ source_id: str | URIRef | None = None
40
41
 
41
42
  @classmethod
42
43
  def _get_verified_cls(cls) -> type[DMSMetadata]:
@@ -77,6 +78,9 @@ class DMSInputMetadata(InputComponent[DMSMetadata]):
77
78
  description = None
78
79
  return description, creator
79
80
 
81
+ def as_data_model_id(self) -> dm.DataModelId:
82
+ return dm.DataModelId(space=self.space, external_id=self.external_id, version=self.version)
83
+
80
84
  @property
81
85
  def identifier(self) -> URIRef:
82
86
  """Globally unique identifier for the data model.
@@ -3,6 +3,7 @@ from datetime import datetime
3
3
  from typing import Any
4
4
 
5
5
  import pandas as pd
6
+ from cognite.client import data_modeling as dm
6
7
  from rdflib import Namespace, URIRef
7
8
 
8
9
  from cognite.neat._constants import DEFAULT_NAMESPACE
@@ -36,6 +37,7 @@ class InformationInputMetadata(InputComponent[InformationMetadata]):
36
37
  updated: datetime | str | None = None
37
38
  physical: str | URIRef | None = None
38
39
  conceptual: str | URIRef | None = None
40
+ source_id: str | URIRef | None = None
39
41
 
40
42
  @classmethod
41
43
  def _get_verified_cls(cls) -> type[InformationMetadata]:
@@ -49,6 +51,9 @@ class InformationInputMetadata(InputComponent[InformationMetadata]):
49
51
  output["updated"] = datetime.now()
50
52
  return output
51
53
 
54
+ def as_data_model_id(self) -> dm.DataModelId:
55
+ return dm.DataModelId(space=self.space, external_id=self.external_id, version=self.version)
56
+
52
57
  @property
53
58
  def prefix(self) -> str:
54
59
  return self.space
@@ -3,10 +3,13 @@ from ._converters import (
3
3
  AddClassImplements,
4
4
  ChangeViewPrefix,
5
5
  ClassicPrepareCore,
6
+ ConversionTransformer,
6
7
  ConvertToRules,
7
8
  DMSToInformation,
8
9
  IncludeReferenced,
9
10
  InformationToDMS,
11
+ MergeDMSRules,
12
+ MergeInformationRules,
10
13
  PrefixEntities,
11
14
  ReduceCogniteModel,
12
15
  SetIDDMSModel,
@@ -24,11 +27,14 @@ __all__ = [
24
27
  "AsParentPropertyId",
25
28
  "ChangeViewPrefix",
26
29
  "ClassicPrepareCore",
30
+ "ConversionTransformer",
27
31
  "ConvertToRules",
28
32
  "DMSToInformation",
29
33
  "IncludeReferenced",
30
34
  "InformationToDMS",
31
35
  "MapOneToOne",
36
+ "MergeDMSRules",
37
+ "MergeInformationRules",
32
38
  "PrefixEntities",
33
39
  "ReduceCogniteModel",
34
40
  "RuleMapper",
@@ -16,6 +16,7 @@ from cognite.neat._client.data_classes.data_modeling import ContainerApplyDict,
16
16
  from cognite.neat._constants import (
17
17
  COGNITE_MODELS,
18
18
  DMS_CONTAINER_PROPERTY_SIZE_LIMIT,
19
+ DMS_RESERVED_PROPERTIES,
19
20
  get_default_prefixes_and_namespaces,
20
21
  )
21
22
  from cognite.neat._issues.errors import NeatValueError
@@ -41,9 +42,9 @@ from cognite.neat._rules.models import (
41
42
  )
42
43
  from cognite.neat._rules.models._rdfpath import Entity as RDFPathEntity
43
44
  from cognite.neat._rules.models._rdfpath import RDFPath, SingleProperty
44
- from cognite.neat._rules.models.data_types import AnyURI, DataType, String
45
+ from cognite.neat._rules.models.data_types import AnyURI, DataType, Enum, File, String, Timeseries
45
46
  from cognite.neat._rules.models.dms import DMSMetadata, DMSProperty, DMSValidation, DMSView
46
- from cognite.neat._rules.models.dms._rules import DMSContainer
47
+ from cognite.neat._rules.models.dms._rules import DMSContainer, DMSEnum, DMSNode
47
48
  from cognite.neat._rules.models.entities import (
48
49
  ClassEntity,
49
50
  ContainerEntity,
@@ -252,11 +253,16 @@ class PrefixEntities(RulesTransformer[ReadRules[T_InputRules], ReadRules[T_Input
252
253
  class InformationToDMS(ConversionTransformer[InformationRules, DMSRules]):
253
254
  """Converts InformationRules to DMSRules."""
254
255
 
255
- def __init__(self, ignore_undefined_value_types: bool = False):
256
+ def __init__(
257
+ self, ignore_undefined_value_types: bool = False, reserved_properties: Literal["error", "skip"] = "error"
258
+ ):
256
259
  self.ignore_undefined_value_types = ignore_undefined_value_types
260
+ self.reserved_properties = reserved_properties
257
261
 
258
262
  def transform(self, rules: InformationRules) -> DMSRules:
259
- return _InformationRulesConverter(rules).as_dms_rules(self.ignore_undefined_value_types)
263
+ return _InformationRulesConverter(rules).as_dms_rules(
264
+ self.ignore_undefined_value_types, self.reserved_properties
265
+ )
260
266
 
261
267
 
262
268
  class DMSToInformation(ConversionTransformer[DMSRules, InformationRules]):
@@ -823,11 +829,21 @@ class ClassicPrepareCore(RulesTransformer[InformationRules, InformationRules]):
823
829
  - ClassicTimeseries.isString from boolean to string
824
830
  - Add class ClassicSourceSystem, and update all source properties from string to ClassicSourceSystem.
825
831
  - Rename externalId properties to classicExternalId
826
- - Renames the Relationship.sourceExternaId and Relationship.targetExternalId to startNode and endNode
832
+ - Renames the Relationship.sourceExternalId and Relationship.targetExternalId to startNode and endNode
833
+ - If reference_timeseries is True, the classicExternalId property of the TimeSeries class will change type
834
+ from string to timeseries.
835
+ - If reference_files is True, the classicExternalId property of the File class will change type from string to file.
827
836
  """
828
837
 
829
- def __init__(self, instance_namespace: Namespace) -> None:
838
+ def __init__(
839
+ self,
840
+ instance_namespace: Namespace,
841
+ reference_timeseries: bool = False,
842
+ reference_files: bool = False,
843
+ ) -> None:
830
844
  self.instance_namespace = instance_namespace
845
+ self.reference_timeseries = reference_timeseries
846
+ self.reference_files = reference_files
831
847
 
832
848
  @property
833
849
  def description(self) -> str:
@@ -851,6 +867,10 @@ class ClassicPrepareCore(RulesTransformer[InformationRules, InformationRules]):
851
867
  prop.value_type = ClassEntity(prefix=prefix, suffix="ClassicSourceSystem")
852
868
  elif prop.property_ == "externalId":
853
869
  prop.property_ = "classicExternalId"
870
+ if self.reference_timeseries and prop.class_.suffix == "ClassicTimeSeries":
871
+ prop.value_type = Timeseries()
872
+ elif self.reference_files and prop.class_.suffix == "ClassicFile":
873
+ prop.value_type = File()
854
874
  elif prop.property_ == "sourceExternalId" and prop.class_.suffix == "ClassicRelationship":
855
875
  prop.property_ = "startNode"
856
876
  elif prop.property_ == "targetExternalId" and prop.class_.suffix == "ClassicRelationship":
@@ -907,6 +927,68 @@ class ChangeViewPrefix(RulesTransformer[DMSRules, DMSRules]):
907
927
  return output
908
928
 
909
929
 
930
class MergeDMSRules(RulesTransformer[DMSRules, DMSRules]):
    """Add the views, properties, containers, enums and nodes of ``extra`` that are missing.

    Components already present in the rules being transformed are kept as they
    are; ``extra`` only contributes components that are not there yet.

    Args:
        extra: DMS rules to take the missing components from.
    """

    def __init__(self, extra: DMSRules) -> None:
        self.extra = extra

    def transform(self, rules: DMSRules) -> DMSRules:
        """Return a deep copy of ``rules`` extended with the missing components of ``extra``."""
        output = rules.model_copy(deep=True)

        # Every "existing" set is updated as items are added, so that an item is
        # appended at most once even when it is referenced several times in ``extra``.
        existing_views = {view.view for view in output.views}
        for view in self.extra.views:
            if view.view not in existing_views:
                output.views.append(view)
                existing_views.add(view.view)

        existing_properties = {(prop.view, prop.view_property) for prop in output.properties}
        existing_containers = {container.container for container in output.containers or []}
        existing_enum_collections = {collection.collection for collection in output.enum or []}
        new_containers_by_entity = {container.container: container for container in self.extra.containers or []}
        new_enum_collections_by_entity = {collection.collection: collection for collection in self.extra.enum or []}
        for prop in self.extra.properties:
            property_key = (prop.view, prop.view_property)
            if property_key in existing_properties:
                continue
            output.properties.append(prop)
            existing_properties.add(property_key)

            if prop.container and prop.container not in existing_containers:
                # ``extra`` may reference a container it does not define itself;
                # there is then nothing to copy.
                new_container = new_containers_by_entity.get(prop.container)
                if new_container is not None:
                    if output.containers is None:
                        output.containers = SheetList[DMSContainer]()
                    output.containers.append(new_container)
                    existing_containers.add(prop.container)

            if isinstance(prop.value_type, Enum) and prop.value_type.collection not in existing_enum_collections:
                new_collection = new_enum_collections_by_entity.get(prop.value_type.collection)
                if new_collection is not None:
                    if output.enum is None:
                        output.enum = SheetList[DMSEnum]()
                    output.enum.append(new_collection)
                    existing_enum_collections.add(prop.value_type.collection)

        existing_nodes = {node.node for node in output.nodes or []}
        for node in self.extra.nodes or []:
            if node.node not in existing_nodes:
                if output.nodes is None:
                    output.nodes = SheetList[DMSNode]()
                output.nodes.append(node)
                existing_nodes.add(node.node)

        return output

    @property
    def description(self) -> str:
        return f"Merged with {self.extra.metadata.as_data_model_id()}"
970
+
971
+
972
class MergeInformationRules(RulesTransformer[InformationRules, InformationRules]):
    """Add the classes, properties and prefixes of ``extra`` that are missing.

    Components already present in the rules being transformed are kept as they
    are; ``extra`` only contributes components that are not there yet.

    Args:
        extra: Information rules to take the missing components from.
    """

    def __init__(self, extra: InformationRules) -> None:
        self.extra = extra

    def transform(self, rules: InformationRules) -> InformationRules:
        """Return a deep copy of ``rules`` extended with the missing components of ``extra``."""
        output = rules.model_copy(deep=True)

        # The "existing" sets are updated as items are added, so that a repeated
        # entry in ``extra`` is appended at most once.
        existing_classes = {cls.class_ for cls in output.classes}
        for cls in self.extra.classes:
            if cls.class_ not in existing_classes:
                output.classes.append(cls)
                existing_classes.add(cls.class_)

        existing_properties = {(prop.class_, prop.property_) for prop in output.properties}
        for prop in self.extra.properties:
            property_key = (prop.class_, prop.property_)
            if property_key not in existing_properties:
                output.properties.append(prop)
                existing_properties.add(property_key)

        # Prefixes already defined in the output win over those of ``extra``.
        for prefix, namespace in self.extra.prefixes.items():
            if prefix not in output.prefixes:
                output.prefixes[prefix] = namespace
        return output

    @property
    def description(self) -> str:
        # Same wording as MergeDMSRules.description.
        return f"Merged with {self.extra.metadata.as_data_model_id()}"
990
+
991
+
910
992
  class _InformationRulesConverter:
911
993
  _start_or_end_node: ClassVar[frozenset[str]] = frozenset({"endNode", "end_node", "startNode", "start_node"})
912
994
 
@@ -914,7 +996,9 @@ class _InformationRulesConverter:
914
996
  self.rules = information
915
997
  self.property_count_by_container: dict[ContainerEntity, int] = defaultdict(int)
916
998
 
917
- def as_dms_rules(self, ignore_undefined_value_types: bool = False) -> "DMSRules":
999
+ def as_dms_rules(
1000
+ self, ignore_undefined_value_types: bool = False, reserved_properties: Literal["error", "skip"] = "error"
1001
+ ) -> "DMSRules":
918
1002
  from cognite.neat._rules.models.dms._rules import (
919
1003
  DMSContainer,
920
1004
  DMSProperty,
@@ -957,6 +1041,13 @@ class _InformationRulesConverter:
957
1041
  continue
958
1042
  if prop.class_ in edge_classes and prop.property_ in self._start_or_end_node:
959
1043
  continue
1044
+ if prop.property_ in DMS_RESERVED_PROPERTIES:
1045
+ msg = f"Property {prop.property_} is a reserved property in DMS."
1046
+ if reserved_properties == "error":
1047
+ raise NeatValueError(msg)
1048
+ warnings.warn(NeatValueWarning(f"{msg} Skipping..."), stacklevel=2)
1049
+ continue
1050
+
960
1051
  dms_property = self._as_dms_property(
961
1052
  prop,
962
1053
  default_space,