cognite-neat 0.107.0__py3-none-any.whl → 0.109.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (69)
  1. cognite/neat/_constants.py +35 -1
  2. cognite/neat/_graph/_shared.py +4 -0
  3. cognite/neat/_graph/extractors/_classic_cdf/_base.py +115 -14
  4. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +87 -6
  5. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +48 -12
  6. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +19 -1
  7. cognite/neat/_graph/extractors/_dms.py +162 -47
  8. cognite/neat/_graph/extractors/_dms_graph.py +54 -4
  9. cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
  10. cognite/neat/_graph/extractors/_rdf_file.py +3 -2
  11. cognite/neat/_graph/loaders/__init__.py +1 -3
  12. cognite/neat/_graph/loaders/_rdf2dms.py +20 -10
  13. cognite/neat/_graph/queries/_base.py +144 -84
  14. cognite/neat/_graph/queries/_construct.py +1 -1
  15. cognite/neat/_graph/transformers/__init__.py +3 -1
  16. cognite/neat/_graph/transformers/_base.py +4 -4
  17. cognite/neat/_graph/transformers/_classic_cdf.py +13 -13
  18. cognite/neat/_graph/transformers/_prune_graph.py +3 -3
  19. cognite/neat/_graph/transformers/_rdfpath.py +3 -4
  20. cognite/neat/_graph/transformers/_value_type.py +71 -13
  21. cognite/neat/_issues/errors/__init__.py +2 -0
  22. cognite/neat/_issues/errors/_external.py +8 -0
  23. cognite/neat/_issues/errors/_resources.py +1 -1
  24. cognite/neat/_issues/warnings/__init__.py +0 -2
  25. cognite/neat/_issues/warnings/_models.py +1 -1
  26. cognite/neat/_issues/warnings/_properties.py +0 -8
  27. cognite/neat/_issues/warnings/_resources.py +1 -1
  28. cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
  29. cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
  30. cognite/neat/_rules/exporters/_rules2yaml.py +1 -1
  31. cognite/neat/_rules/importers/__init__.py +3 -1
  32. cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
  33. cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
  34. cognite/neat/_rules/importers/_rdf/_base.py +2 -2
  35. cognite/neat/_rules/importers/_rdf/_inference2rules.py +310 -26
  36. cognite/neat/_rules/models/_base_rules.py +22 -11
  37. cognite/neat/_rules/models/dms/_exporter.py +5 -4
  38. cognite/neat/_rules/models/dms/_rules.py +1 -8
  39. cognite/neat/_rules/models/dms/_rules_input.py +4 -0
  40. cognite/neat/_rules/models/information/_rules_input.py +5 -0
  41. cognite/neat/_rules/transformers/__init__.py +10 -3
  42. cognite/neat/_rules/transformers/_base.py +6 -1
  43. cognite/neat/_rules/transformers/_converters.py +530 -364
  44. cognite/neat/_rules/transformers/_mapping.py +4 -4
  45. cognite/neat/_session/_base.py +100 -47
  46. cognite/neat/_session/_create.py +133 -0
  47. cognite/neat/_session/_drop.py +60 -2
  48. cognite/neat/_session/_fix.py +28 -0
  49. cognite/neat/_session/_inspect.py +22 -7
  50. cognite/neat/_session/_mapping.py +8 -8
  51. cognite/neat/_session/_prepare.py +3 -247
  52. cognite/neat/_session/_read.py +138 -17
  53. cognite/neat/_session/_set.py +50 -1
  54. cognite/neat/_session/_show.py +16 -43
  55. cognite/neat/_session/_state.py +53 -52
  56. cognite/neat/_session/_to.py +11 -4
  57. cognite/neat/_session/_wizard.py +1 -1
  58. cognite/neat/_session/exceptions.py +8 -1
  59. cognite/neat/_store/_graph_store.py +301 -146
  60. cognite/neat/_store/_provenance.py +36 -20
  61. cognite/neat/_store/_rules_store.py +253 -267
  62. cognite/neat/_store/exceptions.py +40 -4
  63. cognite/neat/_utils/auth.py +5 -3
  64. cognite/neat/_version.py +1 -1
  65. {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/METADATA +1 -1
  66. {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/RECORD +69 -67
  67. {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/LICENSE +0 -0
  68. {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/WHEEL +0 -0
  69. {cognite_neat-0.107.0.dist-info → cognite_neat-0.109.0.dist-info}/entry_points.txt +0 -0
@@ -1,18 +1,25 @@
1
+ import itertools
1
2
  from collections import Counter, defaultdict
2
- from collections.abc import Mapping
3
+ from collections.abc import Iterable, Mapping
4
+ from dataclasses import dataclass
3
5
  from datetime import datetime, timezone
4
6
  from pathlib import Path
5
- from typing import ClassVar, cast
7
+ from typing import Any, ClassVar, cast
6
8
 
7
9
  from cognite.client import data_modeling as dm
8
- from rdflib import RDF, Namespace, URIRef
10
+ from rdflib import RDF, RDFS, Graph, Namespace, URIRef
9
11
  from rdflib import Literal as RdfLiteral
10
12
 
11
- from cognite.neat._issues.warnings import PropertySkippedWarning, PropertyValueTypeUndefinedWarning
12
- from cognite.neat._rules.models import data_types
13
+ from cognite.neat._constants import NEAT, get_default_prefixes_and_namespaces
14
+ from cognite.neat._issues import IssueList
15
+ from cognite.neat._issues.warnings import PropertyValueTypeUndefinedWarning
16
+ from cognite.neat._rules.analysis import InformationAnalysis
17
+ from cognite.neat._rules.models import InformationRules, data_types
13
18
  from cognite.neat._rules.models.data_types import AnyURI
14
19
  from cognite.neat._rules.models.entities._single_value import UnknownEntity
15
20
  from cognite.neat._rules.models.information import (
21
+ InformationInputClass,
22
+ InformationInputProperty,
16
23
  InformationMetadata,
17
24
  )
18
25
  from cognite.neat._store import NeatGraphStore
@@ -74,7 +81,7 @@ class InferenceImporter(BaseRDFImporter):
74
81
  def from_graph_store(
75
82
  cls,
76
83
  store: NeatGraphStore,
77
- data_model_id: (dm.DataModelId | tuple[str, str, str]) = DEFAULT_INFERENCE_DATA_MODEL_ID,
84
+ data_model_id: dm.DataModelId | tuple[str, str, str] = DEFAULT_INFERENCE_DATA_MODEL_ID,
78
85
  max_number_of_instance: int = -1,
79
86
  non_existing_node_type: UnknownEntity | AnyURI = DEFAULT_NON_EXISTING_NODE_TYPE,
80
87
  language: str = "en",
@@ -157,7 +164,7 @@ class InferenceImporter(BaseRDFImporter):
157
164
  for class_uri, no_instances in self.graph.query(ORDERED_CLASSES_QUERY): # type: ignore[misc]
158
165
  if (class_id := remove_namespace_from_uri(cast(URIRef, class_uri))) in classes:
159
166
  # handles cases when class id is already present in classes
160
- class_id = f"{class_id}_{len(classes)+1}"
167
+ class_id = f"{class_id}_{len(classes) + 1}"
161
168
 
162
169
  classes[class_id] = {
163
170
  "class_": class_id,
@@ -187,21 +194,10 @@ class InferenceImporter(BaseRDFImporter):
187
194
  INSTANCE_PROPERTIES_DEFINITION.replace("instance_id", instance)
188
195
  ): # type: ignore[misc]
189
196
  # this is to skip rdf:type property
197
+
190
198
  if property_uri == RDF.type:
191
199
  continue
192
200
  property_id = remove_namespace_from_uri(property_uri)
193
- if property_id in {"external_id", "externalId"}:
194
- skip_issue = PropertySkippedWarning(
195
- resource_type="Property",
196
- identifier=f"{class_id}:{property_id}",
197
- property_name=property_id,
198
- reason="External ID is assumed to be the unique identifier of the instance "
199
- "and is not part of the data model schema.",
200
- )
201
- if skip_issue not in self.issue_list:
202
- self.issue_list.append(skip_issue)
203
- continue
204
-
205
201
  self._add_uri_namespace_to_prefixes(cast(URIRef, property_uri), prefixes)
206
202
 
207
203
  if isinstance(data_type_uri, URIRef):
@@ -250,13 +246,8 @@ class InferenceImporter(BaseRDFImporter):
250
246
  elif id_ in properties and definition["value_type"] not in properties[id_]["value_type"]:
251
247
  properties[id_]["value_type"].add(definition["value_type"])
252
248
 
253
- # USE CASE 3: existing but max count is different
254
- elif (
255
- id_ in properties
256
- and definition["value_type"] in properties[id_]["value_type"]
257
- and properties[id_]["max_count"] != definition["max_count"]
258
- ):
259
- properties[id_]["max_count"] = max(properties[id_]["max_count"], definition["max_count"])
249
+ # always update max_count with the upmost value
250
+ properties[id_]["max_count"] = max(properties[id_]["max_count"], definition["max_count"])
260
251
 
261
252
  # Create multi-value properties otherwise single value
262
253
  for property_ in properties.values():
@@ -292,3 +283,296 @@ class InferenceImporter(BaseRDFImporter):
292
283
  @property
293
284
  def source_uri(self) -> URIRef:
294
285
  return INSTANCES_ENTITY.id_
286
+
287
+
288
+ # Internal helper class
289
@dataclass
class _ReadProperties:
    # One (type, property, value type) observation read from the graph by
    # SubclassInferenceImporter._read_class_properties_from_graph.

    # URI of the class (rdf:type) whose instances carry the property.
    type_uri: URIRef
    # URI of the property (predicate) observed on those instances.
    property_uri: URIRef
    # URI bound to ?valueType by the properties query: the literal datatype,
    # the rdf:type of the object, or NEAT.UnknownType when neither is available.
    value_type: URIRef
    # URI of the rdfs:subClassOf parent of type_uri, or None when no parent was found.
    parent_uri: URIRef | None
    # Largest number of values a single instance of type_uri has for this property.
    max_occurrence: int
    # Number of instances of type_uri found in the graph.
    instance_count: int
297
+
298
+
299
class SubclassInferenceImporter(BaseRDFImporter):
    """Infer classes, subclasses and their properties from a triple store.

    Assumes that the graph is already connected to a schema. The classes should
    match the RDF.type of the instances in the graph, while the subclasses should
    match the NEAT.type of the instances in the graph.

    ClassVars:
        overwrite_data_types: Mapping of data types to be overwritten. This importer overwrites
            32-bit integer and 32-bit float data types with 64-bit integer and 64-bit float data types.

    Args:
        issue_list: Issue list to store issues
        graph: Knowledge graph
        rules: Existing information rules to extend. Class/property pairs already
            present in these rules are not inferred again.
        data_model_id: Identifier of the data model to create when no rules are given.
        non_existing_node_type: Forwarded unchanged to ``BaseRDFImporter``.

    Exactly one of ``rules`` and ``data_model_id`` must be provided.
    """

    overwrite_data_types: ClassVar[Mapping[URIRef, URIRef]] = {
        data_types.Integer.as_xml_uri_ref(): data_types.Long.as_xml_uri_ref(),
        data_types.Float.as_xml_uri_ref(): data_types.Double.as_xml_uri_ref(),
    }

    # All rdf:type values in the graph with their instance counts, most populated first.
    _ordered_class_query = """SELECT DISTINCT ?class (count(?s) as ?instances )
                           WHERE { ?s a ?class }
                           group by ?class order by DESC(?instances)"""

    # (parent, type) pairs for every instantiated type that declares rdfs:subClassOf.
    _type_parent_query = f"""SELECT ?parent ?type
                            WHERE {{ ?s a ?type .
                            ?type <{RDFS.subClassOf}> ?parent }}"""

    # Template (str.format): distinct (property, value type) pairs used by instances of {type}.
    _properties_query = """SELECT DISTINCT ?property ?valueType
                        WHERE {{
                            ?s a <{type}> .
                            ?s ?property ?object .
                            OPTIONAL {{ ?object a ?objectType }}
                            BIND(
                               IF(
                                    isLiteral(?object), datatype(?object),
                                    IF(BOUND(?objectType), ?objectType, <{unknown_type}>)
                                ) AS ?valueType
                            )
                        }}"""

    # Template (str.format): the largest number of {property} values on any single {type} instance.
    _max_occurrence_query = """SELECT (MAX(?count) AS ?maxCount)
                            WHERE {{
                              {{
                                SELECT ?subject (COUNT(?object) AS ?count)
                                WHERE {{
                                  ?subject a <{type}> .
                                  ?subject <{property}> ?object .
                                }}
                                GROUP BY ?subject
                              }}
                            }}"""

    def __init__(
        self,
        issue_list: IssueList,
        graph: Graph,
        rules: InformationRules | None = None,
        data_model_id: dm.DataModelId | tuple[str, str, str] | None = None,
        non_existing_node_type: UnknownEntity | AnyURI = DEFAULT_NON_EXISTING_NODE_TYPE,
    ) -> None:
        # Exactly one of the two sources of the data model identifier must be given.
        if (rules is None) == (data_model_id is None):
            raise ValueError("Exactly one of rules or data_model_id must be provided.")

        identifier: dm.DataModelId | tuple[str, str, str]
        if rules is not None:
            identifier = rules.metadata.as_data_model_id().as_tuple()  # type: ignore[assignment]
        else:
            identifier = cast(dm.DataModelId | tuple[str, str, str], data_model_id)

        super().__init__(issue_list, graph, identifier, -1, non_existing_node_type, language="en")
        self._rules = rules

    def _to_rules_components(
        self,
    ) -> dict:
        """Build the dumped rules components (metadata, classes, properties, prefixes)."""
        if self._rules:
            prefixes = self._rules.prefixes.copy()
        else:
            prefixes = get_default_prefixes_and_namespaces()

        parent_by_child = self._read_parent_by_child_from_graph()
        read_properties = self._read_class_properties_from_graph(parent_by_child)
        # NOTE: this call also registers the namespaces it encounters in `prefixes`.
        classes, properties = self._create_classes_properties(read_properties, prefixes)

        if self._rules:
            metadata = self._rules.metadata.model_dump()
            default_space = self._rules.metadata.prefix
        else:
            metadata = self._default_metadata()
            default_space = metadata["space"]

        return {
            "metadata": metadata,
            "classes": [cls.dump(default_space) for cls in classes],
            "properties": [prop.dump(default_space) for prop in properties],
            "prefixes": prefixes,
        }

    def _create_classes_properties(
        self, read_properties: list[_ReadProperties], prefixes: dict[str, Namespace]
    ) -> tuple[list[InformationInputClass], list[InformationInputProperty]]:
        """Turn the raw graph observations into input classes and properties.

        Observations are grouped by parent class. Within a group, a property that
        occurs on every child class is attached to the parent instead of to each
        child. Classes that already exist in ``self._rules`` are re-used as-is.

        Args:
            read_properties: Observations from ``_read_class_properties_from_graph``.
            prefixes: Prefix-to-namespace mapping; extended in place.

        Returns:
            The created classes and properties.
        """
        if self._rules:
            existing_classes = {class_.class_.suffix: class_ for class_ in self._rules.classes}
        else:
            existing_classes = {}

        def parent_key(read_property: _ReadProperties) -> URIRef:
            # Types without a parent are grouped under the NEAT.EmptyType sentinel.
            return read_property.parent_uri or NEAT.EmptyType

        classes: list[InformationInputClass] = []
        properties: list[InformationInputProperty] = []
        # Help for IDE
        type_uri: URIRef
        parent_uri: URIRef
        # groupby requires its input to be sorted by the same key.
        for parent_uri, parent_class_properties_iterable in itertools.groupby(
            sorted(read_properties, key=parent_key), key=parent_key
        ):
            properties_by_class_by_property = self._get_properties_by_class_by_property(
                parent_class_properties_iterable
            )

            parent_suffix: str | None = None
            shared_property_uris: set[URIRef] = set()
            if parent_uri != NEAT.EmptyType:
                # Properties present on every child of this parent.
                shared_property_uris = set.intersection(
                    *(set(by_property.keys()) for by_property in properties_by_class_by_property.values())
                )
                parent_suffix = remove_namespace_from_uri(parent_uri)
                self._add_uri_namespace_to_prefixes(parent_uri, prefixes)
                if parent_suffix not in existing_classes:
                    classes.append(InformationInputClass(class_=parent_suffix))
                else:
                    classes.append(InformationInputClass.load(existing_classes[parent_suffix].model_dump()))

            shared_properties: dict[URIRef, list[_ReadProperties]] = defaultdict(list)
            for type_uri, properties_by_property_uri in properties_by_class_by_property.items():
                class_suffix = remove_namespace_from_uri(type_uri)
                self._add_uri_namespace_to_prefixes(type_uri, prefixes)

                if class_suffix not in existing_classes:
                    classes.append(
                        InformationInputClass(
                            class_=class_suffix,
                            implements=parent_suffix,
                        )
                    )
                else:
                    classes.append(InformationInputClass.load(existing_classes[class_suffix].model_dump()))

                # `property_reads` deliberately does not re-use the name of the
                # `read_properties` parameter.
                for property_uri, property_reads in properties_by_property_uri.items():
                    if property_uri in shared_property_uris:
                        # Collected here and emitted once on the parent below.
                        shared_properties[property_uri].extend(property_reads)
                        continue
                    properties.append(
                        self._create_property(property_reads, class_suffix, type_uri, property_uri, prefixes)
                    )

            if parent_suffix:
                for property_uri, property_reads in shared_properties.items():
                    properties.append(
                        self._create_property(
                            property_reads, parent_suffix, property_reads[0].type_uri, property_uri, prefixes
                        )
                    )
        return classes, properties

    @staticmethod
    def _get_properties_by_class_by_property(
        parent_class_properties_iterable: Iterable[_ReadProperties],
    ) -> dict[URIRef, dict[URIRef, list[_ReadProperties]]]:
        """Index observations as ``{type_uri: {property_uri: [observations]}}``.

        The outer mapping is ordered by ``type_uri``.
        """
        properties_by_class_by_property: dict[URIRef, dict[URIRef, list[_ReadProperties]]] = {}
        for read_prop in sorted(parent_class_properties_iterable, key=lambda x: x.type_uri):
            if read_prop.type_uri not in properties_by_class_by_property:
                properties_by_class_by_property[read_prop.type_uri] = defaultdict(list)
            properties_by_class_by_property[read_prop.type_uri][read_prop.property_uri].append(read_prop)
        return properties_by_class_by_property

    def _read_class_properties_from_graph(self, parent_by_child: dict[URIRef, URIRef]) -> list[_ReadProperties]:
        """Query the graph for every (type, property, value type) combination in use.

        rdf:type itself and class/property pairs already defined in ``self._rules``
        (including inherited ones) are skipped.

        Args:
            parent_by_child: Mapping from a type URI to its parent type URI.

        Returns:
            One ``_ReadProperties`` per observed combination.
        """
        count_by_type: dict[URIRef, int] = {}
        # Infers all the classes in the graph
        for result_row in self.graph.query(self._ordered_class_query):
            type_uri, instance_count_literal = cast(tuple[URIRef, RdfLiteral], result_row)
            count_by_type[type_uri] = instance_count_literal.toPython()

        if self._rules:
            analysis = InformationAnalysis(self._rules)
            existing_class_properties = {
                (class_entity.suffix, prop.property_)
                for class_entity, properties in analysis.classes_with_properties(
                    consider_inheritance=True, allow_different_namespace=True
                ).items()
                for prop in properties
            }
        else:
            existing_class_properties = set()

        read_properties: list[_ReadProperties] = []
        for type_uri, instance_count in count_by_type.items():
            property_query = self._properties_query.format(type=type_uri, unknown_type=NEAT.UnknownType)
            class_suffix = remove_namespace_from_uri(type_uri)
            for result_row in self.graph.query(property_query):
                property_uri, value_type_uri = cast(tuple[URIRef, URIRef], result_row)
                if property_uri == RDF.type:
                    continue
                property_str = remove_namespace_from_uri(property_uri)
                if (class_suffix, property_str) in existing_class_properties:
                    continue

                occurrence_query = self._max_occurrence_query.format(type=type_uri, property=property_uri)
                max_occurrence = 1  # default value
                # Keep the default when the query returns no row or an unbound
                # aggregate, rather than failing on unpacking / None.toPython().
                occurrence_rows = list(self.graph.query(occurrence_query))
                if occurrence_rows:
                    max_occurrence_literal, *_ = cast(tuple[RdfLiteral, Any], occurrence_rows[0])
                    if max_occurrence_literal is not None:
                        max_occurrence = int(max_occurrence_literal.toPython())

                read_properties.append(
                    _ReadProperties(
                        type_uri=type_uri,
                        property_uri=property_uri,
                        parent_uri=parent_by_child.get(type_uri),
                        value_type=value_type_uri,
                        max_occurrence=max_occurrence,
                        instance_count=instance_count,
                    )
                )
        return read_properties

    def _read_parent_by_child_from_graph(self) -> dict[URIRef, URIRef]:
        """Return a mapping from each instantiated type URI to its rdfs:subClassOf parent URI."""
        parent_by_child: dict[URIRef, URIRef] = {}
        for result_row in self.graph.query(self._type_parent_query):
            parent_uri, child_uri = cast(tuple[URIRef, URIRef], result_row)
            # If a type has several parents, the last row returned wins.
            parent_by_child[child_uri] = parent_uri
        return parent_by_child

    def _create_property(
        self,
        read_properties: list[_ReadProperties],
        class_suffix: str,
        type_uri: URIRef,
        property_uri: URIRef,
        prefixes: dict[str, Namespace],
    ) -> InformationInputProperty:
        """Create one input property from all observations of ``property_uri``.

        Args:
            read_properties: Non-empty list of observations of the property. For a
                property shared by several subclasses this spans all of them.
            class_suffix: Identifier of the class the property is attached to.
            type_uri: Type URI used in the ``instance_source`` reference.
            property_uri: URI of the property.
            prefixes: Prefix-to-namespace mapping; extended in place.

        Returns:
            The input property.
        """
        value_type = self._get_value_type(read_properties, prefixes)
        property_name = remove_namespace_from_uri(property_uri)
        self._add_uri_namespace_to_prefixes(property_uri, prefixes)
        # Use the largest observed cardinality so that a shared property does not
        # understate max_count when subclasses differ.
        max_count = max(read_property.max_occurrence for read_property in read_properties)

        return InformationInputProperty(
            class_=class_suffix,
            property_=property_name,
            max_count=max_count,
            value_type=value_type,
            instance_source=(f"{uri_to_short_form(type_uri, prefixes)}({uri_to_short_form(property_uri, prefixes)})"),
        )

    def _get_value_type(
        self, read_properties: list[_ReadProperties], prefixes: dict[str, Namespace]
    ) -> str | UnknownEntity:
        """Resolve the value type of a property from its observations.

        Data types listed in ``overwrite_data_types`` are replaced first. A single
        distinct type is returned as its suffix (or ``UnknownEntity`` for
        ``NEAT.UnknownType``); no types yields ``UnknownEntity``; several types are
        returned as a ``" | "``-separated string.
        """
        value_types = {self.overwrite_data_types.get(prop.value_type, prop.value_type) for prop in read_properties}
        if not value_types:
            return UnknownEntity()

        if len(value_types) == 1:
            uri_ref = value_types.pop()
            if uri_ref == NEAT.UnknownType:
                return UnknownEntity()
            self._add_uri_namespace_to_prefixes(uri_ref, prefixes)
            return remove_namespace_from_uri(uri_ref)

        for uri_ref in value_types:
            self._add_uri_namespace_to_prefixes(uri_ref, prefixes)
        # Sorted so the multi-value string does not depend on set iteration order.
        return " | ".join(sorted(remove_namespace_from_uri(uri_ref) for uri_ref in value_types))

    def _default_metadata(self) -> dict[str, Any]:
        """Return dumped default metadata for a newly inferred model."""
        now = datetime.now(timezone.utc)
        return InformationMetadata(
            space=self.data_model_id.space,
            external_id=self.data_model_id.external_id,
            version=cast(str, self.data_model_id.version),
            name="Inferred Model",
            creator=["NEAT"],
            created=now,
            updated=now,
            description="Inferred model from knowledge graph",
        ).model_dump()
@@ -21,6 +21,7 @@ from typing import (
21
21
  )
22
22
 
23
23
  import pandas as pd
24
+ from cognite.client import data_modeling as dm
24
25
  from pydantic import (
25
26
  BaseModel,
26
27
  BeforeValidator,
@@ -180,6 +181,12 @@ class BaseMetadata(SchemaModel):
180
181
  description="Date of the data model update",
181
182
  )
182
183
 
184
+ source_id: URIRefType | None = Field(
185
+ None,
186
+ description="Id of source that produced this rules",
187
+ alias="sourceId",
188
+ )
189
+
183
190
  @field_validator("*", mode="before")
184
191
  def strip_string(cls, value: Any) -> Any:
185
192
  if isinstance(value, str):
@@ -213,9 +220,6 @@ class BaseMetadata(SchemaModel):
213
220
  def prefix(self) -> str:
214
221
  return self.space
215
222
 
216
- def as_identifier(self) -> str:
217
- return f"{self.prefix}:{self.external_id}"
218
-
219
223
  def get_prefix(self) -> str:
220
224
  return self.prefix
221
225
 
@@ -234,6 +238,12 @@ class BaseMetadata(SchemaModel):
234
238
  """Namespace for the data model used for the entities in the data model."""
235
239
  return Namespace(f"{self.identifier}/")
236
240
 
241
def as_data_model_id(self) -> dm.DataModelId:
    """Return the data model id built from this metadata's space, external id and version."""
    data_model_id = dm.DataModelId(
        space=self.space,
        external_id=self.external_id,
        version=self.version,
    )
    return data_model_id
243
+
244
def as_identifier(self) -> str:
    """Return the ``repr`` of this metadata's data model id."""
    data_model_id = self.as_data_model_id()
    return repr(data_model_id)
246
+
237
247
 
238
248
  class BaseRules(SchemaModel, ABC):
239
249
  """
@@ -291,6 +301,7 @@ class BaseRules(SchemaModel, ABC):
291
301
  def dump(
292
302
  self,
293
303
  entities_exclude_defaults: bool = True,
304
+ sort: bool = False,
294
305
  mode: Literal["python", "json"] = "python",
295
306
  by_alias: bool = False,
296
307
  exclude: IncEx | None = None,
@@ -307,6 +318,7 @@ class BaseRules(SchemaModel, ABC):
307
318
  For example, given a class that is dumped as 'my_prefix:MyClass', if the prefix for the rules
308
319
  set in metadata.prefix = 'my_prefix', then this class will be dumped as 'MyClass' when this flag is set.
309
320
  Defaults to True.
321
+ sort: Whether to sort the entities in the output.
310
322
  mode: The mode in which `to_python` should run.
311
323
  If mode is 'json', the output will only contain JSON serializable types.
312
324
  If mode is 'python', the output may contain non-JSON-serializable Python objects.
@@ -316,11 +328,12 @@ class BaseRules(SchemaModel, ABC):
316
328
  exclude_unset: Whether to exclude fields that have not been explicitly set.
317
329
  exclude_defaults: Whether to exclude fields that are set to their default value.
318
330
  """
319
- for field_name in self.model_fields.keys():
320
- value = getattr(self, field_name)
321
- # Ensure deterministic order of properties, classes, views, and so on
322
- if isinstance(value, SheetList):
323
- value.sort(key=lambda x: x._identifier())
331
+ if sort:
332
+ for field_name in self.model_fields.keys():
333
+ value = getattr(self, field_name)
334
+ # Ensure deterministic order of properties, classes, views, and so on
335
+ if isinstance(value, SheetList):
336
+ value.sort(key=lambda x: x._identifier())
324
337
 
325
338
  context: dict[str, Any] = {}
326
339
  if entities_exclude_defaults:
@@ -328,7 +341,7 @@ class BaseRules(SchemaModel, ABC):
328
341
 
329
342
  exclude_input: IncEx | None = exclude
330
343
 
331
- output = self.model_dump(
344
+ return self.model_dump(
332
345
  mode=mode,
333
346
  by_alias=by_alias,
334
347
  exclude=exclude_input,
@@ -338,8 +351,6 @@ class BaseRules(SchemaModel, ABC):
338
351
  context=context,
339
352
  )
340
353
 
341
- return output
342
-
343
354
 
344
355
  class SheetRow(SchemaModel):
345
356
  neatId: URIRefType | None = Field(
@@ -292,10 +292,11 @@ class _DMSExporter:
292
292
  for container in containers:
293
293
  container_id = container.as_id()
294
294
  if not (container_properties := container_properties_by_id.get(container_id)):
295
- warnings.warn(
296
- EmptyContainerWarning(container_id),
297
- stacklevel=2,
298
- )
295
+ if container_id.space not in COGNITE_SPACES:
296
+ warnings.warn(
297
+ EmptyContainerWarning(container_id),
298
+ stacklevel=2,
299
+ )
299
300
  container_to_drop.add(container_id)
300
301
  continue
301
302
  for prop in container_properties:
@@ -64,9 +64,6 @@ class DMSMetadata(BaseMetadata):
64
64
  space=self.space,
65
65
  )
66
66
 
67
- def as_data_model_id(self) -> dm.DataModelId:
68
- return dm.DataModelId(space=self.space, external_id=self.external_id, version=self.version)
69
-
70
67
  def as_data_model(self) -> dm.DataModelApply:
71
68
  suffix = f"Creator: {', '.join(self.creator)}"
72
69
  if self.description:
@@ -83,9 +80,6 @@ class DMSMetadata(BaseMetadata):
83
80
  views=[],
84
81
  )
85
82
 
86
- def as_identifier(self) -> str:
87
- return repr(self.as_data_model_id())
88
-
89
83
  def get_prefix(self) -> str:
90
84
  return self.space
91
85
 
@@ -109,8 +103,7 @@ class DMSProperty(SheetRow):
109
103
  )
110
104
  value_type: DataType | ViewEntity | DMSUnknownEntity = Field(
111
105
  alias="Value Type",
112
- description="Value type that the property can hold. "
113
- "It takes either subset of CDF primitive types or a View id",
106
+ description="Value type that the property can hold. It takes either subset of CDF primitive types or a View id",
114
107
  )
115
108
  nullable: bool | None = Field(
116
109
  default=None,
@@ -37,6 +37,7 @@ class DMSInputMetadata(InputComponent[DMSMetadata]):
37
37
  created: datetime | str | None = None
38
38
  updated: datetime | str | None = None
39
39
  logical: str | URIRef | None = None
40
+ source_id: str | URIRef | None = None
40
41
 
41
42
  @classmethod
42
43
  def _get_verified_cls(cls) -> type[DMSMetadata]:
@@ -77,6 +78,9 @@ class DMSInputMetadata(InputComponent[DMSMetadata]):
77
78
  description = None
78
79
  return description, creator
79
80
 
81
def as_data_model_id(self) -> dm.DataModelId:
    """Return the data model id built from this input metadata's space, external id and version."""
    data_model_id = dm.DataModelId(
        space=self.space,
        external_id=self.external_id,
        version=self.version,
    )
    return data_model_id
83
+
80
84
  @property
81
85
  def identifier(self) -> URIRef:
82
86
  """Globally unique identifier for the data model.
@@ -3,6 +3,7 @@ from datetime import datetime
3
3
  from typing import Any
4
4
 
5
5
  import pandas as pd
6
+ from cognite.client import data_modeling as dm
6
7
  from rdflib import Namespace, URIRef
7
8
 
8
9
  from cognite.neat._constants import DEFAULT_NAMESPACE
@@ -36,6 +37,7 @@ class InformationInputMetadata(InputComponent[InformationMetadata]):
36
37
  updated: datetime | str | None = None
37
38
  physical: str | URIRef | None = None
38
39
  conceptual: str | URIRef | None = None
40
+ source_id: str | URIRef | None = None
39
41
 
40
42
  @classmethod
41
43
  def _get_verified_cls(cls) -> type[InformationMetadata]:
@@ -49,6 +51,9 @@ class InformationInputMetadata(InputComponent[InformationMetadata]):
49
51
  output["updated"] = datetime.now()
50
52
  return output
51
53
 
54
def as_data_model_id(self) -> dm.DataModelId:
    """Return the data model id built from this input metadata's space, external id and version."""
    data_model_id = dm.DataModelId(
        space=self.space,
        external_id=self.external_id,
        version=self.version,
    )
    return data_model_id
56
+
52
57
  @property
53
58
  def prefix(self) -> str:
54
59
  return self.space
@@ -1,14 +1,17 @@
1
- from ._base import RulesTransformer
1
+ from ._base import RulesTransformer, VerifiedRulesTransformer
2
2
  from ._converters import (
3
3
  AddClassImplements,
4
4
  ChangeViewPrefix,
5
5
  ClassicPrepareCore,
6
+ ConversionTransformer,
6
7
  ConvertToRules,
7
8
  DMSToInformation,
9
+ DropModelViews,
8
10
  IncludeReferenced,
9
11
  InformationToDMS,
12
+ MergeDMSRules,
13
+ MergeInformationRules,
10
14
  PrefixEntities,
11
- ReduceCogniteModel,
12
15
  SetIDDMSModel,
13
16
  ToCompliantEntities,
14
17
  ToDataProductModel,
@@ -24,13 +27,16 @@ __all__ = [
24
27
  "AsParentPropertyId",
25
28
  "ChangeViewPrefix",
26
29
  "ClassicPrepareCore",
30
+ "ConversionTransformer",
27
31
  "ConvertToRules",
28
32
  "DMSToInformation",
33
+ "DropModelViews",
29
34
  "IncludeReferenced",
30
35
  "InformationToDMS",
31
36
  "MapOneToOne",
37
+ "MergeDMSRules",
38
+ "MergeInformationRules",
32
39
  "PrefixEntities",
33
- "ReduceCogniteModel",
34
40
  "RuleMapper",
35
41
  "RulesTransformer",
36
42
  "SetIDDMSModel",
@@ -39,6 +45,7 @@ __all__ = [
39
45
  "ToEnterpriseModel",
40
46
  "ToExtensionModel",
41
47
  "ToSolutionModel",
48
+ "VerifiedRulesTransformer",
42
49
  "VerifyAnyRules",
43
50
  "VerifyDMSRules",
44
51
  "VerifyInformationRules",
@@ -5,12 +5,14 @@ from types import UnionType
5
5
  from typing import Generic, TypeVar, Union, get_args, get_origin
6
6
 
7
7
  from cognite.neat._constants import DEFAULT_NAMESPACE
8
- from cognite.neat._rules._shared import ReadRules, Rules
8
+ from cognite.neat._rules._shared import ReadRules, Rules, VerifiedRules
9
9
  from cognite.neat._rules.models import DMSInputRules, InformationInputRules
10
10
  from cognite.neat._store._provenance import Agent as ProvenanceAgent
11
11
 
12
12
  T_RulesIn = TypeVar("T_RulesIn", bound=Rules)
13
13
  T_RulesOut = TypeVar("T_RulesOut", bound=Rules)
14
+ T_VerifiedIn = TypeVar("T_VerifiedIn", bound=VerifiedRules)
15
+ T_VerifiedOut = TypeVar("T_VerifiedOut", bound=VerifiedRules)
14
16
 
15
17
 
16
18
  class RulesTransformer(ABC, Generic[T_RulesIn, T_RulesOut]):
@@ -62,3 +64,6 @@ class RulesTransformer(ABC, Generic[T_RulesIn, T_RulesOut]):
62
64
  return ReadRules[DMSInputRules], ReadRules[InformationInputRules]
63
65
 
64
66
  return (annotation,)
67
+
68
+
69
class VerifiedRulesTransformer(RulesTransformer[T_VerifiedIn, T_VerifiedOut], ABC):
    """Abstract rules transformer whose input and output types are both bound to ``VerifiedRules``."""