cognite-neat 0.121.2__py3-none-any.whl → 0.122.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_version.py +1 -1
- cognite/neat/core/_constants.py +1 -1
- cognite/neat/core/_data_model/catalog/__init__.py +1 -1
- cognite/neat/core/_data_model/exporters/__init__.py +1 -1
- cognite/neat/core/_data_model/exporters/_data_model2instance_template.py +11 -11
- cognite/neat/core/_data_model/importers/__init__.py +2 -2
- cognite/neat/core/_data_model/importers/_dict2data_model.py +11 -6
- cognite/neat/core/_data_model/importers/_rdf/__init__.py +3 -3
- cognite/neat/core/_data_model/importers/_rdf/_base.py +8 -8
- cognite/neat/core/_data_model/importers/_rdf/{_imf2rules.py → _imf2data_model.py} +2 -2
- cognite/neat/core/_data_model/importers/_rdf/{_inference2rules.py → _inference2rdata_model.py} +26 -26
- cognite/neat/core/_data_model/importers/_rdf/{_owl2rules.py → _owl2data_model.py} +5 -5
- cognite/neat/core/_data_model/importers/_spreadsheet2data_model.py +6 -9
- cognite/neat/core/_data_model/models/__init__.py +5 -3
- cognite/neat/core/_data_model/models/_base_unverified.py +12 -12
- cognite/neat/core/_data_model/models/_base_verified.py +6 -11
- cognite/neat/core/_data_model/models/conceptual/_validation.py +1 -1
- cognite/neat/core/_data_model/models/conceptual/_verified.py +2 -2
- cognite/neat/core/_data_model/models/mapping/_classic2core.py +1 -1
- cognite/neat/core/_data_model/models/physical/_exporter.py +4 -3
- cognite/neat/core/_data_model/models/physical/_validation.py +5 -5
- cognite/neat/core/_data_model/transformers/__init__.py +2 -2
- cognite/neat/core/_data_model/transformers/_base.py +1 -1
- cognite/neat/core/_data_model/transformers/_converters.py +9 -9
- cognite/neat/core/_data_model/transformers/_verification.py +1 -1
- cognite/neat/core/_instances/extractors/_base.py +1 -1
- cognite/neat/core/_instances/extractors/_classic_cdf/_classic.py +1 -1
- cognite/neat/core/_instances/extractors/_mock_graph_generator.py +14 -13
- cognite/neat/core/_instances/loaders/_base.py +2 -2
- cognite/neat/core/_instances/loaders/_rdf2dms.py +26 -20
- cognite/neat/core/_instances/transformers/_rdfpath.py +1 -1
- cognite/neat/core/_issues/_factory.py +1 -1
- cognite/neat/core/_issues/errors/_resources.py +1 -1
- cognite/neat/core/_issues/warnings/_properties.py +1 -1
- cognite/neat/session/_base.py +26 -22
- cognite/neat/session/_drop.py +2 -2
- cognite/neat/session/_experimental.py +1 -1
- cognite/neat/session/_inspect.py +8 -8
- cognite/neat/session/_mapping.py +9 -5
- cognite/neat/session/_read.py +38 -36
- cognite/neat/session/_set.py +5 -5
- cognite/neat/session/_show.py +15 -12
- cognite/neat/session/_state/README.md +1 -1
- cognite/neat/session/_state.py +18 -18
- cognite/neat/session/_subset.py +6 -6
- cognite/neat/session/_template.py +13 -11
- cognite/neat/session/_to.py +35 -35
- cognite/neat/session/exceptions.py +5 -2
- {cognite_neat-0.121.2.dist-info → cognite_neat-0.122.1.dist-info}/METADATA +17 -10
- {cognite_neat-0.121.2.dist-info → cognite_neat-0.122.1.dist-info}/RECORD +52 -52
- {cognite_neat-0.121.2.dist-info → cognite_neat-0.122.1.dist-info}/WHEEL +0 -0
- {cognite_neat-0.121.2.dist-info → cognite_neat-0.122.1.dist-info}/licenses/LICENSE +0 -0
cognite/neat/_version.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
__version__ = "0.
|
|
1
|
+
__version__ = "0.122.1"
|
|
2
2
|
__engine__ = "^2.0.4"
|
cognite/neat/core/_constants.py
CHANGED
|
@@ -104,7 +104,7 @@ COGNITE_CONCEPTS = (
|
|
|
104
104
|
|
|
105
105
|
DMS_LISTABLE_PROPERTY_LIMIT = 1000
|
|
106
106
|
|
|
107
|
-
|
|
107
|
+
EXAMPLE_DATA_MODELS = PACKAGE_DIRECTORY / "core" / "_data_model" / "examples"
|
|
108
108
|
EXAMPLE_GRAPHS = PACKAGE_DIRECTORY / "core" / "_instances" / "examples"
|
|
109
109
|
|
|
110
110
|
DEFAULT_SPACE_URI = "http://purl.org/cognite/space/{space}#"
|
|
@@ -34,6 +34,6 @@ def _repr_html_() -> str:
|
|
|
34
34
|
)._repr_html_()
|
|
35
35
|
|
|
36
36
|
return (
|
|
37
|
-
"<strong>Exporter</strong> An exporter converts Neat's representation of a data model
|
|
37
|
+
"<strong>Exporter</strong> An exporter converts Neat's representation of a data model"
|
|
38
38
|
f" into a schema/data model for a target format.<br />{table}"
|
|
39
39
|
)
|
|
@@ -20,8 +20,8 @@ from ._base import BaseExporter
|
|
|
20
20
|
|
|
21
21
|
class InstanceTemplateExporter(BaseExporter[ConceptualDataModel, Workbook]):
|
|
22
22
|
"""
|
|
23
|
-
Converts
|
|
24
|
-
instances based on
|
|
23
|
+
Converts conceptual data model to a templated spreadsheet meant for capturing
|
|
24
|
+
instances based on concept definitions in the data model.
|
|
25
25
|
|
|
26
26
|
Args:
|
|
27
27
|
no_rows: number of rows for processing, by default 1000
|
|
@@ -50,30 +50,30 @@ class InstanceTemplateExporter(BaseExporter[ConceptualDataModel, Workbook]):
|
|
|
50
50
|
|
|
51
51
|
def export(
|
|
52
52
|
self,
|
|
53
|
-
|
|
53
|
+
data_model: ConceptualDataModel,
|
|
54
54
|
) -> Workbook:
|
|
55
55
|
workbook = Workbook()
|
|
56
56
|
|
|
57
57
|
# Remove default sheet named "Sheet"
|
|
58
58
|
workbook.remove(workbook["Sheet"])
|
|
59
59
|
|
|
60
|
-
for
|
|
61
|
-
workbook.create_sheet(title=
|
|
60
|
+
for concept, properties in DataModelAnalysis(data_model).properties_by_id_by_concept().items():
|
|
61
|
+
workbook.create_sheet(title=concept.suffix)
|
|
62
62
|
|
|
63
63
|
# Add header rows
|
|
64
|
-
workbook[
|
|
64
|
+
workbook[concept.suffix].append(["identifier", *list(properties.keys())])
|
|
65
65
|
|
|
66
66
|
if self.auto_identifier_type == "uuid":
|
|
67
|
-
_add_uuid_identifiers(workbook,
|
|
67
|
+
_add_uuid_identifiers(workbook, concept.suffix, self.no_rows)
|
|
68
68
|
else:
|
|
69
69
|
# Default to index-based identifier
|
|
70
|
-
_add_index_identifiers(workbook,
|
|
70
|
+
_add_index_identifiers(workbook, concept.suffix, self.no_rows)
|
|
71
71
|
|
|
72
72
|
for i, property_ in enumerate(properties.values()):
|
|
73
73
|
if property_.type_ == EntityTypes.object_property and self.add_drop_down_list:
|
|
74
74
|
_add_drop_down_list(
|
|
75
75
|
workbook,
|
|
76
|
-
|
|
76
|
+
concept.suffix,
|
|
77
77
|
get_column_letter(i + 2),
|
|
78
78
|
self.no_rows,
|
|
79
79
|
cast(ConceptEntity, property_.value_type).suffix,
|
|
@@ -85,9 +85,9 @@ class InstanceTemplateExporter(BaseExporter[ConceptualDataModel, Workbook]):
|
|
|
85
85
|
|
|
86
86
|
return workbook
|
|
87
87
|
|
|
88
|
-
def export_to_file(self,
|
|
88
|
+
def export_to_file(self, data_model: ConceptualDataModel, filepath: Path) -> None:
|
|
89
89
|
"""Exports graph capturing sheet to excel file."""
|
|
90
|
-
data = self.export(
|
|
90
|
+
data = self.export(data_model)
|
|
91
91
|
try:
|
|
92
92
|
data.save(filepath)
|
|
93
93
|
finally:
|
|
@@ -17,7 +17,7 @@ __all__ = [
|
|
|
17
17
|
"SubclassInferenceImporter",
|
|
18
18
|
]
|
|
19
19
|
|
|
20
|
-
|
|
20
|
+
DataModelImporters = (
|
|
21
21
|
OWLImporter
|
|
22
22
|
| IMFImporter
|
|
23
23
|
| DMSImporter
|
|
@@ -47,5 +47,5 @@ def _repr_html_() -> str:
|
|
|
47
47
|
|
|
48
48
|
return (
|
|
49
49
|
"<strong>Importer</strong> An importer reads data/schema/data model from a source"
|
|
50
|
-
f" and converts it into Neat's representation of a data model
|
|
50
|
+
f" and converts it into Neat's representation of a data model.<br />{table}"
|
|
51
51
|
)
|
|
@@ -7,7 +7,10 @@ from cognite.neat.core._data_model._shared import (
|
|
|
7
7
|
ImportedDataModel,
|
|
8
8
|
T_UnverifiedDataModel,
|
|
9
9
|
)
|
|
10
|
-
from cognite.neat.core._data_model.models import
|
|
10
|
+
from cognite.neat.core._data_model.models import (
|
|
11
|
+
UNVERIFIED_DATA_MODEL_BY_ROLE,
|
|
12
|
+
RoleTypes,
|
|
13
|
+
)
|
|
11
14
|
from cognite.neat.core._issues import IssueList, MultiValueError, NeatIssue
|
|
12
15
|
from cognite.neat.core._issues.errors import (
|
|
13
16
|
FileMissingRequiredFieldError,
|
|
@@ -44,16 +47,18 @@ class YAMLReader:
|
|
|
44
47
|
|
|
45
48
|
|
|
46
49
|
class DictImporter(BaseImporter[T_UnverifiedDataModel]):
|
|
47
|
-
"""Imports the
|
|
50
|
+
"""Imports the data model from a YAML file.
|
|
48
51
|
|
|
49
52
|
Args:
|
|
50
53
|
raw_data: The raw data to be imported.
|
|
51
54
|
|
|
52
55
|
.. note::
|
|
53
56
|
|
|
54
|
-
YAML files are typically used for storing
|
|
55
|
-
|
|
56
|
-
|
|
57
|
+
YAML files are typically used for storing data model when checked into version
|
|
58
|
+
control systems, e.g., git-history.
|
|
59
|
+
The advantage of using YAML files over Excel is that tools like git can
|
|
60
|
+
show the differences between different
|
|
61
|
+
versions of the data model.
|
|
57
62
|
|
|
58
63
|
"""
|
|
59
64
|
|
|
@@ -115,7 +120,7 @@ class DictImporter(BaseImporter[T_UnverifiedDataModel]):
|
|
|
115
120
|
|
|
116
121
|
role_input = RoleTypes(metadata["role"])
|
|
117
122
|
role_enum = RoleTypes(role_input)
|
|
118
|
-
data_model_cls =
|
|
123
|
+
data_model_cls = UNVERIFIED_DATA_MODEL_BY_ROLE[role_enum]
|
|
119
124
|
|
|
120
125
|
data_model = cast(T_UnverifiedDataModel, data_model_cls.load(self.raw_data))
|
|
121
126
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from .
|
|
2
|
-
from .
|
|
3
|
-
from .
|
|
1
|
+
from ._imf2data_model import IMFImporter
|
|
2
|
+
from ._inference2rdata_model import InferenceImporter, SubclassInferenceImporter
|
|
3
|
+
from ._owl2data_model import OWLImporter
|
|
4
4
|
|
|
5
5
|
__all__ = ["IMFImporter", "InferenceImporter", "OWLImporter", "SubclassInferenceImporter"]
|
|
@@ -28,13 +28,13 @@ DEFAULT_RDF_DATA_MODEL_ID = ("neat_space", "RDFDataModel", "rdf")
|
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
class BaseRDFImporter(BaseImporter[UnverifiedConceptualDataModel]):
|
|
31
|
-
"""Baser RDF importers used for all
|
|
31
|
+
"""Base RDF importer used for all data model importers that are using RDF as input.
|
|
32
32
|
|
|
33
33
|
Args:
|
|
34
34
|
issue_list: Issue list to store issues
|
|
35
|
-
graph:
|
|
36
|
-
data_model_id: Data model id to be used for the imported
|
|
37
|
-
space: CDF Space to be used for the imported
|
|
35
|
+
graph: graph where instances are stored
|
|
36
|
+
data_model_id: Data model id to be used for the imported data model
|
|
37
|
+
space: CDF Space to be used for the imported data model
|
|
38
38
|
language: Language for description and human readable entity names
|
|
39
39
|
|
|
40
40
|
|
|
@@ -119,18 +119,18 @@ class BaseRDFImporter(BaseImporter[UnverifiedConceptualDataModel]):
|
|
|
119
119
|
self,
|
|
120
120
|
) -> ImportedDataModel[UnverifiedConceptualDataModel]:
|
|
121
121
|
"""
|
|
122
|
-
Creates `
|
|
122
|
+
Creates `ImportedDataModel` object from the data for target role.
|
|
123
123
|
"""
|
|
124
124
|
if self.issue_list.has_errors:
|
|
125
125
|
# In case there were errors during the import, the to_data_model method raises a MultiValueError instead of returning a data model
|
|
126
126
|
self.issue_list.trigger_warnings()
|
|
127
127
|
raise MultiValueError(self.issue_list.errors)
|
|
128
128
|
|
|
129
|
-
|
|
129
|
+
data_model_dict = self._to_data_model_components()
|
|
130
130
|
|
|
131
|
-
|
|
131
|
+
data_model = UnverifiedConceptualDataModel.load(data_model_dict)
|
|
132
132
|
self.issue_list.trigger_warnings()
|
|
133
|
-
return ImportedDataModel(
|
|
133
|
+
return ImportedDataModel(data_model, {})
|
|
134
134
|
|
|
135
135
|
def _to_data_model_components(self) -> dict:
|
|
136
136
|
raise NotImplementedError()
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
"""This module performs importing of various formats to one of serializations for which
|
|
2
|
-
there are loaders to
|
|
2
|
+
there are loaders to data model pydantic class."""
|
|
3
3
|
|
|
4
4
|
from cognite.neat.core._data_model.importers._rdf._base import BaseRDFImporter
|
|
5
5
|
from cognite.neat.core._data_model.importers._rdf._shared import (
|
|
@@ -75,7 +75,7 @@ PROPERTIES_QUERY = """
|
|
|
75
75
|
|
|
76
76
|
|
|
77
77
|
class IMFImporter(BaseRDFImporter):
|
|
78
|
-
"""Convert IMF Types provided as SHACL shapes to
|
|
78
|
+
"""Convert IMF Types provided as SHACL shapes to unverified data model."""
|
|
79
79
|
|
|
80
80
|
@property
|
|
81
81
|
def description(self) -> str:
|
cognite/neat/core/_data_model/importers/_rdf/{_inference2rules.py → _inference2rdata_model.py}
RENAMED
|
@@ -56,10 +56,10 @@ INSTANCE_PROPERTIES_DEFINITION = """SELECT ?property (count(?property) as ?occur
|
|
|
56
56
|
|
|
57
57
|
|
|
58
58
|
class InferenceImporter(BaseRDFImporter):
|
|
59
|
-
"""Infers
|
|
59
|
+
"""Infers data model from instances stored as triples.
|
|
60
60
|
|
|
61
|
-
|
|
62
|
-
Use the factory methods to create
|
|
61
|
+
Data model inference through analysis of knowledge graph provided in various formats.
|
|
62
|
+
Use the factory methods to create triples from sources such as
|
|
63
63
|
RDF files, JSON files, YAML files, XML files, or directly from a graph store.
|
|
64
64
|
|
|
65
65
|
ClassVars:
|
|
@@ -151,7 +151,7 @@ class InferenceImporter(BaseRDFImporter):
|
|
|
151
151
|
"""Convert RDF graph to dictionary defining data model and prefixes of the graph
|
|
152
152
|
|
|
153
153
|
Args:
|
|
154
|
-
graph: RDF graph to be converted to
|
|
154
|
+
graph: RDF graph to be converted to data model
|
|
155
155
|
max_number_of_instance: Max number of instances to be considered for each class
|
|
156
156
|
|
|
157
157
|
Returns:
|
|
@@ -218,7 +218,7 @@ class InferenceImporter(BaseRDFImporter):
|
|
|
218
218
|
resource_type="Property",
|
|
219
219
|
identifier=f"{concept_id}:{property_id}",
|
|
220
220
|
property_name=property_id,
|
|
221
|
-
default_action="Remove the property from the
|
|
221
|
+
default_action="Remove the property from the data model",
|
|
222
222
|
recommended_action="Make sure that graph is complete",
|
|
223
223
|
)
|
|
224
224
|
|
|
@@ -300,11 +300,11 @@ class _ReadProperties:
|
|
|
300
300
|
|
|
301
301
|
|
|
302
302
|
class SubclassInferenceImporter(BaseRDFImporter):
|
|
303
|
-
"""Infer subclasses from a triple store.
|
|
303
|
+
"""Infer subclasses from instances stored in a triple store.
|
|
304
304
|
|
|
305
|
-
Assumes that the
|
|
306
|
-
match the RDF.type of the instances
|
|
307
|
-
|
|
305
|
+
Assumes that the instances already are connected to a schema. The classes should
|
|
306
|
+
match the RDF.type of the instances, while the subclasses should match the NEAT.type
|
|
307
|
+
of the instances.
|
|
308
308
|
|
|
309
309
|
ClassVars:
|
|
310
310
|
overwrite_data_types: Mapping of data types to be overwritten. The InferenceImporter will overwrite
|
|
@@ -357,26 +357,26 @@ class SubclassInferenceImporter(BaseRDFImporter):
|
|
|
357
357
|
self,
|
|
358
358
|
issue_list: IssueList,
|
|
359
359
|
graph: Graph,
|
|
360
|
-
|
|
360
|
+
data_model: ConceptualDataModel | None = None,
|
|
361
361
|
data_model_id: dm.DataModelId | tuple[str, str, str] | None = None,
|
|
362
362
|
non_existing_node_type: UnknownEntity | AnyURI = DEFAULT_NON_EXISTING_NODE_TYPE,
|
|
363
363
|
) -> None:
|
|
364
|
-
if sum([1 for v in [
|
|
365
|
-
raise ValueError("Exactly one of
|
|
364
|
+
if sum([1 for v in [data_model, data_model_id] if v is not None]) != 1:
|
|
365
|
+
raise ValueError("Exactly one of data model or data_model_id must be provided.")
|
|
366
366
|
if data_model_id is not None:
|
|
367
367
|
identifier = data_model_id
|
|
368
|
-
elif
|
|
369
|
-
identifier =
|
|
368
|
+
elif data_model is not None:
|
|
369
|
+
identifier = data_model.metadata.as_data_model_id().as_tuple() # type: ignore[assignment]
|
|
370
370
|
else:
|
|
371
|
-
raise ValueError("Exactly one of
|
|
371
|
+
raise ValueError("Exactly one of data model or data_model_id must be provided.")
|
|
372
372
|
super().__init__(issue_list, graph, identifier, -1, non_existing_node_type, language="en")
|
|
373
|
-
self.
|
|
373
|
+
self._data_model = data_model
|
|
374
374
|
|
|
375
375
|
def _to_data_model_components(
|
|
376
376
|
self,
|
|
377
377
|
) -> dict:
|
|
378
|
-
if self.
|
|
379
|
-
prefixes = self.
|
|
378
|
+
if self._data_model:
|
|
379
|
+
prefixes = self._data_model.prefixes.copy()
|
|
380
380
|
else:
|
|
381
381
|
prefixes = get_default_prefixes_and_namespaces()
|
|
382
382
|
|
|
@@ -384,9 +384,9 @@ class SubclassInferenceImporter(BaseRDFImporter):
|
|
|
384
384
|
read_properties = self._read_class_properties_from_graph(parent_by_child)
|
|
385
385
|
classes, properties = self._create_classes_properties(read_properties, prefixes)
|
|
386
386
|
|
|
387
|
-
if self.
|
|
388
|
-
metadata = self.
|
|
389
|
-
default_space = self.
|
|
387
|
+
if self._data_model:
|
|
388
|
+
metadata = self._data_model.metadata.model_dump()
|
|
389
|
+
default_space = self._data_model.metadata.prefix
|
|
390
390
|
else:
|
|
391
391
|
metadata = self._default_metadata()
|
|
392
392
|
default_space = metadata["space"]
|
|
@@ -400,8 +400,8 @@ class SubclassInferenceImporter(BaseRDFImporter):
|
|
|
400
400
|
def _create_classes_properties(
|
|
401
401
|
self, read_properties: list[_ReadProperties], prefixes: dict[str, Namespace]
|
|
402
402
|
) -> tuple[list[UnverifiedConcept], list[UnverifiedConceptualProperty]]:
|
|
403
|
-
if self.
|
|
404
|
-
existing_classes = {class_.concept.suffix: class_ for class_ in self.
|
|
403
|
+
if self._data_model:
|
|
404
|
+
existing_classes = {class_.concept.suffix: class_ for class_ in self._data_model.concepts}
|
|
405
405
|
else:
|
|
406
406
|
existing_classes = {}
|
|
407
407
|
classes: list[UnverifiedConcept] = []
|
|
@@ -509,8 +509,8 @@ class SubclassInferenceImporter(BaseRDFImporter):
|
|
|
509
509
|
for result_row in self.graph.query(self._ordered_class_query):
|
|
510
510
|
type_uri, instance_count_literal = cast(tuple[URIRef, RdfLiteral], result_row)
|
|
511
511
|
count_by_type[type_uri] = instance_count_literal.toPython()
|
|
512
|
-
if self.
|
|
513
|
-
analysis = DataModelAnalysis(self.
|
|
512
|
+
if self._data_model:
|
|
513
|
+
analysis = DataModelAnalysis(self._data_model)
|
|
514
514
|
existing_class_properties = {
|
|
515
515
|
(class_entity.suffix, prop.property_): prop
|
|
516
516
|
for class_entity, properties in analysis.properties_by_concepts(
|
|
@@ -518,7 +518,7 @@ class SubclassInferenceImporter(BaseRDFImporter):
|
|
|
518
518
|
).items()
|
|
519
519
|
for prop in properties
|
|
520
520
|
}
|
|
521
|
-
existing_classes = {cls_.concept.suffix: cls_ for cls_ in self.
|
|
521
|
+
existing_classes = {cls_.concept.suffix: cls_ for cls_ in self._data_model.concepts}
|
|
522
522
|
else:
|
|
523
523
|
existing_class_properties = {}
|
|
524
524
|
existing_classes = {}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
"""This module performs importing of various formats to one of serializations for which
|
|
2
|
-
there are loaders to
|
|
2
|
+
there are loaders to data model pydantic class."""
|
|
3
3
|
|
|
4
4
|
from cognite.neat.core._data_model.importers._rdf._base import BaseRDFImporter
|
|
5
5
|
from cognite.neat.core._data_model.importers._rdf._shared import (
|
|
@@ -47,20 +47,20 @@ PROPERTIES_QUERY = """
|
|
|
47
47
|
|
|
48
48
|
|
|
49
49
|
class OWLImporter(BaseRDFImporter):
|
|
50
|
-
"""Convert OWL ontology to
|
|
50
|
+
"""Convert OWL ontology to unverified data model.
|
|
51
51
|
|
|
52
52
|
Args:
|
|
53
53
|
filepath: Path to OWL ontology
|
|
54
54
|
|
|
55
55
|
!!! Note
|
|
56
56
|
OWL Ontologies are information models whose completeness varies. As such, constructing functional
|
|
57
|
-
data model directly will often be impossible, therefore the produced
|
|
58
|
-
To avoid this, neat will automatically attempt to make the imported
|
|
57
|
+
data model directly will often be impossible, therefore the produced data model object will be ill formed.
|
|
58
|
+
To avoid this, neat will automatically attempt to make the imported data model compliant by adding default
|
|
59
59
|
values for missing information, attaching dangling properties to default containers based on the
|
|
60
60
|
property type, etc.
|
|
61
61
|
|
|
62
62
|
One has to be aware that NEAT will be opinionated about how to make the ontology
|
|
63
|
-
compliant, and that the resulting
|
|
63
|
+
compliant, and that the resulting data model may not be what you expect.
|
|
64
64
|
|
|
65
65
|
"""
|
|
66
66
|
|
|
@@ -1,7 +1,4 @@
|
|
|
1
|
-
"""This module performs importing of
|
|
2
|
-
In more details, it traverses the graph and abstracts class and properties, basically
|
|
3
|
-
generating a list of data_model based on which nodes that form the graph are made.
|
|
4
|
-
"""
|
|
1
|
+
"""This module performs importing of data model from spreadsheets."""
|
|
5
2
|
|
|
6
3
|
import tempfile
|
|
7
4
|
from collections import UserDict, defaultdict
|
|
@@ -20,8 +17,8 @@ from cognite.neat.core._data_model._shared import (
|
|
|
20
17
|
T_UnverifiedDataModel,
|
|
21
18
|
)
|
|
22
19
|
from cognite.neat.core._data_model.models import (
|
|
23
|
-
|
|
24
|
-
|
|
20
|
+
UNVERIFIED_DATA_MODEL_BY_ROLE,
|
|
21
|
+
VERIFIED_DATA_MODEL_BY_ROLE,
|
|
25
22
|
RoleTypes,
|
|
26
23
|
SchemaCompleteness,
|
|
27
24
|
)
|
|
@@ -58,8 +55,8 @@ MANDATORY_SHEETS_BY_ROLE: dict[RoleTypes, set[str]] = {
|
|
|
58
55
|
role_type: {
|
|
59
56
|
str(sheet_name)
|
|
60
57
|
for sheet_name in (
|
|
61
|
-
|
|
62
|
-
if
|
|
58
|
+
VERIFIED_DATA_MODEL_BY_ROLE.get(role_type).mandatory_fields(use_alias=True) # type: ignore
|
|
59
|
+
if VERIFIED_DATA_MODEL_BY_ROLE.get(role_type)
|
|
63
60
|
else []
|
|
64
61
|
)
|
|
65
62
|
if sheet_name is not None
|
|
@@ -280,7 +277,7 @@ class ExcelImporter(BaseImporter[T_UnverifiedDataModel]):
|
|
|
280
277
|
original_role = user_read.role
|
|
281
278
|
read_info_by_sheet = user_read.read_info_by_sheet
|
|
282
279
|
|
|
283
|
-
data_model_cls =
|
|
280
|
+
data_model_cls = UNVERIFIED_DATA_MODEL_BY_ROLE[original_role]
|
|
284
281
|
data_model = cast(T_UnverifiedDataModel, data_model_cls.load(sheets))
|
|
285
282
|
|
|
286
283
|
# Delete the temporary file if it was created
|
|
@@ -10,18 +10,20 @@ from ._base_verified import DataModelType, ExtensionCategory, RoleTypes, SchemaC
|
|
|
10
10
|
from .physical._unverified import UnverifiedPhysicalDataModel
|
|
11
11
|
from .physical._verified import PhysicalDataModel
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
UNVERIFIED_DATA_MODEL_BY_ROLE: dict[
|
|
14
|
+
RoleTypes, type[UnverifiedConceptualDataModel] | type[UnverifiedPhysicalDataModel]
|
|
15
|
+
] = {
|
|
14
16
|
RoleTypes.information: UnverifiedConceptualDataModel,
|
|
15
17
|
RoleTypes.dms: UnverifiedPhysicalDataModel,
|
|
16
18
|
}
|
|
17
|
-
|
|
19
|
+
VERIFIED_DATA_MODEL_BY_ROLE: dict[RoleTypes, type[ConceptualDataModel] | type[PhysicalDataModel]] = {
|
|
18
20
|
RoleTypes.information: ConceptualDataModel,
|
|
19
21
|
RoleTypes.dms: PhysicalDataModel,
|
|
20
22
|
}
|
|
21
23
|
|
|
22
24
|
|
|
23
25
|
__all__ = [
|
|
24
|
-
"
|
|
26
|
+
"UNVERIFIED_DATA_MODEL_BY_ROLE",
|
|
25
27
|
"ConceptualDataModel",
|
|
26
28
|
"DMSSchema",
|
|
27
29
|
"DataModelType",
|
|
@@ -27,29 +27,29 @@ if sys.version_info >= (3, 11):
|
|
|
27
27
|
else:
|
|
28
28
|
from typing_extensions import Self
|
|
29
29
|
|
|
30
|
-
|
|
31
|
-
|
|
30
|
+
T_BaseDataModel = TypeVar("T_BaseDataModel", bound=BaseVerifiedDataModel)
|
|
31
|
+
T_DataModel = TypeVar("T_DataModel", bound=SchemaModel)
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
@dataclass
|
|
35
|
-
class UnverifiedDataModel(Generic[
|
|
36
|
-
"""Input
|
|
35
|
+
class UnverifiedDataModel(Generic[T_BaseDataModel], ABC):
|
|
36
|
+
"""Input data model is raw data that is not yet validated."""
|
|
37
37
|
|
|
38
38
|
@classmethod
|
|
39
39
|
@abstractmethod
|
|
40
|
-
def _get_verified_cls(cls) -> type[
|
|
40
|
+
def _get_verified_cls(cls) -> type[T_BaseDataModel]:
|
|
41
41
|
raise NotImplementedError("This method should be implemented in the subclass.")
|
|
42
42
|
|
|
43
43
|
@classmethod
|
|
44
44
|
@overload
|
|
45
|
-
def load(cls: "type[
|
|
45
|
+
def load(cls: "type[T_UnverifiedDataModel]", data: dict[str, Any]) -> "T_UnverifiedDataModel": ...
|
|
46
46
|
|
|
47
47
|
@classmethod
|
|
48
48
|
@overload
|
|
49
|
-
def load(cls: "type[
|
|
49
|
+
def load(cls: "type[T_UnverifiedDataModel]", data: None) -> None: ...
|
|
50
50
|
|
|
51
51
|
@classmethod
|
|
52
|
-
def load(cls: "type[
|
|
52
|
+
def load(cls: "type[T_UnverifiedDataModel]", data: dict | None) -> "T_UnverifiedDataModel | None":
|
|
53
53
|
if data is None:
|
|
54
54
|
return None
|
|
55
55
|
return cls._load(data)
|
|
@@ -110,7 +110,7 @@ class UnverifiedDataModel(Generic[T_BaseRules], ABC):
|
|
|
110
110
|
def _dataclass_fields(self) -> list[Field]:
|
|
111
111
|
return list(fields(self))
|
|
112
112
|
|
|
113
|
-
def as_verified_data_model(self) ->
|
|
113
|
+
def as_verified_data_model(self) -> T_BaseDataModel:
|
|
114
114
|
cls_ = self._get_verified_cls()
|
|
115
115
|
return cls_.model_validate(self.dump())
|
|
116
116
|
|
|
@@ -127,14 +127,14 @@ class UnverifiedDataModel(Generic[T_BaseRules], ABC):
|
|
|
127
127
|
return output
|
|
128
128
|
|
|
129
129
|
|
|
130
|
-
|
|
130
|
+
T_UnverifiedDataModel = TypeVar("T_UnverifiedDataModel", bound=UnverifiedDataModel)
|
|
131
131
|
|
|
132
132
|
|
|
133
133
|
@dataclass
|
|
134
|
-
class UnverifiedComponent(ABC, Generic[
|
|
134
|
+
class UnverifiedComponent(ABC, Generic[T_DataModel]):
|
|
135
135
|
@classmethod
|
|
136
136
|
@abstractmethod
|
|
137
|
-
def _get_verified_cls(cls) -> type[
|
|
137
|
+
def _get_verified_cls(cls) -> type[T_DataModel]:
|
|
138
138
|
raise NotImplementedError("This method should be implemented in the subclass.")
|
|
139
139
|
|
|
140
140
|
@classmethod
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""This module contains the definition of `
|
|
1
|
+
"""This module contains the definition of `DataModel` pydantic model and all
|
|
2
2
|
its sub-models and validators.
|
|
3
3
|
"""
|
|
4
4
|
|
|
@@ -160,7 +160,7 @@ class BaseVerifiedMetadata(SchemaModel):
|
|
|
160
160
|
|
|
161
161
|
creator: StrListType = Field(
|
|
162
162
|
description=(
|
|
163
|
-
"List of contributors (comma
|
|
163
|
+
"List of contributors (comma separated) to the data model creation, "
|
|
164
164
|
"typically information architects are considered as contributors."
|
|
165
165
|
),
|
|
166
166
|
)
|
|
@@ -175,7 +175,7 @@ class BaseVerifiedMetadata(SchemaModel):
|
|
|
175
175
|
|
|
176
176
|
source_id: URIRefType | None = Field(
|
|
177
177
|
None,
|
|
178
|
-
description="Id of source that produced this
|
|
178
|
+
description="Id of source that produced this data model",
|
|
179
179
|
alias="sourceId",
|
|
180
180
|
)
|
|
181
181
|
|
|
@@ -254,12 +254,7 @@ class BaseVerifiedMetadata(SchemaModel):
|
|
|
254
254
|
|
|
255
255
|
class BaseVerifiedDataModel(SchemaModel, ABC):
|
|
256
256
|
"""
|
|
257
|
-
|
|
258
|
-
definitions and (optionally) the transformation rules used to transform the data/graph
|
|
259
|
-
from the source representation to the target representation defined by the data model.
|
|
260
|
-
The rules are defined in an Excel sheet and then parsed into a `Rules` object. The
|
|
261
|
-
`Rules` object is then used to generate data model and the `RDF` graph made of data
|
|
262
|
-
model instances.
|
|
257
|
+
Data Model is a core concept in `neat`.
|
|
263
258
|
|
|
264
259
|
Args:
|
|
265
260
|
metadata: Data model metadata
|
|
@@ -317,11 +312,11 @@ class BaseVerifiedDataModel(SchemaModel, ABC):
|
|
|
317
312
|
) -> dict[str, Any]:
|
|
318
313
|
"""Dump the model to a dictionary.
|
|
319
314
|
|
|
320
|
-
This is used in the Exporters to dump
|
|
315
|
+
This is used in the Exporters to dump the data model in the required format.
|
|
321
316
|
|
|
322
317
|
Args:
|
|
323
318
|
entities_exclude_defaults: Whether to exclude default prefix (and version) for entities.
|
|
324
|
-
For example, given a class that is dumped as 'my_prefix:MyClass', if the prefix for the
|
|
319
|
+
For example, given a class that is dumped as 'my_prefix:MyClass', if the prefix for the data model
|
|
325
320
|
set in metadata.prefix = 'my_prefix', then this class will be dumped as 'MyClass' when this flag is set.
|
|
326
321
|
Defaults to True.
|
|
327
322
|
sort: Whether to sort the entities in the output.
|
|
@@ -23,7 +23,7 @@ from ._verified import ConceptualDataModel, ConceptualProperty
|
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
class ConceptualValidation:
|
|
26
|
-
"""This class does all the validation of the
|
|
26
|
+
"""This class does all the validation of the conceptual data model that have dependencies
|
|
27
27
|
between components."""
|
|
28
28
|
|
|
29
29
|
def __init__(
|
|
@@ -43,7 +43,7 @@ class ConceptualMetadata(BaseVerifiedMetadata):
|
|
|
43
43
|
level: ClassVar[DataModelLevel] = DataModelLevel.conceptual
|
|
44
44
|
|
|
45
45
|
# Linking to Conceptual and Physical data model aspects
|
|
46
|
-
physical: URIRef | str | None = Field(None, description="Link to the physical data model
|
|
46
|
+
physical: URIRef | str | None = Field(None, description="Link to the physical data model level")
|
|
47
47
|
|
|
48
48
|
|
|
49
49
|
def _get_metadata(context: Any) -> ConceptualMetadata | None:
|
|
@@ -300,7 +300,7 @@ class ConceptualDataModel(BaseVerifiedDataModel):
|
|
|
300
300
|
if view.conceptual in classes_by_neat_id:
|
|
301
301
|
classes_by_neat_id[view.conceptual].physical = neat_id
|
|
302
302
|
|
|
303
|
-
def
|
|
303
|
+
def as_physical_data_model(self) -> "PhysicalDataModel":
|
|
304
304
|
from cognite.neat.core._data_model.transformers._converters import (
|
|
305
305
|
_ConceptualDataModelConverter,
|
|
306
306
|
)
|
|
@@ -32,7 +32,7 @@ def load_classic_to_core_mapping(org_name: str | None, source_space: str, source
|
|
|
32
32
|
|
|
33
33
|
read: ImportedDataModel[UnverifiedPhysicalDataModel] = DictImporter(loaded).to_data_model()
|
|
34
34
|
if not isinstance(read.unverified_data_model, UnverifiedPhysicalDataModel):
|
|
35
|
-
raise NeatValueError(f"Expected
|
|
35
|
+
raise NeatValueError(f"Expected physical data model, but got {type(read.unverified_data_model).__name__}")
|
|
36
36
|
|
|
37
37
|
verified = VerifyPhysicalDataModel(validate=False).transform(read)
|
|
38
38
|
|
|
@@ -63,15 +63,16 @@ from ._verified import (
|
|
|
63
63
|
|
|
64
64
|
|
|
65
65
|
class _DMSExporter:
|
|
66
|
-
"""The DMS Exporter is responsible for exporting the
|
|
66
|
+
"""The DMS Exporter is responsible for exporting the physical data model to a DMSSchema.
|
|
67
67
|
|
|
68
|
-
This kept in this location such that it can be used by the
|
|
68
|
+
This is kept in this location such that it can be used by the physical data model to validate the schema.
|
|
69
69
|
(This module cannot have a dependency on the exporter module, as it would create a circular dependency.)
|
|
70
70
|
|
|
71
71
|
Args
|
|
72
72
|
include_pipeline (bool): If True, the pipeline will be included with the schema. Pipeline means the
|
|
73
73
|
raw tables and transformations necessary to populate the data model.
|
|
74
|
-
instance_space (str): The space to use for the instance. Defaults to None,`
|
|
74
|
+
instance_space (str): The space to use for the instance. Defaults to None,`
|
|
75
|
+
PhysicalDataModel.metadata.space` will be used
|
|
75
76
|
remove_cdf_spaces(bool): The
|
|
76
77
|
"""
|
|
77
78
|
|