cognite-neat 0.121.1__py3-none-any.whl → 0.122.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of cognite-neat might be problematic.
- cognite/neat/_version.py +1 -1
- cognite/neat/core/_client/_api/statistics.py +91 -0
- cognite/neat/core/_client/_api_client.py +2 -0
- cognite/neat/core/_client/data_classes/statistics.py +125 -0
- cognite/neat/core/_client/testing.py +4 -0
- cognite/neat/core/_constants.py +6 -7
- cognite/neat/core/_data_model/_constants.py +23 -16
- cognite/neat/core/_data_model/_shared.py +33 -17
- cognite/neat/core/_data_model/analysis/__init__.py +2 -2
- cognite/neat/core/_data_model/analysis/_base.py +186 -183
- cognite/neat/core/_data_model/catalog/__init__.py +2 -2
- cognite/neat/core/_data_model/exporters/__init__.py +6 -6
- cognite/neat/core/_data_model/exporters/_base.py +10 -8
- cognite/neat/core/_data_model/exporters/{_rules2dms.py → _data_model2dms.py} +22 -18
- cognite/neat/core/_data_model/exporters/{_rules2excel.py → _data_model2excel.py} +51 -51
- cognite/neat/core/_data_model/exporters/{_rules2instance_template.py → _data_model2instance_template.py} +14 -14
- cognite/neat/core/_data_model/exporters/{_rules2ontology.py → _data_model2ontology.py} +50 -50
- cognite/neat/core/_data_model/exporters/{_rules2yaml.py → _data_model2yaml.py} +21 -18
- cognite/neat/core/_data_model/importers/__init__.py +8 -8
- cognite/neat/core/_data_model/importers/_base.py +8 -6
- cognite/neat/core/_data_model/importers/_base_file_reader.py +56 -0
- cognite/neat/core/_data_model/importers/{_yaml2rules.py → _dict2data_model.py} +50 -25
- cognite/neat/core/_data_model/importers/{_dms2rules.py → _dms2data_model.py} +58 -49
- cognite/neat/core/_data_model/importers/{_dtdl2rules → _dtdl2data_model}/dtdl_converter.py +22 -22
- cognite/neat/core/_data_model/importers/{_dtdl2rules → _dtdl2data_model}/dtdl_importer.py +7 -7
- cognite/neat/core/_data_model/importers/{_dtdl2rules → _dtdl2data_model}/spec.py +3 -3
- cognite/neat/core/_data_model/importers/_rdf/__init__.py +3 -3
- cognite/neat/core/_data_model/importers/_rdf/_base.py +15 -15
- cognite/neat/core/_data_model/importers/_rdf/{_imf2rules.py → _imf2data_model.py} +17 -17
- cognite/neat/core/_data_model/importers/_rdf/{_inference2rules.py → _inference2rdata_model.py} +59 -59
- cognite/neat/core/_data_model/importers/_rdf/{_owl2rules.py → _owl2data_model.py} +17 -17
- cognite/neat/core/_data_model/importers/_rdf/_shared.py +25 -25
- cognite/neat/core/_data_model/importers/{_spreadsheet2rules.py → _spreadsheet2data_model.py} +76 -19
- cognite/neat/core/_data_model/models/__init__.py +11 -9
- cognite/neat/core/_data_model/models/_base_unverified.py +12 -12
- cognite/neat/core/_data_model/models/_base_verified.py +9 -14
- cognite/neat/core/_data_model/models/_types.py +6 -6
- cognite/neat/core/_data_model/models/conceptual/__init__.py +6 -6
- cognite/neat/core/_data_model/models/conceptual/_unverified.py +20 -20
- cognite/neat/core/_data_model/models/conceptual/_validation.py +88 -78
- cognite/neat/core/_data_model/models/conceptual/_verified.py +54 -52
- cognite/neat/core/_data_model/models/data_types.py +2 -2
- cognite/neat/core/_data_model/models/entities/__init__.py +8 -8
- cognite/neat/core/_data_model/models/entities/_loaders.py +11 -10
- cognite/neat/core/_data_model/models/entities/_multi_value.py +5 -5
- cognite/neat/core/_data_model/models/entities/_single_value.py +44 -38
- cognite/neat/core/_data_model/models/entities/_types.py +9 -3
- cognite/neat/core/_data_model/models/entities/_wrapped.py +3 -3
- cognite/neat/core/_data_model/models/mapping/_classic2core.py +12 -9
- cognite/neat/core/_data_model/models/physical/__init__.py +40 -0
- cognite/neat/core/_data_model/models/{dms → physical}/_exporter.py +75 -55
- cognite/neat/core/_data_model/models/{dms/_rules_input.py → physical/_unverified.py} +48 -39
- cognite/neat/core/_data_model/models/{dms → physical}/_validation.py +17 -15
- cognite/neat/core/_data_model/models/{dms/_rules.py → physical/_verified.py} +68 -60
- cognite/neat/core/_data_model/transformers/__init__.py +29 -25
- cognite/neat/core/_data_model/transformers/_base.py +27 -20
- cognite/neat/core/_data_model/transformers/_converters.py +707 -622
- cognite/neat/core/_data_model/transformers/_mapping.py +74 -55
- cognite/neat/core/_data_model/transformers/_verification.py +64 -55
- cognite/neat/core/_instances/extractors/_base.py +2 -2
- cognite/neat/core/_instances/extractors/_classic_cdf/_classic.py +9 -9
- cognite/neat/core/_instances/extractors/_dms_graph.py +42 -34
- cognite/neat/core/_instances/extractors/_mock_graph_generator.py +107 -103
- cognite/neat/core/_instances/loaders/_base.py +3 -3
- cognite/neat/core/_instances/loaders/_rdf2dms.py +22 -22
- cognite/neat/core/_instances/transformers/_base.py +7 -4
- cognite/neat/core/_instances/transformers/_rdfpath.py +1 -1
- cognite/neat/core/_instances/transformers/_value_type.py +2 -6
- cognite/neat/core/_issues/_base.py +4 -4
- cognite/neat/core/_issues/_factory.py +1 -1
- cognite/neat/core/_issues/errors/__init__.py +2 -2
- cognite/neat/core/_issues/errors/_resources.py +1 -1
- cognite/neat/core/_issues/errors/_wrapper.py +2 -2
- cognite/neat/core/_issues/warnings/_models.py +4 -4
- cognite/neat/core/_issues/warnings/_properties.py +1 -1
- cognite/neat/core/_store/__init__.py +3 -3
- cognite/neat/core/_store/{_rules_store.py → _data_model.py} +119 -112
- cognite/neat/core/_store/{_graph_store.py → _instance.py} +3 -4
- cognite/neat/core/_store/_provenance.py +2 -2
- cognite/neat/core/_store/exceptions.py +2 -2
- cognite/neat/core/_utils/rdf_.py +14 -0
- cognite/neat/core/_utils/text.py +1 -1
- cognite/neat/session/_base.py +42 -36
- cognite/neat/session/_drop.py +2 -2
- cognite/neat/session/_experimental.py +1 -1
- cognite/neat/session/_inspect.py +13 -13
- cognite/neat/session/_mapping.py +15 -9
- cognite/neat/session/_read.py +39 -37
- cognite/neat/session/_set.py +6 -6
- cognite/neat/session/_show.py +24 -21
- cognite/neat/session/_state/README.md +1 -1
- cognite/neat/session/_state.py +27 -27
- cognite/neat/session/_subset.py +14 -11
- cognite/neat/session/_template.py +23 -21
- cognite/neat/session/_to.py +42 -42
- {cognite_neat-0.121.1.dist-info → cognite_neat-0.122.0.dist-info}/METADATA +14 -7
- {cognite_neat-0.121.1.dist-info → cognite_neat-0.122.0.dist-info}/RECORD +102 -100
- cognite/neat/core/_data_model/exporters/_validation.py +0 -14
- cognite/neat/core/_data_model/models/dms/__init__.py +0 -32
- /cognite/neat/core/_data_model/catalog/{info-rules-imf.xlsx → conceptual-imf-data-model.xlsx} +0 -0
- /cognite/neat/core/_data_model/importers/{_dtdl2rules → _dtdl2data_model}/__init__.py +0 -0
- /cognite/neat/core/_data_model/importers/{_dtdl2rules → _dtdl2data_model}/_unit_lookup.py +0 -0
- {cognite_neat-0.121.1.dist-info → cognite_neat-0.122.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.121.1.dist-info → cognite_neat-0.122.0.dist-info}/licenses/LICENSE +0 -0
cognite/neat/core/_data_model/importers/_rdf/{_imf2rules.py → _imf2data_model.py}
RENAMED
@@ -1,28 +1,28 @@
"""This module performs importing of various formats to one of serializations for which
- there are loaders to
+ there are loaders to data model pydantic class."""

from cognite.neat.core._data_model.importers._rdf._base import BaseRDFImporter
from cognite.neat.core._data_model.importers._rdf._shared import (
-
+ parse_concepts,
parse_properties,
)

CLASSES_QUERY = """
- SELECT ?
+ SELECT ?concept ?name ?description ?implements
WHERE {{
VALUES ?type {{ imf:BlockType imf:TerminalType imf:AttributeType }}
- ?
+ ?concept a ?type .

- OPTIONAL {{?
- OPTIONAL {{?
- OPTIONAL {{?
+ OPTIONAL {{?concept rdfs:subClassOf ?parent }}.
+ OPTIONAL {{?concept rdfs:label|skos:prefLabel ?name }}.
+ OPTIONAL {{?concept rdfs:comment|skos:definition ?description}}.


# Add imf:Attribute as parent class when no parent is found
BIND(IF(!bound(?parent) && ?type = imf:AttributeType, imf:Attribute, ?parent) AS ?implements)

# FILTERS
- FILTER (!isBlank(?
+ FILTER (!isBlank(?concept))
FILTER (!bound(?implements) || !isBlank(?implements))

FILTER (!bound(?name) || LANG(?name) = "" || LANGMATCHES(LANG(?name), "{language}"))
@@ -31,13 +31,13 @@ CLASSES_QUERY = """
"""

PROPERTIES_QUERY = """
- SELECT ?
+ SELECT ?concept ?property_ ?name ?description ?value_type ?min_count ?max_count ?default
WHERE
{{
# CASE 1: Handling Blocks and Terminals
{{
VALUES ?type {{ imf:BlockType imf:TerminalType }}
- ?
+ ?concept a ?type ;
sh:property ?propertyShape .
?propertyShape sh:path ?property_ .

@@ -55,10 +55,10 @@ PROPERTIES_QUERY = """

# CASE 2: Handling Attributes
{{
- ?
+ ?concept a imf:AttributeType .
BIND(xsd:anyURI AS ?valueShape)
BIND(imf:predicate AS ?property_)
- ?
+ ?concept ?property_ ?defaultURI .
BIND(STR(?defaultURI) AS ?default)

}}
@@ -67,7 +67,7 @@ PROPERTIES_QUERY = """
BIND(IF(BOUND(?valueShape), ?valueShape, IF(BOUND(?range) , ?range , ?valueShape)) AS ?value_type)

FILTER (!isBlank(?property_))
- FILTER (!bound(?
+ FILTER (!bound(?concept) || !isBlank(?concept))
FILTER (!bound(?name) || LANG(?name) = "" || LANGMATCHES(LANG(?name), "{language}"))
FILTER (!bound(?description) || LANG(?description) = "" || LANGMATCHES(LANG(?description), "{language}"))
}}
@@ -75,23 +75,23 @@ PROPERTIES_QUERY = """


class IMFImporter(BaseRDFImporter):
- """Convert IMF Types provided as SHACL shapes to
+ """Convert IMF Types provided as SHACL shapes to unverified data model."""

@property
def description(self) -> str:
return f"IMF Types {self.source_name} read as unverified data model"

- def
+ def _to_data_model_components(
self,
) -> dict:
- classes, issue_list =
+ classes, issue_list = parse_concepts(self.graph, CLASSES_QUERY, self.language, self.issue_list)
self.issue_list = issue_list
properties, issue_list = parse_properties(self.graph, PROPERTIES_QUERY, self.language, self.issue_list)
self.issue_list = issue_list

components = {
"Metadata": self._metadata,
- "
+ "Concepts": list(classes.values()) if classes else [],
"Properties": list(properties.values()) if properties else [],
}

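For orientation, a minimal sketch of the component dictionary that the renamed _to_data_model_components hook returns for this importer, using only the keys visible in the diff above; the concept entry values are hypothetical.

# Illustrative only: shape of the dict built by IMFImporter._to_data_model_components
# after the rename. Values are hypothetical; keys mirror the diff above.
components = {
    "Metadata": {},  # self._metadata (contents not shown in this diff)
    "Concepts": [
        # one entry per ?concept binding of CLASSES_QUERY
        {"concept": "TerminalType", "name": None, "description": None, "implements": None},
    ],
    "Properties": [],  # one entry per binding of PROPERTIES_QUERY
}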
cognite/neat/core/_data_model/importers/_rdf/{_inference2rules.py → _inference2rdata_model.py}
RENAMED
@@ -12,19 +12,19 @@ from rdflib import Literal as RdfLiteral

from cognite.neat.core._config import GLOBAL_CONFIG
from cognite.neat.core._constants import NEAT, get_default_prefixes_and_namespaces
- from cognite.neat.core._data_model.analysis import
+ from cognite.neat.core._data_model.analysis import DataModelAnalysis
from cognite.neat.core._data_model.models import ConceptualDataModel, data_types
from cognite.neat.core._data_model.models.conceptual import (
-
+ Concept,
ConceptualMetadata,
-
+ UnverifiedConcept,
UnverifiedConceptualProperty,
)
from cognite.neat.core._data_model.models.data_types import AnyURI
from cognite.neat.core._data_model.models.entities._single_value import UnknownEntity
from cognite.neat.core._issues import IssueList
from cognite.neat.core._issues.warnings import PropertyValueTypeUndefinedWarning
- from cognite.neat.core._store import
+ from cognite.neat.core._store import NeatInstanceStore
from cognite.neat.core._store._provenance import INSTANCES_ENTITY
from cognite.neat.core._utils.collection_ import iterate_progress_bar
from cognite.neat.core._utils.rdf_ import remove_namespace_from_uri, uri_to_short_form
@@ -56,10 +56,10 @@ INSTANCE_PROPERTIES_DEFINITION = """SELECT ?property (count(?property) as ?occur


class InferenceImporter(BaseRDFImporter):
- """Infers
+ """Infers data model from instances stored as triples.

-
- Use the factory methods to create
+ Data model inference through analysis of knowledge graph provided in various formats.
+ Use the factory methods to create triples from sources such as
RDF files, JSON files, YAML files, XML files, or directly from a graph store.

ClassVars:
@@ -83,8 +83,8 @@ class InferenceImporter(BaseRDFImporter):
@classmethod
def from_graph_store(
cls,
- store:
- data_model_id: dm.DataModelId | tuple[str, str, str] = DEFAULT_INFERENCE_DATA_MODEL_ID,
+ store: NeatInstanceStore,
+ data_model_id: (dm.DataModelId | tuple[str, str, str]) = DEFAULT_INFERENCE_DATA_MODEL_ID,
max_number_of_instance: int = -1,
non_existing_node_type: UnknownEntity | AnyURI = DEFAULT_NON_EXISTING_NODE_TYPE,
language: str = "en",
@@ -145,46 +145,46 @@ class InferenceImporter(BaseRDFImporter):
) -> "InferenceImporter":
raise NotImplementedError("JSON file format is not supported yet.")

- def
+ def _to_data_model_components(
self,
) -> dict:
"""Convert RDF graph to dictionary defining data model and prefixes of the graph

Args:
- graph: RDF graph to be converted to
+ graph: RDF graph to be converted to data model
max_number_of_instance: Max number of instances to be considered for each class

Returns:
Tuple of data model and prefixes of the graph
"""

-
+ concepts: dict[str, dict] = {}
properties: dict[str, dict] = {}
prefixes: dict[str, Namespace] = {}
count_by_value_type_by_property: dict[str, dict[str, int]] = defaultdict(Counter)

# Infers all the classes in the graph
- for
- if (
+ for concept_uri, no_instances in self.graph.query(ORDERED_CLASSES_QUERY): # type: ignore[misc]
+ if (concept_id := remove_namespace_from_uri(cast(URIRef, concept_uri))) in concepts:
# handles cases when class id is already present in classes
-
+ concept_id = f"{concept_id}_{len(concepts) + 1}"

-
- "
- "uri":
+ concepts[concept_id] = {
+ "concept": concept_id,
+ "uri": concept_uri,
"comment": f"Inferred from knowledge graph, where this class has <{no_instances}> instances",
}

- self._add_uri_namespace_to_prefixes(cast(URIRef,
+ self._add_uri_namespace_to_prefixes(cast(URIRef, concept_uri), prefixes)

instances_query = (
INSTANCES_OF_CLASS_QUERY if self.max_number_of_instance == -1 else INSTANCES_OF_CLASS_RICHNESS_ORDERED_QUERY
)

- classes_iterable = iterate_progress_bar(
+ classes_iterable = iterate_progress_bar(concepts.items(), len(concepts), "Inferring classes")

# Infers all the properties of the class
- for
+ for concept_id, class_definition in classes_iterable:
for ( # type: ignore[misc]
instance,
_,
@@ -216,19 +216,19 @@ class InferenceImporter(BaseRDFImporter):

issue = PropertyValueTypeUndefinedWarning(
resource_type="Property",
- identifier=f"{
+ identifier=f"{concept_id}:{property_id}",
property_name=property_id,
- default_action="Remove the property from the
+ default_action="Remove the property from the data model",
recommended_action="Make sure that graph is complete",
)

if issue not in self.issue_list:
self.issue_list.append(issue)

- id_ = f"{
+ id_ = f"{concept_id}:{property_id}"

definition = {
- "
+ "concept": concept_id,
"property_": property_id,
"max_count": cast(RdfLiteral, occurrence).value,
"value_type": value_type_id,
@@ -265,7 +265,7 @@ class InferenceImporter(BaseRDFImporter):

return {
"metadata": self._default_metadata(),
- "
+ "concepts": list(concepts.values()),
"properties": list(properties.values()),
"prefixes": prefixes,
}
@@ -300,11 +300,11 @@ class _ReadProperties:


class SubclassInferenceImporter(BaseRDFImporter):
- """Infer subclasses from a triple store.
+ """Infer subclasses from instances stored in a triple store.

- Assumes that the
- match the RDF.type of the instances
-
+ Assumes that the instances already are connected to a schema. The classes should
+ match the RDF.type of the instances, while the subclasses should match the NEAT.type
+ of the instances.

ClassVars:
overwrite_data_types: Mapping of data types to be overwritten. The InferenceImporter will overwrite
@@ -357,26 +357,26 @@ class SubclassInferenceImporter(BaseRDFImporter):
self,
issue_list: IssueList,
graph: Graph,
-
+ data_model: ConceptualDataModel | None = None,
data_model_id: dm.DataModelId | tuple[str, str, str] | None = None,
non_existing_node_type: UnknownEntity | AnyURI = DEFAULT_NON_EXISTING_NODE_TYPE,
) -> None:
- if sum([1 for v in [
- raise ValueError("Exactly one of
+ if sum([1 for v in [data_model, data_model_id] if v is not None]) != 1:
+ raise ValueError("Exactly one of data model or data_model_id must be provided.")
if data_model_id is not None:
identifier = data_model_id
- elif
- identifier =
+ elif data_model is not None:
+ identifier = data_model.metadata.as_data_model_id().as_tuple() # type: ignore[assignment]
else:
- raise ValueError("Exactly one of
+ raise ValueError("Exactly one of data model or data_model_id must be provided.")
super().__init__(issue_list, graph, identifier, -1, non_existing_node_type, language="en")
- self.
+ self._data_model = data_model

- def
+ def _to_data_model_components(
self,
) -> dict:
- if self.
- prefixes = self.
+ if self._data_model:
+ prefixes = self._data_model.prefixes.copy()
else:
prefixes = get_default_prefixes_and_namespaces()

@@ -384,27 +384,27 @@ class SubclassInferenceImporter(BaseRDFImporter):
read_properties = self._read_class_properties_from_graph(parent_by_child)
classes, properties = self._create_classes_properties(read_properties, prefixes)

- if self.
- metadata = self.
- default_space = self.
+ if self._data_model:
+ metadata = self._data_model.metadata.model_dump()
+ default_space = self._data_model.metadata.prefix
else:
metadata = self._default_metadata()
default_space = metadata["space"]
return {
"metadata": metadata,
- "
+ "concepts": [cls.dump(default_space) for cls in classes],
"properties": [prop.dump(default_space) for prop in properties],
"prefixes": prefixes,
}

def _create_classes_properties(
self, read_properties: list[_ReadProperties], prefixes: dict[str, Namespace]
- ) -> tuple[list[
- if self.
- existing_classes = {class_.
+ ) -> tuple[list[UnverifiedConcept], list[UnverifiedConceptualProperty]]:
+ if self._data_model:
+ existing_classes = {class_.concept.suffix: class_ for class_ in self._data_model.concepts}
else:
existing_classes = {}
- classes: list[
+ classes: list[UnverifiedConcept] = []
properties_by_class_suffix_by_property_id: dict[str, dict[str, UnverifiedConceptualProperty]] = {}

# Help for IDE
@@ -429,9 +429,9 @@ class SubclassInferenceImporter(BaseRDFImporter):
parent_suffix = remove_namespace_from_uri(parent_uri)
self._add_uri_namespace_to_prefixes(parent_uri, prefixes)
if parent_suffix not in existing_classes:
- classes.append(
+ classes.append(UnverifiedConcept(concept=parent_suffix))
else:
- classes.append(
+ classes.append(UnverifiedConcept.load(existing_classes[parent_suffix].model_dump()))
else:
shared_property_uris = set()
shared_properties: dict[URIRef, list[_ReadProperties]] = defaultdict(list)
@@ -441,14 +441,14 @@ class SubclassInferenceImporter(BaseRDFImporter):

if class_suffix not in existing_classes:
classes.append(
-
-
+ UnverifiedConcept(
+ concept=class_suffix,
implements=parent_suffix,
instance_source=type_uri,
)
)
else:
- classes.append(
+ classes.append(UnverifiedConcept.load(existing_classes[class_suffix].model_dump()))

properties_by_id: dict[str, UnverifiedConceptualProperty] = {}
for property_uri, read_properties in properties_by_property_uri.items():
@@ -509,21 +509,21 @@ class SubclassInferenceImporter(BaseRDFImporter):
for result_row in self.graph.query(self._ordered_class_query):
type_uri, instance_count_literal = cast(tuple[URIRef, RdfLiteral], result_row)
count_by_type[type_uri] = instance_count_literal.toPython()
- if self.
- analysis =
+ if self._data_model:
+ analysis = DataModelAnalysis(self._data_model)
existing_class_properties = {
(class_entity.suffix, prop.property_): prop
- for class_entity, properties in analysis.
+ for class_entity, properties in analysis.properties_by_concepts(
include_ancestors=True, include_different_space=True
).items()
for prop in properties
}
- existing_classes = {cls_.
+ existing_classes = {cls_.concept.suffix: cls_ for cls_ in self._data_model.concepts}
else:
existing_class_properties = {}
existing_classes = {}
properties_by_class_by_subclass: list[_ReadProperties] = []
- existing_class:
+ existing_class: Concept | None
total_instance_count = sum(count_by_type.values())
iterable = count_by_type.items()
if GLOBAL_CONFIG.use_iterate_bar_threshold and total_instance_count > GLOBAL_CONFIG.use_iterate_bar_threshold:
@@ -581,7 +581,7 @@ class SubclassInferenceImporter(BaseRDFImporter):
first = read_properties[0]
value_type = self._get_value_type(read_properties, prefixes)
return UnverifiedConceptualProperty(
-
+ concept=class_suffix,
property_=property_id,
max_count=first.max_occurrence,
value_type=value_type,
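A minimal usage sketch of the renamed factory method, based only on the from_graph_store signature shown above; the module path follows the renamed file, the data model id values are placeholders, and obtaining the populated NeatInstanceStore is left out.

# Sketch based on the from_graph_store signature in the diff above; ids are placeholders.
from cognite.neat.core._data_model.importers._rdf._inference2rdata_model import InferenceImporter
from cognite.neat.core._store import NeatInstanceStore

store: NeatInstanceStore = ...  # an instance store already populated with triples
importer = InferenceImporter.from_graph_store(
    store,
    data_model_id=("my_space", "InferredDataModel", "v1"),  # tuple[str, str, str] is accepted
    max_number_of_instance=1000,
    language="en",
)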
cognite/neat/core/_data_model/importers/_rdf/{_owl2rules.py → _owl2data_model.py}
RENAMED
@@ -1,22 +1,22 @@
"""This module performs importing of various formats to one of serializations for which
- there are loaders to
+ there are loaders to data model pydantic class."""

from cognite.neat.core._data_model.importers._rdf._base import BaseRDFImporter
from cognite.neat.core._data_model.importers._rdf._shared import (
-
+ parse_concepts,
parse_properties,
)

- CLASSES_QUERY = """SELECT ?
+ CLASSES_QUERY = """SELECT ?concept ?name ?description ?implements
WHERE {{

- ?
- OPTIONAL {{?
- OPTIONAL {{?
- OPTIONAL {{?
+ ?concept a owl:Class .
+ OPTIONAL {{?concept rdfs:subClassOf ?implements }}.
+ OPTIONAL {{?concept rdfs:label|skos:prefLabel ?name }}.
+ OPTIONAL {{?concept rdfs:comment|skos:definition ?description}} .


- FILTER (!isBlank(?
+ FILTER (!isBlank(?concept ))
FILTER (!bound(?implements) || !isBlank(?implements))

FILTER (!bound(?name) || LANG(?name) = "" || LANGMATCHES(LANG(?name), "{language}"))
@@ -27,11 +27,11 @@ CLASSES_QUERY = """SELECT ?class_ ?name ?description ?implements

PROPERTIES_QUERY = """

- SELECT ?
+ SELECT ?concept ?property_ ?name ?description ?value_type ?minCount ?maxCount ?default
WHERE {{
?property_ a ?property_Type.
FILTER (?property_Type IN (owl:ObjectProperty, owl:DatatypeProperty ) )
- OPTIONAL {{?property_ rdfs:domain ?
+ OPTIONAL {{?property_ rdfs:domain ?concept }}.
OPTIONAL {{?property_ rdfs:range ?value_type }}.
OPTIONAL {{?property_ rdfs:label|skos:prefLabel ?name }}.
OPTIONAL {{?property_ rdfs:comment|skos:definition ?description}}.
@@ -47,27 +47,27 @@ PROPERTIES_QUERY = """


class OWLImporter(BaseRDFImporter):
- """Convert OWL ontology to
+ """Convert OWL ontology to unverified data model.

Args:
filepath: Path to OWL ontology

!!! Note
OWL Ontologies are information models which completeness varies. As such, constructing functional
- data model directly will often be impossible, therefore the produced
- To avoid this, neat will automatically attempt to make the imported
+ data model directly will often be impossible, therefore the produced data model object will be ill formed.
+ To avoid this, neat will automatically attempt to make the imported data model compliant by adding default
values for missing information, attaching dangling properties to default containers based on the
property type, etc.

One has to be aware that NEAT will be opinionated about how to make the ontology
- compliant, and that the resulting
+ compliant, and that the resulting data model may not be what you expect.

"""

- def
+ def _to_data_model_components(
self,
) -> dict:
-
+ concepts, issue_list = parse_concepts(self.graph, CLASSES_QUERY, self.language, self.issue_list)
self.issue_list = issue_list

# NeatError
@@ -76,7 +76,7 @@ class OWLImporter(BaseRDFImporter):

components = {
"Metadata": self._metadata,
- "
+ "Concepts": list(concepts.values()) if concepts else [],
"Properties": list(properties.values()) if properties else [],
}

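The renamed query can also be evaluated directly with rdflib, mirroring the prepareQuery call shown in _shared.py below; the ontology file name is a placeholder and the owl/rdfs/skos prefixes are assumed to be bound on the parsed graph.

# Sketch: evaluate the renamed CLASSES_QUERY outside the importer, prepared the same
# way _shared.py does below. "my-ontology.ttl" is a placeholder file name.
from rdflib import Graph
from rdflib.plugins.sparql import prepareQuery

from cognite.neat.core._data_model.importers._rdf._owl2data_model import CLASSES_QUERY

graph = Graph().parse("my-ontology.ttl")
prepared = prepareQuery(CLASSES_QUERY.format(language="en"), initNs=dict(graph.namespaces()))
for row in graph.query(prepared):
    print(row.concept, row.implements)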
cognite/neat/core/_data_model/importers/_rdf/_shared.py
@@ -13,18 +13,18 @@ from cognite.neat.core._issues.warnings._resources import (
from cognite.neat.core._utils.rdf_ import convert_rdflib_content


- def
- """Parse
+ def parse_concepts(graph: Graph, query: str, language: str, issue_list: IssueList) -> tuple[dict, IssueList]:
+ """Parse concepts from graph

Args:
- graph: Graph containing
+ graph: Graph containing concept definitions
language: Language to use for parsing, by default "en"

Returns:
Dataframe containing owl classes
"""

-
+ concepts: dict[str, dict] = {}

query = prepareQuery(query.format(language=language), initNs={k: v for k, v in graph.namespaces()})
expected_keys = [str(v) for v in query.algebra._vars]
@@ -33,42 +33,42 @@ def parse_classes(graph: Graph, query: str, language: str, issue_list: IssueList
res: dict = convert_rdflib_content(cast(ResultRow, raw).asdict(), True)
res = {key: res.get(key, None) for key in expected_keys}

-
+ concept_id = res["concept"]

# Safeguarding against incomplete semantic definitions
if res["implements"] and isinstance(res["implements"], BNode):
issue_list.append(
ResourceRetrievalWarning(
-
+ concept_id,
"implements",
- error=("Unable to determine
+ error=("Unable to determine concept that is being implemented"),
)
)
continue

- if
-
+ if concept_id not in concepts:
+ concepts[concept_id] = res
else:
# Handling implements
- if
- if res["implements"] not in
-
+ if concepts[concept_id]["implements"] and isinstance(concepts[concept_id]["implements"], list):
+ if res["implements"] not in concepts[concept_id]["implements"]:
+ concepts[concept_id]["implements"].append(res["implements"])

- elif
-
+ elif concepts[concept_id]["implements"] and isinstance(concepts[concept_id]["implements"], str):
+ concepts[concept_id]["implements"] = [concepts[concept_id]["implements"]]

- if res["implements"] not in
-
+ if res["implements"] not in concepts[concept_id]["implements"]:
+ concepts[concept_id]["implements"].append(res["implements"])
elif res["implements"]:
-
+ concepts[concept_id]["implements"] = [res["implements"]]

- handle_meta("
- handle_meta("
+ handle_meta("concept", concepts, concept_id, res, "name", issue_list)
+ handle_meta("concept", concepts, concept_id, res, "description", issue_list)

- if not
- issue_list.append(NeatValueError("Unable to parse
+ if not concepts:
+ issue_list.append(NeatValueError("Unable to parse concepts"))

- return
+ return concepts, issue_list


def parse_properties(graph: Graph, query: str, language: str, issue_list: IssueList) -> tuple[dict, IssueList]:
@@ -94,12 +94,12 @@ def parse_properties(graph: Graph, query: str, language: str, issue_list: IssueL
property_id = res["property_"]

# Safeguarding against incomplete semantic definitions
- if not res["
+ if not res["concept"] or isinstance(res["concept"], BNode):
issue_list.append(
ResourceRetrievalWarning(
property_id,
"property",
- error=("Unable to determine to what
+ error=("Unable to determine to what concept property is being defined"),
)
)
continue
@@ -115,7 +115,7 @@ def parse_properties(graph: Graph, query: str, language: str, issue_list: IssueL
)
continue

- id_ = f"{res['
+ id_ = f"{res['concept']}.{res['property_']}"

if id_ not in properties:
properties[id_] = res
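A sketch of calling the renamed parse_concepts helper directly, using the signature shown above; the reuse of the IMF importer's query, the file name, and the empty IssueList construction are assumptions for illustration, and the imf/rdfs/skos prefixes are assumed to be bound on the parsed graph.

# Sketch using the parse_concepts signature shown above; the file name is a placeholder.
from rdflib import Graph

from cognite.neat.core._data_model.importers._rdf._imf2data_model import CLASSES_QUERY
from cognite.neat.core._data_model.importers._rdf._shared import parse_concepts
from cognite.neat.core._issues import IssueList

graph = Graph().parse("imf-types.ttl")
concepts, issues = parse_concepts(graph, CLASSES_QUERY, "en", IssueList())
for concept_id, definition in concepts.items():
    print(concept_id, definition.get("implements"))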