cognite-neat 0.88.1__py3-none-any.whl → 0.88.3__py3-none-any.whl
This diff compares the contents of publicly available package versions as released to their respective public registries. It is provided for informational purposes only.
- cognite/neat/_version.py +1 -1
- cognite/neat/graph/__init__.py +0 -3
- cognite/neat/graph/loaders/_base.py +6 -6
- cognite/neat/graph/loaders/_rdf2asset.py +28 -31
- cognite/neat/graph/loaders/_rdf2dms.py +24 -15
- cognite/neat/issues/__init__.py +14 -0
- cognite/neat/issues/_base.py +415 -0
- cognite/neat/issues/errors/__init__.py +72 -0
- cognite/neat/issues/errors/_external.py +67 -0
- cognite/neat/issues/errors/_general.py +28 -0
- cognite/neat/issues/errors/_properties.py +62 -0
- cognite/neat/issues/errors/_resources.py +111 -0
- cognite/neat/issues/errors/_workflow.py +36 -0
- cognite/neat/{rules/issues → issues}/formatters.py +10 -10
- cognite/neat/issues/warnings/__init__.py +66 -0
- cognite/neat/issues/warnings/_external.py +40 -0
- cognite/neat/issues/warnings/_general.py +29 -0
- cognite/neat/issues/warnings/_models.py +92 -0
- cognite/neat/issues/warnings/_properties.py +44 -0
- cognite/neat/issues/warnings/_resources.py +55 -0
- cognite/neat/issues/warnings/user_modeling.py +113 -0
- cognite/neat/rules/_shared.py +10 -2
- cognite/neat/rules/exporters/_base.py +6 -6
- cognite/neat/rules/exporters/_rules2dms.py +19 -11
- cognite/neat/rules/exporters/_rules2excel.py +4 -4
- cognite/neat/rules/exporters/_rules2ontology.py +74 -51
- cognite/neat/rules/exporters/_rules2yaml.py +3 -3
- cognite/neat/rules/exporters/_validation.py +11 -96
- cognite/neat/rules/importers/__init__.py +7 -3
- cognite/neat/rules/importers/_base.py +9 -13
- cognite/neat/rules/importers/_dms2rules.py +42 -24
- cognite/neat/rules/importers/_dtdl2rules/dtdl_converter.py +49 -53
- cognite/neat/rules/importers/_dtdl2rules/dtdl_importer.py +31 -23
- cognite/neat/rules/importers/_dtdl2rules/spec.py +7 -0
- cognite/neat/rules/importers/_rdf/_imf2rules/__init__.py +3 -0
- cognite/neat/rules/importers/_rdf/_imf2rules/_imf2classes.py +82 -0
- cognite/neat/rules/importers/_rdf/_imf2rules/_imf2metadata.py +34 -0
- cognite/neat/rules/importers/_rdf/_imf2rules/_imf2properties.py +123 -0
- cognite/neat/rules/importers/{_owl2rules/_owl2rules.py → _rdf/_imf2rules/_imf2rules.py} +24 -18
- cognite/neat/rules/importers/{_inference2rules.py → _rdf/_inference2rules.py} +9 -9
- cognite/neat/rules/importers/_rdf/_owl2rules/_owl2classes.py +58 -0
- cognite/neat/rules/importers/_rdf/_owl2rules/_owl2metadata.py +68 -0
- cognite/neat/rules/importers/_rdf/_owl2rules/_owl2properties.py +60 -0
- cognite/neat/rules/importers/_rdf/_owl2rules/_owl2rules.py +76 -0
- cognite/neat/rules/importers/_rdf/_shared.py +586 -0
- cognite/neat/rules/importers/_spreadsheet2rules.py +35 -22
- cognite/neat/rules/importers/_yaml2rules.py +23 -21
- cognite/neat/rules/models/_constants.py +2 -1
- cognite/neat/rules/models/_rdfpath.py +4 -4
- cognite/neat/rules/models/_types/_field.py +9 -11
- cognite/neat/rules/models/asset/_rules.py +1 -3
- cognite/neat/rules/models/asset/_validation.py +14 -10
- cognite/neat/rules/models/dms/_converter.py +2 -4
- cognite/neat/rules/models/dms/_exporter.py +30 -8
- cognite/neat/rules/models/dms/_rules.py +23 -7
- cognite/neat/rules/models/dms/_schema.py +94 -62
- cognite/neat/rules/models/dms/_validation.py +105 -66
- cognite/neat/rules/models/entities.py +3 -0
- cognite/neat/rules/models/information/_converter.py +2 -2
- cognite/neat/rules/models/information/_rules.py +7 -8
- cognite/neat/rules/models/information/_validation.py +48 -25
- cognite/neat/rules/transformers/__init__.py +0 -0
- cognite/neat/rules/transformers/_base.py +15 -0
- cognite/neat/utils/auxiliary.py +2 -35
- cognite/neat/utils/text.py +17 -0
- cognite/neat/workflows/base.py +4 -4
- cognite/neat/workflows/cdf_store.py +3 -3
- cognite/neat/workflows/steps/data_contracts.py +1 -1
- cognite/neat/workflows/steps/lib/current/graph_extractor.py +3 -3
- cognite/neat/workflows/steps/lib/current/graph_loader.py +2 -2
- cognite/neat/workflows/steps/lib/current/graph_store.py +1 -1
- cognite/neat/workflows/steps/lib/current/rules_exporter.py +10 -10
- cognite/neat/workflows/steps/lib/current/rules_importer.py +78 -6
- cognite/neat/workflows/steps/lib/current/rules_validator.py +20 -9
- cognite/neat/workflows/steps/lib/io/io_steps.py +5 -5
- cognite/neat/workflows/steps_registry.py +4 -5
- {cognite_neat-0.88.1.dist-info → cognite_neat-0.88.3.dist-info}/METADATA +1 -1
- {cognite_neat-0.88.1.dist-info → cognite_neat-0.88.3.dist-info}/RECORD +86 -77
- cognite/neat/exceptions.py +0 -145
- cognite/neat/graph/exceptions.py +0 -90
- cognite/neat/graph/issues/loader.py +0 -104
- cognite/neat/issues.py +0 -158
- cognite/neat/rules/importers/_owl2rules/_owl2classes.py +0 -215
- cognite/neat/rules/importers/_owl2rules/_owl2metadata.py +0 -209
- cognite/neat/rules/importers/_owl2rules/_owl2properties.py +0 -203
- cognite/neat/rules/issues/__init__.py +0 -26
- cognite/neat/rules/issues/base.py +0 -82
- cognite/neat/rules/issues/dms.py +0 -683
- cognite/neat/rules/issues/fileread.py +0 -197
- cognite/neat/rules/issues/importing.py +0 -423
- cognite/neat/rules/issues/ontology.py +0 -298
- cognite/neat/rules/issues/spreadsheet.py +0 -563
- cognite/neat/rules/issues/spreadsheet_file.py +0 -151
- cognite/neat/rules/issues/tables.py +0 -72
- cognite/neat/workflows/_exceptions.py +0 -41
- /cognite/neat/{graph/issues → rules/importers/_rdf}/__init__.py +0 -0
- /cognite/neat/rules/importers/{_owl2rules → _rdf/_owl2rules}/__init__.py +0 -0
- /cognite/neat/{graph/stores → store}/__init__.py +0 -0
- /cognite/neat/{graph/stores → store}/_base.py +0 -0
- /cognite/neat/{graph/stores → store}/_provenance.py +0 -0
- {cognite_neat-0.88.1.dist-info → cognite_neat-0.88.3.dist-info}/LICENSE +0 -0
- {cognite_neat-0.88.1.dist-info → cognite_neat-0.88.3.dist-info}/WHEEL +0 -0
- {cognite_neat-0.88.1.dist-info → cognite_neat-0.88.3.dist-info}/entry_points.txt +0 -0
--- cognite/neat/rules/importers/_owl2rules/_owl2metadata.py
+++ /dev/null
@@ -1,209 +0,0 @@
-import datetime
-
-from rdflib import Graph, Namespace
-
-from cognite.neat.constants import DEFAULT_NAMESPACE
-from cognite.neat.rules.models import RoleTypes, SchemaCompleteness
-from cognite.neat.utils.collection_ import remove_none_elements_from_set
-from cognite.neat.utils.rdf_ import convert_rdflib_content
-from cognite.neat.utils.regex_patterns import PATTERNS
-
-
-def parse_owl_metadata(graph: Graph) -> dict:
-    """Parse owl metadata from graph to dict.
-
-    Args:
-        graph: Graph containing owl metadata
-
-    Returns:
-        Dictionary containing owl metadata
-
-    !!! note "Compliant OWL metadata"
-        This makes the method very opinionated, but results in a compliant metadata.
-
-
-    """
-    # TODO: Move dataframe to dict representation
-
-    query = f"""SELECT ?namespace ?prefix ?version ?created ?updated ?title ?description ?creator ?rights ?license
-    WHERE {{
-        ?namespace a owl:Ontology .
-        OPTIONAL {{?namespace owl:versionInfo ?version }}.
-        OPTIONAL {{?namespace dcterms:creator ?creator }}.
-        OPTIONAL {{?namespace <{DEFAULT_NAMESPACE.prefix}> ?prefix }}.
-        OPTIONAL {{?namespace dcterms:title|rdfs:label|skos:prefLabel ?title }}.
-        OPTIONAL {{?namespace dcterms:modified ?updated }}.
-        OPTIONAL {{?namespace dcterms:created ?created }}.
-        OPTIONAL {{?namespace dcterms:description ?description }}.
-        OPTIONAL {{?namespace dcterms:rights|dc:rights ?rights }}.
-
-        OPTIONAL {{?namespace dcterms:license|dc:license ?license }}.
-        FILTER (!isBlank(?namespace))
-        FILTER (!bound(?description) || LANG(?description) = "" || LANGMATCHES(LANG(?description), "en"))
-        FILTER (!bound(?title) || LANG(?title) = "" || LANGMATCHES(LANG(?title), "en"))
-    }}
-    """
-
-    results = [{item for item in sublist} for sublist in list(zip(*graph.query(query), strict=True))]
-
-    raw_metadata = convert_rdflib_content(
-        {
-            "role": RoleTypes.information,
-            "schema": SchemaCompleteness.partial,
-            "prefix": results[1].pop(),
-            "namespace": Namespace(results[0].pop()),
-            "version": results[2].pop(),
-            "created": results[3].pop(),
-            "updated": results[4].pop(),
-            "title": results[5].pop(),
-            "description": results[6].pop(),
-            "creator": (
-                ", ".join(remove_none_elements_from_set(results[7]))
-                if remove_none_elements_from_set(results[7])
-                else None
-            ),
-            "rights": results[8].pop(),
-            "license": results[9].pop(),
-        }
-    )
-
-    return make_metadata_compliant(raw_metadata)
-
-
-def make_metadata_compliant(metadata: dict) -> dict:
-    """Attempts to fix errors in metadata, otherwise defaults to values that will pass validation.
-
-    Args:
-        metadata: Dictionary containing metadata
-
-    Returns:
-        Dictionary containing metadata with fixed errors
-    """
-
-    metadata = fix_namespace(metadata, default=Namespace("http://purl.org/cognite/neat#"))
-    metadata = fix_prefix(metadata)
-    metadata = fix_version(metadata)
-    metadata = fix_date(metadata, date_type="created", default=datetime.datetime.now().replace(microsecond=0))
-    metadata = fix_date(metadata, date_type="updated", default=datetime.datetime.now().replace(microsecond=0))
-    metadata = fix_title(metadata)
-    metadata = fix_description(metadata)
-    metadata = fix_author(metadata, "creator")
-    metadata = fix_rights(metadata)
-    metadata = fix_license(metadata)
-
-    return metadata
-
-
-def fix_license(metadata: dict, default: str = "Unknown license") -> dict:
-    if license := metadata.get("license", None):
-        if not isinstance(license, str):
-            metadata["license"] = default
-        elif isinstance(license, str) and len(license) == 0:
-            metadata["license"] = default
-    else:
-        metadata["license"] = default
-    return metadata
-
-
-def fix_rights(metadata: dict, default: str = "Unknown rights") -> dict:
-    if rights := metadata.get("rights", None):
-        if not isinstance(rights, str):
-            metadata["rights"] = default
-        elif isinstance(rights, str) and len(rights) == 0:
-            metadata["rights"] = default
-    else:
-        metadata["rights"] = default
-    return metadata
-
-
-def fix_author(metadata: dict, author_type: str = "creator", default: str = "NEAT") -> dict:
-    if author := metadata.get(author_type, None):
-        if not isinstance(author, str) or isinstance(author, list):
-            metadata[author_type] = default
-        elif isinstance(author, str) and len(author) == 0:
-            metadata[author_type] = default
-    else:
-        metadata[author_type] = default
-    return metadata
-
-
-def fix_description(metadata: dict, default: str = "This model has been inferred from OWL ontology") -> dict:
-    if description := metadata.get("description", None):
-        if not isinstance(description, str) or len(description) == 0:
-            metadata["description"] = default
-        elif isinstance(description, str) and len(description) > 1024:
-            metadata["description"] = metadata["description"][:1024]
-    else:
-        metadata["description"] = default
-    return metadata
-
-
-def fix_prefix(metadata: dict, default: str = "neat") -> dict:
-    if prefix := metadata.get("prefix", None):
-        if not isinstance(prefix, str) or not PATTERNS.prefix_compliance.match(prefix):
-            metadata["prefix"] = default
-    else:
-        metadata["prefix"] = default
-    return metadata
-
-
-def fix_namespace(metadata: dict, default: Namespace) -> dict:
-    if namespace := metadata.get("namespace", None):
-        if not isinstance(namespace, Namespace):
-            try:
-                metadata["namespace"] = Namespace(namespace)
-            except Exception:
-                metadata["namespace"] = default
-    else:
-        metadata["namespace"] = default
-
-    return metadata
-
-
-def fix_date(
-    metadata: dict,
-    date_type: str,
-    default: datetime.datetime,
-) -> dict:
-    if date := metadata.get(date_type, None):
-        try:
-            if isinstance(date, datetime.datetime):
-                return metadata
-            elif isinstance(date, datetime.date):
-                metadata[date_type] = datetime.datetime.combine(metadata[date_type], datetime.datetime.min.time())
-            elif isinstance(date, str):
-                metadata[date_type] = datetime.datetime.strptime(metadata[date_type], "%Y-%m-%dT%H:%M:%SZ")
-            else:
-                metadata[date_type] = default
-        except Exception:
-            metadata[date_type] = default
-    else:
-        metadata[date_type] = default
-
-    return metadata
-
-
-def fix_version(metadata: dict, default: str = "1.0.0") -> dict:
-    if version := metadata.get("version", None):
-        if not PATTERNS.version_compliance.match(version):
-            metadata["version"] = default
-    else:
-        metadata["version"] = default
-
-    return metadata
-
-
-def fix_title(metadata: dict, default: str = "OWL Inferred Data Model") -> dict:
-    if title := metadata.get("title", None):
-        if not isinstance(title, str):
-            metadata["title"] = default
-        elif isinstance(title, str) and len(title) == 0:
-            metadata["title"] = default
-        elif isinstance(title, str) and len(title) > 255:
-            metadata["title"] = metadata["title"][:255]
-        else:
-            pass
-    else:
-        metadata["title"] = default
-
-    return metadata
--- cognite/neat/rules/importers/_owl2rules/_owl2properties.py
+++ /dev/null
@@ -1,203 +0,0 @@
-from typing import cast
-
-import numpy as np
-import pandas as pd
-from rdflib import Graph
-
-from cognite.neat.rules.models._base import MatchType
-from cognite.neat.utils.rdf_ import remove_namespace_from_uri
-
-from ._owl2classes import _data_type_property_class, _object_property_class, _thing_class
-
-
-def parse_owl_properties(graph: Graph, language: str = "en") -> list[dict]:
-    """Parse owl properties from graph to pandas dataframe.
-
-    Args:
-        graph: Graph containing owl properties
-        language: Language to use for parsing, by default "en"
-
-    Returns:
-        List of dictionaries containing owl properties
-    """
-
-    query = """
-
-    SELECT ?class ?property ?name ?description ?type ?minCount ?maxCount ?reference
-    ?match ?propertyType
-    WHERE {
-        ?property a ?propertyType.
-        FILTER (?propertyType IN (owl:ObjectProperty, owl:DatatypeProperty ) )
-        OPTIONAL {?property rdfs:domain ?class }.
-        OPTIONAL {?property rdfs:range ?type }.
-        OPTIONAL {?property rdfs:label ?name }.
-        OPTIONAL {?property rdfs:comment ?description} .
-        OPTIONAL {?property owl:maxCardinality ?maxCount} .
-        OPTIONAL {?property owl:minCardinality ?minCount} .
-        FILTER (!isBlank(?property))
-        FILTER (!bound(?type) || !isBlank(?type))
-        FILTER (!bound(?class) || !isBlank(?class))
-        FILTER (!bound(?name) || LANG(?name) = "" || LANGMATCHES(LANG(?name), "en"))
-        FILTER (!bound(?description) || LANG(?description) = "" || LANGMATCHES(LANG(?description), "en"))
-        BIND(IF(bound(?minCount), ?minCount, 0) AS ?minCount)
-        BIND(IF(bound(?maxCount), ?maxCount, 1) AS ?maxCount)
-    }
-    """
-
-    raw_df = _parse_raw_dataframe(cast(list[tuple], list(graph.query(query.replace("en", language)))))
-    if raw_df.empty:
-        return []
-
-    # group values and clean up
-    processed_df = _clean_up_properties(raw_df)
-
-    # make compliant
-    processed_df = make_properties_compliant(processed_df)
-
-    # drop column _property_type, which was a helper column:
-    processed_df.drop(columns=["_property_type"], inplace=True)
-
-    return processed_df.to_dict(orient="records")
-
-
-def _parse_raw_dataframe(query_results: list[tuple]) -> pd.DataFrame:
-    df = pd.DataFrame(
-        query_results,
-        columns=[
-            "Class",
-            "Property",
-            "Name",
-            "Description",
-            "Value Type",
-            "Min Count",
-            "Max Count",
-            "Reference",
-            "Match Type",
-            "_property_type",
-        ],
-    )
-    if df.empty:
-        return df
-
-    df.replace(np.nan, "", regex=True, inplace=True)
-
-    df.Reference = df.Property
-    df.Class = df.Class.apply(lambda x: remove_namespace_from_uri(x))
-    df.Property = df.Property.apply(lambda x: remove_namespace_from_uri(x))
-    df["Value Type"] = df["Value Type"].apply(lambda x: remove_namespace_from_uri(x))
-    df["Match Type"] = len(df) * [MatchType.exact]
-    df["Comment"] = len(df) * [None]
-    df["_property_type"] = df["_property_type"].apply(lambda x: remove_namespace_from_uri(x))
-
-    return df
-
-
-def _clean_up_properties(df: pd.DataFrame) -> pd.DataFrame:
-    class_grouped_dfs = df.groupby("Class")
-
-    clean_list = []
-
-    for class_, class_grouped_df in class_grouped_dfs:
-        property_grouped_dfs = class_grouped_df.groupby("Property")
-        for property_, property_grouped_df in property_grouped_dfs:
-            clean_list += [
-                {
-                    "Class": class_,
-                    "Property": property_,
-                    "Name": property_grouped_df["Name"].unique()[0],
-                    "Description": "\n".join(list(property_grouped_df.Description.unique()))[:1024],
-                    "Value Type": property_grouped_df["Value Type"].unique()[0],
-                    "Min Count": property_grouped_df["Min Count"].unique()[0],
-                    "Max Count": property_grouped_df["Max Count"].unique()[0],
-                    "Reference": property_grouped_df["Reference"].unique()[0],
-                    "Match Type": property_grouped_df["Match Type"].unique()[0],
-                    "Comment": property_grouped_df["Comment"].unique()[0],
-                    "_property_type": property_grouped_df["_property_type"].unique()[0],
-                }
-            ]
-
-    df = pd.DataFrame(clean_list)
-    df.replace("", None, inplace=True)
-
-    return df
-
-
-def make_properties_compliant(properties: pd.DataFrame) -> pd.DataFrame:
-    # default to None if "Min Count" is not specified
-    properties["Min Count"] = properties["Min Count"].apply(lambda x: 0 if not isinstance(x, int) or x == "" else x)
-
-    # default to None if "Max Count" is not specified
-    properties["Max Count"] = properties["Max Count"].apply(lambda x: 1 if not isinstance(x, int) or x == "" else x)
-
-    # Replace empty or non-string values in "Match Type" column with "exact"
-    properties["Match Type"] = properties["Match Type"].fillna("exact")
-    properties["Match Type"] = properties["Match Type"].apply(
-        lambda x: "exact" if not isinstance(x, str) or len(x) == 0 else x
-    )
-
-    # Replace empty or non-string values in "Comment" column with a default value
-    properties["Comment"] = properties["Comment"].fillna("Imported from Ontology by NEAT")
-    properties["Comment"] = properties["Comment"].apply(
-        lambda x: "Imported from Ontology by NEAT" if not isinstance(x, str) or len(x) == 0 else x
-    )
-
-    # Reduce length of elements in the "Description" column to 1024 characters
-    properties["Description"] = properties["Description"].apply(lambda x: x[:1024] if isinstance(x, str) else None)
-
-    # fixes and additions
-    properties = fix_dangling_properties(properties)
-    properties = fix_missing_property_value_type(properties)
-
-    return properties
-
-
-def fix_dangling_properties(properties: pd.DataFrame) -> pd.DataFrame:
-    """This method fixes properties which are missing a domain definition in the ontology.
-
-    Args:
-        properties: Dataframe containing properties
-
-    Returns:
-        Dataframe containing properties with fixed domain
-    """
-    domain = {
-        "ObjectProperty": _object_property_class()["Class"],
-        "DatatypeProperty": _data_type_property_class()["Class"],
-    }
-
-    # apply missing range
-    properties["Class"] = properties.apply(
-        lambda row: (
-            domain[row._property_type]
-            if row._property_type == "ObjectProperty" and pd.isna(row["Class"])
-            else domain["DatatypeProperty"]
-            if pd.isna(row["Class"])
-            else row["Class"]
-        ),
-        axis=1,
-    )
-    return properties
-
-
-def fix_missing_property_value_type(properties: pd.DataFrame) -> pd.DataFrame:
-    """This method fixes properties which are missing a range definition in the ontology.
-
-    Args:
-        properties: Dataframe containing properties
-
-    Returns:
-        Dataframe containing properties with fixed range
-    """
-    # apply missing range
-    properties["Value Type"] = properties.apply(
-        lambda row: (
-            _thing_class()["Class"]
-            if row._property_type == "ObjectProperty" and pd.isna(row["Value Type"])
-            else "string"
-            if pd.isna(row["Value Type"])
-            else row["Value Type"]
-        ),
-        axis=1,
-    )
-
-    return properties
--- cognite/neat/rules/issues/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from cognite.neat.issues import MultiValueError
-
-from . import dms, fileread, formatters, importing, spreadsheet, spreadsheet_file
-from .base import (
-    DefaultPydanticError,
-    IssueList,
-    NeatValidationError,
-    ValidationIssue,
-    ValidationWarning,
-)
-
-__all__ = [
-    "DefaultPydanticError",
-    "MultiValueError",
-    "IssueList",
-    "NeatValidationError",
-    "ValidationIssue",
-    "ValidationIssue",
-    "ValidationWarning",
-    "dms",
-    "fileread",
-    "formatters",
-    "importing",
-    "spreadsheet",
-    "spreadsheet_file",
-]
--- cognite/neat/rules/issues/base.py
+++ /dev/null
@@ -1,82 +0,0 @@
-from abc import ABC
-from dataclasses import dataclass
-from typing import Any
-
-from pydantic_core import ErrorDetails
-
-from cognite.neat.issues import MultiValueError, NeatError, NeatIssue, NeatIssueList, NeatWarning
-
-__all__ = [
-    "ValidationIssue",
-    "NeatValidationError",
-    "DefaultPydanticError",
-    "ValidationWarning",
-    "IssueList",
-    "MultiValueError",
-]
-
-
-@dataclass(frozen=True)
-class ValidationIssue(NeatIssue, ABC): ...
-
-
-@dataclass(frozen=True)
-class NeatValidationError(NeatError, ValidationIssue, ABC):
-    @classmethod
-    def from_pydantic_errors(cls, errors: list[ErrorDetails], **kwargs) -> "list[NeatValidationError]":
-        """Convert a list of pydantic errors to a list of Error instances.
-
-        This is intended to be overridden in subclasses to handle specific error types.
-        """
-        all_errors: list[NeatValidationError] = []
-        for error in errors:
-            if isinstance(ctx := error.get("ctx"), dict) and isinstance(
-                multi_error := ctx.get("error"), MultiValueError
-            ):
-                all_errors.extend(multi_error.errors)  # type: ignore[arg-type]
-            else:
-                all_errors.append(DefaultPydanticError.from_pydantic_error(error))
-        return all_errors
-
-
-@dataclass(frozen=True)
-class DefaultPydanticError(NeatValidationError):
-    type: str
-    loc: tuple[int | str, ...]
-    msg: str
-    input: Any
-    ctx: dict[str, Any] | None
-
-    @classmethod
-    def from_pydantic_error(cls, error: ErrorDetails) -> "NeatValidationError":
-        return cls(
-            type=error["type"],
-            loc=error["loc"],
-            msg=error["msg"],
-            input=error.get("input"),
-            ctx=error.get("ctx"),
-        )
-
-    def dump(self) -> dict[str, Any]:
-        output = super().dump()
-        output["type"] = self.type
-        output["loc"] = self.loc
-        output["msg"] = self.msg
-        output["input"] = self.input
-        output["ctx"] = self.ctx
-        return output
-
-    def message(self) -> str:
-        if self.loc and len(self.loc) == 1:
-            return f"{self.loc[0]} sheet: {self.msg}"
-        elif self.loc and len(self.loc) == 2:
-            return f"{self.loc[0]} sheet field/column <{self.loc[1]}>: {self.msg}"
-        else:
-            return self.msg
-
-
-@dataclass(frozen=True)
-class ValidationWarning(NeatWarning, ValidationIssue, ABC): ...
-
-
-class IssueList(NeatIssueList[ValidationIssue]): ...