cognite-neat 0.88.2__py3-none-any.whl → 0.89.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_version.py +1 -1
- cognite/neat/constants.py +3 -0
- cognite/neat/graph/__init__.py +0 -3
- cognite/neat/graph/extractors/_mock_graph_generator.py +2 -1
- cognite/neat/graph/loaders/_base.py +3 -3
- cognite/neat/graph/loaders/_rdf2asset.py +24 -25
- cognite/neat/graph/loaders/_rdf2dms.py +20 -15
- cognite/neat/issues/__init__.py +1 -3
- cognite/neat/issues/_base.py +261 -71
- cognite/neat/issues/errors/__init__.py +73 -0
- cognite/neat/issues/errors/_external.py +67 -0
- cognite/neat/issues/errors/_general.py +35 -0
- cognite/neat/issues/errors/_properties.py +62 -0
- cognite/neat/issues/errors/_resources.py +111 -0
- cognite/neat/issues/errors/_workflow.py +36 -0
- cognite/neat/issues/formatters.py +1 -1
- cognite/neat/issues/warnings/__init__.py +66 -0
- cognite/neat/issues/warnings/_external.py +40 -0
- cognite/neat/issues/warnings/_general.py +29 -0
- cognite/neat/issues/warnings/_models.py +92 -0
- cognite/neat/issues/warnings/_properties.py +44 -0
- cognite/neat/issues/warnings/_resources.py +55 -0
- cognite/neat/issues/warnings/user_modeling.py +113 -0
- cognite/neat/rules/_shared.py +53 -2
- cognite/neat/rules/analysis/_base.py +1 -1
- cognite/neat/rules/exporters/_base.py +7 -18
- cognite/neat/rules/exporters/_rules2dms.py +17 -20
- cognite/neat/rules/exporters/_rules2excel.py +9 -16
- cognite/neat/rules/exporters/_rules2ontology.py +77 -64
- cognite/neat/rules/exporters/_rules2yaml.py +6 -9
- cognite/neat/rules/exporters/_validation.py +11 -96
- cognite/neat/rules/importers/_base.py +9 -58
- cognite/neat/rules/importers/_dms2rules.py +188 -135
- cognite/neat/rules/importers/_dtdl2rules/dtdl_converter.py +48 -35
- cognite/neat/rules/importers/_dtdl2rules/dtdl_importer.py +36 -45
- cognite/neat/rules/importers/_dtdl2rules/spec.py +7 -0
- cognite/neat/rules/importers/_rdf/_imf2rules/_imf2classes.py +8 -4
- cognite/neat/rules/importers/_rdf/_imf2rules/_imf2metadata.py +3 -3
- cognite/neat/rules/importers/_rdf/_imf2rules/_imf2properties.py +18 -11
- cognite/neat/rules/importers/_rdf/_imf2rules/_imf2rules.py +12 -19
- cognite/neat/rules/importers/_rdf/_inference2rules.py +14 -37
- cognite/neat/rules/importers/_rdf/_owl2rules/_owl2classes.py +1 -0
- cognite/neat/rules/importers/_rdf/_owl2rules/_owl2properties.py +1 -0
- cognite/neat/rules/importers/_rdf/_owl2rules/_owl2rules.py +9 -20
- cognite/neat/rules/importers/_rdf/_shared.py +4 -4
- cognite/neat/rules/importers/_spreadsheet2rules.py +46 -97
- cognite/neat/rules/importers/_yaml2rules.py +32 -58
- cognite/neat/rules/models/__init__.py +21 -5
- cognite/neat/rules/models/_base_input.py +162 -0
- cognite/neat/rules/models/{_base.py → _base_rules.py} +1 -12
- cognite/neat/rules/models/_rdfpath.py +4 -4
- cognite/neat/rules/models/{_types/_field.py → _types.py} +5 -10
- cognite/neat/rules/models/asset/__init__.py +5 -2
- cognite/neat/rules/models/asset/_rules.py +3 -23
- cognite/neat/rules/models/asset/_rules_input.py +40 -115
- cognite/neat/rules/models/asset/_validation.py +14 -10
- cognite/neat/rules/models/data_types.py +150 -44
- cognite/neat/rules/models/dms/__init__.py +19 -7
- cognite/neat/rules/models/dms/_exporter.py +102 -34
- cognite/neat/rules/models/dms/_rules.py +65 -162
- cognite/neat/rules/models/dms/_rules_input.py +186 -254
- cognite/neat/rules/models/dms/_schema.py +87 -78
- cognite/neat/rules/models/dms/_serializer.py +44 -3
- cognite/neat/rules/models/dms/_validation.py +106 -68
- cognite/neat/rules/models/domain.py +52 -1
- cognite/neat/rules/models/entities/__init__.py +63 -0
- cognite/neat/rules/models/entities/_constants.py +73 -0
- cognite/neat/rules/models/entities/_loaders.py +76 -0
- cognite/neat/rules/models/entities/_multi_value.py +67 -0
- cognite/neat/rules/models/{entities.py → entities/_single_value.py} +74 -232
- cognite/neat/rules/models/entities/_types.py +86 -0
- cognite/neat/rules/models/{wrapped_entities.py → entities/_wrapped.py} +1 -1
- cognite/neat/rules/models/information/__init__.py +10 -2
- cognite/neat/rules/models/information/_rules.py +10 -22
- cognite/neat/rules/models/information/_rules_input.py +57 -204
- cognite/neat/rules/models/information/_validation.py +48 -25
- cognite/neat/rules/transformers/__init__.py +21 -0
- cognite/neat/rules/transformers/_base.py +81 -0
- cognite/neat/rules/{models/information/_converter.py → transformers/_converters.py} +217 -21
- cognite/neat/rules/transformers/_map_onto.py +97 -0
- cognite/neat/rules/transformers/_pipelines.py +61 -0
- cognite/neat/rules/transformers/_verification.py +136 -0
- cognite/neat/{graph/stores → store}/_provenance.py +10 -1
- cognite/neat/utils/auxiliary.py +2 -35
- cognite/neat/utils/cdf/data_classes.py +20 -0
- cognite/neat/utils/regex_patterns.py +6 -0
- cognite/neat/utils/text.py +17 -0
- cognite/neat/workflows/base.py +4 -4
- cognite/neat/workflows/cdf_store.py +3 -3
- cognite/neat/workflows/steps/data_contracts.py +1 -1
- cognite/neat/workflows/steps/lib/current/graph_extractor.py +3 -3
- cognite/neat/workflows/steps/lib/current/graph_loader.py +2 -2
- cognite/neat/workflows/steps/lib/current/graph_store.py +1 -1
- cognite/neat/workflows/steps/lib/current/rules_exporter.py +116 -47
- cognite/neat/workflows/steps/lib/current/rules_importer.py +30 -28
- cognite/neat/workflows/steps/lib/current/rules_validator.py +5 -6
- cognite/neat/workflows/steps/lib/io/io_steps.py +5 -5
- cognite/neat/workflows/steps_registry.py +4 -5
- {cognite_neat-0.88.2.dist-info → cognite_neat-0.89.0.dist-info}/METADATA +1 -1
- {cognite_neat-0.88.2.dist-info → cognite_neat-0.89.0.dist-info}/RECORD +105 -106
- cognite/neat/exceptions.py +0 -145
- cognite/neat/graph/exceptions.py +0 -90
- cognite/neat/issues/errors/external.py +0 -21
- cognite/neat/issues/errors/properties.py +0 -75
- cognite/neat/issues/errors/resources.py +0 -123
- cognite/neat/issues/errors/schema.py +0 -0
- cognite/neat/issues/neat_warnings/__init__.py +0 -2
- cognite/neat/issues/neat_warnings/identifier.py +0 -27
- cognite/neat/issues/neat_warnings/models.py +0 -22
- cognite/neat/issues/neat_warnings/properties.py +0 -77
- cognite/neat/issues/neat_warnings/resources.py +0 -125
- cognite/neat/rules/issues/__init__.py +0 -22
- cognite/neat/rules/issues/base.py +0 -63
- cognite/neat/rules/issues/dms.py +0 -549
- cognite/neat/rules/issues/fileread.py +0 -197
- cognite/neat/rules/issues/ontology.py +0 -298
- cognite/neat/rules/issues/spreadsheet.py +0 -563
- cognite/neat/rules/issues/spreadsheet_file.py +0 -151
- cognite/neat/rules/issues/tables.py +0 -72
- cognite/neat/rules/models/_constants.py +0 -1
- cognite/neat/rules/models/_types/__init__.py +0 -19
- cognite/neat/rules/models/asset/_converter.py +0 -4
- cognite/neat/rules/models/dms/_converter.py +0 -145
- cognite/neat/workflows/_exceptions.py +0 -41
- /cognite/neat/{graph/stores → store}/__init__.py +0 -0
- /cognite/neat/{graph/stores → store}/_base.py +0 -0
- {cognite_neat-0.88.2.dist-info → cognite_neat-0.89.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.88.2.dist-info → cognite_neat-0.89.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.88.2.dist-info → cognite_neat-0.89.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,22 +1,22 @@
|
|
|
1
1
|
from collections import Counter, defaultdict
|
|
2
2
|
from datetime import datetime
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import cast
|
|
5
5
|
|
|
6
6
|
from rdflib import Graph, Namespace, URIRef
|
|
7
7
|
from rdflib import Literal as RdfLiteral
|
|
8
8
|
|
|
9
|
-
import cognite.neat.rules.issues as issues
|
|
10
9
|
from cognite.neat.constants import DEFAULT_NAMESPACE, get_default_prefixes
|
|
11
|
-
from cognite.neat.graph.stores import NeatGraphStore
|
|
12
10
|
from cognite.neat.issues import IssueList
|
|
13
|
-
from cognite.neat.
|
|
14
|
-
from cognite.neat.rules.
|
|
15
|
-
from cognite.neat.rules.
|
|
11
|
+
from cognite.neat.issues.errors import FileReadError
|
|
12
|
+
from cognite.neat.rules._shared import ReadRules
|
|
13
|
+
from cognite.neat.rules.importers._base import BaseImporter
|
|
14
|
+
from cognite.neat.rules.models._base_rules import MatchType
|
|
16
15
|
from cognite.neat.rules.models.information import (
|
|
16
|
+
InformationInputRules,
|
|
17
17
|
InformationMetadata,
|
|
18
|
-
InformationRulesInput,
|
|
19
18
|
)
|
|
19
|
+
from cognite.neat.store import NeatGraphStore
|
|
20
20
|
from cognite.neat.utils.rdf_ import get_namespace, remove_namespace_from_uri, uri_to_short_form
|
|
21
21
|
|
|
22
22
|
ORDERED_CLASSES_QUERY = """SELECT ?class (count(?s) as ?instances )
|
|
@@ -44,7 +44,7 @@ INSTANCE_PROPERTIES_DEFINITION = """SELECT ?property (count(?property) as ?occur
|
|
|
44
44
|
GROUP BY ?property ?dataType ?objectType"""
|
|
45
45
|
|
|
46
46
|
|
|
47
|
-
class InferenceImporter(BaseImporter):
|
|
47
|
+
class InferenceImporter(BaseImporter[InformationInputRules]):
|
|
48
48
|
"""Infers rules from a triple store.
|
|
49
49
|
|
|
50
50
|
Rules inference through analysis of knowledge graph provided in various formats.
|
|
@@ -106,8 +106,8 @@ class InferenceImporter(BaseImporter):
|
|
|
106
106
|
graph = Graph()
|
|
107
107
|
try:
|
|
108
108
|
graph.parse(filepath)
|
|
109
|
-
except Exception:
|
|
110
|
-
issue_list.append(
|
|
109
|
+
except Exception as e:
|
|
110
|
+
issue_list.append(FileReadError(filepath, str(e)))
|
|
111
111
|
|
|
112
112
|
return cls(
|
|
113
113
|
issue_list,
|
|
@@ -147,44 +147,21 @@ class InferenceImporter(BaseImporter):
|
|
|
147
147
|
) -> "InferenceImporter":
|
|
148
148
|
raise NotImplementedError("JSON file format is not supported yet.")
|
|
149
149
|
|
|
150
|
-
@overload
|
|
151
|
-
def to_rules(self, errors: Literal["raise"], role: RoleTypes | None = None) -> Rules: ...
|
|
152
|
-
|
|
153
|
-
@overload
|
|
154
|
-
def to_rules(
|
|
155
|
-
self,
|
|
156
|
-
errors: Literal["continue"] = "continue",
|
|
157
|
-
role: RoleTypes | None = None,
|
|
158
|
-
) -> tuple[Rules | None, IssueList]: ...
|
|
159
|
-
|
|
160
150
|
def to_rules(
|
|
161
151
|
self,
|
|
162
|
-
|
|
163
|
-
role: RoleTypes | None = None,
|
|
164
|
-
) -> tuple[Rules | None, IssueList] | Rules:
|
|
152
|
+
) -> ReadRules[InformationInputRules]:
|
|
165
153
|
"""
|
|
166
154
|
Creates `Rules` object from the data for target role.
|
|
167
155
|
"""
|
|
168
156
|
|
|
169
157
|
if self.issue_list.has_errors:
|
|
170
158
|
# In case there were errors during the import, the to_rules method will return None
|
|
171
|
-
return
|
|
159
|
+
return ReadRules(None, self.issue_list, {})
|
|
172
160
|
|
|
173
161
|
rules_dict = self._to_rules_components()
|
|
174
162
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
rules = InformationRulesInput.load(rules_dict).as_rules()
|
|
178
|
-
|
|
179
|
-
if future.result == "failure" or self.issue_list.has_errors:
|
|
180
|
-
return self._return_or_raise(self.issue_list, errors)
|
|
181
|
-
|
|
182
|
-
return self._to_output(
|
|
183
|
-
rules,
|
|
184
|
-
self.issue_list,
|
|
185
|
-
errors=errors,
|
|
186
|
-
role=role,
|
|
187
|
-
)
|
|
163
|
+
rules = InformationInputRules.load(rules_dict)
|
|
164
|
+
return ReadRules(rules, self.issue_list, {})
|
|
188
165
|
|
|
189
166
|
def _to_rules_components(
|
|
190
167
|
self,
|
|
@@ -34,6 +34,7 @@ def parse_owl_classes(graph: Graph, language: str = "en") -> list[dict]:
|
|
|
34
34
|
FILTER (!bound(?parentClass) || !isBlank(?parentClass))
|
|
35
35
|
FILTER (!bound(?name) || LANG(?name) = "" || LANGMATCHES(LANG(?name), "en"))
|
|
36
36
|
FILTER (!bound(?description) || LANG(?description) = "" || LANGMATCHES(LANG(?description), "en"))
|
|
37
|
+
BIND(?class AS ?reference)
|
|
37
38
|
}
|
|
38
39
|
"""
|
|
39
40
|
|
|
@@ -40,6 +40,7 @@ def parse_owl_properties(graph: Graph, language: str = "en") -> list[dict]:
|
|
|
40
40
|
FILTER (!bound(?description) || LANG(?description) = "" || LANGMATCHES(LANG(?description), "en"))
|
|
41
41
|
BIND(IF(bound(?minCount), ?minCount, 0) AS ?minCount)
|
|
42
42
|
BIND(IF(bound(?maxCount), ?maxCount, 1) AS ?maxCount)
|
|
43
|
+
BIND(?property AS ?reference)
|
|
43
44
|
}
|
|
44
45
|
"""
|
|
45
46
|
|
|
@@ -2,21 +2,22 @@
|
|
|
2
2
|
there are loaders to TransformationRules pydantic class."""
|
|
3
3
|
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from typing import Literal, overload
|
|
6
5
|
|
|
7
6
|
from rdflib import DC, DCTERMS, OWL, RDF, RDFS, SKOS, Graph
|
|
8
7
|
|
|
9
8
|
from cognite.neat.issues import IssueList
|
|
10
|
-
from cognite.neat.
|
|
9
|
+
from cognite.neat.issues.errors import FileReadError
|
|
10
|
+
from cognite.neat.rules._shared import ReadRules
|
|
11
|
+
from cognite.neat.rules.importers._base import BaseImporter
|
|
11
12
|
from cognite.neat.rules.importers._rdf._shared import make_components_compliant
|
|
12
|
-
from cognite.neat.rules.models import
|
|
13
|
+
from cognite.neat.rules.models import InformationInputRules
|
|
13
14
|
|
|
14
15
|
from ._owl2classes import parse_owl_classes
|
|
15
16
|
from ._owl2metadata import parse_owl_metadata
|
|
16
17
|
from ._owl2properties import parse_owl_properties
|
|
17
18
|
|
|
18
19
|
|
|
19
|
-
class OWLImporter(BaseImporter):
|
|
20
|
+
class OWLImporter(BaseImporter[InformationInputRules]):
|
|
20
21
|
"""Convert OWL ontology to tables/ transformation rules / Excel file.
|
|
21
22
|
|
|
22
23
|
Args:
|
|
@@ -37,24 +38,12 @@ class OWLImporter(BaseImporter):
|
|
|
37
38
|
def __init__(self, filepath: Path):
|
|
38
39
|
self.owl_filepath = filepath
|
|
39
40
|
|
|
40
|
-
|
|
41
|
-
def to_rules(self, errors: Literal["raise"], role: RoleTypes | None = None) -> Rules: ...
|
|
42
|
-
|
|
43
|
-
@overload
|
|
44
|
-
def to_rules(
|
|
45
|
-
self, errors: Literal["continue"] = "continue", role: RoleTypes | None = None
|
|
46
|
-
) -> tuple[Rules | None, IssueList]: ...
|
|
47
|
-
|
|
48
|
-
def to_rules(
|
|
49
|
-
self,
|
|
50
|
-
errors: Literal["raise", "continue"] = "continue",
|
|
51
|
-
role: RoleTypes | None = None,
|
|
52
|
-
) -> tuple[Rules | None, IssueList] | Rules:
|
|
41
|
+
def to_rules(self) -> ReadRules[InformationInputRules]:
|
|
53
42
|
graph = Graph()
|
|
54
43
|
try:
|
|
55
44
|
graph.parse(self.owl_filepath)
|
|
56
45
|
except Exception as e:
|
|
57
|
-
|
|
46
|
+
return ReadRules(None, IssueList([FileReadError(self.owl_filepath, f"Could not parse owl file: {e}")]), {})
|
|
58
47
|
|
|
59
48
|
# bind key namespaces
|
|
60
49
|
graph.bind("owl", OWL)
|
|
@@ -72,5 +61,5 @@ class OWLImporter(BaseImporter):
|
|
|
72
61
|
|
|
73
62
|
components = make_components_compliant(components)
|
|
74
63
|
|
|
75
|
-
rules =
|
|
76
|
-
return
|
|
64
|
+
rules = InformationInputRules.load(components)
|
|
65
|
+
return ReadRules(rules, IssueList(), {})
|
|
@@ -4,7 +4,7 @@ import numpy as np
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
from rdflib import OWL, Literal, Namespace
|
|
6
6
|
|
|
7
|
-
from cognite.neat.rules.models.
|
|
7
|
+
from cognite.neat.rules.models._base_rules import MatchType
|
|
8
8
|
from cognite.neat.rules.models.data_types import _XSD_TYPES
|
|
9
9
|
from cognite.neat.utils.rdf_ import remove_namespace_from_uri
|
|
10
10
|
from cognite.neat.utils.regex_patterns import PATTERNS
|
|
@@ -23,13 +23,13 @@ def parse_raw_classes_dataframe(query_results: list[tuple]) -> pd.DataFrame:
|
|
|
23
23
|
"Comment",
|
|
24
24
|
],
|
|
25
25
|
)
|
|
26
|
+
|
|
26
27
|
if df.empty:
|
|
27
28
|
return df
|
|
28
29
|
|
|
29
30
|
# # remove NaNs
|
|
30
31
|
df.replace(np.nan, "", regex=True, inplace=True)
|
|
31
32
|
|
|
32
|
-
df.Reference = df.Class
|
|
33
33
|
df.Class = df.Class.apply(lambda x: remove_namespace_from_uri(x))
|
|
34
34
|
df["Match Type"] = len(df) * [MatchType.exact]
|
|
35
35
|
df["Comment"] = len(df) * [None]
|
|
@@ -202,7 +202,7 @@ def parse_raw_properties_dataframe(query_results: list[tuple]) -> pd.DataFrame:
|
|
|
202
202
|
return df
|
|
203
203
|
|
|
204
204
|
df.replace(np.nan, "", regex=True, inplace=True)
|
|
205
|
-
|
|
205
|
+
|
|
206
206
|
df.Class = df.Class.apply(lambda x: remove_namespace_from_uri(x))
|
|
207
207
|
df.Property = df.Property.apply(lambda x: remove_namespace_from_uri(x))
|
|
208
208
|
df["Value Type"] = df["Value Type"].apply(lambda x: remove_namespace_from_uri(x))
|
|
@@ -231,7 +231,7 @@ def clean_up_properties(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
231
231
|
"Min Count": property_grouped_df["Min Count"].unique()[0],
|
|
232
232
|
"Max Count": property_grouped_df["Max Count"].unique()[0],
|
|
233
233
|
"Default": property_grouped_df["Default"].unique()[0],
|
|
234
|
-
"Reference": property_grouped_df["Reference"].unique()[0]
|
|
234
|
+
"Reference": property_grouped_df["Reference"].unique()[0],
|
|
235
235
|
"Match Type": property_grouped_df["Match Type"].unique()[0],
|
|
236
236
|
"Comment": property_grouped_df["Comment"].unique()[0],
|
|
237
237
|
"_property_type": property_grouped_df["_property_type"].unique()[0],
|
|
@@ -6,29 +6,30 @@ generating a list of rules based on which nodes that form the graph are made.
|
|
|
6
6
|
from collections import UserDict, defaultdict
|
|
7
7
|
from dataclasses import dataclass
|
|
8
8
|
from pathlib import Path
|
|
9
|
-
from typing import Literal, cast
|
|
9
|
+
from typing import Literal, cast
|
|
10
10
|
|
|
11
11
|
import pandas as pd
|
|
12
|
+
from cognite.client.utils._importing import local_import
|
|
12
13
|
from pandas import ExcelFile
|
|
13
14
|
|
|
14
15
|
from cognite.neat.issues import IssueList
|
|
15
|
-
from cognite.neat.
|
|
16
|
+
from cognite.neat.issues.errors import (
|
|
17
|
+
FileMissingRequiredFieldError,
|
|
18
|
+
FileNotFoundNeatError,
|
|
19
|
+
FileReadError,
|
|
20
|
+
PropertyDefinitionDuplicatedError,
|
|
21
|
+
)
|
|
22
|
+
from cognite.neat.rules._shared import ReadRules, T_InputRules
|
|
16
23
|
from cognite.neat.rules.models import (
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
DMSRules,
|
|
20
|
-
DomainRules,
|
|
21
|
-
InformationRules,
|
|
24
|
+
INPUT_RULES_BY_ROLE,
|
|
25
|
+
VERIFIED_RULES_BY_ROLE,
|
|
22
26
|
RoleTypes,
|
|
23
27
|
SchemaCompleteness,
|
|
24
28
|
)
|
|
25
|
-
from cognite.neat.rules.models.asset import AssetRulesInput
|
|
26
|
-
from cognite.neat.rules.models.dms import DMSRulesInput
|
|
27
|
-
from cognite.neat.rules.models.information import InformationRulesInput
|
|
28
|
-
from cognite.neat.utils.auxiliary import local_import
|
|
29
29
|
from cognite.neat.utils.spreadsheet import SpreadsheetRead, read_individual_sheet
|
|
30
|
+
from cognite.neat.utils.text import humanize_collection
|
|
30
31
|
|
|
31
|
-
from ._base import BaseImporter
|
|
32
|
+
from ._base import BaseImporter
|
|
32
33
|
|
|
33
34
|
SOURCE_SHEET__TARGET_FIELD__HEADERS = [
|
|
34
35
|
(
|
|
@@ -47,7 +48,7 @@ SOURCE_SHEET__TARGET_FIELD__HEADERS = [
|
|
|
47
48
|
]
|
|
48
49
|
|
|
49
50
|
MANDATORY_SHEETS_BY_ROLE: dict[RoleTypes, set[str]] = {
|
|
50
|
-
role_type: {str(sheet_name) for sheet_name in
|
|
51
|
+
role_type: {str(sheet_name) for sheet_name in VERIFIED_RULES_BY_ROLE[role_type].mandatory_fields(use_alias=True)}
|
|
51
52
|
for role_type in RoleTypes.__members__.values()
|
|
52
53
|
}
|
|
53
54
|
|
|
@@ -77,12 +78,12 @@ class MetadataRaw(UserDict):
|
|
|
77
78
|
|
|
78
79
|
def is_valid(self, issue_list: IssueList, filepath: Path) -> bool:
|
|
79
80
|
if not self.has_role_field:
|
|
80
|
-
issue_list.append(
|
|
81
|
+
issue_list.append(FileMissingRequiredFieldError(filepath, "metadata", "role"))
|
|
81
82
|
return False
|
|
82
83
|
|
|
83
84
|
# check if there is a schema field if role is not domain expert
|
|
84
85
|
if self.role != RoleTypes.domain_expert and not self.has_schema_field:
|
|
85
|
-
issue_list.append(
|
|
86
|
+
issue_list.append(FileMissingRequiredFieldError(filepath, "metadata", "schema"))
|
|
86
87
|
return False
|
|
87
88
|
return True
|
|
88
89
|
|
|
@@ -153,11 +154,7 @@ class SpreadsheetReader:
|
|
|
153
154
|
def _read_metadata(self, excel_file: ExcelFile, filepath: Path) -> MetadataRaw | None:
|
|
154
155
|
if self.metadata_sheet_name not in excel_file.sheet_names:
|
|
155
156
|
if self.required:
|
|
156
|
-
self.issue_list.append(
|
|
157
|
-
issues.spreadsheet_file.MetadataSheetMissingOrFailedError(
|
|
158
|
-
filepath, sheet_name=self.metadata_sheet_name
|
|
159
|
-
)
|
|
160
|
-
)
|
|
157
|
+
self.issue_list.append(FileMissingRequiredFieldError(filepath, "sheet", self.metadata_sheet_name))
|
|
161
158
|
return None
|
|
162
159
|
|
|
163
160
|
metadata = MetadataRaw.from_excel(excel_file, self.metadata_sheet_name)
|
|
@@ -178,7 +175,9 @@ class SpreadsheetReader:
|
|
|
178
175
|
if missing_sheets := expected_sheet_names.difference(set(excel_file.sheet_names)):
|
|
179
176
|
if self.required:
|
|
180
177
|
self.issue_list.append(
|
|
181
|
-
|
|
178
|
+
FileMissingRequiredFieldError(
|
|
179
|
+
cast(Path, excel_file.io), "sheets", humanize_collection(missing_sheets)
|
|
180
|
+
)
|
|
182
181
|
)
|
|
183
182
|
return None, read_info_by_sheet
|
|
184
183
|
|
|
@@ -197,13 +196,13 @@ class SpreadsheetReader:
|
|
|
197
196
|
excel_file, source_sheet_name, return_read_info=True, expected_headers=[headers]
|
|
198
197
|
)
|
|
199
198
|
except Exception as e:
|
|
200
|
-
self.issue_list.append(
|
|
199
|
+
self.issue_list.append(FileReadError(cast(Path, excel_file.io), str(e)))
|
|
201
200
|
continue
|
|
202
201
|
|
|
203
202
|
return sheets, read_info_by_sheet
|
|
204
203
|
|
|
205
204
|
|
|
206
|
-
class ExcelImporter(BaseImporter):
|
|
205
|
+
class ExcelImporter(BaseImporter[T_InputRules]):
|
|
207
206
|
"""Import rules from an Excel file.
|
|
208
207
|
|
|
209
208
|
Args:
|
|
@@ -213,30 +212,18 @@ class ExcelImporter(BaseImporter):
|
|
|
213
212
|
def __init__(self, filepath: Path):
|
|
214
213
|
self.filepath = filepath
|
|
215
214
|
|
|
216
|
-
|
|
217
|
-
def to_rules(self, errors: Literal["raise"], role: RoleTypes | None = None) -> Rules: ...
|
|
218
|
-
|
|
219
|
-
@overload
|
|
220
|
-
def to_rules(
|
|
221
|
-
self,
|
|
222
|
-
errors: Literal["continue"] = "continue",
|
|
223
|
-
role: RoleTypes | None = None,
|
|
224
|
-
) -> tuple[Rules | None, IssueList]: ...
|
|
225
|
-
|
|
226
|
-
def to_rules(
|
|
227
|
-
self, errors: Literal["raise", "continue"] = "continue", role: RoleTypes | None = None
|
|
228
|
-
) -> tuple[Rules | None, IssueList] | Rules:
|
|
215
|
+
def to_rules(self) -> ReadRules[T_InputRules]:
|
|
229
216
|
issue_list = IssueList(title=f"'{self.filepath.name}'")
|
|
230
217
|
if not self.filepath.exists():
|
|
231
|
-
issue_list.append(
|
|
232
|
-
return
|
|
218
|
+
issue_list.append(FileNotFoundNeatError(self.filepath))
|
|
219
|
+
return ReadRules(None, issue_list, {})
|
|
233
220
|
|
|
234
221
|
with pd.ExcelFile(self.filepath) as excel_file:
|
|
235
222
|
user_reader = SpreadsheetReader(issue_list)
|
|
236
223
|
|
|
237
224
|
user_read = user_reader.read(excel_file, self.filepath)
|
|
238
225
|
if user_read is None or issue_list.has_errors:
|
|
239
|
-
return
|
|
226
|
+
return ReadRules(None, issue_list, {})
|
|
240
227
|
|
|
241
228
|
last_read: ReadResult | None = None
|
|
242
229
|
if any(sheet_name.startswith("Last") for sheet_name in user_reader.seen_sheets):
|
|
@@ -248,11 +235,20 @@ class ExcelImporter(BaseImporter):
|
|
|
248
235
|
reference_read = SpreadsheetReader(issue_list, sheet_prefix="Ref").read(excel_file, self.filepath)
|
|
249
236
|
|
|
250
237
|
if issue_list.has_errors:
|
|
251
|
-
return
|
|
238
|
+
return ReadRules(None, issue_list, {})
|
|
252
239
|
|
|
253
240
|
if reference_read and user_read.role != reference_read.role:
|
|
254
|
-
issue_list.append(
|
|
255
|
-
|
|
241
|
+
issue_list.append(
|
|
242
|
+
PropertyDefinitionDuplicatedError(
|
|
243
|
+
self.filepath.as_posix(),
|
|
244
|
+
"spreadsheet.metadata", # type: ignore[arg-type]
|
|
245
|
+
"role",
|
|
246
|
+
frozenset({user_read.role, reference_read.role}),
|
|
247
|
+
("user", "reference"),
|
|
248
|
+
"sheet",
|
|
249
|
+
)
|
|
250
|
+
)
|
|
251
|
+
return ReadRules(None, issue_list, {})
|
|
256
252
|
|
|
257
253
|
sheets = user_read.sheets
|
|
258
254
|
original_role = user_read.role
|
|
@@ -267,34 +263,12 @@ class ExcelImporter(BaseImporter):
|
|
|
267
263
|
sheets["reference"] = reference_read.sheets
|
|
268
264
|
read_info_by_sheet.update(reference_read.read_info_by_sheet)
|
|
269
265
|
|
|
270
|
-
rules_cls =
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
error_cls=issues.spreadsheet.InvalidSheetError,
|
|
274
|
-
error_args={"read_info_by_sheet": read_info_by_sheet},
|
|
275
|
-
) as future:
|
|
276
|
-
rules: Rules
|
|
277
|
-
if rules_cls is DMSRules:
|
|
278
|
-
rules = DMSRulesInput.load(sheets).as_rules()
|
|
279
|
-
elif rules_cls is InformationRules:
|
|
280
|
-
rules = InformationRulesInput.load(sheets).as_rules()
|
|
281
|
-
elif rules_cls is AssetRules:
|
|
282
|
-
rules = AssetRulesInput.load(sheets).as_rules()
|
|
283
|
-
else:
|
|
284
|
-
rules = rules_cls.model_validate(sheets) # type: ignore[attr-defined]
|
|
285
|
-
|
|
286
|
-
if future.result == "failure" or issue_list.has_errors:
|
|
287
|
-
return self._return_or_raise(issue_list, errors)
|
|
288
|
-
|
|
289
|
-
return self._to_output(
|
|
290
|
-
rules,
|
|
291
|
-
issue_list,
|
|
292
|
-
errors=errors,
|
|
293
|
-
role=role,
|
|
294
|
-
)
|
|
266
|
+
rules_cls = INPUT_RULES_BY_ROLE[original_role]
|
|
267
|
+
rules = cast(T_InputRules, rules_cls.load(sheets))
|
|
268
|
+
return ReadRules(rules, issue_list, {"read_info_by_sheet": read_info_by_sheet})
|
|
295
269
|
|
|
296
270
|
|
|
297
|
-
class GoogleSheetImporter(BaseImporter):
|
|
271
|
+
class GoogleSheetImporter(BaseImporter[T_InputRules]):
|
|
298
272
|
"""Import rules from a Google Sheet.
|
|
299
273
|
|
|
300
274
|
.. warning::
|
|
@@ -310,38 +284,13 @@ class GoogleSheetImporter(BaseImporter):
|
|
|
310
284
|
self.sheet_id = sheet_id
|
|
311
285
|
self.skiprows = skiprows
|
|
312
286
|
|
|
313
|
-
|
|
314
|
-
|
|
287
|
+
def to_rules(self) -> ReadRules[T_InputRules]:
|
|
288
|
+
raise NotImplementedError("Google Sheet Importer is not yet implemented.")
|
|
315
289
|
|
|
316
|
-
|
|
317
|
-
def to_rules(
|
|
318
|
-
self, errors: Literal["continue"] = "continue", role: RoleTypes | None = None
|
|
319
|
-
) -> tuple[Rules | None, IssueList]: ...
|
|
320
|
-
|
|
321
|
-
def to_rules(
|
|
322
|
-
self, errors: Literal["raise", "continue"] = "continue", role: RoleTypes | None = None
|
|
323
|
-
) -> tuple[Rules | None, IssueList] | Rules:
|
|
290
|
+
def _get_sheets(self) -> dict[str, pd.DataFrame]:
|
|
324
291
|
local_import("gspread", "google")
|
|
325
292
|
import gspread # type: ignore[import]
|
|
326
293
|
|
|
327
|
-
role = role or RoleTypes.domain_expert
|
|
328
|
-
rules_model = cast(DomainRules | InformationRules | AssetRules | DMSRules, RULES_PER_ROLE[role])
|
|
329
|
-
|
|
330
294
|
client_google = gspread.service_account()
|
|
331
295
|
google_sheet = client_google.open_by_key(self.sheet_id)
|
|
332
|
-
|
|
333
|
-
sheet_names = {str(name).lower() for name in sheets.keys()}
|
|
334
|
-
|
|
335
|
-
if missing_sheets := rules_model.mandatory_fields().difference(sheet_names):
|
|
336
|
-
raise ValueError(f"Missing mandatory sheets: {missing_sheets}")
|
|
337
|
-
|
|
338
|
-
if role == RoleTypes.domain_expert:
|
|
339
|
-
output = rules_model.model_validate(sheets)
|
|
340
|
-
elif role == RoleTypes.information:
|
|
341
|
-
output = rules_model.model_validate(sheets)
|
|
342
|
-
elif role == RoleTypes.dms:
|
|
343
|
-
output = rules_model.model_validate(sheets)
|
|
344
|
-
else:
|
|
345
|
-
raise ValueError(f"Role {role} is not valid.")
|
|
346
|
-
|
|
347
|
-
return self._to_output(output, IssueList(), errors=errors, role=role)
|
|
296
|
+
return {worksheet.title: pd.DataFrame(worksheet.get_all_records()) for worksheet in google_sheet.worksheets()}
|
|
@@ -1,18 +1,23 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
|
-
from typing import Any,
|
|
2
|
+
from typing import Any, cast
|
|
3
3
|
|
|
4
4
|
import yaml
|
|
5
5
|
|
|
6
|
-
from cognite.neat.issues import IssueList
|
|
7
|
-
from cognite.neat.
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
6
|
+
from cognite.neat.issues import IssueList, NeatIssue
|
|
7
|
+
from cognite.neat.issues.errors import (
|
|
8
|
+
FileMissingRequiredFieldError,
|
|
9
|
+
FileNotAFileError,
|
|
10
|
+
FileNotFoundNeatError,
|
|
11
|
+
FileTypeUnexpectedError,
|
|
12
|
+
)
|
|
13
|
+
from cognite.neat.issues.warnings import NeatValueWarning
|
|
14
|
+
from cognite.neat.rules._shared import ReadRules, T_InputRules
|
|
15
|
+
from cognite.neat.rules.models import INPUT_RULES_BY_ROLE, RoleTypes
|
|
11
16
|
|
|
12
|
-
from ._base import BaseImporter
|
|
17
|
+
from ._base import BaseImporter
|
|
13
18
|
|
|
14
19
|
|
|
15
|
-
class YAMLImporter(BaseImporter):
|
|
20
|
+
class YAMLImporter(BaseImporter[T_InputRules]):
|
|
16
21
|
"""Imports the rules from a YAML file.
|
|
17
22
|
|
|
18
23
|
Args:
|
|
@@ -29,7 +34,7 @@ class YAMLImporter(BaseImporter):
|
|
|
29
34
|
def __init__(
|
|
30
35
|
self,
|
|
31
36
|
raw_data: dict[str, Any],
|
|
32
|
-
read_issues: list[
|
|
37
|
+
read_issues: list[NeatIssue] | None = None,
|
|
33
38
|
filepaths: list[Path] | None = None,
|
|
34
39
|
) -> None:
|
|
35
40
|
self.raw_data = raw_data
|
|
@@ -39,34 +44,22 @@ class YAMLImporter(BaseImporter):
|
|
|
39
44
|
@classmethod
|
|
40
45
|
def from_file(cls, filepath: Path):
|
|
41
46
|
if not filepath.exists():
|
|
42
|
-
return cls({}, [
|
|
43
|
-
|
|
44
|
-
return cls({}, [
|
|
47
|
+
return cls({}, [FileNotFoundNeatError(filepath)])
|
|
48
|
+
elif not filepath.is_file():
|
|
49
|
+
return cls({}, [FileNotAFileError(filepath)])
|
|
45
50
|
elif filepath.suffix not in [".yaml", ".yml"]:
|
|
46
|
-
return cls({}, [
|
|
51
|
+
return cls({}, [FileTypeUnexpectedError(filepath, frozenset([".yaml", ".yml"]))])
|
|
47
52
|
return cls(yaml.safe_load(filepath.read_text()), filepaths=[filepath])
|
|
48
53
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
@overload
|
|
53
|
-
def to_rules(
|
|
54
|
-
self, errors: Literal["continue"] = "continue", role: RoleTypes | None = None
|
|
55
|
-
) -> tuple[Rules | None, IssueList]: ...
|
|
56
|
-
|
|
57
|
-
def to_rules(
|
|
58
|
-
self, errors: Literal["raise", "continue"] = "continue", role: RoleTypes | None = None
|
|
59
|
-
) -> tuple[Rules | None, IssueList] | Rules:
|
|
60
|
-
if any(issue for issue in self._read_issues if isinstance(issue, NeatValidationError)) or not self.raw_data:
|
|
61
|
-
if errors == "raise":
|
|
62
|
-
raise self._read_issues.as_errors()
|
|
63
|
-
return None, self._read_issues
|
|
54
|
+
def to_rules(self) -> ReadRules[T_InputRules]:
|
|
55
|
+
if self._read_issues.has_errors or not self.raw_data:
|
|
56
|
+
return ReadRules(None, self._read_issues, {})
|
|
64
57
|
issue_list = IssueList(title="YAML Importer", issues=self._read_issues)
|
|
65
58
|
|
|
66
59
|
if not self._filepaths:
|
|
67
60
|
issue_list.append(
|
|
68
|
-
|
|
69
|
-
|
|
61
|
+
NeatValueWarning(
|
|
62
|
+
f"{type(self).__name__} was called without filepaths when there is content",
|
|
70
63
|
)
|
|
71
64
|
)
|
|
72
65
|
metadata_file = Path()
|
|
@@ -75,38 +68,19 @@ class YAMLImporter(BaseImporter):
|
|
|
75
68
|
metadata_file = metadata_file_nullable or self._filepaths[0]
|
|
76
69
|
|
|
77
70
|
if "metadata" not in self.raw_data:
|
|
78
|
-
self._read_issues.append(
|
|
79
|
-
|
|
80
|
-
)
|
|
81
|
-
if errors == "raise":
|
|
82
|
-
raise self._read_issues.as_errors()
|
|
83
|
-
return None, self._read_issues
|
|
71
|
+
self._read_issues.append(FileMissingRequiredFieldError(metadata_file, "section", "metadata"))
|
|
72
|
+
return ReadRules(None, self._read_issues, {})
|
|
84
73
|
|
|
85
74
|
metadata = self.raw_data["metadata"]
|
|
86
75
|
|
|
87
76
|
if "role" not in metadata:
|
|
88
|
-
self._read_issues.append(
|
|
89
|
-
|
|
90
|
-
)
|
|
91
|
-
if errors == "raise":
|
|
92
|
-
raise self._read_issues.as_errors()
|
|
93
|
-
return None, self._read_issues
|
|
77
|
+
self._read_issues.append(FileMissingRequiredFieldError(metadata, "metadata", "role"))
|
|
78
|
+
return ReadRules(None, self._read_issues, {})
|
|
94
79
|
|
|
95
80
|
role_input = RoleTypes(metadata["role"])
|
|
96
81
|
role_enum = RoleTypes(role_input)
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
rules = DMSRulesInput.load(self.raw_data).as_rules()
|
|
103
|
-
else:
|
|
104
|
-
rules = rules_model.model_validate(self.raw_data)
|
|
105
|
-
|
|
106
|
-
if future.result == "failure":
|
|
107
|
-
if errors == "continue":
|
|
108
|
-
return None, issue_list
|
|
109
|
-
else:
|
|
110
|
-
raise issue_list.as_errors()
|
|
111
|
-
|
|
112
|
-
return self._to_output(rules, issue_list, errors, role)
|
|
82
|
+
rules_cls = INPUT_RULES_BY_ROLE[role_enum]
|
|
83
|
+
|
|
84
|
+
rules = cast(T_InputRules, rules_cls.load(self.raw_data))
|
|
85
|
+
|
|
86
|
+
return ReadRules(rules, issue_list, {})
|
|
@@ -1,12 +1,25 @@
|
|
|
1
|
-
from cognite.neat.rules.models.asset import AssetRules
|
|
2
|
-
from cognite.neat.rules.models.
|
|
1
|
+
from cognite.neat.rules.models.asset._rules import AssetRules
|
|
2
|
+
from cognite.neat.rules.models.asset._rules_input import AssetInputRules
|
|
3
|
+
from cognite.neat.rules.models.domain import DomainInputRules, DomainRules
|
|
3
4
|
from cognite.neat.rules.models.information._rules import InformationRules
|
|
5
|
+
from cognite.neat.rules.models.information._rules_input import InformationInputRules
|
|
4
6
|
|
|
5
|
-
from .
|
|
7
|
+
from ._base_rules import DataModelType, ExtensionCategory, RoleTypes, SchemaCompleteness, SheetEntity, SheetList
|
|
6
8
|
from .dms._rules import DMSRules
|
|
9
|
+
from .dms._rules_input import DMSInputRules
|
|
7
10
|
from .dms._schema import DMSSchema
|
|
8
11
|
|
|
9
|
-
|
|
12
|
+
INPUT_RULES_BY_ROLE: dict[
|
|
13
|
+
RoleTypes, type[InformationInputRules] | type[AssetInputRules] | type[DMSInputRules] | type[DomainInputRules]
|
|
14
|
+
] = {
|
|
15
|
+
RoleTypes.domain_expert: DomainInputRules,
|
|
16
|
+
RoleTypes.information: InformationInputRules,
|
|
17
|
+
RoleTypes.asset: AssetInputRules,
|
|
18
|
+
RoleTypes.dms: DMSInputRules,
|
|
19
|
+
}
|
|
20
|
+
VERIFIED_RULES_BY_ROLE: dict[
|
|
21
|
+
RoleTypes, type[InformationRules] | type[AssetRules] | type[DMSRules] | type[DomainRules]
|
|
22
|
+
] = {
|
|
10
23
|
RoleTypes.domain_expert: DomainRules,
|
|
11
24
|
RoleTypes.information: InformationRules,
|
|
12
25
|
RoleTypes.asset: AssetRules,
|
|
@@ -16,10 +29,13 @@ RULES_PER_ROLE: dict[RoleTypes, type[DomainRules] | type[InformationRules] | typ
|
|
|
16
29
|
|
|
17
30
|
__all__ = [
|
|
18
31
|
"DomainRules",
|
|
32
|
+
"DMSInputRules",
|
|
33
|
+
"InformationInputRules",
|
|
34
|
+
"AssetInputRules",
|
|
19
35
|
"InformationRules",
|
|
20
36
|
"AssetRules",
|
|
21
37
|
"DMSRules",
|
|
22
|
-
"
|
|
38
|
+
"INPUT_RULES_BY_ROLE",
|
|
23
39
|
"DMSSchema",
|
|
24
40
|
"RoleTypes",
|
|
25
41
|
"SchemaCompleteness",
|