cognite-neat 0.85.12__py3-none-any.whl → 0.87.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_version.py +1 -1
- cognite/neat/app/api/routers/core.py +4 -4
- cognite/neat/constants.py +11 -9
- cognite/neat/graph/extractors/_mock_graph_generator.py +8 -9
- cognite/neat/graph/loaders/__init__.py +5 -2
- cognite/neat/graph/loaders/_base.py +13 -5
- cognite/neat/graph/loaders/_rdf2asset.py +94 -20
- cognite/neat/graph/loaders/_rdf2dms.py +3 -16
- cognite/neat/graph/queries/_base.py +58 -5
- cognite/neat/graph/queries/_construct.py +17 -15
- cognite/neat/graph/queries/_shared.py +20 -6
- cognite/neat/graph/stores/_base.py +19 -10
- cognite/neat/graph/transformers/_rdfpath.py +7 -0
- cognite/neat/legacy/graph/extractors/_dexpi.py +0 -5
- cognite/neat/legacy/graph/stores/_base.py +24 -8
- cognite/neat/legacy/graph/stores/_graphdb_store.py +3 -2
- cognite/neat/legacy/graph/stores/_memory_store.py +3 -3
- cognite/neat/legacy/graph/stores/_oxigraph_store.py +8 -4
- cognite/neat/legacy/graph/stores/_rdf_to_graph.py +5 -3
- cognite/neat/legacy/graph/transformations/query_generator/sparql.py +48 -15
- cognite/neat/legacy/rules/importers/_graph2rules.py +34 -7
- cognite/neat/legacy/rules/models/raw_rules.py +18 -6
- cognite/neat/legacy/rules/models/rules.py +32 -12
- cognite/neat/rules/_shared.py +6 -1
- cognite/neat/rules/analysis/__init__.py +4 -4
- cognite/neat/rules/analysis/_asset.py +128 -0
- cognite/neat/rules/analysis/_base.py +385 -6
- cognite/neat/rules/analysis/_information.py +155 -0
- cognite/neat/rules/exporters/_base.py +4 -4
- cognite/neat/rules/exporters/_rules2dms.py +1 -1
- cognite/neat/rules/exporters/_rules2ontology.py +5 -5
- cognite/neat/rules/importers/_base.py +4 -4
- cognite/neat/rules/importers/_dtdl2rules/dtdl_converter.py +2 -8
- cognite/neat/rules/importers/_inference2rules.py +2 -2
- cognite/neat/rules/importers/_owl2rules/_owl2metadata.py +1 -1
- cognite/neat/rules/importers/_spreadsheet2rules.py +5 -5
- cognite/neat/rules/models/__init__.py +3 -3
- cognite/neat/rules/models/_base.py +10 -10
- cognite/neat/rules/models/asset/_rules.py +9 -10
- cognite/neat/rules/models/dms/_converter.py +4 -5
- cognite/neat/rules/models/dms/_rules.py +6 -3
- cognite/neat/rules/models/domain.py +5 -2
- cognite/neat/rules/models/entities.py +2 -9
- cognite/neat/rules/models/information/_converter.py +3 -3
- cognite/neat/rules/models/information/_rules.py +13 -11
- cognite/neat/rules/models/information/_rules_input.py +1 -2
- cognite/neat/rules/models/information/_validation.py +1 -1
- cognite/neat/utils/utils.py +54 -18
- cognite/neat/workflows/steps/lib/current/graph_store.py +28 -8
- cognite/neat/workflows/steps/lib/legacy/graph_extractor.py +129 -27
- cognite/neat/workflows/steps/lib/legacy/graph_store.py +4 -4
- {cognite_neat-0.85.12.dist-info → cognite_neat-0.87.0.dist-info}/METADATA +1 -1
- {cognite_neat-0.85.12.dist-info → cognite_neat-0.87.0.dist-info}/RECORD +56 -54
- cognite/neat/rules/analysis/_information_rules.py +0 -469
- {cognite_neat-0.85.12.dist-info → cognite_neat-0.87.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.85.12.dist-info → cognite_neat-0.87.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.85.12.dist-info → cognite_neat-0.87.0.dist-info}/entry_points.txt +0 -0
|
@@ -29,7 +29,7 @@ from pydantic import (
|
|
|
29
29
|
from pydantic.fields import FieldInfo
|
|
30
30
|
from rdflib import XSD, Literal, Namespace, URIRef
|
|
31
31
|
|
|
32
|
-
from cognite.neat.constants import
|
|
32
|
+
from cognite.neat.constants import get_default_prefixes
|
|
33
33
|
from cognite.neat.legacy.rules import exceptions
|
|
34
34
|
from cognite.neat.legacy.rules.models._base import (
|
|
35
35
|
ENTITY_ID_REGEX_COMPILED,
|
|
@@ -49,7 +49,10 @@ from cognite.neat.legacy.rules.models.rdfpath import (
|
|
|
49
49
|
Traversal,
|
|
50
50
|
parse_rule,
|
|
51
51
|
)
|
|
52
|
-
from cognite.neat.legacy.rules.models.value_types import
|
|
52
|
+
from cognite.neat.legacy.rules.models.value_types import (
|
|
53
|
+
XSD_VALUE_TYPE_MAPPINGS,
|
|
54
|
+
ValueType,
|
|
55
|
+
)
|
|
53
56
|
|
|
54
57
|
if sys.version_info >= (3, 11):
|
|
55
58
|
from typing import Self
|
|
@@ -82,7 +85,10 @@ def replace_nan_floats_with_default(values: dict, model_fields: dict[str, FieldI
|
|
|
82
85
|
output[field_name] = model_fields[field_name].default
|
|
83
86
|
else:
|
|
84
87
|
# field_name may be an alias
|
|
85
|
-
source_name = next(
|
|
88
|
+
source_name = next(
|
|
89
|
+
(name for name, field in model_fields.items() if field.alias == field_name),
|
|
90
|
+
None,
|
|
91
|
+
)
|
|
86
92
|
if source_name:
|
|
87
93
|
output[field_name] = model_fields[source_name].default
|
|
88
94
|
else:
|
|
@@ -335,7 +341,9 @@ class Metadata(RuleModel):
|
|
|
335
341
|
if value.endswith("#") or value.endswith("/"):
|
|
336
342
|
return value
|
|
337
343
|
warnings.warn(
|
|
338
|
-
exceptions.NamespaceEndingFixed(value).message,
|
|
344
|
+
exceptions.NamespaceEndingFixed(value).message,
|
|
345
|
+
category=exceptions.NamespaceEndingFixed,
|
|
346
|
+
stacklevel=2,
|
|
339
347
|
)
|
|
340
348
|
return Namespace(f"{value}#")
|
|
341
349
|
|
|
@@ -444,10 +452,14 @@ class Resource(RuleModel):
|
|
|
444
452
|
default=None,
|
|
445
453
|
)
|
|
446
454
|
source_entity_name: str | None = Field(
|
|
447
|
-
alias="Source Entity Name",
|
|
455
|
+
alias="Source Entity Name",
|
|
456
|
+
description="Closest entity in source, e.g. Substation",
|
|
457
|
+
default=None,
|
|
448
458
|
)
|
|
449
459
|
match_type: str | None = Field(
|
|
450
|
-
alias="Match Type",
|
|
460
|
+
alias="Match Type",
|
|
461
|
+
description="Type of match between source entity and one being defined",
|
|
462
|
+
default=None,
|
|
451
463
|
)
|
|
452
464
|
comment: str | None = Field(alias="Comment", description="Comment about mapping", default=None)
|
|
453
465
|
|
|
@@ -656,7 +668,9 @@ class Property(Resource):
|
|
|
656
668
|
# Specialization of cdf_resource_type to allow definition of both
|
|
657
669
|
# Asset and Relationship at the same time
|
|
658
670
|
cdf_resource_type: list[str] = Field(
|
|
659
|
-
alias="Resource Type",
|
|
671
|
+
alias="Resource Type",
|
|
672
|
+
default_factory=list,
|
|
673
|
+
description="This is typically 'Asset' or 'Relationship'",
|
|
660
674
|
)
|
|
661
675
|
|
|
662
676
|
# Transformation rule (domain to solution)
|
|
@@ -701,7 +715,11 @@ class Property(Resource):
|
|
|
701
715
|
return ValueType.from_string(entity_string=value, type_=EntityTypes.object_value_type, mapping=None)
|
|
702
716
|
else:
|
|
703
717
|
return ValueType(
|
|
704
|
-
prefix="undefined",
|
|
718
|
+
prefix="undefined",
|
|
719
|
+
suffix=value,
|
|
720
|
+
name=value,
|
|
721
|
+
type_=EntityTypes.object_value_type,
|
|
722
|
+
mapping=None,
|
|
705
723
|
)
|
|
706
724
|
# return ValueType(
|
|
707
725
|
|
|
@@ -820,7 +838,9 @@ class Property(Resource):
|
|
|
820
838
|
def set_relationship_label(self):
|
|
821
839
|
if self.label is None:
|
|
822
840
|
warnings.warn(
|
|
823
|
-
exceptions.MissingLabel(self.property_id).message,
|
|
841
|
+
exceptions.MissingLabel(self.property_id).message,
|
|
842
|
+
category=exceptions.MissingLabel,
|
|
843
|
+
stacklevel=2,
|
|
824
844
|
)
|
|
825
845
|
self.label = self.property_id
|
|
826
846
|
return self
|
|
@@ -899,7 +919,7 @@ class Prefixes(RuleModel):
|
|
|
899
919
|
prefixes: Dict of prefixes
|
|
900
920
|
"""
|
|
901
921
|
|
|
902
|
-
prefixes: dict[str, Namespace] =
|
|
922
|
+
prefixes: dict[str, Namespace] = get_default_prefixes()
|
|
903
923
|
|
|
904
924
|
|
|
905
925
|
class Instance(RuleModel):
|
|
@@ -1019,7 +1039,7 @@ class Rules(RuleModel):
|
|
|
1019
1039
|
classes: Classes defined in the data model
|
|
1020
1040
|
properties: Class properties defined in the data model with accompanying transformation rules
|
|
1021
1041
|
to transform data from source to target representation
|
|
1022
|
-
prefixes: Prefixes used in the data model. Defaults to
|
|
1042
|
+
prefixes: Prefixes used in the data model. Defaults to internal prefixes
|
|
1023
1043
|
instances: Instances defined in the data model. Defaults to None
|
|
1024
1044
|
validators_to_skip: List of validators to skip. Defaults to []
|
|
1025
1045
|
|
|
@@ -1035,7 +1055,7 @@ class Rules(RuleModel):
|
|
|
1035
1055
|
metadata: Metadata
|
|
1036
1056
|
classes: Classes
|
|
1037
1057
|
properties: Properties
|
|
1038
|
-
prefixes: dict[str, Namespace] =
|
|
1058
|
+
prefixes: dict[str, Namespace] = get_default_prefixes()
|
|
1039
1059
|
instances: list[Instance] = Field(default_factory=list)
|
|
1040
1060
|
|
|
1041
1061
|
@property
|
cognite/neat/rules/_shared.py
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
from typing import TypeAlias
|
|
2
2
|
|
|
3
|
-
from cognite.neat.rules.models import
|
|
3
|
+
from cognite.neat.rules.models import (
|
|
4
|
+
AssetRules,
|
|
5
|
+
DMSRules,
|
|
6
|
+
DomainRules,
|
|
7
|
+
InformationRules,
|
|
8
|
+
)
|
|
4
9
|
|
|
5
10
|
Rules: TypeAlias = DomainRules | InformationRules | DMSRules | AssetRules
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
from .
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
from ._asset import AssetAnalysis
|
|
2
|
+
from ._information import (
|
|
3
|
+
InformationAnalysis,
|
|
4
4
|
)
|
|
5
5
|
|
|
6
|
-
__all__ = ["
|
|
6
|
+
__all__ = ["InformationAnalysis", "AssetAnalysis"]
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
from typing import cast
|
|
3
|
+
|
|
4
|
+
from cognite.neat.rules.models import AssetRules
|
|
5
|
+
from cognite.neat.rules.models._rdfpath import RDFPath
|
|
6
|
+
from cognite.neat.rules.models.asset import AssetClass, AssetProperty
|
|
7
|
+
from cognite.neat.rules.models.entities import (
|
|
8
|
+
AssetEntity,
|
|
9
|
+
ClassEntity,
|
|
10
|
+
EntityTypes,
|
|
11
|
+
ReferenceEntity,
|
|
12
|
+
RelationshipEntity,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
from ._base import BaseAnalysis
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class AssetAnalysis(BaseAnalysis[AssetRules, AssetClass, AssetProperty, ClassEntity, str]):
    """Analysis helpers for ``AssetRules``.

    Assumes analysis over only the complete schema.
    """

    def _get_reference(self, class_or_property: AssetClass | AssetProperty) -> ReferenceEntity | None:
        """Return the ``reference`` attribute when it is a ``ReferenceEntity``, else ``None``."""
        reference = class_or_property.reference
        if isinstance(reference, ReferenceEntity):
            return reference
        return None

    def _get_cls_entity(self, class_: AssetClass | AssetProperty) -> ClassEntity:
        """Return the ``class_`` attribute of a class or property definition."""
        return class_.class_

    def _get_prop_entity(self, property_: AssetProperty) -> str:
        """Return the ``property_`` attribute of a property definition."""
        return property_.property_

    def _get_cls_parents(self, class_: AssetClass) -> list[ClassEntity] | None:
        """Return the parents of ``class_`` as a list, or ``None`` when there are none."""
        parents = class_.parent
        if not parents:
            return None
        return list(parents) or None

    def _get_reference_rules(self) -> AssetRules | None:
        """Return ``self.rules.reference``."""
        return self.rules.reference

    @classmethod
    def _set_cls_entity(cls, property_: AssetProperty, class_: ClassEntity) -> None:
        """Attach ``property_`` to ``class_`` by overwriting its ``class_`` attribute."""
        property_.class_ = class_

    def _get_object(self, property_: AssetProperty) -> ClassEntity | None:
        """Return the property's ``value_type`` when it is a ``ClassEntity``, else ``None``."""
        value_type = property_.value_type
        if isinstance(value_type, ClassEntity):
            return value_type
        return None

    def _get_max_occurrence(self, property_: AssetProperty) -> int | float | None:
        """Return the property's ``max_count``."""
        return property_.max_count

    def _get_classes(self) -> list[AssetClass]:
        """Return the classes of the rules as a new list."""
        return list(self.rules.classes)

    def _get_properties(self) -> list[AssetProperty]:
        """Return the properties of the rules as a new list."""
        return list(self.rules.properties)

    def subset_rules(self, desired_classes: set[ClassEntity]) -> AssetRules:
        """Not available for asset rules; always raises ``NotImplementedError``."""
        raise NotImplementedError("Method not implemented")

    def class_property_pairs(
        self,
        only_rdfpath: bool = False,
        consider_inheritance: bool = False,
        implementation_type: EntityTypes = EntityTypes.asset,
    ) -> dict[ClassEntity, dict[str, AssetProperty]]:
        """Return, per class, the properties implemented as the requested entity type.

        Args:
            only_rdfpath: Keep only properties whose ``transformation`` is an ``RDFPath``.
            consider_inheritance: Forwarded to ``classes_with_properties``.
            implementation_type: ``EntityTypes.asset`` selects ``AssetEntity``
                implementations; any other value selects ``RelationshipEntity``.

        Returns:
            Mapping of class entity to ``{property name: property copy}``. Each copy is a
            deep copy whose ``implementation`` holds only the entities of the selected
            type. Classes left without any property are omitted.
        """
        wanted_type = AssetEntity if implementation_type == EntityTypes.asset else RelationshipEntity
        pairs: dict[ClassEntity, dict[str, AssetProperty]] = {}

        for class_, properties in self.classes_with_properties(consider_inheritance).items():
            kept: dict[str, AssetProperty] = {}

            for property_ in properties:
                if property_.property_ in kept:
                    # TODO: use appropriate Warning class from _exceptions.py
                    # if missing make one !
                    warnings.warn(
                        f"Property {property_.property_} for {class_} has been defined more than once!"
                        " Only the first definition will be considered, skipping the rest..",
                        stacklevel=2,
                    )
                    continue

                if not property_.implementation:
                    continue

                matching = [
                    candidate for candidate in property_.implementation if isinstance(candidate, wanted_type)
                ]
                if not matching:
                    continue

                if only_rdfpath and not isinstance(property_.transformation, RDFPath):
                    continue

                kept[property_.property_] = property_.model_copy(deep=True, update={"implementation": matching})

            if kept:
                pairs[class_] = kept

        return pairs

    def asset_definition(
        self, only_rdfpath: bool = False, consider_inheritance: bool = False
    ) -> dict[ClassEntity, dict[str, AssetProperty]]:
        """Return ``class_property_pairs`` restricted to asset implementations."""
        return self.class_property_pairs(
            only_rdfpath=only_rdfpath,
            consider_inheritance=consider_inheritance,
            implementation_type=EntityTypes.asset,
        )

    def relationship_definition(
        self, only_rdfpath: bool = False, consider_inheritance: bool = False
    ) -> dict[ClassEntity, dict[str, AssetProperty]]:
        """Return ``class_property_pairs`` restricted to relationship implementations."""
        return self.class_property_pairs(
            only_rdfpath=only_rdfpath,
            consider_inheritance=consider_inheritance,
            implementation_type=EntityTypes.relationship,
        )

    def define_property_renaming_config(self, class_: ClassEntity) -> dict[str, str]:
        """Map each asset property of ``class_`` to the name of its first asset implementation.

        A target other than ``"metadata"`` is used as is; a ``"metadata"`` target is
        suffixed with the source property name (``"metadata.<property>"``).

        Returns:
            The renaming mapping, empty when ``class_`` has no asset definition.
        """
        renaming: dict[str, str] = {}

        definition = self.asset_definition().get(class_, None)
        if not definition:
            return renaming

        for property_, transformation in definition.items():
            # Only the first asset implementation decides the target name.
            asset_property = cast(list[AssetEntity], transformation.implementation)[0].property_
            renaming[property_] = (
                f"{asset_property}.{property_}" if asset_property == "metadata" else str(asset_property)
            )

        return renaming
|
|
@@ -1,13 +1,392 @@
|
|
|
1
|
+
import itertools
|
|
2
|
+
import warnings
|
|
1
3
|
from abc import ABC, abstractmethod
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
from collections.abc import Set
|
|
6
|
+
from dataclasses import dataclass
|
|
2
7
|
from typing import Generic, TypeVar
|
|
3
8
|
|
|
4
|
-
|
|
5
|
-
from
|
|
9
|
+
import pandas as pd
|
|
10
|
+
from pydantic import BaseModel
|
|
6
11
|
|
|
7
|
-
|
|
12
|
+
from cognite.neat.rules.models._base import BaseRules
|
|
13
|
+
from cognite.neat.rules.models._rdfpath import RDFPath
|
|
14
|
+
from cognite.neat.rules.models.entities import (
|
|
15
|
+
ClassEntity,
|
|
16
|
+
Entity,
|
|
17
|
+
ReferenceEntity,
|
|
18
|
+
)
|
|
19
|
+
from cognite.neat.rules.models.information import InformationProperty
|
|
20
|
+
from cognite.neat.utils.utils import get_inheritance_path
|
|
8
21
|
|
|
22
|
+
T_Rules = TypeVar("T_Rules", bound=BaseRules)
|
|
23
|
+
T_Property = TypeVar("T_Property", bound=BaseModel)
|
|
24
|
+
T_Class = TypeVar("T_Class", bound=BaseModel)
|
|
25
|
+
T_ClassEntity = TypeVar("T_ClassEntity", bound=Entity)
|
|
26
|
+
T_PropertyEntity = TypeVar("T_PropertyEntity", bound=Entity | str)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass(frozen=True)
class Linkage(Generic[T_ClassEntity, T_PropertyEntity]):
    """Immutable record of one link between two classes through a property."""

    # Class on which the connecting property is defined.
    source_class: T_ClassEntity
    # Property that creates the link.
    connecting_property: T_PropertyEntity
    # Class the property points to.
    target_class: T_ClassEntity
    # Maximum occurrence recorded for the property; None when not set.
    max_occurrence: int | float | None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class LinkageSet(set, Generic[T_ClassEntity, T_PropertyEntity], Set[Linkage[T_ClassEntity, T_PropertyEntity]]):
    """A ``set`` of ``Linkage`` records with convenience views over its members."""

    @property
    def source_class(self) -> set[T_ClassEntity]:
        """Source classes of all links in the set."""
        sources: set[T_ClassEntity] = set()
        for linkage in self:
            sources.add(linkage.source_class)
        return sources

    @property
    def target_class(self) -> set[T_ClassEntity]:
        """Target classes of all links in the set."""
        targets: set[T_ClassEntity] = set()
        for linkage in self:
            targets.add(linkage.target_class)
        return targets

    def get_target_classes_by_source(self) -> dict[T_ClassEntity, set[T_ClassEntity]]:
        """Group target classes by the source class they are linked from.

        The returned mapping is a ``defaultdict(set)``.
        """
        grouped: dict[T_ClassEntity, set[T_ClassEntity]] = defaultdict(set)
        for linkage in self:
            source, target = linkage.source_class, linkage.target_class
            grouped[source].add(target)
        return grouped

    def to_pandas(self) -> pd.DataFrame:
        """Return the links as a DataFrame with one row per link."""
        # Todo: Remove this method
        columns = ("source_class", "connecting_property", "target_class", "max_occurrence")
        rows = [{column: getattr(linkage, column) for column in columns} for linkage in self]
        return pd.DataFrame(rows)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class BaseAnalysis(ABC, Generic[T_Rules, T_Class, T_Property, T_ClassEntity, T_PropertyEntity]):
    """Shared analysis logic over a rules object.

    Subclasses provide the small accessor methods (``_get_*`` / ``_set_cls_entity``)
    that know how to read classes, properties and their entities from a concrete
    rules type; every public method here is written against those accessors only.
    """

    def __init__(self, rules: T_Rules) -> None:
        self.rules = rules

    @abstractmethod
    def _get_classes(self) -> list[T_Class]:
        """Return all class definitions of the rules."""
        raise NotImplementedError

    @abstractmethod
    def _get_properties(self) -> list[T_Property]:
        """Return all property definitions of the rules."""
        raise NotImplementedError

    @abstractmethod
    def _get_reference(self, class_or_property: T_Class | T_Property) -> ReferenceEntity | None:
        """Return the reference entity of a class or property, if any."""
        raise NotImplementedError

    @abstractmethod
    def _get_cls_entity(self, class_: T_Class | T_Property) -> T_ClassEntity:
        """Return the class entity a class or property definition belongs to."""
        raise NotImplementedError

    @abstractmethod
    def _get_prop_entity(self, property_: T_Property) -> T_PropertyEntity:
        """Return the entity identifying a property definition."""
        raise NotImplementedError

    @abstractmethod
    def _get_cls_parents(self, class_: T_Class) -> list[T_ClassEntity] | None:
        """Return the parent class entities of a class definition, if any."""
        raise NotImplementedError

    @abstractmethod
    def _get_reference_rules(self) -> T_Rules | None:
        """Return the reference rules attached to the analysed rules, if any."""
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def _set_cls_entity(cls, property_: T_Property, class_: T_ClassEntity) -> None:
        """Re-attach ``property_`` to ``class_`` (mutates ``property_``)."""
        raise NotImplementedError

    @abstractmethod
    def _get_object(self, property_: T_Property) -> T_ClassEntity | None:
        """Return the class a property points to, or ``None`` if it does not point to a class."""
        raise NotImplementedError

    @abstractmethod
    def _get_max_occurrence(self, property_: T_Property) -> int | float | None:
        """Return the maximum occurrence of a property."""
        raise NotImplementedError

    @property
    def directly_referred_classes(self) -> set[ClassEntity]:
        """Classes whose reference points into the reference rules.

        Returns an empty set when there are no reference rules. Otherwise a class is
        included when its reference is a ``ReferenceEntity`` whose prefix equals the
        prefix of the reference rules' metadata.
        """
        ref_rules = self._get_reference_rules()
        if ref_rules is None:
            return set()
        prefix = ref_rules.metadata.get_prefix()
        return {
            ref.as_class_entity()
            for class_ in self._get_classes()
            if isinstance((ref := self._get_reference(class_)), ReferenceEntity) and ref.prefix == prefix
        }

    @property
    def inherited_referred_classes(self) -> set[ClassEntity]:
        """Classes on the inheritance paths of the directly referred classes."""
        return {
            ancestor
            for class_ in self.directly_referred_classes
            for ancestor in self.class_inheritance_path(class_)
        }

    # Todo Lru cache this method.
    def class_parent_pairs(self) -> dict[T_ClassEntity, list[T_ClassEntity]]:
        """Return, for every class, its parents that share the class' prefix.

        Every class gets an entry (possibly an empty list). A parent with a different
        prefix is skipped with a warning.
        """
        parents_by_class: dict[T_ClassEntity, list[T_ClassEntity]] = {}
        for cls_ in self._get_classes():
            entity = self._get_cls_entity(cls_)
            parents_by_class[entity] = []
            for parent in self._get_cls_parents(cls_) or []:
                if parent.prefix == entity.prefix:
                    parents_by_class[entity].append(parent)
                else:
                    warnings.warn(
                        f"Parent class {parent} of class {cls_} is not in the same namespace, skipping !",
                        stacklevel=2,
                    )

        return parents_by_class

    def classes_with_properties(self, consider_inheritance: bool = False) -> dict[T_ClassEntity, list[T_Property]]:
        """Returns classes that have been defined in the data model.

        Args:
            consider_inheritance: Whether to consider inheritance or not. Defaults False

        Returns:
            Dictionary of classes with a list of properties defined for them

        !!! note "consider_inheritance"
            If consider_inheritance is True, properties from parent classes will also be considered.
            This means if a class has a parent class, and the parent class has properties defined for it,
            while we do not have any properties defined for the child class, we will still consider the
            properties from the parent class. If consider_inheritance is False, we will only consider
            properties defined for the child class, thus if no properties are defined for the child class,
            it will not be included in the returned dictionary.
        """

        class_property_pairs: dict[T_ClassEntity, list[T_Property]] = defaultdict(list)

        for property_ in self._get_properties():
            class_property_pairs[self._get_cls_entity(property_)].append(property_)  # type: ignore

        if consider_inheritance:
            class_parent_pairs = self.class_parent_pairs()
            for class_ in class_parent_pairs:
                self._add_inherited_properties(class_, class_property_pairs, class_parent_pairs)

        return class_property_pairs

    def class_inheritance_path(self, class_: ClassEntity) -> list[ClassEntity]:
        """Return the inheritance path of ``class_`` computed from ``class_parent_pairs``."""
        class_parent_pairs = self.class_parent_pairs()
        return get_inheritance_path(class_, class_parent_pairs)

    @classmethod
    def _add_inherited_properties(
        cls,
        class_: T_ClassEntity,
        class_property_pairs: dict[T_ClassEntity, list[T_Property]],
        class_parent_pairs: dict[T_ClassEntity, list[T_ClassEntity]],
    ):
        """Append copies of the ancestors' properties to ``class_`` (mutates ``class_property_pairs``).

        Args:
            class_: Class that receives the inherited properties.
            class_property_pairs: Mapping of class to its properties; updated in place.
            class_parent_pairs: Mapping of class to its parents, used to resolve the
                inheritance path.
        """
        inheritance_path = get_inheritance_path(class_, class_parent_pairs)
        for parent in inheritance_path:
            # ParentClassEntity -> ClassEntity to match the type of class_property_pairs
            for property_ in class_property_pairs.get(parent, ()):
                inherited = property_.model_copy()

                # This corresponds to importing properties from parent class
                # making sure that the property is attached to desired child class
                cls._set_cls_entity(inherited, class_)

                # NOTE(review): a transformation (e.g. an RDF path) on the copied property is
                # left untouched; confirm whether its starting class should follow the child too.
                class_property_pairs.setdefault(class_, []).append(inherited)

    def class_property_pairs(
        self, only_rdfpath: bool = False, consider_inheritance: bool = False
    ) -> dict[T_ClassEntity, dict[T_PropertyEntity, T_Property]]:
        """Returns a dictionary of classes with a dictionary of properties associated with them.

        Args:
            only_rdfpath : To consider only properties which have rule `rdfpath` set. Defaults False
            consider_inheritance: Whether to consider inheritance or not. Defaults False

        Returns:
            Dictionary of classes with a dictionary of properties associated with them.

        !!! note "difference to classes_with_properties"
            This method returns a dictionary of classes with a dictionary of properties associated with them.
            While classes_with_properties returns a dictionary of classes with a list of
            properties defined for them,
            here we filter the properties based on the `only_rdfpath` parameter and only consider
            the first definition of a property if it is defined more than once.

        !!! note "only_rdfpath"
            If only_rdfpath is True, only properties with RuleType.rdfpath will be returned as
            a part of the dictionary of properties related to a class. Otherwise, all properties
            will be returned.

        !!! note "consider_inheritance"
            If consider_inheritance is True, properties from parent classes will also be considered.
            This means if a class has a parent class, and the parent class has properties defined for it,
            while we do not have any properties defined for the child class, we will still consider the
            properties from the parent class. If consider_inheritance is False, we will only consider
            properties defined for the child class, thus if no properties are defined for the child class,
            it will not be included in the returned dictionary.
        """
        # TODO: https://cognitedata.atlassian.net/jira/software/projects/NEAT/boards/893?selectedIssue=NEAT-78

        class_property_pairs: dict[T_ClassEntity, dict[T_PropertyEntity, T_Property]] = {}

        for class_, properties in self.classes_with_properties(consider_inheritance).items():
            processed_properties: dict[T_PropertyEntity, T_Property] = {}
            for property_ in properties:
                prop_entity = self._get_prop_entity(property_)
                if prop_entity in processed_properties:
                    # TODO: use appropriate Warning class from _exceptions.py
                    # if missing make one !
                    # Report the duplicated property itself, not the whole dict collected so far.
                    warnings.warn(
                        f"Property {prop_entity} for {class_} has been defined more than once!"
                        " Only the first definition will be considered, skipping the rest..",
                        stacklevel=2,
                    )
                    continue

                if not only_rdfpath or (
                    isinstance(property_, InformationProperty) and isinstance(property_.transformation, RDFPath)
                ):
                    processed_properties[prop_entity] = property_
            class_property_pairs[class_] = processed_properties

        return class_property_pairs

    def class_linkage(self, consider_inheritance: bool = False) -> LinkageSet[T_ClassEntity, T_PropertyEntity]:
        """Returns a set of class linkages in the data model.

        Args:
            consider_inheritance: Whether to consider inheritance or not. Defaults False

        Returns:
            Set with one ``Linkage`` per property that points to a class
            (i.e. for which ``_get_object`` is not ``None``)
        """
        class_linkage = LinkageSet[T_ClassEntity, T_PropertyEntity]()

        class_property_pairs = self.classes_with_properties(consider_inheritance)

        for property_ in itertools.chain.from_iterable(class_property_pairs.values()):
            object_ = self._get_object(property_)
            if object_ is not None:
                class_linkage.add(
                    Linkage(
                        source_class=self._get_cls_entity(property_),
                        connecting_property=self._get_prop_entity(property_),
                        target_class=object_,
                        max_occurrence=self._get_max_occurrence(property_),
                    )
                )

        return class_linkage

    def connected_classes(self, consider_inheritance: bool = False) -> set[T_ClassEntity]:
        """Return a set of classes that are connected to other classes.

        Args:
            consider_inheritance: Whether to consider inheritance or not. Defaults False

        Returns:
            Set of classes that are connected to other classes
        """
        class_linkage = self.class_linkage(consider_inheritance)
        return class_linkage.source_class.union(class_linkage.target_class)

    def defined_classes(self, consider_inheritance: bool = False) -> set[T_ClassEntity]:
        """Returns classes that have properties defined for them in the data model.

        Args:
            consider_inheritance: Whether to consider inheritance or not. Defaults False

        Returns:
            Set of classes that have been defined in the data model
        """
        class_property_pairs = self.classes_with_properties(consider_inheritance)

        return {
            self._get_cls_entity(property_)
            for property_ in itertools.chain.from_iterable(class_property_pairs.values())
        }

    def disconnected_classes(self, consider_inheritance: bool = False) -> set[T_ClassEntity]:
        """Return a set of classes that are disconnected (i.e. isolated) from other classes.

        Args:
            consider_inheritance: Whether to consider inheritance or not. Defaults False

        Returns:
            Set of classes that are disconnected from other classes
        """
        return self.defined_classes(consider_inheritance) - self.connected_classes(consider_inheritance)

    def symmetrically_connected_classes(
        self, consider_inheritance: bool = False
    ) -> set[tuple[ClassEntity, ClassEntity]]:
        """Returns a set of pairs of symmetrically linked classes.

        Args:
            consider_inheritance: Whether to consider inheritance or not. Defaults False

        Returns:
            Set of pairs of symmetrically linked classes. Both orientations of a pair
            are included, i.e. ``(A, B)`` and ``(B, A)``.

        !!! note "Symmetrically Connected Classes"
            Symmetrically connected classes are classes that are connected to each other
            in both directions. For example, if class A is connected to class B, and class B
            is connected to class A, then classes A and B are symmetrically connected.
        """

        # TODO: Find better name for this method
        sym_pairs: set[tuple[ClassEntity, ClassEntity]] = set()

        class_linkage = self.class_linkage(consider_inheritance)
        if not class_linkage:
            return sym_pairs

        targets_by_source = class_linkage.get_target_classes_by_source()
        for link in class_linkage:
            source = link.source_class
            target = link.target_class

            # The link source -> target is symmetric only if target links back to source.
            # ``.get`` avoids inserting keys for targets that have no outgoing links.
            if source in targets_by_source.get(target, ()):
                sym_pairs.add((source, target))
        return sym_pairs

    def as_property_dict(
        self,
    ) -> dict[T_PropertyEntity, list[T_Property]]:
        """This is used to capture all definitions of a property in the data model."""
        property_dict: dict[T_PropertyEntity, list[T_Property]] = defaultdict(list)
        for definition in self._get_properties():
            property_dict[self._get_prop_entity(definition)].append(definition)
        return property_dict

    def as_class_dict(self) -> dict[str, T_Class]:
        """This is to simplify access to classes through dict.

        Classes are keyed by the suffix of their entity; when a suffix occurs more
        than once only the first definition is kept and a warning is issued.
        """
        class_dict: dict[str, T_Class] = {}
        for definition in self._get_classes():
            entity = self._get_cls_entity(definition)
            if entity.suffix in class_dict:
                warnings.warn(
                    f"Class {entity} has been defined more than once! Only the first definition "
                    "will be considered, skipping the rest..",
                    stacklevel=2,
                )
                continue
            class_dict[entity.suffix] = definition
        return class_dict

    @abstractmethod
    def subset_rules(self, desired_classes: set[T_ClassEntity]) -> T_Rules:
        """Return rules restricted to ``desired_classes``."""
        raise NotImplementedError
|