cognite-neat 0.105.2__py3-none-any.whl → 0.107.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_config.py +6 -260
- cognite/neat/_graph/extractors/__init__.py +5 -1
- cognite/neat/_graph/extractors/_base.py +32 -0
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +42 -16
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +78 -8
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +2 -0
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +10 -3
- cognite/neat/_graph/extractors/_dms.py +48 -14
- cognite/neat/_graph/extractors/_dms_graph.py +149 -0
- cognite/neat/_graph/extractors/_rdf_file.py +32 -5
- cognite/neat/_graph/loaders/_rdf2dms.py +119 -20
- cognite/neat/_graph/queries/_construct.py +1 -1
- cognite/neat/_graph/transformers/__init__.py +5 -0
- cognite/neat/_graph/transformers/_base.py +13 -9
- cognite/neat/_graph/transformers/_classic_cdf.py +141 -44
- cognite/neat/_graph/transformers/_rdfpath.py +4 -4
- cognite/neat/_graph/transformers/_value_type.py +54 -44
- cognite/neat/_issues/warnings/_external.py +1 -1
- cognite/neat/_rules/analysis/_base.py +1 -1
- cognite/neat/_rules/analysis/_information.py +14 -13
- cognite/neat/_rules/catalog/__init__.py +1 -0
- cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
- cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
- cognite/neat/_rules/importers/_dms2rules.py +7 -5
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +5 -3
- cognite/neat/_rules/models/_base_rules.py +0 -12
- cognite/neat/_rules/models/_types.py +5 -0
- cognite/neat/_rules/models/dms/_rules.py +50 -2
- cognite/neat/_rules/models/information/_rules.py +48 -5
- cognite/neat/_rules/models/information/_rules_input.py +1 -1
- cognite/neat/_rules/models/mapping/_classic2core.py +4 -5
- cognite/neat/_rules/models/mapping/_classic2core.yaml +70 -58
- cognite/neat/_rules/transformers/__init__.py +4 -0
- cognite/neat/_rules/transformers/_converters.py +209 -62
- cognite/neat/_rules/transformers/_mapping.py +3 -2
- cognite/neat/_session/_base.py +8 -13
- cognite/neat/_session/_inspect.py +6 -2
- cognite/neat/_session/_mapping.py +22 -13
- cognite/neat/_session/_prepare.py +9 -57
- cognite/neat/_session/_read.py +96 -29
- cognite/neat/_session/_set.py +9 -0
- cognite/neat/_session/_state.py +10 -1
- cognite/neat/_session/_to.py +51 -15
- cognite/neat/_session/exceptions.py +7 -3
- cognite/neat/_store/_graph_store.py +85 -39
- cognite/neat/_store/_rules_store.py +22 -0
- cognite/neat/_utils/auth.py +2 -0
- cognite/neat/_utils/collection_.py +32 -11
- cognite/neat/_version.py +1 -1
- {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/METADATA +2 -8
- {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/RECORD +54 -52
- {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/WHEEL +1 -1
- {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.105.2.dist-info → cognite_neat-0.107.0.dist-info}/entry_points.txt +0 -0
|
@@ -52,10 +52,11 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
|
|
|
52
52
|
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
53
53
|
camel_case: bool = True,
|
|
54
54
|
as_write: bool = False,
|
|
55
|
+
prefix: str | None = None,
|
|
55
56
|
unpack_columns: bool = False,
|
|
56
57
|
):
|
|
57
58
|
super().__init__(
|
|
58
|
-
items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write
|
|
59
|
+
items, namespace, to_type, total, limit, unpack_metadata, skip_metadata_values, camel_case, as_write, prefix
|
|
59
60
|
)
|
|
60
61
|
self.unpack_columns = unpack_columns
|
|
61
62
|
|
|
@@ -71,9 +72,10 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
|
|
|
71
72
|
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
72
73
|
camel_case: bool = True,
|
|
73
74
|
as_write: bool = False,
|
|
75
|
+
prefix: str | None = None,
|
|
74
76
|
unpack_columns: bool = False,
|
|
75
77
|
):
|
|
76
|
-
total, items = cls._from_dataset(client, data_set_external_id)
|
|
78
|
+
total, items = cls._handle_no_access(lambda: cls._from_dataset(client, data_set_external_id))
|
|
77
79
|
return cls(
|
|
78
80
|
items,
|
|
79
81
|
namespace,
|
|
@@ -84,6 +86,7 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
|
|
|
84
86
|
skip_metadata_values,
|
|
85
87
|
camel_case,
|
|
86
88
|
as_write,
|
|
89
|
+
prefix,
|
|
87
90
|
unpack_columns,
|
|
88
91
|
)
|
|
89
92
|
|
|
@@ -99,9 +102,10 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
|
|
|
99
102
|
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
100
103
|
camel_case: bool = True,
|
|
101
104
|
as_write: bool = False,
|
|
105
|
+
prefix: str | None = None,
|
|
102
106
|
unpack_columns: bool = False,
|
|
103
107
|
):
|
|
104
|
-
total, items = cls._from_hierarchy(client, root_asset_external_id)
|
|
108
|
+
total, items = cls._handle_no_access(lambda: cls._from_hierarchy(client, root_asset_external_id))
|
|
105
109
|
return cls(
|
|
106
110
|
items,
|
|
107
111
|
namespace,
|
|
@@ -112,6 +116,7 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
|
|
|
112
116
|
skip_metadata_values,
|
|
113
117
|
camel_case,
|
|
114
118
|
as_write,
|
|
119
|
+
prefix,
|
|
115
120
|
unpack_columns,
|
|
116
121
|
)
|
|
117
122
|
|
|
@@ -126,6 +131,7 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
|
|
|
126
131
|
skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
|
|
127
132
|
camel_case: bool = True,
|
|
128
133
|
as_write: bool = False,
|
|
134
|
+
prefix: str | None = None,
|
|
129
135
|
unpack_columns: bool = False,
|
|
130
136
|
):
|
|
131
137
|
total, items = cls._from_file(file_path)
|
|
@@ -139,6 +145,7 @@ class SequencesExtractor(ClassicCDFBaseExtractor[NeatSequence]):
|
|
|
139
145
|
skip_metadata_values,
|
|
140
146
|
camel_case,
|
|
141
147
|
as_write,
|
|
148
|
+
prefix,
|
|
142
149
|
unpack_columns,
|
|
143
150
|
)
|
|
144
151
|
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import urllib.parse
|
|
1
2
|
from collections.abc import Iterable, Iterator
|
|
2
3
|
from typing import cast
|
|
3
4
|
|
|
@@ -5,6 +6,7 @@ from cognite.client import CogniteClient
|
|
|
5
6
|
from cognite.client import data_modeling as dm
|
|
6
7
|
from cognite.client.data_classes.data_modeling import DataModelIdentifier
|
|
7
8
|
from cognite.client.data_classes.data_modeling.instances import Instance, PropertyValue
|
|
9
|
+
from cognite.client.utils.useful_types import SequenceNotStr
|
|
8
10
|
from rdflib import RDF, XSD, Literal, Namespace, URIRef
|
|
9
11
|
|
|
10
12
|
from cognite.neat._constants import DEFAULT_SPACE_URI
|
|
@@ -38,7 +40,12 @@ class DMSExtractor(BaseExtractor):
|
|
|
38
40
|
|
|
39
41
|
@classmethod
|
|
40
42
|
def from_data_model(
|
|
41
|
-
cls,
|
|
43
|
+
cls,
|
|
44
|
+
client: CogniteClient,
|
|
45
|
+
data_model: DataModelIdentifier,
|
|
46
|
+
limit: int | None = None,
|
|
47
|
+
overwrite_namespace: Namespace | None = None,
|
|
48
|
+
instance_space: str | SequenceNotStr[str] | None = None,
|
|
42
49
|
) -> "DMSExtractor":
|
|
43
50
|
"""Create an extractor from a data model.
|
|
44
51
|
|
|
@@ -46,22 +53,38 @@ class DMSExtractor(BaseExtractor):
|
|
|
46
53
|
client: The Cognite client to use.
|
|
47
54
|
data_model: The data model to extract.
|
|
48
55
|
limit: The maximum number of instances to extract.
|
|
56
|
+
overwrite_namespace: If provided, this namespace is used for all extracted items instead of the namespace derived from each item's space.
|
|
57
|
+
instance_space: The space to extract instances from.
|
|
49
58
|
"""
|
|
50
59
|
retrieved = client.data_modeling.data_models.retrieve(data_model, inline_views=True)
|
|
51
60
|
if not retrieved:
|
|
52
61
|
raise ResourceRetrievalError(dm.DataModelId.load(data_model), "data model", "Data Model is missing in CDF")
|
|
53
|
-
return cls.from_views(client, retrieved.latest_version().views, limit)
|
|
62
|
+
return cls.from_views(client, retrieved.latest_version().views, limit, overwrite_namespace, instance_space)
|
|
54
63
|
|
|
55
64
|
@classmethod
|
|
56
|
-
def from_views(
|
|
65
|
+
def from_views(
|
|
66
|
+
cls,
|
|
67
|
+
client: CogniteClient,
|
|
68
|
+
views: Iterable[dm.View],
|
|
69
|
+
limit: int | None = None,
|
|
70
|
+
overwrite_namespace: Namespace | None = None,
|
|
71
|
+
instance_space: str | SequenceNotStr[str] | None = None,
|
|
72
|
+
) -> "DMSExtractor":
|
|
57
73
|
"""Create an extractor from a set of views.
|
|
58
74
|
|
|
59
75
|
Args:
|
|
60
76
|
client: The Cognite client to use.
|
|
61
77
|
views: The views to extract.
|
|
62
78
|
limit: The maximum number of instances to extract.
|
|
79
|
+
overwrite_namespace: If provided, this namespace is used for all extracted items instead of the namespace derived from each item's space.
|
|
80
|
+
instance_space: The space to extract instances from.
|
|
63
81
|
"""
|
|
64
|
-
return cls(
|
|
82
|
+
return cls(
|
|
83
|
+
_InstanceIterator(client, views, instance_space),
|
|
84
|
+
total=None,
|
|
85
|
+
limit=limit,
|
|
86
|
+
overwrite_namespace=overwrite_namespace,
|
|
87
|
+
)
|
|
65
88
|
|
|
66
89
|
def extract(self) -> Iterable[Triple]:
|
|
67
90
|
for count, item in enumerate(self.items, 1):
|
|
@@ -105,6 +128,10 @@ class DMSExtractor(BaseExtractor):
|
|
|
105
128
|
else:
|
|
106
129
|
raise NotImplementedError(f"Unknown instance type {type(instance)}")
|
|
107
130
|
|
|
131
|
+
if self.overwrite_namespace:
|
|
132
|
+
# If the namespace is overwritten, keep the original space as a property to avoid losing information.
|
|
133
|
+
yield id_, self._get_namespace(instance.space)["space"], Literal(instance.space)
|
|
134
|
+
|
|
108
135
|
for view_id, properties in instance.properties.items():
|
|
109
136
|
namespace = self._get_namespace(view_id.space)
|
|
110
137
|
for key, value in properties.items():
|
|
@@ -124,35 +151,42 @@ class DMSExtractor(BaseExtractor):
|
|
|
124
151
|
yield from self._get_objects(item)
|
|
125
152
|
|
|
126
153
|
def _as_uri_ref(self, instance: Instance | dm.DirectRelationReference) -> URIRef:
|
|
127
|
-
return self._get_namespace(instance.space)[instance.external_id]
|
|
154
|
+
return self._get_namespace(instance.space)[urllib.parse.quote(instance.external_id)]
|
|
128
155
|
|
|
129
156
|
def _get_namespace(self, space: str) -> Namespace:
|
|
130
157
|
if self.overwrite_namespace:
|
|
131
158
|
return self.overwrite_namespace
|
|
132
|
-
return Namespace(DEFAULT_SPACE_URI.format(space=space))
|
|
159
|
+
return Namespace(DEFAULT_SPACE_URI.format(space=urllib.parse.quote(space)))
|
|
133
160
|
|
|
134
161
|
|
|
135
|
-
class _InstanceIterator(
|
|
136
|
-
def __init__(
|
|
162
|
+
class _InstanceIterator(Iterable[Instance]):
|
|
163
|
+
def __init__(
|
|
164
|
+
self, client: CogniteClient, views: Iterable[dm.View], instance_space: str | SequenceNotStr[str] | None = None
|
|
165
|
+
):
|
|
137
166
|
self.client = client
|
|
138
167
|
self.views = views
|
|
168
|
+
self.instance_space = instance_space
|
|
139
169
|
|
|
140
170
|
def __iter__(self) -> Iterator[Instance]:
|
|
141
|
-
return self
|
|
142
|
-
|
|
143
|
-
def __next__(self) -> Instance: # type: ignore[misc]
|
|
144
171
|
for view in self.views:
|
|
172
|
+
view_id = view.as_id()
|
|
145
173
|
# All nodes and edges with properties
|
|
146
|
-
|
|
147
|
-
|
|
174
|
+
if view.used_for in ("node", "all"):
|
|
175
|
+
yield from self.client.data_modeling.instances(
|
|
176
|
+
chunk_size=None, instance_type="node", sources=[view_id], space=self.instance_space
|
|
177
|
+
)
|
|
178
|
+
if view.used_for in ("edge", "all"):
|
|
179
|
+
yield from self.client.data_modeling.instances(
|
|
180
|
+
chunk_size=None, instance_type="edge", sources=[view_id], space=self.instance_space
|
|
181
|
+
)
|
|
148
182
|
|
|
149
183
|
for prop in view.properties.values():
|
|
150
184
|
if isinstance(prop, dm.EdgeConnection):
|
|
151
|
-
# Get all edges with properties
|
|
152
185
|
yield from self.client.data_modeling.instances(
|
|
153
186
|
chunk_size=None,
|
|
154
187
|
instance_type="edge",
|
|
155
188
|
filter=dm.filters.Equals(
|
|
156
189
|
["edge", "type"], {"space": prop.type.space, "externalId": prop.type.external_id}
|
|
157
190
|
),
|
|
191
|
+
space=self.instance_space,
|
|
158
192
|
)
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
from collections.abc import Iterable, Sequence
|
|
2
|
+
|
|
3
|
+
from cognite.client import data_modeling as dm
|
|
4
|
+
from cognite.client.exceptions import CogniteAPIError
|
|
5
|
+
from cognite.client.utils.useful_types import SequenceNotStr
|
|
6
|
+
from rdflib import Namespace, URIRef
|
|
7
|
+
|
|
8
|
+
from cognite.neat._client import NeatClient
|
|
9
|
+
from cognite.neat._constants import DEFAULT_NAMESPACE
|
|
10
|
+
from cognite.neat._issues import IssueList, NeatIssue, catch_warnings
|
|
11
|
+
from cognite.neat._issues.warnings import CDFAuthWarning, ResourceNotFoundWarning, ResourceRetrievalWarning
|
|
12
|
+
from cognite.neat._rules.importers import DMSImporter
|
|
13
|
+
from cognite.neat._rules.models import DMSRules, InformationRules
|
|
14
|
+
from cognite.neat._rules.transformers import DMSToInformation, VerifyDMSRules
|
|
15
|
+
from cognite.neat._shared import Triple
|
|
16
|
+
|
|
17
|
+
from ._base import KnowledgeGraphExtractor
|
|
18
|
+
from ._dms import DMSExtractor
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DMSGraphExtractor(KnowledgeGraphExtractor):
|
|
22
|
+
def __init__(
|
|
23
|
+
self,
|
|
24
|
+
data_model: dm.DataModel[dm.View],
|
|
25
|
+
client: NeatClient,
|
|
26
|
+
namespace: Namespace = DEFAULT_NAMESPACE,
|
|
27
|
+
issues: Sequence[NeatIssue] | None = None,
|
|
28
|
+
instance_space: str | SequenceNotStr[str] | None = None,
|
|
29
|
+
) -> None:
|
|
30
|
+
self._client = client
|
|
31
|
+
self._data_model = data_model
|
|
32
|
+
self._namespace = namespace or DEFAULT_NAMESPACE
|
|
33
|
+
self._issues = IssueList(issues)
|
|
34
|
+
self._instance_space = instance_space
|
|
35
|
+
|
|
36
|
+
self._views: list[dm.View] | None = None
|
|
37
|
+
self._information_rules: InformationRules | None = None
|
|
38
|
+
self._dms_rules: DMSRules | None = None
|
|
39
|
+
|
|
40
|
+
@classmethod
|
|
41
|
+
def from_data_model_id(
|
|
42
|
+
cls,
|
|
43
|
+
data_model_id: dm.DataModelIdentifier,
|
|
44
|
+
client: NeatClient,
|
|
45
|
+
namespace: Namespace = DEFAULT_NAMESPACE,
|
|
46
|
+
instance_space: str | SequenceNotStr[str] | None = None,
|
|
47
|
+
) -> "DMSGraphExtractor":
|
|
48
|
+
issues: list[NeatIssue] = []
|
|
49
|
+
try:
|
|
50
|
+
data_model = client.data_modeling.data_models.retrieve(data_model_id, inline_views=True)
|
|
51
|
+
except CogniteAPIError as e:
|
|
52
|
+
issues.append(CDFAuthWarning("retrieving data model", str(e)))
|
|
53
|
+
return cls(
|
|
54
|
+
cls._create_empty_model(dm.DataModelId.load(data_model_id)), client, namespace, issues, instance_space
|
|
55
|
+
)
|
|
56
|
+
if not data_model:
|
|
57
|
+
issues.append(ResourceRetrievalWarning(frozenset({data_model_id}), "data model"))
|
|
58
|
+
return cls(
|
|
59
|
+
cls._create_empty_model(dm.DataModelId.load(data_model_id)), client, namespace, issues, instance_space
|
|
60
|
+
)
|
|
61
|
+
return cls(data_model.latest_version(), client, namespace, issues, instance_space)
|
|
62
|
+
|
|
63
|
+
@classmethod
|
|
64
|
+
def _create_empty_model(cls, data_model_id: dm.DataModelId) -> dm.DataModel:
|
|
65
|
+
return dm.DataModel(
|
|
66
|
+
data_model_id.space,
|
|
67
|
+
data_model_id.external_id,
|
|
68
|
+
data_model_id.version or "MISSING",
|
|
69
|
+
is_global=False,
|
|
70
|
+
last_updated_time=0,
|
|
71
|
+
created_time=0,
|
|
72
|
+
description=None,
|
|
73
|
+
name=None,
|
|
74
|
+
views=[],
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
@property
|
|
78
|
+
def _model_views(self) -> list[dm.View]:
|
|
79
|
+
if self._views is None:
|
|
80
|
+
self._views = self._get_views()
|
|
81
|
+
return self._views
|
|
82
|
+
|
|
83
|
+
@property
|
|
84
|
+
def description(self) -> str:
|
|
85
|
+
return "Extracts a data model with nodes and edges."
|
|
86
|
+
|
|
87
|
+
@property
|
|
88
|
+
def source_uri(self) -> URIRef:
|
|
89
|
+
space, external_id, version = self._data_model.as_id().as_tuple()
|
|
90
|
+
return DEFAULT_NAMESPACE[f"{self._client.config.project}/{space}/{external_id}/{version}"]
|
|
91
|
+
|
|
92
|
+
def extract(self) -> Iterable[Triple]:
|
|
93
|
+
"""Extracts the knowledge graph from the data model."""
|
|
94
|
+
views = self._model_views
|
|
95
|
+
yield from DMSExtractor.from_views(
|
|
96
|
+
self._client,
|
|
97
|
+
views,
|
|
98
|
+
overwrite_namespace=self._namespace,
|
|
99
|
+
instance_space=self._instance_space,
|
|
100
|
+
).extract()
|
|
101
|
+
|
|
102
|
+
def _get_views(self) -> list[dm.View]:
|
|
103
|
+
view_by_id: dict[dm.ViewId, dm.View] = {}
|
|
104
|
+
if view_ids := [view_id for view_id in self._data_model.views if isinstance(view_id, dm.ViewId)]:
|
|
105
|
+
try:
|
|
106
|
+
# MyPy does not understand the isinstance check above.
|
|
107
|
+
retrieved = self._client.data_modeling.views.retrieve(ids=view_ids) # type: ignore[arg-type]
|
|
108
|
+
except CogniteAPIError as e:
|
|
109
|
+
self._issues.append(CDFAuthWarning("retrieving views", str(e)))
|
|
110
|
+
else:
|
|
111
|
+
view_by_id.update({view.as_id(): view for view in retrieved})
|
|
112
|
+
views: list[dm.View] = []
|
|
113
|
+
data_model_id = self._data_model.as_id()
|
|
114
|
+
for dm_view in self._data_model.views:
|
|
115
|
+
if isinstance(dm_view, dm.View):
|
|
116
|
+
views.append(dm_view)
|
|
117
|
+
elif isinstance(dm_view, dm.ViewId):
|
|
118
|
+
if view := view_by_id.get(dm_view):
|
|
119
|
+
views.append(view)
|
|
120
|
+
else:
|
|
121
|
+
self._issues.append(ResourceNotFoundWarning(dm_view, "view", data_model_id, "data model"))
|
|
122
|
+
return views
|
|
123
|
+
|
|
124
|
+
def get_information_rules(self) -> InformationRules:
|
|
125
|
+
"""Returns the information rules that the extractor uses."""
|
|
126
|
+
if self._information_rules is None:
|
|
127
|
+
self._information_rules, self._dms_rules = self._create_rules()
|
|
128
|
+
return self._information_rules
|
|
129
|
+
|
|
130
|
+
def get_dms_rules(self) -> DMSRules:
|
|
131
|
+
"""Returns the DMS rules that the extractor uses."""
|
|
132
|
+
if self._dms_rules is None:
|
|
133
|
+
self._information_rules, self._dms_rules = self._create_rules()
|
|
134
|
+
return self._dms_rules
|
|
135
|
+
|
|
136
|
+
def get_issues(self) -> IssueList:
|
|
137
|
+
"""Returns the issues that occurred during the extraction."""
|
|
138
|
+
return self._issues
|
|
139
|
+
|
|
140
|
+
def _create_rules(self) -> tuple[InformationRules, DMSRules]:
|
|
141
|
+
# The DMS and Information rules must be created together to link them properly.
|
|
142
|
+
importer = DMSImporter.from_data_model(self._client, self._data_model)
|
|
143
|
+
unverified_dms = importer.to_rules()
|
|
144
|
+
with catch_warnings() as issues:
|
|
145
|
+
# Any errors that occur will be raised and caught outside the extractor.
|
|
146
|
+
verified_dms = VerifyDMSRules(client=self._client).transform(unverified_dms)
|
|
147
|
+
information_rules = DMSToInformation(self._namespace).transform(verified_dms)
|
|
148
|
+
self._issues.extend(issues)
|
|
149
|
+
return information_rules, verified_dms
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
import zipfile
|
|
1
2
|
from collections.abc import Iterable
|
|
2
3
|
from pathlib import Path
|
|
3
|
-
from typing import get_args
|
|
4
|
+
from typing import cast, get_args
|
|
4
5
|
|
|
5
6
|
from rdflib import URIRef
|
|
6
7
|
from rdflib.util import guess_format
|
|
@@ -10,6 +11,7 @@ from cognite.neat._graph._shared import RDFTypes
|
|
|
10
11
|
from cognite.neat._graph.extractors._base import BaseExtractor
|
|
11
12
|
from cognite.neat._issues._base import IssueList
|
|
12
13
|
from cognite.neat._issues.errors import FileNotFoundNeatError, FileTypeUnexpectedError
|
|
14
|
+
from cognite.neat._issues.errors._general import NeatValueError
|
|
13
15
|
from cognite.neat._shared import Triple
|
|
14
16
|
|
|
15
17
|
|
|
@@ -24,25 +26,50 @@ class RdfFileExtractor(BaseExtractor):
|
|
|
24
26
|
|
|
25
27
|
def __init__(
|
|
26
28
|
self,
|
|
27
|
-
filepath: Path,
|
|
29
|
+
filepath: Path | zipfile.ZipExtFile,
|
|
28
30
|
base_uri: URIRef = DEFAULT_BASE_URI,
|
|
29
31
|
issue_list: IssueList | None = None,
|
|
30
32
|
):
|
|
31
33
|
self.issue_list = issue_list or IssueList(title=f"{filepath.name}")
|
|
32
34
|
self.base_uri = base_uri
|
|
33
35
|
self.filepath = filepath
|
|
34
|
-
self.format = guess_format(str(self.filepath))
|
|
35
36
|
|
|
36
|
-
if
|
|
37
|
+
self.format = guess_format(str(self.filepath) if isinstance(self.filepath, Path) else self.filepath.name)
|
|
38
|
+
|
|
39
|
+
print(self.format)
|
|
40
|
+
if isinstance(self.filepath, Path) and not self.filepath.exists():
|
|
37
41
|
self.issue_list.append(FileNotFoundNeatError(self.filepath))
|
|
38
42
|
|
|
39
43
|
if not self.format:
|
|
40
44
|
self.issue_list.append(
|
|
41
45
|
FileTypeUnexpectedError(
|
|
42
|
-
self.filepath,
|
|
46
|
+
(self.filepath if isinstance(self.filepath, Path) else Path(self.filepath.name)),
|
|
43
47
|
frozenset(get_args(RDFTypes)),
|
|
44
48
|
)
|
|
45
49
|
)
|
|
46
50
|
|
|
47
51
|
def extract(self) -> Iterable[Triple]:
|
|
48
52
|
raise NotImplementedError()
|
|
53
|
+
|
|
54
|
+
@classmethod
|
|
55
|
+
def from_zip(
|
|
56
|
+
cls,
|
|
57
|
+
filepath: Path,
|
|
58
|
+
filename: str = "neat-session/instances/instances.ttl",
|
|
59
|
+
base_uri: URIRef = DEFAULT_BASE_URI,
|
|
60
|
+
issue_list: IssueList | None = None,
|
|
61
|
+
):
|
|
62
|
+
if not filepath.exists():
|
|
63
|
+
raise FileNotFoundNeatError(filepath)
|
|
64
|
+
if filepath.suffix not in {".zip"}:
|
|
65
|
+
raise NeatValueError("Expected a zip file, got {filepath.suffix}")
|
|
66
|
+
|
|
67
|
+
with zipfile.ZipFile(filepath, "r") as zip_ref:
|
|
68
|
+
for file_info in zip_ref.infolist():
|
|
69
|
+
if file_info.filename == filename:
|
|
70
|
+
# We need to open the file in the zip file, and close it upon
|
|
71
|
+
# triple extraction ...
|
|
72
|
+
file = zip_ref.open(file_info)
|
|
73
|
+
return cls(cast(zipfile.ZipExtFile, file), base_uri, issue_list)
|
|
74
|
+
|
|
75
|
+
raise NeatValueError(f"Cannot extract {filename} from zip file {filepath}")
|
|
@@ -32,11 +32,12 @@ from cognite.neat._issues.errors import (
|
|
|
32
32
|
from cognite.neat._issues.warnings import PropertyDirectRelationLimitWarning, PropertyTypeNotSupportedWarning
|
|
33
33
|
from cognite.neat._rules.analysis._dms import DMSAnalysis
|
|
34
34
|
from cognite.neat._rules.models import DMSRules
|
|
35
|
-
from cognite.neat._rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE, Json
|
|
35
|
+
from cognite.neat._rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE, Json, String
|
|
36
36
|
from cognite.neat._rules.models.entities._single_value import ViewEntity
|
|
37
37
|
from cognite.neat._shared import InstanceType
|
|
38
38
|
from cognite.neat._store import NeatGraphStore
|
|
39
39
|
from cognite.neat._utils.auxiliary import create_sha256_hash
|
|
40
|
+
from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
|
|
40
41
|
from cognite.neat._utils.rdf_ import remove_namespace_from_uri
|
|
41
42
|
from cognite.neat._utils.upload import UploadResult
|
|
42
43
|
|
|
@@ -157,8 +158,8 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
157
158
|
view_ids.append(f"{view_id!r} (self)")
|
|
158
159
|
|
|
159
160
|
tracker = self._tracker(type(self).__name__, view_ids, "views")
|
|
160
|
-
for view_id, (view,
|
|
161
|
-
pydantic_cls, edge_by_type, issues = self._create_validation_classes(view) # type: ignore[var-annotated]
|
|
161
|
+
for view_id, (view, instance_count) in view_and_count_by_id.items():
|
|
162
|
+
pydantic_cls, edge_by_type, edge_by_prop_id, issues = self._create_validation_classes(view) # type: ignore[var-annotated]
|
|
162
163
|
yield from issues
|
|
163
164
|
tracker.issue(issues)
|
|
164
165
|
|
|
@@ -194,21 +195,66 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
194
195
|
# this assumes no changes in the suffix of view and class
|
|
195
196
|
reader = self.graph_store.read(view.external_id)
|
|
196
197
|
|
|
197
|
-
|
|
198
|
+
instance_iterable = iterate_progress_bar_if_above_config_threshold(
|
|
199
|
+
reader, instance_count, f"Loading {track_id}"
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
for identifier, properties in instance_iterable:
|
|
203
|
+
start_node, end_node = self._pop_start_end_node(properties)
|
|
204
|
+
is_edge = start_node and end_node
|
|
205
|
+
if (is_edge and view.used_for == "node") or (not is_edge and view.used_for == "edge"):
|
|
206
|
+
instance_type = "edge" if is_edge else "node"
|
|
207
|
+
creation_error = ResourceCreationError(
|
|
208
|
+
identifier,
|
|
209
|
+
instance_type,
|
|
210
|
+
error=f"{instance_type.capitalize()} found in {view.used_for} view",
|
|
211
|
+
)
|
|
212
|
+
tracker.issue(creation_error)
|
|
213
|
+
if stop_on_exception:
|
|
214
|
+
raise creation_error
|
|
215
|
+
yield creation_error
|
|
216
|
+
continue
|
|
217
|
+
|
|
198
218
|
if skip_properties:
|
|
199
219
|
properties = {k: v for k, v in properties.items() if k not in skip_properties}
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
220
|
+
|
|
221
|
+
if start_node and end_node:
|
|
222
|
+
# Is an edge
|
|
223
|
+
try:
|
|
224
|
+
yield self._create_edge_with_properties(
|
|
225
|
+
identifier, properties, start_node, end_node, pydantic_cls, view_id
|
|
226
|
+
)
|
|
227
|
+
except ValueError as e:
|
|
228
|
+
error_edge = ResourceCreationError(identifier, "edge", error=str(e))
|
|
229
|
+
tracker.issue(error_edge)
|
|
230
|
+
if stop_on_exception:
|
|
231
|
+
raise error_edge from e
|
|
232
|
+
yield error_edge
|
|
233
|
+
else:
|
|
234
|
+
try:
|
|
235
|
+
yield self._create_node(identifier, properties, pydantic_cls, view_id)
|
|
236
|
+
except ValueError as e:
|
|
237
|
+
error_node = ResourceCreationError(identifier, "node", error=str(e))
|
|
238
|
+
tracker.issue(error_node)
|
|
239
|
+
if stop_on_exception:
|
|
240
|
+
raise error_node from e
|
|
241
|
+
yield error_node
|
|
242
|
+
yield from self._create_edges_without_properties(
|
|
243
|
+
identifier, properties, edge_by_type, edge_by_prop_id, tracker
|
|
244
|
+
)
|
|
209
245
|
tracker.finish(track_id)
|
|
210
246
|
yield _END_OF_CLASS
|
|
211
247
|
|
|
248
|
+
@staticmethod
|
|
249
|
+
def _pop_start_end_node(properties: dict[str | InstanceType, list[str]]) -> tuple[str | None, str | None]:
|
|
250
|
+
start_node = properties.pop("startNode", [None])[0]
|
|
251
|
+
if not start_node:
|
|
252
|
+
start_node = properties.pop("start_node", [None])[0]
|
|
253
|
+
end_node = properties.pop("endNode", [None])[0]
|
|
254
|
+
if not end_node:
|
|
255
|
+
end_node = properties.pop("end_node", [None])[0]
|
|
256
|
+
return start_node, end_node
|
|
257
|
+
|
|
212
258
|
def write_to_file(self, filepath: Path) -> None:
|
|
213
259
|
if filepath.suffix not in [".json", ".yaml", ".yml"]:
|
|
214
260
|
raise ValueError(f"File format {filepath.suffix} is not supported")
|
|
@@ -293,17 +339,30 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
293
339
|
|
|
294
340
|
def _create_validation_classes(
|
|
295
341
|
self, view: dm.View
|
|
296
|
-
) -> tuple[
|
|
342
|
+
) -> tuple[
|
|
343
|
+
type[BaseModel],
|
|
344
|
+
dict[str, tuple[str, dm.EdgeConnection]],
|
|
345
|
+
dict[str, tuple[str, dm.EdgeConnection]],
|
|
346
|
+
NeatIssueList,
|
|
347
|
+
]:
|
|
297
348
|
issues = IssueList()
|
|
298
349
|
field_definitions: dict[str, tuple[type, Any]] = {}
|
|
299
|
-
|
|
350
|
+
edge_by_type: dict[str, tuple[str, dm.EdgeConnection]] = {}
|
|
351
|
+
edge_by_prop_id: dict[str, tuple[str, dm.EdgeConnection]] = {}
|
|
300
352
|
validators: dict[str, classmethod] = {}
|
|
301
353
|
direct_relation_by_property: dict[str, dm.DirectRelation] = {}
|
|
302
354
|
unit_properties: list[str] = []
|
|
303
355
|
json_fields: list[str] = []
|
|
356
|
+
text_fields: list[str] = []
|
|
304
357
|
for prop_id, prop in view.properties.items():
|
|
305
358
|
if isinstance(prop, dm.EdgeConnection):
|
|
306
|
-
|
|
359
|
+
if prop.edge_source:
|
|
360
|
+
# Edges with properties are created separately
|
|
361
|
+
continue
|
|
362
|
+
|
|
363
|
+
edge_by_type[prop.type.external_id] = prop_id, prop
|
|
364
|
+
edge_by_prop_id[prop_id] = prop_id, prop
|
|
365
|
+
|
|
307
366
|
if isinstance(prop, dm.MappedProperty):
|
|
308
367
|
if is_readonly_property(prop.container, prop.container_property_identifier):
|
|
309
368
|
continue
|
|
@@ -329,6 +388,8 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
329
388
|
|
|
330
389
|
if data_type == Json:
|
|
331
390
|
json_fields.append(prop_id)
|
|
391
|
+
elif data_type == String:
|
|
392
|
+
text_fields.append(prop_id)
|
|
332
393
|
python_type = data_type.python
|
|
333
394
|
if isinstance(prop.type, ListablePropertyType) and prop.type.is_list:
|
|
334
395
|
python_type = list[python_type]
|
|
@@ -364,11 +425,20 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
364
425
|
else:
|
|
365
426
|
raise ValueError(f"Expect valid JSON string or dict for {info.field_name}: {value}")
|
|
366
427
|
|
|
428
|
+
def parse_text(cls, value: Any, info: ValidationInfo) -> Any:
|
|
429
|
+
if isinstance(value, list):
|
|
430
|
+
return [remove_namespace_from_uri(v) for v in value]
|
|
431
|
+
else:
|
|
432
|
+
return remove_namespace_from_uri(value)
|
|
433
|
+
|
|
367
434
|
if json_fields:
|
|
368
435
|
validators["parse_json_string"] = field_validator(*json_fields, mode="before")(parse_json_string) # type: ignore[assignment, arg-type]
|
|
369
436
|
|
|
370
437
|
validators["parse_list"] = field_validator("*", mode="before")(parse_list) # type: ignore[assignment, arg-type]
|
|
371
438
|
|
|
439
|
+
if text_fields:
|
|
440
|
+
validators["parse_text"] = field_validator(*text_fields, mode="before")(parse_text) # type: ignore[assignment, arg-type]
|
|
441
|
+
|
|
372
442
|
if direct_relation_by_property:
|
|
373
443
|
|
|
374
444
|
def parse_direct_relation(cls, value: list, info: ValidationInfo) -> dict | list[dict]:
|
|
@@ -409,7 +479,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
409
479
|
)
|
|
410
480
|
|
|
411
481
|
pydantic_cls = create_model(view.external_id, __validators__=validators, **field_definitions) # type: ignore[arg-type, call-overload]
|
|
412
|
-
return pydantic_cls,
|
|
482
|
+
return pydantic_cls, edge_by_type, edge_by_prop_id, issues
|
|
413
483
|
|
|
414
484
|
def _create_node(
|
|
415
485
|
self,
|
|
@@ -430,17 +500,46 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
430
500
|
],
|
|
431
501
|
)
|
|
432
502
|
|
|
433
|
-
def
|
|
503
|
+
def _create_edge_with_properties(
|
|
504
|
+
self,
|
|
505
|
+
identifier: str,
|
|
506
|
+
properties: dict[str | InstanceType, list[str]],
|
|
507
|
+
start_node: str,
|
|
508
|
+
end_node: str,
|
|
509
|
+
pydantic_cls: type[BaseModel],
|
|
510
|
+
view_id: dm.ViewId,
|
|
511
|
+
) -> dm.EdgeApply:
|
|
512
|
+
type_ = properties.pop(RDF.type, [None])[0]
|
|
513
|
+
created = pydantic_cls.model_validate(properties)
|
|
514
|
+
if type_ is None:
|
|
515
|
+
raise ValueError(f"Missing type for edge {identifier}")
|
|
516
|
+
|
|
517
|
+
return dm.EdgeApply(
|
|
518
|
+
space=self.instance_space,
|
|
519
|
+
external_id=identifier,
|
|
520
|
+
type=dm.DirectRelationReference(view_id.space, view_id.external_id),
|
|
521
|
+
start_node=dm.DirectRelationReference(self.instance_space, start_node),
|
|
522
|
+
end_node=dm.DirectRelationReference(self.instance_space, end_node),
|
|
523
|
+
sources=[
|
|
524
|
+
dm.NodeOrEdgeData(source=view_id, properties=dict(created.model_dump(exclude_unset=True).items()))
|
|
525
|
+
],
|
|
526
|
+
)
|
|
527
|
+
|
|
528
|
+
def _create_edges_without_properties(
|
|
434
529
|
self,
|
|
435
530
|
identifier: str,
|
|
436
531
|
properties: dict[str, list[str]],
|
|
437
532
|
edge_by_type: dict[str, tuple[str, dm.EdgeConnection]],
|
|
533
|
+
edge_by_prop_id: dict[str, tuple[str, dm.EdgeConnection]],
|
|
438
534
|
tracker: Tracker,
|
|
439
535
|
) -> Iterable[dm.EdgeApply | NeatIssue]:
|
|
440
536
|
for predicate, values in properties.items():
|
|
441
|
-
if predicate
|
|
537
|
+
if predicate in edge_by_type:
|
|
538
|
+
prop_id, edge = edge_by_type[predicate]
|
|
539
|
+
elif predicate in edge_by_prop_id:
|
|
540
|
+
prop_id, edge = edge_by_prop_id[predicate]
|
|
541
|
+
else:
|
|
442
542
|
continue
|
|
443
|
-
prop_id, edge = edge_by_type[predicate]
|
|
444
543
|
if isinstance(edge, SingleEdgeConnection) and len(values) > 1:
|
|
445
544
|
error = ResourceDuplicatedError(
|
|
446
545
|
resource_type="edge",
|
|
@@ -106,7 +106,7 @@ def to_construct_triples(
|
|
|
106
106
|
non_inherited_starting_rdf_types = []
|
|
107
107
|
|
|
108
108
|
for transformation in transformations:
|
|
109
|
-
traversal = cast(RDFPath, transformation.
|
|
109
|
+
traversal = cast(RDFPath, transformation.instance_source).traversal
|
|
110
110
|
|
|
111
111
|
# keeping track of starting rdf types of non-inherited transformations/properties
|
|
112
112
|
if isinstance(traversal, Traversal) and not transformation.inherited:
|