cognite-neat 0.106.0__py3-none-any.whl → 0.108.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_constants.py +35 -1
- cognite/neat/_graph/_shared.py +4 -0
- cognite/neat/_graph/extractors/__init__.py +5 -1
- cognite/neat/_graph/extractors/_base.py +32 -0
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +128 -14
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +156 -12
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +50 -12
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +26 -1
- cognite/neat/_graph/extractors/_dms.py +196 -47
- cognite/neat/_graph/extractors/_dms_graph.py +199 -0
- cognite/neat/_graph/extractors/_mock_graph_generator.py +1 -1
- cognite/neat/_graph/extractors/_rdf_file.py +33 -5
- cognite/neat/_graph/loaders/__init__.py +1 -3
- cognite/neat/_graph/loaders/_rdf2dms.py +123 -19
- cognite/neat/_graph/queries/_base.py +140 -84
- cognite/neat/_graph/queries/_construct.py +2 -2
- cognite/neat/_graph/transformers/__init__.py +8 -1
- cognite/neat/_graph/transformers/_base.py +9 -1
- cognite/neat/_graph/transformers/_classic_cdf.py +90 -3
- cognite/neat/_graph/transformers/_rdfpath.py +3 -3
- cognite/neat/_graph/transformers/_value_type.py +106 -45
- cognite/neat/_issues/errors/_resources.py +1 -1
- cognite/neat/_issues/warnings/__init__.py +0 -2
- cognite/neat/_issues/warnings/_models.py +1 -1
- cognite/neat/_issues/warnings/_properties.py +0 -8
- cognite/neat/_rules/analysis/_base.py +1 -1
- cognite/neat/_rules/analysis/_information.py +14 -13
- cognite/neat/_rules/catalog/__init__.py +1 -0
- cognite/neat/_rules/catalog/classic_model.xlsx +0 -0
- cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
- cognite/neat/_rules/exporters/_rules2instance_template.py +3 -3
- cognite/neat/_rules/importers/__init__.py +3 -1
- cognite/neat/_rules/importers/_dms2rules.py +7 -5
- cognite/neat/_rules/importers/_dtdl2rules/spec.py +1 -2
- cognite/neat/_rules/importers/_rdf/__init__.py +2 -2
- cognite/neat/_rules/importers/_rdf/_base.py +2 -2
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +242 -19
- cognite/neat/_rules/models/_base_rules.py +13 -15
- cognite/neat/_rules/models/_types.py +5 -0
- cognite/neat/_rules/models/dms/_rules.py +51 -10
- cognite/neat/_rules/models/dms/_rules_input.py +4 -0
- cognite/neat/_rules/models/information/_rules.py +48 -5
- cognite/neat/_rules/models/information/_rules_input.py +6 -1
- cognite/neat/_rules/models/mapping/_classic2core.py +4 -5
- cognite/neat/_rules/transformers/__init__.py +10 -0
- cognite/neat/_rules/transformers/_converters.py +300 -62
- cognite/neat/_session/_base.py +57 -10
- cognite/neat/_session/_drop.py +5 -1
- cognite/neat/_session/_inspect.py +3 -2
- cognite/neat/_session/_mapping.py +17 -6
- cognite/neat/_session/_prepare.py +0 -47
- cognite/neat/_session/_read.py +115 -10
- cognite/neat/_session/_set.py +27 -0
- cognite/neat/_session/_show.py +4 -4
- cognite/neat/_session/_state.py +12 -1
- cognite/neat/_session/_to.py +43 -2
- cognite/neat/_session/_wizard.py +1 -1
- cognite/neat/_session/exceptions.py +8 -3
- cognite/neat/_store/_graph_store.py +331 -136
- cognite/neat/_store/_rules_store.py +130 -1
- cognite/neat/_utils/auth.py +3 -1
- cognite/neat/_version.py +1 -1
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/METADATA +2 -2
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/RECORD +67 -65
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/WHEEL +1 -1
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.106.0.dist-info → cognite_neat-0.108.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
from collections.abc import Iterable, Sequence
|
|
2
|
+
|
|
3
|
+
from cognite.client import data_modeling as dm
|
|
4
|
+
from cognite.client.exceptions import CogniteAPIError
|
|
5
|
+
from cognite.client.utils.useful_types import SequenceNotStr
|
|
6
|
+
from rdflib import Namespace, URIRef
|
|
7
|
+
|
|
8
|
+
from cognite.neat._client import NeatClient
|
|
9
|
+
from cognite.neat._constants import COGNITE_SPACES, DEFAULT_NAMESPACE
|
|
10
|
+
from cognite.neat._issues import IssueList, NeatIssue, catch_warnings
|
|
11
|
+
from cognite.neat._issues.warnings import CDFAuthWarning, ResourceNotFoundWarning, ResourceRetrievalWarning
|
|
12
|
+
from cognite.neat._rules.importers import DMSImporter
|
|
13
|
+
from cognite.neat._rules.models import DMSRules, InformationRules
|
|
14
|
+
from cognite.neat._rules.models.data_types import Json
|
|
15
|
+
from cognite.neat._rules.transformers import DMSToInformation, VerifyDMSRules
|
|
16
|
+
from cognite.neat._shared import Triple
|
|
17
|
+
|
|
18
|
+
from ._base import KnowledgeGraphExtractor
|
|
19
|
+
from ._dms import DMSExtractor
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class DMSGraphExtractor(KnowledgeGraphExtractor):
    """Extracts the instances of a data model together with the rules describing it.

    Args:
        data_model: The data model whose views are extracted.
        client: Client used to look up views and passed on to the instance extractor
            and the rules importer/verifier.
        namespace: Passed to the instance extractor as ``overwrite_namespace`` and to the
            DMS-to-information conversion. A falsy value is replaced by ``DEFAULT_NAMESPACE``.
        issues: Issues raised before construction (e.g. while retrieving the data model).
            They seed the list returned by ``get_issues``.
        instance_space: Forwarded unchanged to ``DMSExtractor.from_views``.
        skip_cognite_views: When true, views whose space is in ``COGNITE_SPACES`` are
            left out of ``extract``.
        unpack_json: Forwarded to ``DMSExtractor.from_views``. When true, JSON-typed
            properties are also removed from the DMS rules before verification.
        str_to_ideal_type: Forwarded unchanged to ``DMSExtractor.from_views``.
    """

    def __init__(
        self,
        data_model: dm.DataModel[dm.View],
        client: NeatClient,
        namespace: Namespace = DEFAULT_NAMESPACE,
        issues: Sequence[NeatIssue] | None = None,
        instance_space: str | SequenceNotStr[str] | None = None,
        skip_cognite_views: bool = True,
        unpack_json: bool = False,
        str_to_ideal_type: bool = False,
    ) -> None:
        self._client = client
        self._data_model = data_model
        self._namespace = namespace or DEFAULT_NAMESPACE
        self._issues = IssueList(issues)
        self._instance_space = instance_space
        self._skip_cognite_views = skip_cognite_views
        self._unpack_json = unpack_json
        self._str_to_ideal_type = str_to_ideal_type

        # Lazily populated caches, see _model_views and _create_rules.
        self._views: list[dm.View] | None = None
        self._information_rules: InformationRules | None = None
        self._dms_rules: DMSRules | None = None

    @classmethod
    def from_data_model_id(
        cls,
        data_model_id: dm.DataModelIdentifier,
        client: NeatClient,
        namespace: Namespace = DEFAULT_NAMESPACE,
        instance_space: str | SequenceNotStr[str] | None = None,
        skip_cognite_views: bool = True,
        unpack_json: bool = False,
        str_to_ideal_type: bool = False,
    ) -> "DMSGraphExtractor":
        """Create an extractor by retrieving the data model with the given id.

        If the retrieval raises a ``CogniteAPIError`` or returns nothing, a warning is
        recorded and the extractor is created around an empty placeholder model instead
        of raising.

        Args:
            data_model_id: Identifier of the data model to retrieve.
            client: Client used for the retrieval and by the created extractor.
            namespace: See the class documentation.
            instance_space: See the class documentation.
            skip_cognite_views: See the class documentation.
            unpack_json: See the class documentation.
            str_to_ideal_type: See the class documentation.

        Returns:
            An extractor for the latest version of the retrieved data model, or for an
            empty placeholder model when retrieval failed.
        """
        issues: list[NeatIssue] = []

        def build(model: dm.DataModel) -> "DMSGraphExtractor":
            # Single place for the constructor call shared by all three outcomes below.
            return cls(
                model,
                client,
                namespace,
                issues,
                instance_space,
                skip_cognite_views,
                unpack_json,
                str_to_ideal_type,
            )

        try:
            data_model = client.data_modeling.data_models.retrieve(data_model_id, inline_views=True)
        except CogniteAPIError as e:
            issues.append(CDFAuthWarning("retrieving data model", str(e)))
            return build(cls._create_empty_model(dm.DataModelId.load(data_model_id)))

        if not data_model:
            issues.append(ResourceRetrievalWarning(frozenset({data_model_id}), "data model"))
            return build(cls._create_empty_model(dm.DataModelId.load(data_model_id)))

        return build(data_model.latest_version())

    @classmethod
    def _create_empty_model(cls, data_model_id: dm.DataModelId) -> dm.DataModel:
        """Build a placeholder data model without views for the given id.

        The version falls back to the string "MISSING" when the id carries none.
        """
        return dm.DataModel(
            data_model_id.space,
            data_model_id.external_id,
            data_model_id.version or "MISSING",
            is_global=False,
            last_updated_time=0,
            created_time=0,
            description=None,
            name=None,
            views=[],
        )

    @property
    def _model_views(self) -> list[dm.View]:
        """The views of the data model, resolved on first access and then cached."""
        if self._views is None:
            self._views = self._get_views()
        return self._views

    @property
    def description(self) -> str:
        return "Extracts a data model with nodes and edges."

    @property
    def source_uri(self) -> URIRef:
        """URI built from the client's project and the data model's space, external id and version."""
        space, external_id, version = self._data_model.as_id().as_tuple()
        # NOTE(review): this uses DEFAULT_NAMESPACE rather than self._namespace - confirm that is intended.
        return DEFAULT_NAMESPACE[f"{self._client.config.project}/{space}/{external_id}/{version}"]

    def extract(self) -> Iterable[Triple]:
        """Extracts the knowledge graph from the data model."""
        views = self._model_views
        if self._skip_cognite_views:
            views = [view for view in views if view.space not in COGNITE_SPACES]

        yield from DMSExtractor.from_views(
            self._client,
            views,
            overwrite_namespace=self._namespace,
            instance_space=self._instance_space,
            unpack_json=self._unpack_json,
            str_to_ideal_type=self._str_to_ideal_type,
        ).extract()

    def _get_views(self) -> list[dm.View]:
        """Resolve every entry of the data model's views to a ``dm.View``.

        Entries that are only ``dm.ViewId`` references are looked up through the client.
        A failed lookup is recorded as a warning, and each reference that could not be
        resolved is recorded as a ``ResourceNotFoundWarning`` and left out of the result.
        """
        view_by_id: dict[dm.ViewId, dm.View] = {}
        if view_ids := [view_id for view_id in self._data_model.views if isinstance(view_id, dm.ViewId)]:
            try:
                # MyPy does not understand the isinstance check above.
                retrieved = self._client.data_modeling.views.retrieve(ids=view_ids)  # type: ignore[arg-type]
            except CogniteAPIError as e:
                self._issues.append(CDFAuthWarning("retrieving views", str(e)))
            else:
                view_by_id.update({view.as_id(): view for view in retrieved})

        views: list[dm.View] = []
        data_model_id = self._data_model.as_id()
        for dm_view in self._data_model.views:
            if isinstance(dm_view, dm.View):
                views.append(dm_view)
            elif isinstance(dm_view, dm.ViewId):
                if view := view_by_id.get(dm_view):
                    views.append(view)
                else:
                    self._issues.append(ResourceNotFoundWarning(dm_view, "view", data_model_id, "data model"))
        return views

    def get_information_rules(self) -> InformationRules:
        """Returns the information rules that the extractor uses."""
        if self._information_rules is None:
            self._information_rules, self._dms_rules = self._create_rules()
        return self._information_rules

    def get_dms_rules(self) -> DMSRules:
        """Returns the DMS rules that the extractor uses."""
        if self._dms_rules is None:
            self._information_rules, self._dms_rules = self._create_rules()
        return self._dms_rules

    def get_issues(self) -> IssueList:
        """Returns the issues that occurred during the extraction."""
        return self._issues

    def _create_rules(self) -> tuple[InformationRules, DMSRules]:
        """Import, verify and convert the data model into information and DMS rules.

        Returns:
            The information rules and the verified DMS rules they were derived from.
        """
        # The DMS and Information rules must be created together to link them properly.
        importer = DMSImporter.from_data_model(self._client, self._data_model)
        unverified_dms = importer.to_rules()
        if self._unpack_json and (dms_rules := unverified_dms.rules):
            # Drop the JSON properties from the DMS rules as these are no longer valid.
            json_name = Json().name  # To avoid instantiating Json multiple times.
            dms_rules.properties = [
                prop
                for prop in dms_rules.properties
                if not (
                    isinstance(prop.value_type, Json)
                    or (isinstance(prop.value_type, str) and prop.value_type == json_name)
                )
            ]

        with catch_warnings() as issues:
            # Any errors that occur are raised and caught outside the extractor.
            verified_dms = VerifyDMSRules(client=self._client).transform(unverified_dms)
            information_rules = DMSToInformation(self._namespace).transform(verified_dms)
            self._issues.extend(issues)
        return information_rules, verified_dms
|
|
@@ -141,7 +141,7 @@ def generate_triples(
|
|
|
141
141
|
|
|
142
142
|
# pregenerate instance ids for each remaining class
|
|
143
143
|
instance_ids = {
|
|
144
|
-
key: [URIRef(namespace[f"{key.suffix}-{i+1}"]) for i in range(value)] for key, value in class_count.items()
|
|
144
|
+
key: [URIRef(namespace[f"{key.suffix}-{i + 1}"]) for i in range(value)] for key, value in class_count.items()
|
|
145
145
|
}
|
|
146
146
|
|
|
147
147
|
# create triple for each class instance defining its type
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
import zipfile
|
|
1
2
|
from collections.abc import Iterable
|
|
2
3
|
from pathlib import Path
|
|
3
|
-
from typing import get_args
|
|
4
|
+
from typing import cast, get_args
|
|
4
5
|
|
|
5
6
|
from rdflib import URIRef
|
|
6
7
|
from rdflib.util import guess_format
|
|
@@ -10,6 +11,7 @@ from cognite.neat._graph._shared import RDFTypes
|
|
|
10
11
|
from cognite.neat._graph.extractors._base import BaseExtractor
|
|
11
12
|
from cognite.neat._issues._base import IssueList
|
|
12
13
|
from cognite.neat._issues.errors import FileNotFoundNeatError, FileTypeUnexpectedError
|
|
14
|
+
from cognite.neat._issues.errors._general import NeatValueError
|
|
13
15
|
from cognite.neat._shared import Triple
|
|
14
16
|
|
|
15
17
|
|
|
@@ -24,25 +26,51 @@ class RdfFileExtractor(BaseExtractor):
|
|
|
24
26
|
|
|
25
27
|
def __init__(
    self,
    filepath: Path | zipfile.ZipExtFile,
    base_uri: URIRef = DEFAULT_BASE_URI,
    issue_list: IssueList | None = None,
):
    """Store the RDF source and record problems with it in the issue list.

    Args:
        filepath: Either a path on disk or an opened member of a zip archive.
        base_uri: Stored on the extractor as ``base_uri``.
        issue_list: Receives the issues found here. When falsy, a new list titled
            after the source's name is created.
    """
    self.issue_list = issue_list or IssueList(title=f"{filepath.name}")
    self.base_uri = base_uri
    self.filepath = filepath

    if isinstance(filepath, Path):
        format_hint = str(filepath)
        reported_path = filepath
    else:
        # An opened zip member is not a path; only its name is available.
        format_hint = filepath.name
        reported_path = Path(filepath.name)

    self.format = guess_format(format_hint)

    # Existence is only checked for sources given as a path.
    if isinstance(filepath, Path) and not filepath.exists():
        self.issue_list.append(FileNotFoundNeatError(filepath))

    if not self.format:
        self.issue_list.append(FileTypeUnexpectedError(reported_path, frozenset(get_args(RDFTypes))))
|
|
46
49
|
|
|
47
50
|
def extract(self) -> Iterable[Triple]:
    """Not implemented for this extractor; always raises ``NotImplementedError``."""
    raise NotImplementedError
|
|
52
|
+
|
|
53
|
+
@classmethod
def from_zip(
    cls,
    filepath: Path,
    filename: str = "neat-session/instances/instances.trig",
    base_uri: URIRef = DEFAULT_BASE_URI,
    issue_list: IssueList | None = None,
):
    """Create an extractor that reads one member of a zip archive.

    Args:
        filepath: Path to the zip archive.
        filename: Name of the archive member to read.
        base_uri: Passed on to the extractor.
        issue_list: Passed on to the extractor.

    Returns:
        An extractor created from the opened archive member.

    Raises:
        FileNotFoundNeatError: If ``filepath`` does not exist.
        NeatValueError: If ``filepath`` does not have the ``.zip`` suffix, or the
            archive has no member named ``filename``.
    """
    if not filepath.exists():
        raise FileNotFoundNeatError(filepath)
    if filepath.suffix not in {".zip"}:
        raise NeatValueError(f"Expected a zip file, got {filepath.suffix}")

    with zipfile.ZipFile(filepath, "r") as zip_ref:
        for file_info in zip_ref.infolist():
            if file_info.filename == filename:
                # The member is opened here and handed to the extractor, which is
                # expected to close it once the triples have been extracted.
                # NOTE(review): the handle is returned from inside the `with` block -
                # confirm it remains readable after the archive object is closed.
                file = zip_ref.open(file_info)
                return cls(cast(zipfile.ZipExtFile, file), base_uri, issue_list)

    raise NeatValueError(f"Cannot extract {filename} from zip file {filepath}")
|
|
@@ -20,6 +20,4 @@ def _repr_html_() -> str:
|
|
|
20
20
|
]
|
|
21
21
|
)._repr_html_()
|
|
22
22
|
|
|
23
|
-
return
|
|
24
|
-
"<strong>Loader</strong> A loader writes data from Neat's triple storage into a target system" f"<br />{table}"
|
|
25
|
-
)
|
|
23
|
+
return f"<strong>Loader</strong> A loader writes data from Neat's triple storage into a target system<br />{table}"
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import itertools
|
|
2
2
|
import json
|
|
3
|
+
import urllib.parse
|
|
3
4
|
import warnings
|
|
4
5
|
from collections import defaultdict
|
|
5
6
|
from collections.abc import Iterable, Sequence
|
|
@@ -32,7 +33,7 @@ from cognite.neat._issues.errors import (
|
|
|
32
33
|
from cognite.neat._issues.warnings import PropertyDirectRelationLimitWarning, PropertyTypeNotSupportedWarning
|
|
33
34
|
from cognite.neat._rules.analysis._dms import DMSAnalysis
|
|
34
35
|
from cognite.neat._rules.models import DMSRules
|
|
35
|
-
from cognite.neat._rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE, Json
|
|
36
|
+
from cognite.neat._rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE, Json, String
|
|
36
37
|
from cognite.neat._rules.models.entities._single_value import ViewEntity
|
|
37
38
|
from cognite.neat._shared import InstanceType
|
|
38
39
|
from cognite.neat._store import NeatGraphStore
|
|
@@ -70,6 +71,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
70
71
|
tracker: type[Tracker] | None = None,
|
|
71
72
|
rules: DMSRules | None = None,
|
|
72
73
|
client: NeatClient | None = None,
|
|
74
|
+
unquote_external_ids: bool = False,
|
|
73
75
|
):
|
|
74
76
|
super().__init__(graph_store)
|
|
75
77
|
self.data_model = data_model
|
|
@@ -79,6 +81,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
79
81
|
self._tracker: type[Tracker] = tracker or LogTracker
|
|
80
82
|
self.rules = rules
|
|
81
83
|
self._client = client
|
|
84
|
+
self._unquote_external_ids = unquote_external_ids
|
|
82
85
|
|
|
83
86
|
@classmethod
|
|
84
87
|
def from_data_model_id(
|
|
@@ -99,7 +102,12 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
99
102
|
|
|
100
103
|
@classmethod
|
|
101
104
|
def from_rules(
|
|
102
|
-
cls,
|
|
105
|
+
cls,
|
|
106
|
+
rules: DMSRules,
|
|
107
|
+
graph_store: NeatGraphStore,
|
|
108
|
+
instance_space: str,
|
|
109
|
+
client: NeatClient | None = None,
|
|
110
|
+
unquote_external_ids: bool = False,
|
|
103
111
|
) -> "DMSLoader":
|
|
104
112
|
issues: list[NeatIssue] = []
|
|
105
113
|
data_model: dm.DataModel[dm.View] | None = None
|
|
@@ -125,6 +133,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
125
133
|
issues,
|
|
126
134
|
rules=rules,
|
|
127
135
|
client=client,
|
|
136
|
+
unquote_external_ids=unquote_external_ids,
|
|
128
137
|
)
|
|
129
138
|
|
|
130
139
|
def _load(self, stop_on_exception: bool = False) -> Iterable[dm.InstanceApply | NeatIssue | type[_END_OF_CLASS]]:
|
|
@@ -142,7 +151,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
142
151
|
if self.rules and self.rules.metadata.logical
|
|
143
152
|
else None
|
|
144
153
|
)
|
|
154
|
+
|
|
145
155
|
view_and_count_by_id = self._select_views_with_instances(self.data_model.views)
|
|
156
|
+
|
|
146
157
|
if self._client:
|
|
147
158
|
view_and_count_by_id, properties_point_to_self = self._sort_by_direct_relation_dependencies(
|
|
148
159
|
view_and_count_by_id
|
|
@@ -159,7 +170,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
159
170
|
|
|
160
171
|
tracker = self._tracker(type(self).__name__, view_ids, "views")
|
|
161
172
|
for view_id, (view, instance_count) in view_and_count_by_id.items():
|
|
162
|
-
pydantic_cls, edge_by_type, issues = self._create_validation_classes(view) # type: ignore[var-annotated]
|
|
173
|
+
pydantic_cls, edge_by_type, edge_by_prop_id, issues = self._create_validation_classes(view) # type: ignore[var-annotated]
|
|
163
174
|
yield from issues
|
|
164
175
|
tracker.issue(issues)
|
|
165
176
|
|
|
@@ -200,20 +211,61 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
200
211
|
)
|
|
201
212
|
|
|
202
213
|
for identifier, properties in instance_iterable:
|
|
214
|
+
start_node, end_node = self._pop_start_end_node(properties)
|
|
215
|
+
is_edge = start_node and end_node
|
|
216
|
+
if (is_edge and view.used_for == "node") or (not is_edge and view.used_for == "edge"):
|
|
217
|
+
instance_type = "edge" if is_edge else "node"
|
|
218
|
+
creation_error = ResourceCreationError(
|
|
219
|
+
identifier,
|
|
220
|
+
instance_type,
|
|
221
|
+
error=f"{instance_type.capitalize()} found in {view.used_for} view",
|
|
222
|
+
)
|
|
223
|
+
tracker.issue(creation_error)
|
|
224
|
+
if stop_on_exception:
|
|
225
|
+
raise creation_error
|
|
226
|
+
yield creation_error
|
|
227
|
+
continue
|
|
228
|
+
|
|
203
229
|
if skip_properties:
|
|
204
230
|
properties = {k: v for k, v in properties.items() if k not in skip_properties}
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
231
|
+
|
|
232
|
+
if start_node and end_node:
|
|
233
|
+
# Is an edge
|
|
234
|
+
try:
|
|
235
|
+
yield self._create_edge_with_properties(
|
|
236
|
+
identifier, properties, start_node, end_node, pydantic_cls, view_id
|
|
237
|
+
)
|
|
238
|
+
except ValueError as e:
|
|
239
|
+
error_edge = ResourceCreationError(identifier, "edge", error=str(e))
|
|
240
|
+
tracker.issue(error_edge)
|
|
241
|
+
if stop_on_exception:
|
|
242
|
+
raise error_edge from e
|
|
243
|
+
yield error_edge
|
|
244
|
+
else:
|
|
245
|
+
try:
|
|
246
|
+
yield self._create_node(identifier, properties, pydantic_cls, view_id)
|
|
247
|
+
except ValueError as e:
|
|
248
|
+
error_node = ResourceCreationError(identifier, "node", error=str(e))
|
|
249
|
+
tracker.issue(error_node)
|
|
250
|
+
if stop_on_exception:
|
|
251
|
+
raise error_node from e
|
|
252
|
+
yield error_node
|
|
253
|
+
yield from self._create_edges_without_properties(
|
|
254
|
+
identifier, properties, edge_by_type, edge_by_prop_id, tracker
|
|
255
|
+
)
|
|
214
256
|
tracker.finish(track_id)
|
|
215
257
|
yield _END_OF_CLASS
|
|
216
258
|
|
|
259
|
+
@staticmethod
|
|
260
|
+
def _pop_start_end_node(properties: dict[str | InstanceType, list[str]]) -> tuple[str | None, str | None]:
|
|
261
|
+
start_node = properties.pop("startNode", [None])[0]
|
|
262
|
+
if not start_node:
|
|
263
|
+
start_node = properties.pop("start_node", [None])[0]
|
|
264
|
+
end_node = properties.pop("endNode", [None])[0]
|
|
265
|
+
if not end_node:
|
|
266
|
+
end_node = properties.pop("end_node", [None])[0]
|
|
267
|
+
return start_node, end_node
|
|
268
|
+
|
|
217
269
|
def write_to_file(self, filepath: Path) -> None:
|
|
218
270
|
if filepath.suffix not in [".json", ".yaml", ".yml"]:
|
|
219
271
|
raise ValueError(f"File format {filepath.suffix} is not supported")
|
|
@@ -298,17 +350,30 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
298
350
|
|
|
299
351
|
def _create_validation_classes(
|
|
300
352
|
self, view: dm.View
|
|
301
|
-
) -> tuple[
|
|
353
|
+
) -> tuple[
|
|
354
|
+
type[BaseModel],
|
|
355
|
+
dict[str, tuple[str, dm.EdgeConnection]],
|
|
356
|
+
dict[str, tuple[str, dm.EdgeConnection]],
|
|
357
|
+
NeatIssueList,
|
|
358
|
+
]:
|
|
302
359
|
issues = IssueList()
|
|
303
360
|
field_definitions: dict[str, tuple[type, Any]] = {}
|
|
304
|
-
|
|
361
|
+
edge_by_type: dict[str, tuple[str, dm.EdgeConnection]] = {}
|
|
362
|
+
edge_by_prop_id: dict[str, tuple[str, dm.EdgeConnection]] = {}
|
|
305
363
|
validators: dict[str, classmethod] = {}
|
|
306
364
|
direct_relation_by_property: dict[str, dm.DirectRelation] = {}
|
|
307
365
|
unit_properties: list[str] = []
|
|
308
366
|
json_fields: list[str] = []
|
|
367
|
+
text_fields: list[str] = []
|
|
309
368
|
for prop_id, prop in view.properties.items():
|
|
310
369
|
if isinstance(prop, dm.EdgeConnection):
|
|
311
|
-
|
|
370
|
+
if prop.edge_source:
|
|
371
|
+
# Edges with properties are created separately
|
|
372
|
+
continue
|
|
373
|
+
|
|
374
|
+
edge_by_type[prop.type.external_id] = prop_id, prop
|
|
375
|
+
edge_by_prop_id[prop_id] = prop_id, prop
|
|
376
|
+
|
|
312
377
|
if isinstance(prop, dm.MappedProperty):
|
|
313
378
|
if is_readonly_property(prop.container, prop.container_property_identifier):
|
|
314
379
|
continue
|
|
@@ -334,6 +399,8 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
334
399
|
|
|
335
400
|
if data_type == Json:
|
|
336
401
|
json_fields.append(prop_id)
|
|
402
|
+
elif data_type == String:
|
|
403
|
+
text_fields.append(prop_id)
|
|
337
404
|
python_type = data_type.python
|
|
338
405
|
if isinstance(prop.type, ListablePropertyType) and prop.type.is_list:
|
|
339
406
|
python_type = list[python_type]
|
|
@@ -414,7 +481,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
414
481
|
)
|
|
415
482
|
|
|
416
483
|
pydantic_cls = create_model(view.external_id, __validators__=validators, **field_definitions) # type: ignore[arg-type, call-overload]
|
|
417
|
-
return pydantic_cls,
|
|
484
|
+
return pydantic_cls, edge_by_type, edge_by_prop_id, issues
|
|
418
485
|
|
|
419
486
|
def _create_node(
|
|
420
487
|
self,
|
|
@@ -425,6 +492,8 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
425
492
|
) -> dm.InstanceApply:
|
|
426
493
|
type_ = properties.pop(RDF.type, [None])[0]
|
|
427
494
|
created = pydantic_cls.model_validate(properties)
|
|
495
|
+
if self._unquote_external_ids:
|
|
496
|
+
identifier = urllib.parse.unquote(identifier)
|
|
428
497
|
|
|
429
498
|
return dm.NodeApply(
|
|
430
499
|
space=self.instance_space,
|
|
@@ -435,17 +504,49 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
435
504
|
],
|
|
436
505
|
)
|
|
437
506
|
|
|
438
|
-
def
|
|
507
|
+
def _create_edge_with_properties(
    self,
    identifier: str,
    properties: dict[str | InstanceType, list[str]],
    start_node: str,
    end_node: str,
    pydantic_cls: type[BaseModel],
    view_id: dm.ViewId,
) -> dm.EdgeApply:
    """Build an edge that carries property values for the given view.

    Args:
        identifier: External id of the edge; URL-unquoted first when the loader was
            created with ``unquote_external_ids``.
        properties: Property values of the edge. The ``RDF.type`` entry is removed
            before the remaining values are validated.
        start_node: External id of the start node, referenced in the instance space.
        end_node: External id of the end node, referenced in the instance space.
        pydantic_cls: Model used to validate ``properties``.
        view_id: View the properties are written to; its space and external id also
            form the edge type.

    Returns:
        The edge to be written.

    Raises:
        ValueError: If ``properties`` holds no ``RDF.type`` value.
    """
    rdf_type = properties.pop(RDF.type, [None])[0]
    # Validation runs before the type check, so a validation failure is reported first.
    validated = pydantic_cls.model_validate(properties)
    if rdf_type is None:
        raise ValueError(f"Missing type for edge {identifier}")

    external_id = urllib.parse.unquote(identifier) if self._unquote_external_ids else identifier

    # NOTE(review): start_node/end_node are used as given and are not unquoted here -
    # confirm they match the node external ids when unquote_external_ids is set.
    edge_type = dm.DirectRelationReference(view_id.space, view_id.external_id)
    source_ref = dm.DirectRelationReference(self.instance_space, start_node)
    target_ref = dm.DirectRelationReference(self.instance_space, end_node)
    edge_data = dm.NodeOrEdgeData(
        source=view_id,
        properties=dict(validated.model_dump(exclude_unset=True).items()),
    )
    return dm.EdgeApply(
        space=self.instance_space,
        external_id=external_id,
        type=edge_type,
        start_node=source_ref,
        end_node=target_ref,
        sources=[edge_data],
    )
|
|
534
|
+
|
|
535
|
+
def _create_edges_without_properties(
|
|
439
536
|
self,
|
|
440
537
|
identifier: str,
|
|
441
538
|
properties: dict[str, list[str]],
|
|
442
539
|
edge_by_type: dict[str, tuple[str, dm.EdgeConnection]],
|
|
540
|
+
edge_by_prop_id: dict[str, tuple[str, dm.EdgeConnection]],
|
|
443
541
|
tracker: Tracker,
|
|
444
542
|
) -> Iterable[dm.EdgeApply | NeatIssue]:
|
|
445
543
|
for predicate, values in properties.items():
|
|
446
|
-
if predicate
|
|
544
|
+
if predicate in edge_by_type:
|
|
545
|
+
prop_id, edge = edge_by_type[predicate]
|
|
546
|
+
elif predicate in edge_by_prop_id:
|
|
547
|
+
prop_id, edge = edge_by_prop_id[predicate]
|
|
548
|
+
else:
|
|
447
549
|
continue
|
|
448
|
-
prop_id, edge = edge_by_type[predicate]
|
|
449
550
|
if isinstance(edge, SingleEdgeConnection) and len(values) > 1:
|
|
450
551
|
error = ResourceDuplicatedError(
|
|
451
552
|
resource_type="edge",
|
|
@@ -456,6 +557,9 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
456
557
|
yield error
|
|
457
558
|
for target in values:
|
|
458
559
|
external_id = f"{identifier}.{prop_id}.{target}"
|
|
560
|
+
if self._unquote_external_ids:
|
|
561
|
+
external_id = urllib.parse.unquote(external_id)
|
|
562
|
+
|
|
459
563
|
yield dm.EdgeApply(
|
|
460
564
|
space=self.instance_space,
|
|
461
565
|
external_id=(external_id if len(external_id) < 256 else create_sha256_hash(external_id)),
|