cognite-neat 0.109.4__py3-none-any.whl → 0.111.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-neat might be problematic. Click here for more details.
- cognite/neat/_alpha.py +8 -0
- cognite/neat/_client/_api/schema.py +43 -1
- cognite/neat/_client/data_classes/schema.py +4 -4
- cognite/neat/_constants.py +15 -1
- cognite/neat/_graph/extractors/__init__.py +4 -0
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +8 -16
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +48 -19
- cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +23 -17
- cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +15 -17
- cognite/neat/_graph/extractors/_dict.py +102 -0
- cognite/neat/_graph/extractors/_dms.py +27 -40
- cognite/neat/_graph/extractors/_dms_graph.py +30 -3
- cognite/neat/_graph/extractors/_iodd.py +3 -3
- cognite/neat/_graph/extractors/_mock_graph_generator.py +9 -7
- cognite/neat/_graph/extractors/_raw.py +67 -0
- cognite/neat/_graph/loaders/_base.py +20 -4
- cognite/neat/_graph/loaders/_rdf2dms.py +476 -383
- cognite/neat/_graph/queries/_base.py +163 -133
- cognite/neat/_graph/transformers/__init__.py +1 -3
- cognite/neat/_graph/transformers/_classic_cdf.py +6 -22
- cognite/neat/_graph/transformers/_rdfpath.py +2 -49
- cognite/neat/_issues/__init__.py +1 -6
- cognite/neat/_issues/_base.py +21 -252
- cognite/neat/_issues/_contextmanagers.py +46 -0
- cognite/neat/_issues/_factory.py +69 -0
- cognite/neat/_issues/errors/__init__.py +20 -4
- cognite/neat/_issues/errors/_external.py +7 -0
- cognite/neat/_issues/errors/_wrapper.py +81 -3
- cognite/neat/_issues/formatters.py +4 -4
- cognite/neat/_issues/warnings/__init__.py +3 -2
- cognite/neat/_issues/warnings/_properties.py +8 -0
- cognite/neat/_issues/warnings/user_modeling.py +12 -0
- cognite/neat/_rules/_constants.py +12 -0
- cognite/neat/_rules/_shared.py +3 -2
- cognite/neat/_rules/analysis/__init__.py +2 -3
- cognite/neat/_rules/analysis/_base.py +430 -259
- cognite/neat/_rules/catalog/info-rules-imf.xlsx +0 -0
- cognite/neat/_rules/exporters/_rules2excel.py +3 -9
- cognite/neat/_rules/exporters/_rules2instance_template.py +2 -2
- cognite/neat/_rules/exporters/_rules2ontology.py +5 -4
- cognite/neat/_rules/importers/_base.py +2 -47
- cognite/neat/_rules/importers/_dms2rules.py +7 -10
- cognite/neat/_rules/importers/_dtdl2rules/dtdl_importer.py +2 -2
- cognite/neat/_rules/importers/_rdf/_inference2rules.py +66 -26
- cognite/neat/_rules/importers/_rdf/_shared.py +1 -1
- cognite/neat/_rules/importers/_spreadsheet2rules.py +12 -9
- cognite/neat/_rules/models/_base_rules.py +0 -2
- cognite/neat/_rules/models/data_types.py +7 -0
- cognite/neat/_rules/models/dms/_exporter.py +9 -8
- cognite/neat/_rules/models/dms/_rules.py +29 -2
- cognite/neat/_rules/models/dms/_rules_input.py +9 -1
- cognite/neat/_rules/models/dms/_validation.py +115 -5
- cognite/neat/_rules/models/entities/_loaders.py +1 -1
- cognite/neat/_rules/models/entities/_multi_value.py +2 -2
- cognite/neat/_rules/models/entities/_single_value.py +8 -3
- cognite/neat/_rules/models/entities/_wrapped.py +2 -2
- cognite/neat/_rules/models/information/_rules.py +18 -17
- cognite/neat/_rules/models/information/_rules_input.py +3 -1
- cognite/neat/_rules/models/information/_validation.py +66 -17
- cognite/neat/_rules/transformers/__init__.py +8 -2
- cognite/neat/_rules/transformers/_converters.py +234 -44
- cognite/neat/_rules/transformers/_verification.py +5 -10
- cognite/neat/_session/_base.py +6 -4
- cognite/neat/_session/_explore.py +39 -0
- cognite/neat/_session/_inspect.py +25 -6
- cognite/neat/_session/_prepare.py +12 -0
- cognite/neat/_session/_read.py +88 -20
- cognite/neat/_session/_set.py +7 -1
- cognite/neat/_session/_show.py +11 -123
- cognite/neat/_session/_state.py +6 -2
- cognite/neat/_session/_subset.py +64 -0
- cognite/neat/_session/_to.py +177 -19
- cognite/neat/_store/_graph_store.py +9 -246
- cognite/neat/_utils/rdf_.py +36 -5
- cognite/neat/_utils/spreadsheet.py +44 -1
- cognite/neat/_utils/text.py +124 -37
- cognite/neat/_utils/upload.py +2 -0
- cognite/neat/_version.py +2 -2
- {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/METADATA +1 -1
- {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/RECORD +83 -82
- {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/WHEEL +1 -1
- cognite/neat/_graph/queries/_construct.py +0 -187
- cognite/neat/_graph/queries/_shared.py +0 -173
- cognite/neat/_rules/analysis/_dms.py +0 -57
- cognite/neat/_rules/analysis/_information.py +0 -249
- cognite/neat/_rules/models/_rdfpath.py +0 -372
- {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.109.4.dist-info → cognite_neat-0.111.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import urllib.parse
|
|
3
|
+
from collections.abc import Callable, Iterable, Mapping, Set
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from cognite.client import data_modeling as dm
|
|
7
|
+
from cognite.client.data_classes.data_modeling.instances import Instance
|
|
8
|
+
from rdflib import XSD, Literal, Namespace, URIRef
|
|
9
|
+
|
|
10
|
+
from cognite.neat._shared import Triple
|
|
11
|
+
from cognite.neat._utils.auxiliary import string_to_ideal_type
|
|
12
|
+
|
|
13
|
+
from ._base import BaseExtractor
|
|
14
|
+
|
|
15
|
+
DEFAULT_EMPTY_VALUES = frozenset({"nan", "null", "none", "", " ", "nil", "n/a", "na", "unknown", "undefined"})
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class DictExtractor(BaseExtractor):
    """Extract triples from a single dictionary describing one subject.

    Every key of ``data`` becomes a predicate in ``namespace`` (URL-quoted) and
    every value becomes one or more objects attached to the subject ``id_``.

    Args:
        id_: Subject of every triple that is yielded.
        data: The key-value pairs to convert into triples.
        namespace: Namespace used to build predicates and URI references.
        uri_ref_keys: Keys whose scalar values are emitted as URI references
            in ``namespace``. Defaults to no keys.
        empty_values: Case-folded strings that are skipped while unpacking
            nested JSON objects.
        str_to_ideal_type: If True, string values found while unpacking nested
            JSON objects are passed through ``string_to_ideal_type``.
        unpack_json: If True, dict values are flattened into separate
            predicates instead of being stored as a single JSON literal.
    """

    def __init__(
        self,
        id_: URIRef,
        data: Mapping[str, Any],
        namespace: Namespace,
        uri_ref_keys: set[str] | None = None,
        empty_values: Set[str] = DEFAULT_EMPTY_VALUES,
        str_to_ideal_type: bool = False,
        unpack_json: bool = False,
    ) -> None:
        self.id_ = id_
        self.namespace = namespace
        self.data = data
        self.uri_ref_keys = uri_ref_keys or set()
        self.empty_values = empty_values
        self.str_to_ideal_type = str_to_ideal_type
        self.unpack_json = unpack_json

    def extract(self) -> Iterable[Triple]:
        """Yield one ``(subject, predicate, object)`` triple per extracted value."""
        for key, value in self.data.items():
            for predicate_str, object_ in self._get_predicate_objects_pair(key, value, self.unpack_json):
                yield self.id_, self.namespace[urllib.parse.quote(predicate_str)], object_

    def _get_predicate_objects_pair(
        self, key: str, value: Any, unpack_json: bool
    ) -> Iterable[tuple[str, Literal | URIRef]]:
        """Yield ``(predicate name, object)`` pairs for a single key-value pair.

        Args:
            key: The predicate name before URL-quoting.
            value: The raw value; scalars, dicts and lists are handled, any
                other type (including ``None``) yields no literal.
            unpack_json: Whether a dict value is flattened via ``_unpack_json``
                rather than serialized as one JSON literal.
        """
        if key in self.uri_ref_keys and value is not None and not isinstance(value, dict | list):
            # urllib.parse.quote only accepts str/bytes; numeric and boolean
            # reference values are stringified first instead of raising TypeError.
            # None is skipped so that no reference to a "None" resource is created.
            reference = value if isinstance(value, str | bytes) else str(value)
            yield key, URIRef(self.namespace[urllib.parse.quote(reference)])
        # NOTE(review): this is a plain `if`, so a scalar under a URI-ref key is
        # emitted both as a URIRef (above) and as a Literal (below) - confirm this
        # double emission is intended before changing it to `elif`.
        if isinstance(value, str | float | bool | int):
            yield key, Literal(value)
        elif isinstance(value, dict) and unpack_json:
            yield from self._unpack_json(value)
        elif isinstance(value, dict):
            # This object is a json object.
            yield key, Literal(json.dumps(value), datatype=XSD._NS["json"])
        elif isinstance(value, list):
            for item in value:
                # List items are never unpacked; dict items become JSON literals.
                yield from self._get_predicate_objects_pair(key, item, False)

    def _unpack_json(self, value: dict, parent: str | None = None) -> Iterable[tuple[str, Literal | URIRef]]:
        """Flatten a nested dict into ``(predicate name, object)`` pairs.

        Nested keys are joined to their parent with ``_``; dict items inside a
        list additionally get their 1-based position appended to the key.

        Args:
            value: The dict to flatten.
            parent: Key prefix inherited from the enclosing level, if any.
        """
        for sub_key, sub_value in value.items():
            key = f"{parent}_{sub_key}" if parent else sub_key
            if isinstance(sub_value, str):
                if sub_value.casefold() in self.empty_values:
                    continue
                if self.str_to_ideal_type:
                    yield key, Literal(string_to_ideal_type(sub_value))
                else:
                    yield key, Literal(sub_value)
            elif isinstance(sub_value, int | float | bool):
                yield key, Literal(sub_value)
            elif isinstance(sub_value, dict):
                yield from self._unpack_json(sub_value, key)
            elif isinstance(sub_value, list):
                for no, item in enumerate(sub_value, 1):
                    if isinstance(item, dict):
                        yield from self._unpack_json(item, f"{key}_{no}")
                    else:
                        yield from self._get_predicate_objects_pair(key, item, self.unpack_json)
            else:
                yield key, Literal(str(sub_value))
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class DMSPropertyExtractor(DictExtractor):
    """Dictionary extractor that turns direct-relation dicts into URI references.

    Behaves like ``DictExtractor`` with no URI-ref keys, except that a dict
    value holding both ``space`` and ``externalId`` is loaded as a
    ``DirectRelationReference`` and converted with ``as_uri_ref``.

    Args:
        id_: Subject of every triple that is yielded.
        data: The property values to convert into triples.
        namespace: Namespace used to build predicates.
        as_uri_ref: Callable converting an instance or direct relation
            reference into the URI reference used as the triple object.
        empty_values: Forwarded to ``DictExtractor``.
        str_to_ideal_type: Forwarded to ``DictExtractor``.
        unpack_json: Forwarded to ``DictExtractor``.
    """

    def __init__(
        self,
        id_: URIRef,
        data: Mapping[str, Any],
        namespace: Namespace,
        as_uri_ref: Callable[[Instance | dm.DirectRelationReference], URIRef],
        empty_values: Set[str] = DEFAULT_EMPTY_VALUES,
        str_to_ideal_type: bool = False,
        unpack_json: bool = False,
    ) -> None:
        super().__init__(
            id_=id_,
            data=data,
            namespace=namespace,
            uri_ref_keys=None,
            empty_values=empty_values,
            str_to_ideal_type=str_to_ideal_type,
            unpack_json=unpack_json,
        )
        self.as_uri_ref = as_uri_ref

    def _get_predicate_objects_pair(
        self, key: str, value: Any, unpack_json: bool
    ) -> Iterable[tuple[str, Literal | URIRef]]:
        """Yield ``(predicate name, object)`` pairs, resolving direct relations first."""
        looks_like_direct_relation = isinstance(value, dict) and "space" in value and "externalId" in value
        if not looks_like_direct_relation:
            # Everything else is handled by the generic dictionary logic.
            yield from super()._get_predicate_objects_pair(key, value, unpack_json)
            return
        reference = dm.DirectRelationReference.load(value)
        yield key, self.as_uri_ref(reference)
|
|
@@ -6,20 +6,18 @@ from typing import cast
|
|
|
6
6
|
from cognite.client import CogniteClient
|
|
7
7
|
from cognite.client import data_modeling as dm
|
|
8
8
|
from cognite.client.data_classes.data_modeling import DataModelIdentifier
|
|
9
|
-
from cognite.client.data_classes.data_modeling.instances import Instance,
|
|
9
|
+
from cognite.client.data_classes.data_modeling.instances import Instance, InstanceSort
|
|
10
10
|
from cognite.client.utils.useful_types import SequenceNotStr
|
|
11
|
-
from rdflib import RDF,
|
|
11
|
+
from rdflib import RDF, Literal, Namespace, URIRef
|
|
12
12
|
|
|
13
13
|
from cognite.neat._config import GLOBAL_CONFIG
|
|
14
14
|
from cognite.neat._constants import DEFAULT_SPACE_URI, is_readonly_property
|
|
15
15
|
from cognite.neat._issues.errors import ResourceRetrievalError
|
|
16
16
|
from cognite.neat._shared import Triple
|
|
17
|
-
from cognite.neat._utils.auxiliary import string_to_ideal_type
|
|
18
17
|
from cognite.neat._utils.collection_ import iterate_progress_bar
|
|
19
18
|
|
|
20
19
|
from ._base import BaseExtractor
|
|
21
|
-
|
|
22
|
-
DEFAULT_EMPTY_VALUES = frozenset({"nan", "null", "none", "", " ", "nil", "n/a", "na", "unknown", "undefined"})
|
|
20
|
+
from ._dict import DEFAULT_EMPTY_VALUES, DMSPropertyExtractor
|
|
23
21
|
|
|
24
22
|
|
|
25
23
|
class DMSExtractor(BaseExtractor):
|
|
@@ -188,39 +186,15 @@ class DMSExtractor(BaseExtractor):
|
|
|
188
186
|
|
|
189
187
|
for view_id, properties in instance.properties.items():
|
|
190
188
|
namespace = self._get_namespace(view_id.space)
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
elif isinstance(value, dict) and self.unpack_json:
|
|
201
|
-
for sub_key, sub_value in value.items():
|
|
202
|
-
if isinstance(sub_value, str):
|
|
203
|
-
if sub_value.casefold() in self.empty_values:
|
|
204
|
-
continue
|
|
205
|
-
if self.str_to_ideal_type:
|
|
206
|
-
yield sub_key, Literal(string_to_ideal_type(sub_value))
|
|
207
|
-
else:
|
|
208
|
-
yield sub_key, Literal(sub_value)
|
|
209
|
-
elif isinstance(sub_value, int | float | bool):
|
|
210
|
-
yield sub_key, Literal(sub_value)
|
|
211
|
-
elif isinstance(sub_value, dict):
|
|
212
|
-
yield from self._get_predicate_objects_pair(f"{key}_{sub_key}", sub_value)
|
|
213
|
-
elif isinstance(sub_value, list):
|
|
214
|
-
for item in sub_value:
|
|
215
|
-
yield from self._get_predicate_objects_pair(f"{key}_{sub_key}", item)
|
|
216
|
-
else:
|
|
217
|
-
yield sub_key, Literal(str(sub_value))
|
|
218
|
-
elif isinstance(value, dict):
|
|
219
|
-
# This object is a json object.
|
|
220
|
-
yield key, Literal(str(value), datatype=XSD._NS["json"])
|
|
221
|
-
elif isinstance(value, list):
|
|
222
|
-
for item in value:
|
|
223
|
-
yield from self._get_predicate_objects_pair(key, item)
|
|
189
|
+
yield from DMSPropertyExtractor(
|
|
190
|
+
id_,
|
|
191
|
+
properties,
|
|
192
|
+
namespace,
|
|
193
|
+
self._as_uri_ref,
|
|
194
|
+
self.empty_values,
|
|
195
|
+
self.str_to_ideal_type,
|
|
196
|
+
self.unpack_json,
|
|
197
|
+
).extract()
|
|
224
198
|
|
|
225
199
|
def _as_uri_ref(self, instance: Instance | dm.DirectRelationReference) -> URIRef:
|
|
226
200
|
return self._get_namespace(instance.space)[urllib.parse.quote(instance.external_id)]
|
|
@@ -270,8 +244,16 @@ class _ViewInstanceIterator(Iterable[Instance]):
|
|
|
270
244
|
}
|
|
271
245
|
# All nodes and edges with properties
|
|
272
246
|
if self.view.used_for in ("node", "all"):
|
|
247
|
+
# Without a sort, the sort is implicitly by the internal id, as cursoring needs a stable sort.
|
|
248
|
+
# By making the sort be on external_id, Postgres should pick the index
|
|
249
|
+
# that's on (project_id, space, external_id)
|
|
250
|
+
# WHERE deleted_at IS NULL. In other words, avoiding soft deleted instances.
|
|
273
251
|
node_iterable: Iterable[Instance] = self.client.data_modeling.instances(
|
|
274
|
-
chunk_size=None,
|
|
252
|
+
chunk_size=None,
|
|
253
|
+
instance_type="node",
|
|
254
|
+
sources=[view_id],
|
|
255
|
+
space=self.instance_space,
|
|
256
|
+
sort=InstanceSort(["node", "externalId"]),
|
|
275
257
|
)
|
|
276
258
|
if read_only_properties:
|
|
277
259
|
node_iterable = self._remove_read_only_properties(node_iterable, read_only_properties, view_id)
|
|
@@ -279,7 +261,11 @@ class _ViewInstanceIterator(Iterable[Instance]):
|
|
|
279
261
|
|
|
280
262
|
if self.view.used_for in ("edge", "all"):
|
|
281
263
|
yield from self.client.data_modeling.instances(
|
|
282
|
-
chunk_size=None,
|
|
264
|
+
chunk_size=None,
|
|
265
|
+
instance_type="edge",
|
|
266
|
+
sources=[view_id],
|
|
267
|
+
space=self.instance_space,
|
|
268
|
+
sort=InstanceSort(["edge", "externalId"]),
|
|
283
269
|
)
|
|
284
270
|
|
|
285
271
|
for prop in self.view.properties.values():
|
|
@@ -294,6 +280,7 @@ class _ViewInstanceIterator(Iterable[Instance]):
|
|
|
294
280
|
["edge", "type"], {"space": prop.type.space, "externalId": prop.type.external_id}
|
|
295
281
|
),
|
|
296
282
|
space=self.instance_space,
|
|
283
|
+
sort=InstanceSort(["edge", "externalId"]),
|
|
297
284
|
)
|
|
298
285
|
|
|
299
286
|
@staticmethod
|
|
@@ -12,6 +12,8 @@ from cognite.neat._issues.warnings import CDFAuthWarning, ResourceNotFoundWarnin
|
|
|
12
12
|
from cognite.neat._rules.importers import DMSImporter
|
|
13
13
|
from cognite.neat._rules.models import DMSRules, InformationRules
|
|
14
14
|
from cognite.neat._rules.models.data_types import Json
|
|
15
|
+
from cognite.neat._rules.models.entities import UnknownEntity
|
|
16
|
+
from cognite.neat._rules.models.information import InformationProperty
|
|
15
17
|
from cognite.neat._rules.transformers import DMSToInformation, VerifyDMSRules
|
|
16
18
|
from cognite.neat._shared import Triple
|
|
17
19
|
|
|
@@ -131,7 +133,6 @@ class DMSGraphExtractor(KnowledgeGraphExtractor):
|
|
|
131
133
|
yield from DMSExtractor.from_views(
|
|
132
134
|
self._client,
|
|
133
135
|
views,
|
|
134
|
-
overwrite_namespace=self._namespace,
|
|
135
136
|
instance_space=self._instance_space,
|
|
136
137
|
unpack_json=self._unpack_json,
|
|
137
138
|
str_to_ideal_type=self._str_to_ideal_type,
|
|
@@ -186,8 +187,12 @@ class DMSGraphExtractor(KnowledgeGraphExtractor):
|
|
|
186
187
|
prop
|
|
187
188
|
for prop in dms_rules.properties
|
|
188
189
|
if not (
|
|
189
|
-
|
|
190
|
-
|
|
190
|
+
(
|
|
191
|
+
isinstance(prop.value_type, Json)
|
|
192
|
+
or (isinstance(prop.value_type, str) and prop.value_type == json_name)
|
|
193
|
+
)
|
|
194
|
+
# We are not unpacking list of JSONs.
|
|
195
|
+
and prop.is_list is not True
|
|
191
196
|
)
|
|
192
197
|
]
|
|
193
198
|
|
|
@@ -195,5 +200,27 @@ class DMSGraphExtractor(KnowledgeGraphExtractor):
|
|
|
195
200
|
# Any errors that occur will be raised and caught outside the extractor.
|
|
196
201
|
verified_dms = VerifyDMSRules(client=self._client).transform(unverified_dms)
|
|
197
202
|
information_rules = DMSToInformation(self._namespace).transform(verified_dms)
|
|
203
|
+
|
|
204
|
+
# We need to sync the metadata between the two rules, such that the `.sync_with_info_rules` method works.
|
|
205
|
+
information_rules.metadata.physical = verified_dms.metadata.identifier
|
|
206
|
+
verified_dms.metadata.logical = information_rules.metadata.identifier
|
|
207
|
+
verified_dms.sync_with_info_rules(information_rules)
|
|
208
|
+
|
|
209
|
+
# Adding startNode and endNode to the information rules for views that are used for edges.
|
|
210
|
+
classes_by_prefix = {cls_.class_.prefix: cls_ for cls_ in information_rules.classes}
|
|
211
|
+
for view in self._model_views:
|
|
212
|
+
if view.used_for == "edge" and view.external_id in classes_by_prefix:
|
|
213
|
+
cls_ = classes_by_prefix[view.external_id]
|
|
214
|
+
for property_ in ("startNode", "endNode"):
|
|
215
|
+
information_rules.properties.append(
|
|
216
|
+
InformationProperty(
|
|
217
|
+
class_=cls_.class_,
|
|
218
|
+
property_=property_,
|
|
219
|
+
value_type=UnknownEntity(),
|
|
220
|
+
min_count=0,
|
|
221
|
+
max_count=1,
|
|
222
|
+
)
|
|
223
|
+
)
|
|
224
|
+
|
|
198
225
|
self._issues.extend(issues)
|
|
199
226
|
return information_rules, verified_dms
|
|
@@ -14,7 +14,7 @@ from cognite.neat._graph.extractors._base import BaseExtractor
|
|
|
14
14
|
from cognite.neat._issues.errors import FileReadError, NeatValueError
|
|
15
15
|
from cognite.neat._shared import Triple
|
|
16
16
|
from cognite.neat._utils.rdf_ import remove_namespace_from_uri
|
|
17
|
-
from cognite.neat._utils.text import
|
|
17
|
+
from cognite.neat._utils.text import to_camel_case
|
|
18
18
|
from cognite.neat._utils.xml_ import get_children
|
|
19
19
|
|
|
20
20
|
IODD = Namespace("http://www.io-link.com/IODD/2010/10/")
|
|
@@ -170,7 +170,7 @@ class IODDExtractor(BaseExtractor):
|
|
|
170
170
|
):
|
|
171
171
|
if text_id := child[0].attrib.get("textId"):
|
|
172
172
|
# Create connection from device to textId node
|
|
173
|
-
element_tag =
|
|
173
|
+
element_tag = to_camel_case(element_tag)
|
|
174
174
|
triples.append((id, IODD[element_tag], namespace[text_id]))
|
|
175
175
|
|
|
176
176
|
return triples
|
|
@@ -216,7 +216,7 @@ class IODDExtractor(BaseExtractor):
|
|
|
216
216
|
if id := element.attrib.get("id"):
|
|
217
217
|
if id in cls.std_variable_elements_to_extract:
|
|
218
218
|
if object := element.attrib.get("defaultValue"):
|
|
219
|
-
predicate =
|
|
219
|
+
predicate = to_camel_case(id.replace("V_", ""))
|
|
220
220
|
triples.append((device_id, IODD[predicate], Literal(object)))
|
|
221
221
|
return triples
|
|
222
222
|
|
|
@@ -12,11 +12,12 @@ import pandas as pd
|
|
|
12
12
|
from rdflib import RDF, Literal, Namespace, URIRef
|
|
13
13
|
|
|
14
14
|
from cognite.neat._rules._constants import EntityTypes
|
|
15
|
-
from cognite.neat._rules.analysis import
|
|
15
|
+
from cognite.neat._rules.analysis import RulesAnalysis
|
|
16
16
|
from cognite.neat._rules.models import DMSRules, InformationRules
|
|
17
17
|
from cognite.neat._rules.models.data_types import DataType
|
|
18
18
|
from cognite.neat._rules.models.entities import ClassEntity
|
|
19
19
|
from cognite.neat._rules.models.information import InformationProperty
|
|
20
|
+
from cognite.neat._rules.transformers import SubsetInformationRules
|
|
20
21
|
from cognite.neat._shared import Triple
|
|
21
22
|
from cognite.neat._utils.rdf_ import remove_namespace_from_uri
|
|
22
23
|
|
|
@@ -54,7 +55,7 @@ class MockGraphGenerator(BaseExtractor):
|
|
|
54
55
|
|
|
55
56
|
if not class_count:
|
|
56
57
|
self.class_count = {
|
|
57
|
-
class_: 1 for class_ in
|
|
58
|
+
class_: 1 for class_ in RulesAnalysis(self.rules).defined_classes(include_ancestors=True)
|
|
58
59
|
}
|
|
59
60
|
elif all(isinstance(key, str) for key in class_count.keys()):
|
|
60
61
|
self.class_count = {
|
|
@@ -104,7 +105,8 @@ def generate_triples(
|
|
|
104
105
|
"""
|
|
105
106
|
|
|
106
107
|
namespace = rules.metadata.namespace
|
|
107
|
-
|
|
108
|
+
analysis = RulesAnalysis(rules)
|
|
109
|
+
defined_classes = analysis.defined_classes(include_ancestors=True)
|
|
108
110
|
|
|
109
111
|
if non_existing_classes := set(class_count.keys()) - defined_classes:
|
|
110
112
|
msg = f"Class count contains classes {non_existing_classes} for which properties are not defined in Data Model!"
|
|
@@ -118,16 +120,16 @@ def generate_triples(
|
|
|
118
120
|
|
|
119
121
|
# Subset data model to only classes that are defined in class count
|
|
120
122
|
rules = (
|
|
121
|
-
|
|
123
|
+
SubsetInformationRules(classes=set(class_count.keys())).transform(rules)
|
|
122
124
|
if defined_classes != set(class_count.keys())
|
|
123
125
|
else rules
|
|
124
126
|
)
|
|
125
127
|
|
|
126
|
-
class_linkage =
|
|
128
|
+
class_linkage = analysis.class_linkage().to_pandas()
|
|
127
129
|
|
|
128
130
|
# Remove one of symmetric pairs from class linkage to maintain proper linking
|
|
129
131
|
# among instances of symmetrically linked classes
|
|
130
|
-
if sym_pairs :=
|
|
132
|
+
if sym_pairs := analysis.symmetrically_connected_classes():
|
|
131
133
|
class_linkage = _remove_higher_occurring_sym_pair(class_linkage, sym_pairs)
|
|
132
134
|
|
|
133
135
|
# Remove any symmetric pairs containing classes that are not present in the class count
|
|
@@ -137,7 +139,7 @@ def generate_triples(
|
|
|
137
139
|
generation_order = _prettify_generation_order(_get_generation_order(class_linkage))
|
|
138
140
|
|
|
139
141
|
# Generated simple view of data model
|
|
140
|
-
class_property_pairs =
|
|
142
|
+
class_property_pairs = analysis.properties_by_class(include_ancestors=True)
|
|
141
143
|
|
|
142
144
|
# pregenerate instance ids for each remaining class
|
|
143
145
|
instance_ids = {
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import urllib.parse
|
|
2
|
+
from collections.abc import Iterable, Set
|
|
3
|
+
from typing import Any, cast
|
|
4
|
+
|
|
5
|
+
from cognite.client.data_classes import Row, RowList
|
|
6
|
+
from cognite.client.utils.useful_types import SequenceNotStr
|
|
7
|
+
from rdflib import RDF, Namespace, URIRef
|
|
8
|
+
|
|
9
|
+
from cognite.neat._client import NeatClient
|
|
10
|
+
from cognite.neat._constants import DEFAULT_RAW_URI
|
|
11
|
+
from cognite.neat._shared import Triple
|
|
12
|
+
|
|
13
|
+
from ._base import BaseExtractor
|
|
14
|
+
from ._dict import DEFAULT_EMPTY_VALUES, DictExtractor
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class RAWExtractor(BaseExtractor):
    """Extract triples from the rows returned by ``client.raw.rows`` for one table.

    Each row yields an ``RDF.type`` triple followed by the triples that
    ``DictExtractor`` produces from the row's columns.

    Args:
        client: Client used to read the rows.
        db_name: Name of the database holding the table.
        table_name: Name of the table to read.
        table_type: Name used for the RDF type of each row; falls back to
            ``table_name`` when not given.
        foreign_keys: One column name or several; passed to ``DictExtractor``
            as the keys whose values are emitted as URI references.
        namespace: Namespace for identifiers, predicates and the RDF type.
            Defaults to ``Namespace(DEFAULT_RAW_URI)``.
        empty_values: Forwarded to ``DictExtractor``.
        str_to_ideal_type: Forwarded to ``DictExtractor``.
        unpack_json: Forwarded to ``DictExtractor``.
    """

    def __init__(
        self,
        client: NeatClient,
        db_name: str,
        table_name: str,
        table_type: str | None = None,
        foreign_keys: str | SequenceNotStr[str] | None = None,
        namespace: Namespace | None = None,
        empty_values: Set[str] = DEFAULT_EMPTY_VALUES,
        str_to_ideal_type: bool = False,
        unpack_json: bool = False,
    ) -> None:
        # A lone string is one key, not a sequence of single-character keys.
        if isinstance(foreign_keys, str):
            normalized_keys = {foreign_keys}
        else:
            normalized_keys = set(foreign_keys or [])

        self.client = client
        self.db_name = db_name
        self.table_name = table_name
        self.table_type = table_type
        self.foreign_keys = normalized_keys
        self.namespace = namespace or Namespace(DEFAULT_RAW_URI)
        self.empty_values = empty_values
        self.str_to_ideal_type = str_to_ideal_type
        self.unpack_json = unpack_json

    @property
    def _rdf_type(self) -> URIRef:
        """The RDF type assigned to every row of the table."""
        type_name = self.table_type or self.table_name
        return self.namespace[urllib.parse.quote(type_name)]

    def extract(self) -> Iterable[Triple]:
        """Yield the triples of every row in the table."""
        for row in self.client.raw.rows(self.db_name, self.table_name, partitions=10, chunk_size=None):
            if isinstance(row, Row):
                yield from self._row2triples(row)
                continue
            if isinstance(row, RowList):
                # Bug in SDK returning row list with chunk_size= None
                for single_row in row:
                    yield from self._row2triples(single_row)

    def _row2triples(self, row: Row) -> Iterable[Triple]:
        """Yield the type triple and the column triples of a single row."""
        # The key and columns are always set; only the SDK types them as optional.
        row_key, columns = cast(tuple[str, dict[str, Any]], (row.key, row.columns))
        subject = self.namespace[urllib.parse.quote(row_key)]
        yield subject, RDF.type, self._rdf_type

        column_extractor = DictExtractor(
            subject,
            columns,
            self.namespace,
            self.foreign_keys,
            self.empty_values,
            self.str_to_ideal_type,
            self.unpack_json,
        )
        yield from column_extractor.extract()
|
|
@@ -20,6 +20,11 @@ T_Output = TypeVar("T_Output")
|
|
|
20
20
|
class _END_OF_CLASS: ...
|
|
21
21
|
|
|
22
22
|
|
|
23
|
+
class _START_OF_CLASS:
|
|
24
|
+
def __init__(self, class_name: str | None = None):
|
|
25
|
+
self.class_name = class_name
|
|
26
|
+
|
|
27
|
+
|
|
23
28
|
class BaseLoader(ABC, Generic[T_Output]):
|
|
24
29
|
_new_line = "\n"
|
|
25
30
|
_encoding = "utf-8"
|
|
@@ -33,10 +38,16 @@ class BaseLoader(ABC, Generic[T_Output]):
|
|
|
33
38
|
|
|
34
39
|
def load(self, stop_on_exception: bool = False) -> Iterable[T_Output | NeatIssue]:
|
|
35
40
|
"""Load the graph with data."""
|
|
36
|
-
return (
|
|
41
|
+
return (
|
|
42
|
+
item # type: ignore[misc]
|
|
43
|
+
for item in self._load(stop_on_exception)
|
|
44
|
+
if not (item is _END_OF_CLASS or isinstance(item, _START_OF_CLASS))
|
|
45
|
+
)
|
|
37
46
|
|
|
38
47
|
@abstractmethod
|
|
39
|
-
def _load(
|
|
48
|
+
def _load(
|
|
49
|
+
self, stop_on_exception: bool = False
|
|
50
|
+
) -> Iterable[T_Output | NeatIssue | type[_END_OF_CLASS] | _START_OF_CLASS]:
|
|
40
51
|
"""Load the graph with data."""
|
|
41
52
|
pass
|
|
42
53
|
|
|
@@ -75,21 +86,25 @@ class CDFLoader(BaseLoader[T_Output]):
|
|
|
75
86
|
|
|
76
87
|
issues = IssueList()
|
|
77
88
|
items: list[T_Output] = []
|
|
89
|
+
last_class_name: str | None = None
|
|
78
90
|
for result in self._load(stop_on_exception=False):
|
|
79
91
|
if isinstance(result, NeatIssue):
|
|
80
92
|
issues.append(result)
|
|
81
93
|
elif result is _END_OF_CLASS:
|
|
82
94
|
...
|
|
95
|
+
elif isinstance(result, _START_OF_CLASS):
|
|
96
|
+
last_class_name = result.class_name
|
|
97
|
+
continue
|
|
83
98
|
else:
|
|
84
99
|
# MyPy does not understand that 'else' means the item will be of type T_Output
|
|
85
100
|
items.append(result) # type: ignore[arg-type]
|
|
86
101
|
|
|
87
102
|
if len(items) >= self._UPLOAD_BATCH_SIZE or result is _END_OF_CLASS:
|
|
88
|
-
yield from self._upload_to_cdf(client, items, dry_run, issues)
|
|
103
|
+
yield from self._upload_to_cdf(client, items, dry_run, issues, last_class_name)
|
|
89
104
|
issues = IssueList()
|
|
90
105
|
items = []
|
|
91
106
|
if items:
|
|
92
|
-
yield from self._upload_to_cdf(client, items, dry_run, issues)
|
|
107
|
+
yield from self._upload_to_cdf(client, items, dry_run, issues, last_class_name)
|
|
93
108
|
|
|
94
109
|
@abstractmethod
|
|
95
110
|
def _get_required_capabilities(self) -> list[Capability]:
|
|
@@ -102,5 +117,6 @@ class CDFLoader(BaseLoader[T_Output]):
|
|
|
102
117
|
items: list[T_Output],
|
|
103
118
|
dry_run: bool,
|
|
104
119
|
read_issues: IssueList,
|
|
120
|
+
class_name: str | None = None,
|
|
105
121
|
) -> Iterable[UploadResult]:
|
|
106
122
|
raise NotImplementedError
|