cognite-neat 0.119.1__py3-none-any.whl → 0.119.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (32)
  1. cognite/neat/_constants.py +34 -70
  2. cognite/neat/_graph/extractors/__init__.py +0 -6
  3. cognite/neat/_graph/loaders/_rdf2dms.py +5 -5
  4. cognite/neat/_graph/queries/__init__.py +1 -1
  5. cognite/neat/_graph/queries/_base.py +2 -456
  6. cognite/neat/_graph/queries/_queries.py +16 -0
  7. cognite/neat/_graph/queries/_select.py +440 -0
  8. cognite/neat/_graph/queries/_update.py +37 -0
  9. cognite/neat/_issues/errors/_external.py +4 -2
  10. cognite/neat/_rules/exporters/_rules2excel.py +240 -107
  11. cognite/neat/_rules/importers/_yaml2rules.py +7 -1
  12. cognite/neat/_rules/models/_base_rules.py +16 -1
  13. cognite/neat/_rules/models/dms/_validation.py +11 -2
  14. cognite/neat/_rules/transformers/_converters.py +16 -6
  15. cognite/neat/_session/_drop.py +2 -2
  16. cognite/neat/_session/_explore.py +4 -4
  17. cognite/neat/_session/_prepare.py +5 -5
  18. cognite/neat/_session/_read.py +6 -0
  19. cognite/neat/_session/_set.py +3 -3
  20. cognite/neat/_session/_show.py +1 -1
  21. cognite/neat/_session/_template.py +21 -2
  22. cognite/neat/_state/README.md +23 -0
  23. cognite/neat/_store/_graph_store.py +5 -5
  24. cognite/neat/_version.py +1 -1
  25. {cognite_neat-0.119.1.dist-info → cognite_neat-0.119.3.dist-info}/METADATA +37 -2
  26. {cognite_neat-0.119.1.dist-info → cognite_neat-0.119.3.dist-info}/RECORD +29 -28
  27. cognite/neat/_graph/extractors/_dexpi.py +0 -234
  28. cognite/neat/_graph/extractors/_iodd.py +0 -403
  29. cognite/neat/_graph/transformers/_iodd.py +0 -30
  30. {cognite_neat-0.119.1.dist-info → cognite_neat-0.119.3.dist-info}/LICENSE +0 -0
  31. {cognite_neat-0.119.1.dist-info → cognite_neat-0.119.3.dist-info}/WHEEL +0 -0
  32. {cognite_neat-0.119.1.dist-info → cognite_neat-0.119.3.dist-info}/entry_points.txt +0 -0
cognite/neat/_graph/extractors/_iodd.py (file removed)
@@ -1,403 +0,0 @@
1
- import re
2
- import uuid
3
- import xml.etree.ElementTree as ET
4
- from functools import cached_property
5
- from pathlib import Path
6
- from typing import ClassVar
7
- from typing import Literal as LiteralType
8
- from xml.etree.ElementTree import Element
9
-
10
- from rdflib import RDF, XSD, Literal, Namespace, URIRef
11
- from typing_extensions import Self
12
-
13
- from cognite.neat._constants import DEFAULT_NAMESPACE
14
- from cognite.neat._graph.extractors._base import BaseExtractor
15
- from cognite.neat._issues.errors import FileReadError, NeatValueError
16
- from cognite.neat._shared import Triple
17
- from cognite.neat._utils.rdf_ import remove_namespace_from_uri
18
- from cognite.neat._utils.text import to_camel_case
19
- from cognite.neat._utils.xml_ import get_children
20
-
21
- IODD = Namespace("http://www.io-link.com/IODD/2010/10/")
22
- XSI = Namespace("http://www.w3.org/2001/XMLSchema-instance/")
23
-
24
- XSI_XML_PREFIX = "{http://www.w3.org/2001/XMLSchema-instance}"
25
-
26
-
27
- class IODDExtractor(BaseExtractor):
28
- """
29
- IODD-XML extractor of RDF triples
30
-
31
- Each IODD sheet describes an IODD device. This extractor extracts rdf triples that describes the device, and the
32
- sensors connected to the device.
33
- This data is described under the elements "DeviceIdentity" and "ProcessDataCollection".
34
- In addition, triples extacted from "DeviceIdentity" and
35
- "ProcessDataCollection" may reference "Text" elements which are found under "ExternalTextCollection". Edges to
36
- these Text element nodes are also extracted.
37
-
38
- Args:
39
- root: XML root element of IODD XML file.
40
- namespace: Optional custom namespace to use for extracted triples that define data
41
- model instances. Defaults to DEFAULT_NAMESPACE.
42
- device_id: Optional user specified unique id/tag for actual equipment instance. If not provided, a randomly
43
- generated UUID will be used. The device_id must be WEB compliant,
44
- meaning that the characters /&?=: % are not allowed
45
- """
46
-
47
- device_elements_with_text_nodes: ClassVar[list[str]] = ["VendorText", "VendorUrl", "DeviceName", "DeviceFamily"]
48
- std_variable_elements_to_extract: ClassVar[list[str]] = ["V_SerialNumber", "V_ApplicationSpecificTag"]
49
- text_elements_language: LiteralType["en", "de"] = "en"
50
-
51
- def __init__(
52
- self,
53
- root: Element,
54
- namespace: Namespace | None = None,
55
- device_id: str | None = None,
56
- ):
57
- self.root = root
58
- self.namespace = namespace or DEFAULT_NAMESPACE
59
-
60
- if device_id and device_id != re.sub(r"[^a-zA-Z0-9-_.]", "", device_id):
61
- raise NeatValueError("Specified device_id is not web compliant. Please exclude characters: /&?=: %")
62
-
63
- self.device_id = (
64
- self.namespace[device_id] if device_id else self.namespace[f"Device_{str(uuid.uuid4()).replace('-', '_')}"]
65
- )
66
-
67
- @cached_property
68
- def _text_id_2value_mapping(self) -> dict[str, str]:
69
- """
70
- !!! note used for "Prototype Solution" !!!
71
- A mapping for text_id references to Text elements under ExternalTextCollection.
72
- The mapping can be used to find the Text element with matching id, and returns
73
- the value associated with the Text element.
74
- """
75
- mapping = {}
76
- if et_root := get_children(
77
- self.root, "ExternalTextCollection", ignore_namespace=True, include_nested_children=True, no_children=1
78
- ):
79
- if language_element := get_children(et_root[0], "PrimaryLanguage", ignore_namespace=True, no_children=1):
80
- if (
81
- language_element[0].attrib.get("{http://www.w3.org/XML/1998/namespace}lang")
82
- == self.text_elements_language
83
- ):
84
- if text_elements := get_children(
85
- language_element[0], child_tag="Text", ignore_namespace=True, include_nested_children=True
86
- ):
87
- for element in text_elements:
88
- if id := element.attrib.get("id"):
89
- if value := element.attrib.get("value"):
90
- mapping[id] = value
91
- return mapping
92
-
93
- @classmethod
94
- def from_file(cls, filepath: Path, namespace: Namespace | None = None, device_id: str | None = None) -> Self:
95
- if filepath.suffix != ".xml":
96
- raise FileReadError(filepath, "File is not XML.")
97
- return cls(ET.parse(filepath).getroot(), namespace, device_id)
98
-
99
- @classmethod
100
- def _from_root2triples(cls, root: Element, namespace: Namespace, device_id: URIRef) -> list[Triple]:
101
- """Loops through the relevant elements of the IODD XML sheet to create rdf triples that describes the IODD
102
- device by starting at the root element.
103
- """
104
- triples: list[Triple] = []
105
-
106
- # Extract DeviceIdentity triples
107
- if di_root := get_children(
108
- root, "DeviceIdentity", ignore_namespace=True, include_nested_children=True, no_children=1
109
- ):
110
- triples.extend(cls._iodd_device_identity2triples(di_root[0], namespace, device_id))
111
-
112
- # Extract VariableCollection triples -
113
- # this element holds the information about the sensors connected to the device that collects data such as
114
- # temperature, voltage, leakage etc.
115
- if vc_root := get_children(
116
- root, "VariableCollection", ignore_namespace=True, include_nested_children=True, no_children=1
117
- ):
118
- triples.extend(cls._variables_data_collection2triples(vc_root[0], namespace, device_id))
119
-
120
- if pc_root := get_children(
121
- root, "ProcessDataCollection", ignore_namespace=True, include_nested_children=True, no_children=1
122
- ):
123
- triples.extend(cls._process_data_collection2triples(pc_root[0], namespace, device_id))
124
-
125
- if et_root := get_children(
126
- root, "ExternalTextCollection", ignore_namespace=True, include_nested_children=True, no_children=1
127
- ):
128
- triples.extend(cls._text_elements2triples(et_root[0], namespace))
129
-
130
- return triples
131
-
132
- @classmethod
133
- def _process_data_collection2triples(
134
- cls, pc_root: Element, namespace: Namespace, device_id: URIRef
135
- ) -> list[Triple]:
136
- """
137
- Will only collect ProcessDataIn elements at this point. The data from the IO-master is transmitted as an
138
- array related to a ProcessDataIn item.
139
- """
140
- triples: list[Triple] = []
141
-
142
- if process_data_in := get_children(
143
- pc_root, "ProcessDataIn", ignore_namespace=True, include_nested_children=True
144
- ):
145
- for process_data_element in process_data_in:
146
- if p_id := process_data_element.attrib.get("id"):
147
- device_id_str = remove_namespace_from_uri(device_id)
148
- process_data_in_id = namespace[f"{device_id_str}.{p_id}"]
149
-
150
- # Create ProcessDataIn node
151
- triples.append((process_data_in_id, RDF.type, IODD.ProcessDataIn))
152
-
153
- # Create connection from device to node
154
- triples.append((device_id, IODD.processDataIn, process_data_in_id))
155
-
156
- # Connect record items (essentially an array of indexed variables) to the ProcessDataIn node
157
- triples.extend(cls._process_data_in_records2triples(process_data_element, process_data_in_id))
158
-
159
- return triples
160
-
161
- @classmethod
162
- def _device_2text_elements_edges(cls, di_root: Element, id: URIRef, namespace: Namespace) -> list[Triple]:
163
- """
164
- Create edges from the device node to text nodes.
165
- """
166
- triples: list[Triple] = []
167
-
168
- for element_tag in cls.device_elements_with_text_nodes:
169
- if child := get_children(
170
- di_root, child_tag=element_tag, ignore_namespace=True, include_nested_children=True, no_children=1
171
- ):
172
- if text_id := child[0].attrib.get("textId"):
173
- # Create connection from device to textId node
174
- element_tag = to_camel_case(element_tag)
175
- triples.append((id, IODD[element_tag], namespace[text_id]))
176
-
177
- return triples
178
-
179
- @classmethod
180
- def _text_elements2triples(cls, et_root: Element, namespace: Namespace) -> list[Triple]:
181
- """
182
- This method extracts all text item triples under the ExternalTextCollection element. This will create a node
183
- for each text item, and add the text value as a property to the node.
184
- """
185
- triples: list[Triple] = []
186
-
187
- if language_element := get_children(et_root, "PrimaryLanguage", ignore_namespace=True, no_children=1):
188
- if (
189
- language_element[0].attrib.get("{http://www.w3.org/XML/1998/namespace}lang")
190
- == cls.text_elements_language
191
- ):
192
- if text_elements := get_children(
193
- language_element[0], child_tag="Text", ignore_namespace=True, include_nested_children=True
194
- ):
195
- for element in text_elements:
196
- if id := element.attrib.get("id"):
197
- text_id = namespace[id]
198
-
199
- # Create Text node
200
- triples.append((text_id, RDF.type, IODD.TextObject))
201
-
202
- # Resolve text value related to the text item
203
- if value := element.attrib.get("value"):
204
- triples.append((text_id, IODD.value, Literal(value)))
205
- return triples
206
-
207
- @classmethod
208
- def _std_variables2triples(cls, vc_root: Element, namespace: Namespace, device_id: URIRef) -> list[Triple]:
209
- """
210
- For simplicity, only extract the two items we want for this use case - V_ApplicationSpecificTag and
211
- V_SerialNumber
212
- """
213
- triples: list[Triple] = []
214
-
215
- if std_variable_elements := get_children(vc_root, child_tag="StdVariableRef", ignore_namespace=True):
216
- for element in std_variable_elements:
217
- if id := element.attrib.get("id"):
218
- if id in cls.std_variable_elements_to_extract:
219
- if object := element.attrib.get("defaultValue"):
220
- predicate = to_camel_case(id.replace("V_", ""))
221
- triples.append((device_id, IODD[predicate], Literal(object)))
222
- return triples
223
-
224
- @classmethod
225
- def _variables_data_collection2triples(
226
- cls, vc_root: Element, namespace: Namespace, device_id: URIRef
227
- ) -> list[Triple]:
228
- """
229
- VariableCollection contains elements that references Variables and StdVariables. The StdVariables
230
- can be resolved by looking up the ID in the IODD-StandardDefinitions1.1.xml sheet.
231
-
232
- The Variable elements are descriptions of the sensors collecting data for the device.
233
- """
234
- triples: list[Triple] = []
235
-
236
- # StdVariableRef elements of interest
237
- triples.extend(cls._std_variables2triples(vc_root, namespace, device_id))
238
-
239
- # Variable elements (these are the descriptions of the sensors)
240
- if variable_elements := get_children(vc_root, child_tag="Variable", ignore_namespace=True):
241
- for element in variable_elements:
242
- if id := element.attrib.get("id"):
243
- device_id_str = remove_namespace_from_uri(device_id)
244
- variable_id = f"{device_id_str}.{id}"
245
-
246
- # Create connection from device node to time series
247
- triples.append((device_id, IODD.variable, Literal(variable_id, datatype=XSD["timeseries"])))
248
-
249
- return triples
250
-
251
- @classmethod
252
- def _iodd_device_identity2triples(cls, di_root: Element, namespace: Namespace, device_id: URIRef) -> list[Triple]:
253
- """
254
- Properties and metadata related to the IO Device are described under the 'DeviceIdentity' element in the XML.
255
- This method extracts the triples that describe the device's identity which is found under the
256
- DeviceIdentity element and its child elements.
257
-
258
- """
259
- triples: list[Triple] = []
260
-
261
- # Create rdf type triple for IODD
262
- triples.append(
263
- (
264
- device_id,
265
- RDF.type,
266
- IODD.IoddDevice,
267
- )
268
- )
269
-
270
- for attribute_name, attribute_value in di_root.attrib.items():
271
- triples.append((device_id, IODD[attribute_name], Literal(attribute_value)))
272
-
273
- triples.extend(cls._device_2text_elements_edges(di_root, device_id, namespace))
274
- return triples
275
-
276
- @classmethod
277
- def _process_data_in_records2triples(cls, pc_in_root: Element, process_data_in_id: URIRef) -> list[Triple]:
278
- """
279
- Extract RecordItems related to a ProcessDataIn element. Each record item is indexed. Will use this index
280
- as the identifier for the time series in CDF.
281
- """
282
- triples: list[Triple] = []
283
-
284
- if record_items := get_children(pc_in_root, "RecordItem", ignore_namespace=True, include_nested_children=True):
285
- for record in record_items:
286
- if index := record.attrib.get("subindex"):
287
- process_id_str = remove_namespace_from_uri(process_data_in_id)
288
- record_id = f"{process_id_str}.{index}"
289
- # Create connection from device node to time series
290
- triples.append((process_data_in_id, IODD.variable, Literal(record_id, datatype=XSD["timeseries"])))
291
-
292
- return triples
293
-
294
- def extract(self) -> list[Triple]:
295
- """
296
- Extract RDF triples from IODD XML
297
- """
298
- return self._from_root2triples(self.root, self.namespace, self.device_id)
299
-
300
- def _variable2info(self, variable_element: Element) -> dict:
301
- """
302
- !!! note used for "Prototype Solution" !!!
303
- Extracts information relevant to a CDF time series type from a Variable element
304
- """
305
-
306
- variable_dict = {}
307
-
308
- if name := get_children(
309
- variable_element, child_tag="Name", ignore_namespace=True, include_nested_children=False, no_children=1
310
- ):
311
- if text_id := name[0].get("textId"):
312
- variable_dict["name"] = self._text_id_2value_mapping[text_id]
313
- if description := get_children(
314
- variable_element,
315
- child_tag="Description",
316
- ignore_namespace=True,
317
- include_nested_children=False,
318
- no_children=1,
319
- ):
320
- if text_id := description[0].get("textId"):
321
- variable_dict["description"] = self._text_id_2value_mapping[text_id]
322
- if data_type := get_children(
323
- variable_element, child_tag="Datatype", ignore_namespace=True, include_nested_children=False, no_children=1
324
- ):
325
- variable_dict["data_type"] = data_type[0].attrib[f"{XSI_XML_PREFIX}type"]
326
-
327
- return variable_dict
328
-
329
- def _process_record2info(self, record_element: Element) -> dict:
330
- """
331
- !!! note used for "Prototype Solution" !!!
332
- Extracts information relevant to a CDF time series type from a Record element
333
- """
334
- record_dict = {}
335
-
336
- if name := get_children(
337
- record_element, child_tag="Name", ignore_namespace=True, include_nested_children=False, no_children=1
338
- ):
339
- if text_id := name[0].get("textId"):
340
- record_dict["name"] = self._text_id_2value_mapping[text_id]
341
- if description := get_children(
342
- record_element, child_tag="Description", ignore_namespace=True, include_nested_children=False, no_children=1
343
- ):
344
- if text_id := description[0].get("textId"):
345
- record_dict["description"] = self._text_id_2value_mapping[text_id]
346
- if data_type := get_children(
347
- record_element,
348
- child_tag="SimpleDatatype",
349
- ignore_namespace=True,
350
- include_nested_children=False,
351
- no_children=1,
352
- ):
353
- record_dict["data_type"] = data_type[0].attrib[f"{XSI_XML_PREFIX}type"]
354
- if index := record_element.attrib.get("subindex"):
355
- record_dict["index"] = index
356
-
357
- return record_dict
358
-
359
- def _extract_enhanced_ts_information(self, json_file_path: Path) -> None:
360
- """
361
- Extract additional information like name, description and data type for Variables and ProcessDataIn
362
- record elements in the IODD. The purpose is for the result gile to be used for enhancing time series with more
363
- information when they are created in CDF.
364
-
365
- Args:
366
- json_file_path: file path for where to write the extracted information about all time series
367
- in the IODD
368
-
369
- !!! note "Prototype Solution" !!!
370
- This is an intermediate solution while better support for adding this information directly
371
- into the knowledge graph for the timeseries node type is under development.
372
- """
373
- import json
374
-
375
- ts_ext_id2_info_map = {}
376
-
377
- # Variable elements (these are the descriptions of the sensors)
378
- if variable_elements := get_children(
379
- self.root, child_tag="Variable", ignore_namespace=True, include_nested_children=True
380
- ):
381
- for element in variable_elements:
382
- if id := element.attrib.get("id"):
383
- device_id_str = remove_namespace_from_uri(self.device_id)
384
- variable_id = f"{device_id_str}.{id}"
385
- ts_ext_id2_info_map[variable_id] = self._variable2info(element)
386
-
387
- if process_data_in := get_children(
388
- self.root, "ProcessDataIn", ignore_namespace=True, include_nested_children=True
389
- ):
390
- for process_data_element in process_data_in:
391
- if p_id := process_data_element.attrib.get("id"):
392
- device_id_str = remove_namespace_from_uri(self.device_id)
393
- process_data_in_id = f"{device_id_str}.{p_id}"
394
- if record_items := get_children(
395
- process_data_element, "RecordItem", ignore_namespace=True, include_nested_children=True
396
- ):
397
- for record in record_items:
398
- if index := record.attrib.get("subindex"):
399
- process_record_id = f"{process_data_in_id}.{index}"
400
- ts_ext_id2_info_map[process_record_id] = self._process_record2info(record)
401
-
402
- with Path.open(json_file_path, "w") as fp:
403
- json.dump(ts_ext_id2_info_map, fp, indent=2)
cognite/neat/_graph/transformers/_iodd.py (file removed)
@@ -1,30 +0,0 @@
1
- from rdflib import Namespace
2
-
3
- from cognite.neat._graph.extractors import IODDExtractor
4
-
5
- from ._prune_graph import AttachPropertyFromTargetToSource, PruneDanglingNodes
6
-
7
- IODD = Namespace("http://www.io-link.com/IODD/2010/10/")
8
-
9
-
10
- class IODDAttachPropertyFromTargetToSource(AttachPropertyFromTargetToSource):
11
- _need_changes = frozenset(
12
- {
13
- str(IODDExtractor.__name__),
14
- }
15
- )
16
-
17
- def __init__(self) -> None:
18
- super().__init__(
19
- target_node_type=IODD.TextObject,
20
- target_property=IODD.value,
21
- delete_target_node=True,
22
- namespace=IODD,
23
- )
24
-
25
-
26
- class IODDPruneDanglingNodes(PruneDanglingNodes):
27
- _need_changes = frozenset({str(IODDExtractor.__name__), str(IODDAttachPropertyFromTargetToSource.__name__)})
28
-
29
- def __init__(self) -> None:
30
- super().__init__(node_prune_types=[IODD.TextObject])