cognite-neat 0.80.3__py3-none-any.whl → 0.81.1__py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

This release of cognite-neat has been flagged as potentially problematic; see the registry's advisory for details.

cognite/neat/_version.py CHANGED
@@ -1 +1 @@
- __version__ = "0.80.3"
+ __version__ = "0.81.1"
cognite/neat/graph/extractors/__init__.py CHANGED
@@ -5,6 +5,7 @@ from ._classic_cdf._labels import LabelsExtractor
  from ._classic_cdf._relationships import RelationshipsExtractor
  from ._classic_cdf._sequences import SequencesExtractor
  from ._classic_cdf._timeseries import TimeSeriesExtractor
+ from ._dexpi import DexpiExtractor
  from ._mock_graph_generator import MockGraphGenerator
  from ._rdf_file import RdfFileExtractor

@@ -18,6 +19,7 @@ __all__ = [
      "FilesExtractor",
      "LabelsExtractor",
      "RdfFileExtractor",
+     "DexpiExtractor",
  ]


@@ -31,4 +33,5 @@ TripleExtractors = (
      | FilesExtractor
      | LabelsExtractor
      | RdfFileExtractor
+     | DexpiExtractor
  )
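
The new DexpiExtractor is exported alongside the existing extractors and joins the TripleExtractors union, so any code typed against the union accepts it unchanged. A minimal sketch of that, with a hypothetical `load_into` helper (not part of the package):

```python
from rdflib import Graph

from cognite.neat.graph.extractors import DexpiExtractor, TripleExtractors


def load_into(graph: Graph, extractor: TripleExtractors) -> None:
    """Hypothetical helper typed against the union.

    Note: NeatGraphStore.write() special-cases RdfFileExtractor; this sketch
    only covers extractors that expose extract().
    """
    for triple in extractor.extract():  # each triple is a 3-tuple of rdflib terms
        graph.add(triple)


graph = Graph()
load_into(graph, DexpiExtractor.from_file("example_pid.xml"))  # placeholder path
```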
cognite/neat/graph/extractors/_classic_cdf/_assets.py CHANGED
@@ -1,10 +1,9 @@
  from collections.abc import Iterable
- from datetime import datetime
+ from datetime import datetime, timezone
  from pathlib import Path
  from typing import cast
  from urllib.parse import quote

- import pytz
  from cognite.client import CogniteClient
  from cognite.client.data_classes import Asset, AssetList
  from rdflib import RDF, Literal, Namespace
@@ -49,7 +48,7 @@ class AssetsExtractor(BaseExtractor):
      @classmethod
      def _asset2triples(cls, asset: Asset, namespace: Namespace) -> list[Triple]:
          """Converts an asset to triples."""
-         id_ = namespace[str(asset.id)]
+         id_ = namespace[f"Asset_{asset.id}"]

          # Set rdf type
          triples: list[Triple] = [(id_, RDF.type, namespace["Asset"])]
@@ -72,14 +71,14 @@ class AssetsExtractor(BaseExtractor):
              (
                  id_,
                  namespace.created_time,
-                 Literal(datetime.fromtimestamp(asset.created_time / 1000, pytz.utc)),
+                 Literal(datetime.fromtimestamp(asset.created_time / 1000, timezone.utc)),
              )
          )
          triples.append(
              (
                  id_,
                  namespace.last_updated_time,
-                 Literal(datetime.fromtimestamp(asset.last_updated_time / 1000, pytz.utc)),
+                 Literal(datetime.fromtimestamp(asset.last_updated_time / 1000, timezone.utc)),
              )
          )

@@ -87,7 +86,7 @@ class AssetsExtractor(BaseExtractor):
            for label in asset.labels:
                # external_id can create ill-formed URIs, so we create websafe URIs
                # since labels do not have internal ids, we use the external_id as the id
-               triples.append((id_, namespace.label, namespace[quote(label.dump()["externalId"])]))
+               triples.append((id_, namespace.label, namespace[f"Label_{quote(label.dump()['externalId'])}"]))

          if asset.metadata:
              for key, value in asset.metadata.items():
@@ -96,12 +95,12 @@ class AssetsExtractor(BaseExtractor):

          # Create connections:
          if asset.parent_id:
-             triples.append((id_, namespace.parent, namespace[str(asset.parent_id)]))
+             triples.append((id_, namespace.parent, namespace[f"Asset_{asset.parent_id}"]))

          if asset.root_id:
-             triples.append((id_, namespace.root, namespace[str(asset.root_id)]))
+             triples.append((id_, namespace.root, namespace[f"Asset_{asset.root_id}"]))

          if asset.data_set_id:
-             triples.append((id_, namespace.dataset, namespace[str(asset.data_set_id)]))
+             triples.append((id_, namespace.dataset, namespace[f"Dataset_{asset.data_set_id}"]))

          return triples
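
Two mechanical changes repeat across every classic-CDF extractor in this release: pytz.utc is replaced with the standard library's timezone.utc (dropping the pytz dependency), and node URIs gain a resource-type prefix instead of a bare numeric id, so different resource types with the same internal id no longer collide in the graph. A small sketch of both effects (the namespace URL is illustrative):

```python
from datetime import datetime, timezone

from rdflib import Namespace

# Timestamps: timezone.utc produces the same aware datetime pytz.utc did.
created_ms = 1_700_000_000_000  # CDF timestamps are in milliseconds
dt = datetime.fromtimestamp(created_ms / 1000, timezone.utc)
assert dt.tzinfo is timezone.utc

# URIs: ids are now prefixed with the resource type.
namespace = Namespace("http://example.org/")  # illustrative namespace
old_uri = namespace[str(123)]        # http://example.org/123        (0.80.x)
new_uri = namespace[f"Asset_{123}"]  # http://example.org/Asset_123  (0.81.x)
```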
cognite/neat/graph/extractors/_classic_cdf/_events.py CHANGED
@@ -1,9 +1,8 @@
  from collections.abc import Iterable
- from datetime import datetime
+ from datetime import datetime, timezone
  from pathlib import Path
  from typing import cast

- import pytz
  from cognite.client import CogniteClient
  from cognite.client.data_classes import Event, EventList
  from pydantic import AnyHttpUrl, ValidationError
@@ -44,7 +43,7 @@ class EventsExtractor(BaseExtractor):

      @classmethod
      def _event2triples(cls, event: Event, namespace: Namespace) -> list[Triple]:
-         id_ = namespace[str(event.id)]
+         id_ = namespace[f"Event_{event.id}"]

          # Set rdf type
          triples: list[Triple] = [(id_, RDF.type, namespace.Event)]
@@ -77,7 +76,7 @@ class EventsExtractor(BaseExtractor):

          if event.created_time:
              triples.append(
-                 (id_, namespace.created_time, Literal(datetime.fromtimestamp(event.created_time / 1000, pytz.utc)))
+                 (id_, namespace.created_time, Literal(datetime.fromtimestamp(event.created_time / 1000, timezone.utc)))
              )

          if event.last_updated_time:
@@ -85,7 +84,7 @@ class EventsExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.last_updated_time,
-                     Literal(datetime.fromtimestamp(event.last_updated_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(event.last_updated_time / 1000, timezone.utc)),
                  )
              )

@@ -94,7 +93,7 @@ class EventsExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.start_time,
-                     Literal(datetime.fromtimestamp(event.start_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(event.start_time / 1000, timezone.utc)),
                  )
              )

@@ -103,15 +102,15 @@ class EventsExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.end_time,
-                     Literal(datetime.fromtimestamp(event.end_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(event.end_time / 1000, timezone.utc)),
                  )
              )

          if event.data_set_id:
-             triples.append((id_, namespace.data_set_id, namespace[str(event.data_set_id)]))
+             triples.append((id_, namespace.data_set_id, namespace[f"Dataset_{event.data_set_id}"]))

          if event.asset_ids:
              for asset_id in event.asset_ids:
-                 triples.append((id_, namespace.asset, namespace[str(asset_id)]))
+                 triples.append((id_, namespace.asset, namespace[f"Asset_{asset_id}"]))

          return triples
cognite/neat/graph/extractors/_classic_cdf/_files.py CHANGED
@@ -1,10 +1,9 @@
  from collections.abc import Iterable
- from datetime import datetime
+ from datetime import datetime, timezone
  from pathlib import Path
  from typing import cast
  from urllib.parse import quote

- import pytz
  from cognite.client import CogniteClient
  from cognite.client.data_classes import FileMetadata, FileMetadataList
  from pydantic import AnyHttpUrl, ValidationError
@@ -45,7 +44,7 @@ class FilesExtractor(BaseExtractor):

      @classmethod
      def _file2triples(cls, file: FileMetadata, namespace: Namespace) -> list[Triple]:
-         id_ = namespace[str(file.id)]
+         id_ = namespace[f"File_{file.id}"]

          # Set rdf type
          triples: list[Triple] = [(id_, RDF.type, namespace.File)]
@@ -81,7 +80,7 @@ class FilesExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.source_created_time,
-                     Literal(datetime.fromtimestamp(file.source_created_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(file.source_created_time / 1000, timezone.utc)),
                  )
              )
          if file.source_modified_time:
@@ -89,17 +88,17 @@ class FilesExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.source_created_time,
-                     Literal(datetime.fromtimestamp(file.source_modified_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(file.source_modified_time / 1000, timezone.utc)),
                  )
              )
          if file.uploaded_time:
              triples.append(
-                 (id_, namespace.uploaded_time, Literal(datetime.fromtimestamp(file.uploaded_time / 1000, pytz.utc)))
+                 (id_, namespace.uploaded_time, Literal(datetime.fromtimestamp(file.uploaded_time / 1000, timezone.utc)))
              )

          if file.created_time:
              triples.append(
-                 (id_, namespace.created_time, Literal(datetime.fromtimestamp(file.created_time / 1000, pytz.utc)))
+                 (id_, namespace.created_time, Literal(datetime.fromtimestamp(file.created_time / 1000, timezone.utc)))
              )

          if file.last_updated_time:
@@ -107,7 +106,7 @@ class FilesExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.last_updated_time,
-                     Literal(datetime.fromtimestamp(file.last_updated_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(file.last_updated_time / 1000, timezone.utc)),
                  )
              )

@@ -115,17 +114,17 @@ class FilesExtractor(BaseExtractor):
            for label in file.labels:
                # external_id can create ill-formed URIs, so we create websafe URIs
                # since labels do not have internal ids, we use the external_id as the id
-               triples.append((id_, namespace.label, namespace[quote(label.dump()["externalId"])]))
+               triples.append((id_, namespace.label, namespace[f"Label_{quote(label.dump()['externalId'])}"]))

          if file.security_categories:
              for category in file.security_categories:
                  triples.append((id_, namespace.security_categories, Literal(category)))

          if file.data_set_id:
-             triples.append((id_, namespace.data_set_id, namespace[str(file.data_set_id)]))
+             triples.append((id_, namespace.data_set_id, namespace[f"Dataset_{file.data_set_id}"]))

          if file.asset_ids:
              for asset_id in file.asset_ids:
-                 triples.append((id_, namespace.asset, namespace[str(asset_id)]))
+                 triples.append((id_, namespace.asset, namespace[f"Asset_{asset_id}"]))

          return triples
cognite/neat/graph/extractors/_classic_cdf/_labels.py CHANGED
@@ -1,10 +1,9 @@
  from collections.abc import Iterable
- from datetime import datetime
+ from datetime import datetime, timezone
  from pathlib import Path
  from typing import cast
  from urllib.parse import quote

- import pytz
  from cognite.client import CogniteClient
  from cognite.client.data_classes import LabelDefinition, LabelDefinitionList
  from rdflib import RDF, Literal, Namespace
@@ -45,7 +44,7 @@ class LabelsExtractor(BaseExtractor):

      @classmethod
      def _labels2triples(cls, label: LabelDefinition, namespace: Namespace) -> list[Triple]:
-         id_ = namespace[quote(cast(str, label.external_id))]
+         id_ = namespace[f"Label_{quote(label.dump()['externalId'])}"]

          # Set rdf type
          triples: list[Triple] = [(id_, RDF.type, namespace.Label)]
@@ -63,10 +62,10 @@ class LabelsExtractor(BaseExtractor):

          if label.created_time:
              triples.append(
-                 (id_, namespace.created_time, Literal(datetime.fromtimestamp(label.created_time / 1000, pytz.utc)))
+                 (id_, namespace.created_time, Literal(datetime.fromtimestamp(label.created_time / 1000, timezone.utc)))
              )

          if label.data_set_id:
-             triples.append((id_, namespace.data_set_id, namespace[str(label.data_set_id)]))
+             triples.append((id_, namespace.data_set_id, namespace[f"Dataset_{label.data_set_id}"]))

          return triples
cognite/neat/graph/extractors/_classic_cdf/_relationships.py CHANGED
@@ -1,11 +1,10 @@
  import uuid
  from collections.abc import Iterable
- from datetime import datetime
+ from datetime import datetime, timezone
  from pathlib import Path
  from typing import cast
  from urllib.parse import quote

- import pytz
  from cognite.client import CogniteClient
  from cognite.client.data_classes import Relationship, RelationshipList
  from rdflib import RDF, Literal, Namespace
@@ -49,27 +48,27 @@ class RelationshipsExtractor(BaseExtractor):
          """Converts an asset to triples."""

          # relationships do not have an internal id, so we generate one
-         id_ = namespace[str(uuid.uuid4())]
+         id_ = namespace[f"Relationship_{uuid.uuid4()}"]

          # Set rdf type
          triples: list[Triple] = [(id_, RDF.type, namespace["Relationship"])]

          # Set source and target types
-         if relationship.source_type:
+         if source_type := relationship.source_type:
              triples.append(
                  (
                      id_,
                      namespace.source_type,
-                     namespace[relationship.source_type.title()],
+                     namespace[source_type.title()],
                  )
              )

-         if relationship.target_type:
+         if target_type := relationship.target_type:
              triples.append(
                  (
                      id_,
                      namespace.target_type,
-                     namespace[relationship.target_type.title()],
+                     namespace[target_type.title()],
                  )
              )

@@ -100,7 +99,7 @@ class RelationshipsExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.start_time,
-                     Literal(datetime.fromtimestamp(relationship.start_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(relationship.start_time / 1000, timezone.utc)),
                  )
              )

@@ -109,7 +108,7 @@ class RelationshipsExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.end_time,
-                     Literal(datetime.fromtimestamp(relationship.end_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(relationship.end_time / 1000, timezone.utc)),
                  )
              )

@@ -118,7 +117,7 @@ class RelationshipsExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.created_time,
-                     Literal(datetime.fromtimestamp(relationship.created_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(relationship.created_time / 1000, timezone.utc)),
                  )
              )

@@ -127,7 +126,7 @@ class RelationshipsExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.last_updated_time,
-                     Literal(datetime.fromtimestamp(relationship.last_updated_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(relationship.last_updated_time / 1000, timezone.utc)),
                  )
              )

@@ -144,10 +143,10 @@ class RelationshipsExtractor(BaseExtractor):
            for label in relationship.labels:
                # external_id can create ill-formed URIs, so we create websafe URIs
                # since labels do not have internal ids, we use the external_id as the id
-               triples.append((id_, namespace.label, namespace[quote(label.dump()["externalId"])]))
+               triples.append((id_, namespace.label, namespace[f"Label_{quote(label.dump()['externalId'])}"]))

          # Create connection
          if relationship.data_set_id:
-             triples.append((id_, namespace.dataset, namespace[str(relationship.data_set_id)]))
+             triples.append((id_, namespace.dataset, namespace[f"Dataset_{relationship.data_set_id}"]))

          return triples
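
Beyond the mechanical substitutions, the source_type/target_type checks above now bind the attribute with an assignment expression (the walrus operator), presumably so the attribute is read once and the branch body reuses the bound local. The pattern in isolation, with a hypothetical stand-in class:

```python
from dataclasses import dataclass


@dataclass
class Rel:  # stand-in for the cognite Relationship class, for illustration only
    source_type: str | None = None


rel = Rel(source_type="asset")

# Binding the attribute once means the truthiness check and the use site
# refer to the same local value, avoiding a second attribute lookup.
if source_type := rel.source_type:
    print(source_type.title())  # -> "Asset"
```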
cognite/neat/graph/extractors/_classic_cdf/_sequences.py CHANGED
@@ -1,9 +1,8 @@
  from collections.abc import Iterable
- from datetime import datetime
+ from datetime import datetime, timezone
  from pathlib import Path
  from typing import cast

- import pytz
  from cognite.client import CogniteClient
  from cognite.client.data_classes import Sequence, SequenceList
  from pydantic import AnyHttpUrl, ValidationError
@@ -44,7 +43,7 @@ class SequencesExtractor(BaseExtractor):

      @classmethod
      def _sequence2triples(cls, sequence: Sequence, namespace: Namespace) -> list[Triple]:
-         id_ = namespace[str(sequence.id)]
+         id_ = namespace[f"Sequence_{sequence.id}"]

          # Set rdf type
          triples: list[Triple] = [(id_, RDF.type, namespace.Sequence)]
@@ -71,7 +70,11 @@ class SequencesExtractor(BaseExtractor):

          if sequence.created_time:
              triples.append(
-                 (id_, namespace.created_time, Literal(datetime.fromtimestamp(sequence.created_time / 1000, pytz.utc)))
+                 (
+                     id_,
+                     namespace.created_time,
+                     Literal(datetime.fromtimestamp(sequence.created_time / 1000, timezone.utc)),
+                 )
              )

          if sequence.last_updated_time:
@@ -79,14 +82,14 @@ class SequencesExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.last_updated_time,
-                     Literal(datetime.fromtimestamp(sequence.last_updated_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(sequence.last_updated_time / 1000, timezone.utc)),
                  )
              )

          if sequence.data_set_id:
-             triples.append((id_, namespace.data_set_id, namespace[str(sequence.data_set_id)]))
+             triples.append((id_, namespace.data_set_id, namespace[f"Dataset_{sequence.data_set_id}"]))

          if sequence.asset_id:
-             triples.append((id_, namespace.asset, namespace[str(sequence.asset_id)]))
+             triples.append((id_, namespace.asset, namespace[f"Asset_{sequence.asset_id}"]))

          return triples
cognite/neat/graph/extractors/_classic_cdf/_timeseries.py CHANGED
@@ -1,9 +1,8 @@
  from collections.abc import Iterable
- from datetime import datetime
+ from datetime import datetime, timezone
  from pathlib import Path
  from typing import cast

- import pytz
  from cognite.client import CogniteClient
  from cognite.client.data_classes import TimeSeries, TimeSeriesList
  from pydantic import AnyHttpUrl, ValidationError
@@ -46,7 +45,7 @@ class TimeSeriesExtractor(BaseExtractor):

      @classmethod
      def _timeseries2triples(cls, timeseries: TimeSeries, namespace: Namespace) -> list[Triple]:
-         id_ = namespace[str(timeseries.id)]
+         id_ = namespace[f"TimeSeries_{timeseries.id}"]

          # Set rdf type
          triples: list[Triple] = [(id_, RDF.type, namespace.TimeSeries)]
@@ -86,7 +85,11 @@ class TimeSeriesExtractor(BaseExtractor):

          if timeseries.created_time:
              triples.append(
-                 (id_, namespace.created_time, Literal(datetime.fromtimestamp(timeseries.created_time / 1000, pytz.utc)))
+                 (
+                     id_,
+                     namespace.created_time,
+                     Literal(datetime.fromtimestamp(timeseries.created_time / 1000, timezone.utc)),
+                 )
              )

          if timeseries.last_updated_time:
@@ -94,7 +97,7 @@ class TimeSeriesExtractor(BaseExtractor):
                  (
                      id_,
                      namespace.last_updated_time,
-                     Literal(datetime.fromtimestamp(timeseries.last_updated_time / 1000, pytz.utc)),
+                     Literal(datetime.fromtimestamp(timeseries.last_updated_time / 1000, timezone.utc)),
                  )
              )

@@ -110,9 +113,9 @@ class TimeSeriesExtractor(BaseExtractor):
              triples.append((id_, namespace.unit_external_id, Literal(timeseries.unit_external_id)))

          if timeseries.data_set_id:
-             triples.append((id_, namespace.dataset, namespace[str(timeseries.data_set_id)]))
+             triples.append((id_, namespace.dataset, namespace[f"Dataset_{timeseries.data_set_id}"]))

          if timeseries.asset_id:
-             triples.append((id_, namespace.asset, namespace[str(timeseries.asset_id)]))
+             triples.append((id_, namespace.asset, namespace[f"Asset_{timeseries.asset_id}"]))

          return triples
cognite/neat/graph/extractors/_dexpi.py ADDED
@@ -0,0 +1,212 @@
+ import xml.etree.ElementTree as ET
+ from collections import defaultdict
+ from collections.abc import Iterable
+ from pathlib import Path
+ from xml.etree.ElementTree import Element
+
+ from rdflib import RDF, RDFS, XSD, Literal, Namespace, URIRef
+
+ from cognite.neat.constants import DEFAULT_NAMESPACE
+ from cognite.neat.graph.extractors._base import BaseExtractor
+ from cognite.neat.graph.models import Triple
+ from cognite.neat.utils.utils import as_neat_compliant_uri
+ from cognite.neat.utils.xml import get_children, iterate_tree
+
+ DEXPI = Namespace("http://sandbox.dexpi.org/rdl/")
+
+
+ class DexpiExtractor(BaseExtractor):
+     """
+     DEXPI-XML extractor of RDF triples
+
+     Args:
+         root: XML root element of DEXPI file.
+         namespace: Optional custom namespace to use for extracted triples that define data
+             model instances. Defaults to DEFAULT_NAMESPACE.
+     """
+
+     def __init__(
+         self,
+         root: Element,
+         namespace: Namespace | None = None,
+     ):
+         self.root = root
+         self.namespace = namespace or DEFAULT_NAMESPACE
+
+     @classmethod
+     def from_file(cls, filepath: str | Path, namespace: Namespace | None = None):
+         return cls(ET.parse(filepath).getroot(), namespace)
+
+     @classmethod
+     def from_url(cls, url: str, namespace: Namespace | None = None):
+         from io import BytesIO
+
+         import requests
+
+         response = requests.get(url)
+         response.raise_for_status()
+         return cls(ET.parse(BytesIO(response.content)).getroot(), namespace)
+
+     def extract(self) -> Iterable[Triple]:
+         """Extracts RDF triples from DEXPI XML file."""
+
+         for element in iterate_tree(self.root):
+             yield from self._element2triples(element, self.namespace)
+
+     @classmethod
+     def _element2triples(cls, element: Element, namespace: Namespace) -> list[Triple]:
+         """Converts an element to triples."""
+         triples: list[Triple] = []
+
+         if (
+             "ComponentClass" in element.attrib
+             and element.attrib["ComponentClass"] != "Label"
+             and "ID" in element.attrib
+         ):
+             id_ = namespace[element.attrib["ID"]]
+
+             if node_triples := cls._element2node_triples(id_, element):
+                 triples.extend(node_triples)
+
+             if edge_triples := cls._element2edge_triples(id_, element, namespace):
+                 triples.extend(edge_triples)
+
+         return triples
+
+     @classmethod
+     def _element2edge_triples(cls, id_: URIRef, element: Element, namespace: Namespace) -> list[Triple]:
+         triples: list[Triple] = []
+
+         # connection triples
+         if connections := get_children(element, "Connection"):
+             for connection in connections:
+                 if "FromID" in connection.attrib and "ToID" in connection.attrib:
+                     triples.append(
+                         (
+                             namespace[connection.attrib["FromID"]],
+                             DEXPI.connection,
+                             namespace[connection.attrib["ToID"]],
+                         )
+                     )
+
+         # association triples
+         if associations := get_children(element, "Association"):
+             for association in associations:
+                 if "Type" in association.attrib and "ItemID" in association.attrib:
+                     association_type = cls._to_uri_friendly_association_type(association)
+
+                     triples.append(
+                         (
+                             id_,
+                             DEXPI[f"association/{association_type}"],
+                             namespace[association.attrib["ItemID"]],
+                         )
+                     )
+
+         # children-parent triples
+         for child in element:
+             if "ID" in child.attrib and child.tag != "Label":
+                 camel_case_property = child.tag[0].lower() + child.tag[1:]
+                 triples.append(
+                     (
+                         id_,
+                         DEXPI[f"children/{camel_case_property}"],
+                         namespace[child.attrib["ID"]],
+                     )
+                 )
+
+         return triples
+
+     @classmethod
+     def _to_uri_friendly_association_type(cls, association: Element):
+         association_type = "".join(
+             [word.capitalize() if i != 0 else word for i, word in enumerate(association.attrib["Type"].split(" "))]
+         )
+
+         return association_type
+
+     @classmethod
+     def _element2node_triples(cls, id_: URIRef, element: Element) -> list[Triple]:
+         """Converts an XML element to triples."""
+         triples: list[Triple] = []
+
+         # adding tag triple if exists
+         if tag := element.tag:
+             triples.append((id_, DEXPI.tag, Literal(str(tag))))
+
+         # adding attributes triples
+         if attributes := element.attrib:
+             if component_class := attributes.get("ComponentClass", None):
+                 triples.append((id_, DEXPI.ComponentClass, Literal(component_class)))
+             if component_name := attributes.get("ComponentName", None):
+                 triples.append((id_, DEXPI.ComponentName, Literal(component_name)))
+             if type_ := attributes.get("ComponentClassURI", None):
+                 triples.append((id_, RDF.type, URIRef(type_)))
+
+         # add label triple
+         if label := cls._get_element_label(element):
+             triples.append((id_, RDFS.label, Literal(label)))
+
+         # add generic attributes triples
+         if generic_attributes := cls._get_element_generic_attributes(element):
+             for attribute, value_definitions in generic_attributes.items():
+                 predicate = as_neat_compliant_uri(attribute)
+                 for value_definition in value_definitions:
+                     if literal := cls._value_definition2literal(value_definition):
+                         triples.append((id_, predicate, literal))
+
+         return triples
+
+     @classmethod
+     def _value_definition2literal(cls, definition: dict) -> Literal | None:
+         if "Value" not in definition or "Format" not in definition:
+             return None
+
+         # case: when language is present we create add language tag to the literal
+         elif "Language" in definition and "Value" in definition:
+             return Literal(definition["Value"], lang=definition["Language"])
+
+         # case: when ValueURI is present we use it instead of Value
+         # this would be candidate for ENUMs in CDF
+         elif "ValueURI" in definition:
+             return Literal(definition["ValueURI"], datatype=XSD[definition["Format"]])
+
+         # case: when Format is not string we make sure to add the datatype
+         elif definition["Format"].lower() != "string":
+             return Literal(definition["Value"], datatype=XSD[definition["Format"]])
+
+         # case: when Format is string we add the literal without datatype (easier to read triples, less noise)
+         else:
+             return Literal(definition["Value"])
+
+     @classmethod
+     def _get_element_label(cls, element: Element) -> str | None:
+         if children := get_children(element, "Label", 1):
+             if grandchildren := get_children(children[0], "Text", 1):
+                 if "String" in grandchildren[0].attrib:
+                     return grandchildren[0].attrib["String"]
+
+         # extension for schema version 3.3, where text is used to "label" without a <label> parent
+         elif children := get_children(element, "Text", 1):
+             if "String" in children[0].attrib:
+                 return children[0].attrib["String"]
+
+         return None
+
+     @classmethod
+     def _get_element_generic_attributes(cls, element: Element) -> dict:
+         # TODO: This requires more work as there are multiple groupings of GenericAttributes
+
+         attributes = defaultdict(list)
+         if children := get_children(element, "GenericAttributes", 1):
+             if grandchildren := get_children(children[0], "GenericAttribute"):
+                 for generic_attribute in grandchildren:
+                     # extension for schema version 3.3, where "AttributeURI" is not included
+                     if "AttributeURI" in generic_attribute.attrib:
+                         if generic_attribute.attrib["AttributeURI"] not in attributes:
+                             attributes[generic_attribute.attrib["AttributeURI"]] = [generic_attribute.attrib]
+
+                         else:
+                             attributes[generic_attribute.attrib["AttributeURI"]].append(generic_attribute.attrib)
+
+         return attributes
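
For orientation: the extractor only emits triples for elements that carry both a ComponentClass (other than "Label") and an ID; Connection, Association, and ID-bearing child elements become edges, while tags, attributes, labels, and generic attributes become node triples. A minimal end-to-end sketch with an illustrative XML fragment:

```python
import xml.etree.ElementTree as ET

from cognite.neat.graph.extractors import DexpiExtractor

# Illustrative fragment shaped like the elements the extractor looks for.
xml = """
<PlantModel>
  <Equipment ID="E1" ComponentClass="Pump" ComponentName="P-100">
    <Nozzle ID="N1" ComponentClass="Nozzle"/>
  </Equipment>
</PlantModel>
"""

extractor = DexpiExtractor(ET.fromstring(xml.strip()))
for triple in extractor.extract():
    print(triple)
# Expect e.g. (...E1, DEXPI.ComponentClass, "Pump") plus a children/nozzle
# edge from E1 to N1, and the corresponding node triples for N1.
```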
cognite/neat/graph/stores/_base.py CHANGED
@@ -1,11 +1,10 @@
  import sys
  import warnings
  from collections.abc import Iterable
- from datetime import datetime
+ from datetime import datetime, timezone
  from pathlib import Path
  from typing import cast

- import pytz
  from rdflib import RDF, Graph, Namespace, URIRef
  from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
  from rdflib.query import ResultRow
@@ -41,14 +40,14 @@ class NeatGraphStore:
          graph: Graph,
          rules: InformationRules | None = None,
      ):
-         _start = datetime.now(pytz.utc)
+         _start = datetime.now(timezone.utc)
          self.graph = graph
          self.provenance = Provenance(
              [
                  Change.record(
                      activity=f"{type(self).__name__}.__init__",
                      start=_start,
-                     end=datetime.now(pytz.utc),
+                     end=datetime.now(timezone.utc),
                      description=f"Initialize graph store as {type(self.graph.store).__name__}",
                  )
              ]
@@ -62,7 +61,7 @@ class NeatGraphStore:

      def _upsert_prefixes(self, prefixes: dict[str, Namespace]) -> None:
          """Adds prefixes to the graph store."""
-         _start = datetime.now(pytz.utc)
+         _start = datetime.now(timezone.utc)
          for prefix, namespace in prefixes.items():
              self.graph.bind(prefix, namespace)

@@ -70,7 +69,7 @@ class NeatGraphStore:
              Change.record(
                  activity=f"{type(self).__name__}._upsert_prefixes",
                  start=_start,
-                 end=datetime.now(pytz.utc),
+                 end=datetime.now(timezone.utc),
                  description="Upsert prefixes to graph store",
              )
          )
@@ -124,11 +123,22 @@ class NeatGraphStore:
          return cls(graph, rules)

      def write(self, extractor: TripleExtractors) -> None:
+         _start = datetime.now(timezone.utc)
+
          if isinstance(extractor, RdfFileExtractor):
              self._parse_file(extractor.filepath, extractor.mime_type, extractor.base_uri)
          else:
              self._add_triples(extractor.extract())

+         self.provenance.append(
+             Change.record(
+                 activity=f"{type(extractor).__name__}",
+                 start=_start,
+                 end=datetime.now(timezone.utc),
+                 description=f"Extracted triples to graph store using {type(extractor).__name__}",
+             )
+         )
+
      def _parse_file(
          self,
          filepath: Path,
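
write() now brackets every extraction with a provenance record, so the store's history shows which extractor ran, when it started, and when it finished, alongside the records already made in __init__ and _upsert_prefixes. A sketch of inspecting that trail (Provenance is assumed to be list-like, and the import path is taken from the RECORD listing below):

```python
from rdflib import Graph

from cognite.neat.graph.extractors import DexpiExtractor
from cognite.neat.graph.stores import NeatGraphStore  # import path assumed

store = NeatGraphStore(Graph())  # __init__(graph, rules=None) per this diff
store.write(DexpiExtractor.from_file("example_pid.xml"))  # placeholder path

# Assumed list-like iteration: expect a record for NeatGraphStore.__init__
# followed by one for the DexpiExtractor write.
for change in store.provenance:
    print(change)
```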
cognite/neat/utils/utils.py CHANGED
@@ -147,6 +147,13 @@ def get_namespace(URI: URIRef, special_separator: str = "#_") -> str:
      return "/".join(URI.split("/")[:-1]) + "/"


+ def as_neat_compliant_uri(uri: URIRef) -> URIRef:
+     namespace = get_namespace(uri)
+     id_ = remove_namespace(uri)
+     compliant_uri = re.sub(r"[^a-zA-Z0-9-_.]", "", id_)
+     return URIRef(f"{namespace}{compliant_uri}")
+
+
  def convert_rdflib_content(content: Literal | URIRef | dict | list) -> Any:
      if isinstance(content, Literal) or isinstance(content, URIRef):
          return content.toPython()
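
The new as_neat_compliant_uri helper strips any character outside [a-zA-Z0-9-_.] from the local id while leaving the namespace part untouched; DexpiExtractor uses it to turn arbitrary AttributeURI values into valid predicates. A quick illustration (the input URI is made up):

```python
from rdflib import URIRef

from cognite.neat.utils.utils import as_neat_compliant_uri

# Hypothetical attribute URI containing spaces and parentheses.
raw = URIRef("http://sandbox.dexpi.org/rdl/Nominal Diameter (DN)")

print(as_neat_compliant_uri(raw))
# -> http://sandbox.dexpi.org/rdl/NominalDiameterDN
# The namespace portion is preserved by get_namespace(); only the local id
# is sanitized by the re.sub() call shown above.
```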
cognite_neat-0.81.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: cognite-neat
- Version: 0.80.3
+ Version: 0.81.1
  Summary: Knowledge graph transformation
  Home-page: https://cognite-neat.readthedocs-hosted.com/
  License: Apache-2.0
cognite_neat-0.81.1.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
  cognite/neat/__init__.py,sha256=v-rRiDOgZ3sQSMQKq0vgUQZvpeOkoHFXissAx6Ktg84,61
- cognite/neat/_version.py,sha256=2sBSmuW0uNJNnwbbj_2wdeCNnLYr-TNazMPCCt9nA8w,23
+ cognite/neat/_version.py,sha256=zEgwrETiUxKyTUiAscaK4aVaaACC4OjGI69_cwGk124,23
  cognite/neat/app/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  cognite/neat/app/api/asgi/metrics.py,sha256=nxFy7L5cChTI0a-zkCiJ59Aq8yLuIJp5c9Dg0wRXtV0,152
  cognite/neat/app/api/configuration.py,sha256=2U5M6M252swvQPQyooA1EBzFUZNtcTmuSaywfJDgckM,4232
@@ -53,16 +53,17 @@ cognite/neat/graph/examples/Knowledge-Graph-Nordic44.xml,sha256=U2Ns-M4LRjT1fBkh
  cognite/neat/graph/examples/__init__.py,sha256=yAjHVY3b5jOjmbW-iLbhvu7BG014TpGi3K4igkDqW5I,368
  cognite/neat/graph/examples/skos-capturing-sheet-wind-topics.xlsx,sha256=CV_yK5ZSbYS_ktfIZUPD8Sevs47zpswLXQUDFkGE4Gw,45798
  cognite/neat/graph/exceptions.py,sha256=R6pyOH774n9w2x_X_nrUr8OMAdjJMf_XPIqAvxIQaWo,3401
- cognite/neat/graph/extractors/__init__.py,sha256=PRKYPCnxofQ3i_iiJ3xGjEligLgqAPDw2TSlcZt0MlU,947
+ cognite/neat/graph/extractors/__init__.py,sha256=ozXL6ZLK36wp3uX4UACRVs6rbvynQg2JQlDgL1UM1Wk,1025
  cognite/neat/graph/extractors/_base.py,sha256=TOXDnlqske8DgnJwA0THDVRgmR79Acjm56yF0E-2w7I,356
  cognite/neat/graph/extractors/_classic_cdf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- cognite/neat/graph/extractors/_classic_cdf/_assets.py,sha256=S5QB_38ysVodGRMqr_SWYYaUtkUCS6a6L2b5D1T-888,3812
- cognite/neat/graph/extractors/_classic_cdf/_events.py,sha256=cYd-A7bvRw2S-FDvvE58PPDNE7uhoq2Lhu9En2i6E58,3961
- cognite/neat/graph/extractors/_classic_cdf/_files.py,sha256=8CpqZl8pLBRNJ6oxxp2YLfCupxlXJQ6h0ymUlI1GzH8,4783
- cognite/neat/graph/extractors/_classic_cdf/_labels.py,sha256=GcMPoecniy3g59enKD71F3fghvnN4K3uj1Z9bo2ZKIE,2367
- cognite/neat/graph/extractors/_classic_cdf/_relationships.py,sha256=5kClA5zBlhyPT6hfanLP-upLvMcE6mLU4AhkRp49NYQ,4985
- cognite/neat/graph/extractors/_classic_cdf/_sequences.py,sha256=ov-n8cBEC73AMO1xam2GUDHv-7SyOEWXWRxLXh9flyY,3298
- cognite/neat/graph/extractors/_classic_cdf/_timeseries.py,sha256=xlnJ4fKvCJawZO6l6EHpx36RRAafd3BdYWS0ajNnGVM,4449
+ cognite/neat/graph/extractors/_classic_cdf/_assets.py,sha256=8FQvJqi8nclkQJ7YmSo4yNqs9uExaoCn_whMW8cIAx0,3855
+ cognite/neat/graph/extractors/_classic_cdf/_events.py,sha256=Z0vPcyOz4mCwY0Dqa5wAQZjczO1dbTUGM0X4Y10NLGQ,3995
+ cognite/neat/graph/extractors/_classic_cdf/_files.py,sha256=-6nCkXUCAnDsv4eDFDEiQ-U4SGhmW1VLxZJFUcszqjU,4831
+ cognite/neat/graph/extractors/_classic_cdf/_labels.py,sha256=wm7JFmsk7sHsOVpTsGBE0wargIuHD09Xu-OHK_Bm20g,2386
+ cognite/neat/graph/extractors/_classic_cdf/_relationships.py,sha256=n7gISeyhLjiaWYLWWRj20jmaYgdvJBdYSiZ0G8ZW6mk,5035
+ cognite/neat/graph/extractors/_classic_cdf/_sequences.py,sha256=o4yxkf81FGFrKkflvlyDYie05fTYsT_LcRFM63OTVCI,3406
+ cognite/neat/graph/extractors/_classic_cdf/_timeseries.py,sha256=KTYmL8vhXijlmkN1UFQrGpaCllpRekr1y55SoLhlLbg,4559
+ cognite/neat/graph/extractors/_dexpi.py,sha256=N_xaI3wxBdMBePikEEMW-HhMijSmnwQNIqQJA_WUcbY,8526
  cognite/neat/graph/extractors/_mock_graph_generator.py,sha256=gziG2FFsLk-HmA9uxAeT9RCjVpFxjkCTLiC4tq2zgvw,14961
  cognite/neat/graph/extractors/_rdf_file.py,sha256=w4-XgPgNsmZOkNxjO1ZQCcopTntmmtxfDBkQxn1se6E,463
  cognite/neat/graph/issues/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -72,7 +73,7 @@ cognite/neat/graph/loaders/_base.py,sha256=bdYC6CwsHVqnQa1QzOhL68qQhF1OtrsearqH6
  cognite/neat/graph/loaders/_rdf2dms.py,sha256=Tn7vy6XwXFXpVDn7uzfzgJMJapbPITerKaF5b5Y4ol4,12857
  cognite/neat/graph/models.py,sha256=AtLgZh2qyRP6NRetjQCy9qLMuTQB0CH52Zsev-qa2sk,149
  cognite/neat/graph/stores/__init__.py,sha256=G-VG_YwfRt1kuPao07PDJyZ3w_0-eguzLUM13n-Z_RA,64
- cognite/neat/graph/stores/_base.py,sha256=DGmguO0qE5sLHgHG757ymP-cFtEimKvD57Irr3FH9yY,9106
+ cognite/neat/graph/stores/_base.py,sha256=6MZAXygT6sHTQ1LWm_TDb2Ws6fgNJ-r4evwcLywpBVk,9481
  cognite/neat/graph/stores/_oxrdflib.py,sha256=A5zeRm5_e8ui_ihGpgstRDg_N7qcLZ3QZBRGrOXSGI0,9569
  cognite/neat/graph/stores/_provenance.py,sha256=Y20-I8dP3DwTQ1sdI_eC4va2Az2FpK0oZwdfJ5T-2wc,3279
  cognite/neat/issues.py,sha256=pxQfqfBseMDE8JM0iqZnkLXngeyeFfT0TFtu1UuAd4c,4629
@@ -246,7 +247,7 @@ cognite/neat/utils/exceptions.py,sha256=-w4cAcvcoWLf-_ZwAl7QV_NysfqtQzIOd1Ti-mpx
  cognite/neat/utils/spreadsheet.py,sha256=LI0c7dlW0zXHkHw0NvB-gg6Df6cDcE3FbiaHBYLXdzQ,2714
  cognite/neat/utils/text.py,sha256=4bg1_Q0lg7KsoxaDOvXrVyeY78BJN8i-27BlyDzUCls,3082
  cognite/neat/utils/upload.py,sha256=XaAKqyMhz6qXbUrttGNIXZxFRPJvrnbMpDRF8GEiK2g,2707
- cognite/neat/utils/utils.py,sha256=OOuL0l-pv_8gDJCpXGBx-U9CEYDKQffP9dt8Dbg5kdU,13807
+ cognite/neat/utils/utils.py,sha256=p5qiqL4p4yC2z1pJWRaw-GSITOvBMCvj8zSo58NNsCo,14031
  cognite/neat/utils/xml.py,sha256=ppLT3lQKVp8wOP-m8-tFY8uB2P4R76l7R_-kUtsABng,992
  cognite/neat/workflows/__init__.py,sha256=oiKub_U9f5cA0I1nKl5dFkR4BD8_6Be9eMzQ_50PwP0,396
  cognite/neat/workflows/_exceptions.py,sha256=ugI_X1XNpikAiL8zIggBjcx6q7WvOpRIgvxHrj2Rhr4,1348
@@ -292,8 +293,8 @@ cognite/neat/workflows/steps_registry.py,sha256=fkTX14ZA7_gkUYfWIlx7A1XbCidvqR23
  cognite/neat/workflows/tasks.py,sha256=dqlJwKAb0jlkl7abbY8RRz3m7MT4SK8-7cntMWkOYjw,788
  cognite/neat/workflows/triggers.py,sha256=_BLNplzoz0iic367u1mhHMHiUrCwP-SLK6_CZzfODX0,7071
  cognite/neat/workflows/utils.py,sha256=gKdy3RLG7ctRhbCRwaDIWpL9Mi98zm56-d4jfHDqP1E,453
- cognite_neat-0.80.3.dist-info/LICENSE,sha256=W8VmvFia4WHa3Gqxq1Ygrq85McUNqIGDVgtdvzT-XqA,11351
- cognite_neat-0.80.3.dist-info/METADATA,sha256=2a_btNnYxdfU9_bgJ7wvzywNoLEdUcTL5BkiZ9RhvTY,9290
- cognite_neat-0.80.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- cognite_neat-0.80.3.dist-info/entry_points.txt,sha256=61FPqiWb25vbqB0KI7znG8nsg_ibLHBvTjYnkPvNFso,50
- cognite_neat-0.80.3.dist-info/RECORD,,
+ cognite_neat-0.81.1.dist-info/LICENSE,sha256=W8VmvFia4WHa3Gqxq1Ygrq85McUNqIGDVgtdvzT-XqA,11351
+ cognite_neat-0.81.1.dist-info/METADATA,sha256=HvhapkfdDjeI4wNtoR6rsyloUItiSR1R9D-6VbnoIiQ,9290
+ cognite_neat-0.81.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ cognite_neat-0.81.1.dist-info/entry_points.txt,sha256=61FPqiWb25vbqB0KI7znG8nsg_ibLHBvTjYnkPvNFso,50
+ cognite_neat-0.81.1.dist-info/RECORD,,