cognite-neat 0.98.0__py3-none-any.whl → 0.99.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cognite-neat might be problematic. Click here for more details.

Files changed (103)
  1. cognite/neat/_client/__init__.py +4 -0
  2. cognite/neat/_client/_api/data_modeling_loaders.py +585 -0
  3. cognite/neat/_client/_api/schema.py +111 -0
  4. cognite/neat/_client/_api_client.py +17 -0
  5. cognite/neat/_client/data_classes/__init__.py +0 -0
  6. cognite/neat/{_utils/cdf/data_classes.py → _client/data_classes/data_modeling.py} +8 -135
  7. cognite/neat/_client/data_classes/schema.py +495 -0
  8. cognite/neat/_constants.py +27 -4
  9. cognite/neat/_graph/_shared.py +14 -15
  10. cognite/neat/_graph/extractors/_classic_cdf/_assets.py +14 -154
  11. cognite/neat/_graph/extractors/_classic_cdf/_base.py +154 -7
  12. cognite/neat/_graph/extractors/_classic_cdf/_classic.py +25 -14
  13. cognite/neat/_graph/extractors/_classic_cdf/_data_sets.py +17 -92
  14. cognite/neat/_graph/extractors/_classic_cdf/_events.py +13 -162
  15. cognite/neat/_graph/extractors/_classic_cdf/_files.py +15 -179
  16. cognite/neat/_graph/extractors/_classic_cdf/_labels.py +32 -100
  17. cognite/neat/_graph/extractors/_classic_cdf/_relationships.py +27 -178
  18. cognite/neat/_graph/extractors/_classic_cdf/_sequences.py +14 -139
  19. cognite/neat/_graph/extractors/_classic_cdf/_timeseries.py +15 -173
  20. cognite/neat/_graph/extractors/_rdf_file.py +6 -7
  21. cognite/neat/_graph/loaders/_rdf2dms.py +2 -2
  22. cognite/neat/_graph/queries/_base.py +17 -1
  23. cognite/neat/_graph/transformers/_classic_cdf.py +74 -147
  24. cognite/neat/_graph/transformers/_prune_graph.py +1 -1
  25. cognite/neat/_graph/transformers/_rdfpath.py +1 -1
  26. cognite/neat/_issues/_base.py +26 -17
  27. cognite/neat/_issues/errors/__init__.py +4 -2
  28. cognite/neat/_issues/errors/_external.py +7 -0
  29. cognite/neat/_issues/errors/_properties.py +2 -7
  30. cognite/neat/_issues/errors/_resources.py +1 -1
  31. cognite/neat/_issues/warnings/__init__.py +8 -0
  32. cognite/neat/_issues/warnings/_external.py +16 -0
  33. cognite/neat/_issues/warnings/_properties.py +16 -0
  34. cognite/neat/_issues/warnings/_resources.py +26 -2
  35. cognite/neat/_issues/warnings/user_modeling.py +4 -4
  36. cognite/neat/_rules/_constants.py +8 -11
  37. cognite/neat/_rules/analysis/_base.py +8 -4
  38. cognite/neat/_rules/exporters/_base.py +3 -4
  39. cognite/neat/_rules/exporters/_rules2dms.py +33 -46
  40. cognite/neat/_rules/importers/__init__.py +1 -3
  41. cognite/neat/_rules/importers/_base.py +1 -1
  42. cognite/neat/_rules/importers/_dms2rules.py +6 -29
  43. cognite/neat/_rules/importers/_rdf/__init__.py +5 -0
  44. cognite/neat/_rules/importers/_rdf/_base.py +34 -11
  45. cognite/neat/_rules/importers/_rdf/_imf2rules.py +91 -0
  46. cognite/neat/_rules/importers/_rdf/_inference2rules.py +43 -35
  47. cognite/neat/_rules/importers/_rdf/_owl2rules.py +80 -0
  48. cognite/neat/_rules/importers/_rdf/_shared.py +138 -441
  49. cognite/neat/_rules/models/__init__.py +1 -1
  50. cognite/neat/_rules/models/_base_rules.py +22 -12
  51. cognite/neat/_rules/models/dms/__init__.py +4 -2
  52. cognite/neat/_rules/models/dms/_exporter.py +45 -48
  53. cognite/neat/_rules/models/dms/_rules.py +20 -17
  54. cognite/neat/_rules/models/dms/_rules_input.py +52 -8
  55. cognite/neat/_rules/models/dms/_validation.py +391 -119
  56. cognite/neat/_rules/models/entities/_single_value.py +32 -4
  57. cognite/neat/_rules/models/information/__init__.py +2 -0
  58. cognite/neat/_rules/models/information/_rules.py +0 -67
  59. cognite/neat/_rules/models/information/_validation.py +9 -9
  60. cognite/neat/_rules/models/mapping/__init__.py +2 -3
  61. cognite/neat/_rules/models/mapping/_classic2core.py +36 -146
  62. cognite/neat/_rules/models/mapping/_classic2core.yaml +343 -0
  63. cognite/neat/_rules/transformers/__init__.py +2 -2
  64. cognite/neat/_rules/transformers/_converters.py +110 -11
  65. cognite/neat/_rules/transformers/_mapping.py +105 -30
  66. cognite/neat/_rules/transformers/_pipelines.py +1 -1
  67. cognite/neat/_rules/transformers/_verification.py +31 -3
  68. cognite/neat/_session/_base.py +24 -8
  69. cognite/neat/_session/_drop.py +35 -0
  70. cognite/neat/_session/_inspect.py +17 -5
  71. cognite/neat/_session/_mapping.py +39 -0
  72. cognite/neat/_session/_prepare.py +219 -23
  73. cognite/neat/_session/_read.py +49 -12
  74. cognite/neat/_session/_to.py +8 -5
  75. cognite/neat/_session/exceptions.py +4 -0
  76. cognite/neat/_store/_base.py +27 -24
  77. cognite/neat/_utils/rdf_.py +34 -5
  78. cognite/neat/_version.py +1 -1
  79. cognite/neat/_workflows/steps/lib/current/rules_exporter.py +5 -88
  80. cognite/neat/_workflows/steps/lib/current/rules_importer.py +3 -14
  81. cognite/neat/_workflows/steps/lib/current/rules_validator.py +6 -7
  82. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/METADATA +3 -3
  83. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/RECORD +87 -92
  84. cognite/neat/_rules/importers/_rdf/_imf2rules/__init__.py +0 -3
  85. cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2classes.py +0 -86
  86. cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2metadata.py +0 -29
  87. cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2properties.py +0 -130
  88. cognite/neat/_rules/importers/_rdf/_imf2rules/_imf2rules.py +0 -154
  89. cognite/neat/_rules/importers/_rdf/_owl2rules/__init__.py +0 -3
  90. cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2classes.py +0 -58
  91. cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2metadata.py +0 -65
  92. cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2properties.py +0 -59
  93. cognite/neat/_rules/importers/_rdf/_owl2rules/_owl2rules.py +0 -39
  94. cognite/neat/_rules/models/dms/_schema.py +0 -1101
  95. cognite/neat/_rules/models/mapping/_base.py +0 -131
  96. cognite/neat/_utils/cdf/loaders/__init__.py +0 -25
  97. cognite/neat/_utils/cdf/loaders/_base.py +0 -54
  98. cognite/neat/_utils/cdf/loaders/_data_modeling.py +0 -339
  99. cognite/neat/_utils/cdf/loaders/_ingestion.py +0 -167
  100. cognite/neat/{_utils/cdf → _client/_api}/__init__.py +0 -0
  101. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/LICENSE +0 -0
  102. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/WHEEL +0 -0
  103. {cognite_neat-0.98.0.dist-info → cognite_neat-0.99.1.dist-info}/entry_points.txt +0 -0
@@ -1,182 +1,33 @@
1
- from collections.abc import Callable, Set
2
- from datetime import datetime, timezone
1
+ from collections.abc import Iterable
3
2
  from pathlib import Path
4
3
 
5
4
  from cognite.client import CogniteClient
6
5
  from cognite.client.data_classes import Event, EventFilter, EventList
7
- from rdflib import RDF, Literal, Namespace
8
6
 
9
- from cognite.neat._shared import Triple
10
-
11
- from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
7
+ from ._base import ClassicCDFBaseExtractor, InstanceIdPrefix
12
8
 
13
9
 
14
10
  class EventsExtractor(ClassicCDFBaseExtractor[Event]):
15
- """Extract data from Cognite Data Fusions Events into Neat.
16
-
17
- Args:
18
- items (Iterable[Event]): An iterable of items.
19
- namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
20
- to_type (Callable[[Event], str | None], optional): A function to convert an item to a type.
21
- Defaults to None. If None or if the function returns None, the asset will be set to the default type.
22
- total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
23
- is installed. Defaults to None.
24
- limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
25
- testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
26
- limit the extraction to 1000 assets to test the setup.
27
- unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
28
- a JSON string.
29
- skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
30
- values in this set will be skipped.
31
- """
11
+ """Extract data from Cognite Data Fusions Events into Neat."""
32
12
 
33
13
  _default_rdf_type = "Event"
14
+ _instance_id_prefix = InstanceIdPrefix.event
34
15
 
35
16
  @classmethod
36
- def from_dataset(
37
- cls,
38
- client: CogniteClient,
39
- data_set_external_id: str,
40
- namespace: Namespace | None = None,
41
- to_type: Callable[[Event], str | None] | None = None,
42
- limit: int | None = None,
43
- unpack_metadata: bool = True,
44
- skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
45
- ):
17
+ def _from_dataset(cls, client: CogniteClient, data_set_external_id: str) -> tuple[int | None, Iterable[Event]]:
46
18
  total = client.events.aggregate_count(filter=EventFilter(data_set_ids=[{"externalId": data_set_external_id}]))
47
-
48
- return cls(
49
- client.events(data_set_external_ids=data_set_external_id),
50
- namespace,
51
- to_type,
52
- total=total,
53
- limit=limit,
54
- unpack_metadata=unpack_metadata,
55
- skip_metadata_values=skip_metadata_values,
56
- )
19
+ items = client.events(data_set_external_ids=data_set_external_id)
20
+ return total, items
57
21
 
58
22
  @classmethod
59
- def from_hierarchy(
60
- cls,
61
- client: CogniteClient,
62
- root_asset_external_id: str,
63
- namespace: Namespace | None = None,
64
- to_type: Callable[[Event], str | None] | None = None,
65
- limit: int | None = None,
66
- unpack_metadata: bool = True,
67
- skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
68
- ):
23
+ def _from_hierarchy(cls, client: CogniteClient, root_asset_external_id: str) -> tuple[int | None, Iterable[Event]]:
69
24
  total = client.events.aggregate_count(
70
25
  filter=EventFilter(asset_subtree_ids=[{"externalId": root_asset_external_id}])
71
26
  )
72
-
73
- return cls(
74
- client.events(asset_subtree_external_ids=[root_asset_external_id]),
75
- namespace,
76
- to_type,
77
- total,
78
- limit,
79
- unpack_metadata=unpack_metadata,
80
- skip_metadata_values=skip_metadata_values,
81
- )
27
+ items = client.events(asset_subtree_external_ids=[root_asset_external_id])
28
+ return total, items
82
29
 
83
30
  @classmethod
84
- def from_file(
85
- cls,
86
- file_path: str,
87
- namespace: Namespace | None = None,
88
- to_type: Callable[[Event], str | None] | None = None,
89
- limit: int | None = None,
90
- unpack_metadata: bool = True,
91
- skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
92
- ):
93
- events = EventList.load(Path(file_path).read_text())
94
-
95
- return cls(
96
- events,
97
- namespace,
98
- to_type,
99
- total=len(events),
100
- limit=limit,
101
- unpack_metadata=unpack_metadata,
102
- skip_metadata_values=skip_metadata_values,
103
- )
104
-
105
- def _item2triples(self, event: Event) -> list[Triple]:
106
- id_ = self.namespace[f"{InstanceIdPrefix.event}{event.id}"]
107
-
108
- type_ = self._get_rdf_type(event)
109
-
110
- # Set rdf type
111
- triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
112
-
113
- # Create attributes
114
-
115
- if event.external_id:
116
- triples.append((id_, self.namespace.external_id, Literal(event.external_id)))
117
-
118
- if event.source:
119
- triples.append((id_, self.namespace.type, Literal(event.source)))
120
-
121
- if event.type:
122
- triples.append((id_, self.namespace.type, Literal(event.type)))
123
-
124
- if event.subtype:
125
- triples.append((id_, self.namespace.subtype, Literal(event.subtype)))
126
-
127
- if event.metadata:
128
- triples.extend(self._metadata_to_triples(id_, event.metadata))
129
-
130
- if event.description:
131
- triples.append((id_, self.namespace.description, Literal(event.description)))
132
-
133
- if event.created_time:
134
- triples.append(
135
- (
136
- id_,
137
- self.namespace.created_time,
138
- Literal(datetime.fromtimestamp(event.created_time / 1000, timezone.utc)),
139
- )
140
- )
141
-
142
- if event.last_updated_time:
143
- triples.append(
144
- (
145
- id_,
146
- self.namespace.last_updated_time,
147
- Literal(datetime.fromtimestamp(event.last_updated_time / 1000, timezone.utc)),
148
- )
149
- )
150
-
151
- if event.start_time:
152
- triples.append(
153
- (
154
- id_,
155
- self.namespace.start_time,
156
- Literal(datetime.fromtimestamp(event.start_time / 1000, timezone.utc)),
157
- )
158
- )
159
-
160
- if event.end_time:
161
- triples.append(
162
- (
163
- id_,
164
- self.namespace.end_time,
165
- Literal(datetime.fromtimestamp(event.end_time / 1000, timezone.utc)),
166
- )
167
- )
168
-
169
- if event.data_set_id:
170
- triples.append(
171
- (
172
- id_,
173
- self.namespace.data_set_id,
174
- self.namespace[f"{InstanceIdPrefix.data_set}{event.data_set_id}"],
175
- )
176
- )
177
-
178
- if event.asset_ids:
179
- for asset_id in event.asset_ids:
180
- triples.append((id_, self.namespace.asset, self.namespace[f"{InstanceIdPrefix.asset}{asset_id}"]))
181
-
182
- return triples
31
+ def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[Event]]:
32
+ assets = EventList.load(Path(file_path).read_text())
33
+ return len(assets), assets
@@ -1,202 +1,38 @@
1
- from collections.abc import Callable, Set
2
- from datetime import datetime, timezone
1
+ from collections.abc import Iterable
3
2
  from pathlib import Path
4
3
 
5
4
  from cognite.client import CogniteClient
6
5
  from cognite.client.data_classes import FileMetadata, FileMetadataFilter, FileMetadataList
7
- from rdflib import RDF, Literal, Namespace
8
6
 
9
- from cognite.neat._shared import Triple
10
-
11
- from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
12
- from ._labels import LabelsExtractor
7
+ from ._base import ClassicCDFBaseExtractor, InstanceIdPrefix
13
8
 
14
9
 
15
10
  class FilesExtractor(ClassicCDFBaseExtractor[FileMetadata]):
16
- """Extract data from Cognite Data Fusions files metadata into Neat.
17
-
18
- Args:
19
- items (Iterable[FileMetadata]): An iterable of items.
20
- namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
21
- to_type (Callable[[FileMetadata], str | None], optional): A function to convert an item to a type.
22
- Defaults to None. If None or if the function returns None, the asset will be set to the default type.
23
- total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
24
- is installed. Defaults to None.
25
- limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
26
- testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
27
- limit the extraction to 1000 assets to test the setup.
28
- unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
29
- a JSON string.
30
- skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
31
- values in this set will be skipped.
32
- """
11
+ """Extract data from Cognite Data Fusions files metadata into Neat."""
33
12
 
34
13
  _default_rdf_type = "File"
14
+ _instance_id_prefix = InstanceIdPrefix.file
35
15
 
36
16
  @classmethod
37
- def from_dataset(
17
+ def _from_dataset(
38
18
  cls,
39
19
  client: CogniteClient,
40
20
  data_set_external_id: str,
41
- namespace: Namespace | None = None,
42
- to_type: Callable[[FileMetadata], str | None] | None = None,
43
- limit: int | None = None,
44
- unpack_metadata: bool = True,
45
- skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
46
- ):
47
- return cls(
48
- client.files(data_set_external_ids=data_set_external_id),
49
- namespace=namespace,
50
- to_type=to_type,
51
- limit=limit,
52
- unpack_metadata=unpack_metadata,
53
- skip_metadata_values=skip_metadata_values,
54
- )
21
+ ) -> tuple[int | None, Iterable[FileMetadata]]:
22
+ items = client.files(data_set_external_ids=data_set_external_id)
23
+ return None, items
55
24
 
56
25
  @classmethod
57
- def from_hierarchy(
58
- cls,
59
- client: CogniteClient,
60
- root_asset_external_id: str,
61
- namespace: Namespace | None = None,
62
- to_type: Callable[[FileMetadata], str | None] | None = None,
63
- limit: int | None = None,
64
- unpack_metadata: bool = True,
65
- skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
66
- ):
26
+ def _from_hierarchy(
27
+ cls, client: CogniteClient, root_asset_external_id: str
28
+ ) -> tuple[int | None, Iterable[FileMetadata]]:
67
29
  total = client.files.aggregate(
68
30
  filter=FileMetadataFilter(asset_subtree_ids=[{"externalId": root_asset_external_id}])
69
31
  )[0].count
70
-
71
- return cls(
72
- client.files(asset_subtree_external_ids=[root_asset_external_id]),
73
- namespace,
74
- to_type,
75
- total,
76
- limit,
77
- unpack_metadata=unpack_metadata,
78
- skip_metadata_values=skip_metadata_values,
79
- )
32
+ items = client.files(asset_subtree_external_ids=root_asset_external_id)
33
+ return total, items
80
34
 
81
35
  @classmethod
82
- def from_file(
83
- cls,
84
- file_path: str,
85
- namespace: Namespace | None = None,
86
- to_type: Callable[[FileMetadata], str | None] | None = None,
87
- limit: int | None = None,
88
- unpack_metadata: bool = True,
89
- skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
90
- ):
36
+ def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[FileMetadata]]:
91
37
  file_metadata = FileMetadataList.load(Path(file_path).read_text())
92
- return cls(
93
- file_metadata,
94
- namespace=namespace,
95
- to_type=to_type,
96
- limit=limit,
97
- total=len(file_metadata),
98
- unpack_metadata=unpack_metadata,
99
- skip_metadata_values=skip_metadata_values,
100
- )
101
-
102
- def _item2triples(self, file: FileMetadata) -> list[Triple]:
103
- id_ = self.namespace[f"{InstanceIdPrefix.file}{file.id}"]
104
-
105
- type_ = self._get_rdf_type(file)
106
-
107
- # Set rdf type
108
- triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
109
-
110
- # Create attributes
111
-
112
- if file.external_id:
113
- triples.append((id_, self.namespace.external_id, Literal(file.external_id)))
114
-
115
- if file.source:
116
- triples.append((id_, self.namespace.type, Literal(file.source)))
117
-
118
- if file.mime_type:
119
- triples.append((id_, self.namespace.mime_type, Literal(file.mime_type)))
120
-
121
- if file.uploaded:
122
- triples.append((id_, self.namespace.uploaded, Literal(file.uploaded)))
123
-
124
- if file.source:
125
- triples.append((id_, self.namespace.source, Literal(file.source)))
126
-
127
- if file.metadata:
128
- triples.extend(self._metadata_to_triples(id_, file.metadata))
129
-
130
- if file.source_created_time:
131
- triples.append(
132
- (
133
- id_,
134
- self.namespace.source_created_time,
135
- Literal(datetime.fromtimestamp(file.source_created_time / 1000, timezone.utc)),
136
- )
137
- )
138
- if file.source_modified_time:
139
- triples.append(
140
- (
141
- id_,
142
- self.namespace.source_created_time,
143
- Literal(datetime.fromtimestamp(file.source_modified_time / 1000, timezone.utc)),
144
- )
145
- )
146
- if file.uploaded_time:
147
- triples.append(
148
- (
149
- id_,
150
- self.namespace.uploaded_time,
151
- Literal(datetime.fromtimestamp(file.uploaded_time / 1000, timezone.utc)),
152
- )
153
- )
154
-
155
- if file.created_time:
156
- triples.append(
157
- (
158
- id_,
159
- self.namespace.created_time,
160
- Literal(datetime.fromtimestamp(file.created_time / 1000, timezone.utc)),
161
- )
162
- )
163
-
164
- if file.last_updated_time:
165
- triples.append(
166
- (
167
- id_,
168
- self.namespace.last_updated_time,
169
- Literal(datetime.fromtimestamp(file.last_updated_time / 1000, timezone.utc)),
170
- )
171
- )
172
-
173
- if file.labels:
174
- for label in file.labels:
175
- # external_id can create ill-formed URIs, so we create websafe URIs
176
- # since labels do not have internal ids, we use the external_id as the id
177
- triples.append(
178
- (
179
- id_,
180
- self.namespace.label,
181
- self.namespace[f"{InstanceIdPrefix.label}{LabelsExtractor._label_id(label)}"],
182
- )
183
- )
184
-
185
- if file.security_categories:
186
- for category in file.security_categories:
187
- triples.append((id_, self.namespace.security_categories, Literal(category)))
188
-
189
- if file.data_set_id:
190
- triples.append(
191
- (
192
- id_,
193
- self.namespace.data_set_id,
194
- self.namespace[f"{InstanceIdPrefix.data_set}{file.data_set_id}"],
195
- )
196
- )
197
-
198
- if file.asset_ids:
199
- for asset_id in file.asset_ids:
200
- triples.append((id_, self.namespace.asset, self.namespace[f"{InstanceIdPrefix.asset}{asset_id}"]))
201
-
202
- return triples
38
+ return len(file_metadata), file_metadata
@@ -1,122 +1,54 @@
1
- from collections.abc import Callable, Set
2
- from datetime import datetime, timezone
1
+ from collections.abc import Iterable
3
2
  from pathlib import Path
4
3
  from urllib.parse import quote
5
4
 
6
5
  from cognite.client import CogniteClient
7
6
  from cognite.client.data_classes import Label, LabelDefinition, LabelDefinitionList
8
- from rdflib import RDF, Literal, Namespace
9
7
 
10
- from cognite.neat._shared import Triple
11
-
12
- from ._base import DEFAULT_SKIP_METADATA_VALUES, ClassicCDFBaseExtractor, InstanceIdPrefix
8
+ from ._base import ClassicCDFBaseExtractor, InstanceIdPrefix, T_CogniteResource
13
9
 
14
10
 
15
11
  class LabelsExtractor(ClassicCDFBaseExtractor[LabelDefinition]):
16
- """Extract data from Cognite Data Fusions Labels into Neat.
17
-
18
- Args:
19
- items (Iterable[LabelDefinition]): An iterable of items.
20
- namespace (Namespace, optional): The namespace to use. Defaults to DEFAULT_NAMESPACE.
21
- to_type (Callable[[LabelDefinition], str | None], optional): A function to convert an item to a type.
22
- Defaults to None. If None or if the function returns None, the asset will be set to the default type.
23
- total (int, optional): The total number of items to load. If passed, you will get a progress bar if rich
24
- is installed. Defaults to None.
25
- limit (int, optional): The maximal number of items to load. Defaults to None. This is typically used for
26
- testing setup of the extractor. For example, if you are extracting 100 000 assets, you might want to
27
- limit the extraction to 1000 assets to test the setup.
28
- unpack_metadata (bool, optional): Whether to unpack metadata. Defaults to False, which yields the metadata as
29
- a JSON string.
30
- skip_metadata_values (set[str] | frozenset[str] | None, optional): If you are unpacking metadata, then
31
- values in this set will be skipped.
32
- """
12
+ """Extract data from Cognite Data Fusions Labels into Neat."""
33
13
 
34
14
  _default_rdf_type = "Label"
15
+ _instance_id_prefix = InstanceIdPrefix.label
35
16
 
36
17
  @classmethod
37
- def from_dataset(
38
- cls,
39
- client: CogniteClient,
40
- data_set_external_id: str,
41
- namespace: Namespace | None = None,
42
- to_type: Callable[[LabelDefinition], str | None] | None = None,
43
- limit: int | None = None,
44
- unpack_metadata: bool = True,
45
- skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
46
- ):
47
- return cls(
48
- client.labels(data_set_external_ids=data_set_external_id),
49
- namespace=namespace,
50
- to_type=to_type,
51
- limit=limit,
52
- unpack_metadata=unpack_metadata,
53
- skip_metadata_values=skip_metadata_values,
54
- )
18
+ def _from_dataset(
19
+ cls, client: CogniteClient, data_set_external_id: str
20
+ ) -> tuple[int | None, Iterable[LabelDefinition]]:
21
+ items = client.labels(data_set_external_ids=data_set_external_id)
22
+ return None, items
55
23
 
56
24
  @classmethod
57
- def from_file(
58
- cls,
59
- file_path: str,
60
- namespace: Namespace | None = None,
61
- to_type: Callable[[LabelDefinition], str | None] | None = None,
62
- limit: int | None = None,
63
- unpack_metadata: bool = True,
64
- skip_metadata_values: Set[str] | None = DEFAULT_SKIP_METADATA_VALUES,
65
- ):
66
- labels = LabelDefinitionList.load(Path(file_path).read_text())
67
- return cls(
68
- labels,
69
- total=len(labels),
70
- namespace=namespace,
71
- to_type=to_type,
72
- limit=limit,
73
- unpack_metadata=unpack_metadata,
74
- skip_metadata_values=skip_metadata_values,
75
- )
76
-
77
- def _item2triples(self, label: LabelDefinition) -> list[Triple]:
78
- if not label.external_id:
79
- return []
80
-
81
- id_ = self.namespace[f"{InstanceIdPrefix.label}{self._label_id(label)}"]
82
-
83
- type_ = self._get_rdf_type(label)
84
- # Set rdf type
85
- triples: list[Triple] = [(id_, RDF.type, self.namespace[type_])]
25
+ def _from_hierarchy(
26
+ cls, client: CogniteClient, root_asset_external_id: str
27
+ ) -> tuple[int | None, Iterable[T_CogniteResource]]:
28
+ raise NotImplementedError("Hierarchy is not supported for labels")
86
29
 
87
- # Create attributes
88
- triples.append((id_, self.namespace.external_id, Literal(label.external_id)))
89
-
90
- if label.name:
91
- triples.append((id_, self.namespace.name, Literal(label.name)))
92
-
93
- if label.description:
94
- triples.append((id_, self.namespace.description, Literal(label.description)))
95
-
96
- if label.created_time:
97
- triples.append(
98
- (
99
- id_,
100
- self.namespace.created_time,
101
- Literal(datetime.fromtimestamp(label.created_time / 1000, timezone.utc)),
102
- )
103
- )
104
-
105
- if label.data_set_id:
106
- triples.append(
107
- (
108
- id_,
109
- self.namespace.data_set_id,
110
- self.namespace[f"{InstanceIdPrefix.data_set}{label.data_set_id}"],
111
- )
112
- )
30
+ @classmethod
31
+ def _from_file(cls, file_path: str | Path) -> tuple[int | None, Iterable[LabelDefinition]]:
32
+ labels = LabelDefinitionList.load(Path(file_path).read_text())
33
+ return len(labels), labels
113
34
 
114
- return triples
35
+ def _fallback_id(self, item: LabelDefinition) -> str | None:
36
+ if not item.external_id:
37
+ return None
38
+ return self._label_id(item)
115
39
 
116
40
  @staticmethod
117
- def _label_id(label: Label | LabelDefinition) -> str:
41
+ def _label_id(label: Label | LabelDefinition | dict) -> str:
118
42
  # external_id can create ill-formed URIs, so we create websafe URIs
119
43
  # since labels do not have internal ids, we use the external_id as the id
120
- if label.external_id is None:
44
+ external_id: str | None = None
45
+ if isinstance(label, dict):
46
+ if "externalId" in label:
47
+ external_id = label["externalId"]
48
+ elif "external_id" in label:
49
+ external_id = label["external_id"]
50
+ else:
51
+ external_id = label.external_id
52
+ if external_id is None:
121
53
  raise ValueError("External id must be set of the label")
122
- return quote(label.external_id)
54
+ return quote(external_id)