cognite-neat 0.111.1__py3-none-any.whl → 0.112.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognite/neat/_graph/extractors/_classic_cdf/_base.py +39 -7
- cognite/neat/_graph/extractors/_classic_cdf/_classic.py +43 -1
- cognite/neat/_graph/loaders/_rdf2dms.py +8 -38
- cognite/neat/_rules/exporters/_rules2excel.py +115 -1
- cognite/neat/_session/_fix.py +1 -1
- cognite/neat/_session/_subset.py +18 -0
- cognite/neat/_utils/spreadsheet.py +10 -0
- cognite/neat/_version.py +1 -1
- {cognite_neat-0.111.1.dist-info → cognite_neat-0.112.0.dist-info}/METADATA +1 -1
- {cognite_neat-0.111.1.dist-info → cognite_neat-0.112.0.dist-info}/RECORD +13 -13
- {cognite_neat-0.111.1.dist-info → cognite_neat-0.112.0.dist-info}/LICENSE +0 -0
- {cognite_neat-0.111.1.dist-info → cognite_neat-0.112.0.dist-info}/WHEEL +0 -0
- {cognite_neat-0.111.1.dist-info → cognite_neat-0.112.0.dist-info}/entry_points.txt +0 -0
|
@@ -8,7 +8,7 @@ from abc import ABC, abstractmethod
|
|
|
8
8
|
from collections.abc import Callable, Iterable, Sequence, Set
|
|
9
9
|
from datetime import datetime, timezone
|
|
10
10
|
from pathlib import Path
|
|
11
|
-
from typing import Any, Generic, TypeVar
|
|
11
|
+
from typing import Any, Generic, TypeVar, cast
|
|
12
12
|
|
|
13
13
|
from cognite.client import CogniteClient
|
|
14
14
|
from cognite.client.data_classes._base import WriteableCogniteResource
|
|
@@ -111,6 +111,8 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
|
|
|
111
111
|
# Used by the ClassicGraphExtractor to log URIRefs
|
|
112
112
|
self._log_urirefs = False
|
|
113
113
|
self._uriref_by_external_id: dict[str, URIRef] = {}
|
|
114
|
+
self.asset_parent_uri_by_id: dict[int, URIRef] = {}
|
|
115
|
+
self.asset_parent_uri_by_external_id: dict[str, URIRef] = {}
|
|
114
116
|
|
|
115
117
|
def extract(self) -> Iterable[Triple]:
|
|
116
118
|
"""Extracts an asset with the given asset_id."""
|
|
@@ -165,19 +167,36 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
|
|
|
165
167
|
item = item.as_write()
|
|
166
168
|
dumped = item.dump(self.camel_case)
|
|
167
169
|
dumped.pop("id", None)
|
|
168
|
-
|
|
169
|
-
dumped.pop("parentExternalId", None)
|
|
170
|
+
|
|
170
171
|
if "metadata" in dumped:
|
|
171
172
|
triples.extend(self._metadata_to_triples(id_, dumped.pop("metadata")))
|
|
172
173
|
|
|
173
174
|
triples.extend(self._item2triples_special_cases(id_, dumped))
|
|
174
175
|
|
|
176
|
+
parent_renaming = {"parent_external_id": "parent_id", "parentExternalId": "parentId"}
|
|
177
|
+
parent_key = set(parent_renaming.keys()) | set(parent_renaming.values())
|
|
178
|
+
|
|
175
179
|
for key, value in dumped.items():
|
|
176
180
|
if value is None or value == []:
|
|
177
181
|
continue
|
|
178
182
|
values = value if isinstance(value, Sequence) and not isinstance(value, str) else [value]
|
|
179
183
|
for raw in values:
|
|
180
|
-
|
|
184
|
+
object_ = self._as_object(raw, key)
|
|
185
|
+
if object_ is None:
|
|
186
|
+
continue
|
|
187
|
+
if key in parent_key:
|
|
188
|
+
parent_id = cast(URIRef, object_)
|
|
189
|
+
if isinstance(raw, str):
|
|
190
|
+
self.asset_parent_uri_by_external_id[raw] = parent_id
|
|
191
|
+
elif isinstance(raw, int):
|
|
192
|
+
self.asset_parent_uri_by_id[raw] = parent_id
|
|
193
|
+
# We add a triple to include the parent. This is such that for example the parent
|
|
194
|
+
# externalID will remove the prefix when loading.
|
|
195
|
+
triples.append((parent_id, RDF.type, self.namespace[self._get_rdf_type()]))
|
|
196
|
+
# Parent external ID must be renamed to parent id to match the data model.
|
|
197
|
+
key = parent_renaming.get(key, key)
|
|
198
|
+
|
|
199
|
+
triples.append((id_, self.namespace[key], object_))
|
|
181
200
|
return triples
|
|
182
201
|
|
|
183
202
|
def _item2triples_special_cases(self, id_: URIRef, dumped: dict[str, Any]) -> list[Triple]:
|
|
@@ -186,7 +205,7 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
|
|
|
186
205
|
|
|
187
206
|
@classmethod
|
|
188
207
|
def _external_id_as_uri_suffix(cls, external_id: str | None) -> str:
|
|
189
|
-
if external_id == "":
|
|
208
|
+
if external_id == "" or (isinstance(external_id, str) and external_id.strip() == ""):
|
|
190
209
|
warnings.warn(NeatValueWarning(f"Empty external id in {cls._default_rdf_type}"), stacklevel=2)
|
|
191
210
|
return "empty"
|
|
192
211
|
elif external_id == "\x00":
|
|
@@ -223,7 +242,7 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
|
|
|
223
242
|
type_ = f"{self.prefix}{type_}"
|
|
224
243
|
return self._SPACE_PATTERN.sub("_", type_)
|
|
225
244
|
|
|
226
|
-
def _as_object(self, raw: Any, key: str) -> Literal | URIRef:
|
|
245
|
+
def _as_object(self, raw: Any, key: str) -> Literal | URIRef | None:
|
|
227
246
|
"""Return properly formatted object part of s-p-o triple"""
|
|
228
247
|
if key in {"data_set_id", "dataSetId"}:
|
|
229
248
|
if self.identifier == "externalId" and self.lookup_dataset_external_id:
|
|
@@ -237,13 +256,26 @@ class ClassicCDFBaseExtractor(BaseExtractor, ABC, Generic[T_CogniteResource]):
|
|
|
237
256
|
]
|
|
238
257
|
else:
|
|
239
258
|
return self.namespace[f"{InstanceIdPrefix.data_set}{raw}"]
|
|
240
|
-
elif key in {"
|
|
259
|
+
elif key in {"parentId", "parent_id", "parentExternalId", "parent_external_id"}:
|
|
260
|
+
if self.identifier == "id" and key in {"parent_id", "parentId"}:
|
|
261
|
+
return self.namespace[f"{InstanceIdPrefix.asset}{raw}"]
|
|
262
|
+
elif (
|
|
263
|
+
self.identifier == "externalId"
|
|
264
|
+
and key in {"parent_external_id", "parentExternalId"}
|
|
265
|
+
and isinstance(raw, str)
|
|
266
|
+
):
|
|
267
|
+
return self.namespace[f"{InstanceIdPrefix.asset}{self._external_id_as_uri_suffix(raw)}"]
|
|
268
|
+
else:
|
|
269
|
+
# Skip it
|
|
270
|
+
return None
|
|
271
|
+
elif key in {"assetId", "asset_id", "assetIds", "asset_ids", "rootId", "root_id"}:
|
|
241
272
|
if self.identifier == "id":
|
|
242
273
|
return self.namespace[f"{InstanceIdPrefix.asset}{raw}"]
|
|
243
274
|
else:
|
|
244
275
|
try:
|
|
245
276
|
asset_external_id = self._external_id_as_uri_suffix(self.asset_external_ids_by_id[raw])
|
|
246
277
|
except KeyError:
|
|
278
|
+
warnings.warn(NeatValueWarning(f"Unknown asset id {raw}"), stacklevel=2)
|
|
247
279
|
return Literal("Unknown asset", datatype=XSD.string)
|
|
248
280
|
else:
|
|
249
281
|
return self.namespace[f"{InstanceIdPrefix.asset}{asset_external_id}"]
|
|
@@ -7,7 +7,7 @@ from typing import ClassVar, NamedTuple, cast
|
|
|
7
7
|
|
|
8
8
|
from cognite.client import CogniteClient
|
|
9
9
|
from cognite.client.exceptions import CogniteAPIError
|
|
10
|
-
from rdflib import Namespace, URIRef
|
|
10
|
+
from rdflib import Literal, Namespace, URIRef
|
|
11
11
|
|
|
12
12
|
from cognite.neat._constants import CLASSIC_CDF_NAMESPACE, DEFAULT_NAMESPACE, get_default_prefixes_and_namespaces
|
|
13
13
|
from cognite.neat._graph.extractors._base import KnowledgeGraphExtractor
|
|
@@ -136,6 +136,8 @@ class ClassicGraphExtractor(KnowledgeGraphExtractor):
|
|
|
136
136
|
self._extracted_data_sets = False
|
|
137
137
|
self._asset_external_ids_by_id: dict[int, str] = {}
|
|
138
138
|
self._dataset_external_ids_by_id: dict[int, str] = {}
|
|
139
|
+
self._asset_parent_uri_by_id: dict[int, URIRef] = {}
|
|
140
|
+
self._asset_parent_uri_by_external_id: dict[str, URIRef] = {}
|
|
139
141
|
self.neat_prefix_by_predicate_uri: dict[URIRef, str] = {
|
|
140
142
|
self._namespace["dataSetId"]: InstanceIdPrefix.data_set,
|
|
141
143
|
self._namespace["assetId"]: InstanceIdPrefix.asset,
|
|
@@ -197,6 +199,8 @@ class ClassicGraphExtractor(KnowledgeGraphExtractor):
|
|
|
197
199
|
else:
|
|
198
200
|
self._extracted_data_sets = True
|
|
199
201
|
|
|
202
|
+
yield from self._extract_asset_parent_data_sets()
|
|
203
|
+
|
|
200
204
|
def get_information_rules(self) -> InformationRules:
|
|
201
205
|
# To avoid circular imports
|
|
202
206
|
from cognite.neat._rules.importers import ExcelImporter
|
|
@@ -288,6 +292,10 @@ class ClassicGraphExtractor(KnowledgeGraphExtractor):
|
|
|
288
292
|
if self._identifier == "id":
|
|
289
293
|
self._uris_by_external_id_by_type[core_node.resource_type].update(extractor._uriref_by_external_id)
|
|
290
294
|
|
|
295
|
+
if isinstance(extractor, AssetsExtractor):
|
|
296
|
+
self._asset_parent_uri_by_id.update(extractor.asset_parent_uri_by_id)
|
|
297
|
+
self._asset_parent_uri_by_external_id.update(extractor.asset_parent_uri_by_external_id)
|
|
298
|
+
|
|
291
299
|
def _extract_start_node_relationships(self):
|
|
292
300
|
for start_resource_type, source_external_ids in self._source_external_ids_by_type.items():
|
|
293
301
|
start_type = start_resource_type.removesuffix("_")
|
|
@@ -325,6 +333,10 @@ class ClassicGraphExtractor(KnowledgeGraphExtractor):
|
|
|
325
333
|
# the target nodes.
|
|
326
334
|
self._relationship_subject_predicate_type_external_id.extend(extractor._target_triples)
|
|
327
335
|
|
|
336
|
+
if isinstance(extractor, AssetsExtractor):
|
|
337
|
+
self._asset_parent_uri_by_id.update(extractor.asset_parent_uri_by_id)
|
|
338
|
+
self._asset_parent_uri_by_external_id.update(extractor.asset_parent_uri_by_external_id)
|
|
339
|
+
|
|
328
340
|
def _extract_core_end_nodes(self):
|
|
329
341
|
for core_node in self._classic_node_types:
|
|
330
342
|
target_external_ids = self._target_external_ids_by_type[core_node.resource_type]
|
|
@@ -372,6 +384,36 @@ class ClassicGraphExtractor(KnowledgeGraphExtractor):
|
|
|
372
384
|
)
|
|
373
385
|
yield from DataSetExtractor(data_set_iterator, **self._extractor_args).extract()
|
|
374
386
|
|
|
387
|
+
def _extract_asset_parent_data_sets(self):
|
|
388
|
+
if self._asset_parent_uri_by_id:
|
|
389
|
+
for chunk in self._chunk(
|
|
390
|
+
list(self._asset_parent_uri_by_id.keys()), description="Extracting asset parent data sets"
|
|
391
|
+
):
|
|
392
|
+
assets = self._client.assets.retrieve_multiple(id=list(chunk), ignore_unknown_ids=True)
|
|
393
|
+
for asset in assets:
|
|
394
|
+
if asset.data_set_id is None:
|
|
395
|
+
continue
|
|
396
|
+
object_ = (
|
|
397
|
+
Literal(self._lookup_dataset(asset.data_set_id))
|
|
398
|
+
if self._identifier == "externalId"
|
|
399
|
+
else Literal(asset.data_set_id)
|
|
400
|
+
)
|
|
401
|
+
yield self._asset_parent_uri_by_id[asset.id], self._namespace.dataSetId, object_
|
|
402
|
+
if self._asset_parent_uri_by_external_id:
|
|
403
|
+
for chunk in self._chunk(
|
|
404
|
+
list(self._asset_parent_uri_by_external_id.keys()), description="Extracting asset parent data sets"
|
|
405
|
+
):
|
|
406
|
+
assets = self._client.assets.retrieve_multiple(external_ids=list(chunk), ignore_unknown_ids=True)
|
|
407
|
+
for asset in assets:
|
|
408
|
+
if asset.data_set_id is None:
|
|
409
|
+
continue
|
|
410
|
+
object_ = (
|
|
411
|
+
Literal(self._lookup_dataset(asset.data_set_id))
|
|
412
|
+
if self._identifier == "externalId"
|
|
413
|
+
else Literal(asset.data_set_id)
|
|
414
|
+
)
|
|
415
|
+
yield self._asset_parent_uri_by_external_id[asset.external_id], self._namespace.dataSetId, object_
|
|
416
|
+
|
|
375
417
|
def _extract_with_logging_label_dataset(
|
|
376
418
|
self, extractor: ClassicCDFBaseExtractor, resource_type: InstanceIdPrefix | None = None
|
|
377
419
|
) -> Iterable[Triple]:
|
|
@@ -46,7 +46,7 @@ from cognite.neat._store import NeatGraphStore
|
|
|
46
46
|
from cognite.neat._utils.auxiliary import create_sha256_hash
|
|
47
47
|
from cognite.neat._utils.collection_ import iterate_progress_bar_if_above_config_threshold
|
|
48
48
|
from cognite.neat._utils.rdf_ import namespace_as_space, remove_namespace_from_uri, split_uri
|
|
49
|
-
from cognite.neat._utils.text import NamingStandardization, humanize_collection
|
|
49
|
+
from cognite.neat._utils.text import NamingStandardization
|
|
50
50
|
from cognite.neat._utils.upload import UploadResult
|
|
51
51
|
|
|
52
52
|
from ._base import _END_OF_CLASS, _START_OF_CLASS, CDFLoader
|
|
@@ -59,14 +59,12 @@ class _ViewIterator:
|
|
|
59
59
|
Args:
|
|
60
60
|
view_id: The view to iterate over
|
|
61
61
|
instance_count: The number of instances in the view
|
|
62
|
-
hierarchical_properties: The properties that are hierarchical, meaning they point to the same instances.
|
|
63
62
|
query: The query to get the instances from the store.
|
|
64
63
|
view: The view object from the client.
|
|
65
64
|
"""
|
|
66
65
|
|
|
67
66
|
view_id: dm.ViewId
|
|
68
67
|
instance_count: int
|
|
69
|
-
hierarchical_properties: set[str]
|
|
70
68
|
query: ViewQuery
|
|
71
69
|
view: dm.View | None = None
|
|
72
70
|
|
|
@@ -189,33 +187,12 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
189
187
|
)
|
|
190
188
|
yield _START_OF_CLASS(view.external_id)
|
|
191
189
|
for identifier, properties in instance_iterable:
|
|
192
|
-
yield from self._create_instances(
|
|
193
|
-
|
|
194
|
-
)
|
|
195
|
-
if it.hierarchical_properties:
|
|
196
|
-
# Force the creation of instances, before we create the hierarchical properties.
|
|
197
|
-
yield _END_OF_CLASS
|
|
198
|
-
yield _START_OF_CLASS(f"{view.external_id} ({humanize_collection(it.hierarchical_properties)})")
|
|
199
|
-
yield from self._create_hierarchical_properties(it, projection, stop_on_exception)
|
|
200
|
-
if reader is not instance_iterable:
|
|
190
|
+
yield from self._create_instances(identifier, properties, projection, stop_on_exception)
|
|
191
|
+
if reader is instance_iterable:
|
|
201
192
|
print(f"Loaded {it.instance_count} instances for {it.view_id!r}")
|
|
202
193
|
|
|
203
194
|
yield _END_OF_CLASS
|
|
204
195
|
|
|
205
|
-
def _create_hierarchical_properties(
|
|
206
|
-
self, it: _ViewIterator, projection: _Projection, stop_on_exception: bool
|
|
207
|
-
) -> Iterable[dm.InstanceApply | NeatIssue]:
|
|
208
|
-
reader = self.graph_store.read(it.query.rdf_type, property_renaming_config=it.query.property_renaming_config)
|
|
209
|
-
instance_iterable = iterate_progress_bar_if_above_config_threshold(
|
|
210
|
-
reader,
|
|
211
|
-
it.instance_count,
|
|
212
|
-
f"Loading {it.view_id!r} hierarchical properties: {humanize_collection(it.hierarchical_properties)}",
|
|
213
|
-
)
|
|
214
|
-
for identifier, properties in instance_iterable:
|
|
215
|
-
yield from self._create_instances(
|
|
216
|
-
identifier, properties, projection, stop_on_exception, include=it.hierarchical_properties
|
|
217
|
-
)
|
|
218
|
-
|
|
219
196
|
def _create_view_iterations(self) -> tuple[list[_ViewIterator], IssueList]:
|
|
220
197
|
view_query_by_id = RulesAnalysis(self.info_rules, self.dms_rules).view_query_by_id
|
|
221
198
|
iterations_by_view_id = self._select_views_with_instances(view_query_by_id)
|
|
@@ -228,8 +205,6 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
228
205
|
for missing_view in missing:
|
|
229
206
|
issues.append(ResourceNotFoundError(missing_view, "view", more="The view is not found in CDF."))
|
|
230
207
|
return [], issues
|
|
231
|
-
# Todo: Remove if this turns out to be unnecessary.
|
|
232
|
-
hierarchical_properties_by_view_id: dict[dm.ViewId, set[str]] = {}
|
|
233
208
|
else:
|
|
234
209
|
views = dm.ViewList([])
|
|
235
210
|
with catch_issues() as issues:
|
|
@@ -237,7 +212,6 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
237
212
|
views.extend(read_model.views)
|
|
238
213
|
if issues.has_errors:
|
|
239
214
|
return [], issues
|
|
240
|
-
hierarchical_properties_by_view_id = {}
|
|
241
215
|
views_by_id = {view.as_id(): view for view in views}
|
|
242
216
|
|
|
243
217
|
def sort_by_instance_type(id_: dm.ViewId) -> int:
|
|
@@ -254,7 +228,6 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
254
228
|
continue
|
|
255
229
|
view_iteration = iterations_by_view_id[view_id]
|
|
256
230
|
view_iteration.view = views_by_id.get(view_id)
|
|
257
|
-
view_iteration.hierarchical_properties = hierarchical_properties_by_view_id.get(view_id, set())
|
|
258
231
|
view_iterations.append(view_iteration)
|
|
259
232
|
return view_iterations, issues
|
|
260
233
|
|
|
@@ -264,7 +237,7 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
264
237
|
for view_id, query in view_query_by_id.items():
|
|
265
238
|
count = self.graph_store.queries.count_of_type(query.rdf_type)
|
|
266
239
|
if count > 0:
|
|
267
|
-
view_iterations[view_id] = _ViewIterator(view_id, count,
|
|
240
|
+
view_iterations[view_id] = _ViewIterator(view_id, count, query)
|
|
268
241
|
return view_iterations
|
|
269
242
|
|
|
270
243
|
def _lookup_space_by_uri(self, view_iterations: list[_ViewIterator], stop_on_exception: bool = False) -> IssueList:
|
|
@@ -500,8 +473,6 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
500
473
|
properties: dict[str | InstanceType, list[Any]],
|
|
501
474
|
projection: _Projection,
|
|
502
475
|
stop_on_exception: Literal[True, False] = False,
|
|
503
|
-
exclude: set[str] | None = None,
|
|
504
|
-
include: set[str] | None = None,
|
|
505
476
|
) -> Iterable[dm.InstanceApply | NeatIssue]:
|
|
506
477
|
instance_id = self._create_instance_id(instance_uri, "node", stop_on_exception)
|
|
507
478
|
if not isinstance(instance_id, InstanceId):
|
|
@@ -529,17 +500,16 @@ class DMSLoader(CDFLoader[dm.InstanceApply]):
|
|
|
529
500
|
yield error
|
|
530
501
|
return
|
|
531
502
|
_ = properties.pop(RDF.type)[0]
|
|
532
|
-
if exclude:
|
|
533
|
-
properties = {k: v for k, v in properties.items() if k not in exclude}
|
|
534
|
-
if include:
|
|
535
|
-
properties = {k: v for k, v in properties.items() if k in include}
|
|
536
503
|
|
|
537
504
|
sources = []
|
|
538
505
|
with catch_issues() as property_issues:
|
|
539
506
|
sources = [
|
|
540
507
|
dm.NodeOrEdgeData(
|
|
541
508
|
projection.view_id,
|
|
542
|
-
projection.pydantic_cls.model_validate(properties).model_dump(
|
|
509
|
+
projection.pydantic_cls.model_validate(properties).model_dump(
|
|
510
|
+
exclude_unset=True,
|
|
511
|
+
exclude_none=True,
|
|
512
|
+
),
|
|
543
513
|
)
|
|
544
514
|
]
|
|
545
515
|
for issue in property_issues:
|
|
@@ -20,10 +20,15 @@ from cognite.neat._rules.models import (
|
|
|
20
20
|
SchemaCompleteness,
|
|
21
21
|
SheetRow,
|
|
22
22
|
)
|
|
23
|
+
from cognite.neat._rules.models.data_types import _DATA_TYPE_BY_DMS_TYPE
|
|
23
24
|
from cognite.neat._rules.models.dms import DMSMetadata
|
|
25
|
+
from cognite.neat._rules.models.dms._rules import DMSRules
|
|
24
26
|
from cognite.neat._rules.models.information import InformationMetadata
|
|
25
27
|
from cognite.neat._rules.models.information._rules import InformationRules
|
|
26
|
-
from cognite.neat._utils.spreadsheet import find_column_with_value
|
|
28
|
+
from cognite.neat._utils.spreadsheet import (
|
|
29
|
+
find_column_with_value,
|
|
30
|
+
generate_data_validation,
|
|
31
|
+
)
|
|
27
32
|
|
|
28
33
|
from ._base import BaseExporter
|
|
29
34
|
|
|
@@ -54,6 +59,7 @@ class ExcelExporter(BaseExporter[VerifiedRules, Workbook]):
|
|
|
54
59
|
|
|
55
60
|
Style = Literal["none", "minimal", "default", "maximal"]
|
|
56
61
|
DumpOptions = Literal["user", "last", "reference"]
|
|
62
|
+
_helper_sheet_name: str = "_helper"
|
|
57
63
|
_main_header_by_sheet_name: ClassVar[dict[str, str]] = {
|
|
58
64
|
"Properties": "Definition of Properties",
|
|
59
65
|
"Classes": "Definition of Classes",
|
|
@@ -74,6 +80,7 @@ class ExcelExporter(BaseExporter[VerifiedRules, Workbook]):
|
|
|
74
80
|
add_empty_rows: bool = False,
|
|
75
81
|
hide_internal_columns: bool = True,
|
|
76
82
|
include_properties: Literal["same-space", "all"] = "all",
|
|
83
|
+
add_drop_downs: bool = True,
|
|
77
84
|
):
|
|
78
85
|
self.sheet_prefix = sheet_prefix or ""
|
|
79
86
|
if styling not in self.style_options:
|
|
@@ -85,6 +92,7 @@ class ExcelExporter(BaseExporter[VerifiedRules, Workbook]):
|
|
|
85
92
|
self.add_empty_rows = add_empty_rows
|
|
86
93
|
self.hide_internal_columns = hide_internal_columns
|
|
87
94
|
self.include_properties = include_properties
|
|
95
|
+
self.add_drop_downs = add_drop_downs
|
|
88
96
|
|
|
89
97
|
@property
|
|
90
98
|
def description(self) -> str:
|
|
@@ -130,8 +138,114 @@ class ExcelExporter(BaseExporter[VerifiedRules, Workbook]):
|
|
|
130
138
|
if column_letter:
|
|
131
139
|
ws.column_dimensions[column_letter].hidden = True
|
|
132
140
|
|
|
141
|
+
# Only add drop downs if the rules are DMSRules
|
|
142
|
+
if self.add_drop_downs and isinstance(rules, DMSRules):
|
|
143
|
+
self._add_drop_downs(workbook)
|
|
144
|
+
|
|
133
145
|
return workbook
|
|
134
146
|
|
|
147
|
+
def _add_drop_downs(self, workbook: Workbook, no_rows: int = 100) -> None:
|
|
148
|
+
"""Adds drop down menus to specific columns for fast and accurate data entry.
|
|
149
|
+
|
|
150
|
+
Args:
|
|
151
|
+
workbook: Workbook representation of the Excel file.
|
|
152
|
+
no_rows: number of rows to add drop down menus. Defaults to 100*100.
|
|
153
|
+
|
|
154
|
+
!!! note "Why no_rows=100?"
|
|
155
|
+
Maximum number of views per data model is 100, thus this value is set accordingly
|
|
156
|
+
|
|
157
|
+
!!! note "Why defining individual data validation per desired column?
|
|
158
|
+
This is due to the internal working of openpyxl. Adding same validation to
|
|
159
|
+
different column leads to unexpected behavior when the openpyxl workbook is exported
|
|
160
|
+
as and Excel file. Probably, the validation is not copied to the new column,
|
|
161
|
+
but instead reference to the data validation object is added.
|
|
162
|
+
"""
|
|
163
|
+
|
|
164
|
+
self._make_helper_sheet(workbook)
|
|
165
|
+
|
|
166
|
+
# We need create individual data validation and cannot re-use the same one due
|
|
167
|
+
# the internals of openpyxl
|
|
168
|
+
dv_views = generate_data_validation(self._helper_sheet_name, "A", no_header_rows=0, no_rows=no_rows)
|
|
169
|
+
dv_containers = generate_data_validation(self._helper_sheet_name, "b", no_header_rows=0, no_rows=no_rows)
|
|
170
|
+
dv_value_types = generate_data_validation(self._helper_sheet_name, "C", no_header_rows=0, no_rows=no_rows)
|
|
171
|
+
|
|
172
|
+
dv_immutable = generate_data_validation(self._helper_sheet_name, "D", no_header_rows=0, no_rows=3)
|
|
173
|
+
dv_nullable = generate_data_validation(self._helper_sheet_name, "D", no_header_rows=0, no_rows=3)
|
|
174
|
+
dv_is_list = generate_data_validation(self._helper_sheet_name, "D", no_header_rows=0, no_rows=3)
|
|
175
|
+
dv_in_model = generate_data_validation(self._helper_sheet_name, "D", no_header_rows=0, no_rows=3)
|
|
176
|
+
dv_used_for = generate_data_validation(self._helper_sheet_name, "E", no_header_rows=0, no_rows=3)
|
|
177
|
+
|
|
178
|
+
workbook["Properties"].add_data_validation(dv_views)
|
|
179
|
+
workbook["Properties"].add_data_validation(dv_containers)
|
|
180
|
+
workbook["Properties"].add_data_validation(dv_value_types)
|
|
181
|
+
workbook["Properties"].add_data_validation(dv_nullable)
|
|
182
|
+
workbook["Properties"].add_data_validation(dv_is_list)
|
|
183
|
+
workbook["Properties"].add_data_validation(dv_immutable)
|
|
184
|
+
workbook["Views"].add_data_validation(dv_in_model)
|
|
185
|
+
workbook["Containers"].add_data_validation(dv_used_for)
|
|
186
|
+
|
|
187
|
+
# we multiply no_rows with 100 since a view can have max 100 properties per view
|
|
188
|
+
if column := find_column_with_value(workbook["Properties"], "View"):
|
|
189
|
+
dv_views.add(f"{column}{3}:{column}{no_rows * 100}")
|
|
190
|
+
|
|
191
|
+
if column := find_column_with_value(workbook["Properties"], "Container"):
|
|
192
|
+
dv_containers.add(f"{column}{3}:{column}{no_rows * 100}")
|
|
193
|
+
|
|
194
|
+
if column := find_column_with_value(workbook["Properties"], "Value Type"):
|
|
195
|
+
dv_value_types.add(f"{column}{3}:{column}{no_rows * 100}")
|
|
196
|
+
|
|
197
|
+
if column := find_column_with_value(workbook["Properties"], "Nullable"):
|
|
198
|
+
dv_nullable.add(f"{column}{3}:{column}{no_rows * 100}")
|
|
199
|
+
|
|
200
|
+
if column := find_column_with_value(workbook["Properties"], "Is List"):
|
|
201
|
+
dv_is_list.add(f"{column}{3}:{column}{no_rows * 100}")
|
|
202
|
+
|
|
203
|
+
if column := find_column_with_value(workbook["Properties"], "Immutable"):
|
|
204
|
+
dv_immutable.add(f"{column}{3}:{column}{no_rows * 100}")
|
|
205
|
+
|
|
206
|
+
if column := find_column_with_value(workbook["Views"], "In Model"):
|
|
207
|
+
dv_in_model.add(f"{column}{3}:{column}{no_rows}")
|
|
208
|
+
|
|
209
|
+
if column := find_column_with_value(workbook["Containers"], "Used For"):
|
|
210
|
+
dv_used_for.add(f"{column}{3}:{column}{no_rows}")
|
|
211
|
+
|
|
212
|
+
def _make_helper_sheet(self, workbook: Workbook) -> None:
|
|
213
|
+
"""This helper sheet is used as source of data for drop down menus creation
|
|
214
|
+
|
|
215
|
+
!!! note "Why 100 rows?"
|
|
216
|
+
The number of rows is set to 100 since this is the maximum number of views
|
|
217
|
+
per data model.
|
|
218
|
+
"""
|
|
219
|
+
workbook.create_sheet(title=self._helper_sheet_name)
|
|
220
|
+
|
|
221
|
+
for counter, dtype in enumerate(_DATA_TYPE_BY_DMS_TYPE):
|
|
222
|
+
workbook[self._helper_sheet_name].cell(row=counter + 1, column=3, value=dtype)
|
|
223
|
+
|
|
224
|
+
for i in range(100):
|
|
225
|
+
workbook[self._helper_sheet_name].cell(
|
|
226
|
+
row=i + 1,
|
|
227
|
+
column=1,
|
|
228
|
+
value=f'=IF(ISBLANK(Views!A{i + 3}), "", Views!A{i + 3})',
|
|
229
|
+
)
|
|
230
|
+
workbook[self._helper_sheet_name].cell(
|
|
231
|
+
row=i + 1,
|
|
232
|
+
column=2,
|
|
233
|
+
value=f'=IF(ISBLANK(Containers!A{i + 3}), "", Containers!A{i + 3})',
|
|
234
|
+
)
|
|
235
|
+
workbook[self._helper_sheet_name].cell(
|
|
236
|
+
row=counter + i + 2,
|
|
237
|
+
column=3,
|
|
238
|
+
value=f'=IF(ISBLANK(Views!A{i + 3}), "", Views!A{i + 3})',
|
|
239
|
+
)
|
|
240
|
+
|
|
241
|
+
for i, value in enumerate([True, False, ""]):
|
|
242
|
+
workbook[self._helper_sheet_name].cell(row=i + 1, column=4, value=cast(bool | str, value))
|
|
243
|
+
|
|
244
|
+
for i, value in enumerate(["node", "edge", "all"]):
|
|
245
|
+
workbook[self._helper_sheet_name].cell(row=i + 1, column=5, value=value)
|
|
246
|
+
|
|
247
|
+
workbook[self._helper_sheet_name].sheet_state = "hidden"
|
|
248
|
+
|
|
135
249
|
def _write_sheets(
|
|
136
250
|
self,
|
|
137
251
|
workbook: Workbook,
|
cognite/neat/_session/_fix.py
CHANGED
|
@@ -9,7 +9,7 @@ from .exceptions import session_class_wrapper
|
|
|
9
9
|
|
|
10
10
|
@session_class_wrapper
|
|
11
11
|
class FixAPI:
|
|
12
|
-
"""Apply variety of fix methods to data model and
|
|
12
|
+
"""Apply variety of fix methods to data model and instances"""
|
|
13
13
|
|
|
14
14
|
def __init__(self, state: SessionState, verbose: bool) -> None:
|
|
15
15
|
self._state = state
|
cognite/neat/_session/_subset.py
CHANGED
|
@@ -25,6 +25,24 @@ class SubsetAPI:
|
|
|
25
25
|
self._state = state
|
|
26
26
|
|
|
27
27
|
def data_model(self, concepts: str | list[str]) -> IssueList:
|
|
28
|
+
"""Subset the data model to the desired concepts.
|
|
29
|
+
|
|
30
|
+
Args:
|
|
31
|
+
concepts: The concepts to subset the data model to.
|
|
32
|
+
|
|
33
|
+
Returns:
|
|
34
|
+
IssueList: A list of issues that occurred during the transformation.
|
|
35
|
+
|
|
36
|
+
Example:
|
|
37
|
+
Read the CogniteCore data model and reduce the data model to only the 'CogniteAsset' concept.
|
|
38
|
+
```python
|
|
39
|
+
neat = NeatSession(CogniteClient())
|
|
40
|
+
|
|
41
|
+
neat.read.examples.core_data_model()
|
|
42
|
+
|
|
43
|
+
neat.subset.data_model("CogniteAsset")
|
|
44
|
+
```
|
|
45
|
+
"""
|
|
28
46
|
if self._state.rule_store.empty:
|
|
29
47
|
raise NeatSessionError("No rules to set the data model ID.")
|
|
30
48
|
|
|
@@ -3,6 +3,7 @@ from typing import Any, Literal, cast, overload
|
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
from openpyxl import load_workbook
|
|
6
|
+
from openpyxl.worksheet.datavalidation import DataValidation
|
|
6
7
|
from openpyxl.worksheet.worksheet import Worksheet
|
|
7
8
|
|
|
8
9
|
from cognite.neat._rules._constants import get_internal_properties
|
|
@@ -133,3 +134,12 @@ def find_column_with_value(sheet: Worksheet, value: Any) -> str | None:
|
|
|
133
134
|
return cell.column_letter # type: ignore
|
|
134
135
|
|
|
135
136
|
return None
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def generate_data_validation(sheet: str, column: str, no_header_rows: int, no_rows: int) -> DataValidation:
|
|
140
|
+
"Creates openpyxl data validation object for a cell in a sheet"
|
|
141
|
+
|
|
142
|
+
return DataValidation(
|
|
143
|
+
type="list",
|
|
144
|
+
formula1=f"={sheet}!{column}${no_header_rows + 1}:{column}${no_rows}",
|
|
145
|
+
)
|
cognite/neat/_version.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
__version__ = "0.111.1"
|
|
1
|
+
__version__ = "0.112.0"
|
|
2
2
|
__engine__ = "^2.0.4"
|
|
@@ -25,8 +25,8 @@ cognite/neat/_graph/extractors/__init__.py,sha256=v7hPDaRzI4koBTesbCgcxTb2W0Eoqy
|
|
|
25
25
|
cognite/neat/_graph/extractors/_base.py,sha256=qQE-fl3f1hfqZg5KLF3zLHybP0u8ofRKf4jk7pEHnl4,1907
|
|
26
26
|
cognite/neat/_graph/extractors/_classic_cdf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
27
|
cognite/neat/_graph/extractors/_classic_cdf/_assets.py,sha256=9WVFrAtUFAp_AAlb26Rtt2Axz9xsPQYetg7SKVrNCr4,1474
|
|
28
|
-
cognite/neat/_graph/extractors/_classic_cdf/_base.py,sha256=
|
|
29
|
-
cognite/neat/_graph/extractors/_classic_cdf/_classic.py,sha256=
|
|
28
|
+
cognite/neat/_graph/extractors/_classic_cdf/_base.py,sha256=bYAoxsmiF6j5VB5Ptz2Kbkk8viEt8j_c-895cAI6E3Q,17639
|
|
29
|
+
cognite/neat/_graph/extractors/_classic_cdf/_classic.py,sha256=lkiuGLPwRBQgmoO5ev-UfKIkm0QzV5vRXa4n26r264M,22666
|
|
30
30
|
cognite/neat/_graph/extractors/_classic_cdf/_data_sets.py,sha256=xRFv9pVFgIMTZ45E8teMC0Ynku_CuZdcZkVCbhPuPBk,1294
|
|
31
31
|
cognite/neat/_graph/extractors/_classic_cdf/_events.py,sha256=B8hRoMAg8GQvApjxals5PfPyjmdPO93U3nj_G7g0kDQ,1394
|
|
32
32
|
cognite/neat/_graph/extractors/_classic_cdf/_files.py,sha256=Q816cVQ9qS7Art66HJfErL2OV7MxH_eSIG7bJ8_HJ7Q,1406
|
|
@@ -44,7 +44,7 @@ cognite/neat/_graph/extractors/_raw.py,sha256=xU3SmeLBCeqbs1WBdGCge8ZMnlOU6wgkKX
|
|
|
44
44
|
cognite/neat/_graph/extractors/_rdf_file.py,sha256=vz145N1_ZDAlAzCuMiv2z5-7Z4nG2fciLMnl9OpEc3M,2857
|
|
45
45
|
cognite/neat/_graph/loaders/__init__.py,sha256=XS6vwmxgBzntg7UuG_ct_1hfhShVnFH5u0gGrdA8WfA,699
|
|
46
46
|
cognite/neat/_graph/loaders/_base.py,sha256=Xq91-4GeQF2XN90-QgEFCU4aJabBXkeFeFXS2k4mWU4,4472
|
|
47
|
-
cognite/neat/_graph/loaders/_rdf2dms.py,sha256=
|
|
47
|
+
cognite/neat/_graph/loaders/_rdf2dms.py,sha256=2EpuElmYFjfDFp7DCccV4ruQvhFefdb5cSTz5OnMuag,32400
|
|
48
48
|
cognite/neat/_graph/queries/__init__.py,sha256=BgDd-037kvtWwAoGAy8eORVNMiZ5-E9sIV0txIpeaN4,50
|
|
49
49
|
cognite/neat/_graph/queries/_base.py,sha256=xs_kCiqQFJfaPyYrKhpyPIAvyDOf19RgYcdg3WxjB6s,19344
|
|
50
50
|
cognite/neat/_graph/transformers/__init__.py,sha256=YzC1Z8BuT77NwagWX4Z-F9R9BARLSS7zM4bCdxBbqKg,1761
|
|
@@ -84,7 +84,7 @@ cognite/neat/_rules/catalog/info-rules-imf.xlsx,sha256=vrE5g8vBtsGpwJqygxG3t9I3x
|
|
|
84
84
|
cognite/neat/_rules/exporters/__init__.py,sha256=IYBa0DIYlx8cFItgYRw9W4FY_LmVEjuaqMz3JORZZX0,1204
|
|
85
85
|
cognite/neat/_rules/exporters/_base.py,sha256=VkNMy8wsH-x4tAjS44cXgzzNH0CM2k_4RhkMwK50J7g,2284
|
|
86
86
|
cognite/neat/_rules/exporters/_rules2dms.py,sha256=7I3a8ZPwkIBQAClQbMjJ2D2aIITY-OBVUD-8hirCmzM,19183
|
|
87
|
-
cognite/neat/_rules/exporters/_rules2excel.py,sha256=
|
|
87
|
+
cognite/neat/_rules/exporters/_rules2excel.py,sha256=TWtiF18Vp1sekGRwTCvfxw6IBt7u41xXpDE9e-aj3no,19382
|
|
88
88
|
cognite/neat/_rules/exporters/_rules2instance_template.py,sha256=gI0tWFKzAhuFtnxVA7A9_AGYyL9lH_Yst-OYPNtTguA,5937
|
|
89
89
|
cognite/neat/_rules/exporters/_rules2ontology.py,sha256=8GIawhTeNIS59zWYX2j3F63Ix9-xNGxgTzU1HiGkN98,22105
|
|
90
90
|
cognite/neat/_rules/exporters/_rules2yaml.py,sha256=ggaPR8FO8PwZk1_nhwb5wVHk_C4s6qh1RrlbPkNcbBo,3160
|
|
@@ -140,7 +140,7 @@ cognite/neat/_session/_collector.py,sha256=RcOGY0DjTCCKJt9j_p0gnQXn4omhsIX2G8Aq3
|
|
|
140
140
|
cognite/neat/_session/_create.py,sha256=doDCbDIWMbHCYe3cyk1obQaFdYJjvARg3X4lRUVicCk,7214
|
|
141
141
|
cognite/neat/_session/_drop.py,sha256=gOkDAnddASpFxYxkPjlTyhkpNfnmDEj94GRI8tnHFR0,4167
|
|
142
142
|
cognite/neat/_session/_explore.py,sha256=hrL0ASLtEXLlZn0dgDsKNySO10qEMBT8cE8mti2lOws,1561
|
|
143
|
-
cognite/neat/_session/_fix.py,sha256=
|
|
143
|
+
cognite/neat/_session/_fix.py,sha256=wYXIIHKmWTNmOLr9RvDSkBJllKoomP2mCnMdB9x2ojw,898
|
|
144
144
|
cognite/neat/_session/_inspect.py,sha256=qoBAfCQnzC40ef91gxJmhonWo1Kr_VEjBb2KhbCOO_s,10084
|
|
145
145
|
cognite/neat/_session/_mapping.py,sha256=AkQwmqYH-0EgqoXHqCFwJY92hNSGzfojOelhVFlqH4c,2655
|
|
146
146
|
cognite/neat/_session/_prepare.py,sha256=BZ1NurenrsZDU4tg629wnt1Iuw_zVLRA58FNTQMYS9I,12636
|
|
@@ -148,7 +148,7 @@ cognite/neat/_session/_read.py,sha256=kULWbdP9T5jR9bW8S09Ix0aGRxN6PtcMrDy_IFJTyc
|
|
|
148
148
|
cognite/neat/_session/_set.py,sha256=dCZ5zEmNAw8tiqOGT7-EigSXOIGlfVP2ldA7nmC8LJ8,4451
|
|
149
149
|
cognite/neat/_session/_show.py,sha256=2lnkud996ouwf6-aKGvU0cU0ttfMeQ3vcb__g_7Yko4,10539
|
|
150
150
|
cognite/neat/_session/_state.py,sha256=CPyjYbgUe6uUnCG6h-UqDtdIaWKVLjY4lAz4ar2_75A,4222
|
|
151
|
-
cognite/neat/_session/_subset.py,sha256
|
|
151
|
+
cognite/neat/_session/_subset.py,sha256=vKtBiEnOruqe_749Nd8vzRS5HIZMR-sXSxyEH9Fa6Gk,2673
|
|
152
152
|
cognite/neat/_session/_to.py,sha256=3bZGaXAXFvgzmNPmOtjzLiJLSlRXXaD6vYp4Cotx9Ks,18692
|
|
153
153
|
cognite/neat/_session/_wizard.py,sha256=9idlzhZy54h2Iwupe9iXKX3RDb5jJQuBZFEouni50L0,1476
|
|
154
154
|
cognite/neat/_session/engine/__init__.py,sha256=D3MxUorEs6-NtgoICqtZ8PISQrjrr4dvca6n48bu_bI,120
|
|
@@ -171,15 +171,15 @@ cognite/neat/_utils/io_.py,sha256=D2Mg8sOxfBoDg3fC0jBzaxO3vkXmr0QvZSgYIv6xRkM,38
|
|
|
171
171
|
cognite/neat/_utils/rdf_.py,sha256=v4m8DD9dcHkALSx6wStC2h3kj-e3BmhxJT20fydtw7g,9897
|
|
172
172
|
cognite/neat/_utils/reader/__init__.py,sha256=fPkrNB_9hLB7CyHTCFV_xEbIfOMqUQzNly5JN33-QfM,146
|
|
173
173
|
cognite/neat/_utils/reader/_base.py,sha256=Q35hz8tqAiQiELjE4DsDDKQHLtRmSTrty4Gep9rg_CU,5444
|
|
174
|
-
cognite/neat/_utils/spreadsheet.py,sha256=
|
|
174
|
+
cognite/neat/_utils/spreadsheet.py,sha256=_QaziVzo83X6vuXdAmqp4HgMmF3eb9PrTRwq8F1POl4,4652
|
|
175
175
|
cognite/neat/_utils/text.py,sha256=BFJoEOQBFgpelysL92FdF0OVRVFl0q9tRNoz-oRanNc,7779
|
|
176
176
|
cognite/neat/_utils/time_.py,sha256=O30LUiDH9TdOYz8_a9pFqTtJdg8vEjC3qHCk8xZblG8,345
|
|
177
177
|
cognite/neat/_utils/upload.py,sha256=xWtM6mFuD2QYQHaZ7zCAuGptbEpPIxcH-raWQu93-Ug,5845
|
|
178
178
|
cognite/neat/_utils/xml_.py,sha256=FQkq84u35MUsnKcL6nTMJ9ajtG9D5i1u4VBnhGqP2DQ,1710
|
|
179
|
-
cognite/neat/_version.py,sha256
|
|
179
|
+
cognite/neat/_version.py,sha256=W7xZS6jccWmomymHy0HgbVW_-n6rOs454M4iJwFADDg,46
|
|
180
180
|
cognite/neat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
181
|
-
cognite_neat-0.
|
|
182
|
-
cognite_neat-0.
|
|
183
|
-
cognite_neat-0.
|
|
184
|
-
cognite_neat-0.
|
|
185
|
-
cognite_neat-0.
|
|
181
|
+
cognite_neat-0.112.0.dist-info/LICENSE,sha256=W8VmvFia4WHa3Gqxq1Ygrq85McUNqIGDVgtdvzT-XqA,11351
|
|
182
|
+
cognite_neat-0.112.0.dist-info/METADATA,sha256=5I-isgVln98HgiP59TJnX8IbvlHTenc46CgNBJT_6mc,5361
|
|
183
|
+
cognite_neat-0.112.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
|
184
|
+
cognite_neat-0.112.0.dist-info/entry_points.txt,sha256=SsQlnl8SNMSSjE3acBI835JYFtsIinLSbVmHmMEXv6E,51
|
|
185
|
+
cognite_neat-0.112.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|