cognite-toolkit 0.6.78__py3-none-any.whl → 0.6.80__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cognite-toolkit might be problematic. Click here for more details.
- cognite_toolkit/_builtin_modules/cdf.toml +1 -1
- cognite_toolkit/_cdf.py +0 -4
- cognite_toolkit/_cdf_tk/apps/__init__.py +0 -2
- cognite_toolkit/_cdf_tk/apps/_migrate_app.py +134 -21
- cognite_toolkit/_cdf_tk/apps/_modules_app.py +27 -0
- cognite_toolkit/_cdf_tk/commands/__init__.py +0 -6
- cognite_toolkit/_cdf_tk/commands/_migrate/__init__.py +0 -4
- cognite_toolkit/_cdf_tk/commands/_migrate/command.py +6 -8
- cognite_toolkit/_cdf_tk/commands/_migrate/creators.py +26 -2
- cognite_toolkit/_cdf_tk/commands/_migrate/data_classes.py +42 -8
- cognite_toolkit/_cdf_tk/commands/_migrate/data_mapper.py +4 -6
- cognite_toolkit/_cdf_tk/commands/_migrate/default_mappings.py +1 -0
- cognite_toolkit/_cdf_tk/commands/_migrate/migration_io.py +203 -0
- cognite_toolkit/_cdf_tk/commands/_migrate/selectors.py +66 -0
- cognite_toolkit/_cdf_tk/commands/modules.py +59 -14
- cognite_toolkit/_cdf_tk/constants.py +3 -0
- cognite_toolkit/_cdf_tk/feature_flags.py +0 -4
- cognite_toolkit/_cdf_tk/storageio/_asset_centric.py +6 -6
- cognite_toolkit/_cdf_tk/storageio/_base.py +2 -5
- cognite_toolkit/_cdf_tk/utils/useful_types.py +3 -1
- cognite_toolkit/_repo_files/GitHub/.github/workflows/deploy.yaml +1 -1
- cognite_toolkit/_repo_files/GitHub/.github/workflows/dry-run.yaml +1 -1
- cognite_toolkit/_resources/cdf.toml +13 -0
- cognite_toolkit/_version.py +1 -1
- {cognite_toolkit-0.6.78.dist-info → cognite_toolkit-0.6.80.dist-info}/METADATA +1 -1
- {cognite_toolkit-0.6.78.dist-info → cognite_toolkit-0.6.80.dist-info}/RECORD +29 -32
- cognite_toolkit/_cdf_tk/apps/_populate_app.py +0 -80
- cognite_toolkit/_cdf_tk/commands/_migrate/adapter.py +0 -368
- cognite_toolkit/_cdf_tk/commands/_migrate/assets.py +0 -0
- cognite_toolkit/_cdf_tk/commands/_migrate/files.py +0 -165
- cognite_toolkit/_cdf_tk/commands/_migrate/timeseries.py +0 -165
- cognite_toolkit/_cdf_tk/commands/_populate.py +0 -306
- {cognite_toolkit-0.6.78.dist-info → cognite_toolkit-0.6.80.dist-info}/WHEEL +0 -0
- {cognite_toolkit-0.6.78.dist-info → cognite_toolkit-0.6.80.dist-info}/entry_points.txt +0 -0
- {cognite_toolkit-0.6.78.dist-info → cognite_toolkit-0.6.80.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,165 +0,0 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
|
-
|
|
3
|
-
import questionary
|
|
4
|
-
from cognite.client.data_classes.capabilities import (
|
|
5
|
-
Capability,
|
|
6
|
-
DataSetScope,
|
|
7
|
-
FilesAcl,
|
|
8
|
-
)
|
|
9
|
-
from cognite.client.data_classes.data_modeling import ViewId
|
|
10
|
-
from cognite.client.data_classes.data_modeling.cdm.v1 import CogniteFileApply
|
|
11
|
-
from cognite.client.exceptions import CogniteAPIError
|
|
12
|
-
from rich import print
|
|
13
|
-
from rich.panel import Panel
|
|
14
|
-
from rich.progress import track
|
|
15
|
-
|
|
16
|
-
from cognite_toolkit._cdf_tk.client import ToolkitClient
|
|
17
|
-
from cognite_toolkit._cdf_tk.client.data_classes.extended_filemetdata import ExtendedFileMetadata
|
|
18
|
-
from cognite_toolkit._cdf_tk.exceptions import (
|
|
19
|
-
AuthenticationError,
|
|
20
|
-
ToolkitValueError,
|
|
21
|
-
)
|
|
22
|
-
from cognite_toolkit._cdf_tk.utils import humanize_collection
|
|
23
|
-
from cognite_toolkit._cdf_tk.utils.collection import chunker_sequence
|
|
24
|
-
|
|
25
|
-
from .base import BaseMigrateCommand
|
|
26
|
-
from .data_classes import MigrationMappingList
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
class MigrateFilesCommand(BaseMigrateCommand):
    """Migrate asset-centric Files to ``CogniteFile`` nodes in data modeling.

    The command reads a mapping CSV, validates access and the state of the
    source files, and then, chunk by chunk, sets a pending instance ID on each
    file and creates the matching ``CogniteFile`` node.
    """

    cdf_cdm = "cdf_cdm"
    view_id = ViewId(cdf_cdm, "CogniteFile", "v1")
    # Number of mappings sent per request when validating and migrating.
    chunk_size = 1000

    def source_acl(self, data_set_ids: list[int]) -> Capability:
        """Return the Files read/write capability scoped to ``data_set_ids``."""
        return FilesAcl(
            actions=[FilesAcl.Action.Read, FilesAcl.Action.Write],
            scope=DataSetScope(data_set_ids),
        )

    def migrate_files(
        self,
        client: ToolkitClient,
        mapping_file: Path,
        dry_run: bool = False,
        verbose: bool = False,
        auto_yes: bool = False,
    ) -> None:
        """Migrate resources from Asset-Centric to data modeling in CDF.

        Args:
            client: Client used for all CDF requests.
            mapping_file: CSV file with the file-to-instance mappings.
            dry_run: When True, run all validation and report the number of
                files that would be migrated, without changing anything.
            verbose: When True, print per-chunk progress during migration.
            auto_yes: When True, skip the interactive confirmation prompt.

        Raises:
            AuthenticationError: If the source files cannot be retrieved.
            ToolkitValueError: If validation or the migration itself fails.
        """
        mappings = MigrationMappingList.read_csv_file(mapping_file, "file")
        # validate_access / validate_available_capacity are inherited helpers
        # (not visible here); presumably they raise when a check fails.
        self.validate_access(
            client,
            instance_spaces=list(mappings.spaces()),
            schema_spaces=[self.cdf_cdm],
            data_set_ids=list(mappings.get_data_set_ids()),
        )
        self._validate_files(client, mappings)
        self.validate_available_capacity(client, len(mappings))

        if dry_run:
            self.console(f"Dry run mode. Would have migrated {len(mappings):,} Files to CogniteFiles.")
            return
        if not auto_yes and self._confirm(mappings) is False:
            return
        self._migrate(client, mappings, verbose)

    def _validate_files(self, client: ToolkitClient, mappings: MigrationMappingList) -> None:
        """Check that every mapped file exists, has content, and is not yet migrated.

        Raises:
            AuthenticationError: If retrieving the files fails with an API error.
            ToolkitValueError: If files are missing in CDF, lack uploaded
                content, or already exist as nodes in data modeling.
        """
        total_validated = 0
        # Ceiling division: an exact multiple of chunk_size must not add an
        # extra (never reached) step to the progress bar.
        chunk_count = (len(mappings) + self.chunk_size - 1) // self.chunk_size
        chunk: MigrationMappingList
        for chunk in track(
            chunker_sequence(mappings, size=self.chunk_size),
            description="Validating...",
            total=chunk_count,
        ):
            requested_ids = chunk.get_ids()
            try:
                files = client.files.retrieve_multiple(
                    ids=requested_ids,
                    ignore_unknown_ids=True,
                )
            except CogniteAPIError as e:
                raise AuthenticationError(
                    f"Failed to retrieve Files. This is likely due to lack of permissions: {e!s}"
                ) from e

            # Unknown IDs are silently dropped by the lookup above, so a
            # shortfall relative to what was requested for THIS chunk means
            # files are missing. (Comparing against the full mapping list, with
            # the operands swapped, could never be positive.)
            missing_count = len(requested_ids) - len(files)
            if missing_count > 0:
                raise ToolkitValueError(f"{missing_count} Files are missing in CDF.")

            # Report identifiers rather than whole metadata objects so the
            # error message stays readable.
            missing_file_content = [str(file.external_id or file.id) for file in files if file.uploaded is not True]
            if missing_file_content:
                raise ToolkitValueError(
                    f"The following files does not have file content yet: {humanize_collection(missing_file_content)}. "
                    "You can only migrate files that have file content uploaded."
                )

            existing_result = client.data_modeling.instances.retrieve(chunk.as_node_ids())
            if len(existing_result.nodes) != 0:
                raise ToolkitValueError(
                    "Some of the Files you are trying to migrate already exist in Data Modeling. "
                    f"Please remove the following files from the mapping file {humanize_collection(existing_result.nodes.as_ids())}"
                )
            total_validated += len(files)
        print(
            f"Validated {total_validated:,} Files for migration. "
            f"{len(mappings):,} mappings provided in the mapping file."
        )

    @staticmethod
    def _confirm(mappings: MigrationMappingList) -> bool:
        """Show an irreversibility warning and ask the user to confirm.

        Returns:
            True if the user confirmed, False otherwise.
        """
        print(
            Panel(
                f"[red]WARNING:[/red] This operation [bold]cannot be undone[/bold]! "
                f"{len(mappings):,} Files will linked to the new CogniteFiles. "
                "This linking cannot be undone",
                style="bold",
                title="Migrate asset-centric Files to CogniteFiles",
                title_align="left",
                border_style="red",
                expand=False,
            )
        )

        if not questionary.confirm("Are you really sure you want to continue?", default=False).ask():
            print("Migration cancelled by user.")
            return False
        return True

    def _migrate(self, client: ToolkitClient, mappings: MigrationMappingList, verbose: bool) -> None:
        """Set pending instance IDs and create the CogniteFile nodes, chunk by chunk.

        Raises:
            ToolkitValueError: If setting pending IDs or applying the nodes
                fails with an API error.
        """
        print("Migrating Files to CogniteFiles...")
        total_migrated = 0
        chunk_count = (len(mappings) + self.chunk_size - 1) // self.chunk_size
        for chunk in track(
            chunker_sequence(mappings, size=self.chunk_size),
            description="Migrating Files to CogniteFiles...",
            total=chunk_count,
        ):
            if verbose:
                print(f"Migrating {len(chunk):,} Files...")

            # Set pending IDs for the chunk of mappings
            try:
                pending_files = client.files.set_pending_ids(chunk.as_pending_ids())
            except CogniteAPIError as e:
                raise ToolkitValueError(f"Failed to set pending IDs for Files: {e!s}") from e

            # Iterating the returned list yields ExtendedFileMetadata objects.
            converted_files = [self.as_cognite_file(file) for file in pending_files]  # type: ignore[arg-type]
            try:
                created = client.data_modeling.instances.apply_fast(converted_files)
            except CogniteAPIError as e:
                raise ToolkitValueError(f"Failed to apply Files: {e!s}") from e
            if verbose:
                print(f"Created {len(created):,} CogniteFiles.")
            total_migrated += len(created)
        print(f"Successfully migrated {total_migrated:,} Files to CogniteFiles.")

    @classmethod
    def as_cognite_file(cls, file: ExtendedFileMetadata) -> CogniteFileApply:
        """Convert a file with a pending instance ID into a ``CogniteFileApply``.

        Only the name and MIME type are carried over; the node's space and
        external ID come from the file's pending instance ID.

        Raises:
            ToolkitValueError: If ``file.pending_instance_id`` is not set.
        """
        if file.pending_instance_id is None:
            raise ToolkitValueError("ExtendedFiles must have a pending_instance_id set before migration.")

        return CogniteFileApply(
            space=file.pending_instance_id.space,
            external_id=file.pending_instance_id.external_id,
            name=file.name,
            mime_type=file.mime_type,
        )
|
|
@@ -1,165 +0,0 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
|
-
|
|
3
|
-
import questionary
|
|
4
|
-
from cognite.client.data_classes.capabilities import (
|
|
5
|
-
Capability,
|
|
6
|
-
DataSetScope,
|
|
7
|
-
TimeSeriesAcl,
|
|
8
|
-
)
|
|
9
|
-
from cognite.client.data_classes.data_modeling import DirectRelationReference, ViewId
|
|
10
|
-
from cognite.client.data_classes.data_modeling.cdm.v1 import CogniteTimeSeriesApply
|
|
11
|
-
from cognite.client.exceptions import CogniteAPIError
|
|
12
|
-
from rich import print
|
|
13
|
-
from rich.panel import Panel
|
|
14
|
-
from rich.progress import track
|
|
15
|
-
|
|
16
|
-
from cognite_toolkit._cdf_tk.client import ToolkitClient
|
|
17
|
-
from cognite_toolkit._cdf_tk.client.data_classes.extended_timeseries import ExtendedTimeSeries
|
|
18
|
-
from cognite_toolkit._cdf_tk.exceptions import (
|
|
19
|
-
AuthenticationError,
|
|
20
|
-
ToolkitValueError,
|
|
21
|
-
)
|
|
22
|
-
from cognite_toolkit._cdf_tk.utils import humanize_collection
|
|
23
|
-
from cognite_toolkit._cdf_tk.utils.collection import chunker_sequence
|
|
24
|
-
|
|
25
|
-
from .base import BaseMigrateCommand
|
|
26
|
-
from .data_classes import MigrationMappingList
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
class MigrateTimeseriesCommand(BaseMigrateCommand):
    """Migrate asset-centric TimeSeries to ``CogniteTimeSeries`` nodes in data modeling.

    The command reads a mapping CSV, validates access and the state of the
    source time series, and then, chunk by chunk, sets a pending instance ID
    on each time series and creates the matching ``CogniteTimeSeries`` node.
    """

    cdf_cdm = "cdf_cdm"
    view_id = ViewId(cdf_cdm, "CogniteTimeSeries", "v1")
    # Space used for the unit direct relation built in as_cognite_timeseries.
    cdf_cdm_units = "cdf_cdm_units"
    # Number of mappings sent per request when validating and migrating.
    chunk_size = 1000

    def source_acl(self, data_set_ids: list[int]) -> Capability:
        """Return the TimeSeries read/write capability scoped to ``data_set_ids``."""
        return TimeSeriesAcl(
            actions=[TimeSeriesAcl.Action.Read, TimeSeriesAcl.Action.Write],
            scope=DataSetScope(data_set_ids),
        )

    def migrate_timeseries(
        self,
        client: ToolkitClient,
        mapping_file: Path,
        dry_run: bool = False,
        verbose: bool = False,
        auto_yes: bool = False,
    ) -> None:
        """Migrate resources from Asset-Centric to data modeling in CDF.

        Args:
            client: Client used for all CDF requests.
            mapping_file: CSV file with the time-series-to-instance mappings.
            dry_run: When True, run all validation and report the number of
                time series that would be migrated, without changing anything.
            verbose: When True, print per-chunk progress during migration.
            auto_yes: When True, skip the interactive confirmation prompt.

        Raises:
            AuthenticationError: If the source time series cannot be retrieved.
            ToolkitValueError: If validation or the migration itself fails.
        """
        mappings = MigrationMappingList.read_csv_file(mapping_file, "timeseries")
        # validate_access / validate_available_capacity are inherited helpers
        # (not visible here); presumably they raise when a check fails.
        self.validate_access(
            client,
            instance_spaces=list(mappings.spaces()),
            schema_spaces=[self.cdf_cdm, self.cdf_cdm_units],
            data_set_ids=list(mappings.get_data_set_ids()),
        )
        self._validate_timeseries_existence(client, mappings)
        self.validate_available_capacity(client, len(mappings))

        if dry_run:
            self.console(f"Dry run mode. Would have migrated {len(mappings):,} TimeSeries to CogniteTimeSeries.")
            return
        if not auto_yes and self._confirm(mappings) is False:
            return
        self._migrate(client, mappings, verbose)

    def _validate_timeseries_existence(self, client: ToolkitClient, mappings: MigrationMappingList) -> None:
        """Check that every mapped time series exists and is not yet migrated.

        Raises:
            AuthenticationError: If retrieving the time series fails with an
                API error.
            ToolkitValueError: If time series are missing in CDF or already
                exist as nodes in data modeling.
        """
        total_validated = 0
        # Ceiling division: an exact multiple of chunk_size must not add an
        # extra (never reached) step to the progress bar.
        chunk_count = (len(mappings) + self.chunk_size - 1) // self.chunk_size
        chunk: MigrationMappingList
        for chunk in track(
            chunker_sequence(mappings, size=self.chunk_size),
            description="Validating...",
            total=chunk_count,
        ):
            requested_ids = chunk.get_ids()
            try:
                timeseries = client.time_series.retrieve_multiple(
                    ids=requested_ids,
                    ignore_unknown_ids=True,
                )
            except CogniteAPIError as e:
                raise AuthenticationError(
                    f"Failed to retrieve TimeSeries. This is likely due to lack of permissions: {e!s}"
                ) from e

            # Unknown IDs are silently dropped by the lookup above, so a
            # shortfall relative to what was requested for THIS chunk means
            # time series are missing. (Comparing against the full mapping
            # list, with the operands swapped, could never be positive.)
            missing_count = len(requested_ids) - len(timeseries)
            if missing_count > 0:
                raise ToolkitValueError(f"Missing {missing_count} TimeSeries does not exist in CDF.")

            existing_result = client.data_modeling.instances.retrieve(chunk.as_node_ids())
            if len(existing_result.nodes) != 0:
                raise ToolkitValueError(
                    "Some of the TimeSeries you are trying to migrate already exist in Data Modeling. "
                    f"Please remove the following TimeSeries from the mapping file {humanize_collection(existing_result.nodes.as_ids())}"
                )
            total_validated += len(timeseries)
        print(
            f"Validated {total_validated:,} TimeSeries for migration. "
            f"{len(mappings):,} mappings provided in the mapping file."
        )

    @staticmethod
    def _confirm(mappings: MigrationMappingList) -> bool:
        """Show an irreversibility warning and ask the user to confirm.

        Returns:
            True if the user confirmed, False otherwise.
        """
        print(
            Panel(
                f"[red]WARNING:[/red] This operation [bold]cannot be undone[/bold]! "
                f"{len(mappings):,} TimeSeries will linked to the new CogniteTimeSeries. "
                "This linking cannot be undone",
                style="bold",
                title="Migrate asset-centric TimeSeries to CogniteTimeSeries",
                title_align="left",
                border_style="red",
                expand=False,
            )
        )

        if not questionary.confirm("Are you really sure you want to continue?", default=False).ask():
            print("Migration cancelled by user.")
            return False
        return True

    def _migrate(self, client: ToolkitClient, mappings: MigrationMappingList, verbose: bool) -> None:
        """Set pending instance IDs and create the CogniteTimeSeries nodes, chunk by chunk.

        Raises:
            ToolkitValueError: If setting pending IDs or applying the nodes
                fails with an API error.
        """
        print("Migrating TimeSeries to CogniteTimeSeries...")
        total_migrated = 0
        chunk_count = (len(mappings) + self.chunk_size - 1) // self.chunk_size
        for chunk in track(
            chunker_sequence(mappings, size=self.chunk_size),
            description="Migrating TimeSeries to CogniteTimeSeries...",
            total=chunk_count,
        ):
            if verbose:
                print(f"Migrating {len(chunk):,} TimeSeries...")

            # Set pending IDs for the chunk of mappings
            try:
                pending_timeseries = client.time_series.set_pending_ids(chunk.as_pending_ids())
            except CogniteAPIError as e:
                raise ToolkitValueError(f"Failed to set pending IDs for TimeSeries: {e!s}") from e

            # The ExtendedTimeSeriesList is iterating ExtendedTimeSeries objects.
            converted_timeseries = [self.as_cognite_timeseries(ts) for ts in pending_timeseries]  # type: ignore[arg-type]
            try:
                created = client.data_modeling.instances.apply_fast(converted_timeseries)
            except CogniteAPIError as e:
                raise ToolkitValueError(f"Failed to apply TimeSeries: {e!s}") from e
            if verbose:
                print(f"Created {len(created):,} CogniteTimeSeries.")
            total_migrated += len(created)
        print(f"Successfully migrated {total_migrated:,} TimeSeries to CogniteTimeSeries.")

    @classmethod
    def as_cognite_timeseries(cls, ts: ExtendedTimeSeries) -> CogniteTimeSeriesApply:
        """Convert a time series with a pending instance ID into a ``CogniteTimeSeriesApply``.

        The node's space and external ID come from the pending instance ID.
        ``unit_external_id``, when set, becomes a direct relation into the
        ``cdf_cdm_units`` space; the free-text ``unit`` is kept as the source
        unit.

        Raises:
            ToolkitValueError: If ``ts.pending_instance_id`` or ``ts.is_step``
                is not set.
        """
        if ts.pending_instance_id is None:
            raise ToolkitValueError("ExtendedTimeSeries must have a pending_instance_id set before migration.")
        if ts.is_step is None:
            raise ToolkitValueError("ExtendedTimeSeries must have is_step set before migration.")

        return CogniteTimeSeriesApply(
            space=ts.pending_instance_id.space,
            external_id=ts.pending_instance_id.external_id,
            name=ts.name,
            description=ts.description,
            is_step=ts.is_step,
            time_series_type="string" if ts.is_string else "numeric",
            source_unit=ts.unit,
            unit=DirectRelationReference(cls.cdf_cdm_units, ts.unit_external_id) if ts.unit_external_id else None,
        )
|
|
@@ -1,306 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import math
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
from typing import Any, cast
|
|
6
|
-
|
|
7
|
-
import pandas as pd
|
|
8
|
-
import questionary
|
|
9
|
-
import typer
|
|
10
|
-
from cognite.client.data_classes.data_modeling import (
|
|
11
|
-
DataModel,
|
|
12
|
-
MappedProperty,
|
|
13
|
-
NodeApply,
|
|
14
|
-
NodeOrEdgeData,
|
|
15
|
-
PropertyType,
|
|
16
|
-
Space,
|
|
17
|
-
View,
|
|
18
|
-
ViewId,
|
|
19
|
-
)
|
|
20
|
-
from cognite.client.data_classes.data_modeling.data_types import (
|
|
21
|
-
Boolean,
|
|
22
|
-
CDFExternalIdReference,
|
|
23
|
-
Date,
|
|
24
|
-
DirectRelation,
|
|
25
|
-
Enum,
|
|
26
|
-
Float32,
|
|
27
|
-
Float64,
|
|
28
|
-
Int32,
|
|
29
|
-
Int64,
|
|
30
|
-
Json,
|
|
31
|
-
ListablePropertyType,
|
|
32
|
-
Text,
|
|
33
|
-
Timestamp,
|
|
34
|
-
)
|
|
35
|
-
from cognite.client.exceptions import CogniteAPIError
|
|
36
|
-
from questionary import Choice
|
|
37
|
-
from rich import print
|
|
38
|
-
from rich.markup import escape
|
|
39
|
-
from rich.progress import Progress
|
|
40
|
-
|
|
41
|
-
from cognite_toolkit._cdf_tk.client import ToolkitClient
|
|
42
|
-
from cognite_toolkit._cdf_tk.exceptions import (
|
|
43
|
-
CDFAPIError,
|
|
44
|
-
ToolkitFileNotFoundError,
|
|
45
|
-
ToolkitRequiredValueError,
|
|
46
|
-
ToolkitResourceMissingError,
|
|
47
|
-
)
|
|
48
|
-
from cognite_toolkit._cdf_tk.utils.collection import chunker, humanize_collection
|
|
49
|
-
from cognite_toolkit._cdf_tk.utils.file import get_table_columns, read_csv
|
|
50
|
-
|
|
51
|
-
from ._base import ToolkitCommand
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
@dataclass
class PopulateConfig:
    """Resolved inputs for populating one view from one table."""

    # The view whose properties are populated.
    view: View
    # Path to the source table; the populate command reads .csv and .parquet.
    table: Path
    # Space the created nodes are written to.
    instance_space: str
    # Table column holding each node's external ID.
    external_id_column: str
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
class PopulateCommand(ToolkitCommand):
|
|
63
|
-
def view(
|
|
64
|
-
self,
|
|
65
|
-
client: ToolkitClient,
|
|
66
|
-
view_id: list[str] | None = None,
|
|
67
|
-
table: Path | None = None,
|
|
68
|
-
instance_space: str | None = None,
|
|
69
|
-
external_id_column: str | None = None,
|
|
70
|
-
verbose: bool = False,
|
|
71
|
-
) -> None:
|
|
72
|
-
if view_id is None:
|
|
73
|
-
config = self._get_config_from_user(client)
|
|
74
|
-
else:
|
|
75
|
-
config = self._validate_config(view_id, table, instance_space, external_id_column, client)
|
|
76
|
-
|
|
77
|
-
if config.table.suffix == ".csv":
|
|
78
|
-
data = read_csv(config.table)
|
|
79
|
-
else:
|
|
80
|
-
# Parquet - already validated
|
|
81
|
-
data = pd.read_parquet(config.table)
|
|
82
|
-
|
|
83
|
-
properties_by_column, property_types_by_column = self._properties_by_column(list(data.columns), config.view)
|
|
84
|
-
|
|
85
|
-
with Progress() as progress:
|
|
86
|
-
task = progress.add_task("Populating view", total=len(data))
|
|
87
|
-
for chunk in chunker(data.to_dict(orient="records"), 1_000):
|
|
88
|
-
nodes = [
|
|
89
|
-
NodeApply(
|
|
90
|
-
space=config.instance_space,
|
|
91
|
-
external_id=row[config.external_id_column],
|
|
92
|
-
sources=[
|
|
93
|
-
NodeOrEdgeData(
|
|
94
|
-
source=config.view.as_id(),
|
|
95
|
-
properties={
|
|
96
|
-
properties_by_column[col]: self._serialize_value(
|
|
97
|
-
value, property_types_by_column[col], config.instance_space
|
|
98
|
-
)
|
|
99
|
-
for col, value in row.items()
|
|
100
|
-
if col in properties_by_column and col in property_types_by_column
|
|
101
|
-
},
|
|
102
|
-
)
|
|
103
|
-
],
|
|
104
|
-
)
|
|
105
|
-
for row in chunk
|
|
106
|
-
]
|
|
107
|
-
try:
|
|
108
|
-
created = client.data_modeling.instances.apply(nodes=nodes, auto_create_direct_relations=True)
|
|
109
|
-
except CogniteAPIError as e:
|
|
110
|
-
raise CDFAPIError(f"Failed to populate view; {escape(str(e))}")
|
|
111
|
-
else:
|
|
112
|
-
progress.update(task, advance=len(created.nodes))
|
|
113
|
-
|
|
114
|
-
def _get_config_from_user(self, client: ToolkitClient) -> PopulateConfig:
|
|
115
|
-
view = self._get_view_from_user(client)
|
|
116
|
-
table = self._get_table_from_user()
|
|
117
|
-
instance_space = self._get_instance_space_from_user(client)
|
|
118
|
-
external_id_column = self._get_external_id_column_from_user(table)
|
|
119
|
-
return PopulateConfig(
|
|
120
|
-
view=view,
|
|
121
|
-
table=table,
|
|
122
|
-
instance_space=instance_space,
|
|
123
|
-
external_id_column=external_id_column,
|
|
124
|
-
)
|
|
125
|
-
|
|
126
|
-
@staticmethod
|
|
127
|
-
def _get_view_from_user(client: ToolkitClient) -> View:
|
|
128
|
-
data_models = client.data_modeling.data_models.list(inline_views=False, limit=-1, all_versions=False)
|
|
129
|
-
data_model_choices = [
|
|
130
|
-
Choice(f"{dm.as_id().as_tuple()}", value=dm)
|
|
131
|
-
for dm in sorted(data_models, key=lambda dm: (dm.space, dm.external_id))
|
|
132
|
-
]
|
|
133
|
-
selected_data_model: DataModel[ViewId] | None = questionary.select(
|
|
134
|
-
"Select the data model containing the view to populate",
|
|
135
|
-
choices=data_model_choices,
|
|
136
|
-
).ask()
|
|
137
|
-
|
|
138
|
-
if selected_data_model is None:
|
|
139
|
-
print("No data model selected. Exiting.")
|
|
140
|
-
raise typer.Exit(0)
|
|
141
|
-
|
|
142
|
-
view_options = [
|
|
143
|
-
Choice(view.external_id, value=view)
|
|
144
|
-
for view in sorted(selected_data_model.views, key=lambda v: v.external_id, reverse=True)
|
|
145
|
-
]
|
|
146
|
-
selected_view: ViewId | None = questionary.select(
|
|
147
|
-
"Select the view to populate",
|
|
148
|
-
choices=view_options,
|
|
149
|
-
).ask()
|
|
150
|
-
if selected_view is None:
|
|
151
|
-
print("No view selected. Exiting.")
|
|
152
|
-
raise typer.Exit(0)
|
|
153
|
-
view = client.data_modeling.views.retrieve(selected_view)
|
|
154
|
-
return view[0]
|
|
155
|
-
|
|
156
|
-
def _get_table_from_user(self) -> Path:
|
|
157
|
-
selected_table: str | None = questionary.path("Enter the path to the table to populate the view with").ask()
|
|
158
|
-
if selected_table is None:
|
|
159
|
-
print("No table path provided. Exiting.")
|
|
160
|
-
raise typer.Exit(0)
|
|
161
|
-
table_path = Path(selected_table)
|
|
162
|
-
if not table_path.exists():
|
|
163
|
-
print("Table path does not exist.")
|
|
164
|
-
return self._get_table_from_user()
|
|
165
|
-
if table_path.suffix not in (".csv", ".parquet"):
|
|
166
|
-
print("Only CSV and Parquet files are supported. Please provide a valid file.")
|
|
167
|
-
return self._get_table_from_user()
|
|
168
|
-
return table_path
|
|
169
|
-
|
|
170
|
-
@staticmethod
|
|
171
|
-
def _get_instance_space_from_user(client: ToolkitClient) -> str:
|
|
172
|
-
spaces = client.data_modeling.spaces.list(limit=-1)
|
|
173
|
-
space_choices = [Choice(space.space, value=space) for space in sorted(spaces, key=lambda s: s.space)]
|
|
174
|
-
selected_space: Space | None = questionary.select(
|
|
175
|
-
"Select the instance space to write the nodes to", choices=space_choices
|
|
176
|
-
).ask()
|
|
177
|
-
if selected_space is None:
|
|
178
|
-
print("No instance space selected. Exiting.")
|
|
179
|
-
raise typer.Exit(0)
|
|
180
|
-
return selected_space.space
|
|
181
|
-
|
|
182
|
-
@staticmethod
|
|
183
|
-
def _get_external_id_column_from_user(table: Path) -> str:
|
|
184
|
-
columns = get_table_columns(table)
|
|
185
|
-
selected_column: str | None = questionary.select(
|
|
186
|
-
"Select the column in the table that contains the external IDs of the nodes",
|
|
187
|
-
choices=[Choice(col, value=col) for col in columns],
|
|
188
|
-
).ask()
|
|
189
|
-
if selected_column is None:
|
|
190
|
-
print("No external ID column selected. Exiting.")
|
|
191
|
-
raise typer.Exit(0)
|
|
192
|
-
return selected_column
|
|
193
|
-
|
|
194
|
-
@staticmethod
|
|
195
|
-
def _validate_config(
|
|
196
|
-
user_view_id: list[str],
|
|
197
|
-
table: Path | None,
|
|
198
|
-
instance_space: str | None,
|
|
199
|
-
external_id_column: str | None,
|
|
200
|
-
client: ToolkitClient,
|
|
201
|
-
) -> PopulateConfig:
|
|
202
|
-
if missing := [name for name, value in locals().items() if value is None]:
|
|
203
|
-
raise ToolkitRequiredValueError(f"Missing required values: {humanize_collection(missing)}")
|
|
204
|
-
# Happy Mypy
|
|
205
|
-
instance_space = cast(str, instance_space)
|
|
206
|
-
table = cast(Path, table)
|
|
207
|
-
external_id_column = cast(str, external_id_column)
|
|
208
|
-
|
|
209
|
-
if not table.exists():
|
|
210
|
-
raise ToolkitFileNotFoundError(f"Table {table.as_posix()} not found", table)
|
|
211
|
-
columns = {col.casefold() for col in get_table_columns(table)}
|
|
212
|
-
if external_id_column.casefold() not in columns:
|
|
213
|
-
raise ToolkitRequiredValueError(
|
|
214
|
-
f"External ID column {external_id_column!r} not found in table {table.name}", external_id_column
|
|
215
|
-
)
|
|
216
|
-
|
|
217
|
-
view_id = ViewId.load(tuple(user_view_id)) # type: ignore[arg-type]
|
|
218
|
-
try:
|
|
219
|
-
views = client.data_modeling.views.retrieve(view_id)
|
|
220
|
-
except CogniteAPIError as e:
|
|
221
|
-
raise CDFAPIError(f"Failed to retrieve view {view_id:!r}; {escape(str(e))}")
|
|
222
|
-
if not views:
|
|
223
|
-
raise ToolkitResourceMissingError(f"View {view_id} not found", repr(view_id))
|
|
224
|
-
view = max(views, key=lambda v: v.created_time)
|
|
225
|
-
try:
|
|
226
|
-
space = client.data_modeling.spaces.retrieve(instance_space)
|
|
227
|
-
except CogniteAPIError as e:
|
|
228
|
-
raise CDFAPIError(f"Failed to retrieve instance space {instance_space!r}; {escape(str(e))}")
|
|
229
|
-
if space is None:
|
|
230
|
-
raise ToolkitResourceMissingError(f"Instance space {instance_space} not found", repr(instance_space))
|
|
231
|
-
|
|
232
|
-
return PopulateConfig(
|
|
233
|
-
view=view, table=table, instance_space=instance_space, external_id_column=external_id_column
|
|
234
|
-
)
|
|
235
|
-
|
|
236
|
-
@staticmethod
|
|
237
|
-
def _properties_by_column(columns: list[str], view: View) -> tuple[dict[str, str], dict[str, PropertyType]]:
|
|
238
|
-
properties_by_column: dict[str, str] = {}
|
|
239
|
-
property_types_by_column: dict[str, PropertyType] = {}
|
|
240
|
-
container_property_by_id = {
|
|
241
|
-
prop_id.casefold(): (prop_id, prop)
|
|
242
|
-
for prop_id, prop in view.properties.items()
|
|
243
|
-
if isinstance(prop, MappedProperty)
|
|
244
|
-
}
|
|
245
|
-
for col in columns:
|
|
246
|
-
if col.casefold() not in container_property_by_id:
|
|
247
|
-
continue
|
|
248
|
-
prop_id, prop = container_property_by_id[col.casefold()]
|
|
249
|
-
properties_by_column[col] = prop_id
|
|
250
|
-
property_types_by_column[col] = prop.type
|
|
251
|
-
return properties_by_column, property_types_by_column
|
|
252
|
-
|
|
253
|
-
@classmethod
|
|
254
|
-
def _serialize_value(cls, value: Any, property_type: PropertyType, instance_space: str) -> Any:
|
|
255
|
-
if isinstance(value, str):
|
|
256
|
-
try:
|
|
257
|
-
return cls._serialize_value(json.loads(value), property_type, instance_space)
|
|
258
|
-
except json.JSONDecodeError:
|
|
259
|
-
...
|
|
260
|
-
elif isinstance(property_type, ListablePropertyType) and property_type.is_list and isinstance(value, list):
|
|
261
|
-
return [cls._serialize_value(v, property_type, instance_space) for v in value]
|
|
262
|
-
elif isinstance(property_type, ListablePropertyType) and property_type.is_list and not isinstance(value, list):
|
|
263
|
-
return [cls._serialize_value(value, property_type, instance_space)]
|
|
264
|
-
|
|
265
|
-
if value is None:
|
|
266
|
-
return None
|
|
267
|
-
|
|
268
|
-
match (property_type, value):
|
|
269
|
-
case (Text() | CDFExternalIdReference(), _):
|
|
270
|
-
return str(value)
|
|
271
|
-
case (Boolean(), str()):
|
|
272
|
-
return value.lower() in ("true", "1")
|
|
273
|
-
case (Boolean(), _):
|
|
274
|
-
return bool(value)
|
|
275
|
-
case (Timestamp(), _):
|
|
276
|
-
return pd.Timestamp(value).to_pydatetime().isoformat(timespec="milliseconds")
|
|
277
|
-
case (Date(), _):
|
|
278
|
-
return pd.Timestamp(value).strftime("%Y-%m-%d")
|
|
279
|
-
case (Json(), dict() | list() | str()):
|
|
280
|
-
return value
|
|
281
|
-
case (Float32() | Float64(), _):
|
|
282
|
-
float_value = float(value)
|
|
283
|
-
if math.isinf(float_value) or math.isnan(float_value):
|
|
284
|
-
return None
|
|
285
|
-
return float_value
|
|
286
|
-
case (Int32() | Int64(), _):
|
|
287
|
-
try:
|
|
288
|
-
return int(value)
|
|
289
|
-
except ValueError:
|
|
290
|
-
return None
|
|
291
|
-
case (DirectRelation(), _str):
|
|
292
|
-
return {"space": instance_space, "externalId": value}
|
|
293
|
-
case (DirectRelation(), _):
|
|
294
|
-
return value
|
|
295
|
-
case (Enum(), _):
|
|
296
|
-
return next(
|
|
297
|
-
(
|
|
298
|
-
opt
|
|
299
|
-
for opt in property_type.values.keys()
|
|
300
|
-
if opt.casefold() == str(value).casefold()
|
|
301
|
-
and str(value).casefold() != (property_type.unknown_value or "").casefold()
|
|
302
|
-
),
|
|
303
|
-
None,
|
|
304
|
-
)
|
|
305
|
-
case _:
|
|
306
|
-
return value
|
|
File without changes
|
|
File without changes
|
|
File without changes
|