cognite-toolkit 0.6.78__py3-none-any.whl → 0.6.80__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of cognite-toolkit has been flagged as possibly problematic.

Files changed (35)
  1. cognite_toolkit/_builtin_modules/cdf.toml +1 -1
  2. cognite_toolkit/_cdf.py +0 -4
  3. cognite_toolkit/_cdf_tk/apps/__init__.py +0 -2
  4. cognite_toolkit/_cdf_tk/apps/_migrate_app.py +134 -21
  5. cognite_toolkit/_cdf_tk/apps/_modules_app.py +27 -0
  6. cognite_toolkit/_cdf_tk/commands/__init__.py +0 -6
  7. cognite_toolkit/_cdf_tk/commands/_migrate/__init__.py +0 -4
  8. cognite_toolkit/_cdf_tk/commands/_migrate/command.py +6 -8
  9. cognite_toolkit/_cdf_tk/commands/_migrate/creators.py +26 -2
  10. cognite_toolkit/_cdf_tk/commands/_migrate/data_classes.py +42 -8
  11. cognite_toolkit/_cdf_tk/commands/_migrate/data_mapper.py +4 -6
  12. cognite_toolkit/_cdf_tk/commands/_migrate/default_mappings.py +1 -0
  13. cognite_toolkit/_cdf_tk/commands/_migrate/migration_io.py +203 -0
  14. cognite_toolkit/_cdf_tk/commands/_migrate/selectors.py +66 -0
  15. cognite_toolkit/_cdf_tk/commands/modules.py +59 -14
  16. cognite_toolkit/_cdf_tk/constants.py +3 -0
  17. cognite_toolkit/_cdf_tk/feature_flags.py +0 -4
  18. cognite_toolkit/_cdf_tk/storageio/_asset_centric.py +6 -6
  19. cognite_toolkit/_cdf_tk/storageio/_base.py +2 -5
  20. cognite_toolkit/_cdf_tk/utils/useful_types.py +3 -1
  21. cognite_toolkit/_repo_files/GitHub/.github/workflows/deploy.yaml +1 -1
  22. cognite_toolkit/_repo_files/GitHub/.github/workflows/dry-run.yaml +1 -1
  23. cognite_toolkit/_resources/cdf.toml +13 -0
  24. cognite_toolkit/_version.py +1 -1
  25. {cognite_toolkit-0.6.78.dist-info → cognite_toolkit-0.6.80.dist-info}/METADATA +1 -1
  26. {cognite_toolkit-0.6.78.dist-info → cognite_toolkit-0.6.80.dist-info}/RECORD +29 -32
  27. cognite_toolkit/_cdf_tk/apps/_populate_app.py +0 -80
  28. cognite_toolkit/_cdf_tk/commands/_migrate/adapter.py +0 -368
  29. cognite_toolkit/_cdf_tk/commands/_migrate/assets.py +0 -0
  30. cognite_toolkit/_cdf_tk/commands/_migrate/files.py +0 -165
  31. cognite_toolkit/_cdf_tk/commands/_migrate/timeseries.py +0 -165
  32. cognite_toolkit/_cdf_tk/commands/_populate.py +0 -306
  33. {cognite_toolkit-0.6.78.dist-info → cognite_toolkit-0.6.80.dist-info}/WHEEL +0 -0
  34. {cognite_toolkit-0.6.78.dist-info → cognite_toolkit-0.6.80.dist-info}/entry_points.txt +0 -0
  35. {cognite_toolkit-0.6.78.dist-info → cognite_toolkit-0.6.80.dist-info}/licenses/LICENSE +0 -0
cognite_toolkit/_cdf_tk/commands/_migrate/files.py
@@ -1,165 +0,0 @@
-from pathlib import Path
-
-import questionary
-from cognite.client.data_classes.capabilities import (
-    Capability,
-    DataSetScope,
-    FilesAcl,
-)
-from cognite.client.data_classes.data_modeling import ViewId
-from cognite.client.data_classes.data_modeling.cdm.v1 import CogniteFileApply
-from cognite.client.exceptions import CogniteAPIError
-from rich import print
-from rich.panel import Panel
-from rich.progress import track
-
-from cognite_toolkit._cdf_tk.client import ToolkitClient
-from cognite_toolkit._cdf_tk.client.data_classes.extended_filemetdata import ExtendedFileMetadata
-from cognite_toolkit._cdf_tk.exceptions import (
-    AuthenticationError,
-    ToolkitValueError,
-)
-from cognite_toolkit._cdf_tk.utils import humanize_collection
-from cognite_toolkit._cdf_tk.utils.collection import chunker_sequence
-
-from .base import BaseMigrateCommand
-from .data_classes import MigrationMappingList
-
-
-class MigrateFilesCommand(BaseMigrateCommand):
-    cdf_cdm = "cdf_cdm"
-    view_id = ViewId(cdf_cdm, "CogniteFile", "v1")
-    chunk_size = 1000
-
-    def source_acl(self, data_set_ids: list[int]) -> Capability:
-        return FilesAcl(
-            actions=[FilesAcl.Action.Read, FilesAcl.Action.Write],
-            scope=DataSetScope(data_set_ids),
-        )
-
-    def migrate_files(
-        self,
-        client: ToolkitClient,
-        mapping_file: Path,
-        dry_run: bool = False,
-        verbose: bool = False,
-        auto_yes: bool = False,
-    ) -> None:
-        """Migrate resources from Asset-Centric to data modeling in CDF."""
-        mappings = MigrationMappingList.read_csv_file(mapping_file, "file")
-        self.validate_access(
-            client,
-            instance_spaces=list(mappings.spaces()),
-            schema_spaces=[self.cdf_cdm],
-            data_set_ids=list(mappings.get_data_set_ids()),
-        )
-        self._validate_files(client, mappings)
-        self.validate_available_capacity(client, len(mappings))
-
-        if dry_run:
-            self.console(f"Dry run mode. Would have migrated {len(mappings):,} Files to CogniteFiles.")
-            return
-        if not auto_yes and self._confirm(mappings) is False:
-            return
-        self._migrate(client, mappings, verbose)
-
-    def _validate_files(self, client: ToolkitClient, mappings: MigrationMappingList) -> None:
-        total_validated = 0
-        chunk: MigrationMappingList
-        for chunk in track(
-            chunker_sequence(mappings, size=self.chunk_size),
-            description="Validating...",
-            total=len(mappings) // self.chunk_size + 1,
-        ):
-            try:
-                files = client.files.retrieve_multiple(
-                    ids=chunk.get_ids(),
-                    ignore_unknown_ids=True,
-                )
-            except CogniteAPIError as e:
-                raise AuthenticationError(
-                    f"Failed to retrieve Files. This is likely due to lack of permissions: {e!s}"
-                ) from e
-
-            missing_count = len(files) - len(mappings)
-            if missing_count > 0:
-                raise ToolkitValueError(f"{missing_count} Files are missing in CDF.")
-
-            missing_file_content = [file for file in files if file.uploaded is not True]
-            if missing_file_content:
-                raise ToolkitValueError(
-                    f"The following files does not have file content yet: {humanize_collection(missing_file_content)}. "
-                    "You can only migrate files that have file content uploaded."
-                )
-
-            existing_result = client.data_modeling.instances.retrieve(chunk.as_node_ids())
-            if len(existing_result.nodes) != 0:
-                raise ToolkitValueError(
-                    "Some of the Files you are trying to migrate already exist in Data Modeling. "
-                    f"Please remove the following files from the mapping file {humanize_collection(existing_result.nodes.as_ids())}"
-                )
-            total_validated += len(files)
-        print(
-            f"Validated {total_validated:,} Files for migration. "
-            f"{len(mappings):,} mappings provided in the mapping file."
-        )
-
-    @staticmethod
-    def _confirm(mappings: MigrationMappingList) -> bool:
-        print(
-            Panel(
-                f"[red]WARNING:[/red] This operation [bold]cannot be undone[/bold]! "
-                f"{len(mappings):,} Files will linked to the new CogniteFiles. "
-                "This linking cannot be undone",
-                style="bold",
-                title="Migrate asset-centric Files to CogniteFiles",
-                title_align="left",
-                border_style="red",
-                expand=False,
-            )
-        )
-
-        if not questionary.confirm("Are you really sure you want to continue?", default=False).ask():
-            print("Migration cancelled by user.")
-            return False
-        return True
-
-    def _migrate(self, client: ToolkitClient, mappings: MigrationMappingList, verbose: bool) -> None:
-        print("Migrating Files to CogniteFiles...")
-        total_migrated = 0
-        for chunk in track(
-            chunker_sequence(mappings, size=self.chunk_size),
-            description="Migrating Files to CogniteFiles...",
-            total=len(mappings) // self.chunk_size + 1,
-        ):
-            if verbose:
-                print(f"Migrating {len(chunk):,} Files...")
-
-            # Set pending IDs for the chunk of mappings
-            try:
-                pending_files = client.files.set_pending_ids(chunk.as_pending_ids())
-            except CogniteAPIError as e:
-                raise ToolkitValueError(f"Failed to set pending IDs for Files: {e!s}") from e
-
-            # The ExtendedTimeSeriesList is iterating ExtendedTimeSeries objects.
-            converted_files = [self.as_cognite_file(file) for file in pending_files]  # type: ignore[arg-type]
-            try:
-                created = client.data_modeling.instances.apply_fast(converted_files)
-            except CogniteAPIError as e:
-                raise ToolkitValueError(f"Failed to apply Files: {e!s}") from e
-            if verbose:
-                print(f"Created {len(created):,} CogniteFiles.")
-            total_migrated += len(created)
-        print(f"Successfully migrated {total_migrated:,} Files to CogniteFiles.")
-
-    @classmethod
-    def as_cognite_file(cls, file: ExtendedFileMetadata) -> CogniteFileApply:
-        if file.pending_instance_id is None:
-            raise ToolkitValueError("ExtendedFiles must have a pending_instance_id set before migration.")
-
-        return CogniteFileApply(
-            space=file.pending_instance_id.space,
-            external_id=file.pending_instance_id.external_id,
-            name=file.name,
-            mime_type=file.mime_type,
-        )
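The removed MigrateFilesCommand (and MigrateTimeseriesCommand below, which shares its structure) splits the work into a fail-fast validation pass over every chunk followed by a separate write pass. A minimal, SDK-free sketch of that two-pass chunking pattern; chunked, the validation predicate, and the per-chunk write here are illustrative stand-ins for chunker_sequence, the retrieve/instances existence checks, and set_pending_ids + apply_fast:

from collections.abc import Iterator, Sequence
from typing import TypeVar

T = TypeVar("T")


def chunked(items: Sequence[T], size: int) -> Iterator[Sequence[T]]:
    # Yield fixed-size slices; the final slice may be shorter.
    for start in range(0, len(items), size):
        yield items[start : start + size]


def migrate(mappings: Sequence[str], chunk_size: int = 1000) -> int:
    # Pass 1: fail fast on every chunk before anything is written
    # (stand-in for _validate_files: retrieve_multiple + instances.retrieve).
    for chunk in chunked(mappings, chunk_size):
        if any(not m for m in chunk):
            raise ValueError("a mapping failed validation")
    # Pass 2: write chunk by chunk, keeping a running total for the summary
    # (stand-in for _migrate: set_pending_ids + apply_fast).
    total_migrated = 0
    for chunk in chunked(mappings, chunk_size):
        total_migrated += len(chunk)
    return total_migrated


print(migrate([f"file-{i}" for i in range(2500)]))  # 2500, processed as 1000 + 1000 + 500

One detail worth noting in the removed code: the progress total, len(mappings) // chunk_size + 1, overestimates by one chunk whenever the mapping count is an exact multiple of chunk_size; math.ceil(len(mappings) / chunk_size) would be exact.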
cognite_toolkit/_cdf_tk/commands/_migrate/timeseries.py
@@ -1,165 +0,0 @@
-from pathlib import Path
-
-import questionary
-from cognite.client.data_classes.capabilities import (
-    Capability,
-    DataSetScope,
-    TimeSeriesAcl,
-)
-from cognite.client.data_classes.data_modeling import DirectRelationReference, ViewId
-from cognite.client.data_classes.data_modeling.cdm.v1 import CogniteTimeSeriesApply
-from cognite.client.exceptions import CogniteAPIError
-from rich import print
-from rich.panel import Panel
-from rich.progress import track
-
-from cognite_toolkit._cdf_tk.client import ToolkitClient
-from cognite_toolkit._cdf_tk.client.data_classes.extended_timeseries import ExtendedTimeSeries
-from cognite_toolkit._cdf_tk.exceptions import (
-    AuthenticationError,
-    ToolkitValueError,
-)
-from cognite_toolkit._cdf_tk.utils import humanize_collection
-from cognite_toolkit._cdf_tk.utils.collection import chunker_sequence
-
-from .base import BaseMigrateCommand
-from .data_classes import MigrationMappingList
-
-
-class MigrateTimeseriesCommand(BaseMigrateCommand):
-    cdf_cdm = "cdf_cdm"
-    view_id = ViewId(cdf_cdm, "CogniteTimeSeries", "v1")
-    cdf_cdm_units = "cdf_cdm_units"
-    chunk_size = 1000
-
-    def source_acl(self, data_set_ids: list[int]) -> Capability:
-        return TimeSeriesAcl(
-            actions=[TimeSeriesAcl.Action.Read, TimeSeriesAcl.Action.Write],
-            scope=DataSetScope(data_set_ids),
-        )
-
-    def migrate_timeseries(
-        self,
-        client: ToolkitClient,
-        mapping_file: Path,
-        dry_run: bool = False,
-        verbose: bool = False,
-        auto_yes: bool = False,
-    ) -> None:
-        """Migrate resources from Asset-Centric to data modeling in CDF."""
-        mappings = MigrationMappingList.read_csv_file(mapping_file, "timeseries")
-        self.validate_access(
-            client,
-            instance_spaces=list(mappings.spaces()),
-            schema_spaces=[self.cdf_cdm, self.cdf_cdm_units],
-            data_set_ids=list(mappings.get_data_set_ids()),
-        )
-        self._validate_timeseries_existence(client, mappings)
-        self.validate_available_capacity(client, len(mappings))
-
-        if dry_run:
-            self.console(f"Dry run mode. Would have migrated {len(mappings):,} TimeSeries to CogniteTimeSeries.")
-            return
-        if not auto_yes and self._confirm(mappings) is False:
-            return
-        self._migrate(client, mappings, verbose)
-
-    def _validate_timeseries_existence(self, client: ToolkitClient, mappings: MigrationMappingList) -> None:
-        total_validated = 0
-        chunk: MigrationMappingList
-        for chunk in track(
-            chunker_sequence(mappings, size=self.chunk_size),
-            description="Validating...",
-            total=len(mappings) // self.chunk_size + 1,
-        ):
-            try:
-                timeseries = client.time_series.retrieve_multiple(
-                    ids=chunk.get_ids(),
-                    ignore_unknown_ids=True,
-                )
-            except CogniteAPIError as e:
-                raise AuthenticationError(
-                    f"Failed to retrieve TimeSeries. This is likely due to lack of permissions: {e!s}"
-                ) from e
-
-            missing_count = len(timeseries) - len(mappings)
-            if missing_count > 0:
-                raise ToolkitValueError(f"Missing {missing_count} TimeSeries does not exist in CDF.")
-
-            existing_result = client.data_modeling.instances.retrieve(chunk.as_node_ids())
-            if len(existing_result.nodes) != 0:
-                raise ToolkitValueError(
-                    "Some of the TimeSeries you are trying to migrate already exist in Data Modeling. "
-                    f"Please remove the following TimeSeries from the mapping file {humanize_collection(existing_result.nodes.as_ids())}"
-                )
-            total_validated += len(timeseries)
-        print(
-            f"Validated {total_validated:,} TimeSeries for migration. "
-            f"{len(mappings):,} mappings provided in the mapping file."
-        )
-
-    @staticmethod
-    def _confirm(mappings: MigrationMappingList) -> bool:
-        print(
-            Panel(
-                f"[red]WARNING:[/red] This operation [bold]cannot be undone[/bold]! "
-                f"{len(mappings):,} TimeSeries will linked to the new CogniteTimeSeries. "
-                "This linking cannot be undone",
-                style="bold",
-                title="Migrate asset-centric TimeSeries to CogniteTimeSeries",
-                title_align="left",
-                border_style="red",
-                expand=False,
-            )
-        )
-
-        if not questionary.confirm("Are you really sure you want to continue?", default=False).ask():
-            print("Migration cancelled by user.")
-            return False
-        return True
-
-    def _migrate(self, client: ToolkitClient, mappings: MigrationMappingList, verbose: bool) -> None:
-        print("Migrating TimeSeries to CogniteTimeSeries...")
-        total_migrated = 0
-        for chunk in track(
-            chunker_sequence(mappings, size=self.chunk_size),
-            description="Migrating TimeSeries to CogniteTimeSeries...",
-            total=len(mappings) // self.chunk_size + 1,
-        ):
-            if verbose:
-                print(f"Migrating {len(chunk):,} TimeSeries...")
-
-            # Set pending IDs for the chunk of mappings
-            try:
-                pending_timeseries = client.time_series.set_pending_ids(chunk.as_pending_ids())
-            except CogniteAPIError as e:
-                raise ToolkitValueError(f"Failed to set pending IDs for TimeSeries: {e!s}") from e
-
-            # The ExtendedTimeSeriesList is iterating ExtendedTimeSeries objects.
-            converted_timeseries = [self.as_cognite_timeseries(ts) for ts in pending_timeseries]  # type: ignore[arg-type]
-            try:
-                created = client.data_modeling.instances.apply_fast(converted_timeseries)
-            except CogniteAPIError as e:
-                raise ToolkitValueError(f"Failed to apply TimeSeries: {e!s}") from e
-            if verbose:
-                print(f"Created {len(created):,} CogniteTimeSeries.")
-            total_migrated += len(created)
-        print(f"Successfully migrated {total_migrated:,} TimeSeries to CogniteTimeSeries.")
-
-    @classmethod
-    def as_cognite_timeseries(cls, ts: ExtendedTimeSeries) -> CogniteTimeSeriesApply:
-        if ts.pending_instance_id is None:
-            raise ToolkitValueError("ExtendedTimeSeries must have a pending_instance_id set before migration.")
-        if ts.is_step is None:
-            raise ToolkitValueError("ExtendedTimeSeries must have is_step set before migration.")
-
-        return CogniteTimeSeriesApply(
-            space=ts.pending_instance_id.space,
-            external_id=ts.pending_instance_id.external_id,
-            name=ts.name,
-            description=ts.description,
-            is_step=ts.is_step,
-            time_series_type="string" if ts.is_string else "numeric",
-            source_unit=ts.unit,
-            unit=DirectRelationReference(cls.cdf_cdm_units, ts.unit_external_id) if ts.unit_external_id else None,
-        )
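The conversion in as_cognite_timeseries is a plain field mapping with one twist: the free-text unit is copied to source_unit, while a catalogued unit_external_id becomes a direct relation into the cdf_cdm_units space. A hedged sketch of the same shape using plain dataclasses and dicts (SourceTs and the property keys below are illustrative stand-ins, not SDK types):

from dataclasses import dataclass


@dataclass
class SourceTs:  # illustrative stand-in for ExtendedTimeSeries
    name: str | None
    is_step: bool
    is_string: bool
    unit: str | None  # free-text unit from the asset-centric resource
    unit_external_id: str | None  # catalogued unit, if known


def to_properties(ts: SourceTs) -> dict:
    # Scalar fields copy straight over; the type is derived from is_string.
    props: dict = {
        "name": ts.name,
        "isStep": ts.is_step,
        "type": "string" if ts.is_string else "numeric",
        "sourceUnit": ts.unit,
    }
    # Only a catalogued unit becomes a direct relation into cdf_cdm_units.
    if ts.unit_external_id:
        props["unit"] = {"space": "cdf_cdm_units", "externalId": ts.unit_external_id}
    return props


print(to_properties(SourceTs("Pump pressure", False, False, "barg", "pressure:bar")))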
cognite_toolkit/_cdf_tk/commands/_populate.py
@@ -1,306 +0,0 @@
-import json
-import math
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Any, cast
-
-import pandas as pd
-import questionary
-import typer
-from cognite.client.data_classes.data_modeling import (
-    DataModel,
-    MappedProperty,
-    NodeApply,
-    NodeOrEdgeData,
-    PropertyType,
-    Space,
-    View,
-    ViewId,
-)
-from cognite.client.data_classes.data_modeling.data_types import (
-    Boolean,
-    CDFExternalIdReference,
-    Date,
-    DirectRelation,
-    Enum,
-    Float32,
-    Float64,
-    Int32,
-    Int64,
-    Json,
-    ListablePropertyType,
-    Text,
-    Timestamp,
-)
-from cognite.client.exceptions import CogniteAPIError
-from questionary import Choice
-from rich import print
-from rich.markup import escape
-from rich.progress import Progress
-
-from cognite_toolkit._cdf_tk.client import ToolkitClient
-from cognite_toolkit._cdf_tk.exceptions import (
-    CDFAPIError,
-    ToolkitFileNotFoundError,
-    ToolkitRequiredValueError,
-    ToolkitResourceMissingError,
-)
-from cognite_toolkit._cdf_tk.utils.collection import chunker, humanize_collection
-from cognite_toolkit._cdf_tk.utils.file import get_table_columns, read_csv
-
-from ._base import ToolkitCommand
-
-
-@dataclass
-class PopulateConfig:
-    view: View
-    table: Path
-    instance_space: str
-    external_id_column: str
-
-
-class PopulateCommand(ToolkitCommand):
-    def view(
-        self,
-        client: ToolkitClient,
-        view_id: list[str] | None = None,
-        table: Path | None = None,
-        instance_space: str | None = None,
-        external_id_column: str | None = None,
-        verbose: bool = False,
-    ) -> None:
-        if view_id is None:
-            config = self._get_config_from_user(client)
-        else:
-            config = self._validate_config(view_id, table, instance_space, external_id_column, client)
-
-        if config.table.suffix == ".csv":
-            data = read_csv(config.table)
-        else:
-            # Parquet - already validated
-            data = pd.read_parquet(config.table)
-
-        properties_by_column, property_types_by_column = self._properties_by_column(list(data.columns), config.view)
-
-        with Progress() as progress:
-            task = progress.add_task("Populating view", total=len(data))
-            for chunk in chunker(data.to_dict(orient="records"), 1_000):
-                nodes = [
-                    NodeApply(
-                        space=config.instance_space,
-                        external_id=row[config.external_id_column],
-                        sources=[
-                            NodeOrEdgeData(
-                                source=config.view.as_id(),
-                                properties={
-                                    properties_by_column[col]: self._serialize_value(
-                                        value, property_types_by_column[col], config.instance_space
-                                    )
-                                    for col, value in row.items()
-                                    if col in properties_by_column and col in property_types_by_column
-                                },
-                            )
-                        ],
-                    )
-                    for row in chunk
-                ]
-                try:
-                    created = client.data_modeling.instances.apply(nodes=nodes, auto_create_direct_relations=True)
-                except CogniteAPIError as e:
-                    raise CDFAPIError(f"Failed to populate view; {escape(str(e))}")
-                else:
-                    progress.update(task, advance=len(created.nodes))
-
-    def _get_config_from_user(self, client: ToolkitClient) -> PopulateConfig:
-        view = self._get_view_from_user(client)
-        table = self._get_table_from_user()
-        instance_space = self._get_instance_space_from_user(client)
-        external_id_column = self._get_external_id_column_from_user(table)
-        return PopulateConfig(
-            view=view,
-            table=table,
-            instance_space=instance_space,
-            external_id_column=external_id_column,
-        )
-
-    @staticmethod
-    def _get_view_from_user(client: ToolkitClient) -> View:
-        data_models = client.data_modeling.data_models.list(inline_views=False, limit=-1, all_versions=False)
-        data_model_choices = [
-            Choice(f"{dm.as_id().as_tuple()}", value=dm)
-            for dm in sorted(data_models, key=lambda dm: (dm.space, dm.external_id))
-        ]
-        selected_data_model: DataModel[ViewId] | None = questionary.select(
-            "Select the data model containing the view to populate",
-            choices=data_model_choices,
-        ).ask()
-
-        if selected_data_model is None:
-            print("No data model selected. Exiting.")
-            raise typer.Exit(0)
-
-        view_options = [
-            Choice(view.external_id, value=view)
-            for view in sorted(selected_data_model.views, key=lambda v: v.external_id, reverse=True)
-        ]
-        selected_view: ViewId | None = questionary.select(
-            "Select the view to populate",
-            choices=view_options,
-        ).ask()
-        if selected_view is None:
-            print("No view selected. Exiting.")
-            raise typer.Exit(0)
-        view = client.data_modeling.views.retrieve(selected_view)
-        return view[0]
-
-    def _get_table_from_user(self) -> Path:
-        selected_table: str | None = questionary.path("Enter the path to the table to populate the view with").ask()
-        if selected_table is None:
-            print("No table path provided. Exiting.")
-            raise typer.Exit(0)
-        table_path = Path(selected_table)
-        if not table_path.exists():
-            print("Table path does not exist.")
-            return self._get_table_from_user()
-        if table_path.suffix not in (".csv", ".parquet"):
-            print("Only CSV and Parquet files are supported. Please provide a valid file.")
-            return self._get_table_from_user()
-        return table_path
-
-    @staticmethod
-    def _get_instance_space_from_user(client: ToolkitClient) -> str:
-        spaces = client.data_modeling.spaces.list(limit=-1)
-        space_choices = [Choice(space.space, value=space) for space in sorted(spaces, key=lambda s: s.space)]
-        selected_space: Space | None = questionary.select(
-            "Select the instance space to write the nodes to", choices=space_choices
-        ).ask()
-        if selected_space is None:
-            print("No instance space selected. Exiting.")
-            raise typer.Exit(0)
-        return selected_space.space
-
-    @staticmethod
-    def _get_external_id_column_from_user(table: Path) -> str:
-        columns = get_table_columns(table)
-        selected_column: str | None = questionary.select(
-            "Select the column in the table that contains the external IDs of the nodes",
-            choices=[Choice(col, value=col) for col in columns],
-        ).ask()
-        if selected_column is None:
-            print("No external ID column selected. Exiting.")
-            raise typer.Exit(0)
-        return selected_column
-
-    @staticmethod
-    def _validate_config(
-        user_view_id: list[str],
-        table: Path | None,
-        instance_space: str | None,
-        external_id_column: str | None,
-        client: ToolkitClient,
-    ) -> PopulateConfig:
-        if missing := [name for name, value in locals().items() if value is None]:
-            raise ToolkitRequiredValueError(f"Missing required values: {humanize_collection(missing)}")
-        # Happy Mypy
-        instance_space = cast(str, instance_space)
-        table = cast(Path, table)
-        external_id_column = cast(str, external_id_column)
-
-        if not table.exists():
-            raise ToolkitFileNotFoundError(f"Table {table.as_posix()} not found", table)
-        columns = {col.casefold() for col in get_table_columns(table)}
-        if external_id_column.casefold() not in columns:
-            raise ToolkitRequiredValueError(
-                f"External ID column {external_id_column!r} not found in table {table.name}", external_id_column
-            )
-
-        view_id = ViewId.load(tuple(user_view_id))  # type: ignore[arg-type]
-        try:
-            views = client.data_modeling.views.retrieve(view_id)
-        except CogniteAPIError as e:
-            raise CDFAPIError(f"Failed to retrieve view {view_id:!r}; {escape(str(e))}")
-        if not views:
-            raise ToolkitResourceMissingError(f"View {view_id} not found", repr(view_id))
-        view = max(views, key=lambda v: v.created_time)
-        try:
-            space = client.data_modeling.spaces.retrieve(instance_space)
-        except CogniteAPIError as e:
-            raise CDFAPIError(f"Failed to retrieve instance space {instance_space!r}; {escape(str(e))}")
-        if space is None:
-            raise ToolkitResourceMissingError(f"Instance space {instance_space} not found", repr(instance_space))
-
-        return PopulateConfig(
-            view=view, table=table, instance_space=instance_space, external_id_column=external_id_column
-        )
-
-    @staticmethod
-    def _properties_by_column(columns: list[str], view: View) -> tuple[dict[str, str], dict[str, PropertyType]]:
-        properties_by_column: dict[str, str] = {}
-        property_types_by_column: dict[str, PropertyType] = {}
-        container_property_by_id = {
-            prop_id.casefold(): (prop_id, prop)
-            for prop_id, prop in view.properties.items()
-            if isinstance(prop, MappedProperty)
-        }
-        for col in columns:
-            if col.casefold() not in container_property_by_id:
-                continue
-            prop_id, prop = container_property_by_id[col.casefold()]
-            properties_by_column[col] = prop_id
-            property_types_by_column[col] = prop.type
-        return properties_by_column, property_types_by_column
-
-    @classmethod
-    def _serialize_value(cls, value: Any, property_type: PropertyType, instance_space: str) -> Any:
-        if isinstance(value, str):
-            try:
-                return cls._serialize_value(json.loads(value), property_type, instance_space)
-            except json.JSONDecodeError:
-                ...
-        elif isinstance(property_type, ListablePropertyType) and property_type.is_list and isinstance(value, list):
-            return [cls._serialize_value(v, property_type, instance_space) for v in value]
-        elif isinstance(property_type, ListablePropertyType) and property_type.is_list and not isinstance(value, list):
-            return [cls._serialize_value(value, property_type, instance_space)]
-
-        if value is None:
-            return None
-
-        match (property_type, value):
-            case (Text() | CDFExternalIdReference(), _):
-                return str(value)
-            case (Boolean(), str()):
-                return value.lower() in ("true", "1")
-            case (Boolean(), _):
-                return bool(value)
-            case (Timestamp(), _):
-                return pd.Timestamp(value).to_pydatetime().isoformat(timespec="milliseconds")
-            case (Date(), _):
-                return pd.Timestamp(value).strftime("%Y-%m-%d")
-            case (Json(), dict() | list() | str()):
-                return value
-            case (Float32() | Float64(), _):
-                float_value = float(value)
-                if math.isinf(float_value) or math.isnan(float_value):
-                    return None
-                return float_value
-            case (Int32() | Int64(), _):
-                try:
-                    return int(value)
-                except ValueError:
-                    return None
-            case (DirectRelation(), str()):
-                return {"space": instance_space, "externalId": value}
-            case (DirectRelation(), _):
-                return value
-            case (Enum(), _):
-                return next(
-                    (
-                        opt
-                        for opt in property_type.values.keys()
-                        if opt.casefold() == str(value).casefold()
-                        and str(value).casefold() != (property_type.unknown_value or "").casefold()
-                    ),
-                    None,
-                )
-            case _:
-                return value
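The removed _serialize_value is the core of the populate logic: string cells get one chance to parse as JSON, list-typed properties map or wrap values as needed, and a match statement coerces the remainder per target type. A condensed, SDK-free sketch of that dispatch, using a plain kind string where the original matches on the SDK's PropertyType classes (an assumption made for illustration):

import json
import math
from typing import Any


def coerce(value: Any, kind: str, is_list: bool = False) -> Any:
    # JSON pre-pass: strings like "1.5" or "[1, 2]" decode before type dispatch.
    if isinstance(value, str):
        try:
            return coerce(json.loads(value), kind, is_list)
        except json.JSONDecodeError:
            pass
    # List handling: map over lists, wrap scalars for list-typed properties.
    if is_list:
        values = value if isinstance(value, list) else [value]
        return [coerce(v, kind) for v in values]
    if value is None:
        return None
    match kind:
        case "text":
            return str(value)
        case "boolean":
            return value.lower() in ("true", "1") if isinstance(value, str) else bool(value)
        case "float":
            f = float(value)
            return None if math.isnan(f) or math.isinf(f) else f  # JSON cannot carry NaN/Inf
        case "int":
            try:
                return int(value)
            except ValueError:
                return None
        case _:
            return value


print(coerce("[1, 2, 3]", "int", is_list=True))  # -> [1, 2, 3]
print(coerce("nan", "float"))                    # -> None

Mapping non-finite floats and unparseable ints to None mirrors the original's choice to drop individual values the API would reject rather than fail the whole batch.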