cognite-toolkit 0.7.0__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
@@ -95,13 +95,13 @@ class UploadApp(typer.Typer):
  typer.echo("No selection made for deploying resources. Exiting.")
  raise typer.Exit(code=1)

- client = EnvironmentVariables.create_from_environment().get_client()
- cmd.run(
- lambda: cmd.upload(
- input_dir=input_dir,
- dry_run=dry_run,
- verbose=verbose,
- deploy_resources=deploy_resources,
- client=client,
- )
+ client = EnvironmentVariables.create_from_environment().get_client()
+ cmd.run(
+ lambda: cmd.upload(
+ input_dir=input_dir,
+ dry_run=dry_run,
+ verbose=verbose,
+ deploy_resources=deploy_resources,
+ client=client,
  )
+ )
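Both sides of this hunk carry the same statements; the rendered diff drops leading whitespace, and the shrinking byte count for `_upload_app.py` in the RECORD section further down (4267 → 4227) suggests the block was only re-indented. As a minimal, self-contained sketch of the deferred-call pattern used here, with stand-in names rather than the Toolkit's own `cmd.run`/`cmd.upload`:

```python
# Sketch only: the runner receives a zero-argument callable wrapping the actual
# upload, so one place owns error handling. Names and indentation are assumptions;
# they are not Toolkit APIs.
from collections.abc import Callable


def run(action: Callable[[], None]) -> None:
    # Stand-in for the command runner: a single place to catch and report errors.
    try:
        action()
    except Exception as error:
        print(f"Command failed: {error}")


def upload(input_dir: str, dry_run: bool, verbose: bool, deploy_resources: bool) -> None:
    print(f"Uploading from {input_dir} (dry_run={dry_run}, verbose={verbose}, deploy={deploy_resources})")


run(lambda: upload(input_dir="staging", dry_run=True, verbose=False, deploy_resources=False))
```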
@@ -11,7 +11,6 @@ from .build_cmd import BuildCommand
  from .clean import CleanCommand
  from .collect import CollectCommand
  from .deploy import DeployCommand
- from .dump_data import DumpDataCommand
  from .dump_resource import DumpResourceCommand
  from .featureflag import FeatureFlagCommand
  from .init import InitCommand
@@ -27,7 +26,6 @@ __all__ = [
  "CollectCommand",
  "DeployCommand",
  "DownloadCommand",
- "DumpDataCommand",
  "DumpResourceCommand",
  "FeatureFlagCommand",
  "InitCommand",
@@ -19,7 +19,7 @@ from cognite_toolkit._cdf_tk.exceptions import ToolkitMissingDependencyError, To
  from cognite_toolkit._cdf_tk.utils._auxiliary import get_concrete_subclasses
  from cognite_toolkit._cdf_tk.utils.collection import humanize_collection
  from cognite_toolkit._cdf_tk.utils.file import sanitize_filename
- from cognite_toolkit._cdf_tk.utils.table_writers import DataType
+ from cognite_toolkit._cdf_tk.utils.useful_types import DataType

  from ._base import T_IO, CellValue, Chunk, FileIO, SchemaColumn
  from ._compression import Compression, Uncompressed
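The `DataType` alias now comes from `utils.useful_types`; the RECORD section below shows its previous home, `utils/table_writers.py`, removed and `fileio/_writers.py` one byte smaller, which matches this one-line import change. Downstream code would import it from the new location, assuming the alias keeps the same literal values as the removed `table_writers.DataType` shown near the end of this diff:

```python
# New import location for DataType (the old module is removed in this release).
# Assumes the alias in useful_types accepts the same literals as the removed
# table_writers.DataType ("string", "integer", ..., "timestamp", "epoch").
from cognite_toolkit._cdf_tk.utils.useful_types import DataType

column_type: DataType = "timestamp"
print(column_type)
```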
@@ -12,7 +12,7 @@ jobs:
  environment: dev
  name: Deploy
  container:
- image: cognite/toolkit:0.7.0
+ image: cognite/toolkit:0.7.1
  env:
  CDF_CLUSTER: ${{ vars.CDF_CLUSTER }}
  CDF_PROJECT: ${{ vars.CDF_PROJECT }}
@@ -10,7 +10,7 @@ jobs:
  environment: dev
  name: Deploy Dry Run
  container:
- image: cognite/toolkit:0.7.0
+ image: cognite/toolkit:0.7.1
  env:
  CDF_CLUSTER: ${{ vars.CDF_CLUSTER }}
  CDF_PROJECT: ${{ vars.CDF_PROJECT }}
@@ -4,7 +4,7 @@ default_env = "<DEFAULT_ENV_PLACEHOLDER>"
  [modules]
  # This is the version of the modules. It should not be changed manually.
  # It will be updated by the 'cdf modules upgrade' command.
- version = "0.7.0"
+ version = "0.7.1"


  [plugins]
@@ -1 +1 @@
- __version__ = "0.7.0"
+ __version__ = "0.7.1"
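With both `_version.py` and the wheel metadata bumped, the installed version can be confirmed either from the module or from the distribution metadata. A short check, requiring cognite-toolkit 0.7.1 in the environment:

```python
# Two equivalent ways to read the installed Toolkit version.
from importlib.metadata import version

from cognite_toolkit._version import __version__

print(__version__)                 # "0.7.1"
print(version("cognite_toolkit"))  # "0.7.1"
```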
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: cognite_toolkit
- Version: 0.7.0
+ Version: 0.7.1
  Summary: Official Cognite Data Fusion tool for project templates and configuration deployment
  Project-URL: Homepage, https://docs.cognite.com/cdf/deploy/cdf_toolkit/
  Project-URL: Changelog, https://github.com/cognitedata/toolkit/releases
@@ -1,6 +1,6 @@
  cognite_toolkit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  cognite_toolkit/_cdf.py,sha256=PzDig6dgbDX5VL88AeijQuTeYb2SS_yvenw9gr4fnxY,5794
- cognite_toolkit/_version.py,sha256=RaANGbRu5e-vehwXI1-Qe2ggPPfs1TQaZj072JdbLk4,22
+ cognite_toolkit/_version.py,sha256=2KJZDSMOG7KS82AxYOrZ4ZihYxX0wjfUjDsIZh3L024,22
  cognite_toolkit/_cdf_tk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  cognite_toolkit/_cdf_tk/cdf_toml.py,sha256=VSWV9h44HusWIaKpWgjrOMrc3hDoPTTXBXlp6-NOrIM,9079
  cognite_toolkit/_cdf_tk/constants.py,sha256=3UpFZ60xXdqgPqqpqCITQuAvjnVExH_IlbASxoelvu8,7236
@@ -25,7 +25,7 @@ cognite_toolkit/_cdf_tk/apps/_profile_app.py,sha256=vSRJW54bEvIul8_4rOqyOYA7ztXx
  cognite_toolkit/_cdf_tk/apps/_purge.py,sha256=KYI1wFy7yHFEM1qJnTYc4_8E2FVGu4QhPsWsxop1sZA,14242
  cognite_toolkit/_cdf_tk/apps/_repo_app.py,sha256=jOf_s7oUWJqnRyz89JFiSzT2l8GlyQ7wqidHUQavGo0,1455
  cognite_toolkit/_cdf_tk/apps/_run.py,sha256=eXua4n0hW4qRMkzaxR0PiZh-JFLf8gnWw1_5O-0-vm0,8987
- cognite_toolkit/_cdf_tk/apps/_upload_app.py,sha256=BgJrcm_KikLLdr2ZUPG9CdL2hrLr7T0gR6cxih5kll0,4267
+ cognite_toolkit/_cdf_tk/apps/_upload_app.py,sha256=1nF0-7oCAXLlmTGyUOKTmxkZqvA0Xo6U6lqk-SqKmCc,4227
  cognite_toolkit/_cdf_tk/builders/__init__.py,sha256=Y-AJ4VrcUCRquGNEgDCiwmWW3iGWnJl2DrL17gsUIBg,1172
  cognite_toolkit/_cdf_tk/builders/_base.py,sha256=N32Y17hfepp45rMW_o4qeUY9nsysmtcxpX4GkF-tsio,7829
  cognite_toolkit/_cdf_tk/builders/_datamodels.py,sha256=hN3fWQAktrWdaGAItZ0tHpBXqJDu0JfH6t7pO7EIl2Q,3541
@@ -100,7 +100,7 @@ cognite_toolkit/_cdf_tk/client/data_classes/streams.py,sha256=DHSDrBax81fUzneIik
  cognite_toolkit/_cdf_tk/client/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  cognite_toolkit/_cdf_tk/client/utils/_concurrency.py,sha256=3GtQbKDaosyKHEt-KzxKK9Yie4TvZPdoou2vUk6dUa8,2298
  cognite_toolkit/_cdf_tk/client/utils/_http_client.py,sha256=oXNKrIaizG4WiSAhL_kSCHAuL4aaaEhCU4pOJGxh6Xs,483
- cognite_toolkit/_cdf_tk/commands/__init__.py,sha256=OJYtHiERtUBXm3cjUTyPVaYIMVQpu9HJv1QNGPL-AIQ,1418
+ cognite_toolkit/_cdf_tk/commands/__init__.py,sha256=gHA3yWI3UacMD79ZpCyh8MjA1fzuEg5pxZGts2VsXLs,1356
  cognite_toolkit/_cdf_tk/commands/_base.py,sha256=1gl8Y-yqfedRMfdbwM3iPTIUIZriX1UvC1deLsJSJwM,2667
  cognite_toolkit/_cdf_tk/commands/_changes.py,sha256=fvw2C5N2BVf-7MUpiB1FkDVCJ0xIy4lfDyFgpWaLPeo,24651
  cognite_toolkit/_cdf_tk/commands/_cli_commands.py,sha256=TK6U_rm6VZT_V941kTyHMoulWgJzbDC8YIIQDPJ5x3w,1011
@@ -116,7 +116,6 @@ cognite_toolkit/_cdf_tk/commands/build_cmd.py,sha256=6m-lK0vccje1gaQ_fd68UvA4Cbh
  cognite_toolkit/_cdf_tk/commands/clean.py,sha256=KDcUn1MEpvk_K7WqQPBiZcIlGV61JVG6D0DcYUXj7BM,16567
  cognite_toolkit/_cdf_tk/commands/collect.py,sha256=zBMKhhvjOpuASMnwP0eeHRI02tANcvFEZgv0CQO1ECc,627
  cognite_toolkit/_cdf_tk/commands/deploy.py,sha256=PO9r9iK1UEoDdoATS4hgjCP11DLGc2xSaY0g14nyATY,23519
- cognite_toolkit/_cdf_tk/commands/dump_data.py,sha256=8l4M2kqV4DjiV5js5s7EbFVNxV0Np4ld8ogw19vaJp0,21804
  cognite_toolkit/_cdf_tk/commands/dump_resource.py,sha256=ylAFST3GgkWT1Qa-JIzmQXbrQgNCB1UrptrBf3WsyvY,39658
  cognite_toolkit/_cdf_tk/commands/featureflag.py,sha256=lgLMwuNIwFjvvKn1sNMunkq4VTwdNqXtrZfdGFTrNcI,968
  cognite_toolkit/_cdf_tk/commands/init.py,sha256=pcxFhZheXm3FPU1pkeh10M0WXPg7EcLFUgJlrE817tE,9257
@@ -282,7 +281,6 @@ cognite_toolkit/_cdf_tk/utils/progress_tracker.py,sha256=LGpC22iSTTlo6FWi38kqBu_
  cognite_toolkit/_cdf_tk/utils/repository.py,sha256=voQLZ6NiNvdAFxqeWHbvzDLsLHl6spjQBihiLyCsGW8,4104
  cognite_toolkit/_cdf_tk/utils/sentry_utils.py,sha256=Q3ekrR0bWMtlPVQrfUSsETlkLIaDUZ2u-RdNFFr9-dg,564
  cognite_toolkit/_cdf_tk/utils/sql_parser.py,sha256=jernu2amPQ54cQZ4vFZm1gEhFZfGcjU-yLQQG_RFo_M,6458
- cognite_toolkit/_cdf_tk/utils/table_writers.py,sha256=6BS_CMsIY5WE2O9u693Q4b0b-0E3-nlTuQ7NXk9OzX4,17870
  cognite_toolkit/_cdf_tk/utils/text.py,sha256=1-LQMo633_hEhNhishQo7Buj-7np5Pe4qKk0TQofMzE,3906
  cognite_toolkit/_cdf_tk/utils/thread_safe_dict.py,sha256=NbRHcZvWpF9xHP5OkOMGFpxrPNbi0Q3Eea6PUNbGlt4,3426
  cognite_toolkit/_cdf_tk/utils/useful_types.py,sha256=oK88W6G_aK3hebORSQKZjWrq7jG-pO2lkLWSWYMlngM,1872
@@ -291,7 +289,7 @@ cognite_toolkit/_cdf_tk/utils/fileio/__init__.py,sha256=0rJsL3jClj_smxh_Omqchf0K
  cognite_toolkit/_cdf_tk/utils/fileio/_base.py,sha256=eC6mRIwSD4LjyFa83BoBnhO0t3l-ctQMW295LIyxXLk,827
  cognite_toolkit/_cdf_tk/utils/fileio/_compression.py,sha256=8BAPgg5OKc3vkEEkqOvYsuyh12iXVNuEmC0omWwyJNQ,2355
  cognite_toolkit/_cdf_tk/utils/fileio/_readers.py,sha256=i9TTqG2aml0B2Z6ZFKe7Z-bOAOa-wHz3fEemJGvIQww,15813
- cognite_toolkit/_cdf_tk/utils/fileio/_writers.py,sha256=4buAPp73Qfc0hw_LMyFI3g2DhdM4hbrasXuwMCiAcCQ,17732
+ cognite_toolkit/_cdf_tk/utils/fileio/_writers.py,sha256=mc23m0kJgl57FUDvwLmS7yR3xVZWQguPJa_63-qQ_L0,17731
  cognite_toolkit/_cdf_tk/utils/http_client/__init__.py,sha256=G8b7Bg4yIet5R4Igh3dS2SntWzE6I0iTGBeNlNsSxkQ,857
  cognite_toolkit/_cdf_tk/utils/http_client/_client.py,sha256=NTRfloXkCiS_rl5Vl1D_hsyTTowMKWDsiIR4oGwTADI,11208
  cognite_toolkit/_cdf_tk/utils/http_client/_data_classes.py,sha256=gNEJLb-tCoRh-OQA0BcJpESWl416ctC_6xKhWdwI4BU,13920
@@ -302,13 +300,13 @@ cognite_toolkit/_repo_files/.gitignore,sha256=ip9kf9tcC5OguF4YF4JFEApnKYw0nG0vPi
  cognite_toolkit/_repo_files/AzureDevOps/.devops/README.md,sha256=OLA0D7yCX2tACpzvkA0IfkgQ4_swSd-OlJ1tYcTBpsA,240
  cognite_toolkit/_repo_files/AzureDevOps/.devops/deploy-pipeline.yml,sha256=brULcs8joAeBC_w_aoWjDDUHs3JheLMIR9ajPUK96nc,693
  cognite_toolkit/_repo_files/AzureDevOps/.devops/dry-run-pipeline.yml,sha256=OBFDhFWK1mlT4Dc6mDUE2Es834l8sAlYG50-5RxRtHk,723
- cognite_toolkit/_repo_files/GitHub/.github/workflows/deploy.yaml,sha256=S8MFGAgtuHTYilFDxn907tu9kRL65RQO1XbFwzIWTJk,666
- cognite_toolkit/_repo_files/GitHub/.github/workflows/dry-run.yaml,sha256=obiNW_GgW2PeViZmYU0pUBh1KZvixdzjPmwEzBKQl4k,2429
- cognite_toolkit/_resources/cdf.toml,sha256=2plDWdhchZQwul76okB71n0RbjV5bBKKo72gwDWP7Eo,474
+ cognite_toolkit/_repo_files/GitHub/.github/workflows/deploy.yaml,sha256=PpKK3jflxLwNjCldzTWYHhg2hQ0Omi1BonQrnjWmgXo,666
+ cognite_toolkit/_repo_files/GitHub/.github/workflows/dry-run.yaml,sha256=ASOi_Om6-yPgKXo8NQE4xSju0f7c0BCGPVjFlNKTyeg,2429
+ cognite_toolkit/_resources/cdf.toml,sha256=A4-O_130gdFgF96eY8qDobuPNcGvhb2LPE6Rbs8PnkI,474
  cognite_toolkit/demo/__init__.py,sha256=-m1JoUiwRhNCL18eJ6t7fZOL7RPfowhCuqhYFtLgrss,72
  cognite_toolkit/demo/_base.py,sha256=6xKBUQpXZXGQ3fJ5f7nj7oT0s2n7OTAGIa17ZlKHZ5U,8052
- cognite_toolkit-0.7.0.dist-info/METADATA,sha256=w2PxW2DrLeWISkNeWn3vNiTIZdu6k_NzAIOFACAOVQE,4500
- cognite_toolkit-0.7.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- cognite_toolkit-0.7.0.dist-info/entry_points.txt,sha256=JlR7MH1_UMogC3QOyN4-1l36VbrCX9xUdQoHGkuJ6-4,83
- cognite_toolkit-0.7.0.dist-info/licenses/LICENSE,sha256=CW0DRcx5tL-pCxLEN7ts2S9g2sLRAsWgHVEX4SN9_Mc,752
- cognite_toolkit-0.7.0.dist-info/RECORD,,
+ cognite_toolkit-0.7.1.dist-info/METADATA,sha256=n8jC9M1BlwW_IosE5ybERRNMUPxfyKTxvN4QDL7PltM,4500
+ cognite_toolkit-0.7.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ cognite_toolkit-0.7.1.dist-info/entry_points.txt,sha256=JlR7MH1_UMogC3QOyN4-1l36VbrCX9xUdQoHGkuJ6-4,83
+ cognite_toolkit-0.7.1.dist-info/licenses/LICENSE,sha256=CW0DRcx5tL-pCxLEN7ts2S9g2sLRAsWgHVEX4SN9_Mc,752
+ cognite_toolkit-0.7.1.dist-info/RECORD,,
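The RECORD changes amount to new hashes for the touched files, the renamed 0.7.1 dist-info entries, and two deletions: `commands/dump_data.py` and `utils/table_writers.py` (both shown in full below). A small check against an installed wheel, using only the standard library:

```python
# Confirm that the two removed modules are absent from an installed 0.7.1 wheel.
# Paths are taken from the RECORD diff above.
from importlib.metadata import files

installed = {str(path) for path in files("cognite_toolkit") or []}
for removed in (
    "cognite_toolkit/_cdf_tk/commands/dump_data.py",
    "cognite_toolkit/_cdf_tk/utils/table_writers.py",
):
    print(removed, "-> still present" if removed in installed else "-> removed")
```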
@@ -1,489 +0,0 @@
1
- import time
2
- from abc import ABC, abstractmethod
3
- from collections.abc import Callable, Iterable, Iterator
4
- from functools import lru_cache
5
- from itertools import groupby
6
- from pathlib import Path
7
- from typing import Any, ClassVar, Generic, Literal
8
-
9
- from cognite.client.data_classes import (
10
- Asset,
11
- AssetFilter,
12
- DataSetList,
13
- Event,
14
- EventFilter,
15
- FileMetadata,
16
- FileMetadataFilter,
17
- LabelDefinitionList,
18
- TimeSeries,
19
- TimeSeriesFilter,
20
- )
21
- from cognite.client.data_classes._base import T_CogniteResource
22
- from rich.console import Console
23
- from rich.progress import track
24
-
25
- from cognite_toolkit._cdf_tk.client import ToolkitClient
26
- from cognite_toolkit._cdf_tk.commands._base import ToolkitCommand
27
- from cognite_toolkit._cdf_tk.cruds import (
28
- AssetCRUD,
29
- DataSetsCRUD,
30
- EventCRUD,
31
- FileMetadataCRUD,
32
- LabelCRUD,
33
- ResourceCRUD,
34
- TimeSeriesCRUD,
35
- )
36
- from cognite_toolkit._cdf_tk.exceptions import (
37
- ToolkitFileExistsError,
38
- ToolkitIsADirectoryError,
39
- ToolkitValueError,
40
- )
41
- from cognite_toolkit._cdf_tk.utils import humanize_collection
42
- from cognite_toolkit._cdf_tk.utils.cdf import metadata_key_counts
43
- from cognite_toolkit._cdf_tk.utils.file import safe_rmtree
44
- from cognite_toolkit._cdf_tk.utils.producer_worker import ProducerWorkerExecutor
45
- from cognite_toolkit._cdf_tk.utils.table_writers import (
46
- FileFormat,
47
- Schema,
48
- SchemaColumn,
49
- SchemaColumnList,
50
- TableFileWriter,
51
- )
52
-
53
-
54
- class DataFinder:
55
- supported_formats: ClassVar[frozenset[FileFormat]] = frozenset()
56
- # This is the standard maximum items that can be returns by most CDF endpoints.
57
- chunk_size: ClassVar[int] = 1000
58
-
59
- def validate_format(self, format_: str) -> Literal[FileFormat]:
60
- if format_ in self.supported_formats:
61
- return format_ # type: ignore[return-value]
62
- raise ToolkitValueError(
63
- f"Unsupported format {format_}. Supported formats are {humanize_collection(self.supported_formats)}."
64
- )
65
-
66
- @abstractmethod
67
- def create_iterators(
68
- self, format_: FileFormat, limit: int | None
69
- ) -> Iterator[tuple[Schema, int, Iterable, Callable]]:
70
- """Create an iterator for the specified format."""
71
- raise NotImplementedError("This method should be implemented in subclasses.")
72
-
73
-
74
- class AssetCentricFinder(DataFinder, ABC, Generic[T_CogniteResource]):
75
- def __init__(self, client: ToolkitClient, hierarchies: list[str], data_sets: list[str]):
76
- self.client = client
77
- self.hierarchies = hierarchies
78
- self.data_sets = data_sets
79
- self.loader = self._create_loader(client)
80
- self._hierarchy_set = set(self.hierarchies)
81
- self._data_set_set = set(self.data_sets)
82
- self._used_labels: set[str] = set()
83
- self._used_data_sets: set[str] = set()
84
-
85
- @abstractmethod
86
- def _create_loader(self, client: ToolkitClient) -> ResourceCRUD:
87
- """Create the appropriate loader for the finder."""
88
- raise NotImplementedError()
89
-
90
- @lru_cache
91
- def aggregate_count(self, hierarchies: tuple[str, ...], data_sets: tuple[str, ...]) -> int:
92
- return self._aggregate_count(list(hierarchies), list(data_sets))
93
-
94
- @abstractmethod
95
- def _aggregate_count(self, hierarchies: list[str], data_sets: list[str]) -> int:
96
- raise NotImplementedError()
97
-
98
- @abstractmethod
99
- def _get_resource_columns(self) -> SchemaColumnList:
100
- """Get the columns for the schema."""
101
- raise NotImplementedError()
102
-
103
- @abstractmethod
104
- def create_resource_iterator(self, limit: int | None) -> Iterable:
105
- raise NotImplementedError()
106
-
107
- @abstractmethod
108
- def _resource_processor(self, items: Iterable[T_CogniteResource]) -> list[tuple[str, list[dict[str, Any]]]]:
109
- """Process the resources and return them in a format suitable for writing."""
110
- raise NotImplementedError()
111
-
112
- def _to_write(self, items: Iterable[T_CogniteResource]) -> list[dict[str, Any]]:
113
- write_items: list[dict[str, Any]] = []
114
- for item in items:
115
- dumped = self.loader.dump_resource(item)
116
- if "metadata" in dumped:
117
- metadata = dumped.pop("metadata")
118
- for key, value in metadata.items():
119
- dumped[f"metadata.{key}"] = value
120
- if isinstance(dumped.get("labels"), list):
121
- dumped["labels"] = [label["externalId"] for label in dumped["labels"]]
122
- self._used_labels.update(dumped["labels"])
123
- if "dataSetExternalId" in dumped:
124
- self._used_data_sets.add(dumped["dataSetExternalId"])
125
- write_items.append(dumped)
126
- return write_items
127
-
128
- def create_iterators(
129
- self, format_: FileFormat, limit: int | None
130
- ) -> Iterator[tuple[Schema, int, Iterable, Callable]]:
131
- total = self.aggregate_count(tuple(self.hierarchies), tuple(self.data_sets))
132
- columns = self._get_resource_columns()
133
-
134
- iteration_count = total // self.chunk_size + (1 if total % self.chunk_size > 0 else 0)
135
- if iteration_count == 0:
136
- return
137
-
138
- yield (
139
- Schema(
140
- display_name=self.loader.display_name,
141
- format_=format_,
142
- columns=columns,
143
- folder_name=self.loader.folder_name,
144
- kind=self.loader.kind,
145
- ),
146
- iteration_count,
147
- self.create_resource_iterator(limit),
148
- self._resource_processor,
149
- )
150
- if self._used_data_sets:
151
- yield self._data_sets()
152
- if self._used_labels:
153
- yield self._labels()
154
-
155
- def _data_sets(self) -> tuple[Schema, int, Iterable, Callable]:
156
- data_sets = self.client.data_sets.retrieve_multiple(
157
- external_ids=list(self._used_data_sets), ignore_unknown_ids=True
158
- )
159
- loader = DataSetsCRUD.create_loader(self.client)
160
-
161
- def process_data_sets(items: DataSetList) -> list[tuple[str, list[dict[str, Any]]]]:
162
- # All data sets are written to a single group, thus the empty string as the group key.
163
- # (Group keys are for example used in CSV files to create separate files for each
164
- # data set an asset belongs to.)
165
- return [("", [loader.dump_resource(item) for item in items])]
166
-
167
- return (
168
- # YAML format does not need columns.
169
- Schema(
170
- display_name=loader.display_name,
171
- format_="yaml",
172
- columns=SchemaColumnList(),
173
- folder_name=loader.folder_name,
174
- kind=loader.kind,
175
- ),
176
- 1,
177
- [data_sets],
178
- process_data_sets,
179
- )
180
-
181
- def _labels(self) -> tuple[Schema, int, Iterable, Callable]:
182
- labels = self.client.labels.retrieve(external_id=list(self._used_labels))
183
- loader = LabelCRUD.create_loader(self.client)
184
-
185
- def process_labels(items: LabelDefinitionList) -> list[tuple[str, list[dict[str, Any]]]]:
186
- # All labels are written to a single group, thus the empty string as the group key.
187
- # (Group keys are for example used in CSV files to create separate files for each
188
- # label an asset belongs to.)
189
- return [("", [loader.dump_resource(item) for item in items])]
190
-
191
- return (
192
- # YAML format does not need columns.
193
- Schema(
194
- display_name=loader.display_name,
195
- format_="yaml",
196
- columns=SchemaColumnList(),
197
- folder_name=loader.folder_name,
198
- kind=loader.kind,
199
- ),
200
- 1,
201
- [labels],
202
- process_labels,
203
- )
204
-
205
-
206
- class AssetFinder(AssetCentricFinder[Asset]):
207
- supported_formats = frozenset({"csv", "parquet", "yaml"})
208
-
209
- def _create_loader(self, client: ToolkitClient) -> ResourceCRUD:
210
- return AssetCRUD.create_loader(client)
211
-
212
- def _aggregate_count(self, hierarchies: list[str], data_sets: list[str]) -> int:
213
- return self.client.assets.aggregate_count(
214
- filter=AssetFilter(
215
- data_set_ids=[{"externalId": item} for item in data_sets] or None,
216
- asset_subtree_ids=[{"externalId": item} for item in hierarchies] or None,
217
- )
218
- )
219
-
220
- def create_resource_iterator(self, limit: int | None) -> Iterator:
221
- return self.client.assets(
222
- chunk_size=self.chunk_size,
223
- asset_subtree_external_ids=self.hierarchies or None,
224
- data_set_external_ids=self.data_sets or None,
225
- limit=limit,
226
- )
227
-
228
- def _resource_processor(self, assets: Iterable[Asset]) -> list[tuple[str, list[dict[str, Any]]]]:
229
- grouped_assets: list[tuple[str, list[dict[str, object]]]] = []
230
- for group, asset_group in groupby(
231
- sorted([(self._group(asset), asset) for asset in assets], key=lambda x: x[0]), key=lambda x: x[0]
232
- ):
233
- grouped_assets.append((group, self._to_write([asset for _, asset in asset_group])))
234
- return grouped_assets
235
-
236
- def _group(self, item: Asset) -> str:
237
- if self.hierarchies and self.data_sets:
238
- asset_external_id = self.client.lookup.assets.external_id(item.root_id or 0)
239
- data_set_external_id = self.client.lookup.data_sets.external_id(item.data_set_id or 0)
240
- if asset_external_id and data_set_external_id:
241
- return f"{asset_external_id}.{data_set_external_id}"
242
- elif asset_external_id:
243
- return asset_external_id
244
- elif data_set_external_id:
245
- return data_set_external_id
246
- return ""
247
- elif self.hierarchies:
248
- return self.client.lookup.assets.external_id(item.root_id or 0) or ""
249
- elif self.data_sets:
250
- return self.client.lookup.data_sets.external_id(item.data_set_id or 0) or ""
251
- return ""
252
-
253
- def _get_resource_columns(self) -> SchemaColumnList:
254
- columns = SchemaColumnList(
255
- [
256
- SchemaColumn(name="externalId", type="string"),
257
- SchemaColumn(name="name", type="string"),
258
- SchemaColumn(name="parentExternalId", type="string"),
259
- SchemaColumn(name="description", type="string"),
260
- SchemaColumn(name="dataSetExternalId", type="string"),
261
- SchemaColumn(name="source", type="string"),
262
- SchemaColumn(name="labels", type="string", is_array=True),
263
- SchemaColumn(name="geoLocation", type="json"),
264
- ]
265
- )
266
- data_set_ids = self.client.lookup.data_sets.id(self.data_sets) if self.data_sets else []
267
- root_ids = self.client.lookup.assets.id(self.hierarchies) if self.hierarchies else []
268
- metadata_keys = metadata_key_counts(self.client, "assets", data_set_ids or None, root_ids or None)
269
- sorted_keys = sorted([key for key, count in metadata_keys if count > 0])
270
- columns.extend([SchemaColumn(name=f"metadata.{key}", type="string") for key in sorted_keys])
271
- return columns
272
-
273
-
274
- class FileMetadataFinder(AssetCentricFinder[FileMetadata]):
275
- supported_formats = frozenset({"csv", "parquet"})
276
-
277
- def _create_loader(self, client: ToolkitClient) -> ResourceCRUD:
278
- return FileMetadataCRUD.create_loader(client)
279
-
280
- def _aggregate_count(self, hierarchies: list[str], data_sets: list[str]) -> int:
281
- result = self.client.files.aggregate(
282
- filter=FileMetadataFilter(
283
- data_set_ids=[{"externalId": item} for item in data_sets] or None,
284
- asset_subtree_ids=[{"externalId": item} for item in hierarchies] or None,
285
- )
286
- )
287
- return result[0].count if result else 0
288
-
289
- def _get_resource_columns(self) -> SchemaColumnList:
290
- columns = SchemaColumnList(
291
- [
292
- SchemaColumn(name="externalId", type="string"),
293
- SchemaColumn(name="name", type="string"),
294
- SchemaColumn(name="directory", type="string"),
295
- SchemaColumn(name="source", type="string"),
296
- SchemaColumn(name="mimeType", type="string"),
297
- SchemaColumn(name="assetExternalIds", type="string", is_array=True),
298
- SchemaColumn(name="dataSetExternalId", type="string"),
299
- SchemaColumn(name="sourceCreatedTime", type="integer"),
300
- SchemaColumn(name="sourceModifiedTime", type="integer"),
301
- SchemaColumn(name="securityCategories", type="string", is_array=True),
302
- SchemaColumn(name="labels", type="string", is_array=True),
303
- SchemaColumn(name="geoLocation", type="json"),
304
- ]
305
- )
306
- data_set_ids = self.client.lookup.data_sets.id(self.data_sets) if self.data_sets else []
307
- root_ids = self.client.lookup.assets.id(self.hierarchies) if self.hierarchies else []
308
- metadata_keys = metadata_key_counts(self.client, "files", data_set_ids or None, root_ids or None)
309
- sorted_keys = sorted([key for key, count in metadata_keys if count > 0])
310
- columns.extend([SchemaColumn(name=f"metadata.{key}", type="string") for key in sorted_keys])
311
- return columns
312
-
313
- def create_resource_iterator(self, limit: int | None) -> Iterable:
314
- return self.client.files(
315
- chunk_size=self.chunk_size,
316
- asset_subtree_external_ids=self.hierarchies or None,
317
- data_set_external_ids=self.data_sets or None,
318
- limit=limit,
319
- )
320
-
321
- def _resource_processor(self, items: Iterable[FileMetadata]) -> list[tuple[str, list[dict[str, Any]]]]:
322
- return [("", self._to_write(items))]
323
-
324
-
325
- class TimeSeriesFinder(AssetCentricFinder[TimeSeries]):
326
- supported_formats = frozenset({"csv", "parquet", "yaml"})
327
-
328
- def _create_loader(self, client: ToolkitClient) -> TimeSeriesCRUD:
329
- return TimeSeriesCRUD.create_loader(client)
330
-
331
- def _aggregate_count(self, hierarchies: list[str], data_sets: list[str]) -> int:
332
- return self.client.time_series.aggregate_count(
333
- filter=TimeSeriesFilter(
334
- data_set_ids=[{"externalId": item} for item in data_sets] or None,
335
- asset_subtree_ids=[{"externalId": item} for item in hierarchies] or None,
336
- )
337
- )
338
-
339
- def create_resource_iterator(self, limit: int | None) -> Iterator:
340
- return self.client.time_series(
341
- chunk_size=self.chunk_size,
342
- asset_subtree_external_ids=self.hierarchies or None,
343
- data_set_external_ids=self.data_sets or None,
344
- limit=limit,
345
- )
346
-
347
- def _resource_processor(self, time_series: Iterable[TimeSeries]) -> list[tuple[str, list[dict[str, Any]]]]:
348
- return [("", self._to_write(time_series))]
349
-
350
- def _get_resource_columns(self) -> SchemaColumnList:
351
- columns = SchemaColumnList(
352
- [
353
- SchemaColumn(name="externalId", type="string"),
354
- SchemaColumn(name="name", type="string"),
355
- SchemaColumn(name="isString", type="boolean"),
356
- SchemaColumn(name="unit", type="string"),
357
- SchemaColumn(name="unitExternalId", type="string"),
358
- SchemaColumn(name="assetExternalId", type="string"),
359
- SchemaColumn(name="isStep", type="boolean"),
360
- SchemaColumn(name="description", type="string"),
361
- SchemaColumn(name="dataSetExternalId", type="string"),
362
- SchemaColumn(name="securityCategories", type="string", is_array=True),
363
- ]
364
- )
365
- data_set_ids = self.client.lookup.data_sets.id(self.data_sets) if self.data_sets else []
366
- root_ids = self.client.lookup.assets.id(self.hierarchies) if self.hierarchies else []
367
- metadata_keys = metadata_key_counts(self.client, "timeseries", data_set_ids or None, root_ids or None)
368
- sorted_keys = sorted([key for key, count in metadata_keys if count > 0])
369
- columns.extend([SchemaColumn(name=f"metadata.{key}", type="string") for key in sorted_keys])
370
- return columns
371
-
372
-
373
- class EventFinder(AssetCentricFinder[Event]):
374
- supported_formats = frozenset({"csv", "parquet"})
375
-
376
- def _create_loader(self, client: ToolkitClient) -> ResourceCRUD:
377
- return EventCRUD.create_loader(client)
378
-
379
- def _aggregate_count(self, hierarchies: list[str], data_sets: list[str]) -> int:
380
- return self.client.events.aggregate_count(
381
- filter=EventFilter(
382
- data_set_ids=[{"externalId": item} for item in data_sets] or None,
383
- asset_subtree_ids=[{"externalId": item} for item in hierarchies] or None,
384
- )
385
- )
386
-
387
- def _get_resource_columns(self) -> SchemaColumnList:
388
- columns = SchemaColumnList(
389
- [
390
- SchemaColumn(name="externalId", type="string"),
391
- SchemaColumn(name="dataSetExternalId", type="string"),
392
- SchemaColumn(name="startTime", type="integer"),
393
- SchemaColumn(name="endTime", type="integer"),
394
- SchemaColumn(name="type", type="string"),
395
- SchemaColumn(name="subtype", type="string"),
396
- SchemaColumn(name="description", type="string"),
397
- SchemaColumn(name="assetExternalIds", type="string", is_array=True),
398
- SchemaColumn(name="source", type="string"),
399
- ]
400
- )
401
- data_set_ids = self.client.lookup.data_sets.id(self.data_sets) if self.data_sets else []
402
- root_ids = self.client.lookup.assets.id(self.hierarchies) if self.hierarchies else []
403
- metadata_keys = metadata_key_counts(self.client, "events", data_set_ids or None, root_ids or None)
404
- sorted_keys = sorted([key for key, count in metadata_keys if count > 0])
405
- columns.extend([SchemaColumn(name=f"metadata.{key}", type="string") for key in sorted_keys])
406
- return columns
407
-
408
- def create_resource_iterator(self, limit: int | None) -> Iterable:
409
- return self.client.events(
410
- chunk_size=self.chunk_size,
411
- asset_subtree_external_ids=self.hierarchies or None,
412
- data_set_external_ids=self.data_sets or None,
413
- limit=limit,
414
- )
415
-
416
- def _resource_processor(self, items: Iterable[Event]) -> list[tuple[str, list[dict[str, Any]]]]:
417
- return [("", self._to_write(items))]
418
-
419
-
420
- class DumpDataCommand(ToolkitCommand):
421
- def dump_table(
422
- self,
423
- finder: DataFinder,
424
- output_dir: Path,
425
- clean: bool,
426
- limit: int | None = None,
427
- format_: str = "csv",
428
- verbose: bool = False,
429
- parallel_threshold: int = 10,
430
- max_queue_size: int = 10,
431
- ) -> None:
432
- """Dumps data from CDF to a file
433
-
434
- Args:
435
- finder (DataFinder): The finder object to use for fetching data.
436
- output_dir (Path): The directory to write the output files to.
437
- clean (bool): Whether to clean the output directory before writing files.
438
- limit (int | None, optional): The maximum number of rows to write. Defaults to None.
439
- format_ (Literal["yaml", "csv", "parquet"], optional): The format of the output file. Defaults to "csv".
440
- verbose (bool, optional): Whether to print detailed progress information. Defaults to False.
441
- parallel_threshold (int, optional): The iteration threshold for parallel processing. Defaults to 10.
442
- max_queue_size (int, optional): If using parallel processing, the maximum size of the queue. Defaults to 10.
443
-
444
- """
445
- valid_format = finder.validate_format(format_)
446
- self.validate_directory(output_dir, clean)
447
-
448
- console = Console()
449
- # The ignore is used as MyPy does not understand that is_supported_format
450
- # above guarantees that the format is valid.
451
- for schema, iteration_count, resource_iterator, resource_processor in finder.create_iterators(
452
- valid_format, limit
453
- ):
454
- writer_cls = TableFileWriter.get_write_cls(schema.format_)
455
- row_counts = 0
456
- t0 = time.perf_counter()
457
- with writer_cls(schema, output_dir) as writer:
458
- if iteration_count > parallel_threshold:
459
- executor = ProducerWorkerExecutor(
460
- download_iterable=resource_iterator,
461
- process=resource_processor,
462
- write=writer.write_rows,
463
- iteration_count=iteration_count,
464
- max_queue_size=max_queue_size,
465
- download_description=f"Downloading {schema.display_name}",
466
- process_description=f"Processing {schema.display_name}",
467
- write_description=f"Writing {schema.display_name} to file",
468
- )
469
- executor.run()
470
- executor.raise_on_error()
471
- row_counts = executor.total_items
472
- else:
473
- for resources in track(
474
- resource_iterator, total=iteration_count, description=f"Dumping {schema.display_name}"
475
- ):
476
- row_counts += len(resources)
477
- processed = resource_processor(resources)
478
- writer.write_rows(processed)
479
- elapsed = time.perf_counter() - t0
480
- console.print(f"Dumped {row_counts:,} rows to {output_dir} in {elapsed:,.2f} seconds.")
481
-
482
- @staticmethod
483
- def validate_directory(output_dir: Path, clean: bool) -> None:
484
- if output_dir.exists() and clean:
485
- safe_rmtree(output_dir)
486
- elif output_dir.exists():
487
- raise ToolkitFileExistsError(f"Output directory {output_dir!s} already exists. Use --clean to remove it.")
488
- elif output_dir.suffix:
489
- raise ToolkitIsADirectoryError(f"Output directory {output_dir!s} is not a directory.")
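The removed `dump_data.py` grouped fetched resources before writing, so that, for example, assets from different root hierarchies or data sets land in separate files. A self-contained sketch of that grouping step, in the spirit of `AssetFinder._resource_processor` (the helper name and sample rows are illustrative, not Toolkit APIs):

```python
# Sort rows by a group key, then groupby, so the table writer can route each
# (group, rows) pair to its own output file.
from itertools import groupby
from typing import Any, Callable


def group_rows(
    rows: list[dict[str, Any]], key_of: Callable[[dict[str, Any]], str]
) -> list[tuple[str, list[dict[str, Any]]]]:
    keyed = sorted(((key_of(row), row) for row in rows), key=lambda pair: pair[0])
    return [(key, [row for _, row in grp]) for key, grp in groupby(keyed, key=lambda pair: pair[0])]


sample = [
    {"externalId": "pump_1", "rootExternalId": "plant_a"},
    {"externalId": "pump_2", "rootExternalId": "plant_b"},
    {"externalId": "pump_3", "rootExternalId": "plant_a"},
]
print(group_rows(sample, key_of=lambda row: row["rootExternalId"]))
# [('plant_a', [pump_1, pump_3]), ('plant_b', [pump_2])]
```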
@@ -1,434 +0,0 @@
1
- import csv
2
- import importlib.util
3
- import json
4
- import sys
5
- from abc import abstractmethod
6
- from collections.abc import Collection, Iterator, Mapping, Sequence
7
- from dataclasses import dataclass
8
- from datetime import date, datetime, timezone
9
- from functools import lru_cache
10
- from io import TextIOWrapper
11
- from pathlib import Path
12
- from types import MappingProxyType
13
- from typing import IO, TYPE_CHECKING, Any, ClassVar, Generic, Literal, SupportsIndex, TypeAlias, TypeVar, overload
14
-
15
- from cognite.client.data_classes.data_modeling import data_types as dt
16
- from cognite.client.data_classes.data_modeling.views import MappedProperty, ViewProperty
17
-
18
- from cognite_toolkit._cdf_tk.exceptions import ToolkitMissingDependencyError, ToolkitTypeError, ToolkitValueError
19
- from cognite_toolkit._cdf_tk.utils import humanize_collection, sanitize_filename
20
- from cognite_toolkit._cdf_tk.utils.file import yaml_safe_dump
21
-
22
- from .useful_types import JsonVal
23
-
24
- if sys.version_info >= (3, 11):
25
- from typing import Self
26
- else:
27
- from typing_extensions import Self
28
-
29
- if TYPE_CHECKING:
30
- import pyarrow as pa
31
- import pyarrow.parquet as pq
32
-
33
- FileFormat: TypeAlias = Literal["csv", "parquet", "yaml"]
34
- DataType: TypeAlias = Literal["string", "integer", "float", "boolean", "json", "date", "timestamp", "epoch"]
35
- PrimaryCellValue: TypeAlias = datetime | date | str | int | float | bool | JsonVal | None
36
- CellValue: TypeAlias = PrimaryCellValue | list[PrimaryCellValue]
37
- Rows: TypeAlias = list[dict[str, CellValue]]
38
-
39
-
40
- @dataclass(frozen=True)
41
- class SchemaColumn:
42
- name: str
43
- type: DataType
44
- is_array: bool = False
45
-
46
- def __post_init__(self) -> None:
47
- if self.type == "json" and self.is_array:
48
- raise ValueError("JSON columns cannot be arrays. Use 'is_array=False' for JSON columns.")
49
-
50
-
51
- class SchemaColumnList(list, Sequence[SchemaColumn]):
52
- # Implemented to get correct type hints
53
- def __init__(self, collection: Collection[SchemaColumn] | None = None) -> None:
54
- super().__init__(collection or [])
55
-
56
- def __iter__(self) -> Iterator[SchemaColumn]:
57
- return super().__iter__()
58
-
59
- @overload
60
- def __getitem__(self, index: SupportsIndex) -> SchemaColumn: ...
61
-
62
- @overload
63
- def __getitem__(self, index: slice) -> Self: ...
64
-
65
- def __getitem__(self, index: SupportsIndex | slice, /) -> SchemaColumn | Self:
66
- if isinstance(index, slice):
67
- return type(self)(super().__getitem__(index))
68
- return super().__getitem__(index)
69
-
70
- @classmethod
71
- def create_from_view_properties(cls, properties: Mapping[str, ViewProperty], support_edges: bool = False) -> Self:
72
- """Create a SchemaColumnList from a mapping of ViewProperty objects.
73
-
74
- Args:
75
- properties (Mapping[str, ViewProperty]): A mapping of property names to ViewProperty objects.
76
- support_edges (bool): Whether the the view supports edges. If True, the schema will include
77
- startNode and endNode columns.
78
-
79
- Returns:
80
- SchemaColumnList: A list of SchemaColumn objects representing the properties.
81
- """
82
- columns = [
83
- SchemaColumn("space", "string", is_array=False),
84
- SchemaColumn("externalId", "string", is_array=False),
85
- SchemaColumn("instanceType", "string"),
86
- SchemaColumn("existingVersion", "integer", is_array=False),
87
- SchemaColumn("type", "json", is_array=False),
88
- ]
89
- if support_edges:
90
- columns.append(SchemaColumn("startNode", "json", is_array=False))
91
- columns.append(SchemaColumn("endNode", "json", is_array=False))
92
- for name, prop in properties.items():
93
- if not isinstance(prop, MappedProperty):
94
- # We skip all properties that does not reside in a container.
95
- continue
96
- schema_type = cls._dms_to_schema_type(prop.type)
97
- is_array = (
98
- isinstance(prop.type, dt.ListablePropertyType)
99
- and prop.type.is_list
100
- and schema_type != "json" # JSON is not an array type
101
- )
102
- columns.append(SchemaColumn(name=f"properties.{name}", type=schema_type, is_array=is_array))
103
- return cls(columns)
104
-
105
- @classmethod
106
- def _dms_to_schema_type(cls, model_type: dt.PropertyType) -> DataType:
107
- if isinstance(model_type, dt.Text | dt.Enum | dt.CDFExternalIdReference):
108
- return "string"
109
- elif isinstance(model_type, dt.Boolean):
110
- return "boolean"
111
- elif isinstance(model_type, dt.Json | dt.DirectRelation):
112
- return "json"
113
- elif isinstance(model_type, dt.Int32 | dt.Int64):
114
- return "integer"
115
- elif isinstance(model_type, dt.Float32 | dt.Float64):
116
- return "float"
117
- elif isinstance(model_type, dt.Timestamp):
118
- return "timestamp"
119
- elif isinstance(model_type, dt.Date):
120
- return "date"
121
- else:
122
- raise ToolkitTypeError(
123
- f"Failed convertion from data modeling type to Table Schema. Unknown type: {type(model_type)!r}."
124
- )
125
-
126
-
127
- @dataclass
128
- class Schema:
129
- display_name: str
130
- folder_name: str
131
- kind: str
132
- format_: FileFormat
133
- columns: SchemaColumnList
134
-
135
-
136
- T_IO = TypeVar("T_IO", bound=IO)
137
-
138
-
139
- class TableFileWriter(Generic[T_IO]):
140
- encoding = "utf-8"
141
- newline = "\n"
142
- format: ClassVar[FileFormat]
143
-
144
- def __init__(self, schema: Schema, output_dir: Path, max_file_size_bytes: int = 128 * 1024 * 1024) -> None:
145
- self.max_file_size_bytes = max_file_size_bytes
146
- self.schema = schema
147
- self.output_dir = output_dir
148
- self._file_count = 1
149
- self._writer_by_filepath: dict[Path, T_IO] = {}
150
-
151
- def write_rows(self, rows_group_list: list[tuple[str, Rows]]) -> None:
152
- """Write rows to a file."""
153
- for group, group_rows in rows_group_list:
154
- if not group_rows:
155
- continue
156
- writer = self._get_writer(group)
157
- self._write_rows(writer, group_rows)
158
-
159
- @abstractmethod
160
- def _write_rows(self, writer: T_IO, rows: Rows) -> None:
161
- raise NotImplementedError()
162
-
163
- @abstractmethod
164
- def _create_writer(self, filepath: Path) -> T_IO:
165
- """Create a writer for the given file path."""
166
- raise NotImplementedError("This method should be implemented in subclasses.")
167
-
168
- @abstractmethod
169
- def _is_above_file_size_limit(self, filepath: Path, writer: T_IO) -> bool:
170
- """Check if the file size is above the limit."""
171
- raise NotImplementedError("This method should be implemented in subclasses.")
172
-
173
- def __enter__(self) -> "TableFileWriter":
174
- self._file_count = 1
175
- return self
176
-
177
- def __exit__(self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: Any | None) -> None:
178
- for writer in self._writer_by_filepath.values():
179
- writer.close()
180
- self._writer_by_filepath.clear()
181
- return None
182
-
183
- def _get_writer(self, group: str) -> T_IO:
184
- clean_name = f"{sanitize_filename(group)}-" if group else ""
185
- file_path = (
186
- self.output_dir
187
- / self.schema.folder_name
188
- / f"{clean_name}part-{self._file_count:04}.{self.schema.kind}.{self.format}"
189
- )
190
- file_path.parent.mkdir(parents=True, exist_ok=True)
191
- if file_path not in self._writer_by_filepath:
192
- self._writer_by_filepath[file_path] = self._create_writer(file_path)
193
- elif self._is_above_file_size_limit(file_path, self._writer_by_filepath[file_path]):
194
- self._writer_by_filepath[file_path].close()
195
- del self._writer_by_filepath[file_path]
196
- self._file_count += 1
197
- return self._get_writer(group)
198
- return self._writer_by_filepath[file_path]
199
-
200
- @classmethod
201
- def get_write_cls(cls, format_: FileFormat) -> "type[TableFileWriter]":
202
- """Get the writer class for the given format."""
203
- write_cls = _TABLEWRITER_CLASS_BY_FORMAT.get(format_)
204
- if write_cls is None:
205
- raise ToolkitValueError(
206
- f"Unsupported format {format_}. Supported formats are {humanize_collection(_TABLEWRITER_CLASS_BY_FORMAT.keys())}."
207
- )
208
- return write_cls
209
-
210
-
211
- class ParquetWriter(TableFileWriter["pq.ParquetWriter"]):
212
- """Parquet writer for CDF Toolkit.
213
-
214
- Caveat: This mutates the rows to convert JSON, timestamp, and date columns to appropriate formats.
215
- This is necessary because pyarrow does not support JSON, timestamp, and date types directly in the way we need.
216
- We avoid making a copy of each row for performance reasons, but this means that the rows passed to this writer
217
- will be modified in place.
218
- """
219
-
220
- format = "parquet"
221
-
222
- def __init__(self, schema: Schema, output_dir: Path, max_file_size_bytes: int = 128 * 1024 * 1024) -> None:
223
- super().__init__(schema, output_dir, max_file_size_bytes)
224
- self._check_pyarrow_dependency()
225
-
226
- def _create_writer(self, filepath: Path) -> "pq.ParquetWriter":
227
- import pyarrow.parquet as pq
228
-
229
- schema = self._create_schema()
230
- return pq.ParquetWriter(filepath, schema)
231
-
232
- def _write_rows(self, writer: "pq.ParquetWriter", rows: Rows) -> None:
233
- import pyarrow as pa
234
-
235
- if json_columns := self._json_columns():
236
- for row in rows:
237
- json_values = set(row.keys()) & json_columns
238
- for col in json_values:
239
- row[col] = json.dumps(row[col])
240
- if timestamp_columns := self._timestamp_columns():
241
- for row in rows:
242
- for col in set(row.keys()) & timestamp_columns:
243
- cell_value = row[col]
244
- if isinstance(cell_value, list):
245
- # MyPy does not understand that a list of PrimaryCellValue is valid here
246
- # It expects a union of PrimaryCellValue and list[PrimaryCellValue].
247
- row[col] = [self._to_datetime(value) for value in cell_value] # type: ignore[assignment]
248
- else:
249
- row[col] = self._to_datetime(cell_value)
250
- if date_columns := self._date_columns():
251
- for row in rows:
252
- for col in set(row.keys()) & date_columns:
253
- cell_value = row[col]
254
- if isinstance(cell_value, list):
255
- # MyPy does not understand that a list of PrimaryCellValue is valid here.
256
- # It expects a union of PrimaryCellValue and list[PrimaryCellValue].
257
- row[col] = [self._to_date(value) for value in cell_value] # type: ignore[assignment]
258
- else:
259
- row[col] = self._to_date(cell_value)
260
-
261
- table = pa.Table.from_pylist(rows, schema=self._create_schema())
262
- writer.write_table(table)
263
-
264
- def _is_above_file_size_limit(self, filepath: Path, writer: "pq.ParquetWriter") -> bool:
265
- return filepath.exists() and filepath.stat().st_size > self.max_file_size_bytes
266
-
267
- @lru_cache(maxsize=1)
268
- def _json_columns(self) -> set[str]:
269
- """Check if the writer supports JSON format."""
270
- return {col.name for col in self.schema.columns if col.type == "json"}
271
-
272
- @lru_cache(maxsize=1)
273
- def _timestamp_columns(self) -> set[str]:
274
- """Check if the writer supports timestamp format."""
275
- return {col.name for col in self.schema.columns if col.type == "timestamp"}
276
-
277
- @lru_cache(maxsize=1)
278
- def _date_columns(self) -> set[str]:
279
- return {col.name for col in self.schema.columns if col.type == "date"}
280
-
281
- @classmethod
282
- def _to_datetime(cls, value: CellValue) -> CellValue:
283
- if isinstance(value, datetime) or value is None:
284
- output = value
285
- elif isinstance(value, date):
286
- output = datetime.combine(value, datetime.min.time())
287
- elif isinstance(value, int | float):
288
- # Assuming the value is a timestamp in milliseconds
289
- output = datetime.fromtimestamp(value / 1000.0)
290
- elif isinstance(value, str):
291
- output = cls._convert_data_modelling_timestamp(value)
292
- else:
293
- raise ToolkitTypeError(
294
- f"Unsupported value type for datetime conversion: {type(value)}. Expected datetime, date, int, float, or str."
295
- )
296
- if output is not None and output.tzinfo is None:
297
- # Ensure the datetime is in UTC
298
- output = output.replace(tzinfo=timezone.utc)
299
- elif output is not None and output.tzinfo is not None:
300
- # Convert to UTC if it has a timezone
301
- output = output.astimezone(timezone.utc)
302
- return output
303
-
304
- @classmethod
305
- def _to_date(cls, value: CellValue) -> CellValue:
306
- if isinstance(value, date) or value is None:
307
- return value
308
- elif isinstance(value, datetime):
309
- return value.date()
310
- elif isinstance(value, int | float):
311
- # Assuming the value is a timestamp in milliseconds
312
- return date.fromtimestamp(value / 1000.0)
313
- elif isinstance(value, str):
314
- return cls._convert_data_modelling_timestamp(value).date()
315
- else:
316
- raise ToolkitTypeError(
317
- f"Unsupported value type for date conversion: {type(value)}. Expected date, datetime, int, float, or str."
318
- )
319
-
320
- @classmethod
321
- def _convert_data_modelling_timestamp(cls, timestamp: str) -> datetime:
322
- """Convert a timestamp string from the data modeling format to a datetime object."""
323
- try:
324
- return datetime.fromisoformat(timestamp)
325
- except ValueError:
326
- # Typically hits if the timestamp has truncated milliseconds,
327
- # For example, "2021-01-01T00:00:00.17+00:00".
328
- # In Python 3.10, the strptime requires exact formats so we need both formats below.
329
- # In Python 3.11-13, if the timestamp matches on the second it will match on the first,
330
- # so when we set lower bound to 3.11 the loop will not be needed.
331
- for format_ in ["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%dT%H:%M:%S%z"]:
332
- try:
333
- return datetime.strptime(timestamp, format_)
334
- except ValueError:
335
- continue
336
- raise ValueError(
337
- f"Invalid timestamp format: {timestamp}. Expected ISO 8601 format with optional milliseconds and timezone."
338
- )
339
-
340
- @lru_cache(maxsize=1)
341
- def _create_schema(self) -> "pa.Schema":
342
- """Create a pyarrow schema from the schema definition."""
343
- self._check_pyarrow_dependency()
344
- import pyarrow as pa
345
-
346
- fields: list[pa.Field] = []
347
- for prop in self.schema.columns:
348
- pa_type = self._as_pa_type(prop.type, prop.is_array)
349
- fields.append(pa.field(prop.name, pa_type, nullable=True))
350
- return pa.schema(fields)
351
-
352
- @staticmethod
353
- def _check_pyarrow_dependency() -> None:
354
- if importlib.util.find_spec("pyarrow") is None:
355
- raise ToolkitMissingDependencyError(
356
- "Writing to parquet requires pyarrow. Install with 'pip install \"cognite-toolkit[table]\"'"
357
- )
358
-
359
- @staticmethod
360
- def _as_pa_type(type_: DataType, is_array: bool) -> "pa.DataType":
361
- """Convert a data type to a pyarrow type."""
362
- import pyarrow as pa
363
-
364
- if type_ == "string":
365
- pa_type = pa.string()
366
- elif type_ == "integer":
367
- pa_type = pa.int64()
368
- elif type_ == "float":
369
- pa_type = pa.float64()
370
- elif type_ == "boolean":
371
- pa_type = pa.bool_()
372
- elif type_ == "date":
373
- pa_type = pa.date32()
374
- elif type_ == "time":
375
- pa_type = pa.time64("ms")
376
- elif type_ == "json":
377
- pa_type = pa.string()
378
- elif type_ == "timestamp":
379
- pa_type = pa.timestamp("ms", tz="UTC")
380
- else:
381
- raise ToolkitValueError(f"Unsupported data type {type_}.")
382
-
383
- if is_array:
384
- pa_type = pa.list_(pa_type)
385
- return pa_type
386
-
387
-
388
- class CSVWriter(TableFileWriter[TextIOWrapper]):
389
- format = "csv"
390
-
391
- def _create_writer(self, filepath: Path) -> TextIOWrapper:
392
- stream = filepath.open("a", encoding=self.encoding, newline=self.newline)
393
- writer = self._create_dict_writer(stream)
394
- if filepath.stat().st_size == 0:
395
- writer.writeheader()
396
- return stream
397
-
398
- def _is_above_file_size_limit(self, filepath: Path, writer: TextIOWrapper) -> bool:
399
- current_position = writer.tell()
400
- writer.seek(0, 2)
401
- if writer.tell() > self.max_file_size_bytes:
402
- return True
403
- writer.seek(current_position)
404
- return False
405
-
406
- def _write_rows(self, writer: TextIOWrapper, rows: Rows) -> None:
407
- dict_writer = self._create_dict_writer(writer)
408
- dict_writer.writerows(rows)
409
-
410
- def _create_dict_writer(self, writer: TextIOWrapper) -> csv.DictWriter:
411
- return csv.DictWriter(writer, fieldnames=[col.name for col in self.schema.columns], extrasaction="ignore")
412
-
413
-
414
- class YAMLWriter(TableFileWriter[TextIOWrapper]):
415
- format = "yaml"
416
-
417
- def _create_writer(self, filepath: Path) -> TextIOWrapper:
418
- return filepath.open("a", encoding=self.encoding, newline=self.newline)
419
-
420
- def _is_above_file_size_limit(self, filepath: Path, writer: TextIOWrapper) -> bool:
421
- current_position = writer.tell()
422
- writer.seek(0, 2)
423
- if writer.tell() > self.max_file_size_bytes:
424
- return True
425
- writer.seek(current_position)
426
- return False
427
-
428
- def _write_rows(self, writer: TextIOWrapper, rows: Rows) -> None:
429
- writer.write(yaml_safe_dump(rows))
430
-
431
-
432
- _TABLEWRITER_CLASS_BY_FORMAT: MappingProxyType[str, type[TableFileWriter]] = MappingProxyType(
433
- {w.format: w for w in TableFileWriter.__subclasses__()} # type: ignore[type-abstract]
434
- )
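The removed `table_writers.py` also carried the tolerant timestamp parsing used when preparing data-modeling values for Parquet. A standalone sketch of the same fallback strategy as `ParquetWriter._convert_data_modelling_timestamp` and `_to_datetime` (`fromisoformat` first, then explicit `strptime` formats for truncated milliseconds on Python 3.10, then UTC normalization):

```python
from datetime import datetime, timezone


def parse_dm_timestamp(value: str) -> datetime:
    """Parse an ISO 8601 timestamp, tolerating truncated milliseconds (sketch of the removed helper)."""
    try:
        parsed = datetime.fromisoformat(value)
    except ValueError:
        parsed = None
        # Python 3.10's fromisoformat rejects short fractions such as ".17".
        for fmt in ("%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%dT%H:%M:%S%z"):
            try:
                parsed = datetime.strptime(value, fmt)
                break
            except ValueError:
                continue
        if parsed is None:
            raise ValueError(f"Invalid timestamp format: {value}")
    # Normalize to UTC, mirroring the removed _to_datetime.
    return parsed.replace(tzinfo=timezone.utc) if parsed.tzinfo is None else parsed.astimezone(timezone.utc)


print(parse_dm_timestamp("2021-01-01T00:00:00.17+00:00"))
```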