cognite-toolkit 0.6.87__py3-none-any.whl → 0.6.89__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. cognite_toolkit/_cdf_tk/cdf_toml.py +13 -9
  2. cognite_toolkit/_cdf_tk/commands/_base.py +2 -1
  3. cognite_toolkit/_cdf_tk/commands/_migrate/canvas.py +60 -5
  4. cognite_toolkit/_cdf_tk/commands/_migrate/command.py +4 -2
  5. cognite_toolkit/_cdf_tk/commands/_migrate/conversion.py +161 -44
  6. cognite_toolkit/_cdf_tk/commands/_migrate/data_classes.py +10 -10
  7. cognite_toolkit/_cdf_tk/commands/_migrate/data_mapper.py +7 -3
  8. cognite_toolkit/_cdf_tk/commands/_migrate/migration_io.py +8 -10
  9. cognite_toolkit/_cdf_tk/commands/build_cmd.py +5 -7
  10. cognite_toolkit/_cdf_tk/commands/deploy.py +11 -0
  11. cognite_toolkit/_cdf_tk/commands/modules.py +16 -12
  12. cognite_toolkit/_cdf_tk/data_classes/__init__.py +2 -0
  13. cognite_toolkit/_cdf_tk/data_classes/_config_yaml.py +7 -1
  14. cognite_toolkit/_cdf_tk/data_classes/_module_directories.py +8 -0
  15. cognite_toolkit/_cdf_tk/data_classes/_tracking_info.py +43 -0
  16. cognite_toolkit/_cdf_tk/storageio/__init__.py +2 -0
  17. cognite_toolkit/_cdf_tk/storageio/_annotations.py +102 -0
  18. cognite_toolkit/_cdf_tk/tracker.py +9 -19
  19. cognite_toolkit/_cdf_tk/utils/fileio/_readers.py +90 -44
  20. cognite_toolkit/_cdf_tk/utils/http_client/_client.py +6 -4
  21. cognite_toolkit/_cdf_tk/utils/http_client/_data_classes.py +2 -0
  22. cognite_toolkit/_cdf_tk/utils/useful_types.py +7 -4
  23. cognite_toolkit/_repo_files/GitHub/.github/workflows/deploy.yaml +1 -1
  24. cognite_toolkit/_repo_files/GitHub/.github/workflows/dry-run.yaml +1 -1
  25. cognite_toolkit/_resources/cdf.toml +1 -1
  26. cognite_toolkit/_version.py +1 -1
  27. {cognite_toolkit-0.6.87.dist-info → cognite_toolkit-0.6.89.dist-info}/METADATA +1 -1
  28. {cognite_toolkit-0.6.87.dist-info → cognite_toolkit-0.6.89.dist-info}/RECORD +31 -30
  29. cognite_toolkit/_cdf_tk/commands/_migrate/base.py +0 -106
  30. {cognite_toolkit-0.6.87.dist-info → cognite_toolkit-0.6.89.dist-info}/WHEEL +0 -0
  31. {cognite_toolkit-0.6.87.dist-info → cognite_toolkit-0.6.89.dist-info}/entry_points.txt +0 -0
  32. {cognite_toolkit-0.6.87.dist-info → cognite_toolkit-0.6.89.dist-info}/licenses/LICENSE +0 -0

cognite_toolkit/_cdf_tk/commands/modules.py
@@ -95,11 +95,14 @@ class ModulesCommand(ToolkitCommand):
         print_warning: bool = True,
         skip_tracking: bool = False,
         silent: bool = False,
+        temp_dir_suffix: str | None = None,
         module_source_dir: Path | None = None,
     ):
         super().__init__(print_warning, skip_tracking, silent)
         self._module_source_dir: Path | None = module_source_dir
-        self._temp_download_dir = Path(tempfile.gettempdir()) / MODULES
+        # Use suffix to make temp directory unique (useful for parallel test execution)
+        modules_dir_name = f"{MODULES}.{temp_dir_suffix}" if temp_dir_suffix else MODULES
+        self._temp_download_dir = Path(tempfile.gettempdir()) / modules_dir_name
         if not self._temp_download_dir.exists():
             self._temp_download_dir.mkdir(parents=True, exist_ok=True)
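
The new temp_dir_suffix argument keeps the temporary download directory unique per process, which matters when tests drive ModulesCommand in parallel. A minimal sketch of the resolution logic, assuming the MODULES constant is the string "modules":

import tempfile
from pathlib import Path

MODULES = "modules"  # assumption: mirrors the toolkit's MODULES constant

def temp_download_dir(temp_dir_suffix: str | None = None) -> Path:
    # Same resolution as ModulesCommand.__init__ above.
    modules_dir_name = f"{MODULES}.{temp_dir_suffix}" if temp_dir_suffix else MODULES
    return Path(tempfile.gettempdir()) / modules_dir_name

print(temp_download_dir())       # e.g. /tmp/modules
print(temp_download_dir("gw1"))  # e.g. /tmp/modules.gw1, one directory per pytest-xdist worker
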
@@ -170,6 +173,11 @@ class ModulesCommand(ToolkitCommand):
             print(f"{INDENT}[{'yellow' if mode == 'clean' else 'green'}]Creating {package_name}[/]")

             for module in package.modules:
+                if module.module_id:
+                    self._additional_tracking_info.installed_module_ids.add(module.module_id)
+                if module.package_id:
+                    self._additional_tracking_info.installed_package_ids.add(module.package_id)
+
                 if module.dir in seen_modules:
                     # A module can be part of multiple packages
                     continue
@@ -769,7 +777,6 @@ default_organization_dir = "{organization_dir.name}"''',
     def _get_available_packages(self, user_library: Library | None = None) -> tuple[Packages, Path]:
         """
         Returns a list of available packages, either from the CDF TOML file or from external libraries if the feature flag is enabled.
-        If the feature flag is not enabled and no libraries are specified, it returns the built-in modules.
         """

         cdf_toml = CDFToml.load()
@@ -778,9 +785,8 @@ default_organization_dir = "{organization_dir.name}"''',

             for library_name, library in libraries.items():
                 try:
-                    additional_tracking_info = self._additional_tracking_info.setdefault("downloadedLibraryIds", [])
-                    if library_name not in additional_tracking_info:
-                        additional_tracking_info.append(library_name)
+                    if library_name:
+                        self._additional_tracking_info.downloaded_library_ids.add(library_name)

                     print(f"[green]Adding library {library_name} from {library.url}[/]")
                     # Extract filename from URL, fallback to library_name.zip if no filename found
@@ -802,14 +808,12 @@ default_organization_dir = "{organization_dir.name}"''',

                     # Track deployment pack download for each package and module
                     for package in packages.values():
-                        downloaded_package_ids = self._additional_tracking_info.setdefault("downloadedPackageIds", [])
-                        if package.id and package.id not in downloaded_package_ids:
-                            downloaded_package_ids.append(package.id)
+                        if package.id:
+                            self._additional_tracking_info.downloaded_package_ids.add(package.id)

-                        downloaded_module_ids = self._additional_tracking_info.setdefault("downloadedModuleIds", [])
                         for module in package.modules:
-                            if module.module_id and module.module_id not in downloaded_module_ids:
-                                downloaded_module_ids.append(module.module_id)
+                            if module.module_id:
+                                self._additional_tracking_info.downloaded_module_ids.add(module.module_id)

                     return packages, file_path.parent
                 except Exception as e:
@@ -821,7 +825,7 @@ default_organization_dir = "{organization_dir.name}"''',
                     ) from e

                     raise ToolkitError(f"Failed to add library {library_name}, {e}")
-            # If no libraries are specified or the flag is not enabled, load the built-in modules
+            # If no libraries are specified or the flag is not enabled, raise an error
             raise ValueError("No valid libraries found.")
         else:
             if user_library:
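
The tracking refactor across these hunks replaces the dict-of-lists pattern (setdefault plus an explicit membership check) with typed set fields on CommandTrackingInfo, where add() is idempotent. A small plain-Python illustration of the two patterns, with made-up IDs:

# Up to 0.6.87: a dict of lists with manual de-duplication.
tracking: dict[str, list[str]] = {}
ids = tracking.setdefault("downloadedPackageIds", [])
for package_id in ["pkg-a", "pkg-a", "pkg-b"]:
    if package_id not in ids:
        ids.append(package_id)

# From 0.6.89: a set field, so duplicates are dropped automatically.
downloaded_package_ids: set[str] = set()
for package_id in ["pkg-a", "pkg-a", "pkg-b"]:
    downloaded_package_ids.add(package_id)

assert ids == ["pkg-a", "pkg-b"]
assert downloaded_package_ids == {"pkg-a", "pkg-b"}
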
cognite_toolkit/_cdf_tk/data_classes/__init__.py
@@ -32,6 +32,7 @@ from ._deploy_results import (
 from ._module_directories import ModuleDirectories, ModuleLocation
 from ._module_resources import ModuleResources
 from ._packages import Package, Packages
+from ._tracking_info import CommandTrackingInfo
 from ._yaml_comments import YAMLComments

 __all__ = [
@@ -47,6 +48,7 @@ __all__ = [
     "BuiltResource",
     "BuiltResourceFull",
     "BuiltResourceList",
+    "CommandTrackingInfo",
     "ConfigEntry",
     "ConfigYAMLs",
     "DatapointDeployResult",

cognite_toolkit/_cdf_tk/data_classes/_config_yaml.py
@@ -496,7 +496,9 @@ class InitConfigYAML(YAMLWithComments[tuple[str, ...], ConfigEntry], ConfigYAMLC
         adds them to the config.yaml file.

         Args:
-            cognite_root_module: The root module for all cognite modules.
+            cognite_root_module: Path to the root directory containing all Cognite modules.
+            defaults_files: List of paths to default.config.yaml files to load.
+            ignore_patterns: Optional list of tuples containing patterns to ignore when loading defaults.

         Returns:
             self
@@ -509,6 +511,10 @@
             raw_file = safe_read(default_config)
             file_comments = self._extract_comments(raw_file, key_prefix=tuple(parts))
             file_data = cast(dict, read_yaml_content(raw_file))
+
+            # a file may exist, but contain just comments, thus the file_data is None
+            if file_data is None:
+                continue
             for key, value in file_data.items():
                 if len(parts) >= 1 and parts[0] in ROOT_MODULES:
                     key_path = (self._variables, *parts, key)
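
The new guard handles a default.config.yaml that exists but contains only comments: PyYAML parses such a document to None rather than an empty mapping, so calling .items() on the result would fail. A quick standalone check (plain PyYAML standing in for read_yaml_content):

import yaml

raw_file = "# my_variable: value\n# another_variable: 42\n"  # hypothetical comment-only config

file_data = yaml.safe_load(raw_file)
print(file_data)  # None, not {}

# Hence the guard added above before iterating file_data.items().
if file_data is None:
    print("skipping comment-only defaults file")
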
cognite_toolkit/_cdf_tk/data_classes/_module_directories.py
@@ -164,6 +164,8 @@ class ModuleLocation:
         return ReadModule(
             dir=self.dir,
             resource_directories=tuple(self.resource_directories),
+            module_id=self.module_id,
+            package_id=self.package_id,
         )


@@ -178,6 +180,8 @@ class ReadModule:

     dir: Path
     resource_directories: tuple[str, ...]
+    module_id: str | None
+    package_id: str | None

     def resource_dir_path(self, resource_folder: str) -> Path | None:
         """Returns the path to a resource in the module.
@@ -198,12 +202,16 @@
         return cls(
             dir=Path(data["dir"]),
             resource_directories=tuple(data["resource_directories"]),
+            module_id=data.get("module_id"),
+            package_id=data.get("package_id"),
         )

     def dump(self) -> dict[str, Any]:
         return {
             "dir": self.dir.as_posix(),
             "resource_directories": list(self.resource_directories),
+            "module_id": self.module_id,
+            "package_id": self.package_id,
         }


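
Because the loader uses data.get(...), dumps written by older toolkit versions (without the two new keys) still load, with the IDs falling back to None. A hedged round-trip sketch using a stand-in dataclass with the same dump/load shape as ReadModule:

from dataclasses import dataclass
from pathlib import Path
from typing import Any

@dataclass
class ReadModuleSketch:  # stand-in for ReadModule; same dump/load shape as above
    dir: Path
    resource_directories: tuple[str, ...]
    module_id: str | None
    package_id: str | None

    def dump(self) -> dict[str, Any]:
        return {
            "dir": self.dir.as_posix(),
            "resource_directories": list(self.resource_directories),
            "module_id": self.module_id,
            "package_id": self.package_id,
        }

    @classmethod
    def load(cls, data: dict[str, Any]) -> "ReadModuleSketch":
        return cls(
            dir=Path(data["dir"]),
            resource_directories=tuple(data["resource_directories"]),
            module_id=data.get("module_id"),  # None for dumps written before 0.6.89
            package_id=data.get("package_id"),
        )

old_dump = {"dir": "modules/my_module", "resource_directories": ["data_sets"]}  # pre-0.6.89 shape
module = ReadModuleSketch.load(old_dump)
assert module.module_id is None and module.package_id is None
assert ReadModuleSketch.load(module.dump()) == module
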
cognite_toolkit/_cdf_tk/data_classes/_tracking_info.py
@@ -0,0 +1,43 @@
+"""Data class for command tracking information."""
+
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+
+class CommandTrackingInfo(BaseModel):
+    """Structured tracking information for CLI commands.
+
+    This model provides type-safe tracking information that can be collected
+    during command execution and sent to Mixpanel for analytics.
+
+    Attributes:
+        project: The CDF project name.
+        cluster: The CDF cluster name.
+        module_ids: List of module IDs that were deployed or built.
+        package_ids: List of package IDs that were deployed or built.
+        installed_module_ids: List of module IDs that were installed.
+        installed_package_ids: List of package IDs that were installed.
+        downloaded_library_ids: List of library IDs that were downloaded.
+        downloaded_package_ids: List of package IDs that were downloaded.
+        downloaded_module_ids: List of module IDs that were downloaded.
+    """
+
+    project: str | None = Field(default=None)
+    cluster: str | None = Field(default=None)
+    module_ids: set[str] = Field(default_factory=set, alias="moduleIds")
+    package_ids: set[str] = Field(default_factory=set, alias="packageIds")
+    installed_module_ids: set[str] = Field(default_factory=set, alias="installedModuleIds")
+    installed_package_ids: set[str] = Field(default_factory=set, alias="installedPackageIds")
+    downloaded_library_ids: set[str] = Field(default_factory=set, alias="downloadedLibraryIds")
+    downloaded_package_ids: set[str] = Field(default_factory=set, alias="downloadedPackageIds")
+    downloaded_module_ids: set[str] = Field(default_factory=set, alias="downloadedModuleIds")
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert the tracking info to a dictionary for Mixpanel.
+
+        Returns:
+            A dictionary with camelCase keys matching Mixpanel's expected format.
+            Default values are excluded.
+        """
+        return self.model_dump(by_alias=True, exclude_defaults=True)
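
A rough usage sketch, assuming Pydantic v2 (which provides model_dump): commands mutate the set fields in place while they run, and to_dict() emits only the camelCase keys that were actually populated.

info = CommandTrackingInfo(project="my-project", cluster="westeurope-1")

# Commands accumulate IDs into the set fields; set.add() de-duplicates for free.
info.downloaded_library_ids.add("my-library")
info.downloaded_module_ids.add("cdf_ingestion")
info.downloaded_module_ids.add("cdf_ingestion")  # no effect, already present

print(info.to_dict())
# {'project': 'my-project', 'cluster': 'westeurope-1',
#  'downloadedLibraryIds': {'my-library'}, 'downloadedModuleIds': {'cdf_ingestion'}}
# Fields still at their defaults (e.g. installedModuleIds) are left out by exclude_defaults.
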
cognite_toolkit/_cdf_tk/storageio/__init__.py
@@ -3,6 +3,7 @@ from pathlib import Path
 from cognite_toolkit._cdf_tk.utils._auxiliary import get_concrete_subclasses
 from cognite_toolkit._cdf_tk.utils.fileio import COMPRESSION_BY_SUFFIX

+from ._annotations import FileAnnotationIO
 from ._applications import CanvasIO, ChartIO
 from ._asset_centric import AssetIO, BaseAssetCentricIO, EventIO, FileMetadataIO, HierarchyIO, TimeSeriesIO
 from ._base import (
@@ -50,6 +51,7 @@ __all__ = [
     "ChartIO",
     "ConfigurableStorageIO",
     "EventIO",
+    "FileAnnotationIO",
     "FileMetadataIO",
     "HierarchyIO",
     "InstanceIO",

cognite_toolkit/_cdf_tk/storageio/_annotations.py
@@ -0,0 +1,102 @@
+from collections.abc import Iterable, Sequence
+from typing import Any
+
+from cognite.client.data_classes import Annotation, AnnotationFilter
+
+from cognite_toolkit._cdf_tk.utils.collection import chunker_sequence
+from cognite_toolkit._cdf_tk.utils.useful_types import JsonVal
+
+from ._asset_centric import FileMetadataIO
+from ._base import Page, StorageIO
+from .selectors import AssetCentricSelector
+
+
+class FileAnnotationIO(StorageIO[AssetCentricSelector, Annotation]):
+    SUPPORTED_DOWNLOAD_FORMATS = frozenset({".ndjson"})
+    SUPPORTED_COMPRESSIONS = frozenset({".gz"})
+    CHUNK_SIZE = 1000
+    BASE_SELECTOR = AssetCentricSelector
+
+    MISSING_ID = "<MISSING_RESOURCE_ID>"
+
+    def as_id(self, item: Annotation) -> str:
+        project = item._cognite_client.config.project
+        return f"INTERNAL_ID_project_{project}_{item.id!s}"
+
+    def stream_data(self, selector: AssetCentricSelector, limit: int | None = None) -> Iterable[Page]:
+        total = 0
+        for file_chunk in FileMetadataIO(self.client).stream_data(selector, None):
+            # Todo Support pagination. This is missing in the SDK.
+            results = self.client.annotations.list(
+                filter=AnnotationFilter(
+                    annotated_resource_type="file",
+                    annotated_resource_ids=[{"id": file_metadata.id} for file_metadata in file_chunk.items],
+                )
+            )
+            if limit is not None and total + len(results) > limit:
+                results = results[: limit - total]
+
+            for chunk in chunker_sequence(results, self.CHUNK_SIZE):
+                yield Page(worker_id="main", items=chunk)
+                total += len(chunk)
+                if limit is not None and total >= limit:
+                    break
+
+    def count(self, selector: AssetCentricSelector) -> int | None:
+        """There is no efficient way to count annotations in CDF."""
+        return None
+
+    def data_to_json_chunk(
+        self, data_chunk: Sequence[Annotation], selector: AssetCentricSelector | None = None
+    ) -> list[dict[str, JsonVal]]:
+        files_ids: set[int] = set()
+        for item in data_chunk:
+            if item.annotated_resource_type == "file" and item.annotated_resource_id is not None:
+                files_ids.add(item.annotated_resource_id)
+            if file_id := self._get_file_id(item.data):
+                files_ids.add(file_id)
+        self.client.lookup.files.external_id(list(files_ids))  # Preload file external IDs
+        asset_ids = {asset_id for item in data_chunk if (asset_id := self._get_asset_id(item.data))}
+        self.client.lookup.assets.external_id(list(asset_ids))  # Preload asset external IDs
+        return [self.dump_annotation_to_json(item) for item in data_chunk]
+
+    def dump_annotation_to_json(self, annotation: Annotation) -> dict[str, JsonVal]:
+        """Dump annotations to a list of JSON serializable dictionaries.
+
+        Args:
+            annotation: The annotations to dump.
+
+        Returns:
+            A list of JSON serializable dictionaries representing the annotations.
+        """
+        dumped = annotation.as_write().dump()
+        if isinstance(annotated_resource_id := dumped.pop("annotatedResourceId", None), int):
+            external_id = self.client.lookup.files.external_id(annotated_resource_id)
+            dumped["annotatedResourceExternalId"] = self.MISSING_ID if external_id is None else external_id
+
+        if isinstance(data := dumped.get("data"), dict):
+            if isinstance(file_ref := data.get("fileRef"), dict) and isinstance(file_ref.get("id"), int):
+                external_id = self.client.lookup.files.external_id(file_ref.pop("id"))
+                file_ref["externalId"] = self.MISSING_ID if external_id is None else external_id
+            if isinstance(asset_ref := data.get("assetRef"), dict) and isinstance(asset_ref.get("id"), int):
+                external_id = self.client.lookup.assets.external_id(asset_ref.pop("id"))
+                asset_ref["externalId"] = self.MISSING_ID if external_id is None else external_id
+        return dumped
+
+    @classmethod
+    def _get_file_id(cls, data: dict[str, Any]) -> int | None:
+        file_ref = data.get("fileRef")
+        if isinstance(file_ref, dict):
+            id_ = file_ref.get("id")
+            if isinstance(id_, int):
+                return id_
+        return None
+
+    @classmethod
+    def _get_asset_id(cls, data: dict[str, Any]) -> int | None:
+        asset_ref = data.get("assetRef")
+        if isinstance(asset_ref, dict):
+            id_ = asset_ref.get("id")
+            if isinstance(id_, int):
+                return id_
+        return None
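
A rough usage sketch. It assumes a configured ToolkitClient and that StorageIO subclasses are constructed with that client (as the FileMetadataIO(self.client) call above suggests); client and selector construction are left as placeholders because their signatures are not part of this diff.

from cognite_toolkit._cdf_tk.storageio import FileAnnotationIO

client = ...    # a configured ToolkitClient for your CDF project (placeholder)
selector = ...  # an AssetCentricSelector scoping which files to read annotations for (placeholder)

io = FileAnnotationIO(client)
for page in io.stream_data(selector, limit=100):
    for row in io.data_to_json_chunk(page.items, selector):
        # Internal numeric IDs are replaced with external IDs (or "<MISSING_RESOURCE_ID>").
        print(row.get("annotatedResourceExternalId"))
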
cognite_toolkit/_cdf_tk/tracker.py
@@ -14,7 +14,7 @@ from mixpanel import Consumer, Mixpanel, MixpanelException

 from cognite_toolkit._cdf_tk.cdf_toml import CDFToml
 from cognite_toolkit._cdf_tk.constants import IN_BROWSER
-from cognite_toolkit._cdf_tk.data_classes._built_modules import BuiltModule
+from cognite_toolkit._cdf_tk.data_classes import CommandTrackingInfo
 from cognite_toolkit._cdf_tk.tk_warnings import ToolkitWarning, WarningList
 from cognite_toolkit._cdf_tk.utils import get_cicd_environment
 from cognite_toolkit._version import __version__
@@ -49,7 +49,7 @@ class Tracker:
         warning_list: WarningList[ToolkitWarning],
         result: str | Exception,
         cmd: str,
-        additional_tracking_info: dict[str, Any] | None = None,
+        additional_tracking_info: CommandTrackingInfo | None = None,
     ) -> bool:
         warning_count = Counter([type(w).__name__ for w in warning_list])

@@ -58,7 +58,7 @@ class Tracker:
             warning_details[f"warningMostCommon{no}Count"] = count
             warning_details[f"warningMostCommon{no}Name"] = warning

-        positional_args, optional_args = self._parse_sys_args()
+        subcommands, optional_args = self._parse_sys_args()
         event_information = {
             "userInput": self.user_command,
             "toolkitVersion": __version__,
@@ -69,27 +69,17 @@ class Tracker:
             **warning_details,
             "result": type(result).__name__ if isinstance(result, Exception) else result,
             "error": str(result) if isinstance(result, Exception) else "",
-            **positional_args,
+            "subcommands": subcommands,
             **optional_args,
             "alphaFlags": [name for name, value in self._cdf_toml.alpha_flags.items() if value],
             "plugins": [name for name, value in self._cdf_toml.plugins.items() if value],
         }

         if additional_tracking_info:
-            event_information.update(additional_tracking_info)
+            event_information.update(additional_tracking_info.to_dict())

         return self._track(f"command{cmd.capitalize()}", event_information)

-    def track_module_build(self, module: BuiltModule) -> bool:
-        event_information = {
-            "module": module.name,
-            "location_path": module.location.path.as_posix(),
-            "warning_count": module.warning_count,
-            "status": module.status,
-            **{resource_type: len(resource_build) for resource_type, resource_build in module.resources.items()},
-        }
-        return self._track("moduleBuild", event_information)
-
     def _track(self, event_name: str, event_information: dict[str, Any]) -> bool:
         if self.skip_tracking or not self.opted_in or "PYTEST_CURRENT_TEST" in os.environ:
             return False
@@ -138,9 +128,9 @@ class Tracker:
         return distinct_id

     @staticmethod
-    def _parse_sys_args() -> tuple[dict[str, str], dict[str, str | bool]]:
+    def _parse_sys_args() -> tuple[list[str], dict[str, str | bool]]:
         optional_args: dict[str, str | bool] = {}
-        positional_args: dict[str, str] = {}
+        subcommands: list[str] = []
         last_key: str | None = None
         if sys.argv and len(sys.argv) > 1:
             for arg in sys.argv[1:]:
@@ -157,11 +147,11 @@ class Tracker:
                     optional_args[last_key] = arg
                     last_key = None
                 else:
-                    positional_args[f"positionalArg{len(positional_args)}"] = arg
+                    subcommands.append(arg)

         if last_key:
             optional_args[last_key] = True
-        return positional_args, optional_args
+        return subcommands, optional_args

     @property
     def _cicd(self) -> str:
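
The practical effect for analytics is that positional CLI tokens now arrive as a single list-valued "subcommands" property instead of numbered positionalArgN keys. A rough before/after sketch for a hypothetical invocation like "cdf modules add my_org" (the real parser also pairs --flag values into optional_args, which is elided here):

argv = ["modules", "add", "my_org"]  # sys.argv[1:]

# Up to 0.6.87: one event property per positional token.
positional_args = {f"positionalArg{i}": arg for i, arg in enumerate(argv)}
print(positional_args)  # {'positionalArg0': 'modules', 'positionalArg1': 'add', 'positionalArg2': 'my_org'}

# From 0.6.89: a single list property.
subcommands = list(argv)
print(subcommands)  # ['modules', 'add', 'my_org']
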
cognite_toolkit/_cdf_tk/utils/fileio/_readers.py
@@ -7,6 +7,7 @@ from dataclasses import dataclass
 from functools import partial
 from io import TextIOWrapper
 from pathlib import Path
+from typing import Any

 import yaml

@@ -87,26 +88,20 @@ class FailedParsing:
     error: str


-class TableReader(FileReader, ABC): ...
-
-
-class CSVReader(TableReader):
-    """Reads CSV files and yields each row as a dictionary.
+class TableReader(FileReader, ABC):
+    """Reads table-like files and yields each row as a dictionary.

     Args:
-        input_file (Path): The path to the CSV file to read.
+        input_file (Path): The path to the table file to read.
         sniff_rows (int | None): Optional number of rows to sniff for
             schema detection. If None, no schema is detected. If a schema is sniffed
-            from the first `sniff_rows` rows, it will be used to parse the CSV.
+            from the first `sniff_rows` rows, it will be used to parse the table.
         schema (Sequence[SchemaColumn] | None): Optional schema to use for parsing.
             You can either provide a schema or use `sniff_rows` to detect it.
         keep_failed_cells (bool): If True, failed cells will be kept in the
             `failed_cell` attribute. If False, they will be ignored.
-
     """

-    format = ".csv"
-
     def __init__(
         self,
         input_file: Path,
@@ -152,18 +147,19 @@ class CSVReader(TableReader):
     @classmethod
     def sniff_schema(cls, input_file: Path, sniff_rows: int = 100) -> list[SchemaColumn]:
         """
-        Sniff the schema from the first `sniff_rows` rows of the CSV file.
+        Sniff the schema from the first `sniff_rows` rows of the file.

         Args:
-            input_file (Path): The path to the CSV file.
+            input_file (Path): The path to the tabular file.
             sniff_rows (int): The number of rows to read for sniffing the schema.

         Returns:
             list[SchemaColumn]: The inferred schema as a list of SchemaColumn objects.
+
         Raises:
             ValueError: If `sniff_rows` is not a positive integer.
             ToolkitFileNotFoundError: If the file does not exist.
-            ToolkitValueError: If the file is not a CSV file or if there are issues with the content.
+            ToolkitValueError: If the file is not the correct format or if there are issues with the content.

         """
         if sniff_rows <= 0:
@@ -171,43 +167,50 @@

         if not input_file.exists():
             raise ToolkitFileNotFoundError(f"File not found: {input_file.as_posix()!r}.")
-        if input_file.suffix != ".csv":
-            raise ToolkitValueError(f"Expected a .csv file got a {input_file.suffix!r} file instead.")
+        if input_file.suffix != cls.format:
+            raise ToolkitValueError(f"Expected a {cls.format} file got a {input_file.suffix!r} file instead.")

-        with input_file.open("r", encoding="utf-8-sig") as file:
-            reader = csv.DictReader(file)
-            column_names = Counter(reader.fieldnames)
-            if duplicated := [name for name, count in column_names.items() if count > 1]:
-                raise ToolkitValueError(f"CSV file contains duplicate headers: {humanize_collection(duplicated)}")
-            sample_rows: list[dict[str, str]] = []
-            for no, row in enumerate(reader):
-                if no >= sniff_rows:
-                    break
-                sample_rows.append(row)
+        column_names, sample_rows = cls._read_sample_rows(input_file, sniff_rows)
+        cls._check_column_names(column_names)
+        return cls._infer_schema(sample_rows, column_names)

-            if not sample_rows:
-                raise ToolkitValueError(f"No data found in the file: {input_file.as_posix()!r}.")
+    @classmethod
+    @abstractmethod
+    def _read_sample_rows(cls, input_file: Path, sniff_rows: int) -> tuple[Sequence[str], list[dict[str, str]]]: ...

-            schema = []
-            for column_name in reader.fieldnames or []:
-                sample_values = [row[column_name] for row in sample_rows if column_name in row]
-                if not sample_values:
-                    column = SchemaColumn(name=column_name, type="string")
+    @classmethod
+    def _infer_schema(cls, sample_rows: list[dict[str, Any]], column_names: Sequence[str]) -> list[SchemaColumn]:
+        schema: list[SchemaColumn] = []
+        for column_name in column_names:
+            sample_values = [row[column_name] for row in sample_rows if column_name in row]
+            if not sample_values:
+                column = SchemaColumn(name=column_name, type="string")
+            else:
+                data_types = Counter(
+                    infer_data_type_from_value(value, dtype="Json")[0] for value in sample_values if value is not None
+                )
+                if not data_types:
+                    inferred_type = "string"
                 else:
-                    data_types = Counter(
-                        infer_data_type_from_value(value, dtype="Json")[0]
-                        for value in sample_values
-                        if value is not None
-                    )
-                    if not data_types:
-                        inferred_type = "string"
-                    else:
-                        inferred_type = data_types.most_common()[0][0]
-                    # Json dtype is a subset of Datatype that SchemaColumn accepts
-                    column = SchemaColumn(name=column_name, type=inferred_type)  # type: ignore[arg-type]
-                schema.append(column)
+                    inferred_type = data_types.most_common()[0][0]
+                # Json dtype is a subset of Datatype that SchemaColumn accepts
+                column = SchemaColumn(name=column_name, type=inferred_type)  # type: ignore[arg-type]
+            schema.append(column)
         return schema

+    @classmethod
+    def _check_column_names(cls, column_names: Sequence[str]) -> None:
+        """Check for duplicate column names."""
+        duplicates = [name for name, count in Counter(column_names).items() if count > 1]
+        if duplicates:
+            raise ToolkitValueError(f"Duplicate column names found: {humanize_collection(duplicates)}.")
+
+
+class CSVReader(TableReader):
+    """Reads CSV files and yields each row as a dictionary."""
+
+    format = ".csv"
+
     def _read_chunks_from_file(self, file: TextIOWrapper) -> Iterator[dict[str, JsonVal]]:
         if self.keep_failed_cells and self.failed_cell:
             self.failed_cell.clear()
@@ -231,10 +234,31 @@ class CSVReader(TableReader):
         with compression.open("r") as file:
             yield from csv.DictReader(file)

+    @classmethod
+    def _read_sample_rows(cls, input_file: Path, sniff_rows: int) -> tuple[Sequence[str], list[dict[str, str]]]:
+        column_names: Sequence[str] = []
+        compression = Compression.from_filepath(input_file)
+        with compression.open("r") as file:
+            reader = csv.DictReader(file)
+            column_names = reader.fieldnames or []
+            sample_rows: list[dict[str, str]] = []
+            for no, row in enumerate(reader):
+                if no >= sniff_rows:
+                    break
+                sample_rows.append(row)
+
+        if not sample_rows:
+            raise ToolkitValueError(f"No data found in the file: {input_file.as_posix()!r}.")
+        return column_names, sample_rows
+

 class ParquetReader(TableReader):
     format = ".parquet"

+    def __init__(self, input_file: Path) -> None:
+        # Parquet files have their own schema, so we don't need to sniff or provide one.
+        super().__init__(input_file, sniff_rows=None, schema=None, keep_failed_cells=False)
+
     def read_chunks(self) -> Iterator[dict[str, JsonVal]]:
         import pyarrow.parquet as pq

@@ -258,6 +282,28 @@ class ParquetReader(TableReader):
                 return value
         return value

+    @classmethod
+    def _read_sample_rows(cls, input_file: Path, sniff_rows: int) -> tuple[Sequence[str], list[dict[str, str]]]:
+        import pyarrow.parquet as pq
+
+        column_names: Sequence[str] = []
+        sample_rows: list[dict[str, str]] = []
+        with pq.ParquetFile(input_file) as parquet_file:
+            column_names = parquet_file.schema.names
+            row_count = min(sniff_rows, parquet_file.metadata.num_rows)
+            row_iter = parquet_file.iter_batches(batch_size=row_count)
+            try:
+                batch = next(row_iter)
+                for row in batch.to_pylist():
+                    str_row = {key: (str(value) if value is not None else "") for key, value in row.items()}
+                    sample_rows.append(str_row)
+            except StopIteration:
+                pass
+
+        if not sample_rows:
+            raise ToolkitValueError(f"No data found in the file: {input_file.as_posix()!r}.")
+        return column_names, sample_rows
+

 FILE_READ_CLS_BY_FORMAT: Mapping[str, type[FileReader]] = {}
 TABLE_READ_CLS_BY_FORMAT: Mapping[str, type[TableReader]] = {}
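
With sniff_schema hoisted to TableReader and only _read_sample_rows left format-specific, the same schema-sniffing call now works for CSV and Parquet alike. A small CSV sketch (the Parquet path is analogous); the file content is made up, and the import assumes CSVReader is re-exported from the fileio package:

from pathlib import Path

from cognite_toolkit._cdf_tk.utils.fileio import CSVReader  # assumption: package-level re-export

sample = Path("assets.csv")  # hypothetical sample file
sample.write_text("externalId,name,siteCode\npump-01,Main pump,ABC\npump-02,Backup pump,DEF\n")

# The schema is inferred from up to the first 100 rows by default.
for column in CSVReader.sniff_schema(sample, sniff_rows=100):
    print(column.name, column.type)  # e.g. externalId string, name string, siteCode string
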
cognite_toolkit/_cdf_tk/utils/http_client/_client.py
@@ -147,13 +147,15 @@ class HTTPClient:
             timeout=self.config.timeout,
         )

-    def _create_headers(self, api_version: str | None = None) -> MutableMapping[str, str]:
+    def _create_headers(
+        self, api_version: str | None = None, content_type: str = "application/json", accept: str = "application/json"
+    ) -> MutableMapping[str, str]:
         headers: MutableMapping[str, str] = {}
         headers["User-Agent"] = f"httpx/{httpx.__version__} {get_user_agent()}"
         auth_name, auth_value = self.config.credentials.authorization_header()
         headers[auth_name] = auth_value
-        headers["content-type"] = "application/json"
-        headers["accept"] = "application/json"
+        headers["content-type"] = content_type
+        headers["accept"] = accept
         headers["x-cdp-sdk"] = f"CogniteToolkit:{get_current_toolkit_version()}"
         headers["x-cdp-app"] = self.config.client_name
         headers["cdf-version"] = api_version or self.config.api_subversion
@@ -162,7 +164,7 @@
         return headers

     def _make_request(self, item: RequestMessage) -> httpx.Response:
-        headers = self._create_headers(item.api_version)
+        headers = self._create_headers(item.api_version, item.content_type, item.accept)
         params: dict[str, PrimitiveType] | None = None
         if isinstance(item, ParamRequest):
             params = item.parameters

cognite_toolkit/_cdf_tk/utils/http_client/_data_classes.py
@@ -92,6 +92,8 @@ class RequestMessage(HTTPMessage):
     read_attempt: int = 0
     status_attempt: int = 0
     api_version: str | None = None
+    content_type: str = "application/json"
+    accept: str = "application/json"

     @property
     def total_attempts(self) -> int:
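
Together with the _client.py change above, a request can now carry its own media types while JSON stays the default. A minimal stand-in sketch of how the two new fields flow into headers (RequestMessage itself has further required fields not shown in this diff):

from dataclasses import dataclass

@dataclass
class RequestSketch:  # stand-in carrying only the two new fields
    content_type: str = "application/json"
    accept: str = "application/json"

def build_headers(item: RequestSketch) -> dict[str, str]:
    # Mirrors the relevant part of HTTPClient._create_headers / _make_request.
    return {"content-type": item.content_type, "accept": item.accept}

print(build_headers(RequestSketch()))                         # JSON defaults
print(build_headers(RequestSketch(content_type="text/csv")))  # per-request override
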