cognite-extractor-utils 7.5.14__py3-none-any.whl → 7.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cognite-extractor-utils might be problematic.
- cognite/extractorutils/__init__.py +1 -1
- cognite/extractorutils/_inner_util.py +1 -1
- cognite/extractorutils/base.py +120 -40
- cognite/extractorutils/configtools/__init__.py +4 -5
- cognite/extractorutils/configtools/_util.py +3 -2
- cognite/extractorutils/configtools/elements.py +206 -33
- cognite/extractorutils/configtools/loaders.py +68 -16
- cognite/extractorutils/configtools/validators.py +5 -1
- cognite/extractorutils/exceptions.py +11 -2
- cognite/extractorutils/metrics.py +17 -12
- cognite/extractorutils/statestore/__init__.py +77 -3
- cognite/extractorutils/statestore/_base.py +7 -3
- cognite/extractorutils/statestore/hashing.py +129 -15
- cognite/extractorutils/statestore/watermark.py +77 -87
- cognite/extractorutils/threading.py +30 -4
- cognite/extractorutils/unstable/__init__.py +5 -5
- cognite/extractorutils/unstable/configuration/__init__.py +3 -0
- cognite/extractorutils/unstable/configuration/exceptions.py +13 -2
- cognite/extractorutils/unstable/configuration/loaders.py +78 -13
- cognite/extractorutils/unstable/configuration/models.py +121 -7
- cognite/extractorutils/unstable/core/__init__.py +5 -0
- cognite/extractorutils/unstable/core/_dto.py +5 -3
- cognite/extractorutils/unstable/core/base.py +113 -4
- cognite/extractorutils/unstable/core/errors.py +41 -0
- cognite/extractorutils/unstable/core/logger.py +149 -0
- cognite/extractorutils/unstable/core/restart_policy.py +16 -2
- cognite/extractorutils/unstable/core/runtime.py +44 -6
- cognite/extractorutils/unstable/core/tasks.py +53 -1
- cognite/extractorutils/unstable/scheduling/__init__.py +13 -0
- cognite/extractorutils/unstable/scheduling/_scheduler.py +1 -1
- cognite/extractorutils/uploader/__init__.py +9 -5
- cognite/extractorutils/uploader/_base.py +4 -5
- cognite/extractorutils/uploader/assets.py +13 -8
- cognite/extractorutils/uploader/data_modeling.py +37 -2
- cognite/extractorutils/uploader/events.py +14 -9
- cognite/extractorutils/uploader/files.py +80 -21
- cognite/extractorutils/uploader/raw.py +12 -7
- cognite/extractorutils/uploader/time_series.py +370 -94
- cognite/extractorutils/uploader/upload_failure_handler.py +35 -2
- cognite/extractorutils/uploader_extractor.py +47 -9
- cognite/extractorutils/uploader_types.py +26 -1
- cognite/extractorutils/util.py +76 -23
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.7.0.dist-info}/METADATA +1 -1
- cognite_extractor_utils-7.7.0.dist-info/RECORD +50 -0
- cognite_extractor_utils-7.5.14.dist-info/RECORD +0 -50
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.7.0.dist-info}/WHEEL +0 -0
- {cognite_extractor_utils-7.5.14.dist-info → cognite_extractor_utils-7.7.0.dist-info}/licenses/LICENSE +0 -0
cognite/extractorutils/uploader/time_series.py

@@ -1,3 +1,6 @@
+"""
+Upload queue for time series and sequences.
+"""
# Copyright 2023 Cognite AS
#
# Licensed under the Apache License, Version 2.0 (the "License");

@@ -16,7 +19,7 @@ import math
from collections.abc import Callable
from datetime import datetime
from types import TracebackType
-from typing import Any
+from typing import Any, Generic, Literal, TypedDict, TypeVar

from cognite.client import CogniteClient
from cognite.client.data_classes import (

@@ -26,6 +29,9 @@ from cognite.client.data_classes import (
    StatusCode,
    TimeSeries,
)
+from cognite.client.data_classes.data_modeling import NodeId
+from cognite.client.data_classes.data_modeling.extractor_extensions.v1 import CogniteExtractorTimeSeriesApply
+from cognite.client.data_classes.data_modeling.instances import DirectRelationReference
from cognite.client.exceptions import CogniteDuplicatedError, CogniteNotFoundError
from cognite.extractorutils.threading import CancellationToken
from cognite.extractorutils.uploader._base import (

@@ -59,6 +65,18 @@ DataPointWithStatus = tuple[TimeStamp, float, FullStatusCode] | tuple[TimeStamp,
DataPoint = DataPointWithoutStatus | DataPointWithStatus
DataPointList = list[DataPoint]

+TQueue = TypeVar("TQueue", bound="BaseTimeSeriesUploadQueue")
+IdType = TypeVar("IdType", EitherId, NodeId)
+
+
+class CdmDatapointsPayload(TypedDict):
+    """
+    Represents a payload for CDF datapoints, linking them to a specific instance.
+    """
+
+    instanceId: NodeId
+    datapoints: DataPointList
+

def default_time_series_factory(external_id: str, datapoints: DataPointList) -> TimeSeries:
    """

@@ -79,9 +97,9 @@ def default_time_series_factory(external_id: str, datapoints: DataPointList) ->
    return TimeSeries(external_id=external_id, is_string=is_string)


-class TimeSeriesUploadQueue(AbstractUploadQueue):
+class BaseTimeSeriesUploadQueue(AbstractUploadQueue, Generic[IdType]):
    """
-…
+    Abstract base upload queue for time series.

    Args:
        cdf_client: Cognite Data Fusion client to use

@@ -93,12 +111,6 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
            methods).
        trigger_log_level: Log level to log upload triggers to.
        thread_name: Thread name of uploader thread.
-        create_missing: Create missing time series if possible (ie, if external id is used). Either given as a boolean
-            (True would auto-create a time series with nothing but an external ID), or as a factory function taking an
-            external ID and a list of datapoints about to be inserted and returning a TimeSeries object.
-        data_set_id: Data set id passed to create_missing. Does nothing if create_missing is False.
-            If a custom timeseries creation method is set in create_missing, this is used as fallback if
-            that method does not set data set id on its own.
    """

    def __init__(

@@ -109,8 +121,6 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
        max_upload_interval: int | None = None,
        trigger_log_level: str = "DEBUG",
        thread_name: str | None = None,
-        create_missing: Callable[[str, DataPointList], TimeSeries] | bool = False,
-        data_set_id: int | None = None,
        cancellation_token: CancellationToken | None = None,
    ):
        # Super sets post_upload and threshold

@@ -124,24 +134,14 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
            cancellation_token,
        )

-        self.missing_factory: Callable[[str, DataPointList], TimeSeries]
-
-        if isinstance(create_missing, bool):
-            self.create_missing = create_missing
-            self.missing_factory = default_time_series_factory
-        else:
-            self.create_missing = True
-            self.missing_factory = create_missing
-
-        self.upload_queue: dict[EitherId, DataPointList] = {}
+        self.upload_queue: dict[IdType, DataPointList] = {}

        self.points_queued = TIMESERIES_UPLOADER_POINTS_QUEUED
        self.points_written = TIMESERIES_UPLOADER_POINTS_WRITTEN
        self.queue_size = TIMESERIES_UPLOADER_QUEUE_SIZE
-        self.data_set_id = data_set_id

    def _verify_datapoint_time(self, time: int | float | datetime | str) -> bool:
-        if isinstance(time, int…
+        if isinstance(time, int | float):
            return not math.isnan(time) and time >= MIN_DATAPOINT_TIMESTAMP
        elif isinstance(time, str):
            return False

@@ -155,10 +155,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
            )
        elif isinstance(value, str):
            return len(value) <= MAX_DATAPOINT_STRING_LENGTH
-…
-            return False
-        else:
-            return True
+        return not isinstance(value, datetime)

    def _is_datapoint_valid(
        self,

@@ -171,18 +168,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
        else:
            return True

-    def add_to_upload_queue(
-        self, *, id: int | None = None, external_id: str | None = None, datapoints: DataPointList | None = None
-    ) -> None:
-        """
-        Add data points to upload queue. The queue will be uploaded if the queue size is larger than the threshold
-        specified in the __init__.
-
-        Args:
-            id: Internal ID of time series. Either this or external_id must be set.
-            external_id: External ID of time series. Either this or external_id must be set.
-            datapoints: list of data points to add
-        """
+    def _sanitize_datapoints(self, datapoints: DataPointList | None) -> DataPointList:
        datapoints = datapoints or []
        old_len = len(datapoints)
        datapoints = list(filter(self._is_datapoint_valid, datapoints))

@@ -194,6 +180,116 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
            self.logger.warning(f"Discarding {diff} datapoints due to bad timestamp or value")
            TIMESERIES_UPLOADER_POINTS_DISCARDED.inc(diff)

+        return datapoints
+
+    def __exit__(
+        self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
+    ) -> None:
+        """
+        Wraps around stop method, for use as context manager.
+
+        Args:
+            exc_type: Exception type
+            exc_val: Exception value
+            exc_tb: Traceback
+        """
+        self.stop()
+
+    def __len__(self) -> int:
+        """
+        The size of the upload queue.
+
+        Returns:
+            Number of data points in queue
+        """
+        return self.upload_queue_size
+
+    def __enter__(self: TQueue) -> TQueue:
+        """
+        Wraps around start method, for use as context manager.
+
+        Returns:
+            self
+        """
+        self.start()
+        return self
+
+
+class TimeSeriesUploadQueue(BaseTimeSeriesUploadQueue[EitherId]):
+    """
+    Upload queue for time series.
+
+    Args:
+        cdf_client: Cognite Data Fusion client to use
+        post_upload_function: A function that will be called after each upload. The function will be given one argument:
+            A list of dicts containing the datapoints that were uploaded (on the same format as the kwargs in
+            datapoints upload in the Cognite SDK).
+        max_queue_size: Maximum size of upload queue. Defaults to no max size.
+        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
+            methods).
+        trigger_log_level: Log level to log upload triggers to.
+        thread_name: Thread name of uploader thread.
+        create_missing: Create missing time series if possible (ie, if external id is used). Either given as a boolean
+            (True would auto-create a time series with nothing but an external ID), or as a factory function taking an
+            external ID and a list of datapoints about to be inserted and returning a TimeSeries object.
+        data_set_id: Data set id passed to create_missing. Does nothing if create_missing is False.
+            If a custom timeseries creation method is set in create_missing, this is used as fallback if
+            that method does not set data set id on its own.
+    """
+
+    def __init__(
+        self,
+        cdf_client: CogniteClient,
+        post_upload_function: Callable[[list[dict[str, str | DataPointList]]], None] | None = None,
+        max_queue_size: int | None = None,
+        max_upload_interval: int | None = None,
+        trigger_log_level: str = "DEBUG",
+        thread_name: str | None = None,
+        create_missing: Callable[[str, DataPointList], TimeSeries] | bool = False,
+        data_set_id: int | None = None,
+        cancellation_token: CancellationToken | None = None,
+    ):
+        # Super sets post_upload and threshold
+        super().__init__(
+            cdf_client,
+            post_upload_function,
+            max_queue_size,
+            max_upload_interval,
+            trigger_log_level,
+            thread_name,
+            cancellation_token,
+        )
+
+        self.missing_factory: Callable[[str, DataPointList], TimeSeries]
+
+        if isinstance(create_missing, bool):
+            self.create_missing = create_missing
+            self.missing_factory = default_time_series_factory
+        else:
+            self.create_missing = True
+            self.missing_factory = create_missing
+
+        self.data_set_id = data_set_id
+
+    def add_to_upload_queue(
+        self,
+        *,
+        id: int | None = None,  # noqa: A002
+        external_id: str | None = None,
+        datapoints: DataPointList | None = None,
+    ) -> None:
+        """
+        Add data points to upload queue.
+
+        The queue will be uploaded if the queue size is larger than the threshold specified in the ``__init__``.
+
+        Args:
+            id: Internal ID of time series. Either this or external_id must be set.
+            external_id: External ID of time series. Either this or external_id must be set.
+            datapoints: list of data points to add
+        """
+        datapoints = self._sanitize_datapoints(datapoints)
+
        either_id = EitherId(id=id, external_id=external_id)

        with self.lock:
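The `create_missing` and `data_set_id` options are no longer on the shared base class; they now belong to the concrete `TimeSeriesUploadQueue`, which otherwise keeps its old behaviour. A minimal usage sketch, assuming an already configured `CogniteClient`; the external ID, timestamps, values and data set ID below are placeholders:

from cognite.client import CogniteClient
from cognite.client.data_classes import TimeSeries
from cognite.extractorutils.uploader.time_series import DataPointList, TimeSeriesUploadQueue

client = CogniteClient()  # assumed to be configured elsewhere


def my_factory(external_id: str, datapoints: DataPointList) -> TimeSeries:
    # Custom factory for create_missing: called with the external ID and the
    # datapoints about to be inserted, returns the TimeSeries to create
    return TimeSeries(external_id=external_id, name=external_id, is_string=False)


# Context manager: start() on enter, stop() with a final upload on exit
with TimeSeriesUploadQueue(
    client,
    max_upload_interval=30,      # upload at least every 30 seconds when run threaded
    create_missing=my_factory,   # or simply True to use the default factory
    data_set_id=123,             # placeholder data set ID, fallback for created series
) as queue:
    queue.add_to_upload_queue(
        external_id="my-sensor",                  # placeholder external ID
        datapoints=[(1_700_000_000_000, 42.0)],   # (timestamp in ms, value)
    )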
@@ -209,7 +305,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):

    def upload(self) -> None:
        """
-        Trigger an upload of the queue, clears queue afterwards
+        Trigger an upload of the queue, clears queue afterwards.
        """

        @retry(

@@ -239,9 +335,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):

            if self.create_missing:
                # Get the time series that can be created
-                create_these_ids = …
-                    [id_dict["externalId"] for id_dict in ex.not_found if "externalId" in id_dict]
-                )
+                create_these_ids = {id_dict["externalId"] for id_dict in ex.not_found if "externalId" in id_dict}
                datapoints_lists: dict[str, DataPointList] = {
                    ts_dict["externalId"]: ts_dict["datapoints"]
                    for ts_dict in upload_this

@@ -294,7 +388,7 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
                ]
            )

-            for …
+            for datapoints in self.upload_queue.values():
                self.points_written.inc(len(datapoints))

            try:

@@ -307,40 +401,231 @@ class TimeSeriesUploadQueue(AbstractUploadQueue):
            self.upload_queue_size = 0
            self.queue_size.set(self.upload_queue_size)

-…
+
+class CDMTimeSeriesUploadQueue(BaseTimeSeriesUploadQueue[NodeId]):
+    """
+    Upload queue for CDM time series.
+
+    Args:
+        cdf_client: Cognite Data Fusion client to use
+        post_upload_function: A function that will be called after each upload. The function will be given one argument:
+            A list of dicts containing the datapoints that were uploaded (on the same format as the kwargs in
+            datapoints upload in the Cognite SDK).
+        max_queue_size: Maximum size of upload queue. Defaults to no max size.
+        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
+            methods).
+        trigger_log_level: Log level to log upload triggers to.
+        thread_name: Thread name of uploader thread.
+    """
+
+    def __init__(
+        self,
+        cdf_client: CogniteClient,
+        post_upload_function: Callable[[list[dict[str, str | DataPointList]]], None] | None = None,
+        max_queue_size: int | None = None,
+        max_upload_interval: int | None = None,
+        trigger_log_level: str = "DEBUG",
+        thread_name: str | None = None,
+        create_missing: Callable[[NodeId, DataPointList], CogniteExtractorTimeSeriesApply] | bool = False,
+        cancellation_token: CancellationToken | None = None,
+        source: DirectRelationReference | None = None,
+    ):
+        super().__init__(
+            cdf_client,
+            post_upload_function,
+            max_queue_size,
+            max_upload_interval,
+            trigger_log_level,
+            thread_name,
+            cancellation_token,
+        )
+
+        self.missing_factory: Callable[[NodeId, DataPointList], CogniteExtractorTimeSeriesApply]
+        self.source = source
+
+        if isinstance(create_missing, bool):
+            self.create_missing = create_missing
+            self.missing_factory = self.default_cdm_time_series_factory
+        else:
+            self.create_missing = True
+            self.missing_factory = create_missing
+
+    def default_cdm_time_series_factory(
+        self, instance_id: NodeId, datapoints: DataPointList
+    ) -> CogniteExtractorTimeSeriesApply:
        """
-…
+        Default CDM time series factory used when create_missing in a CDMTimeSeriesUploadQueue is given as a boolean.

+        Args:
+            instance_id: Instance ID of time series to create
+            datapoints: The list of datapoints that were tried to be inserted
+            source: The source of the time series, used for creating the DirectRelationReference
        Returns:
-…
+            A CogniteExtractorTimeSeriesApply object with instance_id set, and the is_string automatically detected
        """
-…
-…
+        is_string = (
+            isinstance(datapoints[0].get("value"), str)
+            if isinstance(datapoints[0], dict)
+            else isinstance(datapoints[0][1], str)
+        )

-…
-…
+        time_series_type: Literal["numeric", "string"] = "string" if is_string else "numeric"
+
+        return CogniteExtractorTimeSeriesApply(
+            space=instance_id.space,
+            external_id=instance_id.external_id,
+            is_step=False,
+            time_series_type=time_series_type,
+            source=self.source,
+        )
+
+    def add_to_upload_queue(
+        self,
+        *,
+        instance_id: NodeId,
+        datapoints: DataPointList | None = None,
    ) -> None:
        """
-…
+        Add data points to upload queue.
+
+        The queue will be uploaded if the queue size is larger than the threshold specified in the __init__.

        Args:
-…
-…
-            exc_tb: Traceback
+            instance_id: The identifier for the time series to which the datapoints belong.
+            datapoints: list of data points to add
        """
-        self.…
+        datapoints = self._sanitize_datapoints(datapoints)

-…
-…
-…
+        with self.lock:
+            if instance_id not in self.upload_queue:
+                self.upload_queue[instance_id] = []

-…
-…
+            self.upload_queue[instance_id].extend(datapoints)
+            self.points_queued.inc(len(datapoints))
+            self.upload_queue_size += len(datapoints)
+            self.queue_size.set(self.upload_queue_size)
+
+            self._check_triggers()
+
+    def upload(self) -> None:
        """
-…
+        Trigger an upload of the queue, clears queue afterwards.
+        """
+
+        @retry(
+            exceptions=cognite_exceptions(),
+            cancellation_token=self.cancellation_token,
+            tries=RETRIES,
+            delay=RETRY_DELAY,
+            max_delay=RETRY_MAX_DELAY,
+            backoff=RETRY_BACKOFF_FACTOR,
+        )
+        def _upload_batch(upload_this: list[CdmDatapointsPayload], retries: int = 5) -> list[CdmDatapointsPayload]:
+            if len(upload_this) == 0:
+                return upload_this
+
+            try:
+                self.cdf_client.time_series.data.insert_multiple(upload_this)  # type: ignore[arg-type]
+            except CogniteNotFoundError as ex:
+                if not retries:
+                    raise ex
+
+                if not self.create_missing:
+                    self.logger.error("Could not upload data points to %s: %s", str(ex.not_found), str(ex))
+
+                # Get IDs of time series that exists, but failed because of the non-existing time series
+                retry_these = [
+                    NodeId(id_dict["instanceId"]["space"], id_dict["instanceId"]["externalId"])
+                    for id_dict in ex.failed
+                    if id_dict not in ex.not_found
+                ]
+
+                if self.create_missing:
+                    # Get the time series that can be created
+                    create_these_ids = {
+                        NodeId(id_dict["instanceId"]["space"], id_dict["instanceId"]["externalId"])
+                        for id_dict in ex.not_found
+                    }
+                    self.logger.info(f"Creating {len(create_these_ids)} time series")
+
+                    datapoints_lists: dict[NodeId, DataPointList] = {
+                        ts_dict["instanceId"]: ts_dict["datapoints"]
+                        for ts_dict in upload_this
+                        if ts_dict["instanceId"] in create_these_ids
+                    }
+
+                    to_create: list[CogniteExtractorTimeSeriesApply] = [
+                        self.missing_factory(instance_id, datapoints_lists[instance_id])
+                        for instance_id in create_these_ids
+                    ]
+
+                    instance_result = self.cdf_client.data_modeling.instances.apply(to_create)
+                    retry_these.extend([node.as_id() for node in instance_result.nodes])
+
+                    if len(ex.not_found) != len(create_these_ids):
+                        missing = [
+                            id_dict
+                            for id_dict in ex.not_found
+                            if NodeId(id_dict["instanceId"]["space"], id_dict["instanceId"]["externalId"])
+                            not in retry_these
+                        ]
+                        missing_num = len(ex.not_found) - len(create_these_ids)
+                        self.logger.error(
+                            f"{missing_num} time series not found, and could not be created automatically: "
+                            + str(missing)
+                            + " Data will be dropped"
+                        )
+
+                # Remove entries with non-existing time series from upload queue
+                upload_this = [entry for entry in upload_this if entry["instanceId"] in retry_these]
+
+                # Upload remaining
+                _upload_batch(upload_this, retries - 1)
+
+            return upload_this
+
+        if len(self.upload_queue) == 0:
+            return
+
+        with self.lock:
+            upload_this = _upload_batch(
+                [
+                    {"instanceId": instance_id, "datapoints": list(datapoints)}
+                    for instance_id, datapoints in self.upload_queue.items()
+                    if len(datapoints) > 0
+                ]
+            )
+
+            for datapoints in self.upload_queue.values():
+                self.points_written.inc(len(datapoints))
+
+            try:
+                self._post_upload(upload_this)
+            except Exception as e:
+                self.logger.error("Error in upload callback: %s", str(e))
+
+            self.upload_queue.clear()
+            self.logger.info(f"Uploaded {self.upload_queue_size} datapoints")
+            self.upload_queue_size = 0
+            self.queue_size.set(self.upload_queue_size)


class SequenceUploadQueue(AbstractUploadQueue):
+    """
+    Upload queue for sequences.
+
+    Args:
+        cdf_client: Cognite Data Fusion client to use
+        post_upload_function: A function that will be called after each upload. The function will be given one
+            argument: A list of the events that were uploaded.
+        max_queue_size: Maximum size of upload queue. Defaults to no max size.
+        max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
+            methods).
+        trigger_log_level: Log level to log upload triggers to.
+        thread_name: Thread name of uploader thread.
+        create_missing: Create missing sequences if possible (ie, if external id is used).
+    """
+
    def __init__(
        self,
        cdf_client: CogniteClient,
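The new `CDMTimeSeriesUploadQueue` mirrors `TimeSeriesUploadQueue`, but keys the queue by data-modeling `NodeId` instead of internal/external ID and creates missing time series as `CogniteExtractorTimeSeriesApply` instances. A minimal sketch of how it might be used, assuming a configured `CogniteClient`; the space, external ID and datapoints are placeholders:

from cognite.client import CogniteClient
from cognite.client.data_classes.data_modeling import NodeId
from cognite.extractorutils.uploader.time_series import CDMTimeSeriesUploadQueue

client = CogniteClient()  # assumed to be configured elsewhere

with CDMTimeSeriesUploadQueue(
    client,
    max_upload_interval=30,
    create_missing=True,  # missing instances are written via the default CDM factory
) as queue:
    queue.add_to_upload_queue(
        instance_id=NodeId("my-space", "my-time-series"),  # placeholder space / external ID
        datapoints=[(1_700_000_000_000, 42.0)],            # (timestamp in ms, value)
    )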
@@ -352,19 +637,6 @@ class SequenceUploadQueue(AbstractUploadQueue):
        create_missing: bool = False,
        cancellation_token: CancellationToken | None = None,
    ):
-        """
-        Args:
-            cdf_client: Cognite Data Fusion client to use
-            post_upload_function: A function that will be called after each upload. The function will be given one
-                argument: A list of the events that were uploaded.
-            max_queue_size: Maximum size of upload queue. Defaults to no max size.
-            max_upload_interval: Automatically trigger an upload each m seconds when run as a thread (use start/stop
-                methods).
-            trigger_log_level: Log level to log upload triggers to.
-            thread_name: Thread name of uploader thread.
-            create_missing: Create missing sequences if possible (ie, if external id is used)
-        """
-
        # Super sets post_upload and threshold
        super().__init__(
            cdf_client,

@@ -393,7 +665,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
    def set_sequence_metadata(
        self,
        metadata: dict[str, str | int | float],
-        id: int | None = None,
+        id: int | None = None,  # noqa: A002
        external_id: str | None = None,
        asset_external_id: str | None = None,
        dataset_external_id: str | None = None,

@@ -401,8 +673,10 @@ class SequenceUploadQueue(AbstractUploadQueue):
        description: str | None = None,
    ) -> None:
        """
-        Set sequence metadata.
-…
+        Set sequence metadata.
+
+        Metadata will be cached until the sequence is created. The metadata will be updated if the sequence already
+        exists.

        Args:
            metadata: Sequence metadata

@@ -427,10 +701,13 @@ class SequenceUploadQueue(AbstractUploadQueue):
            self.sequence_descriptions[either_id] = description

    def set_sequence_column_definition(
-        self, …
+        self,
+        col_def: list[dict[str, str]],
+        id: int | None = None,  # noqa: A002
+        external_id: str | None = None,
    ) -> None:
        """
-        Set sequence column definition
+        Set sequence column definition.

        Args:
            col_def: Sequence column definition

@@ -450,12 +727,13 @@ class SequenceUploadQueue(AbstractUploadQueue):
        | SequenceData
        | SequenceRows,
        column_external_ids: list[dict] | None = None,
-        id: int | None = None,
+        id: int | None = None,  # noqa: A002
        external_id: str | None = None,
    ) -> None:
        """
-        Add sequence rows to upload queue.
-…
+        Add sequence rows to upload queue.
+
+        Mirrors implementation of SequenceApi.insert. Inserted rows will be cached until uploaded.

        Args:
            rows: The rows to be inserted. Can either be a list of tuples, a list of ["rownumber": ..., "values": ...]

@@ -466,7 +744,6 @@ class SequenceUploadQueue(AbstractUploadQueue):
            external_id: Sequence external ID
                Us if id is None
        """
-
        if len(rows) == 0:
            pass


@@ -509,7 +786,7 @@ class SequenceUploadQueue(AbstractUploadQueue):

    def upload(self) -> None:
        """
-        Trigger an upload of the queue, clears queue afterwards
+        Trigger an upload of the queue, clears queue afterwards.
        """

        @retry(

@@ -571,15 +848,14 @@ class SequenceUploadQueue(AbstractUploadQueue):

    def _create_or_update(self, either_id: EitherId) -> None:
        """
-        Create or update sequence, based on provided metadata and column definitions
+        Create or update sequence, based on provided metadata and column definitions.

        Args:
            either_id: Id/External Id of sequence to be updated
        """
-
        column_def = self.column_definitions.get(either_id)
        if column_def is None:
-            self.logger.error(f"Can't create sequence {…
+            self.logger.error(f"Can't create sequence {either_id!s}, no column definitions provided")

        try:
            seq = self.cdf_client.sequences.create(

@@ -596,7 +872,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
            )

        except CogniteDuplicatedError:
-            self.logger.info(f"…
+            self.logger.info(f"Sequence already exist: {either_id}")
            seq = self.cdf_client.sequences.retrieve(  # type: ignore [assignment]
                id=either_id.internal_id,
                external_id=either_id.external_id,

@@ -608,7 +884,7 @@ class SequenceUploadQueue(AbstractUploadQueue):

    def _resolve_asset_ids(self) -> None:
        """
-        Resolve id of assets if specified, for use in sequence creation
+        Resolve id of assets if specified, for use in sequence creation.
        """
        assets = set(self.sequence_asset_external_ids.values())
        assets.discard(None)  # type: ignore # safeguard, remove Nones if any

@@ -628,7 +904,7 @@ class SequenceUploadQueue(AbstractUploadQueue):

    def _resolve_dataset_ids(self) -> None:
        """
-        Resolve id of datasets if specified, for use in sequence creation
+        Resolve id of datasets if specified, for use in sequence creation.
        """
        datasets = set(self.sequence_dataset_external_ids.values())
        datasets.discard(None)  # type: ignore # safeguard, remove Nones if any

@@ -648,7 +924,7 @@ class SequenceUploadQueue(AbstractUploadQueue):

    def __enter__(self) -> "SequenceUploadQueue":
        """
-        Wraps around start method, for use as context manager
+        Wraps around start method, for use as context manager.

        Returns:
            self

@@ -660,7 +936,7 @@ class SequenceUploadQueue(AbstractUploadQueue):
        self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None
    ) -> None:
        """
-        Wraps around stop method, for use as context manager
+        Wraps around stop method, for use as context manager.

        Args:
            exc_type: Exception type

@@ -671,7 +947,7 @@ class SequenceUploadQueue(AbstractUploadQueue):

    def __len__(self) -> int:
        """
-        The size of the upload queue
+        The size of the upload queue.

        Returns:
            Number of data points in queue