PyPI - cognite-extractor-utils - Versions diffs - 7.5.13__py3-none-any.whl → 7.6.0__py3-none-any.whl - Mend

cognite-extractor-utils 7.5.13__py3-none-any.whl → 7.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cognite-extractor-utils might be problematic. Click here for more details.

Files changed (47) hide show

cognite/extractorutils/__init__.py +1 -1
cognite/extractorutils/_inner_util.py +1 -1
cognite/extractorutils/base.py +120 -40
cognite/extractorutils/configtools/__init__.py +4 -5
cognite/extractorutils/configtools/_util.py +3 -2
cognite/extractorutils/configtools/elements.py +213 -35
cognite/extractorutils/configtools/loaders.py +68 -16
cognite/extractorutils/configtools/validators.py +5 -1
cognite/extractorutils/exceptions.py +11 -2
cognite/extractorutils/metrics.py +17 -12
cognite/extractorutils/statestore/__init__.py +77 -3
cognite/extractorutils/statestore/_base.py +7 -3
cognite/extractorutils/statestore/hashing.py +129 -15
cognite/extractorutils/statestore/watermark.py +77 -87
cognite/extractorutils/threading.py +30 -4
cognite/extractorutils/unstable/__init__.py +5 -5
cognite/extractorutils/unstable/configuration/__init__.py +3 -0
cognite/extractorutils/unstable/configuration/exceptions.py +13 -2
cognite/extractorutils/unstable/configuration/loaders.py +90 -19
cognite/extractorutils/unstable/configuration/models.py +121 -7
cognite/extractorutils/unstable/core/__init__.py +5 -0
cognite/extractorutils/unstable/core/_dto.py +5 -3
cognite/extractorutils/unstable/core/base.py +113 -4
cognite/extractorutils/unstable/core/errors.py +41 -0
cognite/extractorutils/unstable/core/logger.py +149 -0
cognite/extractorutils/unstable/core/restart_policy.py +16 -2
cognite/extractorutils/unstable/core/runtime.py +119 -36
cognite/extractorutils/unstable/core/tasks.py +53 -1
cognite/extractorutils/unstable/scheduling/__init__.py +13 -0
cognite/extractorutils/unstable/scheduling/_scheduler.py +1 -1
cognite/extractorutils/uploader/__init__.py +7 -5
cognite/extractorutils/uploader/_base.py +4 -5
cognite/extractorutils/uploader/assets.py +13 -8
cognite/extractorutils/uploader/data_modeling.py +37 -2
cognite/extractorutils/uploader/events.py +14 -9
cognite/extractorutils/uploader/files.py +80 -21
cognite/extractorutils/uploader/raw.py +12 -7
cognite/extractorutils/uploader/time_series.py +58 -49
cognite/extractorutils/uploader/upload_failure_handler.py +35 -2
cognite/extractorutils/uploader_extractor.py +29 -6
cognite/extractorutils/uploader_types.py +15 -1
cognite/extractorutils/util.py +76 -23
{cognite_extractor_utils-7.5.13.dist-info → cognite_extractor_utils-7.6.0.dist-info}/METADATA +1 -1
cognite_extractor_utils-7.6.0.dist-info/RECORD +50 -0
cognite_extractor_utils-7.5.13.dist-info/RECORD +0 -50
{cognite_extractor_utils-7.5.13.dist-info → cognite_extractor_utils-7.6.0.dist-info}/WHEEL +0 -0
{cognite_extractor_utils-7.5.13.dist-info → cognite_extractor_utils-7.6.0.dist-info}/licenses/LICENSE +0 -0

cognite/extractorutils/unstable/core/base.py CHANGED Viewed

@@ -1,5 +1,50 @@
+"""
+This module provides the base class for extractors.
+It includes functionality for task management, logging, error handling, and configuration management.
+Extractors should subclass the `Extractor` class and implement the `__init_tasks__` method to define their tasks.
+The subclass should also define several class attributes:
+- ``NAME``: A human-readable name for the extractor.
+- ``EXTERNAL_ID``: A unique identifier for the extractor, used when reporting to CDF Integrations.
+- ``DESCRIPTION``: A brief description of the extractor.
+- ``VERSION``: The version of the extractor, used when reporting to CDF Integrations. This should follow semantic
+   versioning.
+- ``CONFIG_TYPE``: The type of the application configuration for the extractor, which should be a subclass of
+  ``ExtractorConfig``. This should be the same class as the one used for the generic type parameter of the
+  ``Extractor`` class.
+.. code-block:: python
+    class MyConfig(ExtractorConfig):
+        parameter: str
+        another_parameter: int
+        schedule: ScheduleConfig
+    class MyExtractor(Extractor[MyConfig]):
+        NAME = "My Extractor"
+        EXTERNAL_ID = "my-extractor"
+        DESCRIPTION = "An example extractor"
+        VERSION = "1.0.0"
+        CONFIG_TYPE = MyConfig
+        def __init_tasks__(self) -> None:
+            self.add_task(
+                ScheduledTask(
+                    name="my_task",
+                    description="An example task",
+                    schedule=self.application_config.schedule,
+                    target=self.my_task_function,
+                )
+            )
+        def my_task_function(self, task_context: TaskContext) -> None:
+            task_context.logger.info("Running my task")
+"""
 import logging
-import logging.config
 import time
 from concurrent.futures import ThreadPoolExecutor
 from functools import partial
@@ -30,7 +75,7 @@ from cognite.extractorutils.unstable.core.tasks import ContinuousTask, Scheduled
 from cognite.extractorutils.unstable.scheduling import TaskScheduler
 from cognite.extractorutils.util import now
-__all__ = ["ConfigType", "ConfigRevision", "Extractor"]
+__all__ = ["ConfigRevision", "ConfigType", "Extractor"]
 ConfigType = TypeVar("ConfigType", bound=ExtractorConfig)
 ConfigRevision = Literal["local"] | int
@@ -40,6 +85,13 @@ _T = TypeVar("_T", bound=ExtractorConfig)
 class FullConfig(Generic[_T]):
+    """
+    A class that holds the full configuration for an extractor.
+    This includes the connection configuration, application configuration, and which revision of the application
+    configuration is currently active.
+    """
     def __init__(
         self,
         connection_config: ConnectionConfig,
@@ -52,6 +104,16 @@ class FullConfig(Generic[_T]):
 class Extractor(Generic[ConfigType], CogniteLogger):
+    """
+    Base class for all extractors.
+    This class provides the basic functionality for running an extractor, including task management, logging,
+    error handling, and configuration management.
+    It is designed to be subclassed by specific extractors, which should implement the `__init_tasks__` method
+    to define their tasks.
+    """
     NAME: str
     EXTERNAL_ID: str
     DESCRIPTION: str
@@ -128,6 +190,13 @@ class Extractor(Generic[ConfigType], CogniteLogger):
                     root.addHandler(fh)
     def __init_tasks__(self) -> None:
+        """
+        This method should be overridden by subclasses to define their tasks.
+        It is called automatically when the extractor is initialized.
+        Subclasses should call ``self.add_task(...)`` to add tasks to the extractor.
+        """
         pass
     def _set_runtime_message_queue(self, queue: Queue) -> None:
@@ -200,6 +269,9 @@ class Extractor(Generic[ConfigType], CogniteLogger):
         )
     def restart(self) -> None:
+        """
+        Trigger a restart of the extractor.
+        """
         self._logger.info("Restarting extractor")
         if self._runtime_messages:
             self._runtime_messages.put(RuntimeMessage.RESTART)
@@ -210,12 +282,20 @@ class Extractor(Generic[ConfigType], CogniteLogger):
         return cls(config)
     def add_task(self, task: Task) -> None:
+        """
+        Add a task to the extractor.
+        This method wraps the task's target function to include error handling and task tracking.
+        Args:
+            task: The task to add. It should be an instance of ``StartupTask``, ``ContinuousTask``, or ``ScheduledTask``
+        """
         # Store this for later, since we'll override it with the wrapped version
         target = task.target
         def run_task(task_context: TaskContext) -> None:
             """
-            A wrapped version of the task's target, with tracking and error handling
+            A wrapped version of the task's target, with tracking and error handling.
             """
             # Record a task start
             with self._checkin_lock:
@@ -275,7 +355,7 @@ class Extractor(Generic[ConfigType], CogniteLogger):
                     {
                         "name": t.name,
                         "type": "continuous" if isinstance(t, ContinuousTask) else "batch",
-                        "action": True if isinstance(t, ScheduledTask) else False,
+                        "action": bool(isinstance(t, ScheduledTask)),
                         "description": t.description,
                     }
                     for t in self._tasks
@@ -285,14 +365,29 @@ class Extractor(Generic[ConfigType], CogniteLogger):
         )
     def start(self) -> None:
+        """
+        Start the extractor.
+        Instead of calling this method directly, it is recommended to use the context manager interface by using the
+        ``with`` statement, which ensures proper cleanup on exit.
+        """
         self._setup_logging()
         self._report_extractor_info()
         Thread(target=self._run_checkin, name="ExtractorCheckin", daemon=True).start()
     def stop(self) -> None:
+        """
+        Stop the extractor.
+        Instead of calling this method directly, it is recommended to use the context manager interface by using the
+        ``with`` statement, which ensures proper cleanup on exit.
+        """
         self.cancellation_token.cancel()
     def __enter__(self) -> Self:
+        """
+        Start the extractor in a context manager.
+        """
         self.start()
         return self
@@ -302,6 +397,9 @@ class Extractor(Generic[ConfigType], CogniteLogger):
         exc_val: BaseException | None,
         exc_tb: TracebackType | None,
     ) -> bool:
+        """
+        Stop the extractor when exiting the context manager.
+        """
         self.stop()
         with self._checkin_lock:
             self._checkin()
@@ -310,6 +408,17 @@ class Extractor(Generic[ConfigType], CogniteLogger):
         return exc_val is None
     def run(self) -> None:
+        """
+        Run the extractor. This method starts the extractor and runs all tasks that have been added.
+        This method assumes ``self.start()`` has been called first. The recommended way to use this method is
+        to use the context manager interface, which ensures that the extractor is started and stopped properly.
+        .. code-block:: python
+            with extractor:
+                extractor.run()
+        """
         has_scheduled = False
         startup: list[StartupTask] = []

cognite/extractorutils/unstable/core/errors.py CHANGED Viewed

@@ -1,3 +1,7 @@
+"""
+This module defines the Error and ErrorLevel classes for reporting errors in extractors.
+"""
 import logging
 from enum import Enum
 from types import TracebackType
@@ -15,12 +19,19 @@ __all__ = ["Error", "ErrorLevel"]
 class ErrorLevel(Enum):
+    """
+    Enumeration of error levels for reporting errors in extractors.
+    """
     warning = "warning"
     error = "error"
     fatal = "fatal"
     @property
     def log_level(self) -> int:
+        """
+        Returns the corresponding logging level for the error level.
+        """
         match self:
             case ErrorLevel.warning:
                 return logging.WARNING
@@ -33,6 +44,20 @@ class ErrorLevel(Enum):
 class Error:
+    """
+    Represents an error that occurred during the run of an extractor.
+    This class should not be instantiated directly. Instead, use the ``CogniteLogger`` methods (either in the
+    TaskContext or the extractor base class) to create errors.
+    Args:
+        level: The severity level of the error.
+        description: A brief description of the error.
+        details: Additional details about the error, if any.
+        task_name: The name of the task during which the error occurred, if applicable.
+        extractor: The extractor instance that reported the error.
+    """
     def __init__(
         self,
         level: ErrorLevel,
@@ -55,6 +80,9 @@ class Error:
         self._extractor._report_error(self)
     def instant(self) -> None:
+        """
+        Make this error an instant error, meaning it does not have a duration.
+        """
         # Only end the error once
         if self.end_time is not None:
             return
@@ -65,6 +93,11 @@ class Error:
         self._extractor._report_error(self)
     def finish(self) -> None:
+        """
+        Mark the error as finished, setting the end time to the current time.
+        This method should be called when the error is resolved or no longer relevant.
+        """
         # Only end the error once
         if self.end_time is not None:
             return
@@ -75,6 +108,11 @@ class Error:
         self._extractor._report_error(self)
     def __enter__(self) -> "Error":
+        """
+        Start tracking an error as a context manager.
+        This allows the error to be automatically finished when exiting the context.
+        """
         return self
     def __exit__(
@@ -83,5 +121,8 @@ class Error:
         exc_val: BaseException | None,
         exc_tb: TracebackType | None,
     ) -> bool:
+        """
+        Finish the error context manager, marking the error as finished.
+        """
         self.finish()
         return exc_val is None

cognite/extractorutils/unstable/core/logger.py CHANGED Viewed

@@ -1,3 +1,10 @@
+"""
+This module provides the ``CogniteLogger`` base class, which is an abstract base class for logging.
+This class is subclassed by both the ``TaskContext`` and the ``Extractor`` base classes, providing a unified interface
+for logging and error handling in extractors.
+"""
 from abc import ABC, abstractmethod
 from logging import Logger, getLogger
 from traceback import format_exception
@@ -9,6 +16,16 @@ from cognite.extractorutils.unstable.core.errors import Error, ErrorLevel
 class CogniteLogger(ABC):
+    """
+    Base class for logging and error handling in extractors.
+    This class provides methods to log messages at different levels (debug, info, warning, error, fatal) and to
+    create and manage errors that occur during the execution of an extractor.
+    If you use this class instead of a standard logger, you will get additional functionality such as reporting errors
+    back to CDF.
+    """
     def __init__(self) -> None:
         self._logger: Logger = getLogger()
@@ -24,9 +41,15 @@ class CogniteLogger(ABC):
         pass
     def debug(self, message: str) -> None:
+        """
+        Log a debug message.
+        """
         self._logger.debug(message)
     def info(self, message: str) -> None:
+        """
+        Log an information message.
+        """
         self._logger.info(message)
     def begin_warning(
@@ -36,6 +59,32 @@ class CogniteLogger(ABC):
         details: str | None = None,
         auto_log: bool = True,
     ) -> Error:
+        """
+        Begin a warning error.
+        This will both log the message and create an error object that can be used to track and report the error.
+        Args:
+            message: The message to log and include in the error.
+            details: Additional details about the error, if any.
+            auto_log: If True, the message will be logged to the standard logging framework as well. Defaults to True.
+        Returns:
+            An ``Error`` object representing the warning error, tied to the current extractor instance.
+        Examples:
+            To track and complete an error, you can keep a reference to the error object and call its ``finish``
+            method when the error is resolved, or use it in a context manager to automatically finish it:
+            .. code-block:: python
+                error = logger.begin_warning("This is a warning", details="Some details")
+                # Do something
+                error.finish()
+            .. code-block:: python
+                with logger.begin_warning("This is a warning", details="Some details"):
+                    # Do something
+        """
         if auto_log:
             self._logger.warning(message)
         return self._new_error(
@@ -51,6 +100,32 @@ class CogniteLogger(ABC):
         details: str | None = None,
         auto_log: bool = True,
     ) -> Error:
+        """
+        Begin an error.
+        This will both log the message and create an error object that can be used to track and report the error.
+        Args:
+            message: The message to log and include in the error.
+            details: Additional details about the error, if any.
+            auto_log: If True, the message will be logged to the standard logging framework as well. Defaults to True.
+        Returns:
+            An ``Error`` object representing the error, tied to the current extractor instance.
+        Examples:
+            To track and complete an error, you can keep a reference to the error object and call its ``finish``
+            method when the error is resolved, or use it in a context manager to automatically finish it:
+            .. code-block:: python
+                error = logger.begin_error("This is an error", details="Some details")
+                # Do something
+                error.finish()
+            .. code-block:: python
+                with logger.begin_error("This is an error", details="Some details"):
+                    # Do something
+        """
         if auto_log:
             self._logger.error(message)
         return self._new_error(
@@ -66,6 +141,32 @@ class CogniteLogger(ABC):
         details: str | None = None,
         auto_log: bool = True,
     ) -> Error:
+        """
+        Begin a fatal error.
+        This will both log the message and create an error object that can be used to track and report the error.
+        Args:
+            message: The message to log and include in the error.
+            details: Additional details about the error, if any.
+            auto_log: If True, the message will be logged to the standard logging framework as well. Defaults to True.
+        Returns:
+            An ``Error`` object representing the fatal error, tied to the current extractor instance.
+        Examples:
+            To track and complete an error, you can keep a reference to the error object and call its ``finish``
+            method when the error is resolved, or use it in a context manager to automatically finish it:
+            .. code-block:: python
+                error = logger.begin_fatal("This is a fatal error", details="Some details")
+                # Do something
+                error.finish()
+            .. code-block:: python
+                with logger.begin_fatal("This is a fatal error", details="Some details"):
+                    # Do something
+        """
         if auto_log:
             self._logger.critical(message)
         return self._new_error(
@@ -81,6 +182,17 @@ class CogniteLogger(ABC):
         details: str | None = None,
         auto_log: bool = True,
     ) -> None:
+        """
+        Report an instant warning.
+        This will log the message and create an error object that is marked as instant, meaning it does not have a
+        duration.
+        Args:
+            message: The message to log and include in the error.
+            details: Additional details about the error, if any.
+            auto_log: If True, the message will be logged to the standard logging framework as well. Defaults to True.
+        """
         if auto_log:
             self._logger.warning(message)
         self._new_error(
@@ -96,6 +208,17 @@ class CogniteLogger(ABC):
         details: str | None = None,
         auto_log: bool = True,
     ) -> None:
+        """
+        Report an instant error.
+        This will log the message and create an error object that is marked as instant, meaning it does not have a
+        duration.
+        Args:
+            message: The message to log and include in the error.
+            details: Additional details about the error, if any.
+            auto_log: If True, the message will be logged to the standard logging framework as well. Defaults to True.
+        """
         if auto_log:
             self._logger.error(message)
         self._new_error(
@@ -111,6 +234,17 @@ class CogniteLogger(ABC):
         details: str | None = None,
         auto_log: bool = True,
     ) -> None:
+        """
+        Report an instant fatal error.
+        This will log the message and create an error object that is marked as instant, meaning it does not have a
+        duration.
+        Args:
+            message: The message to log and include in the error.
+            details: Additional details about the error, if any.
+            auto_log: If True, the message will be logged to the standard logging framework as well. Defaults to True.
+        """
         if auto_log:
             self._logger.critical(message)
         self._new_error(
@@ -128,6 +262,21 @@ class CogniteLogger(ABC):
         include_details: Literal["stack_trace"] | Literal["exception_message"] | bool = "exception_message",
         auto_log: bool = True,
     ) -> None:
+        """
+        Report an exception as an error.
+        This will log the message and create an error object that is marked as instant, meaning it does not have a
+        duration. The exception details can be included in the error.
+        Args:
+            message: The message to log and include in the error.
+            exception: The exception to report.
+            level: The severity level of the error. Defaults to ``ErrorLevel.error``.
+            include_details: How to include details about the exception. Can be "stack_trace", "exception_message",
+                or True (equivalent to "exception_message"). If False, no details are included. Defaults to
+                "exception_message".
+            auto_log: If True, the message will be logged to the standard logging framework as well. Defaults to True.
+        """
         if auto_log:
             self._logger.log(level=level.log_level, msg=message, exc_info=exception)

cognite/extractorutils/unstable/core/restart_policy.py CHANGED Viewed

@@ -1,3 +1,17 @@
+"""
+This module defines the restart policies for extractors.
+It is used by the ``Runtime`` to determine whether an extractor should be restarted after a task failure.
+It provides three predefined restart policies:
+- ``NEVER``: The extractor will never be restarted.
+- ``WHEN_ANY_TASK_CRASHES``: The extractor will be restarted if any task crashes.
+- ``WHEN_CONTINUOUS_TASKS_CRASHES``: The extractor will be restarted only if a continuous task crashes.
+Users can also define their own restart policies by providing a callable that takes a `Task` and an `Exception`
+and returns a boolean indicating whether the extractor should be restarted.
+"""
 from collections.abc import Callable
 from cognite.extractorutils.unstable.core.tasks import ContinuousTask, Task
@@ -22,8 +36,8 @@ WHEN_CONTINUOUS_TASKS_CRASHES = _is_continuous
 WHEN_ANY_TASK_CRASHES = _true
 __all__ = [
-    "RestartPolicy",
     "NEVER",
-    "WHEN_CONTINUOUS_TASKS_CRASHES",
     "WHEN_ANY_TASK_CRASHES",
+    "WHEN_CONTINUOUS_TASKS_CRASHES",
+    "RestartPolicy",
 ]

cognite-extractor-utils 7.5.13__py3-none-any.whl → 7.6.0__py3-none-any.whl

Potentially problematic release.

cognite-extractor-utils 7.5.13__py3-none-any.whl → 7.6.0__py3-none-any.whl