dreadnode 1.0.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dreadnode/__init__.py +51 -0
- dreadnode/api/__init__.py +0 -0
- dreadnode/api/client.py +249 -0
- dreadnode/api/models.py +210 -0
- dreadnode/artifact/__init__.py +0 -0
- dreadnode/artifact/merger.py +599 -0
- dreadnode/artifact/storage.py +126 -0
- dreadnode/artifact/tree_builder.py +455 -0
- dreadnode/constants.py +16 -0
- dreadnode/integrations/__init__.py +0 -0
- dreadnode/integrations/transformers.py +183 -0
- dreadnode/main.py +1042 -0
- dreadnode/metric.py +225 -0
- dreadnode/object.py +29 -0
- dreadnode/py.typed +0 -0
- dreadnode/serialization.py +731 -0
- dreadnode/task.py +447 -0
- dreadnode/tracing/__init__.py +0 -0
- dreadnode/tracing/constants.py +35 -0
- dreadnode/tracing/exporters.py +157 -0
- dreadnode/tracing/span.py +811 -0
- dreadnode/types.py +25 -0
- dreadnode/util.py +150 -0
- dreadnode/version.py +3 -0
- dreadnode-1.0.0rc0.dist-info/METADATA +122 -0
- dreadnode-1.0.0rc0.dist-info/RECORD +27 -0
- dreadnode-1.0.0rc0.dist-info/WHEEL +4 -0
dreadnode/task.py
ADDED
|
@@ -0,0 +1,447 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import inspect
|
|
3
|
+
import traceback
|
|
4
|
+
import typing as t
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
from logfire._internal.stack_info import warn_at_user_stacklevel
|
|
8
|
+
from opentelemetry.trace import Tracer
|
|
9
|
+
|
|
10
|
+
from dreadnode.metric import Scorer, ScorerCallable
|
|
11
|
+
from dreadnode.tracing.span import TaskSpan, current_run_span
|
|
12
|
+
|
|
13
|
+
P = t.ParamSpec("P")
|
|
14
|
+
R = t.TypeVar("R")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TaskFailedWarning(UserWarning):
    """Warning category emitted when a task run via a `try_*` helper raises."""
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class TaskGeneratorWarning(UserWarning):
    """Warning category for generator-related task issues (raised elsewhere in the package)."""
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class TaskSpanList(list[TaskSpan[R]]):
    """Thin `list` subclass adding ranking helpers for collections of TaskSpans."""

    def sorted(self, *, reverse: bool = True) -> "TaskSpanList[R]":
        """
        Return a new TaskSpanList ordered by each span's average metric value.

        Args:
            reverse: If True (default), highest-scoring spans come first.

        Returns:
            A new TaskSpanList sorted by average metric value.
        """
        ordered = sorted(
            self,
            key=lambda item: item.get_average_metric_value(),
            reverse=reverse,
        )
        return TaskSpanList(ordered)

    @t.overload
    def top_n(
        self,
        n: int,
        *,
        as_outputs: t.Literal[False] = False,
        reverse: bool = True,
    ) -> "TaskSpanList[R]": ...

    @t.overload
    def top_n(
        self,
        n: int,
        *,
        as_outputs: t.Literal[True],
        reverse: bool = True,
    ) -> list[R]: ...

    def top_n(
        self,
        n: int,
        *,
        as_outputs: bool = False,
        reverse: bool = True,
    ) -> "TaskSpanList[R] | list[R]":
        """
        Take the best `n` spans, ranked by their average metric value.

        Args:
            n: How many spans to keep.
            as_outputs: When True, return the spans' outputs instead of the spans.
            reverse: If True (default), rank in descending order.

        Returns:
            A TaskSpanList of the top spans, or their outputs when `as_outputs` is set.
        """
        best = self.sorted(reverse=reverse)[:n]
        if as_outputs:
            return t.cast("list[R]", [span.output for span in best])
        return TaskSpanList(best)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@dataclass
class Task(t.Generic[P, R]):
    """
    Structured task wrapper for a function that can be executed within a run.

    Tasks allow you to associate metadata, inputs, outputs, and metrics for a unit of work.
    """

    # Tracer used to create the underlying OpenTelemetry spans for executions.
    tracer: Tracer

    name: str
    "The name of the task. This is used for logging and tracing."
    label: str
    "The label of the task - used to group associated metrics and data together."
    attributes: dict[str, t.Any]
    "A dictionary of attributes to attach to the task span."
    func: t.Callable[P, R]
    "The function to execute as the task."
    scorers: list[Scorer[R]]
    "A list of scorers to evaluate the task's output."
    tags: list[str]
    "A list of tags to attach to the task span."

    log_params: t.Sequence[str] | bool = False
    "Whether to log all, or specific, incoming arguments to the function as parameters."
    log_inputs: t.Sequence[str] | bool = True
    "Whether to log all, or specific, incoming arguments to the function as inputs."
    log_output: bool = True
    "Whether to automatically log the result of the function as an output."

    def __post_init__(self) -> None:
        # Mirror the wrapped function's metadata (signature, name, docstring)
        # so the Task instance presents like the callable it wraps — similar in
        # spirit to functools.wraps.
        self.__signature__ = getattr(
            self.func,
            "__signature__",
            inspect.signature(self.func),
        )
        self.__name__ = getattr(self.func, "__name__", self.name)
        self.__doc__ = getattr(self.func, "__doc__", None)

    def _bind_args(self, *args: P.args, **kwargs: P.kwargs) -> dict[str, t.Any]:
        # Resolve the call arguments against the wrapped function's signature,
        # filling in declared defaults, so logging sees a complete name->value map.
        signature = inspect.signature(self.func)
        bound_args = signature.bind(*args, **kwargs)
        bound_args.apply_defaults()
        return dict(bound_args.arguments)

    def clone(self) -> "Task[P, R]":
        """
        Clone a task.

        Returns:
            A new Task instance with the same attributes as this one.
        """
        # Mutable members (attributes, scorers, tags) are copied so the clone
        # can be modified without affecting this instance.
        return Task(
            tracer=self.tracer,
            name=self.name,
            label=self.label,
            attributes=self.attributes.copy(),
            func=self.func,
            scorers=[scorer.clone() for scorer in self.scorers],
            tags=self.tags.copy(),
            log_params=self.log_params,
            log_inputs=self.log_inputs,
            log_output=self.log_output,
        )

    def with_(
        self,
        *,
        scorers: t.Sequence[Scorer[R] | ScorerCallable[R]] | None = None,
        name: str | None = None,
        tags: t.Sequence[str] | None = None,
        label: str | None = None,
        log_params: t.Sequence[str] | bool | None = None,
        log_inputs: t.Sequence[str] | bool | None = None,
        log_output: bool | None = None,
        append: bool = False,
        **attributes: t.Any,
    ) -> "Task[P, R]":
        """
        Clone a task and modify its attributes.

        Args:
            scorers: A list of new scorers to set or append to the task.
            name: The new name for the task.
            tags: A list of new tags to set or append to the task.
            label: The new label for the task.
            log_params: Whether to log all, or specific, incoming arguments to the function as parameters.
            log_inputs: Whether to log all, or specific, incoming arguments to the function as inputs.
            log_output: Whether to automatically log the result of the function as an output.
            append: If True, appends the new scorers and tags to the existing ones. If False, replaces them.
            **attributes: Additional attributes to set or update in the task.

        Returns:
            A new Task instance with the modified attributes.
        """
        task = self.clone()
        task.name = name or task.name
        task.label = label or task.label
        # `is not None` checks (rather than truthiness) so callers can
        # explicitly pass False/[] to disable logging.
        task.log_params = log_params if log_params is not None else task.log_params
        task.log_inputs = log_inputs if log_inputs is not None else task.log_inputs
        task.log_output = log_output if log_output is not None else task.log_output

        new_scorers = [Scorer.from_callable(self.tracer, scorer) for scorer in (scorers or [])]
        new_tags = list(tags or [])

        # NOTE(review): when append=False, scorers/tags/attributes that are not
        # passed here are replaced with empty collections rather than carried
        # over from the original — confirm this replace-all behavior is intended.
        if append:
            task.scorers.extend(new_scorers)
            task.tags.extend(new_tags)
            task.attributes.update(attributes)
        else:
            task.scorers = new_scorers
            task.tags = new_tags
            task.attributes = attributes

        return task

    async def run(self, *args: P.args, **kwargs: P.kwargs) -> TaskSpan[R]:
        """
        Execute the task and return the result as a TaskSpan.

        Args:
            args: The arguments to pass to the task.
            kwargs: The keyword arguments to pass to the task.

        Returns:
            The span associated with task execution.

        Raises:
            RuntimeError: If called outside an active, recording run.
        """
        run = current_run_span.get()
        if run is None or not run.is_recording:
            raise RuntimeError("Tasks must be executed within a run")

        bound_args = self._bind_args(*args, **kwargs)

        # Select which bound arguments to record, per the log_params/log_inputs
        # settings: True -> everything, a sequence -> only the named args,
        # False -> nothing.
        params_to_log = (
            bound_args
            if self.log_params is True
            else {k: v for k, v in bound_args.items() if k in self.log_params}
            if self.log_params is not False
            else {}
        )
        inputs_to_log = (
            bound_args
            if self.log_inputs is True
            else {k: v for k, v in bound_args.items() if k in self.log_inputs}
            if self.log_inputs is not False
            else {}
        )

        with TaskSpan[R](
            name=self.name,
            label=self.label,
            attributes=self.attributes,
            params=params_to_log,
            tags=self.tags,
            run_id=run.run_id,
            tracer=self.tracer,
        ) as span:
            # Count every execution attempt under this task's label.
            span.run.log_metric(f"{self.label}.exec.count", 1, mode="count")

            for name, value in params_to_log.items():
                span.log_param(name, value)

            # log_input returns an object hash; collect them so the output can
            # be linked back to its inputs below.
            input_object_hashes: list[str] = [
                span.log_input(name, value, label=f"{self.label}.input.{name}")
                for name, value in inputs_to_log.items()
            ]

            try:
                # Support both sync and async callables transparently.
                output = t.cast("R | t.Awaitable[R]", self.func(*args, **kwargs))
                if inspect.isawaitable(output):
                    output = await output
            except Exception:
                # Record the failure in the rolling success-rate average,
                # then let the exception propagate (the span records it too).
                span.run.log_metric(f"{self.label}.exec.success_rate", 0, mode="avg")
                raise

            span.run.log_metric(f"{self.label}.exec.success_rate", 1, mode="avg")
            span.output = output

            if self.log_output:
                output_object_hash = span.log_output(
                    "output",
                    output,
                    label=f"{self.label}.output",
                )

                # Link the output to the inputs
                for input_object_hash in input_object_hashes:
                    span.run.link_objects(output_object_hash, input_object_hash)

            # Score the output and attach each metric with the output as origin.
            for scorer in self.scorers:
                metric = await scorer(output)
                span.log_metric(scorer.name, metric, origin=output)

            return span

    async def __call__(self, *args: P.args, **kwargs: P.kwargs) -> R:
        # Convenience: run the task and unwrap just the output value.
        span = await self.run(*args, **kwargs)
        return span.output

    # NOTE(nick): Not sure I'm in love with these being instance methods here.
    # We could move them to the top level class maybe.

    async def map_run(
        self,
        count: int,
        *args: P.args,
        **kwargs: P.kwargs,
    ) -> TaskSpanList[R]:
        """
        Run the task multiple times and return a list of spans.

        Args:
            count: The number of times to run the task.
            args: The arguments to pass to the task.
            kwargs: The keyword arguments to pass to the task.

        Returns:
            A TaskSpanList associated with each task execution.
        """
        # All executions share the same arguments and run concurrently.
        spans = await asyncio.gather(*[self.run(*args, **kwargs) for _ in range(count)])
        return TaskSpanList(spans)

    async def map(self, count: int, *args: P.args, **kwargs: P.kwargs) -> list[R]:
        """
        Run the task multiple times and return a list of outputs.

        Args:
            count: The number of times to run the task.
            args: The arguments to pass to the task.
            kwargs: The keyword arguments to pass to the task.

        Returns:
            A list of outputs from each task execution.
        """
        spans = await self.map_run(count, *args, **kwargs)
        return [span.output for span in spans]

    async def top_n(
        self,
        count: int,
        n: int,
        *args: P.args,
        **kwargs: P.kwargs,
    ) -> list[R]:
        """
        Run the task multiple times and return the top n outputs.

        Args:
            count: The number of times to run the task.
            n: The number of top outputs to return.
            args: The arguments to pass to the task.
            kwargs: The keyword arguments to pass to the task.

        Returns:
            A list of the top n outputs from the task executions.
        """
        spans = await self.map_run(count, *args, **kwargs)
        return spans.top_n(n, as_outputs=True)

    async def try_run(self, *args: P.args, **kwargs: P.kwargs) -> TaskSpan[R] | None:
        """
        Attempt to run the task and return the result as a TaskSpan.
        If the task fails, a warning is logged and None is returned.

        Args:
            args: The arguments to pass to the task.
            kwargs: The keyword arguments to pass to the task.

        Returns:
            The span associated with task execution, or None if the task failed.
        """
        try:
            return await self.run(*args, **kwargs)
        except Exception:  # noqa: BLE001
            # Best-effort semantics: surface the failure as a warning (with
            # traceback) instead of propagating it.
            warn_at_user_stacklevel(
                f"Task '{self.name}' ({self.label}) failed:\n{traceback.format_exc()}",
                TaskFailedWarning,
            )
            return None

    async def try_(self, *args: P.args, **kwargs: P.kwargs) -> R | None:
        """
        Attempt to run the task and return the result.
        If the task fails, a warning is logged and None is returned.

        Args:
            args: The arguments to pass to the task.
            kwargs: The keyword arguments to pass to the task.

        Returns:
            The output of the task, or None if the task failed.
        """
        span = await self.try_run(*args, **kwargs)
        return span.output if span else None

    async def try_map_run(
        self,
        count: int,
        *args: P.args,
        **kwargs: P.kwargs,
    ) -> TaskSpanList[R]:
        """
        Attempt to run the task multiple times and return a list of spans.
        If any task fails, a warning is logged and None is returned for that task.

        Args:
            count: The number of times to run the task.
            args: The arguments to pass to the task.
            kwargs: The keyword arguments to pass to the task.

        Returns:
            A TaskSpanList associated with each task execution.
        """
        spans = await asyncio.gather(
            *[self.try_run(*args, **kwargs) for _ in range(count)],
        )
        # Failed executions yield None from try_run and are dropped here.
        return TaskSpanList([span for span in spans if span])

    async def try_top_n(
        self,
        count: int,
        n: int,
        *args: P.args,
        **kwargs: P.kwargs,
    ) -> list[R]:
        """
        Attempt to run the task multiple times and return the top n outputs.
        If any task fails, a warning is logged and None is returned for that task.

        Args:
            count: The number of times to run the task.
            n: The number of top outputs to return.
            args: The arguments to pass to the task.
            kwargs: The keyword arguments to pass to the task.

        Returns:
            A list of the top n outputs from the task executions.
        """
        spans = await self.try_map_run(count, *args, **kwargs)
        return spans.top_n(n, as_outputs=True)

    async def try_map(self, count: int, *args: P.args, **kwargs: P.kwargs) -> list[R]:
        """
        Attempt to run the task multiple times and return a list of outputs.
        If any task fails, a warning is logged and None is returned for that task.

        Args:
            count: The number of times to run the task.
            args: The arguments to pass to the task.
            kwargs: The keyword arguments to pass to the task.

        Returns:
            A list of outputs from each task execution.
        """
        spans = await self.try_map_run(count, *args, **kwargs)
        return [span.output for span in spans if span]
|
|
File without changes
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import typing as t

# All dreadnode span/event attribute keys share this namespace prefix.
SPAN_NAMESPACE = "dreadnode"

# The kinds of spans the tracing layer can emit.
SpanType = t.Literal["run", "task", "span", "run_update"]

# --- Span attribute keys ---
SPAN_ATTRIBUTE_VERSION = f"{SPAN_NAMESPACE}.version"
SPAN_ATTRIBUTE_TYPE = f"{SPAN_NAMESPACE}.type"
SPAN_ATTRIBUTE_SCHEMA = f"{SPAN_NAMESPACE}.schema"
SPAN_ATTRIBUTE_LABEL = f"{SPAN_NAMESPACE}.label"
# NOTE(review): trailing underscore in the constant name is inconsistent with
# its siblings but is part of the public module surface — do not rename.
SPAN_ATTRIBUTE_TAGS_ = f"{SPAN_NAMESPACE}.tags"
SPAN_ATTRIBUTE_PROJECT = f"{SPAN_NAMESPACE}.project"
SPAN_ATTRIBUTE_PARAMS = f"{SPAN_NAMESPACE}.params"
SPAN_ATTRIBUTE_INPUTS = f"{SPAN_NAMESPACE}.inputs"
SPAN_ATTRIBUTE_METRICS = f"{SPAN_NAMESPACE}.metrics"
SPAN_ATTRIBUTE_OUTPUTS = f"{SPAN_NAMESPACE}.outputs"
SPAN_ATTRIBUTE_OBJECTS = f"{SPAN_NAMESPACE}.objects"
SPAN_ATTRIBUTE_OBJECT_SCHEMAS = f"{SPAN_NAMESPACE}.object_schemas"
SPAN_ATTRIBUTE_ARTIFACTS = f"{SPAN_NAMESPACE}.artifacts"
SPAN_ATTRIBUTE_RUN_ID = f"{SPAN_NAMESPACE}.run.id"
SPAN_ATTRIBUTE_PARENT_TASK_ID = f"{SPAN_NAMESPACE}.task.parent_id"
SPAN_ATTRIBUTE_LARGE_ATTRIBUTES = f"{SPAN_NAMESPACE}.large_attributes"

# --- Span event names ---
EVENT_NAME_OBJECT = f"{SPAN_NAMESPACE}.object"
EVENT_NAME_OBJECT_INPUT = f"{SPAN_NAMESPACE}.object.input"
EVENT_NAME_OBJECT_OUTPUT = f"{SPAN_NAMESPACE}.object.output"
EVENT_NAME_OBJECT_METRIC = f"{SPAN_NAMESPACE}.object.metric"
EVENT_NAME_OBJECT_LINK = f"{SPAN_NAMESPACE}.object.link"

# --- Event attribute keys ---
EVENT_ATTRIBUTE_OBJECT_LABEL = f"{SPAN_NAMESPACE}.object.label"
EVENT_ATTRIBUTE_OBJECT_HASH = f"{SPAN_NAMESPACE}.object.hash"
EVENT_ATTRIBUTE_LINK_HASH = f"{SPAN_NAMESPACE}.link.hash"
EVENT_ATTRIBUTE_ORIGIN_SPAN_ID = f"{SPAN_NAMESPACE}.origin.span_id"

# --- Metric attribute keys ---
METRIC_ATTRIBUTE_SOURCE_HASH = f"{SPAN_NAMESPACE}.origin.hash"
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import threading
import typing as t
from collections.abc import Sequence
from dataclasses import dataclass, field
from pathlib import Path
from typing import IO

from google.protobuf import json_format
from opentelemetry.exporter.otlp.proto.common._log_encoder import encode_logs
from opentelemetry.exporter.otlp.proto.common.metrics_encoder import encode_metrics
from opentelemetry.exporter.otlp.proto.common.trace_encoder import encode_spans
from opentelemetry.sdk._logs import LogData
from opentelemetry.sdk._logs.export import LogExporter, LogExportResult
from opentelemetry.sdk.metrics.export import (
    MetricReader,
    MetricsData,
)
from opentelemetry.sdk.trace import ReadableSpan
from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult

from dreadnode.util import logger
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
|
|
25
|
+
class FileExportConfig:
|
|
26
|
+
"""Configuration for signal exports to JSONL files."""
|
|
27
|
+
|
|
28
|
+
base_path: str | Path = Path.cwd() / ".dreadnode"
|
|
29
|
+
prefix: str = ""
|
|
30
|
+
|
|
31
|
+
def get_path(self, signal: str) -> Path:
|
|
32
|
+
"""Get the file path for a specific signal type."""
|
|
33
|
+
base = Path(self.base_path)
|
|
34
|
+
base.mkdir(parents=True, exist_ok=True)
|
|
35
|
+
return base / f"{self.prefix}{signal}.jsonl"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class FileMetricReader(MetricReader):
    """MetricReader that appends OTLP-encoded metrics as JSON lines to a file."""

    def __init__(self, config: FileExportConfig):
        super().__init__()
        self.config = config
        self._lock = threading.Lock()
        self._file: IO[str] | None = None

    @property
    def file(self) -> IO[str]:
        # Open the output file lazily, in append mode, on first access.
        if not self._file:
            self._file = self.config.get_path("metrics").open("a")
        return self._file

    def _receive_metrics(
        self,
        metrics_data: MetricsData,
        timeout_millis: float = 10_000,  # noqa: ARG002
        **kwargs: t.Any,  # noqa: ARG002
    ) -> None:
        if metrics_data is None:
            return

        try:
            payload = json_format.MessageToJson(encode_metrics(metrics_data), indent=None)
            with self._lock:
                out = self.file
                out.write(payload + "\n")
                out.flush()
        except Exception as e:  # noqa: BLE001
            # Export is best-effort: log and carry on rather than disrupt the app.
            logger.error(f"Failed to export metrics: {e}")

    def shutdown(
        self,
        timeout_millis: float = 30_000,  # noqa: ARG002
        **kwargs: t.Any,  # noqa: ARG002
    ) -> None:
        # Close the output file (if it was ever opened) exactly once.
        with self._lock:
            if self._file:
                self._file.close()
                self._file = None
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class FileSpanExporter(SpanExporter):
    """SpanExporter that appends OTLP-encoded spans as JSON lines to a file."""

    def __init__(self, config: FileExportConfig):
        self.config = config
        self._lock = threading.Lock()
        self._file: IO[str] | None = None

    @property
    def file(self) -> IO[str]:
        # Open the output file lazily, in append mode, on first access.
        if not self._file:
            self._file = self.config.get_path("traces").open("a")
        return self._file

    def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
        try:
            payload = json_format.MessageToJson(encode_spans(spans), indent=None)
            with self._lock:
                out = self.file
                out.write(payload + "\n")
                out.flush()
        except Exception as e:  # noqa: BLE001
            logger.error(f"Failed to export spans: {e}")
            return SpanExportResult.FAILURE
        return SpanExportResult.SUCCESS

    def force_flush(
        self,
        timeout_millis: float = 30_000,  # noqa: ARG002
    ) -> bool:
        return True  # We flush above

    def shutdown(self) -> None:
        # Close the output file (if it was ever opened) exactly once.
        with self._lock:
            if self._file:
                self._file.close()
                self._file = None
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class FileLogExporter(LogExporter):
    """LogExporter that appends OTLP-encoded logs as JSON lines to a file."""

    def __init__(self, config: FileExportConfig):
        self.config = config
        self._lock = threading.Lock()
        self._file: IO[str] | None = None

    @property
    def file(self) -> IO[str]:
        # Open the output file lazily, in append mode, on first access.
        if not self._file:
            self._file = self.config.get_path("logs").open("a")
        return self._file

    def export(self, batch: Sequence[LogData]) -> LogExportResult:
        try:
            payload = json_format.MessageToJson(encode_logs(batch), indent=None)
            with self._lock:
                out = self.file
                out.write(payload + "\n")
                out.flush()
        except Exception as e:  # noqa: BLE001
            logger.error(f"Failed to export logs: {e}")
            return LogExportResult.FAILURE
        return LogExportResult.SUCCESS

    def force_flush(
        self,
        timeout_millis: float = 30_000,  # noqa: ARG002
    ) -> bool:
        return True

    def shutdown(self) -> None:
        # Close the output file (if it was ever opened) exactly once.
        with self._lock:
            if self._file:
                self._file.close()
                self._file = None
|