PyPI - botanu - Versions diffs - 0.1.dev63__tar.gz → 0.1.dev68__tar.gz - Mend

botanu 0.1.dev63tar.gz → 0.1.dev68tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

{botanu-0.1.dev63 → botanu-0.1.dev68}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: botanu
-Version: 0.1.dev63
+Version: 0.1.dev68
 Summary: OpenTelemetry-native run-level cost attribution for AI workflows
 Project-URL: Homepage, https://github.com/botanu-ai/botanu-sdk-python
 Project-URL: Documentation, https://docs.botanu.ai
@@ -110,6 +110,8 @@ This SDK is built on [OpenTelemetry](https://opentelemetry.io/) for event-level
 ## Getting Started
 An **event** is one business transaction — resolving a support ticket, processing
 an order, generating a report. Each event may involve multiple **runs** (LLM calls,
 retries, sub-workflows) across multiple services. By correlating every run to a

{botanu-0.1.dev63 → botanu-0.1.dev68}/README.md RENAMED Viewed

@@ -8,6 +8,8 @@ This SDK is built on [OpenTelemetry](https://opentelemetry.io/) for event-level
 ## Getting Started
 An **event** is one business transaction — resolving a support ticket, processing
 an order, generating a report. Each event may involve multiple **runs** (LLM calls,
 retries, sub-workflows) across multiple services. By correlating every run to a

{botanu-0.1.dev63 → botanu-0.1.dev68}/src/botanu/__init__.py RENAMED Viewed

@@ -23,6 +23,9 @@ from botanu._version import __version__
 # Run context model
 from botanu.models.run_context import RunContext, RunOutcome, RunStatus
+# Processors
+from botanu.processors import RunContextEnricher, SampledSpanProcessor
 # Bootstrap
 from botanu.sdk.bootstrap import (
     disable,
@@ -73,4 +76,7 @@ __all__ = [
     "RunContext",
     "RunStatus",
     "RunOutcome",
+    # Processors
+    "RunContextEnricher",
+    "SampledSpanProcessor",
 ]

{botanu-0.1.dev63 → botanu-0.1.dev68}/src/botanu/models/run_context.py RENAMED Viewed

@@ -89,6 +89,7 @@ class RunContext:
     event_id: str
     customer_id: str
     environment: str
+    step: Optional[str] = None
     workflow_version: Optional[str] = None
     tenant_id: Optional[str] = None
     parent_run_id: Optional[str] = None
@@ -270,7 +271,10 @@ class RunContext:
             if self.cancelled_at:
                 attrs["botanu.run.cancelled_at"] = self.cancelled_at
         if self.outcome:
-            attrs["botanu.outcome.status"] = self.outcome.status.value
+            # `botanu.outcome.status` is NOT emitted (removed 2026-04-16):
+            # customer-reported outcome is trivially fakeable. Event outcome
+            # is derived from eval verdict rollup / HITL / SoR instead.
+            # Remaining fields are diagnostic only and stay for debugging.
             if self.outcome.reason_code:
                 attrs["botanu.outcome.reason_code"] = self.outcome.reason_code
             if self.outcome.error_class:

{botanu-0.1.dev63 → botanu-0.1.dev68}/src/botanu/processors/__init__.py RENAMED Viewed

@@ -8,5 +8,7 @@ All other processing should happen in the OTel Collector.
 """
 from botanu.processors.enricher import RunContextEnricher
+from botanu.processors.resource_enricher import ResourceEnricher
+from botanu.processors.sampled import SampledSpanProcessor
-__all__ = ["RunContextEnricher"]
+__all__ = ["RunContextEnricher", "ResourceEnricher", "SampledSpanProcessor"]

botanu-0.1.dev68/src/botanu/processors/resource_enricher.py ADDED Viewed

@@ -0,0 +1,179 @@
+# SPDX-FileCopyrightText: 2026 The Botanu Authors
+# SPDX-License-Identifier: Apache-2.0
+"""ResourceEnricher — infer `botanu.cloud_provider` + `botanu.bytes_transferred`
+from OTel semantic-convention attributes set by auto-instrumentation.
+Why this exists: the cost worker (botanu-cost-engine-workflow) prices non-LLM
+spans via `rate × bytes_transferred` and looks up rate cards keyed by
+`cloud_provider + system_name`. OTel auto-instrumentation emits the raw
+attributes (`db.system`, `http.request.body.size`, `aws.service`, etc.) but
+does NOT emit botanu-namespaced attributes in the shape the cost worker
+reads. Without this enricher, S3 PUTs, DynamoDB ops, and egress all price to
+$0 — see the `pricing.md` problem statement.
+Attributes written:
+- `botanu.cloud_provider`       ("aws" | "gcp" | "azure" | …)
+- `botanu.bytes_transferred`    (int, sent + received combined)
+The enricher is purely additive. It leaves all original OTel attributes
+intact — no customer observability breaks.
+Explicit values set by `set_bytes_transferred()` / `cloud_provider=` kwarg on
+trackers take precedence: this enricher only writes if the target attribute
+is not already present (checked at `on_end` time via the span's attribute
+dict).
+"""
+from __future__ import annotations
+import logging
+from typing import Mapping, Optional
+from opentelemetry import context
+from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor
+logger = logging.getLogger(__name__)
+# System/service → cloud provider. Used when the semconv `cloud.provider`
+# attribute is absent (most auto-instrumentations don't set it, so we infer
+# from the db/messaging system name or the AWS/Azure/GCP service name).
+# Keys must be lowercase: `_infer_cloud_provider` lowercases the attribute
+# value before looking it up in this table.
+_SYSTEM_TO_CLOUD_PROVIDER: dict[str, str] = {
+    # AWS
+    "dynamodb": "aws",
+    "s3": "aws",
+    "sqs": "aws",
+    "sns": "aws",
+    "kinesis": "aws",
+    "eventbridge": "aws",
+    "lambda": "aws",
+    "elasticache": "aws",
+    "redshift": "aws",
+    "athena": "aws",
+    "neptune": "aws",
+    "efs": "aws",
+    # GCP
+    "firestore": "gcp",
+    "bigquery": "gcp",
+    "gcs": "gcp",
+    "pubsub": "gcp",
+    # Azure
+    "cosmosdb": "azure",
+    "azure_blob": "azure",
+    "servicebus": "azure",
+    "eventhub": "azure",
+    "synapse": "azure",
+}
+# Names of the span attributes this module writes.
+_BOTANU_CLOUD_PROVIDER = "botanu.cloud_provider"
+_BOTANU_BYTES_TRANSFERRED = "botanu.bytes_transferred"
+class ResourceEnricher(SpanProcessor):
+    """Write botanu-namespaced resource attributes from OTel semconv data.
+    Runs at `on_end` (not `on_start`) — auto-instrumentation populates the
+    source attributes on span start, but some (notably http.*.body.size) are
+    only known when the response completes.
+    The processor keeps no state: `on_start`, `shutdown` and `force_flush`
+    are no-ops, and `on_end` only adds `botanu.cloud_provider` /
+    `botanu.bytes_transferred` when they are absent from the span.
+    NOTE(review): `on_end` runs after the span has ended. Recent
+    opentelemetry-sdk releases appear to hand processors a read-only snapshot
+    at this point and to ignore `set_attribute` on ended spans, in which case
+    this enrichment silently does nothing — verify against the pinned SDK
+    version.
+    """
+    def on_start(self, span: Span, parent_context: Optional[context.Context] = None) -> None:
+        """Do nothing; all enrichment is deferred to `on_end`."""
+        # Cheap path: no work at start. Waiting until on_end lets us read
+        # response-time attributes that auto-instrumentation sets after the
+        # wrapped call returns (bytes, status codes, etc.).
+        return
+    def on_end(self, span: ReadableSpan) -> None:
+        """Infer and attach botanu cost attributes to a finished span.
+        Does nothing for LLM spans, when neither value can be inferred, or
+        when the span object exposes no `set_attribute` method. Attributes
+        already present on the span are never overwritten.
+        """
+        # `span.attributes` may be None; treat that as "no attributes".
+        attrs = span.attributes or {}
+        # Skip LLM spans entirely — LLM pricing goes through pricing_model_tokens
+        # (prompt/completion tokens), not bytes_transferred. Writing bytes here
+        # would double-count into cost_infra_usd.
+        if _is_llm_span(attrs):
+            return
+        cloud_provider = _infer_cloud_provider(attrs)
+        bytes_transferred = _infer_bytes_transferred(attrs)
+        if cloud_provider is None and bytes_transferred is None:
+            return
+        # Writing to a ReadableSpan: OTel SDK's ReadableSpan is read-only by
+        # contract, but the concrete _Span class exposes set_attribute. If
+        # the attribute is already set (explicit API or customer), skip —
+        # explicit beats inferred.
+        setter = getattr(span, "set_attribute", None)
+        if setter is None:
+            # No writable API on this span object: silently give up.
+            return
+        if cloud_provider is not None and _BOTANU_CLOUD_PROVIDER not in attrs:
+            setter(_BOTANU_CLOUD_PROVIDER, cloud_provider)
+        if bytes_transferred is not None and _BOTANU_BYTES_TRANSFERRED not in attrs:
+            setter(_BOTANU_BYTES_TRANSFERRED, bytes_transferred)
+    def shutdown(self) -> None:
+        """Do nothing; the processor owns no resources."""
+        pass
+    def force_flush(self, timeout_millis: int = 30000) -> bool:
+        """Report success immediately; nothing is buffered."""
+        return True
+def _is_llm_span(attrs: Mapping[str, object]) -> bool:
+    """Return True when the attributes carry any LLM/GenAI marker key.
+    Only key presence is checked; the associated values are ignored.
+    """
+    llm_marker_keys = ("gen_ai.request.model", "gen_ai.system", "llm.request.model")
+    return any(marker in attrs for marker in llm_marker_keys)
+def _infer_cloud_provider(attrs: Mapping[str, object]) -> Optional[str]:
+    """Derive a cloud provider name from span attributes.
+    Resolution order (first match wins):
+    1. a non-empty string `cloud.provider`, lowercased;
+    2. vendor marker attributes (AWS, then GCP, then Azure);
+    3. a system name found in `_SYSTEM_TO_CLOUD_PROVIDER`.
+    Returns None when nothing matches.
+    """
+    # 1. An explicit semconv value is trusted as-is (normalized to lowercase).
+    declared = attrs.get("cloud.provider")
+    if isinstance(declared, str) and declared:
+        return declared.lower()
+    # 2. Vendor markers: `rpc.system="aws-api"`, or the mere presence of a
+    # vendor-namespaced attribute.
+    if attrs.get("rpc.system") == "aws-api":
+        return "aws"
+    vendor_markers = (
+        ("aws", ("aws.service", "aws.region")),
+        ("gcp", ("gcp.service", "gcp.project_id")),
+        ("azure", ("azure.resource", "azure.namespace")),
+    )
+    for vendor, marker_keys in vendor_markers:
+        if any(marker in attrs for marker in marker_keys):
+            return vendor
+    # 3. Fall back to the system name (db.system, messaging.system,
+    # botanu.storage.system), matched case-insensitively.
+    for system_key in ("db.system", "messaging.system", "botanu.storage.system"):
+        system_name = attrs.get(system_key)
+        if not isinstance(system_name, str):
+            continue
+        mapped = _SYSTEM_TO_CLOUD_PROVIDER.get(system_name.lower())
+        if mapped:
+            return mapped
+    return None
+def _infer_bytes_transferred(attrs: Mapping[str, object]) -> Optional[int]:
+    """Sum the byte counters found on a span.
+    The OTel HTTP semconv sizes (`http.request.body.size` +
+    `http.response.body.size`) are preferred. The botanu tracker counters
+    are used only when no valid HTTP size is present, so the two sources are
+    never added together.
+    A value counts only if it is a non-negative `int`. `bool` is rejected
+    explicitly: it is a subclass of `int`, so a stray `True` would otherwise
+    be counted as one byte and suppress the tracker fallback.
+    Returns:
+        The combined byte count, or None when no valid counter was found.
+    """
+    def _valid_counts(keys: tuple) -> list:
+        # Collect the usable byte counts stored under `keys`, in order.
+        return [
+            value
+            for value in map(attrs.get, keys)
+            if isinstance(value, int) and not isinstance(value, bool) and value >= 0
+        ]
+    # OTel HTTP semconv (stable)
+    http_counts = _valid_counts(("http.request.body.size", "http.response.body.size"))
+    if http_counts:
+        return sum(http_counts)
+    # botanu tracker attrs (fallback — populated by DBTracker.set_result etc.)
+    tracker_counts = _valid_counts(
+        (
+            "botanu.data.bytes_read",
+            "botanu.data.bytes_written",
+            "botanu.messaging.bytes_transferred",
+            "botanu.warehouse.bytes_scanned",
+        )
+    )
+    return sum(tracker_counts) if tracker_counts else None

botanu-0.1.dev68/src/botanu/processors/sampled.py ADDED Viewed

@@ -0,0 +1,86 @@
+# SPDX-FileCopyrightText: 2026 The Botanu Authors
+# SPDX-License-Identifier: Apache-2.0
+"""SampledSpanProcessor — preserves the customer's sampling ratio.
+When botanu changes the TracerProvider sampler to AlwaysOn (to capture 100%),
+existing customer processors (Datadog exporter, Jaeger exporter, etc.) would
+suddenly see a multiple of their previous span volume if the customer had
+ratio-based sampling (for example, 10x for a 10% ratio).
+This processor wraps an existing processor and applies the customer's original
+ratio at the export level. Result: the customer's exporter sees the same volume
+as before, their bill is unchanged, their dashboards are unchanged.
+botanu's own processor is NOT wrapped — it sees 100%.
+Sampling is deterministic: the same trace_id always gets the same decision.
+This matches OTel's ``TraceIdRatioBasedSampler`` algorithm.
+"""
+from __future__ import annotations
+import logging
+from typing import Optional
+from opentelemetry import context
+from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor
+from opentelemetry.trace import Span
+logger = logging.getLogger(__name__)
+class SampledSpanProcessor(SpanProcessor):
+    """Wraps a SpanProcessor with deterministic ratio sampling.
+    The decision depends only on the trace_id, so every span of a trace gets
+    the same decision and `on_start` / `on_end` always agree for a span.
+    The comparison mirrors the OpenTelemetry Python SDK's `TraceIdRatioBased`
+    sampler (low 64 bits of the trace_id, strict `<` against
+    `round(ratio * 2**64)`), so the wrapped processor keeps receiving the
+    same subset of traces it received under the original head sampler.
+    Args:
+        wrapped: The original processor to wrap (e.g., BatchSpanProcessor
+            sending to Datadog).
+        ratio: Sampling ratio (0.0 to 1.0). 0.1 means 10% of spans are
+            forwarded to the wrapped processor.
+    Raises:
+        ValueError: If `ratio` is outside [0.0, 1.0] (NaN included).
+    """
+    # Mask selecting the low 64 bits of the 128-bit trace_id.
+    _TRACE_ID_LIMIT = (1 << 64) - 1
+    def __init__(self, wrapped: SpanProcessor, ratio: float) -> None:
+        if not 0.0 <= ratio <= 1.0:
+            raise ValueError(f"ratio must be between 0.0 and 1.0, got {ratio}")
+        self._wrapped = wrapped
+        self._ratio = ratio
+        # Pre-compute bound for comparison (avoids per-span float math).
+        # Exclusive upper bound over the 2**64 possible masked values.
+        self._bound = round(ratio * (self._TRACE_ID_LIMIT + 1))
+    def _should_sample(self, trace_id: int) -> bool:
+        """Deterministic sampling decision based on trace_id.
+        Uses the low 64 bits of the 128-bit trace_id and a strict
+        comparison against the pre-computed bound. Same trace_id always
+        produces the same decision.
+        """
+        # Explicit fast paths keep the 0% / 100% cases exact.
+        if self._ratio >= 1.0:
+            return True
+        if self._ratio <= 0.0:
+            return False
+        return (trace_id & self._TRACE_ID_LIMIT) < self._bound
+    def on_start(
+        self,
+        span: Span,
+        parent_context: Optional[context.Context] = None,
+    ) -> None:
+        """Forward `on_start` to the wrapped processor for sampled traces only."""
+        # Gate on_start with the same decision as on_end. Forwarding on_start
+        # unconditionally while gating on_end orphans spans inside wrapped
+        # processors (BatchSpanProcessor, Datadog exporter, etc.) — they hold
+        # start-time bookkeeping for spans whose on_end never fires. Over time
+        # this leaks memory in the customer's process.
+        if self._should_sample(span.context.trace_id):
+            self._wrapped.on_start(span, parent_context)
+    def on_end(self, span: ReadableSpan) -> None:
+        """Forward `on_end` to the wrapped processor for sampled traces only."""
+        if self._should_sample(span.context.trace_id):
+            self._wrapped.on_end(span)
+    def shutdown(self) -> None:
+        """Shut down the wrapped processor."""
+        self._wrapped.shutdown()
+    def force_flush(self, timeout_millis: int = 30000) -> bool:
+        """Flush the wrapped processor and return its result."""
+        return self._wrapped.force_flush(timeout_millis)

botanu-0.1.dev68/src/botanu/register.py ADDED Viewed

@@ -0,0 +1,50 @@
+# SPDX-FileCopyrightText: 2026 The Botanu Authors
+# SPDX-License-Identifier: Apache-2.0
+"""Zero-code initialization entry point.
+Import this module to auto-initialize Botanu SDK with no code changes:
+``botanu.sdk.bootstrap.enable()`` is called as an import-time side effect.
+All configuration is read from environment variables or botanu.yaml.
+NOTE(review): the import must happen inside the process that runs the
+application. The first two examples below chain two separate interpreter
+invocations with ``&&``, so the initializing process appears to exit before
+``app.py`` starts; likewise the uvicorn example only loads an env file and
+never imports this module. Confirm and correct these before relying on them.
+Usage::
+    # As a Python module flag
+    python -m botanu.register && python app.py
+    # Or via PYTHONPATH preload (works with gunicorn, uvicorn, etc.)
+    python -c "import botanu.register" && python app.py
+    # Or in gunicorn config
+    # gunicorn.conf.py:
+    def on_starting(server):
+        import botanu.register  # noqa: F401
+    # Or in uvicorn
+    uvicorn app:app --env-file .env
+    # Or in Dockerfile
+    ENV BOTANU_API_KEY=btnu_live_...
+    ENV BOTANU_SERVICE_NAME=my-service
+    CMD ["python", "-c", "import botanu.register; import uvicorn; uvicorn.run('app:app')"]
+Configuration (env vars or botanu.yaml):
+    BOTANU_API_KEY        - API key (required for Botanu Cloud)
+    BOTANU_SERVICE_NAME   - Service name (recommended)
+    BOTANU_ENVIRONMENT    - Environment (default: production)
+See docs/getting-started/configuration.md for full options.
+"""
+from __future__ import annotations
+import logging
+from botanu.sdk.bootstrap import enable
+logger = logging.getLogger(__name__)
+# Import-time side effect: initialize the SDK once, when this module is first
+# imported. `result` stays available as a module attribute; a truthy value is
+# treated as "initialized" and logged.
+result = enable()
+if result:
+    logger.info("Botanu SDK auto-initialized via botanu.register")

botanu-0.1.dev68/src/botanu/sampling/__init__.py ADDED Viewed

@@ -0,0 +1,8 @@
+# SPDX-FileCopyrightText: 2026 The Botanu Authors
+# SPDX-License-Identifier: Apache-2.0
+"""Sampling primitives — content capture gate, future trace samplers."""
+from botanu.sampling.content_sampler import should_capture_content
+__all__ = ["should_capture_content"]

botanu-0.1.dev68/src/botanu/sampling/content_sampler.py ADDED Viewed

@@ -0,0 +1,39 @@
+# SPDX-FileCopyrightText: 2026 The Botanu Authors
+# SPDX-License-Identifier: Apache-2.0
+"""Content capture sampling gate for eval.
+MVP: simple ``random.random() < rate`` check. The ``event_id`` parameter is
+accepted now so that a Month 2+ upgrade to hash-based deterministic sampling
+(SHA-256 of ``tenant_id || event_id``) won't break callers. Deterministic
+sampling matters for replays and backfills; simple random is sufficient for
+MVP volume.
+"""
+from __future__ import annotations
+import random
+from typing import Optional
+def should_capture_content(rate: float, event_id: Optional[str] = None) -> bool:
+    """Decide whether the content of the current call should be captured.
+    Args:
+        rate: Capture rate in [0.0, 1.0]. Values at or below 0.0 never
+            capture (the privacy-safe default); values at or above 1.0
+            always capture (sandbox/shadow). Anything in between captures
+            with that probability. Production typically uses 0.10–0.20.
+        event_id: Currently unused. Kept in the signature so a future
+            deterministic-hash implementation can be swapped in without
+            API churn.
+    Returns:
+        True if content should be captured for this call.
+    Examples:
+        >>> should_capture_content(0.0)
+        False
+        >>> should_capture_content(1.0)
+        True
+    """
+    if rate <= 0.0:
+        return False
+    # Short-circuit keeps the always-capture case from drawing a random number.
+    return rate >= 1.0 or random.random() < rate

{botanu-0.1.dev63 → botanu-0.1.dev68}/src/botanu/sdk/__init__.py RENAMED Viewed

@@ -15,7 +15,11 @@ from botanu.sdk.context import (
     set_baggage,
 )
 from botanu.sdk.decorators import botanu_outcome, botanu_workflow, run_botanu, workflow
-from botanu.sdk.span_helpers import emit_outcome, set_business_context
+from botanu.sdk.span_helpers import (
+    emit_outcome,
+    set_business_context,
+    set_correlation,
+)
 __all__ = [
     "BotanuConfig",
@@ -33,5 +37,6 @@ __all__ = [
     "run_botanu",
     "set_baggage",
     "set_business_context",
+    "set_correlation",
     "workflow",
 ]

botanu 0.1.dev63__tar.gz → 0.1.dev68__tar.gz

botanu 0.1.dev63tar.gz → 0.1.dev68tar.gz