lmnr 0.4.22__tar.gz → 0.4.24__tar.gz
This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- {lmnr-0.4.22 → lmnr-0.4.24}/PKG-INFO +2 -1
- {lmnr-0.4.22 → lmnr-0.4.24}/pyproject.toml +3 -2
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/__init__.py +2 -0
- lmnr-0.4.24/src/lmnr/sdk/datasets.py +58 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/evaluations.py +22 -38
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/laminar.py +87 -4
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/log.py +10 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/types.py +7 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/decorators/base.py +11 -10
- lmnr-0.4.24/src/lmnr/traceloop_sdk/tracing/attributes.py +35 -0
- lmnr-0.4.22/src/lmnr/traceloop_sdk/tracing/attributes.py +0 -9
- {lmnr-0.4.22 → lmnr-0.4.24}/LICENSE +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/README.md +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/cli.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/__init__.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/decorators.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/utils.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/.flake8 +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/.python-version +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/__init__.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/config/__init__.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/decorators/__init__.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/instruments.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/__init__.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_and_external_association_properties.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_association_properties.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_manual_report.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_resource_attributes.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_privacy_no_prompts/test_simple_workflow.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_prompt_management/test_prompt_management.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_sdk_initialization/test_resource_attributes.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_tasks/test_task_io_serialization_with_langchain.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_aworkflow.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_workflow.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_streaming_workflow.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/conftest.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/test_association_properties.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/test_manual.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/test_nested_tasks.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/test_privacy_no_prompts.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/test_sdk_initialization.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/test_tasks.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/test_workflows.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tracing/__init__.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tracing/content_allow_list.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tracing/context_manager.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tracing/tracing.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/utils/__init__.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/utils/in_memory_span_exporter.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/utils/json_encoder.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/utils/package_check.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/version.py +0 -0
{lmnr-0.4.22 → lmnr-0.4.24}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lmnr
-Version: 0.4.22
+Version: 0.4.24
 Summary: Python SDK for Laminar AI
 License: Apache-2.0
 Author: lmnr.ai
@@ -16,6 +16,7 @@ Requires-Dist: argparse (>=1.0,<2.0)
 Requires-Dist: backoff (>=2.0,<3.0)
 Requires-Dist: deprecated (>=1.0,<2.0)
 Requires-Dist: jinja2 (>=3.0,<4.0)
+Requires-Dist: openai (>=1.52.0,<2.0.0)
 Requires-Dist: opentelemetry-api (>=1.27.0,<2.0.0)
 Requires-Dist: opentelemetry-exporter-otlp-proto-grpc (>=1.27.0,<2.0.0)
 Requires-Dist: opentelemetry-exporter-otlp-proto-http (>=1.27.0,<2.0.0)
{lmnr-0.4.22 → lmnr-0.4.24}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "lmnr"
-version = "0.4.22"
+version = "0.4.24"
 description = "Python SDK for Laminar AI"
 authors = [
     { name = "lmnr.ai", email = "founders@lmnr.ai" }
@@ -11,7 +11,7 @@ license = "Apache-2.0"
 
 [tool.poetry]
 name = "lmnr"
-version = "0.4.22"
+version = "0.4.24"
 description = "Python SDK for Laminar AI"
 authors = ["lmnr.ai"]
 readme = "README.md"
@@ -62,6 +62,7 @@ opentelemetry-instrumentation-groq = ">=0.33.1"
 tqdm = "~=4.0"
 argparse = "~=1.0"
 
+openai = "^1.52.0"
 [tool.poetry.group.dev.dependencies]
 autopep8 = "^2.2.0"
 flake8 = "7.0.0"
{lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/__init__.py

@@ -1,5 +1,7 @@
+from .sdk.datasets import EvaluationDataset, LaminarDataset
 from .sdk.evaluations import evaluate
 from .sdk.laminar import Laminar
 from .sdk.types import ChatMessage, PipelineRunError, PipelineRunResponse, NodeInput
 from .sdk.decorators import observe
 from .traceloop_sdk import Instruments
+from .traceloop_sdk.tracing.attributes import Attributes
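Both additions are re-exported from the package root, so with lmnr >= 0.4.24 installed the new APIs can be imported directly:

```python
# New top-level exports in 0.4.24.
from lmnr import LaminarDataset, Attributes
```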
lmnr-0.4.24/src/lmnr/sdk/datasets.py (new file)

@@ -0,0 +1,58 @@
+from abc import ABC, abstractmethod
+import logging
+
+from .log import get_default_logger
+from .laminar import Laminar as L
+from .types import (
+    Datapoint,
+)
+
+DEFAULT_FETCH_SIZE = 25
+
+
+class EvaluationDataset(ABC):
+    @abstractmethod
+    def __init__(self, *args, **kwargs):
+        pass
+
+    @abstractmethod
+    def __len__(self) -> int:
+        pass
+
+    @abstractmethod
+    def __getitem__(self, idx) -> Datapoint:
+        pass
+
+    def slice(self, start: int, end: int):
+        return [self[i] for i in range(max(start, 0), min(end, len(self)))]
+
+
+class LaminarDataset(EvaluationDataset):
+    def __init__(self, name: str, fetch_size: int = DEFAULT_FETCH_SIZE):
+        self.name = name
+        self._len = None
+        self._fetched_items = []
+        self._offset = 0
+        self._fetch_size = fetch_size
+        self._logger = get_default_logger(self.__class__.__name__, level=logging.DEBUG)
+
+    def _fetch_batch(self):
+        self._logger.debug(
+            f"dataset {self.name}. Fetching batch from {self._offset} to "
+            + f"{self._offset + self._fetch_size}"
+        )
+        resp = L.get_datapoints(self.name, self._offset, self._fetch_size)
+        self._fetched_items += resp.items
+        self._offset = len(self._fetched_items)
+        if self._len is None:
+            self._len = resp.totalCount
+
+    def __len__(self) -> int:
+        if self._len is None:
+            self._fetch_batch()
+        return self._len
+
+    def __getitem__(self, idx) -> Datapoint:
+        if idx >= len(self._fetched_items):
+            self._fetch_batch()
+        return self._fetched_items[idx]
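`LaminarDataset` pages datapoints lazily through `Laminar.get_datapoints` (added to laminar.py below), `DEFAULT_FETCH_SIZE` items at a time. A minimal usage sketch; the dataset name is a placeholder and the executor/evaluator are illustrative stand-ins:

```python
from lmnr import LaminarDataset, evaluate

# Batches of 25 (DEFAULT_FETCH_SIZE) are fetched on demand rather than
# downloading the whole dataset up front.
dataset = LaminarDataset("my_dataset")  # placeholder dataset name

evaluate(
    data=dataset,
    executor=lambda data: data,  # stand-in for the code under evaluation
    evaluators={"nonempty": lambda output, target: int(bool(output))},
)
```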
{lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/evaluations.py

@@ -1,17 +1,18 @@
 import asyncio
 import re
 import sys
-from abc import ABC, abstractmethod
-from contextlib import contextmanager
-from typing import Any, Awaitable, Optional, Set, Union
 import uuid
 
+from contextlib import contextmanager
 from tqdm import tqdm
+from typing import Any, Awaitable, Optional, Set, Union
 
 from ..traceloop_sdk.instruments import Instruments
 from ..traceloop_sdk.tracing.attributes import SPAN_TYPE
 
+from .datasets import EvaluationDataset
 from .laminar import Laminar as L
+from .log import get_default_logger
 from .types import (
     Datapoint,
     EvaluationResultDatapoint,
@@ -84,7 +85,7 @@ class EvaluationReporter:
     ):
         self.cli_progress.close()
         print(
-            f"\nCheck
+            f"\nCheck the results at {get_evaluation_url(project_id, evaluation_id)}\n"
         )
         print("Average scores:")
         for name, score in average_scores.items():
@@ -92,23 +93,6 @@ class EvaluationReporter:
         print("\n")
 
 
-class EvaluationDataset(ABC):
-    @abstractmethod
-    def __init__(self, *args, **kwargs):
-        pass
-
-    @abstractmethod
-    def __len__(self) -> int:
-        pass
-
-    @abstractmethod
-    def __getitem__(self, idx) -> Datapoint:
-        pass
-
-    def slice(self, start: int, end: int):
-        return [self[i] for i in range(max(start, 0), min(end, len(self)))]
-
-
 class Evaluation:
     def __init__(
         self,
@@ -135,14 +119,13 @@ class Evaluation:
             executor (Callable[..., Any]): The executor function.\
                 Takes the data point + any additional arguments\
                 and returns the output to evaluate.
-            evaluators (
-                Each evaluator function takes the output of the executor
-                the target data, and returns a score. The score can be a\
-                single number or a
+            evaluators (dict[str, Callable[..., Any]]): Evaluator functions and\
+                names. Each evaluator function takes the output of the executor\
+                _and_ the target data, and returns a score. The score can be a\
+                single number or a dict of string keys and number values.\
                 If the score is a single number, it will be named after the\
-                evaluator function.
-
-                evaluator function in the list starting from 1.
+                evaluator function. Evaluator function names must contain only\
+                letters, digits, hyphens, underscores, or spaces.
             group_id (Optional[str], optional): Group id of the evaluation.
                 Defaults to "default".
             name (Optional[str], optional): The name of the evaluation.\
@@ -191,6 +174,7 @@ class Evaluation:
         self.group_id = group_id
         self.name = name
         self.batch_size = batch_size
+        self._logger = get_default_logger(self.__class__.__name__)
         L.initialize(
             project_api_key=project_api_key,
             base_url=base_url,
@@ -215,7 +199,7 @@ class Evaluation:
         )
 
         try:
-            result_datapoints = await self.
+            result_datapoints = await self._evaluate_in_batches()
         except Exception as e:
             self.reporter.stopWithError(e)
             self.is_finished = True
@@ -228,7 +212,7 @@ class Evaluation:
         self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
         self.is_finished = True
 
-    async def
+    async def _evaluate_in_batches(self) -> list[EvaluationResultDatapoint]:
         result_datapoints = []
         for i in range(0, len(self.data), self.batch_size):
             batch = (
@@ -326,14 +310,14 @@ def evaluate(
        executor (Callable[..., Any]): The executor function.\
            Takes the data point + any additional arguments\
            and returns the output to evaluate.
-        evaluators (List[Callable[..., Any]]):
-
-
-
-
-
-
-
+        evaluators (List[Callable[..., Any]]):
+        evaluators (dict[str, Callable[..., Any]]): Evaluator functions and\
+            names. Each evaluator function takes the output of the executor\
+            _and_ the target data, and returns a score. The score can be a\
+            single number or a dict of string keys and number values.\
+            If the score is a single number, it will be named after the\
+            evaluator function. Evaluator function names must contain only\
+            letters, digits, hyphens, underscores, or spaces.
        group_id (Optional[str], optional): an identifier to group evaluations.\
            It is practical to group evaluations that evaluate\
            the same feature on the same dataset, to be able to
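With evaluators now passed as a name-to-function dict, score names are explicit rather than derived from list positions. A hedged sketch of the calling convention; the datapoint shape and executor are illustrative:

```python
from lmnr import evaluate

def exact_match(output, target) -> int:
    # Compares the executor output against the target datapoint.
    return int(output == target.get("expected"))

evaluate(
    data=[{"data": {"question": "What is 2 + 2?"}, "target": {"expected": "4"}}],
    executor=lambda data: "4",  # stand-in for a real model call
    # Dict keys name the scores; per the docstring they may only contain
    # letters, digits, hyphens, underscores, or spaces.
    evaluators={"exact-match": exact_match},
)
```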
{lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/laminar.py

@@ -9,11 +9,13 @@ from opentelemetry.util.types import AttributeValue
 from opentelemetry.context import set_value, attach, detach
 from lmnr.traceloop_sdk import Traceloop
 from lmnr.traceloop_sdk.tracing import get_tracer
+from lmnr.traceloop_sdk.tracing.attributes import Attributes, SPAN_TYPE
+from lmnr.traceloop_sdk.decorators.base import json_dumps
 from contextlib import contextmanager
 from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
 
 from pydantic.alias_generators import to_snake
-from typing import Any, Optional, Set, Union
+from typing import Any, Literal, Optional, Set, Union
 
 import copy
 import datetime
@@ -22,6 +24,7 @@ import json
 import logging
 import os
 import requests
+import urllib.parse
 import uuid
 
 from lmnr.traceloop_sdk.tracing.attributes import (
@@ -43,6 +46,7 @@ from .log import VerboseColorfulFormatter
 from .types import (
     CreateEvaluationResponse,
     EvaluationResultDatapoint,
+    GetDatapointsResponse,
     PipelineRunError,
     PipelineRunResponse,
     NodeInput,
@@ -279,20 +283,27 @@ class Laminar:
         cls,
         name: str,
         input: Any = None,
+        span_type: Union[Literal["DEFAULT"], Literal["LLM"]] = "DEFAULT",
     ):
         """Start a new span as the current span. Useful for manual
-        instrumentation.
+        instrumentation. If `span_type` is set to `"LLM"`, you should report
+        usage and response attributes manually. See `Laminar.set_span_attributes`
+        for more information.
 
         Usage example:
         ```python
         with Laminar.start_as_current_span("my_span", input="my_input") as span:
             await my_async_function()
+            Laminar.set_span_output("my_output")`
         ```
 
         Args:
             name (str): name of the span
             input (Any, optional): input to the span. Will be sent as an\
                 attribute, so must be json serializable. Defaults to None.
+            span_type (Union[Literal["DEFAULT"], Literal["LLM"]], optional):\
+                type of the span. If you use `"LLM"`, you should report usage\
+                and response attributes manually. Defaults to "DEFAULT".
         """
         with get_tracer() as tracer:
             span_path = get_span_path(name)
@@ -308,6 +319,7 @@ class Laminar:
                     SPAN_INPUT,
                     json.dumps(input),
                 )
+            span.set_attribute(SPAN_TYPE, span_type)
             yield span
 
             # TODO: Figure out if this is necessary
@@ -327,7 +339,52 @@ class Laminar:
         """
         span = get_current_span()
         if output is not None and span != INVALID_SPAN:
-            span.set_attribute(SPAN_OUTPUT,
+            span.set_attribute(SPAN_OUTPUT, json_dumps(output))
+
+    @classmethod
+    def set_span_attributes(
+        cls,
+        attributes: dict[Attributes, Any],
+    ):
+        """Set attributes for the current span. Useful for manual
+        instrumentation.
+        Example:
+        ```python
+        with L.start_as_current_span(
+            name="my_span_name", input=input["messages"], span_type="LLM"
+        ):
+            response = await my_custom_call_to_openai(input)
+            L.set_span_output(response["choices"][0]["message"]["content"])
+            L.set_span_attributes({
+                Attributes.PROVIDER: 'openai',
+                Attributes.REQUEST_MODEL: input["model"],
+                Attributes.RESPONSE_MODEL: response["model"],
+                Attributes.INPUT_TOKEN_COUNT: response["usage"]["prompt_tokens"],
+                Attributes.OUTPUT_TOKEN_COUNT: response["usage"]["completion_tokens"],
+            })
+            # ...
+        ```
+
+        Args:
+            attributes (dict[ATTRIBUTES, Any]): attributes to set for the span
+        """
+        span = get_current_span()
+        if span == INVALID_SPAN:
+            return
+
+        for key, value in attributes.items():
+            # Python 3.12+ should do: if key not in Attributes:
+            try:
+                Attributes(key.value)
+            except (TypeError, AttributeError):
+                cls.__logger.warning(
+                    f"Attribute {key} is not a valid Laminar attribute."
+                )
+                continue
+            if not isinstance(value, (str, int, float, bool)):
+                span.set_attribute(key.value, json_dumps(value))
+            else:
+                span.set_attribute(key.value, value)
 
     @classmethod
     def set_session(
@@ -399,10 +456,36 @@ class Laminar:
             try:
                 resp_json = response.json()
                 raise ValueError(f"Error creating evaluation {json.dumps(resp_json)}")
-            except
+            except requests.exceptions.RequestException:
                 raise ValueError(f"Error creating evaluation {response.text}")
         return CreateEvaluationResponse.model_validate(response.json())
 
+    @classmethod
+    def get_datapoints(
+        cls,
+        dataset_name: str,
+        offset: int,
+        limit: int,
+    ) -> GetDatapointsResponse:
+        params = {"name": dataset_name, "offset": offset, "limit": limit}
+        url = (
+            cls.__base_http_url
+            + "/v1/datasets/datapoints?"
+            + urllib.parse.urlencode(params)
+        )
+        response = requests.get(url, headers=cls._headers())
+        if response.status_code != 200:
+            try:
+                resp_json = response.json()
+                raise ValueError(
+                    f"Error fetching datapoints: [{response.status_code}] {json.dumps(resp_json)}"
+                )
+            except requests.exceptions.RequestException:
+                raise ValueError(
+                    f"Error fetching datapoints: [{response.status_code}] {response.text}"
+                )
+        return GetDatapointsResponse.model_validate(response.json())
+
     @classmethod
     def _headers(cls):
         assert cls.__project_api_key is not None, "Project API key is not set"
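`LaminarDataset._fetch_batch` is the intended consumer of this endpoint, but it can also be called directly once the SDK is initialized. A sketch; the API key and dataset name are placeholders:

```python
from lmnr import Laminar as L

L.initialize(project_api_key="<project-api-key>")  # placeholder key
resp = L.get_datapoints(dataset_name="my_dataset", offset=0, limit=25)
print(f"fetched {len(resp.items)} of {resp.totalCount} datapoints")
```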
{lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/log.py

@@ -37,3 +37,13 @@ class VerboseFormatter(CustomFormatter):
     def format(self, record):
         formatter = logging.Formatter(self.fmt)
         return formatter.format(record)
+
+
+def get_default_logger(name: str, level: int = logging.INFO, propagate: bool = False):
+    logger = logging.getLogger(name)
+    logger.setLevel(level)
+    console_log_handler = logging.StreamHandler()
+    console_log_handler.setFormatter(VerboseColorfulFormatter())
+    logger.addHandler(console_log_handler)
+    logger.propagate = propagate
+    return logger
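Both `LaminarDataset` and `Evaluation` use this helper for their internal logging; a quick sketch of its behavior:

```python
import logging
from lmnr.sdk.log import get_default_logger

# Console logger with the SDK's colorful formatter; propagation is off
# by default, so records are not duplicated through the root logger.
logger = get_default_logger("my-component", level=logging.DEBUG)
logger.debug("fetching batch 0 to 25")
```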
{lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/types.py

@@ -79,6 +79,7 @@ class PipelineRunError(Exception):
 
 EvaluationDatapointData = dict[str, Any]
 EvaluationDatapointTarget = dict[str, Any]
+EvaluationDatapointMetadata = Optional[dict[str, Any]]
 
 
 # EvaluationDatapoint is a single data point in the evaluation
@@ -88,6 +89,7 @@ class Datapoint(pydantic.BaseModel):
     # input to the evaluator function (alongside the executor output).
     # Must be a dict with string keys
     target: EvaluationDatapointTarget
+    metadata: EvaluationDatapointMetadata = pydantic.Field(default=None)
 
 
 ExecutorFunctionReturnType = Any
@@ -153,3 +155,8 @@ class TraceType(Enum):
     DEFAULT = "DEFAULT"
     EVENT = "EVENT"  # must not be set manually
     EVALUATION = "EVALUATION"
+
+
+class GetDatapointsResponse(pydantic.BaseModel):
+    items: list[Datapoint]
+    totalCount: int
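A datapoint can now carry optional metadata alongside its data and target. A sketch, assuming `Datapoint` also declares a `data` field just above the hunk shown:

```python
from lmnr.sdk.types import Datapoint

dp = Datapoint(
    data={"question": "What is 2 + 2?"},  # executor input (assumed field)
    target={"expected": "4"},             # evaluator input
    metadata={"source": "unit-test"},     # new in 0.4.24, defaults to None
)
```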
{lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/decorators/base.py

@@ -1,9 +1,10 @@
 import json
 from functools import wraps
+import logging
 import os
+import pydantic
 import types
 from typing import Any, Optional
-import warnings
 
 from opentelemetry import trace
 from opentelemetry import context as context_api
@@ -17,20 +18,20 @@ from lmnr.traceloop_sdk.utils.json_encoder import JSONEncoder
 
 class CustomJSONEncoder(JSONEncoder):
     def default(self, o: Any) -> Any:
+        if isinstance(o, pydantic.BaseModel):
+            return o.model_dump_json()
         try:
             return super().default(o)
         except TypeError:
             return str(o)  # Fallback to string representation for unsupported types
 
 
-def
+def json_dumps(data: dict) -> str:
     try:
-
-        warnings.simplefilter("ignore", RuntimeWarning)
-        return json.dumps(data, cls=CustomJSONEncoder)
+        return json.dumps(data, cls=CustomJSONEncoder)
     except Exception:
         # Log the exception and return a placeholder if serialization completely fails
-
+        logging.warning("Failed to serialize data to JSON, type: %s", type(data))
         return "{}"  # Return an empty JSON object as a fallback
 
 
@@ -59,7 +60,7 @@ def entity_method(
             if _should_send_prompts():
                 span.set_attribute(
                     SPAN_INPUT,
-
+                    json_dumps(
                         get_input_from_func_args(
                             fn, is_method(fn), args, kwargs
                         )
@@ -78,7 +79,7 @@ def entity_method(
             if _should_send_prompts():
                 span.set_attribute(
                     SPAN_OUTPUT,
-
+                    json_dumps(res),
                 )
         except TypeError:
             pass
@@ -121,7 +122,7 @@ def aentity_method(
             if _should_send_prompts():
                 span.set_attribute(
                     SPAN_INPUT,
-
+                    json_dumps(
                         get_input_from_func_args(
                             fn, is_method(fn), args, kwargs
                         )
@@ -138,7 +139,7 @@ def aentity_method(
 
         try:
             if _should_send_prompts():
-                span.set_attribute(SPAN_OUTPUT,
+                span.set_attribute(SPAN_OUTPUT, json_dumps(res))
         except TypeError:
             pass
 
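The net effect is that pydantic models now serialize cleanly inside span inputs and outputs, with `str()` and `"{}"` as graceful fallbacks. A quick sketch; the model is illustrative:

```python
import pydantic
from lmnr.traceloop_sdk.decorators.base import json_dumps

class Reply(pydantic.BaseModel):
    text: str

# BaseModel instances hit the new isinstance branch in CustomJSONEncoder;
# unsupported types fall back to str(), and total failure yields "{}".
print(json_dumps({"reply": Reply(text="hi")}))
```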
lmnr-0.4.24/src/lmnr/traceloop_sdk/tracing/attributes.py (new file)

@@ -0,0 +1,35 @@
+from enum import Enum
+from opentelemetry.semconv_ai import SpanAttributes
+
+SPAN_INPUT = "lmnr.span.input"
+SPAN_OUTPUT = "lmnr.span.output"
+SPAN_TYPE = "lmnr.span.type"
+SPAN_PATH = "lmnr.span.path"
+
+ASSOCIATION_PROPERTIES = "lmnr.association.properties"
+SESSION_ID = "session_id"
+USER_ID = "user_id"
+TRACE_TYPE = "trace_type"
+
+
+# exposed to the user, configurable
+class Attributes(Enum):
+    # == This is the minimum set of attributes for a proper LLM span ==
+    #
+    # not SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
+    INPUT_TOKEN_COUNT = "gen_ai.usage.input_tokens"
+    # not SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
+    OUTPUT_TOKEN_COUNT = "gen_ai.usage.output_tokens"
+    TOTAL_TOKEN_COUNT = SpanAttributes.LLM_USAGE_TOTAL_TOKENS
+    PROVIDER = SpanAttributes.LLM_SYSTEM
+    REQUEST_MODEL = SpanAttributes.LLM_REQUEST_MODEL
+    RESPONSE_MODEL = SpanAttributes.LLM_RESPONSE_MODEL
+    #
+    ## == End of minimum set ==
+    # == Additional attributes ==
+    #
+    INPUT_COST = "gen_ai.usage.input_cost"
+    OUTPUT_COST = "gen_ai.usage.output_cost"
+    TOTAL_COST = "gen_ai.usage.cost"
+    #
+    # == End of additional attributes ==
lmnr-0.4.22/src/lmnr/traceloop_sdk/tracing/attributes.py (deleted; replaced by the expanded module above)

@@ -1,9 +0,0 @@
-SPAN_INPUT = "lmnr.span.input"
-SPAN_OUTPUT = "lmnr.span.output"
-SPAN_TYPE = "lmnr.span.type"
-SPAN_PATH = "lmnr.span.path"
-
-ASSOCIATION_PROPERTIES = "lmnr.association.properties"
-SESSION_ID = "session_id"
-USER_ID = "user_id"
-TRACE_TYPE = "trace_type"
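Putting the new surface together, a manually instrumented LLM span might look like the sketch below; the model names and token counts are placeholders, and the API names come from the hunks above:

```python
from lmnr import Attributes, Laminar

Laminar.initialize(project_api_key="<project-api-key>")  # placeholder key

with Laminar.start_as_current_span(
    "chat", input=[{"role": "user", "content": "hi"}], span_type="LLM"
):
    # A real integration would call the model here (e.g. via the new
    # openai dependency); the output below is hard-coded for illustration.
    Laminar.set_span_output("hello")
    Laminar.set_span_attributes({
        Attributes.PROVIDER: "openai",
        Attributes.REQUEST_MODEL: "gpt-4o-mini",
        Attributes.RESPONSE_MODEL: "gpt-4o-mini",
        Attributes.INPUT_TOKEN_COUNT: 3,
        Attributes.OUTPUT_TOKEN_COUNT: 1,
    })
```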