lmnr-0.4.17b0-py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. lmnr/__init__.py +5 -0
  2. lmnr/cli.py +39 -0
  3. lmnr/sdk/__init__.py +0 -0
  4. lmnr/sdk/decorators.py +66 -0
  5. lmnr/sdk/evaluations.py +354 -0
  6. lmnr/sdk/laminar.py +403 -0
  7. lmnr/sdk/log.py +39 -0
  8. lmnr/sdk/types.py +155 -0
  9. lmnr/sdk/utils.py +99 -0
  10. lmnr/traceloop_sdk/.flake8 +12 -0
  11. lmnr/traceloop_sdk/.python-version +1 -0
  12. lmnr/traceloop_sdk/__init__.py +89 -0
  13. lmnr/traceloop_sdk/config/__init__.py +9 -0
  14. lmnr/traceloop_sdk/decorators/__init__.py +0 -0
  15. lmnr/traceloop_sdk/decorators/base.py +178 -0
  16. lmnr/traceloop_sdk/instruments.py +34 -0
  17. lmnr/traceloop_sdk/tests/__init__.py +1 -0
  18. lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_and_external_association_properties.yaml +101 -0
  19. lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_association_properties.yaml +99 -0
  20. lmnr/traceloop_sdk/tests/cassettes/test_manual/test_manual_report.yaml +98 -0
  21. lmnr/traceloop_sdk/tests/cassettes/test_manual/test_resource_attributes.yaml +98 -0
  22. lmnr/traceloop_sdk/tests/cassettes/test_privacy_no_prompts/test_simple_workflow.yaml +199 -0
  23. lmnr/traceloop_sdk/tests/cassettes/test_prompt_management/test_prompt_management.yaml +202 -0
  24. lmnr/traceloop_sdk/tests/cassettes/test_sdk_initialization/test_resource_attributes.yaml +199 -0
  25. lmnr/traceloop_sdk/tests/cassettes/test_tasks/test_task_io_serialization_with_langchain.yaml +96 -0
  26. lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_aworkflow.yaml +98 -0
  27. lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_workflow.yaml +199 -0
  28. lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_streaming_workflow.yaml +167 -0
  29. lmnr/traceloop_sdk/tests/conftest.py +111 -0
  30. lmnr/traceloop_sdk/tests/test_association_properties.py +229 -0
  31. lmnr/traceloop_sdk/tests/test_manual.py +48 -0
  32. lmnr/traceloop_sdk/tests/test_nested_tasks.py +47 -0
  33. lmnr/traceloop_sdk/tests/test_privacy_no_prompts.py +50 -0
  34. lmnr/traceloop_sdk/tests/test_sdk_initialization.py +57 -0
  35. lmnr/traceloop_sdk/tests/test_tasks.py +32 -0
  36. lmnr/traceloop_sdk/tests/test_workflows.py +262 -0
  37. lmnr/traceloop_sdk/tracing/__init__.py +1 -0
  38. lmnr/traceloop_sdk/tracing/attributes.py +9 -0
  39. lmnr/traceloop_sdk/tracing/content_allow_list.py +24 -0
  40. lmnr/traceloop_sdk/tracing/context_manager.py +13 -0
  41. lmnr/traceloop_sdk/tracing/tracing.py +913 -0
  42. lmnr/traceloop_sdk/utils/__init__.py +26 -0
  43. lmnr/traceloop_sdk/utils/in_memory_span_exporter.py +61 -0
  44. lmnr/traceloop_sdk/utils/json_encoder.py +20 -0
  45. lmnr/traceloop_sdk/utils/package_check.py +8 -0
  46. lmnr/traceloop_sdk/version.py +1 -0
  47. lmnr-0.4.17b0.dist-info/LICENSE +75 -0
  48. lmnr-0.4.17b0.dist-info/METADATA +250 -0
  49. lmnr-0.4.17b0.dist-info/RECORD +50 -0
  50. lmnr-0.4.17b0.dist-info/WHEEL +4 -0
lmnr/__init__.py ADDED
@@ -0,0 +1,5 @@
+ from .sdk.evaluations import evaluate
+ from .sdk.laminar import Laminar
+ from .sdk.types import ChatMessage, PipelineRunError, PipelineRunResponse, NodeInput
+ from .sdk.decorators import observe
+ from .traceloop_sdk import Instruments
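These five re-exports define the package's public surface, so application code imports from `lmnr` directly rather than from the submodules. For example (illustrative only; each name resolves to the submodule listed above, e.g. `observe` to lmnr.sdk.decorators and `evaluate` to lmnr.sdk.evaluations):

from lmnr import evaluate, Laminar, ChatMessage, observe, Instruments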
lmnr/cli.py ADDED
@@ -0,0 +1,39 @@
+ from argparse import ArgumentParser
+ import asyncio
+ import importlib
+ import os
+ import sys
+
+ from lmnr.sdk.evaluations import set_global_evaluation
+
+
+ # TODO: Refactor this code
+ async def run_evaluation(args):
+     sys.path.insert(0, os.getcwd())
+
+     with set_global_evaluation(True):
+         file = os.path.abspath(args.file)
+
+         spec = importlib.util.spec_from_file_location("run_eval", file)
+         mod = importlib.util.module_from_spec(spec)
+         spec.loader.exec_module(mod)
+
+         from lmnr.sdk.evaluations import _evaluation
+         evaluation = _evaluation
+         await evaluation.run()
+
+
+ def cli():
+     parser = ArgumentParser(
+         prog="lmnr",
+         description="CLI for Laminar",
+     )
+
+     subparsers = parser.add_subparsers(title="subcommands", dest="subcommand")
+
+     parser_eval = subparsers.add_parser("eval", description="Run an evaluation")
+     parser_eval.add_argument("file", help="A file containing the evaluation to run")
+     parser_eval.set_defaults(func=run_evaluation)
+
+     parsed = parser.parse_args()
+     asyncio.run(parsed.func(parsed))
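The `eval` subcommand above imports the target file as a module while `set_global_evaluation(True)` is active, so a top-level `evaluate(...)` call in that file only registers the evaluation, and the CLI then awaits `_evaluation.run()`. A sketch of an evaluation file driven this way, assuming a `Datapoint`-style dict with `data` and `target` keys as the docstrings in lmnr/sdk/evaluations.py describe (the file name, executor, and evaluator are made up for illustration):

# my_eval.py -- run with: lmnr eval my_eval.py
from lmnr import evaluate

def capitalize(data: dict) -> str:
    # Executor: produces the output to be scored.
    return data["text"].capitalize()

def exact_match(output: str, target: str) -> int:
    # Evaluator: compares executor output with the target.
    return 1 if output == target else 0

# Under `lmnr eval` this call only registers the evaluation; the CLI runs it.
evaluate(
    data=[{"data": {"text": "hello world"}, "target": "Hello world"}],
    executor=capitalize,
    evaluators={"exact_match": exact_match},
)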
lmnr/sdk/__init__.py ADDED
File without changes
lmnr/sdk/decorators.py ADDED
@@ -0,0 +1,66 @@
+ from lmnr.traceloop_sdk.decorators.base import (
+     entity_method,
+     aentity_method,
+ )
+ from opentelemetry.trace import INVALID_SPAN, get_current_span
+
+ from typing import Callable, Optional, TypeVar, cast
+ from typing_extensions import ParamSpec
+
+ from lmnr.traceloop_sdk.tracing.attributes import SESSION_ID, USER_ID
+ from lmnr.traceloop_sdk.tracing.tracing import update_association_properties
+
+ from .utils import is_async
+
+
+ P = ParamSpec("P")
+ R = TypeVar("R")
+
+
+ def observe(
+     *,
+     name: Optional[str] = None,
+     user_id: Optional[str] = None,
+     session_id: Optional[str] = None,
+ ) -> Callable[[Callable[P, R]], Callable[P, R]]:
+     """The main decorator entrypoint for Laminar. This is used to wrap
+     functions and methods to create spans.
+
+     Args:
+         name (Optional[str], optional): Name of the span. Function
+             name is used if not specified.
+             Defaults to None.
+         user_id (Optional[str], optional): User ID to associate
+             with the span and the following context.
+             Defaults to None.
+         session_id (Optional[str], optional): Session ID to associate with the
+             span and the following context. Defaults to None.
+
+     Raises:
+         Exception: re-raises the exception if the wrapped function raises
+             an exception
+
+     Returns:
+         R: Returns the result of the wrapped function
+     """
+
+     def decorator(func: Callable) -> Callable:
+         current_span = get_current_span()
+         if current_span != INVALID_SPAN:
+             if session_id is not None:
+                 current_span.set_attribute(SESSION_ID, session_id)
+             if user_id is not None:
+                 current_span.set_attribute(USER_ID, user_id)
+         association_properties = {}
+         if session_id is not None:
+             association_properties["session_id"] = session_id
+         if user_id is not None:
+             association_properties["user_id"] = user_id
+         update_association_properties(association_properties)
+         return (
+             aentity_method(name=name)(func)
+             if is_async(func)
+             else entity_method(name=name)(func)
+         )
+
+     return cast(Callable, decorator)
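As the docstring above notes, `observe` wraps both sync and async callables, dispatching to `entity_method` or `aentity_method` via `is_async`, and attaches `session_id`/`user_id` as association properties. A minimal usage sketch (the function, IDs, and API key are hypothetical):

from lmnr import Laminar, observe

Laminar.initialize(project_api_key="<project-api-key>")  # hypothetical key

@observe(name="summarize", session_id="session-123", user_id="user-42")
async def summarize(text: str) -> str:
    # Runs inside a span named "summarize"; exceptions are re-raised.
    return text[:100]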
lmnr/sdk/evaluations.py ADDED
@@ -0,0 +1,354 @@
+ import asyncio
+ import re
+ import sys
+ from abc import ABC, abstractmethod
+ from contextlib import contextmanager
+ from typing import Any, Awaitable, Optional, Set, Union
+ import uuid
+
+ from tqdm import tqdm
+
+ from ..traceloop_sdk.instruments import Instruments
+ from ..traceloop_sdk.tracing.attributes import SPAN_TYPE
+
+ from .laminar import Laminar as L
+ from .types import (
+     Datapoint,
+     EvaluationResultDatapoint,
+     EvaluatorFunction,
+     ExecutorFunction,
+     Numeric,
+     NumericTypes,
+     SpanType,
+     TraceType,
+ )
+ from .utils import is_async
+
+ DEFAULT_BATCH_SIZE = 5
+
+ _evaluation = None
+ _set_global_evaluation = False
+
+
+ @contextmanager
+ def set_global_evaluation(set_global_evaluation: bool):
+     global _set_global_evaluation
+     original = _set_global_evaluation
+     try:
+         _set_global_evaluation = set_global_evaluation
+         yield
+     finally:
+         _set_global_evaluation = original
+         pass
+
+
+ def get_evaluation_url(project_id: str, evaluation_id: str):
+     return f"https://www.lmnr.ai/project/{project_id}/evaluations/{evaluation_id}"
+
+
+ def get_average_scores(results: list[EvaluationResultDatapoint]) -> dict[str, Numeric]:
+     per_score_values = {}
+     for result in results:
+         for key, value in result.scores.items():
+             if key not in per_score_values:
+                 per_score_values[key] = []
+             per_score_values[key].append(value)
+
+     average_scores = {}
+     for key, values in per_score_values.items():
+         average_scores[key] = sum(values) / len(values)
+
+     return average_scores
+
+
+ class EvaluationReporter:
+     def __init__(self):
+         pass
+
+     def start(self, length: int):
+         self.cli_progress = tqdm(
+             total=length,
+             bar_format="{bar} {percentage:3.0f}% | ETA: {remaining}s | {n_fmt}/{total_fmt}",
+             ncols=60,
+         )
+
+     def update(self, batch_length: int):
+         self.cli_progress.update(batch_length)
+
+     def stopWithError(self, error: Exception):
+         self.cli_progress.close()
+         sys.stderr.write(f"\nError: {error}\n")
+
+     def stop(self, average_scores: dict[str, Numeric], project_id: str, evaluation_id: str):
+         self.cli_progress.close()
+         print(f"\nCheck progress and results at {get_evaluation_url(project_id, evaluation_id)}\n")
+         print("Average scores:")
+         for name, score in average_scores.items():
+             print(f"{name}: {score}")
+         print("\n")
+
+
+ class EvaluationDataset(ABC):
+     @abstractmethod
+     def __init__(self, *args, **kwargs):
+         pass
+
+     @abstractmethod
+     def __len__(self) -> int:
+         pass
+
+     @abstractmethod
+     def __getitem__(self, idx) -> Datapoint:
+         pass
+
+     def slice(self, start: int, end: int):
+         return [self[i] for i in range(max(start, 0), min(end, len(self)))]
+
+
+ class Evaluation:
+     def __init__(
+         self,
+         data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
+         executor: Any,
+         evaluators: dict[str, EvaluatorFunction],
+         group_id: Optional[str] = None,
+         name: Optional[str] = None,
+         batch_size: int = DEFAULT_BATCH_SIZE,
+         project_api_key: Optional[str] = None,
+         base_url: Optional[str] = None,
+         http_port: Optional[int] = None,
+         grpc_port: Optional[int] = None,
+         instruments: Optional[Set[Instruments]] = None,
+     ):
+         """
+         Initializes an instance of the Evaluations class.
+
+         Parameters:
+             data (Union[List[Union[EvaluationDatapoint, dict]], EvaluationDataset]): List of data points to evaluate or an evaluation dataset.
+                 `data` is the input to the executor function,
+                 `target` is the input to the evaluator function.
+             executor (Callable[..., Any]): The executor function.
+                 Takes the data point + any additional arguments
+                 and returns the output to evaluate.
+             evaluators (List[Callable[..., Any]]): List of evaluator functions.
+                 Each evaluator function takes the output of the executor _and_
+                 the target data, and returns a score. The score can be a
+                 single number or a record of string keys and number values.
+                 If the score is a single number, it will be named after the
+                 evaluator function. If the function is anonymous, it will be
+                 named `evaluator_${index}`, where index is the index of the
+                 evaluator function in the list starting from 1.
+             group_id (Optional[str], optional): Group id of the evaluation.
+                 Defaults to "default".
+             name (Optional[str], optional): The name of the evaluation.
+                 It will be auto-generated if not provided.
+             batch_size (int, optional): The batch size for evaluation.
+                 Defaults to DEFAULT_BATCH_SIZE.
+             project_api_key (Optional[str], optional): The project API key.
+                 Defaults to an empty string.
+             base_url (Optional[str], optional): The base URL for the Laminar API.
+                 Useful if self-hosted elsewhere.
+                 Defaults to "https://api.lmnr.ai".
+             http_port (Optional[int], optional): The port for the Laminar API HTTP service.
+                 Defaults to 443.
+             instruments (Optional[Set[Instruments]], optional): Set of modules to auto-instrument.
+                 Defaults to None. If None, all available instruments will be used.
+         """
+
+         if not evaluators:
+             raise ValueError("No evaluators provided")
+
+         # TODO: Compile regex once and then reuse it
+         for evaluator_name in evaluators:
+             if not re.match(r'^[\w\s-]+$', evaluator_name):
+                 raise ValueError(f'Invalid evaluator key: "{evaluator_name}". Keys must only contain letters, digits, hyphens, underscores, or spaces.')
+
+         self.is_finished = False
+         self.reporter = EvaluationReporter()
+         if isinstance(data, list):
+             self.data = [
+                 (Datapoint.model_validate(point) if isinstance(point, dict) else point)
+                 for point in data
+             ]
+         else:
+             self.data = data
+         self.executor = executor
+         self.evaluators = evaluators
+         self.group_id = group_id
+         self.name = name
+         self.batch_size = batch_size
+         L.initialize(
+             project_api_key=project_api_key,
+             base_url=base_url,
+             http_port=http_port,
+             grpc_port=grpc_port,
+             instruments=instruments,
+         )
+
+     def run(self) -> Union[None, Awaitable[None]]:
+         if self.is_finished:
+             raise Exception("Evaluation is already finished")
+
+         loop = asyncio.get_event_loop()
+         if loop.is_running():
+             return loop.create_task(self._run())
+         else:
+             return loop.run_until_complete(self._run())
+
+     async def _run(self) -> None:
+         self.reporter.start(
+             len(self.data),
+         )
+
+         try:
+             result_datapoints = await self.evaluate_in_batches()
+         except Exception as e:
+             self.reporter.stopWithError(e)
+             self.is_finished = True
+             return
+         else:
+             evaluation = L.create_evaluation(data=result_datapoints, group_id=self.group_id, name=self.name)
+             average_scores = get_average_scores(result_datapoints)
+             self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
+             self.is_finished = True
+
+     async def evaluate_in_batches(self) -> list[EvaluationResultDatapoint]:
+         result_datapoints = []
+         for i in range(0, len(self.data), self.batch_size):
+             batch = (
+                 self.data[i: i + self.batch_size]
+                 if isinstance(self.data, list)
+                 else self.data.slice(i, i + self.batch_size)
+             )
+             batch_datapoints = await self._evaluate_batch(batch)
+             result_datapoints.extend(batch_datapoints)
+             self.reporter.update(len(batch))
+         return result_datapoints
+
+     async def _evaluate_batch(
+         self, batch: list[Datapoint]
+     ) -> list[EvaluationResultDatapoint]:
+         batch_promises = [self._evaluate_datapoint(datapoint) for datapoint in batch]
+         results = await asyncio.gather(*batch_promises)
+         return results
+
+     async def _evaluate_datapoint(
+         self, datapoint: Datapoint
+     ) -> EvaluationResultDatapoint:
+         with L.start_as_current_span("evaluation") as evaluation_span:
+             L._set_trace_type(trace_type=TraceType.EVALUATION)
+             evaluation_span.set_attribute(SPAN_TYPE, SpanType.EVALUATION.value)
+             with L.start_as_current_span(
+                 "executor", input={"data": datapoint.data}
+             ) as executor_span:
+                 executor_span.set_attribute(SPAN_TYPE, SpanType.EXECUTOR.value)
+                 output = (
+                     await self.executor(datapoint.data)
+                     if is_async(self.executor)
+                     else self.executor(datapoint.data)
+                 )
+                 L.set_span_output(output)
+             target = datapoint.target
+
+             # Iterate over evaluators
+             scores: dict[str, Numeric] = {}
+             for evaluator_name, evaluator in self.evaluators.items():
+                 with L.start_as_current_span(
+                     evaluator_name, input={"output": output, "target": target}
+                 ) as evaluator_span:
+                     evaluator_span.set_attribute(SPAN_TYPE, SpanType.EVALUATOR.value)
+                     value = (
+                         await evaluator(output, target)
+                         if is_async(evaluator)
+                         else evaluator(output, target)
+                     )
+                     L.set_span_output(value)
+
+                 # If evaluator returns a single number, use evaluator name as key
+                 if isinstance(value, NumericTypes):
+                     scores[evaluator_name] = value
+                 else:
+                     scores.update(value)
+
+             trace_id = uuid.UUID(int=evaluation_span.get_span_context().trace_id)
+             return EvaluationResultDatapoint(
+                 data=datapoint.data,
+                 target=target,
+                 executor_output=output,
+                 scores=scores,
+                 trace_id=trace_id,
+             )
+
+
+ def evaluate(
+     data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
+     executor: ExecutorFunction,
+     evaluators: dict[str, EvaluatorFunction],
+     group_id: Optional[str] = None,
+     name: Optional[str] = None,
+     batch_size: int = DEFAULT_BATCH_SIZE,
+     project_api_key: Optional[str] = None,
+     base_url: Optional[str] = None,
+     http_port: Optional[int] = None,
+     grpc_port: Optional[int] = None,
+     instruments: Optional[Set[Instruments]] = None,
+ ) -> Optional[Awaitable[None]]:
+     """
+     If added to the file which is called through lmnr eval command, then simply registers the evaluation.
+     Otherwise, if there is no event loop, creates it and runs the evaluation until completion.
+     If there is an event loop, schedules the evaluation as a task in the event loop and returns an awaitable handle.
+
+     Parameters:
+         data (Union[List[Union[EvaluationDatapoint, dict]], EvaluationDataset]): List of data points to evaluate or an evaluation dataset.
+             `data` is the input to the executor function,
+             `target` is the input to the evaluator function.
+         executor (Callable[..., Any]): The executor function.
+             Takes the data point + any additional arguments
+             and returns the output to evaluate.
+         evaluators (List[Callable[..., Any]]): List of evaluator functions.
+             Each evaluator function takes the output of the executor _and_
+             the target data, and returns a score. The score can be a
+             single number or a record of string keys and number values.
+             If the score is a single number, it will be named after the
+             evaluator function. If the function is anonymous, it will be
+             named `evaluator_${index}`, where index is the index of the
+             evaluator function in the list starting from 1.
+         group_id (Optional[str], optional): Group name which is same
+             as the feature you are evaluating in your project or application.
+             Defaults to "default".
+         name (Optional[str], optional): Optional name of the evaluation. Used to easily
+             identify the evaluation in the group.
+         batch_size (int, optional): The batch size for evaluation.
+             Defaults to DEFAULT_BATCH_SIZE.
+         project_api_key (Optional[str], optional): The project API key.
+             Defaults to an empty string.
+         base_url (Optional[str], optional): The base URL for the Laminar API.
+             Useful if self-hosted elsewhere.
+             Defaults to "https://api.lmnr.ai".
+         http_port (Optional[int], optional): The port for the Laminar API HTTP service.
+             Defaults to 443.
+         grpc_port (Optional[int], optional): The port for the Laminar API gRPC service.
+             Defaults to 8443.
+         instruments (Optional[Set[Instruments]], optional): Set of modules to auto-instrument.
+             Defaults to None. If None, all available instruments will be used.
+     """
+
+     evaluation = Evaluation(
+         data=data,
+         executor=executor,
+         evaluators=evaluators,
+         group_id=group_id,
+         name=name,
+         batch_size=batch_size,
+         project_api_key=project_api_key,
+         base_url=base_url,
+         http_port=http_port,
+         grpc_port=grpc_port,
+         instruments=instruments,
+     )
+
+     global _evaluation
+     if _set_global_evaluation:
+         _evaluation = evaluation
+     else:
+         return evaluation.run()
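Outside of the `lmnr eval` entrypoint, `evaluate` either blocks until completion (no running event loop) or returns a task scheduled on the running loop, as `Evaluation.run` above shows. A sketch that pairs it with a custom `EvaluationDataset`; the `Datapoint(data=..., target=...)` construction is an assumption based on the docstrings, and the dataset contents are illustrative:

from lmnr import evaluate
from lmnr.sdk.evaluations import EvaluationDataset
from lmnr.sdk.types import Datapoint  # field names assumed from the docstrings above

class TinyDataset(EvaluationDataset):
    def __init__(self):
        self._points = [
            Datapoint(data={"text": "hello"}, target="HELLO"),
            Datapoint(data={"text": "world"}, target="WORLD"),
        ]

    def __len__(self) -> int:
        return len(self._points)

    def __getitem__(self, idx) -> Datapoint:
        return self._points[idx]

def shout(data: dict) -> str:
    # Placeholder executor.
    return data["text"].upper()

def correct(output: str, target: str) -> int:
    # Placeholder evaluator.
    return 1 if output == target else 0

# Blocks in a plain script; returns an awaitable if an event loop is already running.
evaluate(data=TinyDataset(), executor=shout, evaluators={"correct": correct}, batch_size=2)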