lmnr 0.4.11__py3-none-any.whl → 0.4.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lmnr/__init__.py +1 -1
- lmnr/cli.py +39 -0
- lmnr/sdk/decorators.py +3 -2
- lmnr/sdk/evaluations.py +245 -76
- lmnr/sdk/laminar.py +81 -44
- lmnr/sdk/types.py +44 -5
- lmnr/sdk/utils.py +4 -5
- lmnr/traceloop_sdk/__init__.py +3 -42
- lmnr/traceloop_sdk/config/__init__.py +0 -4
- lmnr/traceloop_sdk/decorators/base.py +16 -9
- lmnr/traceloop_sdk/tracing/attributes.py +8 -0
- lmnr/traceloop_sdk/tracing/tracing.py +31 -201
- {lmnr-0.4.11.dist-info → lmnr-0.4.12.dist-info}/METADATA +75 -101
- {lmnr-0.4.11.dist-info → lmnr-0.4.12.dist-info}/RECORD +17 -18
- lmnr-0.4.12.dist-info/entry_points.txt +3 -0
- lmnr/traceloop_sdk/metrics/__init__.py +0 -0
- lmnr/traceloop_sdk/metrics/metrics.py +0 -176
- lmnr/traceloop_sdk/tracing/manual.py +0 -57
- lmnr-0.4.11.dist-info/entry_points.txt +0 -3
- {lmnr-0.4.11.dist-info → lmnr-0.4.12.dist-info}/LICENSE +0 -0
- {lmnr-0.4.11.dist-info → lmnr-0.4.12.dist-info}/WHEEL +0 -0
lmnr/__init__.py
CHANGED
lmnr/cli.py
ADDED
@@ -0,0 +1,39 @@
+from argparse import ArgumentParser
+import asyncio
+import importlib
+import os
+import sys
+
+from lmnr.sdk.evaluations import set_global_evaluation
+
+
+# TODO: Refactor this code
+async def run_evaluation(args):
+    sys.path.insert(0, os.getcwd())
+
+    with set_global_evaluation(True):
+        file = os.path.abspath(args.file)
+
+        spec = importlib.util.spec_from_file_location("run_eval", file)
+        mod = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(mod)
+
+        from lmnr.sdk.evaluations import _evaluation
+        evaluation = _evaluation
+        await evaluation.run()
+
+
+def cli():
+    parser = ArgumentParser(
+        prog="lmnr",
+        description="CLI for Laminar",
+    )
+
+    subparsers = parser.add_subparsers(title="subcommands", dest="subcommand")
+
+    parser_eval = subparsers.add_parser("eval", description="Run an evaluation")
+    parser_eval.add_argument("file", help="A file containing the evaluation to run")
+    parser_eval.set_defaults(func=run_evaluation)
+
+    parsed = parser.parse_args()
+    asyncio.run(parsed.func(parsed))
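To illustrate what the new `eval` subcommand expects: it imports the given file under `set_global_evaluation(True)`, so a call to `evaluate(...)` in that file only registers the evaluation, and the CLI then awaits its `run()`. Below is a minimal sketch of such a file; the file name, the executor and evaluator bodies, and the sample datapoints are illustrative assumptions, not part of this package.

```python
# my_eval.py -- hypothetical file passed to `lmnr eval my_eval.py`
from lmnr.sdk.evaluations import evaluate


def executor(data: dict) -> str:
    # Illustrative executor: in practice this would call your model or pipeline.
    return f"The capital of {data['country']} is {data['guess']}."


def exact_match(output: str, target: dict) -> int:
    # Illustrative evaluator: receives the executor output and the datapoint target,
    # and returns a numeric score.
    return int(target["capital"] in output)


# Under `lmnr eval`, this call only registers the evaluation; run directly,
# it executes immediately (a project API key is expected via argument or environment).
evaluate(
    data=[{"data": {"country": "France", "guess": "Paris"}, "target": {"capital": "Paris"}}],
    executor=executor,
    evaluators={"exact_match": exact_match},
)
```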
lmnr/sdk/decorators.py
CHANGED
@@ -6,6 +6,7 @@ from opentelemetry.trace import INVALID_SPAN, get_current_span
 
 from typing import Callable, Optional, cast
 
+from lmnr.traceloop_sdk.tracing.attributes import SESSION_ID, USER_ID
 from lmnr.traceloop_sdk.tracing.tracing import update_association_properties
 
 from .utils import is_async
@@ -43,11 +44,11 @@ def observe(
         if current_span != INVALID_SPAN:
             if session_id is not None:
                 current_span.set_attribute(
-
+                    SESSION_ID, session_id
                 )
             if user_id is not None:
                 current_span.set_attribute(
-
+                    USER_ID, user_id
                 )
         association_properties = {}
         if session_id is not None:
lmnr/sdk/evaluations.py
CHANGED
@@ -1,14 +1,78 @@
-from typing import Any, Union
-
-from .types import EvaluationDatapoint
-from .utils import is_async
-from .laminar import Laminar as L
 import asyncio
-
+import sys
 from abc import ABC, abstractmethod
+from contextlib import contextmanager
+from typing import Any, Awaitable, Optional, Set, Union
+import uuid
+
+from tqdm import tqdm
+
+from ..traceloop_sdk.instruments import Instruments
+from ..traceloop_sdk.tracing.attributes import SPAN_TYPE
+
+from .laminar import Laminar as L
+from .types import (
+    CreateEvaluationResponse,
+    Datapoint,
+    EvaluationResultDatapoint,
+    EvaluatorFunction,
+    ExecutorFunction,
+    Numeric,
+    NumericTypes,
+    SpanType,
+    TraceType,
+)
+from .utils import is_async
 
 DEFAULT_BATCH_SIZE = 5
 
+_evaluation = None
+_set_global_evaluation = False
+
+
+@contextmanager
+def set_global_evaluation(set_global_evaluation: bool):
+    global _set_global_evaluation
+    original = _set_global_evaluation
+    try:
+        _set_global_evaluation = set_global_evaluation
+        yield
+    finally:
+        _set_global_evaluation = original
+        pass
+
+
+def get_evaluation_url(project_id: str, evaluation_id: str):
+    return f"https://www.lmnr.ai/project/{project_id}/evaluations/{evaluation_id}"
+
+
+class EvaluationReporter:
+    def __init__(self):
+        pass
+
+    def start(self, name: str, project_id: str, id: str, length: int):
+        print(f"Running evaluation {name}...\n")
+        print(f"Check progress and results at {get_evaluation_url(project_id, id)}\n")
+        self.cli_progress = tqdm(
+            total=length,
+            bar_format="{bar} {percentage:3.0f}% | ETA: {remaining}s | {n_fmt}/{total_fmt}",
+            ncols=60,
+        )
+
+    def update(self, batch_length: int):
+        self.cli_progress.update(batch_length)
+
+    def stopWithError(self, error: Exception):
+        self.cli_progress.close()
+        sys.stderr.write(f"\nError: {error}\n")
+
+    def stop(self, average_scores: dict[str, Numeric]):
+        self.cli_progress.close()
+        print("\nAverage scores:")
+        for name, score in average_scores.items():
+            print(f"{name}: {score}")
+        print("\n")
+
 
 class EvaluationDataset(ABC):
     @abstractmethod
@@ -20,7 +84,7 @@ class EvaluationDataset(ABC):
         pass
 
     @abstractmethod
-    def __getitem__(self, idx) ->
+    def __getitem__(self, idx) -> Datapoint:
         pass
 
     def slice(self, start: int, end: int):
@@ -30,18 +94,21 @@ class EvaluationDataset(ABC):
 class Evaluation:
     def __init__(
         self,
-
-        data: Union[EvaluationDataset, list[Union[EvaluationDatapoint, dict]]],
+        data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
         executor: Any,
-        evaluators:
+        evaluators: dict[str, EvaluatorFunction],
+        name: Optional[str] = None,
         batch_size: int = DEFAULT_BATCH_SIZE,
-        project_api_key: str =
-        base_url: str =
+        project_api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
+        http_port: Optional[int] = None,
+        grpc_port: Optional[int] = None,
+        instruments: Optional[Set[Instruments]] = None,
     ):
         """
         Initializes an instance of the Evaluations class.
+
         Parameters:
-            name (str): The name of the evaluation.
             data (Union[List[Union[EvaluationDatapoint, dict]], EvaluationDataset]): List of data points to evaluate or an evaluation dataset.
                 `data` is the input to the executor function,
                 `target` is the input to the evaluator function.
@@ -56,46 +123,43 @@ class Evaluation:
                 evaluator function. If the function is anonymous, it will be
                 named `evaluator_${index}`, where index is the index of the
                 evaluator function in the list starting from 1.
+            name (Optional[str], optional): The name of the evaluation.
+                It will be auto-generated if not provided.
             batch_size (int, optional): The batch size for evaluation.
                 Defaults to DEFAULT_BATCH_SIZE.
-            project_api_key (str, optional): The project API key.
+            project_api_key (Optional[str], optional): The project API key.
                 Defaults to an empty string.
-            base_url (str, optional): The base URL for the
+            base_url (Optional[str], optional): The base URL for the Laminar API.
                 Useful if self-hosted elsewhere.
                 Defaults to "https://api.lmnr.ai".
+            http_port (Optional[int], optional): The port for the Laminar API HTTP service.
+                Defaults to 443.
+            instruments (Optional[Set[Instruments]], optional): Set of modules to auto-instrument.
+                Defaults to None. If None, all available instruments will be used.
         """
 
+        self.is_finished = False
         self.name = name
+        self.reporter = EvaluationReporter()
         self.executor = executor
-        self.evaluators =
-            zip(
-                [
-                    (
-                        e.__name__
-                        if e.__name__ and e.__name__ != "<lambda>"
-                        else f"evaluator_{i+1}"
-                    )
-                    for i, e in enumerate(evaluators)
-                ],
-                evaluators,
-            )
-        )
-        self.evaluator_names = list(self.evaluators.keys())
+        self.evaluators = evaluators
         if isinstance(data, list):
             self.data = [
-                (
-                    EvaluationDatapoint.model_validate(point)
-                    if isinstance(point, dict)
-                    else point
-                )
+                (Datapoint.model_validate(point) if isinstance(point, dict) else point)
                 for point in data
            ]
         else:
             self.data = data
         self.batch_size = batch_size
-        L.initialize(
+        L.initialize(
+            project_api_key=project_api_key,
+            base_url=base_url,
+            http_port=http_port,
+            grpc_port=grpc_port,
+            instruments=instruments,
+        )
 
-    def run(self):
+    def run(self) -> Union[None, Awaitable[None]]:
         """Runs the evaluation.
 
         Creates a new evaluation if no evaluation with such name exists, or
@@ -113,16 +177,38 @@ class Evaluation:
         ```
 
         """
+        if self.is_finished:
+            raise Exception("Evaluation is already finished")
+
         loop = asyncio.get_event_loop()
         if loop.is_running():
             return loop.create_task(self._run())
         else:
             return loop.run_until_complete(self._run())
 
-    async def _run(self):
-
+    async def _run(self) -> None:
+        evaluation = L.create_evaluation(self.name)
+        self.reporter.start(
+            evaluation.name,
+            evaluation.projectId,
+            evaluation.id,
+            len(self.data),
+        )
+
+        try:
+            await self.evaluate_in_batches(evaluation)
+        except Exception as e:
+            L.update_evaluation_status(evaluation.id, "Error")
+            self.reporter.stopWithError(e)
+            self.is_finished = True
+            return
+
+        # If we update with status "Finished", we expect averageScores to be not empty
+        updated_evaluation = L.update_evaluation_status(evaluation.id, "Finished")
+        self.reporter.stop(updated_evaluation.averageScores)
+        self.is_finished = True
 
-
+    async def evaluate_in_batches(self, evaluation: CreateEvaluationResponse):
         for i in range(0, len(self.data), self.batch_size):
             batch = (
                 self.data[i : i + self.batch_size]
@@ -130,49 +216,132 @@ class Evaluation:
                 else self.data.slice(i, i + self.batch_size)
             )
             try:
-                await self._evaluate_batch(batch)
+                results = await self._evaluate_batch(batch)
+                L.post_evaluation_results(evaluation.id, results)
             except Exception as e:
                 print(f"Error evaluating batch: {e}")
+            finally:
+                self.reporter.update(len(batch))
 
-
-
-
-            except Exception as e:
-                print(f"Error updating evaluation status: {e}")
-
-    async def _evaluate_batch(self, batch: list[EvaluationDatapoint]):
+    async def _evaluate_batch(
+        self, batch: list[Datapoint]
+    ) -> list[EvaluationResultDatapoint]:
         batch_promises = [self._evaluate_datapoint(datapoint) for datapoint in batch]
         results = await asyncio.gather(*batch_promises)
+        return results
+
+    async def _evaluate_datapoint(
+        self, datapoint: Datapoint
+    ) -> EvaluationResultDatapoint:
+        with L.start_as_current_span("evaluation") as evaluation_span:
+            L._set_trace_type(trace_type=TraceType.EVALUATION)
+            evaluation_span.set_attribute(SPAN_TYPE, SpanType.EVALUATION.value)
+            with L.start_as_current_span(
+                "executor", input={"data": datapoint.data}
+            ) as executor_span:
+                executor_span.set_attribute(SPAN_TYPE, SpanType.EXECUTOR.value)
+                output = (
+                    await self.executor(datapoint.data)
+                    if is_async(self.executor)
+                    else self.executor(datapoint.data)
+                )
+                L.set_span_output(output)
+            target = datapoint.target
 
-
+            # Iterate over evaluators
+            scores: dict[str, Numeric] = {}
+            for evaluator_name, evaluator in self.evaluators.items():
+                with L.start_as_current_span(
+                    evaluator_name, input={"output": output, "target": target}
+                ) as evaluator_span:
+                    evaluator_span.set_attribute(SPAN_TYPE, SpanType.EVALUATOR.value)
+                    value = (
+                        await evaluator(output, target)
+                        if is_async(evaluator)
+                        else evaluator(output, target)
+                    )
+                    L.set_span_output(value)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-                await evaluator(output, target)
-                if is_async(evaluator)
-                else evaluator(output, target)
+                # If evaluator returns a single number, use evaluator name as key
+                if isinstance(value, NumericTypes):
+                    scores[evaluator_name] = value
+                else:
+                    scores.update(value)
+
+            trace_id = uuid.UUID(int=evaluation_span.get_span_context().trace_id)
+            return EvaluationResultDatapoint(
+                data=datapoint.data,
+                target=target,
+                executor_output=output,
+                scores=scores,
+                trace_id=trace_id,
            )
 
-
-
-
-
-
-
-
-
-
-
-
-
+
+def evaluate(
+    data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
+    executor: ExecutorFunction,
+    evaluators: dict[str, EvaluatorFunction],
+    name: Optional[str] = None,
+    batch_size: int = DEFAULT_BATCH_SIZE,
+    project_api_key: Optional[str] = None,
+    base_url: Optional[str] = None,
+    http_port: Optional[int] = None,
+    grpc_port: Optional[int] = None,
+    instruments: Optional[Set[Instruments]] = None,
+) -> Optional[Awaitable[None]]:
+    """
+    If added to the file which is called through lmnr eval command, then simply registers the evaluation.
+    Otherwise, if there is no event loop, creates it and runs the evaluation until completion.
+    If there is an event loop, schedules the evaluation as a task in the event loop and returns an awaitable handle.
+
+    Parameters:
+        data (Union[List[Union[EvaluationDatapoint, dict]], EvaluationDataset]): List of data points to evaluate or an evaluation dataset.
+            `data` is the input to the executor function,
+            `target` is the input to the evaluator function.
+        executor (Callable[..., Any]): The executor function.
+            Takes the data point + any additional arguments
+            and returns the output to evaluate.
+        evaluators (List[Callable[..., Any]]): List of evaluator functions.
+            Each evaluator function takes the output of the executor _and_
+            the target data, and returns a score. The score can be a
+            single number or a record of string keys and number values.
+            If the score is a single number, it will be named after the
+            evaluator function. If the function is anonymous, it will be
+            named `evaluator_${index}`, where index is the index of the
+            evaluator function in the list starting from 1.
+        name (Optional[str], optional): The name of the evaluation.
+            It will be auto-generated if not provided.
+        batch_size (int, optional): The batch size for evaluation.
+            Defaults to DEFAULT_BATCH_SIZE.
+        project_api_key (Optional[str], optional): The project API key.
+            Defaults to an empty string.
+        base_url (Optional[str], optional): The base URL for the Laminar API.
+            Useful if self-hosted elsewhere.
+            Defaults to "https://api.lmnr.ai".
+        http_port (Optional[int], optional): The port for the Laminar API HTTP service.
+            Defaults to 443.
+        grpc_port (Optional[int], optional): The port for the Laminar API gRPC service.
+            Defaults to 8443.
+        instruments (Optional[Set[Instruments]], optional): Set of modules to auto-instrument.
+            Defaults to None. If None, all available instruments will be used.
    """
 
+    evaluation = Evaluation(
+        data=data,
+        executor=executor,
+        evaluators=evaluators,
+        name=name,
+        batch_size=batch_size,
+        project_api_key=project_api_key,
+        base_url=base_url,
+        http_port=http_port,
+        grpc_port=grpc_port,
+        instruments=instruments,
+    )
+
+    global _evaluation
+    if _set_global_evaluation:
+        _evaluation = evaluation
+    else:
+        return evaluation.run()