lmnr 0.4.64__py3-none-any.whl → 0.4.65__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lmnr/openllmetry_sdk/instruments.py +1 -0
- lmnr/openllmetry_sdk/tracing/tracing.py +50 -15
- lmnr/sdk/browser/__init__.py +0 -9
- lmnr/sdk/browser/browser_use_otel.py +118 -0
- lmnr/sdk/browser/playwright_otel.py +310 -0
- lmnr/sdk/browser/utils.py +104 -0
- lmnr/sdk/client.py +313 -0
- lmnr/sdk/datasets.py +2 -2
- lmnr/sdk/evaluations.py +32 -10
- lmnr/sdk/laminar.py +72 -194
- lmnr/sdk/types.py +29 -4
- lmnr/version.py +1 -1
- {lmnr-0.4.64.dist-info → lmnr-0.4.65.dist-info}/METADATA +51 -51
- {lmnr-0.4.64.dist-info → lmnr-0.4.65.dist-info}/RECORD +17 -14
- lmnr/sdk/browser/playwright_patch.py +0 -377
- {lmnr-0.4.64.dist-info → lmnr-0.4.65.dist-info}/LICENSE +0 -0
- {lmnr-0.4.64.dist-info → lmnr-0.4.65.dist-info}/WHEEL +0 -0
- {lmnr-0.4.64.dist-info → lmnr-0.4.65.dist-info}/entry_points.txt +0 -0
lmnr/sdk/client.py
ADDED
@@ -0,0 +1,313 @@
|
|
1
|
+
"""
|
2
|
+
Laminar HTTP client. Used to send data to/from the Laminar API.
|
3
|
+
Initialized in `Laminar` singleton, but can be imported
|
4
|
+
in other classes.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import asyncio
|
8
|
+
import json
|
9
|
+
import aiohttp
|
10
|
+
import gzip
|
11
|
+
from opentelemetry import trace
|
12
|
+
from pydantic.alias_generators import to_snake
|
13
|
+
import requests
|
14
|
+
from typing import Awaitable, Optional, Union
|
15
|
+
import urllib.parse
|
16
|
+
import uuid
|
17
|
+
|
18
|
+
from lmnr.sdk.types import (
|
19
|
+
EvaluationResultDatapoint,
|
20
|
+
GetDatapointsResponse,
|
21
|
+
InitEvaluationResponse,
|
22
|
+
NodeInput,
|
23
|
+
PipelineRunError,
|
24
|
+
PipelineRunRequest,
|
25
|
+
PipelineRunResponse,
|
26
|
+
SemanticSearchRequest,
|
27
|
+
SemanticSearchResponse,
|
28
|
+
)
|
29
|
+
from lmnr.version import SDK_VERSION
|
30
|
+
|
31
|
+
|
32
|
+
class LaminarClient:
    """Laminar HTTP client. Used to send data to/from the Laminar API.

    Implemented as a class-level singleton: `initialize` is called once (by
    the `Laminar` singleton) before any other method. Holds one
    `requests.Session` for synchronous calls and one lazily created
    `aiohttp.ClientSession` for asynchronous calls.
    """

    __base_url: Optional[str] = None
    __project_api_key: Optional[str] = None
    # The aiohttp session is bound to the event loop it was created on; it is
    # created lazily in __get_session when no loop was running at
    # initialize() time.
    __session: Optional[aiohttp.ClientSession] = None
    __sync_session: Optional[requests.Session] = None

    @classmethod
    def initialize(cls, base_url: str, project_api_key: str):
        """Configure the client.

        Args:
            base_url: base URL of the Laminar API (no trailing slash).
            project_api_key: project API key, sent as a Bearer token.
        """
        cls.__base_url = base_url
        cls.__project_api_key = project_api_key
        cls.__sync_session = requests.Session()
        # asyncio.get_event_loop() is deprecated when no loop is running, so
        # probe for a running loop explicitly. Without one, the async session
        # is created lazily in __get_session.
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            pass
        else:
            cls.__session = aiohttp.ClientSession()

    @classmethod
    def shutdown(cls):
        """Close both sessions. Safe to call with or without a running loop."""
        if cls.__sync_session is not None:
            cls.__sync_session.close()
        if cls.__session is None:
            return
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # No running loop: drive the close coroutine to completion here.
            asyncio.run(cls.__session.close())
        else:
            # Bugfix: the coroutine returned by ClientSession.close() was
            # previously never awaited when a loop was running, so the session
            # was never actually closed. Schedule it on the running loop.
            loop.create_task(cls.__session.close())
        cls.__session = None

    @classmethod
    async def shutdown_async(cls):
        """Close the async session from within a running event loop."""
        if cls.__session is not None:
            await cls.__session.close()
            cls.__session = None

    @classmethod
    def run_pipeline(
        cls,
        pipeline: str,
        inputs: dict[str, NodeInput],
        env: Optional[dict[str, str]] = None,
        metadata: Optional[dict[str, str]] = None,
        parent_span_id: Optional[uuid.UUID] = None,
        trace_id: Optional[uuid.UUID] = None,
    ) -> Union[PipelineRunResponse, Awaitable[PipelineRunResponse]]:
        """Run a Laminar pipeline.

        When called while an event loop is running, returns an awaitable the
        caller must await; otherwise runs to completion and returns the
        response directly.

        Args:
            pipeline: pipeline name.
            inputs: pipeline inputs, keyed by input node name.
            env: environment variables passed to the pipeline run.
            metadata: metadata attached to the pipeline run.
            parent_span_id: parent span; defaults to the current OTel span.
            trace_id: trace to attach to; defaults to the current OTel trace.

        Raises:
            ValueError: if the client is not initialized or the request
                cannot be constructed.
            PipelineRunError: if the API returns a non-200 response.
        """
        if cls.__project_api_key is None:
            raise ValueError(
                "Please initialize the Laminar object with your project "
                "API key or set the LMNR_PROJECT_API_KEY environment variable"
            )
        try:
            current_span = trace.get_current_span()
            if current_span != trace.INVALID_SPAN:
                # Attach to the ambient OTel context unless explicitly given.
                parent_span_id = parent_span_id or uuid.UUID(
                    int=current_span.get_span_context().span_id
                )
                trace_id = trace_id or uuid.UUID(
                    int=current_span.get_span_context().trace_id
                )
            request = PipelineRunRequest(
                inputs=inputs,
                pipeline=pipeline,
                env=env or {},
                metadata=metadata or {},
                parent_span_id=parent_span_id,
                trace_id=trace_id,
            )
        except Exception as e:
            raise ValueError(f"Invalid request: {e}") from e
        # Bugfix: loop.run_in_executor(None, cls.__run, request) produced a
        # Future resolving to a coroutine *object*, never the response.
        # Return the coroutine itself when a loop is running, so the caller
        # awaits it on that loop (keeping the session loop-affine).
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            return asyncio.run(cls.__run(request))
        return cls.__run(request)

    @classmethod
    def semantic_search(
        cls,
        query: str,
        dataset_id: uuid.UUID,
        limit: Optional[int] = None,
        threshold: Optional[float] = None,
    ) -> Union[SemanticSearchResponse, Awaitable[SemanticSearchResponse]]:
        """Semantic search over a dataset.

        Same sync/async dispatch as `run_pipeline`: returns an awaitable when
        called inside a running event loop, the response otherwise.

        Args:
            query: free-text query.
            dataset_id: id of the dataset to search.
            limit: maximum number of results to return.
            threshold: minimum similarity score for a result.
        """
        request = SemanticSearchRequest(
            query=query,
            dataset_id=dataset_id,
            limit=limit,
            threshold=threshold,
        )
        # Bugfix: same run_in_executor-on-a-coroutine misuse as run_pipeline.
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            return asyncio.run(cls.__semantic_search(request))
        return cls.__semantic_search(request)

    @classmethod
    async def init_eval(
        cls, name: Optional[str] = None, group_name: Optional[str] = None
    ) -> InitEvaluationResponse:
        """Create a new evaluation on the server and return its metadata."""
        session = await cls.__get_session()
        async with session.post(
            cls.__base_url + "/v1/evals",
            json={
                "name": name,
                "groupName": group_name,
            },
            headers=cls._headers(),
        ) as response:
            resp_json = await response.json()
            return InitEvaluationResponse.model_validate(resp_json)

    @classmethod
    async def save_eval_datapoints(
        cls,
        eval_id: uuid.UUID,
        datapoints: list[EvaluationResultDatapoint],
        groupName: Optional[str] = None,
    ):
        """Upload evaluation datapoints for an existing evaluation.

        Raises:
            ValueError: if the API returns a non-200 response.
        """
        session = await cls.__get_session()
        async with session.post(
            cls.__base_url + f"/v1/evals/{eval_id}/datapoints",
            json={
                "points": [datapoint.to_dict() for datapoint in datapoints],
                "groupName": groupName,
            },
            headers=cls._headers(),
        ) as response:
            if response.status != 200:
                raise ValueError(
                    f"Error saving evaluation datapoints: {await response.text()}"
                )

    @classmethod
    async def send_browser_events(
        cls,
        session_id: str,
        trace_id: str,
        events: list[dict],
        source: str,
    ):
        """Send browser session events to the API (async, gzip-compressed).

        Raises:
            ValueError: if the API returns a non-200 response.
        """
        session = await cls.__get_session()
        payload = {
            "sessionId": session_id,
            "traceId": trace_id,
            "events": events,
            "source": source,
            "sdkVersion": SDK_VERSION,
        }
        # Event batches can be large; compress the JSON body.
        compressed_payload = gzip.compress(json.dumps(payload).encode("utf-8"))

        async with session.post(
            cls.__base_url + "/v1/browser-sessions/events",
            data=compressed_payload,
            headers={
                **cls._headers(),
                "Content-Encoding": "gzip",
            },
        ) as response:
            if response.status != 200:
                raise ValueError(
                    f"Failed to send events: [{response.status}] {await response.text()}"
                )

    @classmethod
    def send_browser_events_sync(
        cls,
        session_id: str,
        trace_id: str,
        events: list[dict],
        source: str,
    ):
        """Synchronous counterpart of `send_browser_events`.

        Raises:
            ValueError: if the API returns a non-200 response.
        """
        url = cls.__base_url + "/v1/browser-sessions/events"
        payload = {
            "sessionId": session_id,
            "traceId": trace_id,
            "events": events,
            "source": source,
            "sdkVersion": SDK_VERSION,
        }
        compressed_payload = gzip.compress(json.dumps(payload).encode("utf-8"))
        response = cls.__sync_session.post(
            url,
            data=compressed_payload,
            headers={
                **cls._headers(),
                "Content-Encoding": "gzip",
            },
        )
        if response.status_code != 200:
            raise ValueError(
                f"Failed to send events: [{response.status_code}] {response.text}"
            )

    @classmethod
    def get_datapoints(
        cls,
        dataset_name: str,
        offset: int,
        limit: int,
    ) -> GetDatapointsResponse:
        """Fetch a page of datapoints from a named dataset (sync).

        Raises:
            ValueError: if the API returns a non-200 response.
        """
        # TODO: Use aiohttp. Currently, this function is called from within
        # `LaminarDataset.__len__`, which is sync, but can be called from
        # both sync and async (primarily async). Python does not make it easy
        # to mix things this way, so we should probably refactor `LaminarDataset`.
        params = {"name": dataset_name, "offset": offset, "limit": limit}
        url = (
            cls.__base_url + "/v1/datasets/datapoints?" + urllib.parse.urlencode(params)
        )
        response = cls.__sync_session.get(url, headers=cls._headers())
        if response.status_code != 200:
            try:
                resp_json = response.json()
                raise ValueError(
                    f"Error fetching datapoints: [{response.status_code}] {json.dumps(resp_json)}"
                )
            except requests.exceptions.RequestException:
                # Body was not valid JSON; fall back to the raw text.
                raise ValueError(
                    f"Error fetching datapoints: [{response.status_code}] {response.text}"
                )
        return GetDatapointsResponse.model_validate(response.json())

    @classmethod
    async def __run(
        cls,
        request: PipelineRunRequest,
    ) -> PipelineRunResponse:
        """POST a pipeline run request and parse the response.

        Raises:
            PipelineRunError: on a non-200 status or an unparseable body.
        """
        session = await cls.__get_session()
        async with session.post(
            cls.__base_url + "/v1/pipeline/run",
            data=json.dumps(request.to_dict()),
            headers=cls._headers(),
        ) as response:
            if response.status != 200:
                raise PipelineRunError(response)
            try:
                resp_json = await response.json()
                # API returns camelCase keys; the response model uses
                # snake_case fields.
                resp_json = {to_snake(key): value for key, value in resp_json.items()}
                return PipelineRunResponse(**resp_json)
            except Exception:
                raise PipelineRunError(response)

    @classmethod
    async def __semantic_search(
        cls,
        request: SemanticSearchRequest,
    ) -> SemanticSearchResponse:
        """POST a semantic search request and parse the response.

        Raises:
            ValueError: on a non-200 status or an unparseable body.
        """
        session = await cls.__get_session()
        async with session.post(
            cls.__base_url + "/v1/semantic-search",
            data=json.dumps(request.to_dict()),
            headers=cls._headers(),
        ) as response:
            if response.status != 200:
                raise ValueError(
                    f"Error performing semantic search: [{response.status}] {await response.text()}"
                )
            try:
                resp_json = await response.json()
                for result in resp_json["results"]:
                    # API returns the dataset id as a camelCase string.
                    result["dataset_id"] = uuid.UUID(result["datasetId"])
                return SemanticSearchResponse(**resp_json)
            except Exception as e:
                raise ValueError(
                    f"Error parsing semantic search response: status={response.status} error={e}"
                ) from e

    @classmethod
    def _headers(cls):
        """Default auth + JSON headers. Requires a prior `initialize` call."""
        # Explicit raise rather than assert: asserts are stripped under -O.
        if cls.__project_api_key is None:
            raise ValueError("Project API key is not set")
        return {
            "Authorization": "Bearer " + cls.__project_api_key,
            "Content-Type": "application/json",
            "Accept": "application/json",
        }

    @classmethod
    async def __get_session(cls):
        """Return the shared aiohttp session, creating it on first use."""
        if cls.__session is None:
            cls.__session = aiohttp.ClientSession()
        return cls.__session
|
lmnr/sdk/datasets.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from abc import ABC, abstractmethod
|
2
2
|
|
3
|
+
from .client import LaminarClient
|
3
4
|
from .log import get_default_logger
|
4
|
-
from .laminar import Laminar as L
|
5
5
|
from .types import Datapoint
|
6
6
|
|
7
7
|
DEFAULT_FETCH_SIZE = 25
|
@@ -38,7 +38,7 @@ class LaminarDataset(EvaluationDataset):
|
|
38
38
|
f"dataset {self.name}. Fetching batch from {self._offset} to "
|
39
39
|
+ f"{self._offset + self._fetch_size}"
|
40
40
|
)
|
41
|
-
resp =
|
41
|
+
resp = LaminarClient.get_datapoints(self.name, self._offset, self._fetch_size)
|
42
42
|
self._fetched_items += resp.items
|
43
43
|
self._offset = len(self._fetched_items)
|
44
44
|
if self._len is None:
|
lmnr/sdk/evaluations.py
CHANGED
@@ -8,6 +8,7 @@ from typing import Any, Awaitable, Optional, Set, Union
|
|
8
8
|
from ..openllmetry_sdk.instruments import Instruments
|
9
9
|
from ..openllmetry_sdk.tracing.attributes import SPAN_TYPE
|
10
10
|
|
11
|
+
from .client import LaminarClient
|
11
12
|
from .datasets import EvaluationDataset
|
12
13
|
from .eval_control import EVALUATION_INSTANCE, PREPARE_ONLY
|
13
14
|
from .laminar import Laminar as L
|
@@ -20,6 +21,7 @@ from .types import (
|
|
20
21
|
HumanEvaluator,
|
21
22
|
Numeric,
|
22
23
|
NumericTypes,
|
24
|
+
PartialEvaluationDatapoint,
|
23
25
|
SpanType,
|
24
26
|
TraceType,
|
25
27
|
)
|
@@ -209,7 +211,9 @@ class Evaluation:
|
|
209
211
|
async def _run(self) -> None:
|
210
212
|
self.reporter.start(len(self.data))
|
211
213
|
try:
|
212
|
-
evaluation = await
|
214
|
+
evaluation = await LaminarClient.init_eval(
|
215
|
+
name=self.name, group_name=self.group_name
|
216
|
+
)
|
213
217
|
result_datapoints = await self._evaluate_in_batches(evaluation.id)
|
214
218
|
|
215
219
|
# Wait for all background upload tasks to complete
|
@@ -227,6 +231,7 @@ class Evaluation:
|
|
227
231
|
average_scores = get_average_scores(result_datapoints)
|
228
232
|
self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
|
229
233
|
self.is_finished = True
|
234
|
+
await LaminarClient.shutdown_async()
|
230
235
|
|
231
236
|
async def _evaluate_in_batches(
|
232
237
|
self, eval_id: uuid.UUID
|
@@ -260,12 +265,29 @@ class Evaluation:
|
|
260
265
|
async def _evaluate_datapoint(
|
261
266
|
self, eval_id: uuid.UUID, datapoint: Datapoint, index: int
|
262
267
|
) -> EvaluationResultDatapoint:
|
268
|
+
evaluation_id = uuid.uuid4()
|
263
269
|
with L.start_as_current_span("evaluation") as evaluation_span:
|
264
270
|
L._set_trace_type(trace_type=TraceType.EVALUATION)
|
265
271
|
evaluation_span.set_attribute(SPAN_TYPE, SpanType.EVALUATION.value)
|
266
272
|
with L.start_as_current_span(
|
267
273
|
"executor", input={"data": datapoint.data}
|
268
274
|
) as executor_span:
|
275
|
+
executor_span_id = uuid.UUID(
|
276
|
+
int=executor_span.get_span_context().span_id
|
277
|
+
)
|
278
|
+
trace_id = uuid.UUID(int=executor_span.get_span_context().trace_id)
|
279
|
+
partial_datapoint = PartialEvaluationDatapoint(
|
280
|
+
id=evaluation_id,
|
281
|
+
data=datapoint.data,
|
282
|
+
target=datapoint.target,
|
283
|
+
index=index,
|
284
|
+
trace_id=trace_id,
|
285
|
+
executor_span_id=executor_span_id,
|
286
|
+
)
|
287
|
+
# First, create datapoint with trace_id so that we can show the dp in the UI
|
288
|
+
await LaminarClient.save_eval_datapoints(
|
289
|
+
eval_id, [partial_datapoint], self.group_name
|
290
|
+
)
|
269
291
|
executor_span.set_attribute(SPAN_TYPE, SpanType.EXECUTOR.value)
|
270
292
|
# Run synchronous executors in a thread pool to avoid blocking
|
271
293
|
if not is_async(self.executor):
|
@@ -277,9 +299,6 @@ class Evaluation:
|
|
277
299
|
output = await self.executor(datapoint.data)
|
278
300
|
|
279
301
|
L.set_span_output(output)
|
280
|
-
executor_span_id = uuid.UUID(
|
281
|
-
int=executor_span.get_span_context().span_id
|
282
|
-
)
|
283
302
|
target = datapoint.target
|
284
303
|
|
285
304
|
# Iterate over evaluators
|
@@ -289,11 +308,13 @@ class Evaluation:
|
|
289
308
|
evaluator_name, input={"output": output, "target": target}
|
290
309
|
) as evaluator_span:
|
291
310
|
evaluator_span.set_attribute(SPAN_TYPE, SpanType.EVALUATOR.value)
|
292
|
-
|
293
|
-
await evaluator(output, target)
|
294
|
-
|
295
|
-
|
296
|
-
|
311
|
+
if is_async(evaluator):
|
312
|
+
value = await evaluator(output, target)
|
313
|
+
else:
|
314
|
+
loop = asyncio.get_event_loop()
|
315
|
+
value = await loop.run_in_executor(
|
316
|
+
None, evaluator, output, target
|
317
|
+
)
|
297
318
|
L.set_span_output(value)
|
298
319
|
|
299
320
|
# If evaluator returns a single number, use evaluator name as key
|
@@ -305,6 +326,7 @@ class Evaluation:
|
|
305
326
|
trace_id = uuid.UUID(int=evaluation_span.get_span_context().trace_id)
|
306
327
|
|
307
328
|
datapoint = EvaluationResultDatapoint(
|
329
|
+
id=evaluation_id,
|
308
330
|
data=datapoint.data,
|
309
331
|
target=target,
|
310
332
|
executor_output=output,
|
@@ -320,7 +342,7 @@ class Evaluation:
|
|
320
342
|
|
321
343
|
# Create background upload task without awaiting it
|
322
344
|
upload_task = asyncio.create_task(
|
323
|
-
|
345
|
+
LaminarClient.save_eval_datapoints(eval_id, [datapoint], self.group_name)
|
324
346
|
)
|
325
347
|
self.upload_tasks.append(upload_task)
|
326
348
|
|