evaldeck 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evaldeck/config.py +1 -0
- evaldeck/evaluator.py +13 -0
- evaldeck/integrations/__init__.py +14 -1
- evaldeck/integrations/langchain.py +125 -0
- {evaldeck-0.1.1.dist-info → evaldeck-0.1.3.dist-info}/METADATA +4 -1
- {evaldeck-0.1.1.dist-info → evaldeck-0.1.3.dist-info}/RECORD +9 -8
- {evaldeck-0.1.1.dist-info → evaldeck-0.1.3.dist-info}/WHEEL +0 -0
- {evaldeck-0.1.1.dist-info → evaldeck-0.1.3.dist-info}/entry_points.txt +0 -0
- {evaldeck-0.1.1.dist-info → evaldeck-0.1.3.dist-info}/licenses/LICENSE +0 -0
evaldeck/config.py
CHANGED
evaldeck/evaluator.py
CHANGED
|
@@ -571,4 +571,17 @@ class EvaluationRunner:
|
|
|
571
571
|
|
|
572
572
|
module = importlib.import_module(agent_config.module)
|
|
573
573
|
func = getattr(module, agent_config.function)
|
|
574
|
+
|
|
575
|
+
# Handle framework-specific integration
|
|
576
|
+
if agent_config.framework:
|
|
577
|
+
framework = agent_config.framework.lower()
|
|
578
|
+
|
|
579
|
+
if framework == "langchain":
|
|
580
|
+
from evaldeck.integrations.langchain import create_langchain_runner
|
|
581
|
+
|
|
582
|
+
return create_langchain_runner(func)
|
|
583
|
+
|
|
584
|
+
else:
|
|
585
|
+
raise ValueError(f"Unknown framework: {agent_config.framework}")
|
|
586
|
+
|
|
574
587
|
return func
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
This module provides the OpenTelemetry/OpenInference adapter for capturing traces
|
|
4
4
|
from any instrumented AI framework (LangChain, CrewAI, LiteLLM, OpenAI, Anthropic, etc.)
|
|
5
5
|
|
|
6
|
-
|
|
6
|
+
Basic usage (manual setup):
|
|
7
7
|
from evaldeck.integrations import EvaldeckSpanProcessor, setup_otel_tracing
|
|
8
8
|
from openinference.instrumentation.langchain import LangChainInstrumentor
|
|
9
9
|
|
|
@@ -14,6 +14,19 @@ Usage:
|
|
|
14
14
|
|
|
15
15
|
trace = processor.get_latest_trace()
|
|
16
16
|
result = evaluator.evaluate(trace, test_case)
|
|
17
|
+
|
|
18
|
+
With framework integration (automatic setup via evaldeck.yaml):
|
|
19
|
+
# evaldeck.yaml
|
|
20
|
+
agent:
|
|
21
|
+
module: my_agent
|
|
22
|
+
function: create_agent
|
|
23
|
+
framework: langchain
|
|
24
|
+
|
|
25
|
+
# my_agent.py
|
|
26
|
+
def create_agent():
|
|
27
|
+
return create_react_agent(llm, tools)
|
|
28
|
+
|
|
29
|
+
# Run: evaldeck run
|
|
17
30
|
"""
|
|
18
31
|
|
|
19
32
|
from evaldeck.integrations.opentelemetry import (
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""LangChain integration for evaldeck.
|
|
2
|
+
|
|
3
|
+
Provides automatic instrumentation and trace capture for LangChain/LangGraph agents.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Callable
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from evaldeck.trace import Trace
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class LangChainIntegration:
    """LangChain/LangGraph integration.

    Sets up OpenTelemetry tracing, instruments LangChain through
    OpenInference, builds the agent from a user-supplied factory, and
    exposes ``run`` which invokes the agent and returns the captured Trace.

    Typical use is ``setup(agent_factory)`` once, followed by any number of
    ``run(input)`` calls.
    """

    def __init__(self) -> None:
        # Object returned by setup_otel_tracing(); run() relies on its
        # reset() and get_latest_trace() methods.
        self._processor: Any = None
        # Whatever agent_factory() returned; invoked by _invoke_agent().
        self._agent: Any = None
        # Flipped to True only after setup() has completed every step.
        self._initialized = False

    def setup(self, agent_factory: Callable[[], Any]) -> None:
        """Set up instrumentation and create the agent.

        Calling this again after a successful setup is a no-op. If a previous
        call failed part-way (for example ``agent_factory`` raised), calling
        it again reuses the tracing processor that was already created
        instead of registering a second one.

        Args:
            agent_factory: Function that returns the agent instance.

        Raises:
            ImportError: If openinference-instrumentation-langchain is not
                installed.
        """
        if self._initialized:
            return

        # Import here to make langchain an optional dependency
        try:
            from openinference.instrumentation.langchain import LangChainInstrumentor
        except ImportError as e:
            raise ImportError(
                "LangChain integration requires openinference-instrumentation-langchain. "
                "Install with: pip install evaldeck[langchain]"
            ) from e

        from evaldeck.integrations import setup_otel_tracing

        # Only build the tracing pipeline once per integration: a retry after
        # a failed setup() must not stack a second processor on top of the
        # one this instance already holds.
        if self._processor is None:
            self._processor = setup_otel_tracing()

        # Instrument LangChain before the agent is built.
        # NOTE(review): repeated instrument() calls are expected to be
        # harmless (OpenTelemetry instrumentors guard against double
        # instrumentation) - confirm against the installed version.
        LangChainInstrumentor().instrument()

        # Create the agent; only mark the integration ready once it exists.
        self._agent = agent_factory()
        self._initialized = True

    def run(self, input: str) -> Trace:
        """Run the agent and return a trace.

        Args:
            input: The input string to send to the agent.

        Returns:
            Trace captured from the agent execution.

        Raises:
            RuntimeError: If ``setup()`` has not completed, or if the
                processor reports no trace after the agent ran.
        """
        if not self._initialized:
            raise RuntimeError("Integration not initialized. Call setup() first.")

        # Reset processor for fresh trace
        self._processor.reset()

        # The agent's return value is ignored here; only the trace matters.
        self._invoke_agent(input)

        trace = self._processor.get_latest_trace()
        if trace is None:
            raise RuntimeError("No trace captured from agent execution")

        return trace

    def _invoke_agent(self, input: str) -> Any:
        """Invoke the agent with the appropriate format.

        For agents exposing ``invoke`` the payload shapes are tried in order:
        ``{"messages": [("human", input)]}`` (LangGraph message format),
        ``{"input": input}``, then the bare string. Agents without ``invoke``
        are called through ``run(input)`` (legacy LangChain style) or, failing
        that, called directly.

        Note that a ``TypeError`` or ``KeyError`` raised from *inside* the
        agent cannot be told apart from a payload-format mismatch, so it also
        moves on to the next payload shape and the agent is invoked again.

        Args:
            input: The input string to send to the agent.

        Returns:
            Whatever the agent returned.

        Raises:
            RuntimeError: If the agent has no ``invoke``/``run`` and is not
                callable.
        """
        agent = self._agent

        if hasattr(agent, "invoke"):
            structured_payloads = (
                {"messages": [("human", input)]},  # LangGraph style (current)
                {"input": input},  # simple keyed input
            )
            for payload in structured_payloads:
                try:
                    return agent.invoke(payload)
                except (TypeError, KeyError):
                    # Treated as "wrong payload shape": try the next one.
                    continue
            # Last resort; any error raised here propagates to the caller
            # without the earlier, swallowed attempts chained onto it.
            return agent.invoke(input)

        # Legacy LangChain style
        if hasattr(agent, "run"):
            return agent.run(input)

        # Callable
        if callable(agent):
            return agent(input)

        raise RuntimeError(
            f"Don't know how to invoke agent of type {type(self._agent)}. "
            "Agent must have invoke(), run(), or be callable."
        )
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def create_langchain_runner(agent_factory: Callable[[], Any]) -> Callable[[str], Trace]:
    """Build a ready-to-call runner for a LangChain agent.

    This is the entry point evaldeck's EvaluationRunner uses for agents
    configured with the LangChain framework: a fresh LangChainIntegration is
    set up with ``agent_factory`` right away, and its bound ``run`` method is
    handed back.

    Args:
        agent_factory: Function that returns the agent instance.

    Returns:
        A function that takes input and returns a Trace.
    """
    langchain_integration = LangChainIntegration()
    # Setup happens eagerly, so any setup error surfaces here rather than on
    # the first call of the returned runner.
    langchain_integration.setup(agent_factory)
    runner = langchain_integration.run
    return runner
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: evaldeck
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: The evaluation framework for AI agents. Pytest for agents.
|
|
5
5
|
Project-URL: Homepage, https://github.com/tantra-run/evaldeck-py
|
|
6
6
|
Project-URL: Documentation, https://tantra-run.github.io/evaldeck-py/
|
|
@@ -29,6 +29,7 @@ Requires-Dist: rich>=13.0
|
|
|
29
29
|
Provides-Extra: all
|
|
30
30
|
Requires-Dist: anthropic>=0.18; extra == 'all'
|
|
31
31
|
Requires-Dist: openai>=1.0; extra == 'all'
|
|
32
|
+
Requires-Dist: openinference-instrumentation-langchain>=0.1; extra == 'all'
|
|
32
33
|
Provides-Extra: anthropic
|
|
33
34
|
Requires-Dist: anthropic>=0.18; extra == 'anthropic'
|
|
34
35
|
Provides-Extra: dev
|
|
@@ -45,6 +46,8 @@ Requires-Dist: mkdocs-autorefs>=0.5; extra == 'docs'
|
|
|
45
46
|
Requires-Dist: mkdocs-material>=9.5; extra == 'docs'
|
|
46
47
|
Requires-Dist: mkdocs>=1.5; extra == 'docs'
|
|
47
48
|
Requires-Dist: mkdocstrings[python]>=0.24; extra == 'docs'
|
|
49
|
+
Provides-Extra: langchain
|
|
50
|
+
Requires-Dist: openinference-instrumentation-langchain>=0.1; extra == 'langchain'
|
|
48
51
|
Provides-Extra: openai
|
|
49
52
|
Requires-Dist: openai>=1.0; extra == 'openai'
|
|
50
53
|
Description-Content-Type: text/markdown
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
evaldeck/__init__.py,sha256=SF9kMDGuf3UHMHrMeT8vBPzdLUtEqTjTAlRk6Fry9b0,1877
|
|
2
2
|
evaldeck/cli.py,sha256=Khrl2CRkrYP18b1mG7sot82t-Glm4YAuNJxNkbRjuGU,10655
|
|
3
|
-
evaldeck/config.py,sha256=
|
|
4
|
-
evaldeck/evaluator.py,sha256=
|
|
3
|
+
evaldeck/config.py,sha256=0Ge9ZWxV_xZ68vzFkDy_IZTyPB_TtDDJtd_gN6tRyoY,5911
|
|
4
|
+
evaldeck/evaluator.py,sha256=bOIL7vaafYieUD7oZWAs6cZCK9ILCp0Wh3OfLKNhXFQ,20115
|
|
5
5
|
evaldeck/results.py,sha256=gygFnuh2cZdZv5ygxDB-Lksv_9N5sAj2HFkEXRgTnqQ,6039
|
|
6
6
|
evaldeck/test_case.py,sha256=cy3Qfcuh4h1BlLPAncthzx3ILabtnnqN76MNhoA_9j8,5084
|
|
7
7
|
evaldeck/trace.py,sha256=erVrdJyfUilutM1z6NioIp8FVbeCh5XP6VhGtbwAClU,5787
|
|
@@ -9,13 +9,14 @@ evaldeck/graders/__init__.py,sha256=M418zN3y06Dn449oUtBZLjwSw_tiUzCx3xvRriFhSd8,
|
|
|
9
9
|
evaldeck/graders/base.py,sha256=CvLq_AQQfQzdrb4Hs1q6gcKB05e0qfWn31fxXir8T-k,4821
|
|
10
10
|
evaldeck/graders/code.py,sha256=t2rfaB-U9LZnwtyCQ8NIW3Qxrb9aGVlgzgTU8oOHJuM,18012
|
|
11
11
|
evaldeck/graders/llm.py,sha256=nWMPacy-wTLKcE-PnIBdWyD1OHpXKNaTOyF1eicbdK0,11725
|
|
12
|
-
evaldeck/integrations/__init__.py,sha256=
|
|
12
|
+
evaldeck/integrations/__init__.py,sha256=PwvBNsNCRBsFUVC5hd2vGk7r4DntuPZIDGMYJOojJfg,1131
|
|
13
|
+
evaldeck/integrations/langchain.py,sha256=TYbtHgFOjUpGgLihW4Tnqyyq-AiC_9dy8gc7a0_7kIM,3839
|
|
13
14
|
evaldeck/integrations/opentelemetry.py,sha256=j518FXsD0pqMNF4TvO97elX9oDiK_VaKxXd243q8dLE,15164
|
|
14
15
|
evaldeck/metrics/__init__.py,sha256=jXTIx5k9f1CjwS-9jc25YLeodhencoUOLfbP8qvcbbw,551
|
|
15
16
|
evaldeck/metrics/base.py,sha256=ibUQNfbkQEXTX1x8SqmFWelWAF1DQ785LXP1KYIZWUk,1790
|
|
16
17
|
evaldeck/metrics/builtin.py,sha256=ghdqeZRN51PhLeG8bGnPW2NNoPUAaeD05HtYlWw5yQM,5399
|
|
17
|
-
evaldeck-0.1.
|
|
18
|
-
evaldeck-0.1.
|
|
19
|
-
evaldeck-0.1.
|
|
20
|
-
evaldeck-0.1.
|
|
21
|
-
evaldeck-0.1.
|
|
18
|
+
evaldeck-0.1.3.dist-info/METADATA,sha256=9vJ9PR4y-KaUfGjKSNn24T9Hx5hRdiMH9HrBN_nM6oE,8780
|
|
19
|
+
evaldeck-0.1.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
20
|
+
evaldeck-0.1.3.dist-info/entry_points.txt,sha256=wtyPiDMSTKf41ShIbQC5X8USDn68OybGecpTaMNaGts,47
|
|
21
|
+
evaldeck-0.1.3.dist-info/licenses/LICENSE,sha256=sEp2tzjeTY9bP_jb1TWAGV4yvxNhVngHpJNglJkT9YA,10770
|
|
22
|
+
evaldeck-0.1.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|