evaldeck-0.1.1-py3-none-any.whl → evaldeck-0.1.3-py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
evaldeck/config.py CHANGED
@@ -15,6 +15,7 @@ class AgentConfig(BaseModel):
15
15
  module: str | None = None
16
16
  function: str | None = None
17
17
  class_name: str | None = None
18
+ framework: str | None = None # "langchain", "crewai", etc.
18
19
 
19
20
 
20
21
  class GraderDefaults(BaseModel):
evaldeck/evaluator.py CHANGED
@@ -571,4 +571,17 @@ class EvaluationRunner:
571
571
 
572
572
  module = importlib.import_module(agent_config.module)
573
573
  func = getattr(module, agent_config.function)
574
+
575
+ # Handle framework-specific integration
576
+ if agent_config.framework:
577
+ framework = agent_config.framework.lower()
578
+
579
+ if framework == "langchain":
580
+ from evaldeck.integrations.langchain import create_langchain_runner
581
+
582
+ return create_langchain_runner(func)
583
+
584
+ else:
585
+ raise ValueError(f"Unknown framework: {agent_config.framework}")
586
+
574
587
  return func
evaldeck/integrations/__init__.py CHANGED
@@ -3,7 +3,7 @@
3
3
  This module provides the OpenTelemetry/OpenInference adapter for capturing traces
4
4
  from any instrumented AI framework (LangChain, CrewAI, LiteLLM, OpenAI, Anthropic, etc.)
5
5
 
6
- Usage:
6
+ Basic usage (manual setup):
7
7
  from evaldeck.integrations import EvaldeckSpanProcessor, setup_otel_tracing
8
8
  from openinference.instrumentation.langchain import LangChainInstrumentor
9
9
 
@@ -14,6 +14,19 @@ Usage:
14
14
 
15
15
  trace = processor.get_latest_trace()
16
16
  result = evaluator.evaluate(trace, test_case)
17
+
18
+ With framework integration (automatic setup via evaldeck.yaml):
19
+ # evaldeck.yaml
20
+ agent:
21
+ module: my_agent
22
+ function: create_agent
23
+ framework: langchain
24
+
25
+ # my_agent.py
26
+ def create_agent():
27
+ return create_react_agent(llm, tools)
28
+
29
+ # Run: evaldeck run
17
30
  """
18
31
 
19
32
  from evaldeck.integrations.opentelemetry import (
evaldeck/integrations/langchain.py ADDED
@@ -0,0 +1,125 @@
1
+ """LangChain integration for evaldeck.
2
+
3
+ Provides automatic instrumentation and trace capture for LangChain/LangGraph agents.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from typing import TYPE_CHECKING, Any, Callable
9
+
10
+ if TYPE_CHECKING:
11
+ from evaldeck.trace import Trace
12
+
13
+
14
+ class LangChainIntegration:
15
+ """LangChain/LangGraph integration.
16
+
17
+ Automatically sets up OpenTelemetry tracing and provides a wrapper
18
+ that invokes the agent and returns a Trace.
19
+ """
20
+
21
+ def __init__(self) -> None:
22
+ self._processor: Any = None
23
+ self._agent: Any = None
24
+ self._initialized = False
25
+
26
+ def setup(self, agent_factory: Callable[[], Any]) -> None:
27
+ """Set up instrumentation and create the agent.
28
+
29
+ Args:
30
+ agent_factory: Function that returns the agent instance.
31
+ """
32
+ if self._initialized:
33
+ return
34
+
35
+ # Import here to make langchain an optional dependency
36
+ try:
37
+ from openinference.instrumentation.langchain import LangChainInstrumentor
38
+ except ImportError as e:
39
+ raise ImportError(
40
+ "LangChain integration requires openinference-instrumentation-langchain. "
41
+ "Install with: pip install evaldeck[langchain]"
42
+ ) from e
43
+
44
+ from evaldeck.integrations import setup_otel_tracing
45
+
46
+ # Set up OTel tracing
47
+ self._processor = setup_otel_tracing()
48
+
49
+ # Instrument LangChain
50
+ LangChainInstrumentor().instrument()
51
+
52
+ # Create the agent
53
+ self._agent = agent_factory()
54
+ self._initialized = True
55
+
56
+ def run(self, input: str) -> Trace:
57
+ """Run the agent and return a trace.
58
+
59
+ Args:
60
+ input: The input string to send to the agent.
61
+
62
+ Returns:
63
+ Trace captured from the agent execution.
64
+ """
65
+ if not self._initialized:
66
+ raise RuntimeError("Integration not initialized. Call setup() first.")
67
+
68
+ # Reset processor for fresh trace
69
+ self._processor.reset()
70
+
71
+ # Invoke the agent - auto-detect format
72
+ self._invoke_agent(input)
73
+
74
+ # Get and return trace
75
+ trace = self._processor.get_latest_trace()
76
+ if trace is None:
77
+ raise RuntimeError("No trace captured from agent execution")
78
+
79
+ return trace
80
+
81
+ def _invoke_agent(self, input: str) -> Any:
82
+ """Invoke the agent with the appropriate format.
83
+
84
+ Auto-detects LangGraph vs legacy LangChain format.
85
+ """
86
+ # LangGraph style (current)
87
+ if hasattr(self._agent, "invoke"):
88
+ # Try LangGraph message format first
89
+ try:
90
+ return self._agent.invoke({"messages": [("human", input)]})
91
+ except (TypeError, KeyError):
92
+ # Fall back to simple input
93
+ try:
94
+ return self._agent.invoke({"input": input})
95
+ except (TypeError, KeyError):
96
+ return self._agent.invoke(input)
97
+
98
+ # Legacy LangChain style
99
+ if hasattr(self._agent, "run"):
100
+ return self._agent.run(input)
101
+
102
+ # Callable
103
+ if callable(self._agent):
104
+ return self._agent(input)
105
+
106
+ raise RuntimeError(
107
+ f"Don't know how to invoke agent of type {type(self._agent)}. "
108
+ "Agent must have invoke(), run(), or be callable."
109
+ )
110
+
111
+
112
+ def create_langchain_runner(agent_factory: Callable[[], Any]) -> Callable[[str], Trace]:
113
+ """Create a runner function for LangChain agents.
114
+
115
+ This is the main entry point used by evaldeck's EvaluationRunner.
116
+
117
+ Args:
118
+ agent_factory: Function that returns the agent instance.
119
+
120
+ Returns:
121
+ A function that takes input and returns a Trace.
122
+ """
123
+ integration = LangChainIntegration()
124
+ integration.setup(agent_factory)
125
+ return integration.run
{evaldeck-0.1.1.dist-info → evaldeck-0.1.3.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: evaldeck
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: The evaluation framework for AI agents. Pytest for agents.
5
5
  Project-URL: Homepage, https://github.com/tantra-run/evaldeck-py
6
6
  Project-URL: Documentation, https://tantra-run.github.io/evaldeck-py/
@@ -29,6 +29,7 @@ Requires-Dist: rich>=13.0
29
29
  Provides-Extra: all
30
30
  Requires-Dist: anthropic>=0.18; extra == 'all'
31
31
  Requires-Dist: openai>=1.0; extra == 'all'
32
+ Requires-Dist: openinference-instrumentation-langchain>=0.1; extra == 'all'
32
33
  Provides-Extra: anthropic
33
34
  Requires-Dist: anthropic>=0.18; extra == 'anthropic'
34
35
  Provides-Extra: dev
@@ -45,6 +46,8 @@ Requires-Dist: mkdocs-autorefs>=0.5; extra == 'docs'
45
46
  Requires-Dist: mkdocs-material>=9.5; extra == 'docs'
46
47
  Requires-Dist: mkdocs>=1.5; extra == 'docs'
47
48
  Requires-Dist: mkdocstrings[python]>=0.24; extra == 'docs'
49
+ Provides-Extra: langchain
50
+ Requires-Dist: openinference-instrumentation-langchain>=0.1; extra == 'langchain'
48
51
  Provides-Extra: openai
49
52
  Requires-Dist: openai>=1.0; extra == 'openai'
50
53
  Description-Content-Type: text/markdown
{evaldeck-0.1.1.dist-info → evaldeck-0.1.3.dist-info}/RECORD CHANGED
@@ -1,7 +1,7 @@
1
1
  evaldeck/__init__.py,sha256=SF9kMDGuf3UHMHrMeT8vBPzdLUtEqTjTAlRk6Fry9b0,1877
2
2
  evaldeck/cli.py,sha256=Khrl2CRkrYP18b1mG7sot82t-Glm4YAuNJxNkbRjuGU,10655
3
- evaldeck/config.py,sha256=V1o5q7nkek2GH1q-bCszxO5DqmrsrvuKDGvb2eZikvs,5847
4
- evaldeck/evaluator.py,sha256=fRT9Keav5tm1dkHgca8Z0hnVrapCyS6l8Irm1SqjiTE,19695
3
+ evaldeck/config.py,sha256=0Ge9ZWxV_xZ68vzFkDy_IZTyPB_TtDDJtd_gN6tRyoY,5911
4
+ evaldeck/evaluator.py,sha256=bOIL7vaafYieUD7oZWAs6cZCK9ILCp0Wh3OfLKNhXFQ,20115
5
5
  evaldeck/results.py,sha256=gygFnuh2cZdZv5ygxDB-Lksv_9N5sAj2HFkEXRgTnqQ,6039
6
6
  evaldeck/test_case.py,sha256=cy3Qfcuh4h1BlLPAncthzx3ILabtnnqN76MNhoA_9j8,5084
7
7
  evaldeck/trace.py,sha256=erVrdJyfUilutM1z6NioIp8FVbeCh5XP6VhGtbwAClU,5787
@@ -9,13 +9,14 @@ evaldeck/graders/__init__.py,sha256=M418zN3y06Dn449oUtBZLjwSw_tiUzCx3xvRriFhSd8,
9
9
  evaldeck/graders/base.py,sha256=CvLq_AQQfQzdrb4Hs1q6gcKB05e0qfWn31fxXir8T-k,4821
10
10
  evaldeck/graders/code.py,sha256=t2rfaB-U9LZnwtyCQ8NIW3Qxrb9aGVlgzgTU8oOHJuM,18012
11
11
  evaldeck/graders/llm.py,sha256=nWMPacy-wTLKcE-PnIBdWyD1OHpXKNaTOyF1eicbdK0,11725
12
- evaldeck/integrations/__init__.py,sha256=IFyhW7gmnm3rRo27RRfL6Q2sGZ6lye7Abt9XqUql9eI,821
12
+ evaldeck/integrations/__init__.py,sha256=PwvBNsNCRBsFUVC5hd2vGk7r4DntuPZIDGMYJOojJfg,1131
13
+ evaldeck/integrations/langchain.py,sha256=TYbtHgFOjUpGgLihW4Tnqyyq-AiC_9dy8gc7a0_7kIM,3839
13
14
  evaldeck/integrations/opentelemetry.py,sha256=j518FXsD0pqMNF4TvO97elX9oDiK_VaKxXd243q8dLE,15164
14
15
  evaldeck/metrics/__init__.py,sha256=jXTIx5k9f1CjwS-9jc25YLeodhencoUOLfbP8qvcbbw,551
15
16
  evaldeck/metrics/base.py,sha256=ibUQNfbkQEXTX1x8SqmFWelWAF1DQ785LXP1KYIZWUk,1790
16
17
  evaldeck/metrics/builtin.py,sha256=ghdqeZRN51PhLeG8bGnPW2NNoPUAaeD05HtYlWw5yQM,5399
17
- evaldeck-0.1.1.dist-info/METADATA,sha256=jTSRn_6z8fAWBhmO87g1cosAIOEfr55kWa6hpp4Ngtg,8596
18
- evaldeck-0.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
19
- evaldeck-0.1.1.dist-info/entry_points.txt,sha256=wtyPiDMSTKf41ShIbQC5X8USDn68OybGecpTaMNaGts,47
20
- evaldeck-0.1.1.dist-info/licenses/LICENSE,sha256=sEp2tzjeTY9bP_jb1TWAGV4yvxNhVngHpJNglJkT9YA,10770
21
- evaldeck-0.1.1.dist-info/RECORD,,
18
+ evaldeck-0.1.3.dist-info/METADATA,sha256=9vJ9PR4y-KaUfGjKSNn24T9Hx5hRdiMH9HrBN_nM6oE,8780
19
+ evaldeck-0.1.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
20
+ evaldeck-0.1.3.dist-info/entry_points.txt,sha256=wtyPiDMSTKf41ShIbQC5X8USDn68OybGecpTaMNaGts,47
21
+ evaldeck-0.1.3.dist-info/licenses/LICENSE,sha256=sEp2tzjeTY9bP_jb1TWAGV4yvxNhVngHpJNglJkT9YA,10770
22
+ evaldeck-0.1.3.dist-info/RECORD,,