cartesia-line 0.1.0a1__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartesia-line might be problematic. Click here for more details.
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/PKG-INFO +12 -6
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/README.md +7 -3
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/cartesia_line.egg-info/PKG-INFO +12 -6
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/cartesia_line.egg-info/SOURCES.txt +7 -1
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/cartesia_line.egg-info/requires.txt +3 -1
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/line/__init__.py +6 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/line/bridge.py +5 -1
- cartesia_line-0.1.2/line/evals/__init__.py +10 -0
- cartesia_line-0.1.2/line/evals/conversation_runner.py +195 -0
- cartesia_line-0.1.2/line/evals/similarity_utils.py +279 -0
- cartesia_line-0.1.2/line/evals/turn.py +236 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/line/events.py +20 -2
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/line/harness.py +19 -4
- cartesia_line-0.1.2/line/tools/system_tools.py +259 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/line/user_bridge.py +9 -1
- cartesia_line-0.1.2/line/utils/str.py +30 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/pyproject.toml +7 -3
- cartesia_line-0.1.2/tests/test_similarity_utils.py +99 -0
- cartesia_line-0.1.0a1/line/tools/system_tools.py +0 -120
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/LICENSE +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/cartesia_line.egg-info/dependency_links.txt +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/cartesia_line.egg-info/top_level.txt +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/line/bus.py +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/line/call_request.py +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/line/harness_types.py +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/line/nodes/__init__.py +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/line/nodes/base.py +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/line/nodes/conversation_context.py +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/line/nodes/reasoning.py +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/line/routes.py +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/line/tools/__init__.py +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/line/tools/tool_types.py +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/line/utils/__init__.py +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/line/utils/aio.py +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/line/utils/gemini_utils.py +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/line/utils/openai_utils.py +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/line/voice_agent_app.py +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/line/voice_agent_system.py +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/setup.cfg +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/tests/test_bridge.py +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/tests/test_bus.py +0 -0
- {cartesia_line-0.1.0a1 → cartesia_line-0.1.2}/tests/test_routes.py +0 -0
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cartesia-line
|
|
3
|
-
Version: 0.1.0a1
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: Cartesia Voice Agents SDK
|
|
5
5
|
Author-email: "Cartesia AI, Inc." <support@cartesia.ai>
|
|
6
6
|
License: Apache 2.0
|
|
7
7
|
Project-URL: Repository, https://github.com/cartesia-ai/line
|
|
8
|
-
Project-URL: Documentation, https://docs.cartesia.ai/line
|
|
8
|
+
Project-URL: Documentation, https://docs.cartesia.ai/line/
|
|
9
9
|
Project-URL: Homepage, https://cartesia.ai/agents
|
|
10
10
|
Keywords: voice,agents,ai,cartesia
|
|
11
11
|
Classifier: Development Status :: 4 - Beta
|
|
@@ -32,9 +32,11 @@ Requires-Dist: uvicorn<1,>=0.35.0
|
|
|
32
32
|
Provides-Extra: dev
|
|
33
33
|
Requires-Dist: pytest; extra == "dev"
|
|
34
34
|
Requires-Dist: pytest-asyncio; extra == "dev"
|
|
35
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
36
|
+
Requires-Dist: pytest-xdist==3.8.0; extra == "dev"
|
|
37
|
+
Requires-Dist: pytest-repeat==0.9.4; extra == "dev"
|
|
35
38
|
Requires-Dist: pre-commit; extra == "dev"
|
|
36
39
|
Requires-Dist: ruff==0.12.8; extra == "dev"
|
|
37
|
-
Requires-Dist: pytest-cov; extra == "dev"
|
|
38
40
|
Requires-Dist: google-genai<2,>=1.26.0; extra == "dev"
|
|
39
41
|
Provides-Extra: gemini
|
|
40
42
|
Requires-Dist: google-genai<2,>=1.26.0; python_version >= "3.9" and extra == "gemini"
|
|
@@ -65,9 +67,9 @@ Build intelligent, low-latency voice agents with background reasoning.
|
|
|
65
67
|
|
|
66
68
|
## Quickstart (< 5 minutes)
|
|
67
69
|
|
|
68
|
-
The Line SDK is designed to be used with the Cartesia [Line
|
|
70
|
+
The Line SDK is designed to be used with Cartesia's voice agent platform, [Line](https://cartesia.ai/agents).
|
|
69
71
|
- Create a [Cartesia account](https://play.cartesia.ai).
|
|
70
|
-
- Follow the [quickstart guide](https://docs.cartesia.ai/).
|
|
72
|
+
- Follow the [quickstart guide](https://docs.cartesia.ai/line/start-building/talk-to-your-first-agent).
|
|
71
73
|
|
|
72
74
|
And you'll be able to make your first voice call in a few minutes.
|
|
73
75
|
|
|
@@ -90,5 +92,9 @@ pip install cartesia-line
|
|
|
90
92
|
## Going Deeper
|
|
91
93
|
|
|
92
94
|
- **More examples**: [examples/](examples/) - See all available examples and patterns
|
|
93
|
-
- **
|
|
95
|
+
- **3rd party integrations**: [example_integrations/](example_integrations/) - See example integrations for external services
|
|
96
|
+
|
|
97
|
+
> [!NOTE]
|
|
98
|
+
> While Cartesia approves each example, they are implemented and maintained by our partners.
|
|
99
|
+
- **Full API reference**: [docs.cartesia.ai/line](https://docs.cartesia.ai/line/)
|
|
94
100
|
- **Get help**: [Discord community](https://discord.gg/cartesia)
|
|
@@ -20,9 +20,9 @@ Build intelligent, low-latency voice agents with background reasoning.
|
|
|
20
20
|
|
|
21
21
|
## Quickstart (< 5 minutes)
|
|
22
22
|
|
|
23
|
-
The Line SDK is designed to be used with the Cartesia [Line
|
|
23
|
+
The Line SDK is designed to be used with Cartesia's voice agent platform, [Line](https://cartesia.ai/agents).
|
|
24
24
|
- Create a [Cartesia account](https://play.cartesia.ai).
|
|
25
|
-
- Follow the [quickstart guide](https://docs.cartesia.ai/).
|
|
25
|
+
- Follow the [quickstart guide](https://docs.cartesia.ai/line/start-building/talk-to-your-first-agent).
|
|
26
26
|
|
|
27
27
|
And you'll be able to make your first voice call in a few minutes.
|
|
28
28
|
|
|
@@ -45,5 +45,9 @@ pip install cartesia-line
|
|
|
45
45
|
## Going Deeper
|
|
46
46
|
|
|
47
47
|
- **More examples**: [examples/](examples/) - See all available examples and patterns
|
|
48
|
-
- **
|
|
48
|
+
- **3rd party integrations**: [example_integrations/](example_integrations/) - See example integrations for external services
|
|
49
|
+
|
|
50
|
+
> [!NOTE]
|
|
51
|
+
> While Cartesia approves each example, they are implemented and maintained by our partners.
|
|
52
|
+
- **Full API reference**: [docs.cartesia.ai/line](https://docs.cartesia.ai/line/)
|
|
49
53
|
- **Get help**: [Discord community](https://discord.gg/cartesia)
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cartesia-line
|
|
3
|
-
Version: 0.1.0a1
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: Cartesia Voice Agents SDK
|
|
5
5
|
Author-email: "Cartesia AI, Inc." <support@cartesia.ai>
|
|
6
6
|
License: Apache 2.0
|
|
7
7
|
Project-URL: Repository, https://github.com/cartesia-ai/line
|
|
8
|
-
Project-URL: Documentation, https://docs.cartesia.ai/line
|
|
8
|
+
Project-URL: Documentation, https://docs.cartesia.ai/line/
|
|
9
9
|
Project-URL: Homepage, https://cartesia.ai/agents
|
|
10
10
|
Keywords: voice,agents,ai,cartesia
|
|
11
11
|
Classifier: Development Status :: 4 - Beta
|
|
@@ -32,9 +32,11 @@ Requires-Dist: uvicorn<1,>=0.35.0
|
|
|
32
32
|
Provides-Extra: dev
|
|
33
33
|
Requires-Dist: pytest; extra == "dev"
|
|
34
34
|
Requires-Dist: pytest-asyncio; extra == "dev"
|
|
35
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
36
|
+
Requires-Dist: pytest-xdist==3.8.0; extra == "dev"
|
|
37
|
+
Requires-Dist: pytest-repeat==0.9.4; extra == "dev"
|
|
35
38
|
Requires-Dist: pre-commit; extra == "dev"
|
|
36
39
|
Requires-Dist: ruff==0.12.8; extra == "dev"
|
|
37
|
-
Requires-Dist: pytest-cov; extra == "dev"
|
|
38
40
|
Requires-Dist: google-genai<2,>=1.26.0; extra == "dev"
|
|
39
41
|
Provides-Extra: gemini
|
|
40
42
|
Requires-Dist: google-genai<2,>=1.26.0; python_version >= "3.9" and extra == "gemini"
|
|
@@ -65,9 +67,9 @@ Build intelligent, low-latency voice agents with background reasoning.
|
|
|
65
67
|
|
|
66
68
|
## Quickstart (< 5 minutes)
|
|
67
69
|
|
|
68
|
-
The Line SDK is designed to be used with the Cartesia [Line
|
|
70
|
+
The Line SDK is designed to be used with Cartesia's voice agent platform, [Line](https://cartesia.ai/agents).
|
|
69
71
|
- Create a [Cartesia account](https://play.cartesia.ai).
|
|
70
|
-
- Follow the [quickstart guide](https://docs.cartesia.ai/).
|
|
72
|
+
- Follow the [quickstart guide](https://docs.cartesia.ai/line/start-building/talk-to-your-first-agent).
|
|
71
73
|
|
|
72
74
|
And you'll be able to make your first voice call in a few minutes.
|
|
73
75
|
|
|
@@ -90,5 +92,9 @@ pip install cartesia-line
|
|
|
90
92
|
## Going Deeper
|
|
91
93
|
|
|
92
94
|
- **More examples**: [examples/](examples/) - See all available examples and patterns
|
|
93
|
-
- **
|
|
95
|
+
- **3rd party integrations**: [example_integrations/](example_integrations/) - See example integrations for external services
|
|
96
|
+
|
|
97
|
+
> [!NOTE]
|
|
98
|
+
> While Cartesia approves each example, they are implemented and maintained by our partners.
|
|
99
|
+
- **Full API reference**: [docs.cartesia.ai/line](https://docs.cartesia.ai/line/)
|
|
94
100
|
- **Get help**: [Discord community](https://discord.gg/cartesia)
|
|
@@ -17,6 +17,10 @@ line/routes.py
|
|
|
17
17
|
line/user_bridge.py
|
|
18
18
|
line/voice_agent_app.py
|
|
19
19
|
line/voice_agent_system.py
|
|
20
|
+
line/evals/__init__.py
|
|
21
|
+
line/evals/conversation_runner.py
|
|
22
|
+
line/evals/similarity_utils.py
|
|
23
|
+
line/evals/turn.py
|
|
20
24
|
line/nodes/__init__.py
|
|
21
25
|
line/nodes/base.py
|
|
22
26
|
line/nodes/conversation_context.py
|
|
@@ -28,6 +32,8 @@ line/utils/__init__.py
|
|
|
28
32
|
line/utils/aio.py
|
|
29
33
|
line/utils/gemini_utils.py
|
|
30
34
|
line/utils/openai_utils.py
|
|
35
|
+
line/utils/str.py
|
|
31
36
|
tests/test_bridge.py
|
|
32
37
|
tests/test_bus.py
|
|
33
|
-
tests/test_routes.py
|
|
38
|
+
tests/test_routes.py
|
|
39
|
+
tests/test_similarity_utils.py
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from line.bridge import Bridge
|
|
4
4
|
from line.bus import Bus, Message
|
|
5
5
|
from line.call_request import CallRequest, PreCallResult
|
|
6
|
+
from line.evals import AgentTurn, ConversationRunner, Turn, UserTurn
|
|
6
7
|
from line.nodes.conversation_context import ConversationContext
|
|
7
8
|
|
|
8
9
|
# Reasoning components
|
|
@@ -26,4 +27,9 @@ __all__ = [
|
|
|
26
27
|
"VoiceAgentApp",
|
|
27
28
|
"VoiceAgentSystem",
|
|
28
29
|
"register_observability_event",
|
|
30
|
+
"AgentTurn",
|
|
31
|
+
"ConversationRunner",
|
|
32
|
+
"Turn",
|
|
33
|
+
"UserTurn",
|
|
34
|
+
"SimilarityUtils",
|
|
29
35
|
]
|
|
@@ -13,7 +13,11 @@ from typing import TYPE_CHECKING, Any, Callable, List, Optional, Type, TypeVar,
|
|
|
13
13
|
from loguru import logger
|
|
14
14
|
|
|
15
15
|
from line.bus import Bus, Message
|
|
16
|
-
from line.events import
|
|
16
|
+
from line.events import (
|
|
17
|
+
EventInstance,
|
|
18
|
+
EventsRegistry,
|
|
19
|
+
EventTypeOrAlias,
|
|
20
|
+
)
|
|
17
21
|
from line.routes import RouteBuilder, RouteHandler
|
|
18
22
|
|
|
19
23
|
if TYPE_CHECKING:
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ConversationRunner - A testing wrapper around ReasoningNode for conversation flow validation.
|
|
3
|
+
|
|
4
|
+
This class allows testing conversation flows by providing expected conversation traces
|
|
5
|
+
and validating that the ReasoningNode produces similar responses.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import List, Optional
|
|
9
|
+
|
|
10
|
+
from line.evals.similarity_utils import is_similar_str
|
|
11
|
+
from line.evals.turn import Turn
|
|
12
|
+
from line.events import EventInstance
|
|
13
|
+
from line.nodes.conversation_context import ConversationContext
|
|
14
|
+
from line.nodes.reasoning import ReasoningNode
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ConversationRunner:
|
|
18
|
+
"""
|
|
19
|
+
A testing wrapper for ReasoningNode that validates conversation flows.
|
|
20
|
+
|
|
21
|
+
This class takes an expected conversation trace and validates that a ReasoningNode
|
|
22
|
+
produces similar responses when given the same user inputs.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
def __init__(
|
|
26
|
+
self,
|
|
27
|
+
reasoning_node: ReasoningNode,
|
|
28
|
+
expected_conversation: List[Turn],
|
|
29
|
+
initial_agent_message: Optional[str] = None,
|
|
30
|
+
test_note: Optional[str] = None,
|
|
31
|
+
):
|
|
32
|
+
"""
|
|
33
|
+
Initialize the test conversation.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
reasoning_node: The ReasoningNode to test
|
|
37
|
+
expected_conversation: List of Turn objects representing the expected conversation flow,
|
|
38
|
+
alternating between user and agent turns
|
|
39
|
+
initial_agent_message: Optional initial message from agent to verify against first AgentTurn
|
|
40
|
+
"""
|
|
41
|
+
self.reasoning_node = reasoning_node
|
|
42
|
+
self.expected_conversation = expected_conversation
|
|
43
|
+
self.initial_agent_message = initial_agent_message
|
|
44
|
+
self.test_note = test_note
|
|
45
|
+
|
|
46
|
+
def _verify_initial_agent_message(self) -> Optional[List[EventInstance]]:
|
|
47
|
+
"""
|
|
48
|
+
Verify the initial agent message and return its events if it exists.
|
|
49
|
+
|
|
50
|
+
Returns:
|
|
51
|
+
List of EventInstance if conversation starts with agent turn, None otherwise
|
|
52
|
+
|
|
53
|
+
Raises:
|
|
54
|
+
AssertionError: If initial agent message doesn't match expected first AgentTurn
|
|
55
|
+
"""
|
|
56
|
+
if not self.expected_conversation:
|
|
57
|
+
return None
|
|
58
|
+
|
|
59
|
+
first_turn = self.expected_conversation[0]
|
|
60
|
+
if not first_turn.is_agent:
|
|
61
|
+
return None
|
|
62
|
+
|
|
63
|
+
# If initial_agent_message is provided, verify it matches
|
|
64
|
+
if self.initial_agent_message is None:
|
|
65
|
+
return first_turn.to_events()
|
|
66
|
+
|
|
67
|
+
if first_turn.text == self.initial_agent_message:
|
|
68
|
+
return first_turn.to_events()
|
|
69
|
+
|
|
70
|
+
results = is_similar_str(self.initial_agent_message, first_turn.text)
|
|
71
|
+
if results.is_success:
|
|
72
|
+
return first_turn.to_events()
|
|
73
|
+
|
|
74
|
+
error_str = (
|
|
75
|
+
f"Initial agent message doesn't match expected first AgentTurn.\n"
|
|
76
|
+
f"Provided initial_agent_message: '{self.initial_agent_message}'\n"
|
|
77
|
+
f"Expected first AgentTurn text: '{first_turn.text}'\n"
|
|
78
|
+
f"Similarity error: {results.error}"
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
if self.test_note is not None:
|
|
82
|
+
error_str += f"\nTest notes: {self.test_note}"
|
|
83
|
+
|
|
84
|
+
raise AssertionError(error_str)
|
|
85
|
+
|
|
86
|
+
def _verify_conversation_pattern(self) -> None:
|
|
87
|
+
"""
|
|
88
|
+
Validate that the conversation follows proper alternating user-assistant pattern.
|
|
89
|
+
|
|
90
|
+
Raises:
|
|
91
|
+
ValueError: If the conversation pattern is invalid
|
|
92
|
+
"""
|
|
93
|
+
if not self.expected_conversation:
|
|
94
|
+
return
|
|
95
|
+
|
|
96
|
+
# Ensure conversation ends with agent turn
|
|
97
|
+
last_turn = self.expected_conversation[-1]
|
|
98
|
+
if not last_turn.is_agent:
|
|
99
|
+
error_str = "Conversation must end with agent turn."
|
|
100
|
+
if self.test_note is not None:
|
|
101
|
+
error_str += f"\nTest notes: {self.test_note}"
|
|
102
|
+
raise ValueError(error_str)
|
|
103
|
+
|
|
104
|
+
# Validate alternating pattern
|
|
105
|
+
for i in range(1, len(self.expected_conversation)):
|
|
106
|
+
current_turn = self.expected_conversation[i]
|
|
107
|
+
previous_turn = self.expected_conversation[i - 1]
|
|
108
|
+
|
|
109
|
+
same_type = (current_turn.is_user and previous_turn.is_user) or (
|
|
110
|
+
current_turn.is_agent and previous_turn.is_agent
|
|
111
|
+
)
|
|
112
|
+
if same_type:
|
|
113
|
+
error_str = (
|
|
114
|
+
f"Invalid conversation pattern at position {i}: "
|
|
115
|
+
f"Two consecutive '{current_turn.role}' turns. "
|
|
116
|
+
f"Expected alternating user-assistant pattern."
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
if self.test_note is not None:
|
|
120
|
+
error_str += f"\nTest notes: {self.test_note}"
|
|
121
|
+
raise ValueError(error_str)
|
|
122
|
+
|
|
123
|
+
async def run(self) -> None:
|
|
124
|
+
"""
|
|
125
|
+
Run the conversation test, validating each agent response against expected.
|
|
126
|
+
|
|
127
|
+
This method processes the expected conversation turn by turn:
|
|
128
|
+
1. Process user turns by adding them to conversation history
|
|
129
|
+
2. For each user turn, get the expected agent response
|
|
130
|
+
3. Build ConversationContext and call process_context() on ReasoningNode
|
|
131
|
+
4. Convert actual response to Turn and validate similarity
|
|
132
|
+
5. Continue with next turn
|
|
133
|
+
|
|
134
|
+
Raises:
|
|
135
|
+
ValueError: If conversation pattern is invalid (non-alternating user-assistant turns)
|
|
136
|
+
AssertionError: If any agent response doesn't match expected
|
|
137
|
+
"""
|
|
138
|
+
# Validate conversation pattern first
|
|
139
|
+
self._verify_conversation_pattern()
|
|
140
|
+
|
|
141
|
+
# Track conversation history
|
|
142
|
+
conversation_history: List[EventInstance] = []
|
|
143
|
+
|
|
144
|
+
# Handle initial agent message
|
|
145
|
+
initial_events = self._verify_initial_agent_message()
|
|
146
|
+
i = 0
|
|
147
|
+
if initial_events is not None:
|
|
148
|
+
# Add the first agent turn to conversation history and skip it
|
|
149
|
+
conversation_history.extend(initial_events)
|
|
150
|
+
i = 1
|
|
151
|
+
|
|
152
|
+
while i < len(self.expected_conversation):
|
|
153
|
+
user_turn = self.expected_conversation[i]
|
|
154
|
+
|
|
155
|
+
# Add user turn events to history
|
|
156
|
+
user_events = user_turn.to_events()
|
|
157
|
+
conversation_history.extend(user_events)
|
|
158
|
+
i += 1
|
|
159
|
+
|
|
160
|
+
# Get expected agent response from following turn
|
|
161
|
+
expected_agent_turn = self.expected_conversation[i]
|
|
162
|
+
|
|
163
|
+
# Build conversation context from history
|
|
164
|
+
ctx = ConversationContext(
|
|
165
|
+
events=conversation_history.copy(),
|
|
166
|
+
system_prompt=self.reasoning_node.system_prompt,
|
|
167
|
+
)
|
|
168
|
+
|
|
169
|
+
# Get actual response from reasoning node
|
|
170
|
+
actual_events = []
|
|
171
|
+
async for event in self.reasoning_node.process_context(ctx):
|
|
172
|
+
actual_events.append(event)
|
|
173
|
+
|
|
174
|
+
# Convert actual events to Turn
|
|
175
|
+
actual_turn = Turn.from_events(actual_events)
|
|
176
|
+
|
|
177
|
+
# Validate similarity
|
|
178
|
+
similarity_error = expected_agent_turn.is_similar(actual_turn)
|
|
179
|
+
if similarity_error is not None:
|
|
180
|
+
error_str = (
|
|
181
|
+
f"Agent turn doesn't match expected.\n"
|
|
182
|
+
f" User message: {user_turn.text}\n"
|
|
183
|
+
f" Expected: {expected_agent_turn}\n"
|
|
184
|
+
f" Actual: {actual_turn}\n"
|
|
185
|
+
f" Reason: {similarity_error}\n"
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
if self.test_note is not None:
|
|
189
|
+
error_str += f"\nTest notes: {self.test_note}"
|
|
190
|
+
|
|
191
|
+
raise AssertionError(error_str)
|
|
192
|
+
|
|
193
|
+
# Add actual agent turn events to history for next iteration
|
|
194
|
+
conversation_history.extend(actual_events)
|
|
195
|
+
i += 1
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Similarity checking utilities for conversation evaluation.
|
|
3
|
+
|
|
4
|
+
This module provides functions for comparing strings and dictionaries with semantic
|
|
5
|
+
similarity checking using AI models.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from typing import Dict, List, Optional, Union # noqa: F401
|
|
10
|
+
|
|
11
|
+
from google.genai import Client
|
|
12
|
+
from google.genai.types import GenerateContentConfig
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
|
|
16
|
+
class SimilarityResult:
|
|
17
|
+
is_success: Optional[bool] # None when the check is not applicable
|
|
18
|
+
error: Optional[str] # Error message if not successful
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def is_statement_pattern(s: str) -> bool:
|
|
22
|
+
"""Check if string is a statement pattern like <mentions something>."""
|
|
23
|
+
return s.strip().startswith("<") and s.strip().endswith(">")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def extract_statement(s: str) -> str:
|
|
27
|
+
"""Extract statement content from pattern by removing < and >."""
|
|
28
|
+
return s.strip()[1:-1]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def check_string_statement(statement: str, actual_text: str) -> SimilarityResult:
|
|
32
|
+
"""Check if actual text matches a statement pattern.
|
|
33
|
+
|
|
34
|
+
Args:
|
|
35
|
+
statement: The statement description (without < >)
|
|
36
|
+
actual_text: The actual text to check against the statement
|
|
37
|
+
|
|
38
|
+
Returns:
|
|
39
|
+
SimilarityResult with is_success=True if the text matches the statement; otherwise is_success=False with an explanation in error
|
|
40
|
+
"""
|
|
41
|
+
client = Client()
|
|
42
|
+
|
|
43
|
+
prompt = f"""
|
|
44
|
+
Check if the following text matches this statement/requirement:
|
|
45
|
+
|
|
46
|
+
Statement: "{statement}"
|
|
47
|
+
Text: "{actual_text}"
|
|
48
|
+
|
|
49
|
+
Instructions:
|
|
50
|
+
- Respond with "YES" if the text matches the statement, or "NO: [reason]" if it doesn't.
|
|
51
|
+
- The text should contain or express the concept described in the statement.
|
|
52
|
+
|
|
53
|
+
Examples:
|
|
54
|
+
- Statement: "mentions SOC-2 compliance" vs Text: "Our security audit passed SOC-2 requirements" → YES
|
|
55
|
+
- Statement: "mentions SOC-2 compliance" vs Text: "We follow security best practices" → NO:
|
|
56
|
+
Doesn't mention SOC-2
|
|
57
|
+
- Statement: "asks for user name" vs Text: "What's your name?" → YES
|
|
58
|
+
- Statement: "asks for user name" vs Text: "How old are you?" → NO: Asks for age, not name
|
|
59
|
+
"""
|
|
60
|
+
|
|
61
|
+
config = GenerateContentConfig(
|
|
62
|
+
temperature=0.1,
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
response = client.models.generate_content(model="gemini-2.5-flash-lite", contents=prompt, config=config)
|
|
66
|
+
response_text = response.text.strip() if response.text else ""
|
|
67
|
+
|
|
68
|
+
if response_text.upper().startswith("YES"):
|
|
69
|
+
return SimilarityResult(is_success=True, error=None)
|
|
70
|
+
elif response_text.upper().startswith("NO"):
|
|
71
|
+
reason = response_text[3:].strip().lstrip(":").strip()
|
|
72
|
+
return SimilarityResult(is_success=False, error=reason)
|
|
73
|
+
else:
|
|
74
|
+
return SimilarityResult(
|
|
75
|
+
is_success=False,
|
|
76
|
+
error=f"Unexpected response format from statement check: {response_text}",
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def is_similar_str(a: str, b: str) -> SimilarityResult:
|
|
81
|
+
"""Check if two strings have the same meaning using Gemini with special rule support.
|
|
82
|
+
|
|
83
|
+
Special Rules:
|
|
84
|
+
- "*" wildcard: Matches any string content (either a or b can be "*")
|
|
85
|
+
- Statement patterns: Strings like "<mentions SOC-2 compliance>" match text containing that concept
|
|
86
|
+
|
|
87
|
+
Args:
|
|
88
|
+
a: First string to compare
|
|
89
|
+
b: Second string to compare
|
|
90
|
+
|
|
91
|
+
Returns:
|
|
92
|
+
SimilarityResult with is_success=True if the strings are similar; otherwise is_success=False with an explanation in error
|
|
93
|
+
"""
|
|
94
|
+
# * means any string is allowed
|
|
95
|
+
if a == "*" or b == "*":
|
|
96
|
+
return SimilarityResult(is_success=True, error=None)
|
|
97
|
+
|
|
98
|
+
# Handle statement patterns
|
|
99
|
+
result = is_similar_via_statement_pattern(a, b)
|
|
100
|
+
if result.is_success is not None:
|
|
101
|
+
return result
|
|
102
|
+
|
|
103
|
+
# Handle single text comparison
|
|
104
|
+
return is_similar_via_single_text_comparison(a, b)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def is_similar_via_statement_pattern(a: str, b: str) -> SimilarityResult:
|
|
108
|
+
a_is_statement = is_statement_pattern(a)
|
|
109
|
+
b_is_statement = is_statement_pattern(b)
|
|
110
|
+
|
|
111
|
+
if a_is_statement or b_is_statement:
|
|
112
|
+
# At least one is a statement pattern
|
|
113
|
+
if a_is_statement and b_is_statement:
|
|
114
|
+
# Both are statement patterns - compare the statements themselves
|
|
115
|
+
statement_a = extract_statement(a)
|
|
116
|
+
statement_b = extract_statement(b)
|
|
117
|
+
return is_similar_str(statement_a, statement_b) # Recursive call without < >
|
|
118
|
+
|
|
119
|
+
# One is a statement, one is actual text
|
|
120
|
+
statement = extract_statement(a) if a_is_statement else extract_statement(b)
|
|
121
|
+
actual_text = b if a_is_statement else a
|
|
122
|
+
|
|
123
|
+
return check_string_statement(statement, actual_text)
|
|
124
|
+
|
|
125
|
+
return SimilarityResult(is_success=None, error=None)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def is_similar_via_single_text_comparison(a: str, b: str) -> SimilarityResult:
|
|
129
|
+
# First check if strings are equal after basic normalization
|
|
130
|
+
if a.lower().strip() == b.lower().strip():
|
|
131
|
+
return SimilarityResult(is_success=True, error=None)
|
|
132
|
+
|
|
133
|
+
client = Client()
|
|
134
|
+
|
|
135
|
+
prompt = f"""
|
|
136
|
+
Compare these two strings and determine if they have the same or very similar meaning:
|
|
137
|
+
|
|
138
|
+
String A: "{a}"
|
|
139
|
+
String B: "{b}"
|
|
140
|
+
|
|
141
|
+
Rules:
|
|
142
|
+
- Respond with "YES" if they have the same meaning, or "NO: [reason]" if they don't.
|
|
143
|
+
- Consider paraphrasing, synonyms, and different ways of expressing the same concept.
|
|
144
|
+
- Ignore filler prefixes like "Now", "Okay", "Got it", "Thank you", "Finally", "Sounds good", etc.
|
|
145
|
+
- Affirmative phrases like "yes", "that is correct" or "correct" are similar
|
|
146
|
+
- For alphanumeric matching, you may allow mismatches on spacing
|
|
147
|
+
- For alphanumeric matching, you may allow matching when spelled out (e.g. 1 is equivalent to "one", 2 is equivalent to "two", etc.)
|
|
148
|
+
- For alphanumeric matching, you may allow semantic matching between spelled out numbers with spaces or concatenated string of digits
|
|
149
|
+
|
|
150
|
+
Examples:
|
|
151
|
+
- "What's your name?" vs "Can you tell me your name?" → YES
|
|
152
|
+
- "What's your name?" vs "What's your age?" → NO: Different information being requested
|
|
153
|
+
- "You are verified" vs "Your identity is confirmed" → YES
|
|
154
|
+
- "Now, what's your Name?" vs "Thank you, what's your name?" → YES
|
|
155
|
+
- "Hello" vs "Goodbye" → NO: Opposite greetings with different meanings
|
|
156
|
+
- "one two three four" versus "1234" → YES
|
|
157
|
+
""" # noqa: E501
|
|
158
|
+
|
|
159
|
+
config = GenerateContentConfig(
|
|
160
|
+
temperature=0.1, # Low temperature for consistent results
|
|
161
|
+
)
|
|
162
|
+
|
|
163
|
+
response = client.models.generate_content(model="gemini-2.5-flash-lite", contents=prompt, config=config)
|
|
164
|
+
|
|
165
|
+
response_text = response.text.strip() if response.text else ""
|
|
166
|
+
|
|
167
|
+
if response_text.upper().startswith("YES"):
|
|
168
|
+
return SimilarityResult(is_success=True, error=None)
|
|
169
|
+
elif response_text.upper().startswith("NO"):
|
|
170
|
+
# Extract and return reason
|
|
171
|
+
reason = response_text[3:].strip().lstrip(":").strip()
|
|
172
|
+
return SimilarityResult(is_success=False, error=reason)
|
|
173
|
+
else:
|
|
174
|
+
# Fallback in case of unexpected response format
|
|
175
|
+
return SimilarityResult(
|
|
176
|
+
is_success=False,
|
|
177
|
+
error=f"Unexpected response format from similarity check: {response_text}\n"
|
|
178
|
+
f'String A: "{a}"\nString B: "{b}"',
|
|
179
|
+
)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def is_similar_text(a: Union[List[str], str], b: Union[List[str], str]) -> SimilarityResult:
|
|
183
|
+
"""Given two texts that are lists, check that at least one element from a is similar to one element from b.
|
|
184
|
+
|
|
185
|
+
Args:
|
|
186
|
+
a: First string or list of strings to compare (a bare string is treated as a one-element list)
|
|
187
|
+
b: Second string or list of strings to compare (a bare string is treated as a one-element list)
|
|
188
|
+
|
|
189
|
+
Returns:
|
|
190
|
+
SimilarityResult indicating if the lists are similar
|
|
191
|
+
""" # noqa: E501
|
|
192
|
+
a = [a] if isinstance(a, str) else a
|
|
193
|
+
b = [b] if isinstance(b, str) else b
|
|
194
|
+
|
|
195
|
+
if not a and not b:
|
|
196
|
+
raise RuntimeError("Both lists are empty")
|
|
197
|
+
if not a or not b:
|
|
198
|
+
return SimilarityResult(is_success=False, error=f"One list is empty: a={a}, b={b}")
|
|
199
|
+
|
|
200
|
+
# Check if any element from 'a' is similar to any element from 'b'
|
|
201
|
+
for a_item in a:
|
|
202
|
+
for b_item in b:
|
|
203
|
+
result = is_similar_str(a_item, b_item)
|
|
204
|
+
if result.is_success:
|
|
205
|
+
return SimilarityResult(is_success=True, error=None)
|
|
206
|
+
|
|
207
|
+
if len(a) == 1 and len(b) == 1:
|
|
208
|
+
return SimilarityResult(is_success=False, error=f"{a} != {b}")
|
|
209
|
+
else:
|
|
210
|
+
return SimilarityResult(
|
|
211
|
+
is_success=False, error=f"No similar elements found the following two lists: a={a}, b={b}"
|
|
212
|
+
)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def is_similar_dict(actual: Dict, expected: Dict) -> SimilarityResult:
|
|
216
|
+
"""Recursively check if two dictionaries are similar.
|
|
217
|
+
|
|
218
|
+
Uses string similarity checking for string values and recursive comparison for nested dicts.
|
|
219
|
+
|
|
220
|
+
Args:
|
|
221
|
+
actual: The actual dictionary
|
|
222
|
+
expected: The expected dictionary
|
|
223
|
+
|
|
224
|
+
Returns:
|
|
225
|
+
SimilarityResult with is_success=True if the dictionaries are similar; otherwise is_success=False with an explanation in error
|
|
226
|
+
"""
|
|
227
|
+
# Check if keys match
|
|
228
|
+
actual_keys = set(actual.keys())
|
|
229
|
+
expected_keys = set(expected.keys())
|
|
230
|
+
|
|
231
|
+
if actual_keys != expected_keys:
|
|
232
|
+
missing_keys = expected_keys - actual_keys
|
|
233
|
+
extra_keys = actual_keys - expected_keys
|
|
234
|
+
error_parts = []
|
|
235
|
+
if missing_keys:
|
|
236
|
+
error_parts.append(f"missing keys: {list(missing_keys)}")
|
|
237
|
+
if extra_keys:
|
|
238
|
+
error_parts.append(f"extra keys: {list(extra_keys)}")
|
|
239
|
+
return SimilarityResult(
|
|
240
|
+
is_success=False,
|
|
241
|
+
error=f"Key mismatch - {', '.join(error_parts)}",
|
|
242
|
+
)
|
|
243
|
+
|
|
244
|
+
# Check each key-value pair
|
|
245
|
+
for key in expected_keys:
|
|
246
|
+
actual_value = actual[key]
|
|
247
|
+
expected_value = expected[key]
|
|
248
|
+
|
|
249
|
+
# Skip validation if expected value is None
|
|
250
|
+
if expected_value is None:
|
|
251
|
+
continue
|
|
252
|
+
|
|
253
|
+
# Handle string values with similarity checking
|
|
254
|
+
if isinstance(expected_value, str) and isinstance(actual_value, str):
|
|
255
|
+
result = is_similar_str(actual_value, expected_value)
|
|
256
|
+
if result.is_success is False:
|
|
257
|
+
return SimilarityResult(
|
|
258
|
+
is_success=False,
|
|
259
|
+
error=f"String value mismatch for key '{key}': {result.error}",
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
# Handle nested dictionaries
|
|
263
|
+
elif isinstance(expected_value, dict) and isinstance(actual_value, dict):
|
|
264
|
+
error = is_similar_dict(actual_value, expected_value)
|
|
265
|
+
if error.is_success is False:
|
|
266
|
+
return SimilarityResult(
|
|
267
|
+
is_success=False,
|
|
268
|
+
error=f"Nested dict mismatch for key '{key}': {error}",
|
|
269
|
+
)
|
|
270
|
+
|
|
271
|
+
# Handle other types with exact comparison
|
|
272
|
+
else:
|
|
273
|
+
if actual_value != expected_value:
|
|
274
|
+
return SimilarityResult(
|
|
275
|
+
is_success=False,
|
|
276
|
+
error=f"Value mismatch for key '{key}': expected {expected_value}, got {actual_value}",
|
|
277
|
+
)
|
|
278
|
+
|
|
279
|
+
return SimilarityResult(is_success=True, error=None)
|