cartesia-line 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartesia-line might be problematic. Click here for more details.

Files changed (41) hide show
  1. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/PKG-INFO +4 -2
  2. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/cartesia_line.egg-info/PKG-INFO +4 -2
  3. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/cartesia_line.egg-info/SOURCES.txt +7 -1
  4. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/cartesia_line.egg-info/requires.txt +3 -1
  5. cartesia_line-0.1.3/line/evals/__init__.py +10 -0
  6. cartesia_line-0.1.3/line/evals/conversation_runner.py +195 -0
  7. cartesia_line-0.1.3/line/evals/similarity_utils.py +279 -0
  8. cartesia_line-0.1.3/line/evals/turn.py +236 -0
  9. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/events.py +1 -2
  10. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/harness.py +4 -4
  11. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/tools/system_tools.py +70 -3
  12. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/user_bridge.py +1 -1
  13. cartesia_line-0.1.3/line/utils/str.py +30 -0
  14. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/pyproject.toml +6 -2
  15. cartesia_line-0.1.3/tests/test_similarity_utils.py +99 -0
  16. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/LICENSE +0 -0
  17. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/README.md +0 -0
  18. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/cartesia_line.egg-info/dependency_links.txt +0 -0
  19. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/cartesia_line.egg-info/top_level.txt +0 -0
  20. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/__init__.py +0 -0
  21. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/bridge.py +0 -0
  22. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/bus.py +0 -0
  23. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/call_request.py +0 -0
  24. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/harness_types.py +0 -0
  25. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/nodes/__init__.py +0 -0
  26. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/nodes/base.py +0 -0
  27. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/nodes/conversation_context.py +0 -0
  28. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/nodes/reasoning.py +0 -0
  29. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/routes.py +0 -0
  30. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/tools/__init__.py +0 -0
  31. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/tools/tool_types.py +0 -0
  32. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/utils/__init__.py +0 -0
  33. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/utils/aio.py +0 -0
  34. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/utils/gemini_utils.py +0 -0
  35. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/utils/openai_utils.py +0 -0
  36. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/voice_agent_app.py +0 -0
  37. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/line/voice_agent_system.py +0 -0
  38. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/setup.cfg +0 -0
  39. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/tests/test_bridge.py +0 -0
  40. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/tests/test_bus.py +0 -0
  41. {cartesia_line-0.1.1 → cartesia_line-0.1.3}/tests/test_routes.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cartesia-line
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: Cartesia Voice Agents SDK
5
5
  Author-email: "Cartesia AI, Inc." <support@cartesia.ai>
6
6
  License: Apache 2.0
@@ -32,9 +32,11 @@ Requires-Dist: uvicorn<1,>=0.35.0
32
32
  Provides-Extra: dev
33
33
  Requires-Dist: pytest; extra == "dev"
34
34
  Requires-Dist: pytest-asyncio; extra == "dev"
35
+ Requires-Dist: pytest-cov; extra == "dev"
36
+ Requires-Dist: pytest-xdist==3.8.0; extra == "dev"
37
+ Requires-Dist: pytest-repeat==0.9.4; extra == "dev"
35
38
  Requires-Dist: pre-commit; extra == "dev"
36
39
  Requires-Dist: ruff==0.12.8; extra == "dev"
37
- Requires-Dist: pytest-cov; extra == "dev"
38
40
  Requires-Dist: google-genai<2,>=1.26.0; extra == "dev"
39
41
  Provides-Extra: gemini
40
42
  Requires-Dist: google-genai<2,>=1.26.0; python_version >= "3.9" and extra == "gemini"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cartesia-line
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: Cartesia Voice Agents SDK
5
5
  Author-email: "Cartesia AI, Inc." <support@cartesia.ai>
6
6
  License: Apache 2.0
@@ -32,9 +32,11 @@ Requires-Dist: uvicorn<1,>=0.35.0
32
32
  Provides-Extra: dev
33
33
  Requires-Dist: pytest; extra == "dev"
34
34
  Requires-Dist: pytest-asyncio; extra == "dev"
35
+ Requires-Dist: pytest-cov; extra == "dev"
36
+ Requires-Dist: pytest-xdist==3.8.0; extra == "dev"
37
+ Requires-Dist: pytest-repeat==0.9.4; extra == "dev"
35
38
  Requires-Dist: pre-commit; extra == "dev"
36
39
  Requires-Dist: ruff==0.12.8; extra == "dev"
37
- Requires-Dist: pytest-cov; extra == "dev"
38
40
  Requires-Dist: google-genai<2,>=1.26.0; extra == "dev"
39
41
  Provides-Extra: gemini
40
42
  Requires-Dist: google-genai<2,>=1.26.0; python_version >= "3.9" and extra == "gemini"
@@ -17,6 +17,10 @@ line/routes.py
17
17
  line/user_bridge.py
18
18
  line/voice_agent_app.py
19
19
  line/voice_agent_system.py
20
+ line/evals/__init__.py
21
+ line/evals/conversation_runner.py
22
+ line/evals/similarity_utils.py
23
+ line/evals/turn.py
20
24
  line/nodes/__init__.py
21
25
  line/nodes/base.py
22
26
  line/nodes/conversation_context.py
@@ -28,6 +32,8 @@ line/utils/__init__.py
28
32
  line/utils/aio.py
29
33
  line/utils/gemini_utils.py
30
34
  line/utils/openai_utils.py
35
+ line/utils/str.py
31
36
  tests/test_bridge.py
32
37
  tests/test_bus.py
33
- tests/test_routes.py
38
+ tests/test_routes.py
39
+ tests/test_similarity_utils.py
@@ -8,9 +8,11 @@ uvicorn<1,>=0.35.0
8
8
  [dev]
9
9
  pytest
10
10
  pytest-asyncio
11
+ pytest-cov
12
+ pytest-xdist==3.8.0
13
+ pytest-repeat==0.9.4
11
14
  pre-commit
12
15
  ruff==0.12.8
13
- pytest-cov
14
16
  google-genai<2,>=1.26.0
15
17
 
16
18
  [gemini]
@@ -0,0 +1,10 @@
1
+ # Evaluation components
2
+ from line.evals.conversation_runner import ConversationRunner
3
+ from line.evals.turn import AgentTurn, Turn, UserTurn
4
+
5
+ __all__ = [
6
+ "ConversationRunner",
7
+ "AgentTurn",
8
+ "Turn",
9
+ "UserTurn",
10
+ ]
@@ -0,0 +1,195 @@
1
+ """
2
+ ConversationRunner - A testing wrapper around ReasoningNode for conversation flow validation.
3
+
4
+ This class allows testing conversation flows by providing expected conversation traces
5
+ and validating that the ReasoningNode produces similar responses.
6
+ """
7
+
8
+ from typing import List, Optional
9
+
10
+ from line.evals.similarity_utils import is_similar_str
11
+ from line.evals.turn import Turn
12
+ from line.events import EventInstance
13
+ from line.nodes.conversation_context import ConversationContext
14
+ from line.nodes.reasoning import ReasoningNode
15
+
16
+
17
class ConversationRunner:
    """
    A testing wrapper for ReasoningNode that validates conversation flows.

    The runner replays the user turns of an expected conversation through a
    ReasoningNode and checks that every agent response it produces is similar
    to the corresponding expected agent turn.
    """

    def __init__(
        self,
        reasoning_node: ReasoningNode,
        expected_conversation: List[Turn],
        initial_agent_message: Optional[str] = None,
        test_note: Optional[str] = None,
    ):
        """
        Initialize the test conversation.

        Args:
            reasoning_node: The ReasoningNode to test
            expected_conversation: List of Turn objects representing the expected conversation flow,
                alternating between user and agent turns
            initial_agent_message: Optional initial message from agent to verify against first AgentTurn
            test_note: Optional free-form note appended to every error message raised by the runner
        """
        self.reasoning_node = reasoning_node
        self.expected_conversation = expected_conversation
        self.initial_agent_message = initial_agent_message
        self.test_note = test_note

    def _with_test_note(self, message: str) -> str:
        """Return ``message`` with the optional test note appended on its own line."""
        if self.test_note is not None:
            message += f"\nTest notes: {self.test_note}"
        return message

    def _verify_initial_agent_message(self) -> Optional[List[EventInstance]]:
        """
        Verify the initial agent message and return its events if it exists.

        Returns:
            List of EventInstance if conversation starts with agent turn, None otherwise

        Raises:
            AssertionError: If initial agent message doesn't match expected first AgentTurn
        """
        if not self.expected_conversation:
            return None

        first_turn = self.expected_conversation[0]
        if not first_turn.is_agent:
            return None

        # Nothing to verify against: accept the expected turn as-is
        if self.initial_agent_message is None:
            return first_turn.to_events()

        # Turn.text may be a single string or a list of acceptable alternatives;
        # is_similar_str only accepts strings, so compare against each alternative.
        expected_text = first_turn.text
        candidates = [expected_text] if isinstance(expected_text, str) else list(expected_text)

        # Cheap exact match first, before any model-backed similarity check
        if self.initial_agent_message in candidates:
            return first_turn.to_events()

        similarity_error = None
        for candidate in candidates:
            result = is_similar_str(self.initial_agent_message, candidate)
            if result.is_success:
                return first_turn.to_events()
            similarity_error = result.error

        error_str = (
            f"Initial agent message doesn't match expected first AgentTurn.\n"
            f"Provided initial_agent_message: '{self.initial_agent_message}'\n"
            f"Expected first AgentTurn text: '{first_turn.text}'\n"
            f"Similarity error: {similarity_error}"
        )
        raise AssertionError(self._with_test_note(error_str))

    def _verify_conversation_pattern(self) -> None:
        """
        Validate that the conversation follows proper alternating user-assistant pattern.

        Raises:
            ValueError: If the conversation does not end with an agent turn or
                contains two consecutive turns with the same role
        """
        if not self.expected_conversation:
            return

        # Ensure conversation ends with agent turn
        if not self.expected_conversation[-1].is_agent:
            raise ValueError(self._with_test_note("Conversation must end with agent turn."))

        # Validate alternating pattern by comparing each turn with its predecessor
        adjacent_turns = zip(self.expected_conversation, self.expected_conversation[1:])
        for i, (previous_turn, current_turn) in enumerate(adjacent_turns, start=1):
            same_type = (current_turn.is_user and previous_turn.is_user) or (
                current_turn.is_agent and previous_turn.is_agent
            )
            if same_type:
                error_str = (
                    f"Invalid conversation pattern at position {i}: "
                    f"Two consecutive '{current_turn.role}' turns. "
                    f"Expected alternating user-assistant pattern."
                )
                raise ValueError(self._with_test_note(error_str))

    async def run(self) -> None:
        """
        Run the conversation test, validating each agent response against expected.

        This method processes the expected conversation turn by turn:
        1. Process user turns by adding them to conversation history
        2. For each user turn, get the expected agent response
        3. Build ConversationContext and call process_context() on ReasoningNode
        4. Convert actual response to Turn and validate similarity
        5. Continue with next turn

        Raises:
            ValueError: If conversation pattern is invalid (non-alternating user-assistant turns)
            AssertionError: If any agent response doesn't match expected
        """
        # Validate conversation pattern first
        self._verify_conversation_pattern()

        # Track conversation history
        conversation_history: List[EventInstance] = []

        # A leading agent turn is seeded into the history instead of being generated
        initial_events = self._verify_initial_agent_message()
        start = 0
        if initial_events is not None:
            conversation_history.extend(initial_events)
            start = 1

        # After validation the remaining turns are strictly (user, agent) pairs
        remaining = self.expected_conversation[start:]
        for user_turn, expected_agent_turn in zip(remaining[::2], remaining[1::2]):
            conversation_history.extend(user_turn.to_events())

            # Build conversation context from a snapshot of the history
            ctx = ConversationContext(
                events=conversation_history.copy(),
                system_prompt=self.reasoning_node.system_prompt,
            )

            # Get actual response from reasoning node
            actual_events = [event async for event in self.reasoning_node.process_context(ctx)]
            actual_turn = Turn.from_events(actual_events)

            # Turn.is_similar words its messages with `self` as the actual turn and
            # `other` as the expected one, so call it on the actual turn.
            similarity_error = actual_turn.is_similar(expected_agent_turn)
            if similarity_error is not None:
                error_str = (
                    f"Agent turn doesn't match expected.\n"
                    f" User message: {user_turn.text}\n"
                    f" Expected: {expected_agent_turn}\n"
                    f" Actual: {actual_turn}\n"
                    f" Reason: {similarity_error}\n"
                )
                raise AssertionError(self._with_test_note(error_str))

            # Add actual agent turn events to history for next iteration
            conversation_history.extend(actual_events)
@@ -0,0 +1,279 @@
1
+ """
2
+ Similarity checking utilities for conversation evaluation.
3
+
4
+ This module provides functions for comparing strings and dictionaries with semantic
5
+ similarity checking using AI models.
6
+ """
7
+
8
+ from dataclasses import dataclass
9
+ from typing import Dict, List, Optional, Union # noqa: F401
10
+
11
+ from google.genai import Client
12
+ from google.genai.types import GenerateContentConfig
13
+
14
+
15
@dataclass
class SimilarityResult:
    """Outcome of a similarity check.

    ``is_success`` is tri-state: True when the inputs are similar, False when
    they are not, and None when the check did not apply to the inputs.
    """

    # True / False verdict, or None when the check was not applicable
    is_success: Optional[bool]
    # Human-readable explanation when the check did not succeed
    error: Optional[str]
19
+
20
+
21
def is_statement_pattern(s: str) -> bool:
    """Return True if ``s``, ignoring surrounding whitespace, is wrapped in ``<`` and ``>``."""
    trimmed = s.strip()
    return trimmed.startswith("<") and trimmed.endswith(">")
24
+
25
+
26
def extract_statement(s: str) -> str:
    """Strip surrounding whitespace, then drop the first and last character (the ``<`` and ``>``)."""
    trimmed = s.strip()
    return trimmed[1 : len(trimmed) - 1]
29
+
30
+
31
def check_string_statement(statement: str, actual_text: str) -> SimilarityResult:
    """Ask the Gemini model whether ``actual_text`` satisfies ``statement``.

    Args:
        statement: The statement description (without < >)
        actual_text: The actual text to check against the statement

    Returns:
        SimilarityResult with is_success=True when the model answers "YES";
        is_success=False with the model's reason when it answers "NO", or with
        an "unexpected response format" error for any other answer.
    """
    prompt = f"""
    Check if the following text matches this statement/requirement:

    Statement: "{statement}"
    Text: "{actual_text}"

    Instructions:
    - Respond with "YES" if the text matches the statement, or "NO: [reason]" if it doesn't.
    - The text should contain or express the concept described in the statement.

    Examples:
    - Statement: "mentions SOC-2 compliance" vs Text: "Our security audit passed SOC-2 requirements" → YES
    - Statement: "mentions SOC-2 compliance" vs Text: "We follow security best practices" → NO:
      Doesn't mention SOC-2
    - Statement: "asks for user name" vs Text: "What's your name?" → YES
    - Statement: "asks for user name" vs Text: "How old are you?" → NO: Asks for age, not name
    """

    # Low temperature keeps the verdict as repeatable as possible
    generation_config = GenerateContentConfig(
        temperature=0.1,
    )

    llm = Client()
    response = llm.models.generate_content(
        model="gemini-2.5-flash-lite", contents=prompt, config=generation_config
    )
    response_text = response.text.strip() if response.text else ""
    verdict = response_text.upper()

    if verdict.startswith("YES"):
        return SimilarityResult(is_success=True, error=None)

    if verdict.startswith("NO"):
        # Drop the leading "NO" plus separator, keeping only the model's reason
        reason = response_text[3:].strip().lstrip(":").strip()
        return SimilarityResult(is_success=False, error=reason)

    return SimilarityResult(
        is_success=False,
        error=f"Unexpected response format from statement check: {response_text}",
    )
78
+
79
+
80
def is_similar_str(a: str, b: str) -> SimilarityResult:
    """Check if two strings have the same meaning using Gemini with special rule support.

    Special Rules:
    - "*" wildcard: Matches any string content (either a or b can be "*")
    - Statement patterns: Strings like "<mentions SOC-2 compliance>" match text containing that concept

    Args:
        a: First string to compare
        b: Second string to compare

    Returns:
        SimilarityResult whose is_success is True when the strings are considered
        similar; otherwise is_success is False and error holds the reason.
    """
    # A bare "*" on either side accepts anything
    if "*" in (a, b):
        return SimilarityResult(is_success=True, error=None)

    # Statement patterns take precedence; is_success None means "not applicable"
    pattern_result = is_similar_via_statement_pattern(a, b)
    if pattern_result.is_success is None:
        # Fall back to a plain text-to-text comparison
        return is_similar_via_single_text_comparison(a, b)

    return pattern_result
105
+
106
+
107
def is_similar_via_statement_pattern(a: str, b: str) -> SimilarityResult:
    """Compare ``a`` and ``b`` when at least one of them is a ``<statement>`` pattern.

    Returns:
        SimilarityResult with is_success=None when neither input is a statement
        pattern (the check does not apply); otherwise the result of comparing
        the two statements, or of checking the plain text against the statement.
    """
    a_is_statement = is_statement_pattern(a)
    b_is_statement = is_statement_pattern(b)

    # Neither side is a pattern: signal "not applicable" to the caller
    if not (a_is_statement or b_is_statement):
        return SimilarityResult(is_success=None, error=None)

    # Both sides are patterns: compare the bare statements with each other
    if a_is_statement and b_is_statement:
        return is_similar_str(extract_statement(a), extract_statement(b))

    # Exactly one side is a pattern: check the other side's text against it
    if a_is_statement:
        return check_string_statement(extract_statement(a), b)
    return check_string_statement(extract_statement(b), a)
126
+
127
+
128
def is_similar_via_single_text_comparison(a: str, b: str) -> SimilarityResult:
    """Compare two plain strings, first literally and then by asking the Gemini model.

    Returns:
        SimilarityResult with is_success=True when the strings are equal after
        lower-casing and stripping, or when the model answers "YES";
        is_success=False with a reason otherwise.
    """
    # Cheap path: identical after basic normalization, no model call needed
    if a.lower().strip() == b.lower().strip():
        return SimilarityResult(is_success=True, error=None)

    prompt = f"""
    Compare these two strings and determine if they have the same or very similar meaning:

    String A: "{a}"
    String B: "{b}"

    Rules:
    - Respond with "YES" if they have the same meaning, or "NO: [reason]" if they don't.
    - Consider paraphrasing, synonyms, and different ways of expressing the same concept.
    - Ignore filler prefixes like "Now", "Okay", "Got it", "Thank you", "Finally", "Sounds good", etc.
    - Affirmative phrases like "yes", "that is correct" or "correct" are similar
    - For alphanumeric matching, you may allow mismatches on spacing
    - For alphanumeric matching, you may allow matching when spelled out (e.g. 1 is equivalent to "one", 2 is equivalent to "two", etc.)
    - For alphanumeric matching, you may allow semantic matching between spelled out numbers with spaces or concatenated string of digits

    Examples:
    - "What's your name?" vs "Can you tell me your name?" → YES
    - "What's your name?" vs "What's your age?" → NO: Different information being requested
    - "You are verified" vs "Your identity is confirmed" → YES
    - "Now, what's your Name?" vs "Thank you, what's your name?" → YES
    - "Hello" vs "Goodbye" → NO: Opposite greetings with different meanings
    - "one two three four" versus "1234" → YES
    """  # noqa: E501

    generation_config = GenerateContentConfig(
        temperature=0.1,  # Low temperature for consistent results
    )

    llm = Client()
    response = llm.models.generate_content(
        model="gemini-2.5-flash-lite", contents=prompt, config=generation_config
    )
    response_text = response.text.strip() if response.text else ""
    verdict = response_text.upper()

    if verdict.startswith("YES"):
        return SimilarityResult(is_success=True, error=None)

    if verdict.startswith("NO"):
        # Drop the leading "NO" plus separator, keeping only the model's reason
        reason = response_text[3:].strip().lstrip(":").strip()
        return SimilarityResult(is_success=False, error=reason)

    # Anything else is treated as a failure and reported together with the inputs
    return SimilarityResult(
        is_success=False,
        error=f"Unexpected response format from similarity check: {response_text}\n"
        f'String A: "{a}"\nString B: "{b}"',
    )
180
+
181
+
182
def is_similar_text(a: Union[List[str], str], b: Union[List[str], str]) -> SimilarityResult:
    """Check that at least one candidate from ``a`` is similar to at least one candidate from ``b``.

    A bare string is treated as a one-element list.

    Args:
        a: First string or list of strings to compare
        b: Second string or list of strings to compare

    Returns:
        SimilarityResult indicating if the lists are similar

    Raises:
        RuntimeError: If both inputs are empty lists
    """
    candidates_a = [a] if isinstance(a, str) else a
    candidates_b = [b] if isinstance(b, str) else b

    if not candidates_a and not candidates_b:
        raise RuntimeError("Both lists are empty")
    if not candidates_a or not candidates_b:
        return SimilarityResult(
            is_success=False, error=f"One list is empty: a={candidates_a}, b={candidates_b}"
        )

    # Stop at the first pair that is judged similar
    found_match = any(
        is_similar_str(item_a, item_b).is_success for item_a in candidates_a for item_b in candidates_b
    )
    if found_match:
        return SimilarityResult(is_success=True, error=None)

    if len(candidates_a) == 1 and len(candidates_b) == 1:
        return SimilarityResult(is_success=False, error=f"{candidates_a} != {candidates_b}")

    return SimilarityResult(
        is_success=False,
        error=f"No similar elements found the following two lists: a={candidates_a}, b={candidates_b}",
    )
213
+
214
+
215
def is_similar_dict(actual: Dict, expected: Dict) -> SimilarityResult:
    """Recursively check if two dictionaries are similar.

    Uses string similarity checking for string values and recursive comparison for nested dicts.
    All other value types are compared with ``!=``. A key whose expected value is
    None is not validated at all (the key must still be present in ``actual``).

    Args:
        actual: The actual dictionary
        expected: The expected dictionary

    Returns:
        SimilarityResult with is_success=True when the dictionaries are similar,
        otherwise is_success=False and an error describing the first mismatch found.
    """
    # Check if keys match
    actual_keys = set(actual)
    expected_keys = set(expected)

    if actual_keys != expected_keys:
        # Walk the dicts (not the sets) so keys are reported in a stable insertion order
        missing_keys = [key for key in expected if key not in actual_keys]
        extra_keys = [key for key in actual if key not in expected_keys]
        error_parts = []
        if missing_keys:
            error_parts.append(f"missing keys: {missing_keys}")
        if extra_keys:
            error_parts.append(f"extra keys: {extra_keys}")
        return SimilarityResult(
            is_success=False,
            error=f"Key mismatch - {', '.join(error_parts)}",
        )

    # Check each key-value pair in the expected dict's own order, so the first
    # reported mismatch is deterministic
    for key, expected_value in expected.items():
        actual_value = actual[key]

        # Skip validation if expected value is None
        if expected_value is None:
            continue

        # Handle string values with similarity checking
        if isinstance(expected_value, str) and isinstance(actual_value, str):
            result = is_similar_str(actual_value, expected_value)
            if result.is_success is False:
                return SimilarityResult(
                    is_success=False,
                    error=f"String value mismatch for key '{key}': {result.error}",
                )

        # Handle nested dictionaries
        elif isinstance(expected_value, dict) and isinstance(actual_value, dict):
            nested = is_similar_dict(actual_value, expected_value)
            if nested.is_success is False:
                # Surface the nested error text, not the SimilarityResult repr
                return SimilarityResult(
                    is_success=False,
                    error=f"Nested dict mismatch for key '{key}': {nested.error}",
                )

        # Handle other types with exact comparison
        elif actual_value != expected_value:
            return SimilarityResult(
                is_success=False,
                error=f"Value mismatch for key '{key}': expected {expected_value}, got {actual_value}",
            )

    return SimilarityResult(is_success=True, error=None)
@@ -0,0 +1,236 @@
1
+ """
2
+ Turn-based conversation representation for evaluation.
3
+
4
+ This module provides Turn classes that represent conversation turns with automatic
5
+ conversion to/from Event instances for use with ReasoningNode testing.
6
+ """
7
+
8
+ import json
9
+ from typing import Any, Dict, List, Literal, Optional, Union
10
+
11
+ from pydantic import BaseModel, Field
12
+
13
+ from line.evals.similarity_utils import is_similar_dict, is_similar_text
14
+ from line.events import (
15
+ AgentResponse,
16
+ DTMFOutputEvent,
17
+ EndCall,
18
+ EventInstance,
19
+ ToolResult,
20
+ TransferCall,
21
+ UserTranscriptionReceived,
22
+ )
23
+ from line.events import (
24
+ ToolCall as EventToolCall,
25
+ )
26
+
27
+
28
class ToolCall(BaseModel):
    """A tool invocation recorded on a Turn: tool name, call arguments and optional result."""

    # Name of the tool that was called
    name: str
    # Keyword arguments of the call; defaults to a fresh empty dict per instance
    arguments: Dict[str, Any] = Field(default_factory=dict)
    # Value produced by the tool; None when no result is recorded
    result: Any = None
34
+
35
+
36
class Turn(BaseModel):
    """Base class for conversation turns with event conversion capabilities."""

    role: Literal["user", "assistant"]
    # A single utterance, or a list of acceptable alternative utterances
    text: Union[List[str], str] = ""
    tool_calls: List[ToolCall] = Field(default_factory=list)
    telephony_events: list[Union[DTMFOutputEvent, TransferCall, EndCall]] = Field(default_factory=list)

    @property
    def is_user(self) -> bool:
        """Check if this is a user turn."""
        return self.role == "user"

    @property
    def is_agent(self) -> bool:
        """Check if this is an agent turn."""
        return self.role == "assistant"

    def to_events(self) -> List[EventInstance]:
        """Convert this turn to a list of Event instances.

        A user turn yields one UserTranscriptionReceived. An assistant turn yields
        a ToolCall event (plus a ToolResult when a result is recorded) per tool
        call, followed by one AgentResponse when the turn has text. When the text
        is a list, only its first element is used.

        Raises:
            RuntimeError: If a user turn's text is a list that does not contain
                exactly one element, or an assistant turn's text has an unexpected type
        """
        events = []

        if self.role == "user":
            if isinstance(self.text, str):
                events.append(UserTranscriptionReceived(content=self.text))
                return events

            # Otherwise, it must be a list holding exactly one transcription
            if len(self.text) != 1:
                raise RuntimeError(
                    f"Must include exactly one text element for user turn. {len(self.text)=}"
                )
            events.append(UserTranscriptionReceived(content=self.text[0]))
        elif self.role == "assistant":
            # Add tool calls first
            for tool_call in self.tool_calls:
                events.append(EventToolCall(tool_name=tool_call.name, tool_args=tool_call.arguments))
                if tool_call.result is not None:
                    events.append(
                        ToolResult(
                            tool_name=tool_call.name,
                            tool_args=tool_call.arguments,
                            result=tool_call.result,
                        )
                    )

            # Add text response
            if self.text:
                if isinstance(self.text, str):
                    events.append(AgentResponse(content=self.text))
                elif isinstance(self.text, list):
                    events.append(AgentResponse(content=self.text[0]))
                else:
                    raise RuntimeError(f"Unexpected text type: {type(self.text)=}")

        return events

    @classmethod
    def from_events(cls, events: List[EventInstance]) -> "Turn":
        """Create a Turn from a list of Event instances.

        Text content of all transcription/response events is concatenated and
        stripped. The role is taken from the last recognized event and defaults
        to "assistant" when no event is recognized.
        """
        text_parts = []
        role = "assistant"  # Default to assistant

        # Tool calls are keyed by tool name, so a later call or result for the
        # same tool name updates/overwrites the earlier entry.
        tool_call_map = {}
        telephony_events = []

        for event in events:
            if isinstance(event, UserTranscriptionReceived):
                role = "user"
                text_parts.append(event.content)
            elif isinstance(event, AgentResponse):
                role = "assistant"
                text_parts.append(event.content)
            elif isinstance(event, EventToolCall):
                role = "assistant"
                tool_call_map[event.tool_name] = ToolCall(name=event.tool_name, arguments=event.tool_args)
            elif isinstance(event, ToolResult):
                role = "assistant"
                if event.tool_name in tool_call_map:
                    tool_call_map[event.tool_name].result = event.result
                else:
                    # Create tool call if we only have the result
                    tool_call_map[event.tool_name] = ToolCall(
                        name=event.tool_name,
                        arguments=event.tool_args,
                        result=event.result,
                    )
            elif isinstance(event, (DTMFOutputEvent, TransferCall, EndCall)):
                role = "assistant"
                telephony_events.append(event)

        tool_calls = list(tool_call_map.values())
        text = "".join(text_parts).strip()

        return cls(role=role, text=text, tool_calls=tool_calls, telephony_events=telephony_events)

    def is_similar(self, other: "Turn") -> Optional[str]:
        """Check if this turn is similar to another turn.

        The error messages and the tool-argument comparison treat ``self`` as the
        actual turn and ``other`` as the expected turn.

        Returns:
            None if turns are similar, error description string if not
        """
        # Check role matches
        if self.role != other.role:
            return f"Role mismatch: expected '{other.role}', got '{self.role}'"

        # Check text similarity (skipped when both sides have no text)
        if self.text or other.text:
            results = is_similar_text(self.text, other.text)
            if results.is_success is False:
                return f"Text mismatch: {results.error}"

        # Check tool calls match
        if len(self.tool_calls) != len(other.tool_calls):
            return f"Tool call count mismatch: expected {len(other.tool_calls)}, got {len(self.tool_calls)}"

        # Sort tool calls by name so the comparison does not depend on call order
        self_tools = sorted(self.tool_calls, key=lambda x: x.name)
        other_tools = sorted(other.tool_calls, key=lambda x: x.name)

        for self_tool, other_tool in zip(self_tools, other_tools):
            if self_tool.name != other_tool.name:
                return f"Tool name mismatch: expected '{other_tool.name}', got '{self_tool.name}'"

            # Check arguments similarity
            if self_tool.arguments or other_tool.arguments:
                results = is_similar_dict(self_tool.arguments, other_tool.arguments)
                if results.is_success is False:
                    return f"Tool '{self_tool.name}' arguments mismatch: {results.error}"

            # Tool results are compared exactly
            if self_tool.result != other_tool.result:
                return (
                    f"Tool '{self_tool.name}' result mismatch: "
                    f"expected {other_tool.result}, got {self_tool.result}"
                )

        if self.telephony_events != other.telephony_events:
            return f"telephony_events mismatch: expected {other.telephony_events} to match {self.telephony_events}"  # noqa: E501

        return None
183
+
184
+
185
class UserTurn(Turn):
    """A Turn whose role is fixed to "user"."""

    # Narrowed from the base class so the role can be omitted when constructing
    role: Literal["user"] = "user"
189
+
190
+
191
class AgentTurn(Turn):
    """A Turn whose role is fixed to "assistant"."""

    # Narrowed from the base class so the role can be omitted when constructing
    role: Literal["assistant"] = "assistant"
195
+
196
+
197
def make_turn(data: Dict[str, Any]) -> Union[UserTurn, AgentTurn]:
    """Build the Turn subclass selected by the ``role`` entry of ``data``.

    Args:
        data: Dictionary containing turn data with 'role' field and other turn properties;
            the whole dictionary is passed to the turn constructor as keyword arguments

    Returns:
        UserTurn or AgentTurn instance based on the role

    Raises:
        ValueError: If role is not 'user' or 'assistant'
    """
    role = data.get("role")

    # Reject unknown (or missing) roles up front
    if role != "user" and role != "assistant":
        raise ValueError(f"Invalid role '{role}'. Must be 'user' or 'assistant'")

    turn_cls = UserTurn if role == "user" else AgentTurn
    return turn_cls(**data)
217
+
218
+
219
def load_conversation_json(file_path: str) -> List[Union[UserTurn, AgentTurn]]:
    """Load a conversation from a JSON file.

    The file is expected to contain a JSON array of turn objects, each of which
    is converted with make_turn().

    Args:
        file_path: Path to JSON file containing conversation data

    Returns:
        List of Turn instances (UserTurn or AgentTurn)

    Raises:
        FileNotFoundError: If the file doesn't exist
        json.JSONDecodeError: If the file contains invalid JSON
        ValueError: If any turn has an invalid role
    """
    # Read as UTF-8 explicitly so non-ASCII conversation text does not depend
    # on the platform's default locale encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    return [make_turn(turn_data) for turn_data in data]
@@ -134,8 +134,7 @@ class AgentError(BaseModel):
134
134
  class TransferCall(BaseModel):
135
135
  """Transfer call to destination."""
136
136
 
137
- destination: str
138
- reason: Optional[str] = None
137
+ target_phone_number: str
139
138
 
140
139
 
141
140
  class AgentHandoff(BaseModel):
@@ -143,15 +143,15 @@ class ConversationHarness:
143
143
  await self._send(EndCallOutput())
144
144
  logger.info("End call message sent")
145
145
 
146
- async def transfer_call(self, destination: str = ""):
146
+ async def transfer_call(self, target_phone_number: str = ""):
147
147
  """
148
148
  Send transfer_call message
149
149
 
150
150
  Args:
151
- destination: Optional destination for call transfer
151
+ target_phone_number: Optional target phone number for call transfer
152
152
  """
153
- await self._send(TransferOutput(target_phone_number=destination))
154
- logger.info(f"Transfer call message sent to {destination}")
153
+ await self._send(TransferOutput(target_phone_number=target_phone_number))
154
+ logger.info(f"Transfer call message sent to {target_phone_number}")
155
155
  self.shutdown_event.set()
156
156
 
157
157
  async def send_message(self, message: str):
@@ -1,11 +1,12 @@
1
1
  """System tool definitions for Cartesia Voice Agents SDK."""
2
2
 
3
- from typing import AsyncGenerator, Dict, Union
3
+ from typing import AsyncGenerator, Dict, List, Optional, Union
4
4
 
5
5
  from pydantic import BaseModel, Field
6
6
 
7
7
  from line.events import AgentResponse, EndCall
8
8
  from line.tools.tool_types import ToolDefinition
9
+ from line.utils.str import is_e164_phone_number
9
10
 
10
11
  try:
11
12
  from google.genai import types as gemini_types
@@ -188,5 +189,71 @@ class DTMFToolCall(ToolDefinition):
188
189
  }
189
190
 
190
191
 
191
- class DTMFToolCallTool(ToolDefinition):
192
- """DTMF tool call system tool definition."""
192
class TransferToolCall(ToolDefinition):  # noqa: F811
    """Tool definition for transferring the call to one of a fixed set of phone numbers.

    The tool exposes a single required string argument, ``target_phone_number``,
    whose allowed values are the numbers supplied at construction time.
    """

    def __init__(self, target_phone_numbers: List[str], description: Optional[str] = None):
        """Validate and store the allowed transfer destinations.

        Args:
            target_phone_numbers: Phone numbers the call may be transferred to.
                Each one must pass ``is_e164_phone_number``.
            description: Optional replacement for the default tool description.

        Raises:
            ValueError: On the first number that is not E.164 compliant.
        """
        for destination in target_phone_numbers:
            if is_e164_phone_number(destination):
                continue
            raise ValueError(f"Invalid destination phone number. {destination=}")

        self._description = description
        self.target_phone_numbers = target_phone_numbers

    @classmethod
    def name(cls) -> str:
        """Return the tool name used in the generated tool schemas."""
        return "transfer_tool"

    def description(self) -> str:
        """Return the custom description if one was given, else the default text."""
        if self._description:
            return self._description
        return "Initiates a transfer of the call to the destination phone number."

    @classmethod
    def parameters_description(cls) -> str:
        """Return the description attached to the ``target_phone_number`` argument."""
        return "The destination phone number to transfer the call to"

    def to_gemini_tool(self) -> "gemini_types.Tool":
        """Convert to Gemini tool format"""
        number_schema = {
            "type": "string",
            "description": self.parameters_description(),
            "enum": self.target_phone_numbers,
        }
        declaration = gemini_types.FunctionDeclaration(
            name=self.name(),
            description=self.description(),
            parameters={
                "type": "object",
                "properties": {"target_phone_number": number_schema},
                "required": ["target_phone_number"],
            },
        )
        return gemini_types.Tool(function_declarations=[declaration])

    def to_openai_tool(self) -> Dict[str, object]:
        """Convert to OpenAI tool format for Responses API.

        Note: This returns the format expected by OpenAI's Responses API,
        not the Chat Completions API format.
        """
        number_schema = {
            "type": "string",
            "enum": self.target_phone_numbers,
            "description": self.parameters_description(),
        }
        # NOTE(review): "strict" is emitted inside the parameters schema here;
        # OpenAI documents it as a field of the function tool itself - confirm
        # this placement is intended before changing it.
        parameters = {
            "type": "object",
            "properties": {
                "target_phone_number": number_schema,
            },
            "required": ["target_phone_number"],
            "additionalProperties": False,
            "strict": True,
        }
        return {
            "type": "function",
            "name": self.name(),
            "description": self.description(),
            "parameters": parameters,
        }
@@ -105,7 +105,7 @@ def create_user_bridge(harness: "ConversationHarness", authorized_node: str) ->
105
105
  async def send_transfer_call(message: Message):
106
106
  """Transfer call to destination."""
107
107
  event: TransferCall = message.event
108
- return await harness.transfer_call(event.destination)
108
+ return await harness.transfer_call(event.target_phone_number)
109
109
 
110
110
  async def send_log_metric(message: Message):
111
111
  """Log metric via harness."""
@@ -0,0 +1,30 @@
1
def is_e164_phone_number(phone: str) -> bool:
    """Check if a string is a valid E.164 compliant phone number.

    E.164 format requirements enforced here:
    - Must start with '+'
    - Followed by 5-15 ASCII digits (0-9)
    - No spaces, hyphens, or other characters

    Args:
        phone: The phone number string to validate

    Returns:
        bool: True if the string is E.164 compliant, False otherwise


    Note: 1+4=5 is practically the minimum number of digits. A country can have
    a short national phone number code (len=4) if they are small (e.g. Falkland Islands)
    """
    # Must start with '+'
    if not phone.startswith("+"):
        return False

    # Everything after the '+' must be plain digits
    digits = phone[1:]

    # str.isdigit() on its own also accepts non-ASCII digit characters such as
    # superscripts ("²") or Arabic-Indic numerals, which are not dialable E.164
    # digits, so require ASCII as well. An empty string fails isdigit().
    if not (digits.isascii() and digits.isdigit()):
        return False

    # Must be between 5 and 15 digits
    return 5 <= len(digits) <= 15
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "cartesia-line"
7
- version = "0.1.1"
7
+ version = "0.1.3"
8
8
  description = "Cartesia Voice Agents SDK"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -43,11 +43,15 @@ dependencies = [
43
43
  dev = [
44
44
  "pytest",
45
45
  "pytest-asyncio",
46
+ "pytest-cov",
47
+ "pytest-xdist==3.8.0",
48
+ "pytest-repeat==0.9.4",
46
49
  "pre-commit",
47
50
  "ruff==0.12.8",
48
- "pytest-cov",
49
51
  "google-genai>=1.26.0,<2",
50
52
  ]
53
+
54
+
51
55
  gemini = [
52
56
  "google-genai>=1.26.0,<2; python_version>='3.9'",
53
57
  "aiohttp>=3.12.0",
@@ -0,0 +1,99 @@
1
+ import os
2
+
3
+ import pytest
4
+
5
+ from line.evals.similarity_utils import is_similar_dict, is_similar_str, is_similar_text
6
+
7
# Module-wide marker: skip every test here unless GEMINI_API_KEY is set to a
# non-empty value.
pytestmark = pytest.mark.skipif(
    not os.environ.get("GEMINI_API_KEY"),
    reason="GEMINI_API_KEY environment variable not set",
)
12
+
13
+
14
def test_wildcard_matching():
    """A bare "*" pattern is expected to match arbitrary text."""
    outcome = is_similar_str("*", "Hello world")

    assert outcome.is_success
    assert outcome.error is None
19
+
20
+
21
def test_statement_pattern_matching():
    """Angle-bracket statements such as <mentions something> match by meaning."""
    # Text that does mention the topic named in the statement.
    matched = is_similar_str("<mentions compliance>", "We are SOC-2 compliant")
    assert matched.is_success
    assert matched.error is None

    # Text that never mentions the topic named in the statement.
    unmatched = is_similar_str("<mentions SOC-2>", "We follow general security practices")
    assert not unmatched.is_success
    assert unmatched.error is not None
30
+
31
+
32
def test_semantic_similarity():
    """Paraphrases should match; questions about different things should not."""
    # Two phrasings of the same question.
    paraphrase = is_similar_str("What's your name?", "Can you tell me your name?")
    assert paraphrase.is_success
    assert paraphrase.error is None

    # Same sentence shape, different subject.
    different = is_similar_str("What's your name?", "What's your age?")
    assert not different.is_success
    assert different.error is not None
41
+
42
+
43
def test_dict_similarity():
    """Nested dictionaries compare as similar only when their values agree."""
    expected = {"user": {"name": "John"}, "status": "active"}

    # Equal content on both sides.
    actual = {"user": {"name": "John"}, "status": "active"}
    same = is_similar_dict(actual, expected)
    assert same.is_success
    assert same.error is None

    # A nested value differs ("Jane" vs "John").
    actual_bad = {"user": {"name": "Jane"}, "status": "active"}
    differing = is_similar_dict(actual_bad, expected)
    assert not differing.is_success
    assert differing.error is not None
56
+
57
+
58
def test_list_similarity():
    """Check is_similar_text on plain strings and on lists of candidate strings."""
    # (first argument, second argument, whether a match is expected)
    cases = [
        # Identical plain strings
        ("Hello", "Hello", True),
        # Identical single-element lists
        (["Hello"], ["Hello"], True),
        # Unrelated plain strings
        ("Hello", "Bye", False),
        # Similar greetings
        (["Hello"], ["Hi"], True),
        # Wildcard entry
        (["Hello"], ["*"], True),
        # Several entries on each side, one similar pair among them
        (["Hello", "oranges"], ["apples", "Hi"], True),
        # Several entries on each side, nothing similar
        (["apples", "oranges"], ["cats", "dogs"], False),
    ]

    for first, second, should_match in cases:
        outcome = is_similar_text(first, second)
        if should_match:
            assert outcome.is_success
            assert outcome.error is None
        else:
            assert not outcome.is_success
            assert outcome.error is not None
File without changes
File without changes
File without changes
File without changes