massgen 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of massgen might be problematic. Click here for more details.
- massgen/__init__.py +94 -0
- massgen/agent_config.py +507 -0
- massgen/backend/CLAUDE_API_RESEARCH.md +266 -0
- massgen/backend/Function calling openai responses.md +1161 -0
- massgen/backend/GEMINI_API_DOCUMENTATION.md +410 -0
- massgen/backend/OPENAI_RESPONSES_API_FORMAT.md +65 -0
- massgen/backend/__init__.py +25 -0
- massgen/backend/base.py +180 -0
- massgen/backend/chat_completions.py +228 -0
- massgen/backend/claude.py +661 -0
- massgen/backend/gemini.py +652 -0
- massgen/backend/grok.py +187 -0
- massgen/backend/response.py +397 -0
- massgen/chat_agent.py +440 -0
- massgen/cli.py +686 -0
- massgen/configs/README.md +293 -0
- massgen/configs/creative_team.yaml +53 -0
- massgen/configs/gemini_4o_claude.yaml +31 -0
- massgen/configs/news_analysis.yaml +51 -0
- massgen/configs/research_team.yaml +51 -0
- massgen/configs/single_agent.yaml +18 -0
- massgen/configs/single_flash2.5.yaml +44 -0
- massgen/configs/technical_analysis.yaml +51 -0
- massgen/configs/three_agents_default.yaml +31 -0
- massgen/configs/travel_planning.yaml +51 -0
- massgen/configs/two_agents.yaml +39 -0
- massgen/frontend/__init__.py +20 -0
- massgen/frontend/coordination_ui.py +945 -0
- massgen/frontend/displays/__init__.py +24 -0
- massgen/frontend/displays/base_display.py +83 -0
- massgen/frontend/displays/rich_terminal_display.py +3497 -0
- massgen/frontend/displays/simple_display.py +93 -0
- massgen/frontend/displays/terminal_display.py +381 -0
- massgen/frontend/logging/__init__.py +9 -0
- massgen/frontend/logging/realtime_logger.py +197 -0
- massgen/message_templates.py +431 -0
- massgen/orchestrator.py +1222 -0
- massgen/tests/__init__.py +10 -0
- massgen/tests/multi_turn_conversation_design.md +214 -0
- massgen/tests/multiturn_llm_input_analysis.md +189 -0
- massgen/tests/test_case_studies.md +113 -0
- massgen/tests/test_claude_backend.py +310 -0
- massgen/tests/test_grok_backend.py +160 -0
- massgen/tests/test_message_context_building.py +293 -0
- massgen/tests/test_rich_terminal_display.py +378 -0
- massgen/tests/test_v3_3agents.py +117 -0
- massgen/tests/test_v3_simple.py +216 -0
- massgen/tests/test_v3_three_agents.py +272 -0
- massgen/tests/test_v3_two_agents.py +176 -0
- massgen/utils.py +79 -0
- massgen/v1/README.md +330 -0
- massgen/v1/__init__.py +91 -0
- massgen/v1/agent.py +605 -0
- massgen/v1/agents.py +330 -0
- massgen/v1/backends/gemini.py +584 -0
- massgen/v1/backends/grok.py +410 -0
- massgen/v1/backends/oai.py +571 -0
- massgen/v1/cli.py +351 -0
- massgen/v1/config.py +169 -0
- massgen/v1/examples/fast-4o-mini-config.yaml +44 -0
- massgen/v1/examples/fast_config.yaml +44 -0
- massgen/v1/examples/production.yaml +70 -0
- massgen/v1/examples/single_agent.yaml +39 -0
- massgen/v1/logging.py +974 -0
- massgen/v1/main.py +368 -0
- massgen/v1/orchestrator.py +1138 -0
- massgen/v1/streaming_display.py +1190 -0
- massgen/v1/tools.py +160 -0
- massgen/v1/types.py +245 -0
- massgen/v1/utils.py +199 -0
- massgen-0.0.3.dist-info/METADATA +568 -0
- massgen-0.0.3.dist-info/RECORD +76 -0
- massgen-0.0.3.dist-info/WHEEL +5 -0
- massgen-0.0.3.dist-info/entry_points.txt +2 -0
- massgen-0.0.3.dist-info/licenses/LICENSE +204 -0
- massgen-0.0.3.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,652 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Gemini backend implementation using structured output for voting and answer submission.
|
|
3
|
+
|
|
4
|
+
APPROACH: Uses structured output instead of function declarations to handle the limitation
|
|
5
|
+
where Gemini API cannot combine builtin tools with user-defined function declarations.
|
|
6
|
+
|
|
7
|
+
KEY FEATURES:
|
|
8
|
+
- ✅ Structured output for vote and new_answer mechanisms
|
|
9
|
+
- ✅ Builtin tools support (code_execution + grounding)
|
|
10
|
+
- ✅ Streaming with proper token usage tracking
|
|
11
|
+
- ✅ Error handling and response parsing
|
|
12
|
+
- ✅ Compatible with MassGen StreamChunk architecture
|
|
13
|
+
|
|
14
|
+
TECHNICAL SOLUTION:
|
|
15
|
+
- Uses Pydantic models to define structured output schemas
|
|
16
|
+
- Prompts model to use specific JSON format for voting/answering
|
|
17
|
+
- Converts structured responses to standard tool call format
|
|
18
|
+
- Maintains compatibility with existing MassGen workflow
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import os
|
|
22
|
+
import json
|
|
23
|
+
import enum
|
|
24
|
+
from typing import Dict, List, Any, AsyncGenerator, Optional
|
|
25
|
+
from .base import LLMBackend, StreamChunk
|
|
26
|
+
|
|
27
|
+
try:
|
|
28
|
+
from pydantic import BaseModel, Field
|
|
29
|
+
except ImportError:
|
|
30
|
+
BaseModel = None
|
|
31
|
+
Field = None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class VoteOption(enum.Enum):
    """Vote options for agent selection.

    Anonymous agent identifiers matching the 'agent1'..'agent5' naming used in
    the structured-output prompt below.
    """

    # NOTE(review): capped at five agents, and this enum is not referenced
    # elsewhere in this module — confirm it is still needed.
    AGENT1 = "agent1"
    AGENT2 = "agent2"
    AGENT3 = "agent3"
    AGENT4 = "agent4"
    AGENT5 = "agent5"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ActionType(enum.Enum):
    """Action types for structured output.

    Values deliberately match the coordination tool names that
    ``detect_coordination_tools`` looks for ("vote" / "new_answer"), so a
    structured response maps 1:1 onto a tool call.
    """

    VOTE = "vote"
    NEW_ANSWER = "new_answer"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class VoteAction(BaseModel):
    """Structured output for voting action.

    Pydantic schema for the "vote" decision; nested inside
    ``CoordinationResponse``, whose JSON schema is sent to Gemini as the
    response schema. Field descriptions are therefore model-facing prompts.
    """

    # Discriminator — always VOTE for this model.
    action: ActionType = Field(default=ActionType.VOTE, description="Action type")
    # Which anonymous agent receives the vote.
    agent_id: str = Field(
        description="Anonymous agent ID to vote for (e.g., 'agent1', 'agent2')"
    )
    # Free-text justification attached to the vote.
    reason: str = Field(description="Brief reason why this agent has the best answer")
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class NewAnswerAction(BaseModel):
    """Structured output for new answer action.

    Pydantic schema for the "new_answer" decision; nested inside
    ``CoordinationResponse``. The description text is shown to the model
    through the JSON schema.
    """

    # Discriminator — always NEW_ANSWER for this model.
    action: ActionType = Field(default=ActionType.NEW_ANSWER, description="Action type")
    # Full replacement answer text (including any builtin-tool usage notes).
    content: str = Field(
        description="Your improved answer. If any builtin tools like search or code execution were used, include how they are used here."
    )
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class CoordinationResponse(BaseModel):
    """Structured response for coordination actions.

    Top-level envelope: ``model_json_schema()`` of this class is passed to
    Gemini as ``response_schema`` when no builtin tools are active, and the
    same shape is requested textually in the fallback prompt. Exactly one of
    ``vote_data`` / ``answer_data`` is expected, selected by ``action_type``.
    """

    action_type: ActionType = Field(description="Type of action to take")
    vote_data: Optional[VoteAction] = Field(
        default=None, description="Vote data if action is vote"
    )
    answer_data: Optional[NewAnswerAction] = Field(
        default=None, description="Answer data if action is new_answer"
    )
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class GeminiBackend(LLMBackend):
|
|
83
|
+
"""Google Gemini backend using structured output for coordination."""
|
|
84
|
+
|
|
85
|
+
def __init__(self, api_key: Optional[str] = None, **kwargs):
    """Set up the Gemini backend, resolving the API key from the environment.

    Accepts an explicit *api_key*; otherwise falls back to the
    GOOGLE_API_KEY or GEMINI_API_KEY environment variable (first one set).
    Raises ImportError when pydantic is unavailable, since the structured
    output models require it.
    """
    super().__init__(api_key, **kwargs)

    # Resolve the key: explicit argument wins, then env vars in order.
    resolved = api_key
    if not resolved:
        for env_name in ("GOOGLE_API_KEY", "GEMINI_API_KEY"):
            resolved = os.getenv(env_name)
            if resolved:
                break
    self.api_key = resolved

    # Counters feeding calculate_cost()'s tool-usage surcharge.
    self.search_count = 0
    self.code_execution_count = 0

    if BaseModel is None:
        raise ImportError(
            "pydantic is required for Gemini backend. Install with: pip install pydantic"
        )
|
|
97
|
+
|
|
98
|
+
def detect_coordination_tools(self, tools: List[Dict[str, Any]]) -> bool:
    """Return True when *tools* carries both coordination tools.

    A coordination request is recognized by the presence of BOTH a "vote"
    and a "new_answer" function tool. Tool names may appear either nested
    under "function" (OpenAI style) or directly under "name".
    """
    if not tools:
        return False

    seen = set()
    for entry in tools:
        if entry.get("type") != "function":
            continue
        if "function" in entry:
            seen.add(entry["function"].get("name", ""))
        elif "name" in entry:
            seen.add(entry.get("name", ""))

    return {"vote", "new_answer"} <= seen
|
|
112
|
+
|
|
113
|
+
def build_structured_output_prompt(
    self, base_content: str, valid_agent_ids: Optional[List[str]] = None
) -> str:
    """Append JSON-decision instructions to *base_content*.

    Used in the fallback path where builtin tools prevent native structured
    output: the model is asked to end its reply with a JSON object matching
    the CoordinationResponse shape. When *valid_agent_ids* is given, it is
    embedded so votes stay within the known agent set.
    """
    agent_list = (
        f"Valid agents: {', '.join(valid_agent_ids)}" if valid_agent_ids else ""
    )

    return f"""{base_content}

IMPORTANT: You must respond with a structured JSON decision at the end of your response.

If you want to VOTE for an existing agent's answer:
{{
"action_type": "vote",
"vote_data": {{
"action": "vote",
"agent_id": "agent1", // Choose from: {agent_list or 'agent1, agent2, agent3, etc.'}
"reason": "Brief reason for your vote"
}}
}}

If you want to provide a NEW ANSWER:
{{
"action_type": "new_answer",
"answer_data": {{
"action": "new_answer",
"content": "Your complete improved answer here"
}}
}}

Make your decision and include the JSON at the very end of your response."""
|
|
145
|
+
|
|
146
|
+
def extract_structured_response(
    self, response_text: str
) -> Optional[Dict[str, Any]]:
    """Extract structured JSON response from model output.

    Tries four strategies in order and returns the first dict that parses
    and contains an "action_type" key, or None if nothing matches:
      0. JSON inside ```json ...``` markdown fences (last fence first).
      1. Regex over brace-balanced spans (handles one nesting level).
      2. Character scan with a brace counter (arbitrary nesting).
      3. Line-by-line collection of '{'-led spans (fallback).
    Any unexpected failure is swallowed and reported as None — callers treat
    a missing structured response as "no coordination decision".
    """
    try:
        # Local import keeps re out of the hot path for non-coordination use.
        import re

        # Strategy 0: Look for JSON inside markdown code blocks first
        markdown_json_pattern = r"```json\s*(\{.*?\})\s*```"
        markdown_matches = re.findall(
            markdown_json_pattern, response_text, re.DOTALL
        )

        # Iterate in reverse: the decision is expected at the END of the reply.
        for match in reversed(markdown_matches):
            try:
                parsed = json.loads(match.strip())
                if isinstance(parsed, dict) and "action_type" in parsed:
                    return parsed
            except json.JSONDecodeError:
                continue

        # Strategy 1: Look for complete JSON blocks with proper braces
        # NOTE: this regex only matches one level of nested braces; deeper
        # nesting falls through to Strategy 2.
        json_pattern = r"\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}"
        json_matches = re.findall(json_pattern, response_text, re.DOTALL)

        # Try parsing each match (in reverse order - last one first)
        for match in reversed(json_matches):
            try:
                cleaned_match = match.strip()
                parsed = json.loads(cleaned_match)
                if isinstance(parsed, dict) and "action_type" in parsed:
                    return parsed
            except json.JSONDecodeError:
                continue

        # Strategy 2: Look for JSON blocks with nested braces (more complex)
        # Manual brace counting handles arbitrary nesting depth; unlike the
        # strategies above it returns the FIRST matching block, not the last.
        brace_count = 0
        json_start = -1

        for i, char in enumerate(response_text):
            if char == "{":
                if brace_count == 0:
                    json_start = i
                brace_count += 1
            elif char == "}":
                brace_count -= 1
                if brace_count == 0 and json_start >= 0:
                    # Found a complete JSON block
                    json_block = response_text[json_start : i + 1]
                    try:
                        parsed = json.loads(json_block)
                        if isinstance(parsed, dict) and "action_type" in parsed:
                            return parsed
                    except json.JSONDecodeError:
                        pass
                    json_start = -1

        # Strategy 3: Line-by-line approach (fallback)
        lines = response_text.strip().split("\n")
        json_candidates = []

        for i, line in enumerate(lines):
            stripped = line.strip()
            if stripped.startswith("{") and stripped.endswith("}"):
                json_candidates.append(stripped)
            elif stripped.startswith("{"):
                # Multi-line JSON - collect until closing brace
                # NOTE(review): stops at the first line ending in "}", which
                # may be an inner brace of a nested object — such candidates
                # simply fail json.loads below. Confirm acceptable.
                json_text = stripped
                for j in range(i + 1, len(lines)):
                    json_text += "\n" + lines[j].strip()
                    if lines[j].strip().endswith("}"):
                        json_candidates.append(json_text)
                        break

        # Try to parse each candidate
        for candidate in reversed(json_candidates):
            try:
                parsed = json.loads(candidate)
                if isinstance(parsed, dict) and "action_type" in parsed:
                    return parsed
            except json.JSONDecodeError:
                continue

        return None

    except Exception:
        # Deliberate best-effort: extraction failure is never fatal.
        return None
|
|
233
|
+
|
|
234
|
+
def convert_structured_to_tool_calls(
    self, structured_response: Dict[str, Any]
) -> List[Dict[str, Any]]:
    """Translate a parsed coordination dict into standard tool-call dicts.

    Produces a single "vote" or "new_answer" function call in the format
    the MassGen orchestrator consumes, or an empty list when the
    "action_type" is missing/unknown. Call ids are derived from a hash of
    the payload, truncated to four digits.
    """
    kind = structured_response.get("action_type")

    if kind == "vote":
        payload = structured_response.get("vote_data", {})
        vote_call = {
            "id": f"vote_{hash(str(payload)) % 10000}",
            "type": "function",
            "function": {
                "name": "vote",
                "arguments": {
                    "agent_id": payload.get("agent_id", ""),
                    "reason": payload.get("reason", ""),
                },
            },
        }
        return [vote_call]

    if kind == "new_answer":
        payload = structured_response.get("answer_data", {})
        answer_call = {
            "id": f"new_answer_{hash(str(payload)) % 10000}",
            "type": "function",
            "function": {
                "name": "new_answer",
                "arguments": {"content": payload.get("content", "")},
            },
        }
        return [answer_call]

    return []
|
|
270
|
+
|
|
271
|
+
async def stream_with_tools(
    self, messages: List[Dict[str, Any]], tools: List[Dict[str, Any]], **kwargs
) -> AsyncGenerator[StreamChunk, None]:
    """Stream response using Gemini API with structured output for coordination.

    Yields StreamChunk objects in this order: incremental "content" chunks,
    optional "builtin_tool_results", optional "tool_calls" (coordination
    decisions), one "complete_message", then "done". Any exception is
    converted into a single "error" chunk instead of propagating.

    Recognized kwargs: model (default "gemini-2.5-flash"), temperature
    (default 0.1), max_tokens (default 8192), enable_web_search,
    enable_code_execution.

    NOTE(review): the stream is consumed with a synchronous ``for`` loop
    inside an async generator, which blocks the event loop while Gemini
    streams — consider offloading to a thread. Confirm before relying on
    concurrency here.
    """
    try:
        from google import genai

        # Extract parameters
        model_name = kwargs.get("model", "gemini-2.5-flash")
        temperature = kwargs.get("temperature", 0.1)
        enable_web_search = kwargs.get("enable_web_search", False)
        enable_code_execution = kwargs.get("enable_code_execution", False)

        # Check if this is a coordination request
        is_coordination = self.detect_coordination_tools(tools)
        valid_agent_ids = None

        if is_coordination:
            # Extract valid agent IDs from vote tool enum if available
            for tool in tools:
                if tool.get("type") == "function":
                    func_def = tool.get("function", {})
                    if func_def.get("name") == "vote":
                        agent_id_param = (
                            func_def.get("parameters", {})
                            .get("properties", {})
                            .get("agent_id", {})
                        )
                        if "enum" in agent_id_param:
                            valid_agent_ids = agent_id_param["enum"]
                        break

        # Build content string from messages (Gemini gets one flat prompt,
        # not a role-structured message list).
        conversation_content = ""
        system_message = ""

        for msg in messages:
            if msg.get("role") == "system":
                system_message = msg.get("content", "")
            elif msg.get("role") == "user":
                conversation_content += f"User: {msg.get('content', '')}\n"
            elif msg.get("role") == "assistant":
                conversation_content += f"Assistant: {msg.get('content', '')}\n"

        # For coordination requests, modify the prompt to use structured output
        if is_coordination:
            conversation_content = self.build_structured_output_prompt(
                conversation_content, valid_agent_ids
            )

        # Combine system message and conversation
        full_content = ""
        if system_message:
            full_content += f"{system_message}\n\n"
        full_content += conversation_content

        # Use google-genai package
        client = genai.Client(api_key=self.api_key)

        # Setup builtin tools
        builtin_tools = []
        if enable_web_search:
            try:
                from google.genai import types

                grounding_tool = types.Tool(google_search=types.GoogleSearch())
                builtin_tools.append(grounding_tool)
            except ImportError:
                yield StreamChunk(
                    type="content",
                    content="\n⚠️ Web search requires google.genai.types\n",
                )

        if enable_code_execution:
            try:
                from google.genai import types

                code_tool = types.Tool(code_execution=types.ToolCodeExecution())
                builtin_tools.append(code_tool)
            except ImportError:
                yield StreamChunk(
                    type="content",
                    content="\n⚠️ Code execution requires google.genai.types\n",
                )

        config = {
            "temperature": temperature,
            "max_output_tokens": kwargs.get("max_tokens", 8192),
        }

        # Add builtin tools to config
        if builtin_tools:
            config["tools"] = builtin_tools

        # For coordination requests, use JSON response format (may conflict with builtin tools)
        if is_coordination and not builtin_tools:
            config["response_mime_type"] = "application/json"
            config["response_schema"] = CoordinationResponse.model_json_schema()
        elif is_coordination and builtin_tools:
            # Cannot use structured output with builtin tools - fallback to text parsing
            pass

        # Use streaming for real-time response
        full_content_text = ""
        final_response = None

        for chunk in client.models.generate_content_stream(
            model=model_name, contents=full_content, config=config
        ):
            if hasattr(chunk, "text") and chunk.text:
                chunk_text = chunk.text
                full_content_text += chunk_text
                yield StreamChunk(type="content", content=chunk_text)

            # Keep track of the final response for tool processing
            if hasattr(chunk, "candidates"):
                final_response = chunk

            # Check for tools used in each chunk for real-time detection
            if builtin_tools and hasattr(chunk, "candidates") and chunk.candidates:
                candidate = chunk.candidates[0]

                # Check for code execution in this chunk
                if (
                    enable_code_execution
                    and hasattr(candidate, "content")
                    and hasattr(candidate.content, "parts")
                ):
                    for part in candidate.content.parts:
                        if (
                            hasattr(part, "executable_code")
                            and part.executable_code
                        ):
                            code_content = getattr(
                                part.executable_code,
                                "code",
                                str(part.executable_code),
                            )
                            yield StreamChunk(
                                type="content",
                                content=f"\n💻 [Code Executed]\n```python\n{code_content}\n```\n",
                            )
                        elif (
                            hasattr(part, "code_execution_result")
                            and part.code_execution_result
                        ):
                            result_content = getattr(
                                part.code_execution_result,
                                "output",
                                str(part.code_execution_result),
                            )
                            yield StreamChunk(
                                type="content",
                                content=f"📊 [Result] {result_content}\n",
                            )

        content = full_content_text

        # Process coordination FIRST (before adding tool indicators that might confuse parsing)
        tool_calls_detected = []
        if is_coordination and content.strip():
            # For structured output mode, the entire content is JSON
            structured_response = None
            # Try multiple parsing strategies
            try:
                # Strategy 1: Parse entire content as JSON (works for both modes)
                structured_response = json.loads(content.strip())
            except json.JSONDecodeError:
                # Strategy 2: Extract JSON from mixed text content (handles markdown-wrapped JSON)
                structured_response = self.extract_structured_response(content)

            if (
                structured_response
                and isinstance(structured_response, dict)
                and "action_type" in structured_response
            ):
                # Convert to tool calls
                tool_calls = self.convert_structured_to_tool_calls(
                    structured_response
                )
                if tool_calls:
                    tool_calls_detected = tool_calls

        # Process builtin tool results if any tools were used
        builtin_tool_results = []
        if (
            builtin_tools
            and final_response
            and hasattr(final_response, "candidates")
            and final_response.candidates
        ):
            # Check for grounding or code execution results
            candidate = final_response.candidates[0]

            # Check for web search results - only show if actually used
            if (
                hasattr(candidate, "grounding_metadata")
                and candidate.grounding_metadata
            ):
                # Check if web search was actually used by looking for queries or chunks
                search_actually_used = False
                search_queries = []

                # Look for web search queries
                if (
                    hasattr(candidate.grounding_metadata, "web_search_queries")
                    and candidate.grounding_metadata.web_search_queries
                ):
                    try:
                        for (
                            query
                        ) in candidate.grounding_metadata.web_search_queries:
                            if query and query.strip():
                                search_queries.append(query.strip())
                                search_actually_used = True
                    except (TypeError, AttributeError):
                        pass

                # Look for grounding chunks (indicates actual search results)
                if (
                    hasattr(candidate.grounding_metadata, "grounding_chunks")
                    and candidate.grounding_metadata.grounding_chunks
                ):
                    try:
                        if len(candidate.grounding_metadata.grounding_chunks) > 0:
                            search_actually_used = True
                    except (TypeError, AttributeError):
                        pass

                # Only show indicators if search was actually used
                if search_actually_used:
                    yield StreamChunk(
                        type="content",
                        content="🔍 [Builtin Tool: Web Search] Results integrated\n",
                    )

                    # Show search queries
                    for query in search_queries:
                        yield StreamChunk(
                            type="content", content=f"🔍 [Search Query] '{query}'\n"
                        )

                    builtin_result = {
                        "id": f"web_search_{hash(str(candidate.grounding_metadata)) % 10000}",
                        "tool_type": "google_search_retrieval",
                        "status": "completed",
                        "metadata": str(candidate.grounding_metadata),
                    }
                    builtin_tool_results.append(builtin_result)
                    self.search_count += 1

            # Check for code execution in the response parts
            if (
                enable_code_execution
                and hasattr(candidate, "content")
                and hasattr(candidate.content, "parts")
            ):
                # Look for executable_code and code_execution_result parts
                code_parts = []
                for part in candidate.content.parts:
                    if hasattr(part, "executable_code") and part.executable_code:
                        code_content = getattr(
                            part.executable_code, "code", str(part.executable_code)
                        )
                        code_parts.append(f"Code: {code_content}")
                    elif (
                        hasattr(part, "code_execution_result")
                        and part.code_execution_result
                    ):
                        result_content = getattr(
                            part.code_execution_result,
                            "output",
                            str(part.code_execution_result),
                        )
                        code_parts.append(f"Result: {result_content}")

                if code_parts:
                    # Code execution was actually used
                    # NOTE(review): code/result chunks may already have been
                    # yielded during streaming above, so this can duplicate
                    # output — confirm intended.
                    yield StreamChunk(
                        type="content",
                        content="💻 [Builtin Tool: Code Execution] Code executed\n",
                    )
                    # Also show the actual code and result
                    for part in code_parts:
                        if part.startswith("Code: "):
                            code_content = part[6:]  # Remove "Code: " prefix
                            yield StreamChunk(
                                type="content",
                                content=f"💻 [Code Executed]\n```python\n{code_content}\n```\n",
                            )
                        elif part.startswith("Result: "):
                            result_content = part[8:]  # Remove "Result: " prefix
                            yield StreamChunk(
                                type="content",
                                content=f"📊 [Result] {result_content}\n",
                            )

                    builtin_result = {
                        "id": f"code_execution_{hash(str(code_parts)) % 10000}",
                        "tool_type": "code_execution",
                        "status": "completed",
                        "code_parts": code_parts,
                        "output": "; ".join(code_parts),
                    }
                    builtin_tool_results.append(builtin_result)
                    self.code_execution_count += 1

        # Yield builtin tool results
        if builtin_tool_results:
            yield StreamChunk(
                type="builtin_tool_results",
                builtin_tool_results=builtin_tool_results,
            )

        # Yield coordination tool calls if detected
        if tool_calls_detected:
            yield StreamChunk(type="tool_calls", tool_calls=tool_calls_detected)

        # Build complete message
        complete_message = {"role": "assistant", "content": content.strip()}
        if tool_calls_detected:
            complete_message["tool_calls"] = tool_calls_detected

        yield StreamChunk(
            type="complete_message", complete_message=complete_message
        )
        yield StreamChunk(type="done")

    except Exception as e:
        # Any failure (SDK import, network, parsing) surfaces as one error chunk.
        yield StreamChunk(type="error", error=f"Gemini API error: {e}")
|
|
601
|
+
|
|
602
|
+
def get_provider_name(self) -> str:
    """Return the provider name for this backend."""
    provider = "Gemini"
    return provider
|
|
605
|
+
|
|
606
|
+
def get_supported_builtin_tools(self) -> List[str]:
    """Return the builtin (server-side) tool names Gemini supports here."""
    supported = ["google_search_retrieval", "code_execution"]
    return supported
|
|
609
|
+
|
|
610
|
+
def estimate_tokens(self, text: str) -> int:
    """Rough token count: Gemini averages about 4 characters per token."""
    quotient, _ = divmod(len(text), 4)
    return quotient
|
|
613
|
+
|
|
614
|
+
def calculate_cost(
    self, input_tokens: int, output_tokens: int, model: str
) -> float:
    """Estimate USD cost for a Gemini call (2025 pricing).

    Selects per-million-token rates by substring match on the model name
    (Pro > Flash-Lite > Flash, unknown models assume Flash), then adds a
    rough surcharge for any tracked search / code-execution usage.
    """
    name = model.lower()

    # (input, output) USD per million tokens; checked in priority order.
    if "gemini-2.5-pro" in name:
        in_rate, out_rate = 1.25, 5.0
    elif "gemini-2.5-flash" in name and "lite" in name:
        in_rate, out_rate = 0.075, 0.30
    elif "gemini-2.5-flash" in name:
        in_rate, out_rate = 0.15, 0.60
    else:
        # Unknown model — fall back to Flash pricing.
        in_rate, out_rate = 0.15, 0.60

    input_cost = (input_tokens / 1_000_000) * in_rate
    output_cost = (output_tokens / 1_000_000) * out_rate

    # Estimated builtin-tool surcharges accumulated since the last reset.
    tool_costs = 0.0
    if self.search_count > 0:
        tool_costs += self.search_count * 0.01  # per-search estimate
    if self.code_execution_count > 0:
        tool_costs += self.code_execution_count * 0.005  # per-execution estimate

    return input_cost + output_cost + tool_costs
|
|
647
|
+
|
|
648
|
+
def reset_tool_usage(self):
    """Reset tool usage tracking.

    Zeroes the search/code-execution counters used by calculate_cost(),
    then delegates to the base class to reset token accounting.
    """
    self.search_count = 0
    self.code_execution_count = 0
    super().reset_token_usage()
|