massgen 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of massgen might be problematic.
- massgen/__init__.py +94 -0
- massgen/agent_config.py +507 -0
- massgen/backend/CLAUDE_API_RESEARCH.md +266 -0
- massgen/backend/Function calling openai responses.md +1161 -0
- massgen/backend/GEMINI_API_DOCUMENTATION.md +410 -0
- massgen/backend/OPENAI_RESPONSES_API_FORMAT.md +65 -0
- massgen/backend/__init__.py +25 -0
- massgen/backend/base.py +180 -0
- massgen/backend/chat_completions.py +228 -0
- massgen/backend/claude.py +661 -0
- massgen/backend/gemini.py +652 -0
- massgen/backend/grok.py +187 -0
- massgen/backend/response.py +397 -0
- massgen/chat_agent.py +440 -0
- massgen/cli.py +686 -0
- massgen/configs/README.md +293 -0
- massgen/configs/creative_team.yaml +53 -0
- massgen/configs/gemini_4o_claude.yaml +31 -0
- massgen/configs/news_analysis.yaml +51 -0
- massgen/configs/research_team.yaml +51 -0
- massgen/configs/single_agent.yaml +18 -0
- massgen/configs/single_flash2.5.yaml +44 -0
- massgen/configs/technical_analysis.yaml +51 -0
- massgen/configs/three_agents_default.yaml +31 -0
- massgen/configs/travel_planning.yaml +51 -0
- massgen/configs/two_agents.yaml +39 -0
- massgen/frontend/__init__.py +20 -0
- massgen/frontend/coordination_ui.py +945 -0
- massgen/frontend/displays/__init__.py +24 -0
- massgen/frontend/displays/base_display.py +83 -0
- massgen/frontend/displays/rich_terminal_display.py +3497 -0
- massgen/frontend/displays/simple_display.py +93 -0
- massgen/frontend/displays/terminal_display.py +381 -0
- massgen/frontend/logging/__init__.py +9 -0
- massgen/frontend/logging/realtime_logger.py +197 -0
- massgen/message_templates.py +431 -0
- massgen/orchestrator.py +1222 -0
- massgen/tests/__init__.py +10 -0
- massgen/tests/multi_turn_conversation_design.md +214 -0
- massgen/tests/multiturn_llm_input_analysis.md +189 -0
- massgen/tests/test_case_studies.md +113 -0
- massgen/tests/test_claude_backend.py +310 -0
- massgen/tests/test_grok_backend.py +160 -0
- massgen/tests/test_message_context_building.py +293 -0
- massgen/tests/test_rich_terminal_display.py +378 -0
- massgen/tests/test_v3_3agents.py +117 -0
- massgen/tests/test_v3_simple.py +216 -0
- massgen/tests/test_v3_three_agents.py +272 -0
- massgen/tests/test_v3_two_agents.py +176 -0
- massgen/utils.py +79 -0
- massgen/v1/README.md +330 -0
- massgen/v1/__init__.py +91 -0
- massgen/v1/agent.py +605 -0
- massgen/v1/agents.py +330 -0
- massgen/v1/backends/gemini.py +584 -0
- massgen/v1/backends/grok.py +410 -0
- massgen/v1/backends/oai.py +571 -0
- massgen/v1/cli.py +351 -0
- massgen/v1/config.py +169 -0
- massgen/v1/examples/fast-4o-mini-config.yaml +44 -0
- massgen/v1/examples/fast_config.yaml +44 -0
- massgen/v1/examples/production.yaml +70 -0
- massgen/v1/examples/single_agent.yaml +39 -0
- massgen/v1/logging.py +974 -0
- massgen/v1/main.py +368 -0
- massgen/v1/orchestrator.py +1138 -0
- massgen/v1/streaming_display.py +1190 -0
- massgen/v1/tools.py +160 -0
- massgen/v1/types.py +245 -0
- massgen/v1/utils.py +199 -0
- massgen-0.0.3.dist-info/METADATA +568 -0
- massgen-0.0.3.dist-info/RECORD +76 -0
- massgen-0.0.3.dist-info/WHEEL +5 -0
- massgen-0.0.3.dist-info/entry_points.txt +2 -0
- massgen-0.0.3.dist-info/licenses/LICENSE +204 -0
- massgen-0.0.3.dist-info/top_level.txt +1 -0
massgen/backend/GEMINI_API_DOCUMENTATION.md
ADDED
@@ -0,0 +1,410 @@
# Gemini API Documentation for Backend Integration

## Overview

The Gemini API provides access to Google's latest generative AI models with multimodal capabilities, streaming support, and function calling.

## Authentication

- Requires an API key from Google AI Studio
- Set up authentication in the Python client

## Models Available

1. **Gemini 2.5 Pro**: Most powerful thinking model, with features for complex reasoning
2. **Gemini 2.5 Flash**: Newest multimodal model, with next-generation features
3. **Gemini 2.5 Flash-Lite**: Lighter-weight version optimized for speed and cost

**Note**: Starting April 29, 2025, Gemini 1.5 Pro and Gemini 1.5 Flash models are not available in projects with no prior usage.

## Python SDK Installation & Basic Usage

```bash
pip install -q -U google-genai
```

```python
from google import genai

client = genai.Client()

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Explain how AI works in a few words",
)

print(response.text)
```

## Streaming Implementation

### Synchronous Streaming
```python
for chunk in client.models.generate_content_stream(
    model='gemini-2.0-flash',
    contents='Tell me a story in 300 words.'
):
    print(chunk.text)
    print("_" * 80)
```

### Asynchronous Streaming
```python
async for chunk in await client.aio.models.generate_content_stream(
    model='gemini-2.0-flash',
    contents="Write a cute story about cats."
):
    if chunk.text:
        print(chunk.text)
    print("_" * 80)
```

### Async Concurrent Execution
```python
import asyncio

async def get_response():
    async for chunk in await client.aio.models.generate_content_stream(
        model='gemini-2.0-flash',
        contents='Tell me a story in 500 words.'
    ):
        if chunk.text:
            print(chunk.text)
        print("_" * 80)

async def something_else():
    # Runs concurrently with the streaming task to show it is not blocked.
    for i in range(5):
        print("==========not blocked!==========")
        await asyncio.sleep(1)

async def async_demo():
    task1 = asyncio.create_task(get_response())
    task2 = asyncio.create_task(something_else())
    await asyncio.gather(task1, task2)
```

## Function Calling

### Overview
- Allows models to interact with external tools and APIs
- Three primary use cases:
  1. Augment knowledge
  2. Extend capabilities
  3. Take actions

### Function Call Workflow
1. Define function declarations with:
   - Name
   - Description
   - Parameters (type, properties)
2. Call the model with the function declarations
3. The model decides whether to:
   - Generate a text response
   - Call the specified function(s)

### Function Call Modes
- **AUTO** (default): The model chooses between a text response and a function call
- **ANY**: Force a function call
- **NONE**: Prohibit function calls
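The workflow and modes above map directly onto google-genai SDK types. A minimal sketch, assuming a hypothetical `get_weather` tool and the SDK's `types.FunctionDeclaration` / `types.ToolConfig` pattern:

```python
from google import genai
from google.genai import types

client = genai.Client()

# Hypothetical user-defined tool: the declaration is all the model sees.
weather_fn = types.FunctionDeclaration(
    name="get_weather",
    description="Get the current temperature for a city.",
    parameters=types.Schema(
        type=types.Type.OBJECT,
        properties={"city": types.Schema(type=types.Type.STRING)},
        required=["city"],
    ),
)

config = types.GenerateContentConfig(
    tools=[types.Tool(function_declarations=[weather_fn])],
    # AUTO is the default; "ANY" would force a function call.
    tool_config=types.ToolConfig(
        function_calling_config=types.FunctionCallingConfig(mode="AUTO")
    ),
)

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="What's the weather in Paris?",
    config=config,
)

# If the model chose to call the tool, the call arrives as a response part.
for part in response.candidates[0].content.parts:
    if part.function_call:
        print(part.function_call.name, dict(part.function_call.args))
```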
### Supported Capabilities
- Parallel function calling
- Compositional (sequential) function calling
- Automatic function calling (Python SDK)

### Best Practices
- Provide clear, specific function descriptions
- Use strong typing for parameters
- Limit the total number of tools (10-20 recommended)
- Implement robust error handling
- Be mindful of security and token limits

### Supported Models for Function Calling
- Gemini 2.5 Pro
- Gemini 2.5 Flash
- Gemini 2.5 Flash-Lite

## Structured Output

### Overview
Structured output allows constraining model responses to specific JSON schemas or enums, ensuring predictable data formats.

### Implementation with Pydantic Models

```python
from google import genai
from pydantic import BaseModel, Field
import enum

class ActionType(enum.Enum):
    VOTE = "vote"
    NEW_ANSWER = "new_answer"

class VoteAction(BaseModel):
    action: ActionType = Field(default=ActionType.VOTE)
    agent_id: str = Field(description="Agent ID to vote for")
    reason: str = Field(description="Reason for voting")

class CoordinationResponse(BaseModel):
    action_type: ActionType
    vote_data: VoteAction | None = None

client = genai.Client()

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Choose the best agent and explain why.",
    config={
        "response_mime_type": "application/json",
        "response_schema": CoordinationResponse,
    },
)

# Response will be structured JSON matching the schema
```
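When `response_schema` is a Pydantic model, the SDK can also deserialize the result for you; a short follow-up sketch using the google-genai `parsed` accessor:

```python
# response.text holds the raw JSON string; response.parsed is the typed object.
result: CoordinationResponse = response.parsed
if result.action_type == ActionType.VOTE and result.vote_data:
    print(result.vote_data.agent_id, result.vote_data.reason)
```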
### Enum-Only Responses

```python
class Instrument(enum.Enum):
    PERCUSSION = "Percussion"
    STRING = "String"
    WIND = "Wind"

response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents='What type of instrument is an oboe?',
    config={
        'response_mime_type': 'text/x.enum',
        'response_schema': Instrument,
    },
)
```

### Best Practices for Structured Output
- Keep schemas simple to avoid `InvalidArgument: 400` errors
- Use Pydantic models for complex JSON structures
- Add field descriptions for clarity
- Provide clear context in prompts
- Use `propertyOrdering` for a consistent output order

## Builtin Tools

### Code Execution

**Overview:**
- Executes Python code within the model's runtime environment
- Maximum execution time: 30 seconds
- Can regenerate code up to 5 times if errors occur
- No additional charge beyond standard token pricing

**Supported Libraries:**
- numpy, pandas, matplotlib, scikit-learn
- Custom libraries cannot be installed
- Can generate Matplotlib graphs and handle file inputs (CSV, text)

**Configuration:**
```python
from google.genai import types

code_tool = types.Tool(code_execution=types.ToolCodeExecution())
config = types.GenerateContentConfig(tools=[code_tool])

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Calculate sum of first 50 prime numbers",
    config=config
)
```

**Response Format:**
- `text`: Model's explanatory text
- `executableCode`: Generated Python code
- `codeExecutionResult`: Execution output
- Access via `response.candidates[0].content.parts` (see the sketch below)
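A minimal sketch of walking those parts with the google-genai SDK (the Python attribute names are the snake_case equivalents of the fields listed above):

```python
for part in response.candidates[0].content.parts:
    if part.text:
        print("text:", part.text)
    if part.executable_code:
        print("code:", part.executable_code.code)
    if part.code_execution_result:
        print("result:", part.code_execution_result.output)
```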
**Limitations:**
- Python only
- Cannot return non-code artifacts
- Maximum file input ~2 MB
- Some variation in performance

### Grounding (Web Search)

**Overview:**
- Provides real-time web information for factual accuracy
- Includes citations and source attribution
- Single billable use per request (even with multiple queries)

**Configuration:**
```python
from google.genai import types

grounding_tool = types.Tool(google_search=types.GoogleSearch())
config = types.GenerateContentConfig(tools=[grounding_tool])

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Latest AI developments in 2025",
    config=config
)
```

**Response Metadata:**
Access via `response.candidates[0].grounding_metadata`:
- `webSearchQueries`: Search queries used
- `groundingChunks`: Web sources (URI and title)
- `groundingSupports`: Links text segments to sources

**Best Practices:**
- Process citations using `groundingSupports` and `groundingChunks` (see the sketch below)
- Use for current events and factual verification
- Review the Search tool notebook for detailed examples
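A minimal citation-listing sketch, assuming a grounded response like the one above (attribute names are the snake_case SDK equivalents of the camelCase fields):

```python
meta = response.candidates[0].grounding_metadata
if meta:
    print("queries:", meta.web_search_queries)
    for chunk in meta.grounding_chunks or []:
        if chunk.web:
            print(chunk.web.title, chunk.web.uri)
```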
### URL Context (Experimental)

**Overview:**
- Processes up to 20 URLs per request as additional context
- Extracts and analyzes content from web pages
- Currently free during the experimental phase

**Capabilities:**
- Extract key data points from web pages
- Compare information across multiple URLs
- Synthesize data from multiple sources
- Answer questions based on webpage content

**Limitations:**
- Works best with standard web pages
- Not recommended for multimedia (YouTube videos)
- Daily quotas: 1500 queries per project, 100 per user
- Available on gemini-2.5-pro and gemini-2.5-flash

**Example Use Cases:**
```python
# Compare recipes from multiple URLs
"Compare recipes from URL1 and URL2"

# Extract schedule information
"Give me a three-day event schedule based on URL"
```
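The prompts above are passed as ordinary `contents`; the tool itself is enabled through the config. A minimal sketch, assuming the experimental `url_context` tool type in google-genai and hypothetical example URLs:

```python
from google.genai import types

# Assumption: types.UrlContext is the experimental URL-context tool type.
config = types.GenerateContentConfig(
    tools=[types.Tool(url_context=types.UrlContext())]
)

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Compare the recipes at https://example.com/a and https://example.com/b",
    config=config,
)
print(response.text)
```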
## Additional Capabilities

- **Multimodal input**: text, images, video
- **Long context support**: millions of tokens
- **Structured output generation** (see above)
- **Native image generation**
- **Embeddings** for RAG workflows
- **OpenAI-compatible interface**: the OpenAI Python library can be used against the Gemini endpoint, including with `stream=True`

## Integration Notes for Backend

### Key Implementation Points:
1. Use the `google-genai` package rather than the deprecated `google.generativeai` module for direct API access
2. Use `from google import genai` with `genai.Client()` for the client-based pattern
3. Use `client.models.generate_content_stream()` for streaming (the new SDK replaces `generate_content(stream=True)`)
4. Check for `chunk.text` to ensure non-empty chunks
5. Configure structured output with `config={"response_mime_type": "application/json", "response_schema": Schema}`
6. Compatible with the asyncio patterns the architecture needs

### Correct Package Usage:
```python
# Correct import (google-genai package)
from google import genai

# Client-based approach (recommended)
client = genai.Client()
client.models.generate_content(...)

# Note: the old google-generativeai package is deprecated.
# Use google-genai instead: pip install -q -U google-genai
```

### Authentication Setup:
- Get an API key from Google AI Studio
- Set the `GOOGLE_API_KEY` or `GEMINI_API_KEY` environment variable
- Or pass the key explicitly with `genai.Client(api_key=api_key)` (the `genai.configure(api_key=...)` call belongs to the deprecated package)
- Handle authentication errors appropriately

### Error Handling:
- Implement robust error handling for API failures
- Handle rate limits and quota-exceeded scenarios
- Manage streaming connection failures gracefully
- Handle `InvalidArgument: 400` errors for complex schemas
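A minimal backoff sketch for the rate-limit case, assuming the google-genai `errors.APIError` hierarchy (the exact exception classes are an assumption; adapt to whatever the installed SDK raises):

```python
import time
from google.genai import errors  # assumption: google-genai error hierarchy

def generate_with_retry(client, max_retries=3, **kwargs):
    # Retry on 429 (rate limit) with exponential backoff; re-raise anything else.
    for attempt in range(max_retries):
        try:
            return client.models.generate_content(**kwargs)
        except errors.APIError as e:
            if e.code == 429 and attempt < max_retries - 1:
                time.sleep(2 ** attempt)
            else:
                raise
```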
### Pricing and Rate Limits:
- Pricing details: https://ai.google.dev/pricing
- Rate limits: https://ai.google.dev/gemini-api/docs/rate-limits
- Monitor usage and implement cost controls

## Tool Usage Restrictions & Multi-Tool Support

### Regular Gemini API (Stable)
**✅ Supported Combinations:**
- `code_execution` + `grounding` (includes search) - **RECOMMENDED** (see the sketch below)
- `function_declarations` only (user-defined tools)

**❌ NOT Supported:**
- `code_execution` + `function_declarations`
- `grounding` + `function_declarations`
- All three tool types together
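A minimal sketch of the recommended combination, passing both builtin tools in one config (same `client` and `types` as in the sections above; the prompt is illustrative):

```python
from google.genai import types

config = types.GenerateContentConfig(
    tools=[
        types.Tool(code_execution=types.ToolCodeExecution()),
        types.Tool(google_search=types.GoogleSearch()),
    ]
)

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Find the latest US CPI figures and plot the last 12 monthly values.",
    config=config,
)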
### Live API (Preview/Experimental)
**✅ Multi-Tool Support:**
- Can combine `google_search` + `code_execution` + `function_declarations`
- Full flexibility, but it comes with major limitations

**🚨 Live API Restrictions (NOT Recommended for MassGen):**
- **Status**: Preview/experimental - unstable for production
- **Session Limits**: 3 free, 50-1000 paid (too restrictive)
- **Real-time focus**: WebSocket-based, designed for audio/video
- **Cost**: 50% premium over the regular API
- **Availability**: Not guaranteed, capacity varies
- **Complexity**: Requires a WebSocket implementation

### Recommendation for MassGen Backend
**✅ Use the Regular API with `code_execution` + `grounding`:**
- Stable, production-ready
- Covers both code execution and web search needs
- Standard REST endpoints
- Predictable pricing and limits
- No session restrictions

**❌ Avoid the Live API:**
- Session limits incompatible with multi-agent scaling
- Preview status unsuitable for production
- Unnecessary complexity for text-based coordination

## Implementation Status for MassGen

**✅ COMPLETED**: GeminiBackend class implemented with:
- [x] Google Gemini API integration with proper authentication
- [x] Structured output for coordination (vote/new_answer) using JSON schemas
- [x] Streaming functionality compatible with the StreamChunk architecture
- [x] Cost calculation for Gemini 2.5 models (Flash, Flash-Lite, Pro)
- [x] Error handling for Gemini-specific responses and API limitations
- [x] Support for builtin tools (code_execution + grounding/web search)
- [x] Integration with SingleAgent and orchestrator patterns
- [x] Tool result detection and streaming for code execution and web search
- [x] CLI and configuration support with AgentConfig.create_gemini_config()
- [x] NO Live API support (uses the regular API only)

**Key Features:**
- **Structured Output**: Uses `response_mime_type: "application/json"` with Pydantic schemas for coordination
- **Builtin Tools**: Supports code_execution and google_search_retrieval with proper result detection
- **Multi-mode Support**: Handles coordination-only, tools-only, and mixed scenarios
- **Cost Tracking**: Tracks token usage, search count, and code execution count
- **MassGen Compatible**: Full integration with orchestrator and agent patterns

**Usage Examples:**
```bash
# CLI usage
python -m massgen.cli --backend gemini --model gemini-2.5-flash "Your question"
```

```python
# Configuration
AgentConfig.create_gemini_config(
    model="gemini-2.5-flash",
    enable_web_search=True,
    enable_code_execution=True
)
```
massgen/backend/OPENAI_RESPONSES_API_FORMAT.md
ADDED
@@ -0,0 +1,65 @@
# OpenAI Responses API Tool Call Format Documentation

## Key Points for MassGen Framework

### Tool Call Format (Model Output)
```json
{
  "type": "function_call",
  "id": "fc_12345xyz",
  "call_id": "call_12345xyz",
  "name": "get_weather",
  "arguments": "{\"location\":\"Paris, France\"}"
}
```

### Tool Result Format (Input to Model)
```json
{
  "type": "function_call_output",
  "call_id": "call_12345xyz",
  "output": "Temperature is 15°C"
}
```

## Critical Flow for Multi-Turn Conversations

When handling tool calls across multiple turns:

1. **Model makes a tool call** - Returns a `function_call` object with a `call_id`
2. **Execute the function** - Run your code with the arguments
3. **Add BOTH messages to the input array**:
   ```python
   input_messages.append(tool_call)  # append the model's function call message
   input_messages.append({           # append the result message
       "type": "function_call_output",
       "call_id": tool_call.call_id,
       "output": str(result)
   })
   ```
4. **Call the model again** with the complete conversation history (see the sketch below)
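A minimal end-to-end sketch of that flow with the OpenAI Python SDK, assuming a hypothetical `get_weather` tool whose executor is stubbed out; `tools` is the function schema list passed on both calls:

```python
from openai import OpenAI

client = OpenAI()

tools = [{
    "type": "function",
    "name": "get_weather",
    "description": "Get the current temperature for a location.",
    "parameters": {
        "type": "object",
        "properties": {"location": {"type": "string"}},
        "required": ["location"],
    },
}]

input_messages = [{"role": "user", "content": "What's the weather in Paris?"}]
response = client.responses.create(model="gpt-4.1", input=input_messages, tools=tools)

for item in response.output:
    if item.type == "function_call":
        result = "Temperature is 15°C"  # stand-in for the real executor
        input_messages.append(item)     # the tool call itself
        input_messages.append({         # its result, matched by call_id
            "type": "function_call_output",
            "call_id": item.call_id,
            "output": result,
        })

# Second call: the model now sees both the tool call and its output.
final = client.responses.create(model="gpt-4.1", input=input_messages, tools=tools)
print(final.output_text)
```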
## Key Requirements

- **Tool result messages MUST reference the exact `call_id` from the original tool call**
- **Both the tool call AND the tool result must be in the conversation history**
- **Tool results use an "output" field, not "content"**
- **Arguments are JSON strings, not objects**

## Error Handling Pattern

For returning error results to the model, follow the same pattern:
```python
# Agent made an invalid tool call with call_id "call_123"
error_message = {
    "type": "function_call_output",
    "call_id": "call_123",
    "output": "Error: You can only vote once per response. Please vote for just ONE agent."
}

# Add both the original tool call AND the error message to the conversation
input_messages.append(original_tool_call)
input_messages.append(error_message)
```

This ensures the API can match the tool result to the original call.
massgen/backend/__init__.py
ADDED
@@ -0,0 +1,25 @@
"""
MassGen Backend System - Multi-Provider LLM Integration

Supports multiple LLM providers with a standardized StreamChunk interface:
- Response API (standard format with tool support)
- Grok/xAI (Chat Completions API compatible)
- Claude (Messages API with multi-tool support)
- Gemini (structured output for coordination)
"""

from .base import LLMBackend, StreamChunk, TokenUsage
from .response import ResponseBackend
from .grok import GrokBackend
from .claude import ClaudeBackend
from .gemini import GeminiBackend

__all__ = [
    "LLMBackend",
    "StreamChunk",
    "TokenUsage",
    "ResponseBackend",
    "GrokBackend",
    "ClaudeBackend",
    "GeminiBackend",
]
massgen/backend/base.py
ADDED
@@ -0,0 +1,180 @@
from __future__ import annotations

"""
Base backend interface for LLM providers.
"""

from abc import ABC, abstractmethod
from typing import Dict, List, Any, AsyncGenerator, Optional
from dataclasses import dataclass


@dataclass
class StreamChunk:
    """Standardized chunk format for streaming responses."""

    type: str  # "content", "tool_calls", "builtin_tool_results", "complete_message", "complete_response", "done", "error", "agent_status"
    content: Optional[str] = None
    tool_calls: Optional[List[Dict[str, Any]]] = (
        None  # User-defined function tools (need execution)
    )
    builtin_tool_results: Optional[List[Dict[str, Any]]] = (
        None  # Provider builtin tools (already executed)
    )
    complete_message: Optional[Dict[str, Any]] = None  # Complete assistant message
    response: Optional[Dict[str, Any]] = None  # Raw Responses API response
    error: Optional[str] = None
    source: Optional[str] = None  # Source identifier (e.g., agent_id, "orchestrator")
    status: Optional[str] = None  # For agent status updates


@dataclass
class TokenUsage:
    """Token usage and cost tracking."""

    input_tokens: int = 0
    output_tokens: int = 0
    estimated_cost: float = 0.0


class LLMBackend(ABC):
    """Abstract base class for LLM providers."""

    def __init__(self, api_key: Optional[str] = None, **kwargs):
        self.api_key = api_key
        self.config = kwargs
        self.token_usage = TokenUsage()

    @abstractmethod
    async def stream_with_tools(
        self, messages: List[Dict[str, Any]], tools: List[Dict[str, Any]], **kwargs
    ) -> AsyncGenerator[StreamChunk, None]:
        """
        Stream a response with tool calling support.

        Args:
            messages: Conversation messages
            tools: Available tools schema
            **kwargs: Additional provider-specific parameters including model

        Yields:
            StreamChunk: Standardized response chunks
        """
        pass

    @abstractmethod
    def get_provider_name(self) -> str:
        """Get the name of this provider."""
        pass

    @abstractmethod
    def estimate_tokens(self, text: str) -> int:
        """Estimate token count for text."""
        pass

    @abstractmethod
    def calculate_cost(
        self, input_tokens: int, output_tokens: int, model: str
    ) -> float:
        """Calculate cost for token usage."""
        pass

    def update_token_usage(
        self, messages: List[Dict[str, Any]], response_content: str, model: str
    ):
        """Update token usage tracking."""
        # Estimate input tokens from messages
        input_text = str(messages)
        input_tokens = self.estimate_tokens(input_text)

        # Estimate output tokens from response
        output_tokens = self.estimate_tokens(response_content)

        # Update totals
        self.token_usage.input_tokens += input_tokens
        self.token_usage.output_tokens += output_tokens

        # Calculate cost
        cost = self.calculate_cost(input_tokens, output_tokens, model)
        self.token_usage.estimated_cost += cost

    def get_token_usage(self) -> TokenUsage:
        """Get current token usage."""
        return self.token_usage

    def reset_token_usage(self):
        """Reset token usage tracking."""
        self.token_usage = TokenUsage()

    def get_supported_builtin_tools(self) -> List[str]:
        """Get list of builtin tools supported by this provider."""
        return []

    def extract_tool_name(self, tool_call: Dict[str, Any]) -> str:
        """
        Extract the tool name from a tool call in this backend's format.

        Args:
            tool_call: Tool call data structure from this backend

        Returns:
            Tool name string
        """
        # Default implementation assumes Chat Completions format
        return tool_call.get("function", {}).get("name", "unknown")

    def extract_tool_arguments(self, tool_call: Dict[str, Any]) -> Dict[str, Any]:
        """
        Extract tool arguments from a tool call in this backend's format.

        Args:
            tool_call: Tool call data structure from this backend

        Returns:
            Tool arguments dictionary
        """
        # Default implementation assumes Chat Completions format
        return tool_call.get("function", {}).get("arguments", {})

    def extract_tool_call_id(self, tool_call: Dict[str, Any]) -> str:
        """
        Extract the tool call ID from a tool call in this backend's format.

        Args:
            tool_call: Tool call data structure from this backend

        Returns:
            Tool call ID string
        """
        # Default implementation assumes Chat Completions format
        return tool_call.get("id", "")

    def create_tool_result_message(
        self, tool_call: Dict[str, Any], result_content: str
    ) -> Dict[str, Any]:
        """
        Create a tool result message in this backend's expected format.

        Args:
            tool_call: Original tool call data structure
            result_content: The result content to send back

        Returns:
            Tool result message in the backend's expected format
        """
        # Default implementation assumes Chat Completions format
        tool_call_id = self.extract_tool_call_id(tool_call)
        return {"role": "tool", "tool_call_id": tool_call_id, "content": result_content}

    def extract_tool_result_content(self, tool_result_message: Dict[str, Any]) -> str:
        """
        Extract the content/output from a tool result message in this backend's format.

        Args:
            tool_result_message: Tool result message created by this backend

        Returns:
            The content/output string from the message
        """
        # Default implementation assumes Chat Completions format
        return tool_result_message.get("content", "")