massgen-0.0.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


Files changed (76)
  1. massgen/__init__.py +94 -0
  2. massgen/agent_config.py +507 -0
  3. massgen/backend/CLAUDE_API_RESEARCH.md +266 -0
  4. massgen/backend/Function calling openai responses.md +1161 -0
  5. massgen/backend/GEMINI_API_DOCUMENTATION.md +410 -0
  6. massgen/backend/OPENAI_RESPONSES_API_FORMAT.md +65 -0
  7. massgen/backend/__init__.py +25 -0
  8. massgen/backend/base.py +180 -0
  9. massgen/backend/chat_completions.py +228 -0
  10. massgen/backend/claude.py +661 -0
  11. massgen/backend/gemini.py +652 -0
  12. massgen/backend/grok.py +187 -0
  13. massgen/backend/response.py +397 -0
  14. massgen/chat_agent.py +440 -0
  15. massgen/cli.py +686 -0
  16. massgen/configs/README.md +293 -0
  17. massgen/configs/creative_team.yaml +53 -0
  18. massgen/configs/gemini_4o_claude.yaml +31 -0
  19. massgen/configs/news_analysis.yaml +51 -0
  20. massgen/configs/research_team.yaml +51 -0
  21. massgen/configs/single_agent.yaml +18 -0
  22. massgen/configs/single_flash2.5.yaml +44 -0
  23. massgen/configs/technical_analysis.yaml +51 -0
  24. massgen/configs/three_agents_default.yaml +31 -0
  25. massgen/configs/travel_planning.yaml +51 -0
  26. massgen/configs/two_agents.yaml +39 -0
  27. massgen/frontend/__init__.py +20 -0
  28. massgen/frontend/coordination_ui.py +945 -0
  29. massgen/frontend/displays/__init__.py +24 -0
  30. massgen/frontend/displays/base_display.py +83 -0
  31. massgen/frontend/displays/rich_terminal_display.py +3497 -0
  32. massgen/frontend/displays/simple_display.py +93 -0
  33. massgen/frontend/displays/terminal_display.py +381 -0
  34. massgen/frontend/logging/__init__.py +9 -0
  35. massgen/frontend/logging/realtime_logger.py +197 -0
  36. massgen/message_templates.py +431 -0
  37. massgen/orchestrator.py +1222 -0
  38. massgen/tests/__init__.py +10 -0
  39. massgen/tests/multi_turn_conversation_design.md +214 -0
  40. massgen/tests/multiturn_llm_input_analysis.md +189 -0
  41. massgen/tests/test_case_studies.md +113 -0
  42. massgen/tests/test_claude_backend.py +310 -0
  43. massgen/tests/test_grok_backend.py +160 -0
  44. massgen/tests/test_message_context_building.py +293 -0
  45. massgen/tests/test_rich_terminal_display.py +378 -0
  46. massgen/tests/test_v3_3agents.py +117 -0
  47. massgen/tests/test_v3_simple.py +216 -0
  48. massgen/tests/test_v3_three_agents.py +272 -0
  49. massgen/tests/test_v3_two_agents.py +176 -0
  50. massgen/utils.py +79 -0
  51. massgen/v1/README.md +330 -0
  52. massgen/v1/__init__.py +91 -0
  53. massgen/v1/agent.py +605 -0
  54. massgen/v1/agents.py +330 -0
  55. massgen/v1/backends/gemini.py +584 -0
  56. massgen/v1/backends/grok.py +410 -0
  57. massgen/v1/backends/oai.py +571 -0
  58. massgen/v1/cli.py +351 -0
  59. massgen/v1/config.py +169 -0
  60. massgen/v1/examples/fast-4o-mini-config.yaml +44 -0
  61. massgen/v1/examples/fast_config.yaml +44 -0
  62. massgen/v1/examples/production.yaml +70 -0
  63. massgen/v1/examples/single_agent.yaml +39 -0
  64. massgen/v1/logging.py +974 -0
  65. massgen/v1/main.py +368 -0
  66. massgen/v1/orchestrator.py +1138 -0
  67. massgen/v1/streaming_display.py +1190 -0
  68. massgen/v1/tools.py +160 -0
  69. massgen/v1/types.py +245 -0
  70. massgen/v1/utils.py +199 -0
  71. massgen-0.0.3.dist-info/METADATA +568 -0
  72. massgen-0.0.3.dist-info/RECORD +76 -0
  73. massgen-0.0.3.dist-info/WHEEL +5 -0
  74. massgen-0.0.3.dist-info/entry_points.txt +2 -0
  75. massgen-0.0.3.dist-info/licenses/LICENSE +204 -0
  76. massgen-0.0.3.dist-info/top_level.txt +1 -0
massgen/backend/GEMINI_API_DOCUMENTATION.md
@@ -0,0 +1,410 @@
# Gemini API Documentation for Backend Integration

## Overview

The Gemini API provides access to Google's latest generative AI models with multimodal capabilities, streaming support, and function calling.

## Authentication

- Requires an API key from Google AI Studio
- Set up authentication in the Python client

## Models Available

1. **Gemini 2.5 Pro**: Most powerful thinking model, built for complex reasoning
2. **Gemini 2.5 Flash**: Newest multimodal model with next-generation features
3. **Gemini 2.5 Flash-Lite**: Lighter, most cost-efficient version

**Note**: Starting April 29, 2025, Gemini 1.5 Pro and Gemini 1.5 Flash are not available in projects with no prior usage.

## Python SDK Installation & Basic Usage

```bash
pip install -q -U google-genai
```

```python
from google import genai

client = genai.Client()

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Explain how AI works in a few words",
)

print(response.text)
```

## Streaming Implementation

### Synchronous Streaming
```python
for chunk in client.models.generate_content_stream(
    model='gemini-2.0-flash',
    contents='Tell me a story in 300 words.'
):
    print(chunk.text)
    print("_" * 80)
```

### Asynchronous Streaming
```python
async for chunk in await client.aio.models.generate_content_stream(
    model='gemini-2.0-flash',
    contents="Write a cute story about cats."
):
    if chunk.text:
        print(chunk.text)
        print("_" * 80)
```

### Async Concurrent Execution
```python
import asyncio

async def get_response():
    async for chunk in await client.aio.models.generate_content_stream(
        model='gemini-2.0-flash',
        contents='Tell me a story in 500 words.'
    ):
        if chunk.text:
            print(chunk.text)
            print("_" * 80)

async def something_else():
    for i in range(5):
        print("==========not blocked!==========")
        await asyncio.sleep(1)

async def async_demo():
    task1 = asyncio.create_task(get_response())
    task2 = asyncio.create_task(something_else())
    await asyncio.gather(task1, task2)
```

## Function Calling

### Overview
- Allows models to interact with external tools and APIs
- Three primary use cases:
  1. Augment knowledge
  2. Extend capabilities
  3. Take actions

### Function Call Workflow
1. Define function declarations with:
   - Name
   - Description
   - Parameters (type, properties)
2. Call the model with the function declarations
3. The model decides whether to:
   - Generate a text response
   - Call the specified function(s)

### Function Call Modes
- **AUTO** (default): The model chooses between a text response and a function call
- **ANY**: Forces the model to call a function
- **NONE**: Prohibits function calls

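To make the workflow and modes concrete, here is a minimal sketch using the `google-genai` SDK's `types` module; the `get_weather` tool is hypothetical, and exact response accessors may vary by SDK version:

```python
from google import genai
from google.genai import types

client = genai.Client()

# Hypothetical tool the model may call.
get_weather = types.FunctionDeclaration(
    name="get_weather",
    description="Get the current temperature for a city.",
    parameters={
        "type": "object",
        "properties": {"city": {"type": "string", "description": "City name"}},
        "required": ["city"],
    },
)

config = types.GenerateContentConfig(
    tools=[types.Tool(function_declarations=[get_weather])],
    # "ANY" forces a function call; omit tool_config entirely for the default AUTO.
    tool_config=types.ToolConfig(
        function_calling_config=types.FunctionCallingConfig(mode="ANY")
    ),
)

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="What is the weather in Paris?",
    config=config,
)

# The model returns function call parts instead of plain text.
for call in response.function_calls or []:
    print(call.name, dict(call.args))
```
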
### Supported Capabilities
- Parallel function calling
- Compositional (sequential) function calling
- Automatic function calling (Python SDK)

### Best Practices
- Provide clear, specific function descriptions
- Use strong typing for parameters
- Limit the total number of tools (10-20 recommended)
- Implement robust error handling
- Be mindful of security and token limits

### Supported Models for Function Calling
- Gemini 2.5 Pro
- Gemini 2.5 Flash
- Gemini 2.5 Flash-Lite

## Structured Output

### Overview
Structured output allows constraining model responses to specific JSON schemas or enums, ensuring predictable data formats.

### Implementation with Pydantic Models

```python
from google import genai
from pydantic import BaseModel, Field
import enum

class ActionType(enum.Enum):
    VOTE = "vote"
    NEW_ANSWER = "new_answer"

class VoteAction(BaseModel):
    action: ActionType = Field(default=ActionType.VOTE)
    agent_id: str = Field(description="Agent ID to vote for")
    reason: str = Field(description="Reason for voting")

class CoordinationResponse(BaseModel):
    action_type: ActionType
    vote_data: VoteAction | None = None

client = genai.Client()

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Choose the best agent and explain why.",
    config={
        "response_mime_type": "application/json",
        "response_schema": CoordinationResponse,
    }
)

# The response will be structured JSON matching the schema
```

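With `response_schema` set, the SDK exposes both the raw JSON text and a parsed object; a short sketch (assuming current `google-genai` behavior, where `response.parsed` returns the Pydantic instance):

```python
import json

raw = json.loads(response.text)   # raw JSON string matching the schema
parsed = response.parsed          # CoordinationResponse instance (or None on failure)
if parsed is not None:
    print(parsed.action_type, parsed.vote_data)
```
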
### Enum-Only Responses

```python
class Instrument(enum.Enum):
    PERCUSSION = "Percussion"
    STRING = "String"
    WIND = "Wind"

response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents='What type of instrument is an oboe?',
    config={
        'response_mime_type': 'text/x.enum',
        'response_schema': Instrument,
    }
)
```

### Best Practices for Structured Output
- Keep schemas simple to avoid `InvalidArgument: 400` errors
- Use Pydantic models for complex JSON structures
- Add field descriptions for clarity
- Provide clear context in prompts
- Use `propertyOrdering` for consistent output order

## Builtin Tools

### Code Execution

**Overview:**
- Executes Python code within the model's runtime environment
- Maximum execution time: 30 seconds
- Can regenerate code up to 5 times if errors occur
- No additional charge beyond standard token pricing

**Supported Libraries:**
- numpy, pandas, matplotlib, scikit-learn
- Cannot install custom libraries
- Can generate Matplotlib graphs and handle file inputs (CSV, text)

**Configuration:**
```python
from google.genai import types

code_tool = types.Tool(code_execution=types.ToolCodeExecution())
config = types.GenerateContentConfig(tools=[code_tool])

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Calculate sum of first 50 prime numbers",
    config=config
)
```

**Response Format:**
- `text`: Model's explanatory text
- `executableCode`: Generated Python code
- `codeExecutionResult`: Execution output
- Access via `response.candidates[0].content.parts`

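A short sketch of walking those parts with the Python SDK, which exposes them as snake_case attributes (assuming `google-genai`'s `Part` fields):

```python
for part in response.candidates[0].content.parts:
    if part.text:
        print("text:", part.text)
    if part.executable_code:            # generated Python code
        print("code:", part.executable_code.code)
    if part.code_execution_result:      # execution output
        print("result:", part.code_execution_result.output)
```
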
**Limitations:**
- Python only
- Cannot return non-code artifacts
- Maximum file input ~2MB
- Performance can vary between runs

### Grounding (Web Search)

**Overview:**
- Provides real-time web information for factual accuracy
- Includes citations and source attribution
- Single billable use per request (even with multiple queries)

**Configuration:**
```python
from google.genai import types

grounding_tool = types.Tool(google_search=types.GoogleSearch())
config = types.GenerateContentConfig(tools=[grounding_tool])

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Latest AI developments in 2025",
    config=config
)
```

**Response Metadata:**
Access via `response.candidates[0].grounding_metadata`:
- `webSearchQueries`: Search queries used
- `groundingChunks`: Web sources (URI and title)
- `groundingSupports`: Links text segments to sources

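A sketch of pulling citations out of that metadata (snake_case attribute names assumed from the `google-genai` types):

```python
meta = response.candidates[0].grounding_metadata
if meta:
    print("queries:", meta.web_search_queries)
    for source in meta.grounding_chunks or []:
        print("source:", source.web.title, source.web.uri)
    for support in meta.grounding_supports or []:
        # Each support links a text segment to one or more source chunks.
        print(support.segment.text, "->", support.grounding_chunk_indices)
```
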
**Best Practices:**
- Process citations using `groundingSupports` and `groundingChunks`
- Use for current events and factual verification
- Review the Search tool notebook for detailed examples

### URL Context (Experimental)

**Overview:**
- Process up to 20 URLs per request as additional context
- Extract and analyze content from web pages
- Currently free during the experimental phase

**Capabilities:**
- Extract key data points from web pages
- Compare information across multiple URLs
- Synthesize data from multiple sources
- Answer questions based on webpage content

**Limitations:**
- Works best with standard web pages
- Not recommended for multimedia (YouTube videos)
- Daily quotas: 1500 queries per project, 100 per user
- Available on gemini-2.5-pro and gemini-2.5-flash

**Example Use Cases:**
```python
# Compare recipes from multiple URLs
"Compare recipes from URL1 and URL2"

# Extract schedule information
"Give me a three-day event schedule based on the URL"
```

## Additional Capabilities

- **Multimodal input**: text, images, video
- **Long context support**: millions of tokens
- **Structured output generation** (see above)
- **Native image generation**
- **Embeddings** for RAG workflows
- **OpenAI-compatible interface**: the OpenAI Python library can be pointed at the Gemini API, including `stream=True`

## Integration Notes for Backend

### Key Implementation Points:
1. Use `from google import genai` with `genai.Client()` (the `google-genai` package); the legacy `google.generativeai` import belongs to the deprecated package
2. Use `client.models.generate_content_stream()` for streaming; `generate_content()` has no `stream=True` flag in this SDK
3. Check `chunk.text` to skip empty chunks
4. Configure structured output with `config={"response_mime_type": "application/json", "response_schema": Schema}`
5. Compatible with the asyncio patterns the architecture needs (via `client.aio`)

### Correct Package Usage:
```python
# Correct import (google-genai package)
from google import genai

# Client-based approach (recommended)
client = genai.Client()
client.models.generate_content(...)

# Note: The old google-generativeai package is deprecated.
# Use google-genai instead: pip install -q -U google-genai
```

### Authentication Setup:
- Get an API key from Google AI Studio
- Set the `GOOGLE_API_KEY` or `GEMINI_API_KEY` environment variable; `genai.Client()` picks it up automatically
- `genai.configure(api_key=...)` is the deprecated `google-generativeai` pattern; with `google-genai`, pass `api_key` to `genai.Client()` instead
- Handle authentication errors appropriately

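A minimal sketch of explicit key handling (the environment-variable route needs no code at all):

```python
import os
from google import genai

# Explicit key, e.g. pulled from a secrets manager; without the argument,
# genai.Client() reads GOOGLE_API_KEY / GEMINI_API_KEY from the environment.
client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
```
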
### Error Handling:
- Implement robust error handling for API failures
- Handle rate-limit and quota-exceeded scenarios
- Manage streaming connection failures gracefully
- Handle `InvalidArgument: 400` errors for complex schemas

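A sketch of one way to branch on failures, assuming `google-genai`'s `errors` module with a status-carrying `APIError`:

```python
from google.genai import errors

try:
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents="...",
    )
except errors.APIError as e:
    if e.code == 429:      # rate limit / quota exceeded
        ...                # back off and retry
    elif e.code == 400:    # e.g. InvalidArgument from an over-complex schema
        ...                # simplify the response schema
    else:
        raise
```
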
### Pricing and Rate Limits:
- Pricing details: https://ai.google.dev/pricing
- Rate limits: https://ai.google.dev/gemini-api/docs/rate-limits
- Monitor usage and implement cost controls

## Tool Usage Restrictions & Multi-Tool Support

### Regular Gemini API (Stable)
**✅ Supported Combinations:**
- `code_execution` + `grounding` (includes search) - **RECOMMENDED**
- `function_declarations` only (user-defined tools)

**❌ NOT Supported:**
- `code_execution` + `function_declarations`
- `grounding` + `function_declarations`
- All three tool types together

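A sketch of the recommended stable combination, passing both builtin tools in one config (per the supported-combinations list above):

```python
from google.genai import types

config = types.GenerateContentConfig(
    tools=[
        types.Tool(code_execution=types.ToolCodeExecution()),
        types.Tool(google_search=types.GoogleSearch()),
    ]
)

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Find the latest population figures and compute the growth rate.",
    config=config,
)
```
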
### Live API (Preview/Experimental)
**✅ Multi-Tool Support:**
- Can combine `google_search` + `code_execution` + `function_declarations`
- Full flexibility, but with major limitations

**🚨 Live API Restrictions (NOT Recommended for MassGen):**
- **Status**: Preview/experimental - unstable for production
- **Session Limits**: 3 free, 50-1000 paid (too restrictive)
- **Real-time focus**: WebSocket-based, designed for audio/video
- **Cost**: 50% premium over the regular API
- **Availability**: Not guaranteed, capacity varies
- **Complexity**: Requires a WebSocket implementation

### Recommendation for MassGen Backend
**✅ Use the Regular API with `code_execution` + `grounding`:**
- Stable, production-ready
- Covers both code execution and web search needs
- Standard REST endpoints
- Predictable pricing and limits
- No session restrictions

**❌ Avoid the Live API:**
- Session limits incompatible with multi-agent scaling
- Preview status unsuitable for production
- Unnecessary complexity for text-based coordination

## Implementation Status for MassGen

**✅ COMPLETED**: GeminiBackend class implemented with:
- [x] Google Gemini API integration with proper authentication
- [x] Structured output for coordination (vote/new_answer) using JSON schemas
- [x] Streaming functionality compatible with the StreamChunk architecture
- [x] Cost calculation for Gemini 2.5 models (Flash, Flash-Lite, Pro)
- [x] Error handling for Gemini-specific responses and API limitations
- [x] Support for builtin tools (code_execution + grounding/web search)
- [x] Integration with SingleAgent and orchestrator patterns
- [x] Tool result detection and streaming for code execution and web search
- [x] CLI and configuration support with AgentConfig.create_gemini_config()
- [x] NO Live API support (uses the regular API only)

**Key Features:**
- **Structured Output**: Uses `response_mime_type: "application/json"` with Pydantic schemas for coordination
- **Builtin Tools**: Supports code_execution and google_search_retrieval with proper result detection
- **Multi-mode Support**: Handles coordination-only, tools-only, and mixed scenarios
- **Cost Tracking**: Tracks token usage, search count, and code execution count
- **MassGen Compatible**: Full integration with orchestrator and agent patterns

**Usage Examples:**
```bash
# CLI usage
python -m massgen.cli --backend gemini --model gemini-2.5-flash "Your question"
```

```python
# Configuration
AgentConfig.create_gemini_config(
    model="gemini-2.5-flash",
    enable_web_search=True,
    enable_code_execution=True
)
```
massgen/backend/OPENAI_RESPONSES_API_FORMAT.md
@@ -0,0 +1,65 @@
# OpenAI Responses API Tool Call Format Documentation

## Key Points for MassGen Framework

### Tool Call Format (Model Output)
```json
{
  "type": "function_call",
  "id": "fc_12345xyz",
  "call_id": "call_12345xyz",
  "name": "get_weather",
  "arguments": "{\"location\":\"Paris, France\"}"
}
```

### Tool Result Format (Input to Model)
```json
{
  "type": "function_call_output",
  "call_id": "call_12345xyz",
  "output": "Temperature is 15°C"
}
```

## Critical Flow for Multi-Turn Conversations

When handling tool calls across multiple turns:

1. **Model makes tool call** - Returns a function_call object with a call_id
2. **Execute function** - Run your code with the arguments
3. **Add BOTH messages to the input array**:
   ```python
   input_messages.append(tool_call)  # append the model's function call message
   input_messages.append({           # append the result message
       "type": "function_call_output",
       "call_id": tool_call.call_id,
       "output": str(result)
   })
   ```
4. **Call the model again** with the complete conversation history

+ ## Key Requirements
43
+
44
+ - **Tool result messages MUST reference the exact call_id from the original tool call**
45
+ - **Both the tool call AND tool result must be in the conversation history**
46
+ - **Tool results use "output" field, not "content"**
47
+ - **Arguments are JSON strings, not objects**
48
+
49
+ ## Error Handling Pattern
50
+
51
+ For error messages to tools, follow the same pattern:
52
+ ```python
53
+ # Agent made invalid tool call with call_id "call_123"
54
+ error_message = {
55
+ "type": "function_call_output",
56
+ "call_id": "call_123",
57
+ "output": "Error: You can only vote once per response. Please vote for just ONE agent."
58
+ }
59
+
60
+ # Add both the original tool call AND error message to conversation
61
+ input_messages.append(original_tool_call)
62
+ input_messages.append(error_message)
63
+ ```
64
+
65
+ This ensures the API can match the tool result to the original call.
massgen/backend/__init__.py
@@ -0,0 +1,25 @@
"""
MassGen Backend System - Multi-Provider LLM Integration

Supports multiple LLM providers with a standardized StreamChunk interface:
- Response API (standard format with tool support)
- Grok/xAI (Chat Completions API compatible)
- Claude (Messages API with multi-tool support)
- Gemini (structured output for coordination)
"""

from .base import LLMBackend, StreamChunk, TokenUsage
from .response import ResponseBackend
from .grok import GrokBackend
from .claude import ClaudeBackend
from .gemini import GeminiBackend

__all__ = [
    "LLMBackend",
    "StreamChunk",
    "TokenUsage",
    "ResponseBackend",
    "GrokBackend",
    "ClaudeBackend",
    "GeminiBackend",
]
massgen/backend/base.py
@@ -0,0 +1,180 @@
"""
Base backend interface for LLM providers.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Dict, List, Any, AsyncGenerator, Optional
from dataclasses import dataclass


@dataclass
class StreamChunk:
    """Standardized chunk format for streaming responses."""

    type: str  # "content", "tool_calls", "builtin_tool_results", "complete_message", "complete_response", "done", "error", "agent_status"
    content: Optional[str] = None
    tool_calls: Optional[List[Dict[str, Any]]] = (
        None  # User-defined function tools (need execution)
    )
    builtin_tool_results: Optional[List[Dict[str, Any]]] = (
        None  # Provider builtin tools (already executed)
    )
    complete_message: Optional[Dict[str, Any]] = None  # Complete assistant message
    response: Optional[Dict[str, Any]] = None  # Raw Responses API response
    error: Optional[str] = None
    source: Optional[str] = None  # Source identifier (e.g., agent_id, "orchestrator")
    status: Optional[str] = None  # For agent status updates


@dataclass
class TokenUsage:
    """Token usage and cost tracking."""

    input_tokens: int = 0
    output_tokens: int = 0
    estimated_cost: float = 0.0


class LLMBackend(ABC):
    """Abstract base class for LLM providers."""

    def __init__(self, api_key: Optional[str] = None, **kwargs):
        self.api_key = api_key
        self.config = kwargs
        self.token_usage = TokenUsage()

    @abstractmethod
    async def stream_with_tools(
        self, messages: List[Dict[str, Any]], tools: List[Dict[str, Any]], **kwargs
    ) -> AsyncGenerator[StreamChunk, None]:
        """
        Stream a response with tool calling support.

        Args:
            messages: Conversation messages
            tools: Available tools schema
            **kwargs: Additional provider-specific parameters including model

        Yields:
            StreamChunk: Standardized response chunks
        """
        pass

    @abstractmethod
    def get_provider_name(self) -> str:
        """Get the name of this provider."""
        pass

    @abstractmethod
    def estimate_tokens(self, text: str) -> int:
        """Estimate token count for text."""
        pass

    @abstractmethod
    def calculate_cost(
        self, input_tokens: int, output_tokens: int, model: str
    ) -> float:
        """Calculate cost for token usage."""
        pass

    def update_token_usage(
        self, messages: List[Dict[str, Any]], response_content: str, model: str
    ):
        """Update token usage tracking."""
        # Estimate input tokens from messages
        input_text = str(messages)
        input_tokens = self.estimate_tokens(input_text)

        # Estimate output tokens from response
        output_tokens = self.estimate_tokens(response_content)

        # Update totals
        self.token_usage.input_tokens += input_tokens
        self.token_usage.output_tokens += output_tokens

        # Calculate cost
        cost = self.calculate_cost(input_tokens, output_tokens, model)
        self.token_usage.estimated_cost += cost

    def get_token_usage(self) -> TokenUsage:
        """Get current token usage."""
        return self.token_usage

    def reset_token_usage(self):
        """Reset token usage tracking."""
        self.token_usage = TokenUsage()

    def get_supported_builtin_tools(self) -> List[str]:
        """Get list of builtin tools supported by this provider."""
        return []

    def extract_tool_name(self, tool_call: Dict[str, Any]) -> str:
        """
        Extract tool name from a tool call in this backend's format.

        Args:
            tool_call: Tool call data structure from this backend

        Returns:
            Tool name string
        """
        # Default implementation assumes Chat Completions format
        return tool_call.get("function", {}).get("name", "unknown")

    def extract_tool_arguments(self, tool_call: Dict[str, Any]) -> Dict[str, Any]:
        """
        Extract tool arguments from a tool call in this backend's format.

        Args:
            tool_call: Tool call data structure from this backend

        Returns:
            Tool arguments dictionary
        """
        # Default implementation assumes Chat Completions format
        return tool_call.get("function", {}).get("arguments", {})

    def extract_tool_call_id(self, tool_call: Dict[str, Any]) -> str:
        """
        Extract tool call ID from a tool call in this backend's format.

        Args:
            tool_call: Tool call data structure from this backend

        Returns:
            Tool call ID string
        """
        # Default implementation assumes Chat Completions format
        return tool_call.get("id", "")

    def create_tool_result_message(
        self, tool_call: Dict[str, Any], result_content: str
    ) -> Dict[str, Any]:
        """
        Create a tool result message in this backend's expected format.

        Args:
            tool_call: Original tool call data structure
            result_content: The result content to send back

        Returns:
            Tool result message in backend's expected format
        """
        # Default implementation assumes Chat Completions format
        tool_call_id = self.extract_tool_call_id(tool_call)
        return {"role": "tool", "tool_call_id": tool_call_id, "content": result_content}

    def extract_tool_result_content(self, tool_result_message: Dict[str, Any]) -> str:
        """
        Extract the content/output from a tool result message in this backend's format.

        Args:
            tool_result_message: Tool result message created by this backend

        Returns:
            The content/output string from the message
        """
        # Default implementation assumes Chat Completions format
        return tool_result_message.get("content", "")
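
To illustrate how a concrete provider plugs into this interface, here is a minimal sketch of a subclass; it is not part of the package, and the echo behavior stands in for a real API call:

```python
class EchoBackend(LLMBackend):
    """Toy backend that streams the last user message back, for illustration only."""

    async def stream_with_tools(self, messages, tools, **kwargs):
        last = messages[-1].get("content", "") if messages else ""
        # A real backend would call its provider API here and translate
        # provider events into StreamChunk objects.
        for word in last.split():
            yield StreamChunk(type="content", content=word + " ")
        self.update_token_usage(messages, last, kwargs.get("model", "echo-1"))
        yield StreamChunk(type="done")

    def get_provider_name(self) -> str:
        return "echo"

    def estimate_tokens(self, text: str) -> int:
        return max(1, len(text) // 4)  # rough 4-chars-per-token heuristic

    def calculate_cost(self, input_tokens: int, output_tokens: int, model: str) -> float:
        return 0.0  # the toy backend is free
```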