agnt5 0.2.8a13__cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of agnt5 might be problematic.

agnt5/lm.py ADDED
@@ -0,0 +1,969 @@
1
+ """Language Model interface for AGNT5 SDK.
2
+
3
+ Simplified API inspired by Vercel AI SDK for seamless multi-provider LLM access.
4
+ Uses Rust-backed implementation via PyO3 for performance and reliability.
5
+
6
+ Basic Usage:
7
+ >>> from agnt5 import lm
8
+ >>>
9
+ >>> # Simple generation
10
+ >>> response = await lm.generate(
11
+ ... model="openai/gpt-4o-mini",
12
+ ... prompt="What is love?",
13
+ ... temperature=0.7
14
+ ... )
15
+ >>> print(response.text)
16
+ >>>
17
+ >>> # Streaming
18
+ >>> async for chunk in lm.stream(
19
+ ... model="anthropic/claude-3-5-haiku",
20
+ ... prompt="Write a story"
21
+ ... ):
22
+ ... print(chunk, end="", flush=True)
23
+
24
+ Supported Providers (via model prefix):
25
+ - openai/model-name
26
+ - anthropic/model-name
27
+ - groq/model-name
28
+ - openrouter/provider/model-name
29
+ - azure/model-name
30
+ - bedrock/model-name
31
+ """
32
+
33
+ from __future__ import annotations
34
+
35
+ import json
36
+ from abc import ABC, abstractmethod
37
+ from dataclasses import dataclass, field
38
+ from enum import Enum
39
+ from typing import Any, AsyncIterator, Dict, List, Optional
40
+
41
+ from ._schema_utils import detect_format_type
42
+ from .context import get_current_context
43
+
44
+ try:
45
+ from ._core import LanguageModel as RustLanguageModel
46
+ from ._core import LanguageModelConfig as RustLanguageModelConfig
47
+ from ._core import Response as RustResponse
48
+ from ._core import StreamChunk as RustStreamChunk
49
+ from ._core import Usage as RustUsage
50
+ _RUST_AVAILABLE = True
51
+ except ImportError:
52
+ _RUST_AVAILABLE = False
53
+ RustLanguageModel = None
54
+ RustLanguageModelConfig = None
55
+ RustResponse = None
56
+ RustStreamChunk = None
57
+ RustUsage = None
58
+
59
+
60
+ # Keep Python classes for backward compatibility and convenience
61
+ class MessageRole(str, Enum):
62
+ """Message role in conversation."""
63
+
64
+ SYSTEM = "system"
65
+ USER = "user"
66
+ ASSISTANT = "assistant"
67
+
68
+
69
+ @dataclass
70
+ class Message:
71
+ """Conversation message."""
72
+
73
+ role: MessageRole
74
+ content: str
75
+
76
+ @staticmethod
77
+ def system(content: str) -> Message:
78
+ """Create system message."""
79
+ return Message(role=MessageRole.SYSTEM, content=content)
80
+
81
+ @staticmethod
82
+ def user(content: str) -> Message:
83
+ """Create user message."""
84
+ return Message(role=MessageRole.USER, content=content)
85
+
86
+ @staticmethod
87
+ def assistant(content: str) -> Message:
88
+ """Create assistant message."""
89
+ return Message(role=MessageRole.ASSISTANT, content=content)
90
+
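+ # Illustrative sketch: building a short conversation with the Message helpers above.
+ # >>> history = [
+ # ...     Message.system("You are a helpful assistant."),
+ # ...     Message.user("Summarize the release notes."),
+ # ...     Message.assistant("Sure, which release?"),
+ # ... ]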
91
+
92
+ @dataclass
93
+ class ToolDefinition:
94
+ """Tool definition for LLM."""
95
+
96
+ name: str
97
+ description: Optional[str] = None
98
+ parameters: Optional[Dict[str, Any]] = None
99
+
100
+
101
+ class ToolChoice(str, Enum):
102
+ """Tool choice mode."""
103
+
104
+ AUTO = "auto"
105
+ NONE = "none"
106
+ REQUIRED = "required"
107
+
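+ # Illustrative sketch: a ToolDefinition carries a JSON Schema dict in `parameters` and
+ # pairs with tool_choice (e.g. ToolChoice.AUTO) on GenerateRequest, defined below.
+ # The tool name and schema here are invented for illustration.
+ # >>> weather_tool = ToolDefinition(
+ # ...     name="get_weather",
+ # ...     description="Look up the current weather for a city",
+ # ...     parameters={
+ # ...         "type": "object",
+ # ...         "properties": {"city": {"type": "string"}},
+ # ...         "required": ["city"],
+ # ...     },
+ # ... )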
108
+
109
+ class BuiltInTool(str, Enum):
110
+ """Built-in tools for OpenAI Responses API.
111
+
112
+ These are platform-provided tools that don't require implementation:
113
+ - WEB_SEARCH: Real-time web search capability
114
+ - CODE_INTERPRETER: Execute Python code in a sandboxed environment
115
+ - FILE_SEARCH: Search through uploaded files
116
+ """
117
+
118
+ WEB_SEARCH = "web_search_preview"
119
+ CODE_INTERPRETER = "code_interpreter"
120
+ FILE_SEARCH = "file_search"
121
+
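+ # Usage sketch: built-in tools are requested by value through the simplified
+ # generate() API defined later in this module (OpenAI Responses API only).
+ # >>> response = await generate(
+ # ...     model="openai/gpt-4o",
+ # ...     prompt="Search the web for today's top Python news.",
+ # ...     built_in_tools=[BuiltInTool.WEB_SEARCH],
+ # ... )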
122
+
123
+ class ReasoningEffort(str, Enum):
124
+ """Reasoning effort level for o-series models (o1, o3, etc.).
125
+
126
+ Controls the amount of reasoning/thinking the model performs:
127
+ - MINIMAL: Fast responses with basic reasoning
128
+ - MEDIUM: Balanced reasoning and speed (default)
129
+ - HIGH: Deep reasoning, slower but more thorough
130
+ """
131
+
132
+ MINIMAL = "minimal"
133
+ MEDIUM = "medium"
134
+ HIGH = "high"
135
+
136
+
137
+ class Modality(str, Enum):
138
+ """Output modalities for multimodal models.
139
+
140
+ Specifies the types of content the model can generate:
141
+ - TEXT: Standard text output
142
+ - AUDIO: Audio output (e.g., for text-to-speech models)
143
+ - IMAGE: Image generation (future capability)
144
+ """
145
+
146
+ TEXT = "text"
147
+ AUDIO = "audio"
148
+ IMAGE = "image"
149
+
150
+
151
+ @dataclass
152
+ class ModelConfig:
153
+ """Advanced model configuration for custom endpoints and settings.
154
+
155
+ Use this for advanced scenarios like custom API endpoints, special headers,
156
+ or overriding default timeouts. Most users won't need this - the basic
157
+ model string with temperature/max_tokens is sufficient for common cases.
158
+
159
+ Example:
160
+ >>> from agnt5.lm import ModelConfig
161
+ >>> from agnt5 import Agent
162
+ >>>
163
+ >>> # Custom API endpoint
164
+ >>> config = ModelConfig(
165
+ ... base_url="https://custom-api.example.com",
166
+ ... api_key="custom-key",
167
+ ... timeout=60,
168
+ ... headers={"X-Custom-Header": "value"}
169
+ ... )
170
+ >>>
171
+ >>> agent = Agent(
172
+ ... name="custom_agent",
173
+ ... model="openai/gpt-4o-mini",
174
+ ... instructions="...",
175
+ ... model_config=config
176
+ ... )
177
+ """
178
+ base_url: Optional[str] = None
179
+ api_key: Optional[str] = None
180
+ timeout: Optional[int] = None
181
+ headers: Optional[Dict[str, str]] = None
182
+
183
+
184
+ @dataclass
185
+ class GenerationConfig:
186
+ """LLM generation configuration.
187
+
188
+ Supports both Chat Completions and Responses API parameters.
189
+ """
190
+
191
+ # Standard parameters (both APIs)
192
+ temperature: Optional[float] = None
193
+ max_tokens: Optional[int] = None
194
+ top_p: Optional[float] = None
195
+
196
+ # Responses API specific parameters
197
+ built_in_tools: List[BuiltInTool] = field(default_factory=list)
198
+ reasoning_effort: Optional[ReasoningEffort] = None
199
+ modalities: Optional[List[Modality]] = None
200
+ store: Optional[bool] = None # Enable server-side conversation state
201
+ previous_response_id: Optional[str] = None # Continue previous conversation
202
+
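+ # Illustrative sketch: the same knobs can be set directly on GenerationConfig when
+ # building a GenerateRequest (defined below) for the lower-level interface; the
+ # particular values here are arbitrary.
+ # >>> config = GenerationConfig(
+ # ...     temperature=0.2,
+ # ...     max_tokens=512,
+ # ...     reasoning_effort=ReasoningEffort.HIGH,
+ # ...     store=True,
+ # ... )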
203
+
204
+ @dataclass
205
+ class TokenUsage:
206
+ """Token usage statistics."""
207
+
208
+ prompt_tokens: int
209
+ completion_tokens: int
210
+ total_tokens: int
211
+
212
+
213
+ @dataclass
214
+ class GenerateResponse:
215
+ """Response from LLM generation."""
216
+
217
+ text: str
218
+ usage: Optional[TokenUsage] = None
219
+ finish_reason: Optional[str] = None
220
+ tool_calls: Optional[List[Dict[str, Any]]] = None
221
+ response_id: Optional[str] = None # Response ID for conversation continuation (Responses API)
222
+ _rust_response: Optional[Any] = field(default=None, repr=False)
223
+
224
+ @property
225
+ def structured_output(self) -> Optional[Any]:
226
+ """Parsed structured output (Pydantic model, dataclass, or dict).
227
+
228
+ Returns the parsed object when response_format is specified.
229
+ This is the recommended property name for accessing structured output.
230
+
231
+ Returns:
232
+ Parsed object according to the specified response_format, or None if not available
233
+ """
234
+ if self._rust_response and hasattr(self._rust_response, 'object'):
235
+ return self._rust_response.object
236
+ return None
237
+
238
+ @property
239
+ def parsed(self) -> Optional[Any]:
240
+ """Alias for structured_output (OpenAI SDK compatibility).
241
+
242
+ Returns:
243
+ Same as structured_output
244
+ """
245
+ return self.structured_output
246
+
247
+ @property
248
+ def object(self) -> Optional[Any]:
249
+ """Alias for structured_output.
250
+
251
+ Returns:
252
+ Same as structured_output
253
+ """
254
+ return self.structured_output
255
+
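+ # Access sketch, for a GenerateResponse instance `response`: the parsed structured
+ # output is exposed under three equivalent names, and token usage is optional.
+ # >>> data = response.structured_output   # preferred
+ # >>> data = response.parsed              # OpenAI SDK-style alias
+ # >>> data = response.object              # alias
+ # >>> if response.usage:
+ # ...     print(response.usage.total_tokens)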
256
+
257
+ @dataclass
258
+ class GenerateRequest:
259
+ """Request for LLM generation."""
260
+
261
+ model: str
262
+ messages: List[Message] = field(default_factory=list)
263
+ system_prompt: Optional[str] = None
264
+ tools: List[ToolDefinition] = field(default_factory=list)
265
+ tool_choice: Optional[ToolChoice] = None
266
+ config: GenerationConfig = field(default_factory=GenerationConfig)
267
+ response_schema: Optional[str] = None # JSON-encoded schema for structured output
268
+
269
+
270
+ # Abstract base class for language models
271
+ # This exists primarily for testing/mocking purposes
272
+ class LanguageModel(ABC):
273
+ """Abstract base class for language model implementations.
274
+
275
+ This class defines the interface that all language models must implement.
276
+ It's primarily used for testing and mocking, as production code should use
277
+ the module-level generate() and stream() functions instead.
278
+ """
279
+
280
+ @abstractmethod
281
+ async def generate(self, request: GenerateRequest) -> GenerateResponse:
282
+ """Generate completion from LLM.
283
+
284
+ Args:
285
+ request: Generation request with model, messages, and configuration
286
+
287
+ Returns:
288
+ GenerateResponse with text, usage, and optional tool calls
289
+ """
290
+ pass
291
+
292
+ @abstractmethod
293
+ async def stream(self, request: GenerateRequest) -> AsyncIterator[str]:
294
+ """Stream completion from LLM.
295
+
296
+ Args:
297
+ request: Generation request with model, messages, and configuration
298
+
299
+ Yields:
300
+ Text chunks as they are generated
301
+ """
302
+ pass
303
+
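+ # Testing sketch: a minimal fake that satisfies this ABC without touching the Rust
+ # core (the class name and stubbed values are invented for illustration).
+ # class FakeLM(LanguageModel):
+ #     async def generate(self, request: GenerateRequest) -> GenerateResponse:
+ #         return GenerateResponse(text="stubbed reply")
+ #
+ #     async def stream(self, request: GenerateRequest) -> AsyncIterator[str]:
+ #         yield "stubbed "
+ #         yield "reply"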
304
+
305
+ # Internal wrapper for the Rust-backed implementation
306
+ # Users should use the module-level generate() and stream() functions instead
307
+ class _LanguageModel(LanguageModel):
308
+ """Internal Language Model wrapper using Rust SDK core.
309
+
310
+ This class is for internal use only. Users should use the module-level
311
+ lm.generate() and lm.stream() functions for a simpler interface.
312
+ """
313
+
314
+ def __init__(
315
+ self,
316
+ provider: Optional[str] = None,
317
+ default_model: Optional[str] = None,
318
+ ):
319
+ """Initialize language model.
320
+
321
+ Args:
322
+ provider: Provider name (e.g., 'openai', 'anthropic', 'azure', 'bedrock', 'groq', 'openrouter')
323
+ If None, provider will be auto-detected from model prefix (e.g., 'openai/gpt-4o')
324
+ default_model: Default model to use if not specified in requests
325
+ """
326
+ if not _RUST_AVAILABLE:
327
+ raise ImportError(
328
+ "Rust extension not available. Please rebuild the SDK with: "
329
+ "cd sdk/sdk-python && maturin develop"
330
+ )
331
+
332
+ self._provider = provider
333
+ self._default_model = default_model
334
+
335
+ # Create config object for Rust
336
+ config = RustLanguageModelConfig(
337
+ default_model=default_model,
338
+ default_provider=provider,
339
+ )
340
+
341
+ self._rust_lm = RustLanguageModel(config=config)
342
+
343
+ def _prepare_model_name(self, model: str) -> str:
344
+ """Prepare model name with provider prefix if needed.
345
+
346
+ Args:
347
+ model: Model name (e.g., 'gpt-4o-mini' or 'openai/gpt-4o-mini')
348
+
349
+ Returns:
350
+ Model name with provider prefix (e.g., 'openai/gpt-4o-mini')
351
+ """
352
+ # If model already has a prefix, return as is
353
+ # This handles cases like OpenRouter where models already have their provider prefix
354
+ # (e.g., 'anthropic/claude-3.5-haiku' for OpenRouter)
355
+ if '/' in model:
356
+ return model
357
+
358
+ # If we have a default provider, prefix the model
359
+ if self._provider:
360
+ return f"{self._provider}/{model}"
361
+
362
+ # Otherwise return as is and let Rust handle the error
363
+ return model
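+ # e.g. with provider="openai": _prepare_model_name("gpt-4o-mini") -> "openai/gpt-4o-mini",
+ # while "anthropic/claude-3.5-haiku" already has a prefix and is returned unchanged.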
364
+
365
+ async def generate(self, request: GenerateRequest) -> GenerateResponse:
366
+ """Generate completion from LLM.
367
+
368
+ Args:
369
+ request: Generation request with model, messages, and configuration
370
+
371
+ Returns:
372
+ GenerateResponse with text, usage, and optional tool calls
373
+ """
374
+ # Convert Python request to structured format for Rust
375
+ prompt = self._build_prompt_messages(request)
376
+
377
+ # Prepare model name with provider prefix
378
+ model = self._prepare_model_name(request.model)
379
+
380
+ # Build kwargs for Rust
381
+ kwargs: dict[str, Any] = {
382
+ "model": model,
383
+ }
384
+
385
+ # Always pass provider explicitly if set
386
+ # For gateway providers like OpenRouter, this allows them to handle
387
+ # models with provider prefixes (e.g., openrouter can handle anthropic/claude-3.5-haiku)
388
+ if self._provider:
389
+ kwargs["provider"] = self._provider
390
+
391
+ # Pass system prompt separately if provided
392
+ if request.system_prompt:
393
+ kwargs["system_prompt"] = request.system_prompt
394
+
395
+ if request.config.temperature is not None:
396
+ kwargs["temperature"] = request.config.temperature
397
+ if request.config.max_tokens is not None:
398
+ kwargs["max_tokens"] = request.config.max_tokens
399
+ if request.config.top_p is not None:
400
+ kwargs["top_p"] = request.config.top_p
401
+
402
+ # Pass response schema for structured output if provided
403
+ if request.response_schema is not None:
404
+ kwargs["response_schema_kw"] = request.response_schema
405
+
406
+ # Pass Responses API specific parameters
407
+ if request.config.built_in_tools:
408
+ # Serialize built-in tools to JSON for Rust
409
+ built_in_tools_list = [tool.value for tool in request.config.built_in_tools]
410
+ kwargs["built_in_tools"] = json.dumps(built_in_tools_list)
411
+
412
+ if request.config.reasoning_effort is not None:
413
+ kwargs["reasoning_effort"] = request.config.reasoning_effort.value
414
+
415
+ if request.config.modalities is not None:
416
+ modalities_list = [modality.value for modality in request.config.modalities]
417
+ kwargs["modalities"] = json.dumps(modalities_list)
418
+
419
+ if request.config.store is not None:
420
+ kwargs["store"] = request.config.store
421
+
422
+ if request.config.previous_response_id is not None:
423
+ kwargs["previous_response_id"] = request.config.previous_response_id
424
+
425
+ # Pass tools and tool_choice to Rust
426
+ if request.tools:
427
+ # Serialize tools to JSON for Rust
428
+ tools_list = [
429
+ {
430
+ "name": tool.name,
431
+ "description": tool.description,
432
+ "parameters": tool.parameters,
433
+ }
434
+ for tool in request.tools
435
+ ]
436
+ tools_json = json.dumps(tools_list)
437
+ kwargs["tools"] = tools_json
438
+
439
+ if request.tool_choice:
440
+ # Serialize tool_choice to JSON for Rust
441
+ kwargs["tool_choice"] = json.dumps(request.tool_choice.value)
442
+
443
+ # Pass runtime_context for proper trace linking
444
+ # Try to get from current context if available
445
+ current_ctx = get_current_context()
446
+ if current_ctx and hasattr(current_ctx, '_runtime_context') and current_ctx._runtime_context:
447
+ kwargs["runtime_context"] = current_ctx._runtime_context
448
+
449
+ # Emit checkpoint if called within a workflow context
450
+ from .context import get_workflow_context
451
+ import time
452
+ workflow_ctx = get_workflow_context()
453
+
454
+ # Get trace context for event linkage
455
+ trace_id = None
456
+ span_id = None
457
+ try:
458
+ from opentelemetry import trace
459
+ span = trace.get_current_span()
460
+ if span.is_recording():
461
+ span_context = span.get_span_context()
462
+ trace_id = format(span_context.trace_id, '032x')
463
+ span_id = format(span_context.span_id, '016x')
464
+ except Exception:
465
+ pass # Tracing not available, continue without
466
+
467
+ # Emit started event
468
+ if workflow_ctx and trace_id:
469
+ workflow_ctx._send_checkpoint("agent.llm.call.started", {
470
+ "model": model,
471
+ "provider": self._provider,
472
+ "trace_id": trace_id,
473
+ "span_id": span_id,
474
+ "timestamp": time.time_ns() // 1_000_000,
475
+ })
476
+
477
+ try:
478
+ # Call the Rust implementation; it returns a real Python coroutine
479
+ # (via pyo3-async-runtimes), so the HTTP call is awaited without blocking.
480
+ rust_response = await self._rust_lm.generate(prompt=prompt, **kwargs)
481
+
482
+ # Convert Rust response to Python
483
+ response = self._convert_response(rust_response)
484
+
485
+ # Emit completion event with token usage and cost
486
+ if workflow_ctx and trace_id:
487
+ event_data = {
488
+ "model": model,
489
+ "provider": self._provider,
490
+ "trace_id": trace_id,
491
+ "span_id": span_id,
492
+ "timestamp": time.time_ns() // 1_000_000,
493
+ }
494
+
495
+ # Add token usage if available
496
+ if response.usage:
497
+ event_data["input_tokens"] = response.usage.prompt_tokens
498
+ event_data["output_tokens"] = response.usage.completion_tokens
499
+ event_data["total_tokens"] = response.usage.total_tokens
500
+
501
+ # Cost is already computed by the Rust layer and recorded on the span,
502
+ # so it is available via the trace_id link and not recalculated here.
503
+
504
+ workflow_ctx._send_checkpoint("agent.llm.call.completed", event_data)
505
+
506
+ return response
507
+ except Exception as e:
508
+ # Emit failed event
509
+ if workflow_ctx and trace_id:
510
+ workflow_ctx._send_checkpoint("agent.llm.call.failed", {
511
+ "model": model,
512
+ "provider": self._provider,
513
+ "error": str(e),
514
+ "error_type": type(e).__name__,
515
+ "trace_id": trace_id,
516
+ "span_id": span_id,
517
+ "timestamp": time.time_ns() // 1_000_000,
518
+ })
519
+ raise
520
+
521
+ async def stream(self, request: GenerateRequest) -> AsyncIterator[str]:
522
+ """Stream completion from LLM.
523
+
524
+ Args:
525
+ request: Generation request with model, messages, and configuration
526
+
527
+ Yields:
528
+ Text chunks as they are generated
529
+ """
530
+ # Convert Python request to structured format for Rust
531
+ prompt = self._build_prompt_messages(request)
532
+
533
+ # Prepare model name with provider prefix
534
+ model = self._prepare_model_name(request.model)
535
+
536
+ # Build kwargs for Rust
537
+ kwargs: dict[str, Any] = {
538
+ "model": model,
539
+ }
540
+
541
+ # Always pass provider explicitly if set
542
+ # For gateway providers like OpenRouter, this allows them to handle
543
+ # models with provider prefixes (e.g., openrouter can handle anthropic/claude-3.5-haiku)
544
+ if self._provider:
545
+ kwargs["provider"] = self._provider
546
+
547
+ # Pass system prompt separately if provided
548
+ if request.system_prompt:
549
+ kwargs["system_prompt"] = request.system_prompt
550
+
551
+ if request.config.temperature is not None:
552
+ kwargs["temperature"] = request.config.temperature
553
+ if request.config.max_tokens is not None:
554
+ kwargs["max_tokens"] = request.config.max_tokens
555
+ if request.config.top_p is not None:
556
+ kwargs["top_p"] = request.config.top_p
557
+
558
+ # Pass Responses API specific parameters
559
+ if request.config.built_in_tools:
560
+ # Serialize built-in tools to JSON for Rust
561
+ built_in_tools_list = [tool.value for tool in request.config.built_in_tools]
562
+ kwargs["built_in_tools"] = json.dumps(built_in_tools_list)
563
+
564
+ if request.config.reasoning_effort is not None:
565
+ kwargs["reasoning_effort"] = request.config.reasoning_effort.value
566
+
567
+ if request.config.modalities is not None:
568
+ modalities_list = [modality.value for modality in request.config.modalities]
569
+ kwargs["modalities"] = json.dumps(modalities_list)
570
+
571
+ if request.config.store is not None:
572
+ kwargs["store"] = request.config.store
573
+
574
+ if request.config.previous_response_id is not None:
575
+ kwargs["previous_response_id"] = request.config.previous_response_id
576
+
577
+ # Pass tools and tool_choice to Rust
578
+ if request.tools:
579
+ # Serialize tools to JSON for Rust
580
+ tools_list = [
581
+ {
582
+ "name": tool.name,
583
+ "description": tool.description,
584
+ "parameters": tool.parameters,
585
+ }
586
+ for tool in request.tools
587
+ ]
588
+ kwargs["tools"] = json.dumps(tools_list)
589
+
590
+ if request.tool_choice:
591
+ # Serialize tool_choice to JSON for Rust
592
+ kwargs["tool_choice"] = json.dumps(request.tool_choice.value)
593
+
594
+ # Emit checkpoint if called within a workflow context
595
+ from .context import get_workflow_context
596
+ import time
597
+ workflow_ctx = get_workflow_context()
598
+
599
+ # Get trace context for event linkage
600
+ trace_id = None
601
+ span_id = None
602
+ try:
603
+ from opentelemetry import trace
604
+ span = trace.get_current_span()
605
+ if span.is_recording():
606
+ span_context = span.get_span_context()
607
+ trace_id = format(span_context.trace_id, '032x')
608
+ span_id = format(span_context.span_id, '016x')
609
+ except Exception:
610
+ pass # Tracing not available, continue without
611
+
612
+ # Emit started event
613
+ if workflow_ctx and trace_id:
614
+ workflow_ctx._send_checkpoint("agent.llm.call.started", {
615
+ "model": model,
616
+ "provider": self._provider,
617
+ "streaming": True,
618
+ "trace_id": trace_id,
619
+ "span_id": span_id,
620
+ "timestamp": time.time_ns() // 1_000_000,
621
+ })
622
+
623
+ try:
624
+ # Call the Rust implementation; it returns a real Python coroutine
625
+ # (via pyo3-async-runtimes) that resolves to the stream chunks without blocking.
626
+ rust_chunks = await self._rust_lm.stream(prompt=prompt, **kwargs)
627
+
628
+ # Yield each chunk
629
+ for chunk in rust_chunks:
630
+ if chunk.text:
631
+ yield chunk.text
632
+
633
+ # Emit completion event (note: streaming doesn't provide token counts)
634
+ if workflow_ctx and trace_id:
635
+ workflow_ctx._send_checkpoint("agent.llm.call.completed", {
636
+ "model": model,
637
+ "provider": self._provider,
638
+ "streaming": True,
639
+ "trace_id": trace_id,
640
+ "span_id": span_id,
641
+ "timestamp": time.time_ns() // 1_000_000,
642
+ })
643
+ except Exception as e:
644
+ # Emit failed event
645
+ if workflow_ctx and trace_id:
646
+ workflow_ctx._send_checkpoint("agent.llm.call.failed", {
647
+ "model": model,
648
+ "provider": self._provider,
649
+ "streaming": True,
650
+ "error": str(e),
651
+ "error_type": type(e).__name__,
652
+ "trace_id": trace_id,
653
+ "span_id": span_id,
654
+ "timestamp": time.time_ns() // 1_000_000,
655
+ })
656
+ raise
657
+
658
+ def _build_prompt_messages(self, request: GenerateRequest) -> List[Dict[str, str]]:
659
+ """Build structured message list for Rust.
660
+
661
+ Rust expects a list of dicts with 'role' and 'content' keys.
662
+ System prompt is passed separately via kwargs.
663
+
664
+ Args:
665
+ request: Generation request with messages
666
+
667
+ Returns:
668
+ List of message dicts with role and content
669
+ """
670
+ # Convert messages to Rust format (list of dicts with role and content)
671
+ messages = []
672
+ for msg in request.messages:
673
+ messages.append({
674
+ "role": msg.role.value, # "system", "user", or "assistant"
675
+ "content": msg.content
676
+ })
677
+
678
+ # If no messages and no system prompt, return a default user message
679
+ if not messages and not request.system_prompt:
680
+ messages.append({
681
+ "role": "user",
682
+ "content": ""
683
+ })
684
+
685
+ return messages
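+ # e.g. a single user message produces [{"role": "user", "content": "hi"}];
+ # with no messages and no system prompt, a single empty user message is returned.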
686
+
687
+ def _convert_response(self, rust_response: RustResponse) -> GenerateResponse:
688
+ """Convert Rust response to Python response."""
689
+ usage = None
690
+ if rust_response.usage:
691
+ usage = TokenUsage(
692
+ prompt_tokens=rust_response.usage.prompt_tokens,
693
+ completion_tokens=rust_response.usage.completion_tokens,
694
+ total_tokens=rust_response.usage.total_tokens,
695
+ )
696
+
697
+ # Extract tool_calls from Rust response
698
+ tool_calls = None
699
+ if hasattr(rust_response, 'tool_calls') and rust_response.tool_calls:
700
+ tool_calls = rust_response.tool_calls
701
+
702
+ # Extract response_id from Rust response (for Responses API)
703
+ response_id = None
704
+ if hasattr(rust_response, 'response_id') and rust_response.response_id:
705
+ response_id = rust_response.response_id
706
+
707
+ return GenerateResponse(
708
+ text=rust_response.content,
709
+ usage=usage,
710
+ finish_reason=None, # TODO: Add finish_reason to Rust response
711
+ tool_calls=tool_calls,
712
+ response_id=response_id,
713
+ _rust_response=rust_response, # Store for .structured_output access
714
+ )
715
+
716
+
717
+ # ============================================================================
718
+ # Simplified API (Recommended)
719
+ # ============================================================================
720
+ # This is the recommended simple interface for most use cases
721
+
722
+ async def generate(
723
+ model: str,
724
+ prompt: Optional[str] = None,
725
+ messages: Optional[List[Dict[str, str]]] = None,
726
+ system_prompt: Optional[str] = None,
727
+ temperature: Optional[float] = None,
728
+ max_tokens: Optional[int] = None,
729
+ top_p: Optional[float] = None,
730
+ response_format: Optional[Any] = None,
731
+ # Responses API specific parameters
732
+ built_in_tools: Optional[List[BuiltInTool]] = None,
733
+ reasoning_effort: Optional[ReasoningEffort] = None,
734
+ modalities: Optional[List[Modality]] = None,
735
+ store: Optional[bool] = None,
736
+ previous_response_id: Optional[str] = None,
737
+ ) -> GenerateResponse:
738
+ """Generate text using any LLM provider (simplified API).
739
+
740
+ This is the recommended way to use the LLM API. Provider is auto-detected
741
+ from the model prefix (e.g., 'openai/gpt-4o-mini', 'anthropic/claude-3-5-haiku').
742
+
743
+ Args:
744
+ model: Model identifier with provider prefix (e.g., 'openai/gpt-4o-mini')
745
+ prompt: Simple text prompt (for single-turn requests)
746
+ messages: List of message dicts with 'role' and 'content' (for multi-turn)
747
+ system_prompt: Optional system prompt
748
+ temperature: Sampling temperature (0.0-2.0)
749
+ max_tokens: Maximum tokens to generate
750
+ top_p: Nucleus sampling parameter
751
+ response_format: Pydantic model, dataclass, or JSON schema dict for structured output
752
+ built_in_tools: List of built-in tools (OpenAI Responses API only)
753
+ reasoning_effort: Reasoning effort level for o-series models (OpenAI Responses API only)
754
+ modalities: Output modalities (text, audio, image) (OpenAI Responses API only)
755
+ store: Enable server-side conversation state (OpenAI Responses API only)
756
+ previous_response_id: Continue from previous response (OpenAI Responses API only)
757
+
758
+ Returns:
759
+ GenerateResponse with text, usage, and optional structured output
760
+
761
+ Examples:
762
+ Simple prompt:
763
+ >>> response = await generate(
764
+ ... model="openai/gpt-4o-mini",
765
+ ... prompt="What is love?",
766
+ ... temperature=0.7
767
+ ... )
768
+ >>> print(response.text)
769
+
770
+ Structured output with dataclass:
771
+ >>> from dataclasses import dataclass
772
+ >>>
773
+ >>> @dataclass
774
+ ... class CodeReview:
775
+ ... issues: list[str]
776
+ ... suggestions: list[str]
777
+ ... overall_quality: int
778
+ >>>
779
+ >>> response = await generate(
780
+ ... model="openai/gpt-4o",
781
+ ... prompt="Analyze this code...",
782
+ ... response_format=CodeReview
783
+ ... )
784
+ >>> review = response.structured_output # Returns dict
785
+ """
786
+ # Validate input
787
+ if not prompt and not messages:
788
+ raise ValueError("Either 'prompt' or 'messages' must be provided")
789
+ if prompt and messages:
790
+ raise ValueError("Provide either 'prompt' or 'messages', not both")
791
+
792
+ # Auto-detect provider from model prefix
793
+ if '/' not in model:
794
+ raise ValueError(
795
+ f"Model must include provider prefix (e.g., 'openai/{model}'). "
796
+ f"Supported providers: openai, anthropic, groq, openrouter, azure, bedrock"
797
+ )
798
+
799
+ provider, model_name = model.split('/', 1)
800
+
801
+ # Convert response_format to JSON schema if provided
802
+ response_schema_json = None
803
+ if response_format is not None:
804
+ format_type, json_schema = detect_format_type(response_format)
805
+ response_schema_json = json.dumps(json_schema)
806
+
807
+ # Create language model client
808
+ lm = _LanguageModel(provider=provider.lower(), default_model=None)
809
+
810
+ # Build messages list
811
+ if prompt:
812
+ msg_list = [{"role": "user", "content": prompt}]
813
+ else:
814
+ msg_list = messages or []
815
+
816
+ # Convert to Message objects for internal API
817
+ message_objects = []
818
+ for msg in msg_list:
819
+ role = MessageRole(msg["role"])
820
+ if role == MessageRole.USER:
821
+ message_objects.append(Message.user(msg["content"]))
822
+ elif role == MessageRole.ASSISTANT:
823
+ message_objects.append(Message.assistant(msg["content"]))
824
+ elif role == MessageRole.SYSTEM:
825
+ message_objects.append(Message.system(msg["content"]))
826
+
827
+ # Build request with Responses API parameters
828
+ config = GenerationConfig(
829
+ temperature=temperature,
830
+ max_tokens=max_tokens,
831
+ top_p=top_p,
832
+ built_in_tools=built_in_tools or [],
833
+ reasoning_effort=reasoning_effort,
834
+ modalities=modalities,
835
+ store=store,
836
+ previous_response_id=previous_response_id,
837
+ )
838
+
839
+ request = GenerateRequest(
840
+ model=model,
841
+ messages=message_objects,
842
+ system_prompt=system_prompt,
843
+ config=config,
844
+ response_schema=response_schema_json,
845
+ )
846
+
847
+ # Checkpoints are emitted by _LanguageModel.generate() internally
848
+ # to avoid duplication. No need to emit them here.
849
+
850
+ # Generate and return
851
+ result = await lm.generate(request)
852
+ return result
853
+
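+ # Usage sketch (OpenAI Responses API only): continue a stored conversation by
+ # passing the previous response_id back in; the prompts here are illustrative.
+ # >>> first = await generate(
+ # ...     model="openai/gpt-4o-mini",
+ # ...     prompt="Remember that my favorite color is teal.",
+ # ...     store=True,
+ # ... )
+ # >>> follow_up = await generate(
+ # ...     model="openai/gpt-4o-mini",
+ # ...     prompt="What is my favorite color?",
+ # ...     previous_response_id=first.response_id,
+ # ... )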
854
+
855
+ async def stream(
856
+ model: str,
857
+ prompt: Optional[str] = None,
858
+ messages: Optional[List[Dict[str, str]]] = None,
859
+ system_prompt: Optional[str] = None,
860
+ temperature: Optional[float] = None,
861
+ max_tokens: Optional[int] = None,
862
+ top_p: Optional[float] = None,
863
+ # Responses API specific parameters
864
+ built_in_tools: Optional[List[BuiltInTool]] = None,
865
+ reasoning_effort: Optional[ReasoningEffort] = None,
866
+ modalities: Optional[List[Modality]] = None,
867
+ store: Optional[bool] = None,
868
+ previous_response_id: Optional[str] = None,
869
+ ) -> AsyncIterator[str]:
870
+ """Stream text using any LLM provider (simplified API).
871
+
872
+ This is the recommended way to use streaming. Provider is auto-detected
873
+ from the model prefix (e.g., 'openai/gpt-4o-mini', 'anthropic/claude-3-5-haiku').
874
+
875
+ Args:
876
+ model: Model identifier with provider prefix (e.g., 'openai/gpt-4o-mini')
877
+ prompt: Simple text prompt (for single-turn requests)
878
+ messages: List of message dicts with 'role' and 'content' (for multi-turn)
879
+ system_prompt: Optional system prompt
880
+ temperature: Sampling temperature (0.0-2.0)
881
+ max_tokens: Maximum tokens to generate
882
+ top_p: Nucleus sampling parameter
883
+ built_in_tools: List of built-in tools (OpenAI Responses API only)
884
+ reasoning_effort: Reasoning effort level for o-series models (OpenAI Responses API only)
885
+ modalities: Output modalities (text, audio, image) (OpenAI Responses API only)
886
+ store: Enable server-side conversation state (OpenAI Responses API only)
887
+ previous_response_id: Continue from previous response (OpenAI Responses API only)
888
+
889
+ Yields:
890
+ Text chunks as they are generated
891
+
892
+ Examples:
893
+ Simple streaming:
894
+ >>> async for chunk in stream(
895
+ ... model="openai/gpt-4o-mini",
896
+ ... prompt="Write a story"
897
+ ... ):
898
+ ... print(chunk, end="", flush=True)
899
+
900
+ Streaming conversation:
901
+ >>> async for chunk in stream(
902
+ ... model="groq/llama-3.3-70b-versatile",
903
+ ... messages=[
904
+ ... {"role": "user", "content": "Tell me a joke"}
905
+ ... ],
906
+ ... temperature=0.9
907
+ ... ):
908
+ ... print(chunk, end="")
909
+ """
910
+ # Validate input
911
+ if not prompt and not messages:
912
+ raise ValueError("Either 'prompt' or 'messages' must be provided")
913
+ if prompt and messages:
914
+ raise ValueError("Provide either 'prompt' or 'messages', not both")
915
+
916
+ # Auto-detect provider from model prefix
917
+ if '/' not in model:
918
+ raise ValueError(
919
+ f"Model must include provider prefix (e.g., 'openai/{model}'). "
920
+ f"Supported providers: openai, anthropic, groq, openrouter, azure, bedrock"
921
+ )
922
+
923
+ provider, model_name = model.split('/', 1)
924
+
925
+ # Create language model client
926
+ lm = _LanguageModel(provider=provider.lower(), default_model=None)
927
+
928
+ # Build messages list
929
+ if prompt:
930
+ msg_list = [{"role": "user", "content": prompt}]
931
+ else:
932
+ msg_list = messages or []
933
+
934
+ # Convert to Message objects for internal API
935
+ message_objects = []
936
+ for msg in msg_list:
937
+ role = MessageRole(msg["role"])
938
+ if role == MessageRole.USER:
939
+ message_objects.append(Message.user(msg["content"]))
940
+ elif role == MessageRole.ASSISTANT:
941
+ message_objects.append(Message.assistant(msg["content"]))
942
+ elif role == MessageRole.SYSTEM:
943
+ message_objects.append(Message.system(msg["content"]))
944
+
945
+ # Build request with Responses API parameters
946
+ config = GenerationConfig(
947
+ temperature=temperature,
948
+ max_tokens=max_tokens,
949
+ top_p=top_p,
950
+ built_in_tools=built_in_tools or [],
951
+ reasoning_effort=reasoning_effort,
952
+ modalities=modalities,
953
+ store=store,
954
+ previous_response_id=previous_response_id,
955
+ )
956
+
957
+ request = GenerateRequest(
958
+ model=model,
959
+ messages=message_objects,
960
+ system_prompt=system_prompt,
961
+ config=config,
962
+ )
963
+
964
+ # Events are emitted by _LanguageModel.stream() internally
965
+ # (agent.llm.call.started/completed/failed with trace linkage)
966
+
967
+ # Stream and yield chunks
968
+ async for chunk in lm.stream(request):
969
+ yield chunk
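+ # Usage sketch: accumulate streamed chunks into the full reply text.
+ # >>> parts: list[str] = []
+ # >>> async for chunk in stream(
+ # ...     model="openai/gpt-4o-mini",
+ # ...     prompt="Write a haiku about Rust",
+ # ... ):
+ # ...     parts.append(chunk)
+ # >>> full_text = "".join(parts)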