agnt5-0.2.8a2-cp310-abi3-manylinux_2_34_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of agnt5 might be problematic.

agnt5/lm.py ADDED
@@ -0,0 +1,705 @@
+"""Language Model interface for AGNT5 SDK.
+
+Simplified API inspired by Vercel AI SDK for seamless multi-provider LLM access.
+Uses Rust-backed implementation via PyO3 for performance and reliability.
+
+Basic Usage:
+    >>> from agnt5 import lm
+    >>>
+    >>> # Simple generation
+    >>> response = await lm.generate(
+    ...     model="openai/gpt-4o-mini",
+    ...     prompt="What is love?",
+    ...     temperature=0.7
+    ... )
+    >>> print(response.text)
+    >>>
+    >>> # Streaming
+    >>> async for chunk in lm.stream(
+    ...     model="anthropic/claude-3-5-haiku",
+    ...     prompt="Write a story"
+    ... ):
+    ...     print(chunk, end="", flush=True)
+
+Supported Providers (via model prefix):
+    - openai/model-name
+    - anthropic/model-name
+    - groq/model-name
+    - openrouter/provider/model-name
+    - azure/model-name
+    - bedrock/model-name
+"""
+
+from __future__ import annotations
+
+import json
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Any, AsyncIterator, Dict, List, Optional
+
+from ._schema_utils import detect_format_type
+
+try:
+    from ._core import LanguageModel as RustLanguageModel
+    from ._core import LanguageModelConfig as RustLanguageModelConfig
+    from ._core import Response as RustResponse
+    from ._core import StreamChunk as RustStreamChunk
+    from ._core import Usage as RustUsage
+    _RUST_AVAILABLE = True
+except ImportError:
+    _RUST_AVAILABLE = False
+    RustLanguageModel = None
+    RustLanguageModelConfig = None
+    RustResponse = None
+    RustStreamChunk = None
+    RustUsage = None
+
+
+# Keep Python classes for backward compatibility and convenience
+class MessageRole(str, Enum):
+    """Message role in conversation."""
+
+    SYSTEM = "system"
+    USER = "user"
+    ASSISTANT = "assistant"
+
+
+@dataclass
+class Message:
+    """Conversation message."""
+
+    role: MessageRole
+    content: str
+
+    @staticmethod
+    def system(content: str) -> Message:
+        """Create system message."""
+        return Message(role=MessageRole.SYSTEM, content=content)
+
+    @staticmethod
+    def user(content: str) -> Message:
+        """Create user message."""
+        return Message(role=MessageRole.USER, content=content)
+
+    @staticmethod
+    def assistant(content: str) -> Message:
+        """Create assistant message."""
+        return Message(role=MessageRole.ASSISTANT, content=content)
+
+
+@dataclass
+class ToolDefinition:
+    """Tool definition for LLM."""
+
+    name: str
+    description: Optional[str] = None
+    parameters: Optional[Dict[str, Any]] = None
+
+
+class ToolChoice(str, Enum):
+    """Tool choice mode."""
+
+    AUTO = "auto"
+    NONE = "none"
+    REQUIRED = "required"
+
+
+@dataclass
+class ModelConfig:
+    """Advanced model configuration for custom endpoints and settings.
+
+    Use this for advanced scenarios like custom API endpoints, special headers,
+    or overriding default timeouts. Most users won't need this - the basic
+    model string with temperature/max_tokens is sufficient for common cases.
+
+    Example:
+        >>> from agnt5.lm import ModelConfig
+        >>> from agnt5 import Agent
+        >>>
+        >>> # Custom API endpoint
+        >>> config = ModelConfig(
+        ...     base_url="https://custom-api.example.com",
+        ...     api_key="custom-key",
+        ...     timeout=60,
+        ...     headers={"X-Custom-Header": "value"}
+        ... )
+        >>>
+        >>> agent = Agent(
+        ...     name="custom_agent",
+        ...     model="openai/gpt-4o-mini",
+        ...     instructions="...",
+        ...     model_config=config
+        ... )
+    """
+    base_url: Optional[str] = None
+    api_key: Optional[str] = None
+    timeout: Optional[int] = None
+    headers: Optional[Dict[str, str]] = None
+
+
+@dataclass
+class GenerationConfig:
+    """LLM generation configuration."""
+
+    temperature: Optional[float] = None
+    max_tokens: Optional[int] = None
+    top_p: Optional[float] = None
+
+
+@dataclass
+class TokenUsage:
+    """Token usage statistics."""
+
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+
+
+@dataclass
+class GenerateResponse:
+    """Response from LLM generation."""
+
+    text: str
+    usage: Optional[TokenUsage] = None
+    finish_reason: Optional[str] = None
+    tool_calls: Optional[List[Dict[str, Any]]] = None
+    _rust_response: Optional[Any] = field(default=None, repr=False)
+
+    @property
+    def structured_output(self) -> Optional[Any]:
+        """Parsed structured output (Pydantic model, dataclass, or dict).
+
+        Returns the parsed object when response_format is specified.
+        This is the recommended property name for accessing structured output.
+
+        Returns:
+            Parsed object according to the specified response_format, or None if not available
+        """
+        if self._rust_response and hasattr(self._rust_response, 'object'):
+            return self._rust_response.object
+        return None
+
+    @property
+    def parsed(self) -> Optional[Any]:
+        """Alias for structured_output (OpenAI SDK compatibility).
+
+        Returns:
+            Same as structured_output
+        """
+        return self.structured_output
+
+    @property
+    def object(self) -> Optional[Any]:
+        """Alias for structured_output.
+
+        Returns:
+            Same as structured_output
+        """
+        return self.structured_output
+
+
+@dataclass
+class GenerateRequest:
+    """Request for LLM generation."""
+
+    model: str
+    messages: List[Message] = field(default_factory=list)
+    system_prompt: Optional[str] = None
+    tools: List[ToolDefinition] = field(default_factory=list)
+    tool_choice: Optional[ToolChoice] = None
+    config: GenerationConfig = field(default_factory=GenerationConfig)
+    response_schema: Optional[str] = None  # JSON-encoded schema for structured output
+
+
+# Abstract base class for language models
+# This exists primarily for testing/mocking purposes
+class LanguageModel(ABC):
+    """Abstract base class for language model implementations.
+
+    This class defines the interface that all language models must implement.
+    It's primarily used for testing and mocking, as production code should use
+    the module-level generate() and stream() functions instead.
+    """
+
+    @abstractmethod
+    async def generate(self, request: GenerateRequest) -> GenerateResponse:
+        """Generate completion from LLM.
+
+        Args:
+            request: Generation request with model, messages, and configuration
+
+        Returns:
+            GenerateResponse with text, usage, and optional tool calls
+        """
+        pass
+
+    @abstractmethod
+    async def stream(self, request: GenerateRequest) -> AsyncIterator[str]:
+        """Stream completion from LLM.
+
+        Args:
+            request: Generation request with model, messages, and configuration
+
+        Yields:
+            Text chunks as they are generated
+        """
+        pass
+
+
+# Internal wrapper for the Rust-backed implementation
+# Users should use the module-level generate() and stream() functions instead
+class _LanguageModel(LanguageModel):
+    """Internal Language Model wrapper using Rust SDK core.
+
+    This class is for internal use only. Users should use the module-level
+    lm.generate() and lm.stream() functions for a simpler interface.
+    """
+
+    def __init__(
+        self,
+        provider: Optional[str] = None,
+        default_model: Optional[str] = None,
+    ):
+        """Initialize language model.
+
+        Args:
+            provider: Provider name (e.g., 'openai', 'anthropic', 'azure', 'bedrock', 'groq', 'openrouter')
+                If None, provider will be auto-detected from model prefix (e.g., 'openai/gpt-4o')
+            default_model: Default model to use if not specified in requests
+        """
+        if not _RUST_AVAILABLE:
+            raise ImportError(
+                "Rust extension not available. Please rebuild the SDK with: "
+                "cd sdk/sdk-python && maturin develop"
+            )
+
+        self._provider = provider
+        self._default_model = default_model
+
+        # Create config object for Rust
+        config = RustLanguageModelConfig(
+            default_model=default_model,
+            default_provider=provider,
+        )
+
+        self._rust_lm = RustLanguageModel(config=config)
+
+    def _prepare_model_name(self, model: str) -> str:
+        """Prepare model name with provider prefix if needed.
+
+        Args:
+            model: Model name (e.g., 'gpt-4o-mini' or 'openai/gpt-4o-mini')
+
+        Returns:
+            Model name with provider prefix (e.g., 'openai/gpt-4o-mini')
+        """
+        # If model already has a prefix, return as is
+        # This handles cases like OpenRouter where models already have their provider prefix
+        # (e.g., 'anthropic/claude-3.5-haiku' for OpenRouter)
+        if '/' in model:
+            return model
+
+        # If we have a default provider, prefix the model
+        if self._provider:
+            return f"{self._provider}/{model}"
+
+        # Otherwise return as is and let Rust handle the error
+        return model
+
+    async def generate(self, request: GenerateRequest) -> GenerateResponse:
+        """Generate completion from LLM.
+
+        Args:
+            request: Generation request with model, messages, and configuration
+
+        Returns:
+            GenerateResponse with text, usage, and optional tool calls
+        """
+        # Convert Python request to structured format for Rust
+        prompt = self._build_prompt_messages(request)
+
+        # Prepare model name with provider prefix
+        model = self._prepare_model_name(request.model)
+
+        # Build kwargs for Rust
+        kwargs = {
+            "model": model,
+        }
+
+        # Always pass provider explicitly if set
+        # For gateway providers like OpenRouter, this allows them to handle
+        # models with provider prefixes (e.g., openrouter can handle anthropic/claude-3.5-haiku)
+        if self._provider:
+            kwargs["provider"] = self._provider
+
+        # Pass system prompt separately if provided
+        if request.system_prompt:
+            kwargs["system_prompt"] = request.system_prompt
+
+        if request.config.temperature is not None:
+            kwargs["temperature"] = request.config.temperature
+        if request.config.max_tokens is not None:
+            kwargs["max_tokens"] = request.config.max_tokens
+        if request.config.top_p is not None:
+            kwargs["top_p"] = request.config.top_p
+
+        # Pass response schema for structured output if provided
+        if request.response_schema is not None:
+            kwargs["response_schema_kw"] = request.response_schema
+
+        # Pass tools and tool_choice to Rust
+        if request.tools:
+            # Serialize tools to JSON for Rust
+            tools_list = [
+                {
+                    "name": tool.name,
+                    "description": tool.description,
+                    "parameters": tool.parameters,
+                }
+                for tool in request.tools
+            ]
+            tools_json = json.dumps(tools_list)
+            kwargs["tools"] = tools_json
+
+        if request.tool_choice:
+            # Serialize tool_choice to JSON for Rust
+            kwargs["tool_choice"] = json.dumps(request.tool_choice.value)
+
+        # Call Rust implementation - it returns a proper Python coroutine now
+        # Using pyo3-async-runtimes for truly async HTTP calls without blocking
+        rust_response = await self._rust_lm.generate(prompt=prompt, **kwargs)
+
+        # Convert Rust response to Python
+        return self._convert_response(rust_response)
+
+    async def stream(self, request: GenerateRequest) -> AsyncIterator[str]:
+        """Stream completion from LLM.
+
+        Args:
+            request: Generation request with model, messages, and configuration
+
+        Yields:
+            Text chunks as they are generated
+        """
+        # Convert Python request to structured format for Rust
+        prompt = self._build_prompt_messages(request)
+
+        # Prepare model name with provider prefix
+        model = self._prepare_model_name(request.model)
+
+        # Build kwargs for Rust
+        kwargs = {
+            "model": model,
+        }
+
+        # Always pass provider explicitly if set
+        # For gateway providers like OpenRouter, this allows them to handle
+        # models with provider prefixes (e.g., openrouter can handle anthropic/claude-3.5-haiku)
+        if self._provider:
+            kwargs["provider"] = self._provider
+
+        # Pass system prompt separately if provided
+        if request.system_prompt:
+            kwargs["system_prompt"] = request.system_prompt
+
+        if request.config.temperature is not None:
+            kwargs["temperature"] = request.config.temperature
+        if request.config.max_tokens is not None:
+            kwargs["max_tokens"] = request.config.max_tokens
+        if request.config.top_p is not None:
+            kwargs["top_p"] = request.config.top_p
+
+        # Pass tools and tool_choice to Rust
+        if request.tools:
+            # Serialize tools to JSON for Rust
+            tools_list = [
+                {
+                    "name": tool.name,
+                    "description": tool.description,
+                    "parameters": tool.parameters,
+                }
+                for tool in request.tools
+            ]
+            kwargs["tools"] = json.dumps(tools_list)
+
+        if request.tool_choice:
+            # Serialize tool_choice to JSON for Rust
+            kwargs["tool_choice"] = json.dumps(request.tool_choice.value)
+
+        # Call Rust implementation - it returns a proper Python coroutine now
+        # Using pyo3-async-runtimes for truly async streaming without blocking
+        rust_chunks = await self._rust_lm.stream(prompt=prompt, **kwargs)
+
+        # Yield each chunk
+        for chunk in rust_chunks:
+            if chunk.text:
+                yield chunk.text
+
+    def _build_prompt_messages(self, request: GenerateRequest) -> List[Dict[str, str]]:
+        """Build structured message list for Rust.
+
+        Rust expects a list of dicts with 'role' and 'content' keys.
+        System prompt is passed separately via kwargs.
+
+        Args:
+            request: Generation request with messages
+
+        Returns:
+            List of message dicts with role and content
+        """
+        # Convert messages to Rust format (list of dicts with role and content)
+        messages = []
+        for msg in request.messages:
+            messages.append({
+                "role": msg.role.value,  # "system", "user", or "assistant"
+                "content": msg.content
+            })
+
+        # If no messages and no system prompt, return a default user message
+        if not messages and not request.system_prompt:
+            messages.append({
+                "role": "user",
+                "content": ""
+            })
+
+        return messages
+
+    def _convert_response(self, rust_response: RustResponse) -> GenerateResponse:
+        """Convert Rust response to Python response."""
+        usage = None
+        if rust_response.usage:
+            usage = TokenUsage(
+                prompt_tokens=rust_response.usage.prompt_tokens,
+                completion_tokens=rust_response.usage.completion_tokens,
+                total_tokens=rust_response.usage.total_tokens,
+            )
+
+        # Extract tool_calls from Rust response
+        tool_calls = None
+        if hasattr(rust_response, 'tool_calls') and rust_response.tool_calls:
+            tool_calls = rust_response.tool_calls
+
+        return GenerateResponse(
+            text=rust_response.content,
+            usage=usage,
+            finish_reason=None,  # TODO: Add finish_reason to Rust response
+            tool_calls=tool_calls,
+            _rust_response=rust_response,  # Store for .structured_output access
+        )
+
+
+# ============================================================================
+# Simplified API (Recommended)
+# ============================================================================
+# This is the recommended simple interface for most use cases
+
+async def generate(
+    model: str,
+    prompt: Optional[str] = None,
+    messages: Optional[List[Dict[str, str]]] = None,
+    system_prompt: Optional[str] = None,
+    temperature: Optional[float] = None,
+    max_tokens: Optional[int] = None,
+    top_p: Optional[float] = None,
+    response_format: Optional[Any] = None,
+) -> GenerateResponse:
+    """Generate text using any LLM provider (simplified API).
+
+    This is the recommended way to use the LLM API. Provider is auto-detected
+    from the model prefix (e.g., 'openai/gpt-4o-mini', 'anthropic/claude-3-5-haiku').
+
+    Args:
+        model: Model identifier with provider prefix (e.g., 'openai/gpt-4o-mini')
+        prompt: Simple text prompt (for single-turn requests)
+        messages: List of message dicts with 'role' and 'content' (for multi-turn)
+        system_prompt: Optional system prompt
+        temperature: Sampling temperature (0.0-2.0)
+        max_tokens: Maximum tokens to generate
+        top_p: Nucleus sampling parameter
+        response_format: Pydantic model, dataclass, or JSON schema dict for structured output
+
+    Returns:
+        GenerateResponse with text, usage, and optional structured output
+
+    Examples:
+        Simple prompt:
+            >>> response = await generate(
+            ...     model="openai/gpt-4o-mini",
+            ...     prompt="What is love?",
+            ...     temperature=0.7
+            ... )
+            >>> print(response.text)
+
+        Structured output with dataclass:
+            >>> from dataclasses import dataclass
+            >>>
+            >>> @dataclass
+            ... class CodeReview:
+            ...     issues: list[str]
+            ...     suggestions: list[str]
+            ...     overall_quality: int
+            >>>
+            >>> response = await generate(
+            ...     model="openai/gpt-4o",
+            ...     prompt="Analyze this code...",
+            ...     response_format=CodeReview
+            ... )
+            >>> review = response.structured_output  # Returns dict
+    """
+    # Validate input
+    if not prompt and not messages:
+        raise ValueError("Either 'prompt' or 'messages' must be provided")
+    if prompt and messages:
+        raise ValueError("Provide either 'prompt' or 'messages', not both")
+
+    # Auto-detect provider from model prefix
+    if '/' not in model:
+        raise ValueError(
+            f"Model must include provider prefix (e.g., 'openai/{model}'). "
+            f"Supported providers: openai, anthropic, groq, openrouter, azure, bedrock"
+        )
+
+    provider, model_name = model.split('/', 1)
+
+    # Convert response_format to JSON schema if provided
+    response_schema_json = None
+    if response_format is not None:
+        format_type, json_schema = detect_format_type(response_format)
+        response_schema_json = json.dumps(json_schema)
+
+    # Create language model client
+    lm = _LanguageModel(provider=provider.lower(), default_model=None)
+
+    # Build messages list
+    if prompt:
+        msg_list = [{"role": "user", "content": prompt}]
+    else:
+        msg_list = messages
+
+    # Convert to Message objects for internal API
+    message_objects = []
+    for msg in msg_list:
+        role = MessageRole(msg["role"])
+        if role == MessageRole.USER:
+            message_objects.append(Message.user(msg["content"]))
+        elif role == MessageRole.ASSISTANT:
+            message_objects.append(Message.assistant(msg["content"]))
+        elif role == MessageRole.SYSTEM:
+            message_objects.append(Message.system(msg["content"]))
+
+    # Build request
+    config = GenerationConfig(
+        temperature=temperature,
+        max_tokens=max_tokens,
+        top_p=top_p,
+    )
+
+    request = GenerateRequest(
+        model=model,
+        messages=message_objects,
+        system_prompt=system_prompt,
+        config=config,
+        response_schema=response_schema_json,
+    )
+
+    # Generate and return
+    return await lm.generate(request)
+
+
+async def stream(
+    model: str,
+    prompt: Optional[str] = None,
+    messages: Optional[List[Dict[str, str]]] = None,
+    system_prompt: Optional[str] = None,
+    temperature: Optional[float] = None,
+    max_tokens: Optional[int] = None,
+    top_p: Optional[float] = None,
+) -> AsyncIterator[str]:
+    """Stream text using any LLM provider (simplified API).
+
+    This is the recommended way to use streaming. Provider is auto-detected
+    from the model prefix (e.g., 'openai/gpt-4o-mini', 'anthropic/claude-3-5-haiku').
+
+    Args:
+        model: Model identifier with provider prefix (e.g., 'openai/gpt-4o-mini')
+        prompt: Simple text prompt (for single-turn requests)
+        messages: List of message dicts with 'role' and 'content' (for multi-turn)
+        system_prompt: Optional system prompt
+        temperature: Sampling temperature (0.0-2.0)
+        max_tokens: Maximum tokens to generate
+        top_p: Nucleus sampling parameter
+
+    Yields:
+        Text chunks as they are generated
+
+    Examples:
+        Simple streaming:
+            >>> async for chunk in stream(
+            ...     model="openai/gpt-4o-mini",
+            ...     prompt="Write a story"
+            ... ):
+            ...     print(chunk, end="", flush=True)
+
+        Streaming conversation:
+            >>> async for chunk in stream(
+            ...     model="groq/llama-3.3-70b-versatile",
+            ...     messages=[
+            ...         {"role": "user", "content": "Tell me a joke"}
+            ...     ],
+            ...     temperature=0.9
+            ... ):
+            ...     print(chunk, end="")
+    """
+    # Validate input
+    if not prompt and not messages:
+        raise ValueError("Either 'prompt' or 'messages' must be provided")
+    if prompt and messages:
+        raise ValueError("Provide either 'prompt' or 'messages', not both")
+
+    # Auto-detect provider from model prefix
+    if '/' not in model:
+        raise ValueError(
+            f"Model must include provider prefix (e.g., 'openai/{model}'). "
+            f"Supported providers: openai, anthropic, groq, openrouter, azure, bedrock"
+        )
+
+    provider, model_name = model.split('/', 1)
+
+    # Create language model client
+    lm = _LanguageModel(provider=provider.lower(), default_model=None)
+
+    # Build messages list
+    if prompt:
+        msg_list = [{"role": "user", "content": prompt}]
+    else:
+        msg_list = messages
+
+    # Convert to Message objects for internal API
+    message_objects = []
+    for msg in msg_list:
+        role = MessageRole(msg["role"])
+        if role == MessageRole.USER:
+            message_objects.append(Message.user(msg["content"]))
+        elif role == MessageRole.ASSISTANT:
+            message_objects.append(Message.assistant(msg["content"]))
+        elif role == MessageRole.SYSTEM:
+            message_objects.append(Message.system(msg["content"]))
+
+    # Build request
+    config = GenerationConfig(
+        temperature=temperature,
+        max_tokens=max_tokens,
+        top_p=top_p,
+    )
+
+    request = GenerateRequest(
+        model=model,
+        messages=message_objects,
+        system_prompt=system_prompt,
+        config=config,
+    )
+
+    # Stream and yield chunks
+    async for chunk in lm.stream(request):
+        yield chunk
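
For orientation, below is a minimal end-to-end sketch of the simplified API as documented in the docstrings above. It is illustrative only and not part of the packaged file; the model name is an example, and the snippet assumes the agnt5 package is installed and provider credentials (e.g. an OpenAI API key) are already configured in the environment.

# Illustrative usage sketch based on the lm.py docstrings (not part of the wheel contents).
import asyncio
from dataclasses import dataclass

from agnt5 import lm


@dataclass
class MovieReview:
    title: str
    rating: int
    summary: str


async def main() -> None:
    # Plain text generation: provider is auto-detected from the "openai/" prefix
    response = await lm.generate(
        model="openai/gpt-4o-mini",
        prompt="Review the movie 'Arrival' in two sentences.",
        temperature=0.7,
    )
    print(response.text)

    # Structured output: response_format accepts a dataclass, Pydantic model,
    # or JSON schema dict; the parsed result is exposed as .structured_output
    structured = await lm.generate(
        model="openai/gpt-4o-mini",
        prompt="Review the movie 'Arrival'.",
        response_format=MovieReview,
    )
    print(structured.structured_output)

    # Streaming: chunks of text are yielded as they are generated
    async for chunk in lm.stream(
        model="openai/gpt-4o-mini",
        prompt="Write a haiku about concurrency.",
    ):
        print(chunk, end="", flush=True)


asyncio.run(main())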