praisonaiagents 0.0.46__py3-none-any.whl → 0.0.47__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
@@ -0,0 +1,823 @@
+ import logging
+ import os
+ import warnings
+ from typing import Any, Dict, List, Optional, Union, Literal, Callable
+ from pydantic import BaseModel
+ import time
+ import json
+ from ..main import (
+     display_error,
+     display_tool_call,
+     display_instruction,
+     display_interaction,
+     display_generating,
+     display_self_reflection,
+     ReflectionOutput,
+ )
+ from rich.console import Console
+ from rich.live import Live
+
+ class LLMContextLengthExceededException(Exception):
+     """Raised when LLM context length is exceeded"""
+     def __init__(self, message: str):
+         self.message = message
+         super().__init__(self.message)
+
+     def _is_context_limit_error(self, error_message: str) -> bool:
+         """Check if error is related to context length"""
+         context_limit_phrases = [
+             "maximum context length",
+             "context window is too long",
+             "context length exceeded",
+             "context_length_exceeded"
+         ]
+         return any(phrase in error_message.lower() for phrase in context_limit_phrases)
+
+ class LLM:
+     """
+     Easy to use wrapper for language models. Supports multiple providers like OpenAI,
+     Anthropic, and others through LiteLLM.
+     """
+
+     # Default window sizes for different models (75% of actual to be safe)
+     MODEL_WINDOWS = {
+         # OpenAI
+         "gpt-4": 6144, # 8,192 actual
+         "gpt-4o": 96000, # 128,000 actual
+         "gpt-4o-mini": 96000, # 128,000 actual
+         "gpt-4-turbo": 96000, # 128,000 actual
+         "o1-preview": 96000, # 128,000 actual
+         "o1-mini": 96000, # 128,000 actual
+
+         # Anthropic
+         "claude-3-5-sonnet": 12288, # 16,384 actual
+         "claude-3-sonnet": 12288, # 16,384 actual
+         "claude-3-opus": 96000, # 128,000 actual
+         "claude-3-haiku": 96000, # 128,000 actual
+
+         # Gemini
+         "gemini-2.0-flash": 786432, # 1,048,576 actual
+         "gemini-1.5-pro": 1572864, # 2,097,152 actual
+         "gemini-1.5-flash": 786432, # 1,048,576 actual
+         "gemini-1.5-flash-8b": 786432, # 1,048,576 actual
+
+         # Deepseek
+         "deepseek-chat": 96000, # 128,000 actual
+
+         # Groq
+         "gemma2-9b-it": 6144, # 8,192 actual
+         "gemma-7b-it": 6144, # 8,192 actual
+         "llama3-70b-8192": 6144, # 8,192 actual
+         "llama3-8b-8192": 6144, # 8,192 actual
+         "mixtral-8x7b-32768": 24576, # 32,768 actual
+         "llama-3.3-70b-versatile": 96000, # 128,000 actual
+         "llama-3.3-70b-instruct": 96000, # 128,000 actual
+
+         # Other llama models
+         "llama-3.1-70b-versatile": 98304, # 131,072 actual
+         "llama-3.1-8b-instant": 98304, # 131,072 actual
+         "llama-3.2-1b-preview": 6144, # 8,192 actual
+         "llama-3.2-3b-preview": 6144, # 8,192 actual
+         "llama-3.2-11b-text-preview": 6144, # 8,192 actual
+         "llama-3.2-90b-text-preview": 6144 # 8,192 actual
+     }
+
+     def __init__(
+         self,
+         model: str,
+         timeout: Optional[int] = None,
+         temperature: Optional[float] = None,
+         top_p: Optional[float] = None,
+         n: Optional[int] = None,
+         max_tokens: Optional[int] = None,
+         presence_penalty: Optional[float] = None,
+         frequency_penalty: Optional[float] = None,
+         logit_bias: Optional[Dict[int, float]] = None,
+         response_format: Optional[Dict[str, Any]] = None,
+         seed: Optional[int] = None,
+         logprobs: Optional[bool] = None,
+         top_logprobs: Optional[int] = None,
+         api_version: Optional[str] = None,
+         stop_phrases: Optional[Union[str, List[str]]] = None,
+         api_key: Optional[str] = None,
+         base_url: Optional[str] = None,
+         events: List[Any] = [],
+         **extra_settings
+     ):
+         try:
+             import litellm
+             # Set litellm options globally
+             litellm.set_verbose = False
+             litellm.success_callback = []
+             litellm._async_success_callback = []
+             litellm.callbacks = []
+             # Additional logging suppression
+             litellm.suppress_debug_messages = True
+             logging.getLogger("litellm.utils").setLevel(logging.WARNING)
+             logging.getLogger("litellm.main").setLevel(logging.WARNING)
+         except ImportError:
+             raise ImportError(
+                 "LiteLLM is required but not installed. "
+                 "Please install with: pip install 'praisonaiagents[llm]'"
+             )
+
+         self.model = model
+         self.timeout = timeout
+         self.temperature = temperature
+         self.top_p = top_p
+         self.n = n
+         self.max_tokens = max_tokens
+         self.presence_penalty = presence_penalty
+         self.frequency_penalty = frequency_penalty
+         self.logit_bias = logit_bias
+         self.response_format = response_format
+         self.seed = seed
+         self.logprobs = logprobs
+         self.top_logprobs = top_logprobs
+         self.api_version = api_version
+         self.stop_phrases = stop_phrases
+         self.api_key = api_key
+         self.base_url = base_url
+         self.events = events
+         self.extra_settings = extra_settings
+         self.console = Console()
+         self.chat_history = []
+         self.verbose = extra_settings.get('verbose', True)
+         self.markdown = extra_settings.get('markdown', True)
+         self.self_reflect = extra_settings.get('self_reflect', False)
+         self.max_reflect = extra_settings.get('max_reflect', 3)
+         self.min_reflect = extra_settings.get('min_reflect', 1)
+
+         # Enable error dropping for cleaner output
+         litellm.drop_params = True
+         self._setup_event_tracking(events)
+
+     def get_response(
+         self,
+         prompt: Union[str, List[Dict]],
+         system_prompt: Optional[str] = None,
+         chat_history: Optional[List[Dict]] = None,
+         temperature: float = 0.2,
+         tools: Optional[List[Any]] = None,
+         output_json: Optional[BaseModel] = None,
+         output_pydantic: Optional[BaseModel] = None,
+         verbose: bool = True,
+         markdown: bool = True,
+         self_reflect: bool = False,
+         max_reflect: int = 3,
+         min_reflect: int = 1,
+         console: Optional[Console] = None,
+         agent_name: Optional[str] = None,
+         agent_role: Optional[str] = None,
+         agent_tools: Optional[List[str]] = None,
+         execute_tool_fn: Optional[Callable] = None,
+         **kwargs
+     ) -> str:
+         """Enhanced get_response with all OpenAI-like features"""
+         try:
+             import litellm
+
+             # Disable litellm debug messages
+             litellm.set_verbose = False
+
+             # Build messages list
+             messages = []
+             if system_prompt:
+                 if output_json:
+                     system_prompt += f"\nReturn ONLY a JSON object that matches this Pydantic model: {json.dumps(output_json.model_json_schema())}"
+                 elif output_pydantic:
+                     system_prompt += f"\nReturn ONLY a JSON object that matches this Pydantic model: {json.dumps(output_pydantic.model_json_schema())}"
+                 messages.append({"role": "system", "content": system_prompt})
+
+             if chat_history:
+                 messages.extend(chat_history)
+
+             # Handle prompt modifications for JSON output
+             original_prompt = prompt
+             if output_json or output_pydantic:
+                 if isinstance(prompt, str):
+                     prompt += "\nReturn ONLY a valid JSON object. No other text or explanation."
+                 elif isinstance(prompt, list):
+                     for item in prompt:
+                         if item["type"] == "text":
+                             item["text"] += "\nReturn ONLY a valid JSON object. No other text or explanation."
+                             break
+
+             # Add prompt to messages
+             if isinstance(prompt, list):
+                 messages.append({"role": "user", "content": prompt})
+             else:
+                 messages.append({"role": "user", "content": prompt})
+
+             start_time = time.time()
+             reflection_count = 0
+
+             while True:
+                 try:
+                     if verbose:
+                         display_text = prompt
+                         if isinstance(prompt, list):
+                             display_text = next((item["text"] for item in prompt if item["type"] == "text"), "")
+
+                         if display_text and str(display_text).strip():
+                             display_instruction(
+                                 f"Agent {agent_name} is processing prompt: {display_text}",
+                                 console=console,
+                                 agent_name=agent_name,
+                                 agent_role=agent_role,
+                                 agent_tools=agent_tools
+                             )
+
+                     # Get response from LiteLLM
+                     start_time = time.time()
+                     if verbose:
+                         with Live(display_generating("", start_time), console=console, refresh_per_second=4) as live:
+                             response_text = ""
+                             for chunk in litellm.completion(
+                                 model=self.model,
+                                 messages=messages,
+                                 temperature=temperature,
+                                 stream=True,
+                                 **kwargs
+                             ):
+                                 if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                     content = chunk.choices[0].delta.content
+                                     response_text += content
+                                     live.update(display_generating(response_text, start_time))
+                     else:
+                         # Non-verbose mode, just collect the response
+                         response_text = ""
+                         for chunk in litellm.completion(
+                             model=self.model,
+                             messages=messages,
+                             temperature=temperature,
+                             stream=True,
+                             **kwargs
+                         ):
+                             if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                 response_text += chunk.choices[0].delta.content
+
+                     response_text = response_text.strip()
+
+                     # Get final completion to check for tool calls
+                     final_response = litellm.completion(
+                         model=self.model,
+                         messages=messages,
+                         temperature=temperature,
+                         stream=False, # No streaming for tool call check
+                         **kwargs
+                     )
+
+                     tool_calls = final_response["choices"][0]["message"].get("tool_calls")
+
+                     # Handle tool calls
+                     if tool_calls and execute_tool_fn:
+                         messages.append({
+                             "role": "assistant",
+                             "content": response_text,
+                             "tool_calls": tool_calls
+                         })
+
+                         for tool_call in tool_calls:
+                             function_name = tool_call["function"]["name"]
+                             arguments = json.loads(tool_call["function"]["arguments"])
+
+                             if verbose:
+                                 display_tool_call(f"Agent {agent_name} is calling function '{function_name}' with arguments: {arguments}", console=console)
+
+                             tool_result = execute_tool_fn(function_name, arguments)
+
+                             if tool_result:
+                                 if verbose:
+                                     display_tool_call(f"Function '{function_name}' returned: {tool_result}", console=console)
+                                 messages.append({
+                                     "role": "tool",
+                                     "tool_call_id": tool_call["id"],
+                                     "content": json.dumps(tool_result)
+                                 })
+                             else:
+                                 messages.append({
+                                     "role": "tool",
+                                     "tool_call_id": tool_call["id"],
+                                     "content": "Function returned an empty output"
+                                 })
+
+                         # Get response after tool calls with streaming
+                         if verbose:
+                             with Live(display_generating("", start_time), console=console, refresh_per_second=4) as live:
+                                 response_text = ""
+                                 for chunk in litellm.completion(
+                                     model=self.model,
+                                     messages=messages,
+                                     temperature=temperature,
+                                     stream=True
+                                 ):
+                                     if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                         content = chunk.choices[0].delta.content
+                                         response_text += content
+                                         live.update(display_generating(response_text, start_time))
+                         else:
+                             response_text = ""
+                             for chunk in litellm.completion(
+                                 model=self.model,
+                                 messages=messages,
+                                 temperature=temperature,
+                                 stream=True
+                             ):
+                                 if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                     response_text += chunk.choices[0].delta.content
+
+                         response_text = response_text.strip()
+
+                     # Handle output formatting
+                     if output_json or output_pydantic:
+                         self.chat_history.append({"role": "user", "content": original_prompt})
+                         self.chat_history.append({"role": "assistant", "content": response_text})
+                         if verbose:
+                             display_interaction(original_prompt, response_text, markdown=markdown,
+                                                 generation_time=time.time() - start_time, console=console)
+                         return response_text
+
+                     if not self_reflect:
+                         if verbose:
+                             display_interaction(original_prompt, response_text, markdown=markdown,
+                                                 generation_time=time.time() - start_time, console=console)
+                         return response_text
+
+                     # Handle self-reflection
+                     reflection_prompt = f"""
+ Reflect on your previous response: '{response_text}'.
+ Identify any flaws, improvements, or actions.
+ Provide a "satisfactory" status ('yes' or 'no').
+ Output MUST be JSON with 'reflection' and 'satisfactory'.
+ """
+
+                     reflection_messages = messages + [
+                         {"role": "assistant", "content": response_text},
+                         {"role": "user", "content": reflection_prompt}
+                     ]
+
+                     # Get reflection response with streaming
+                     if verbose:
+                         with Live(display_generating("", start_time), console=console, refresh_per_second=4) as live:
+                             reflection_text = ""
+                             for chunk in litellm.completion(
+                                 model=self.model,
+                                 messages=reflection_messages,
+                                 temperature=temperature,
+                                 stream=True,
+                                 response_format={"type": "json_object"}
+                             ):
+                                 if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                     content = chunk.choices[0].delta.content
+                                     reflection_text += content
+                                     live.update(display_generating(reflection_text, start_time))
+                     else:
+                         reflection_text = ""
+                         for chunk in litellm.completion(
+                             model=self.model,
+                             messages=reflection_messages,
+                             temperature=temperature,
+                             stream=True,
+                             response_format={"type": "json_object"}
+                         ):
+                             if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                 reflection_text += chunk.choices[0].delta.content
+
+                     try:
+                         reflection_data = json.loads(reflection_text)
+                         satisfactory = reflection_data.get("satisfactory", "no").lower() == "yes"
+
+                         if verbose:
+                             display_self_reflection(
+                                 f"Agent {agent_name} self reflection: reflection='{reflection_data['reflection']}' satisfactory='{reflection_data['satisfactory']}'",
+                                 console=console
+                             )
+
+                         if satisfactory and reflection_count >= min_reflect - 1:
+                             if verbose:
+                                 display_interaction(prompt, response_text, markdown=markdown,
+                                                     generation_time=time.time() - start_time, console=console)
+                             return response_text
+
+                         if reflection_count >= max_reflect - 1:
+                             if verbose:
+                                 display_interaction(prompt, response_text, markdown=markdown,
+                                                     generation_time=time.time() - start_time, console=console)
+                             return response_text
+
+                         reflection_count += 1
+                         messages.extend([
+                             {"role": "assistant", "content": response_text},
+                             {"role": "user", "content": reflection_prompt},
+                             {"role": "assistant", "content": reflection_text},
+                             {"role": "user", "content": "Now regenerate your response using the reflection you made"}
+                         ])
+                         continue
+
+                     except json.JSONDecodeError:
+                         reflection_count += 1
+                         if reflection_count >= max_reflect:
+                             return response_text
+                         continue
+
+                 except Exception as e:
+                     display_error(f"Error in LLM response: {str(e)}")
+                     return None
+
+         except Exception as error:
+             display_error(f"Error in get_response: {str(error)}")
+             raise
+
+     async def get_response_async(
+         self,
+         prompt: Union[str, List[Dict]],
+         system_prompt: Optional[str] = None,
+         chat_history: Optional[List[Dict]] = None,
+         temperature: float = 0.2,
+         tools: Optional[List[Any]] = None,
+         output_json: Optional[BaseModel] = None,
+         output_pydantic: Optional[BaseModel] = None,
+         verbose: bool = True,
+         markdown: bool = True,
+         self_reflect: bool = False,
+         max_reflect: int = 3,
+         min_reflect: int = 1,
+         console: Optional[Console] = None,
+         agent_name: Optional[str] = None,
+         agent_role: Optional[str] = None,
+         agent_tools: Optional[List[str]] = None,
+         execute_tool_fn: Optional[Callable] = None,
+         **kwargs
+     ) -> str:
+         """Async version of get_response with identical functionality."""
+         try:
+             import litellm
+             litellm.set_verbose = False
+
+             # Build messages list
+             messages = []
+             if system_prompt:
+                 if output_json:
+                     system_prompt += f"\nReturn ONLY a JSON object that matches this Pydantic model: {json.dumps(output_json.model_json_schema())}"
+                 elif output_pydantic:
+                     system_prompt += f"\nReturn ONLY a JSON object that matches this Pydantic model: {json.dumps(output_pydantic.model_json_schema())}"
+                 messages.append({"role": "system", "content": system_prompt})
+
+             if chat_history:
+                 messages.extend(chat_history)
+
+             # Handle prompt modifications for JSON output
+             original_prompt = prompt
+             if output_json or output_pydantic:
+                 if isinstance(prompt, str):
+                     prompt += "\nReturn ONLY a valid JSON object. No other text or explanation."
+                 elif isinstance(prompt, list):
+                     for item in prompt:
+                         if item["type"] == "text":
+                             item["text"] += "\nReturn ONLY a valid JSON object. No other text or explanation."
+                             break
+
+             # Add prompt to messages
+             if isinstance(prompt, list):
+                 messages.append({"role": "user", "content": prompt})
+             else:
+                 messages.append({"role": "user", "content": prompt})
+
+             start_time = time.time()
+             reflection_count = 0
+
+             # Format tools for LiteLLM
+             formatted_tools = None
+             if tools:
+                 logging.info(f"Starting tool formatting for {len(tools)} tools")
+                 formatted_tools = []
+                 for tool in tools:
+                     logging.info(f"Processing tool: {tool.__name__ if hasattr(tool, '__name__') else str(tool)}")
+                     if hasattr(tool, '__name__'):
+                         tool_name = tool.__name__
+                         tool_doc = tool.__doc__ or "No description available"
+                         # Get function signature
+                         import inspect
+                         sig = inspect.signature(tool)
+                         logging.debug(f"Tool signature: {sig}")
+                         params = {}
+                         required = []
+                         for name, param in sig.parameters.items():
+                             logging.debug(f"Processing parameter: {name} with annotation: {param.annotation}")
+                             param_type = "string"
+                             if param.annotation != inspect.Parameter.empty:
+                                 if param.annotation == int:
+                                     param_type = "integer"
+                                 elif param.annotation == float:
+                                     param_type = "number"
+                                 elif param.annotation == bool:
+                                     param_type = "boolean"
+                                 elif param.annotation == Dict:
+                                     param_type = "object"
+                                 elif param.annotation == List:
+                                     param_type = "array"
+                                 elif hasattr(param.annotation, "__name__"):
+                                     param_type = param.annotation.__name__.lower()
+                             params[name] = {"type": param_type}
+                             if param.default == inspect.Parameter.empty:
+                                 required.append(name)
+
+                         logging.debug(f"Generated parameters: {params}")
+                         logging.debug(f"Required parameters: {required}")
+
+                         tool_def = {
+                             "type": "function",
+                             "function": {
+                                 "name": tool_name,
+                                 "description": tool_doc,
+                                 "parameters": {
+                                     "type": "object",
+                                     "properties": params,
+                                     "required": required
+                                 }
+                             }
+                         }
+                         # Ensure tool definition is JSON serializable
+                         print(f"Generated tool definition: {tool_def}")
+                         try:
+                             json.dumps(tool_def) # Test serialization
+                             logging.info(f"Generated tool definition: {tool_def}")
+                             formatted_tools.append(tool_def)
+                         except TypeError as e:
+                             logging.error(f"Tool definition not JSON serializable: {e}")
+                             continue
+
+             # Validate final tools list
+             if formatted_tools:
+                 try:
+                     json.dumps(formatted_tools) # Final serialization check
+                     logging.info(f"Final formatted tools: {json.dumps(formatted_tools, indent=2)}")
+                 except TypeError as e:
+                     logging.error(f"Final tools list not JSON serializable: {e}")
+                     formatted_tools = None
+
+             response_text = ""
+             if verbose:
+                 # ----------------------------------------------------
+                 # 1) Make the streaming call WITHOUT tools
+                 # ----------------------------------------------------
+                 async for chunk in await litellm.acompletion(
+                     model=self.model,
+                     messages=messages,
+                     temperature=temperature,
+                     stream=True,
+                     **kwargs
+                 ):
+                     if chunk and chunk.choices and chunk.choices[0].delta.content:
+                         response_text += chunk.choices[0].delta.content
+                         print("\033[K", end="\r")
+                         print(f"Generating... {time.time() - start_time:.1f}s", end="\r")
+             else:
+                 # Non-verbose streaming call, still no tools
+                 async for chunk in await litellm.acompletion(
+                     model=self.model,
+                     messages=messages,
+                     temperature=temperature,
+                     stream=True,
+                     **kwargs
+                 ):
+                     if chunk and chunk.choices and chunk.choices[0].delta.content:
+                         response_text += chunk.choices[0].delta.content
+
+             response_text = response_text.strip()
+
+             # ----------------------------------------------------
+             # 2) If tool calls are needed, do a non-streaming call
+             # ----------------------------------------------------
+             if tools and execute_tool_fn:
+                 # Next call with tools if needed
+                 tool_response = await litellm.acompletion(
+                     model=self.model,
+                     messages=messages,
+                     temperature=temperature,
+                     stream=False,
+                     tools=formatted_tools, # We safely pass tools here
+                     **kwargs
+                 )
+                 # handle tool_calls from tool_response as usual...
+                 tool_calls = tool_response.choices[0].message.get("tool_calls")
+
+                 if tool_calls:
+                     messages.append({
+                         "role": "assistant",
+                         "content": response_text,
+                         "tool_calls": tool_calls
+                     })
+
+                     for tool_call in tool_calls:
+                         function_name = tool_call.function.name
+                         arguments = json.loads(tool_call.function.arguments)
+
+                         if verbose:
+                             display_tool_call(f"Agent {agent_name} is calling function '{function_name}' with arguments: {arguments}", console=console)
+
+                         tool_result = await execute_tool_fn(function_name, arguments)
+
+                         if tool_result:
+                             if verbose:
+                                 display_tool_call(f"Function '{function_name}' returned: {tool_result}", console=console)
+                             messages.append({
+                                 "role": "tool",
+                                 "tool_call_id": tool_call.id,
+                                 "content": json.dumps(tool_result)
+                             })
+                         else:
+                             messages.append({
+                                 "role": "tool",
+                                 "tool_call_id": tool_call.id,
+                                 "content": "Function returned an empty output"
+                             })
+
+                     # Get response after tool calls with streaming
+                     response_text = ""
+                     if verbose:
+                         async for chunk in await litellm.acompletion(
+                             model=self.model,
+                             messages=messages,
+                             temperature=temperature,
+                             stream=True,
+                             tools=formatted_tools,
+                             **kwargs
+                         ):
+                             if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                 content = chunk.choices[0].delta.content
+                                 response_text += content
+                                 print("\033[K", end="\r")
+                                 print(f"Reflecting... {time.time() - start_time:.1f}s", end="\r")
+                     else:
+                         response_text = ""
+                         for chunk in litellm.completion(
+                             model=self.model,
+                             messages=messages,
+                             temperature=temperature,
+                             stream=True,
+                             **kwargs
+                         ):
+                             if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                 response_text += chunk.choices[0].delta.content
+
+                     response_text = response_text.strip()
+
+             # Handle output formatting
+             if output_json or output_pydantic:
+                 self.chat_history.append({"role": "user", "content": original_prompt})
+                 self.chat_history.append({"role": "assistant", "content": response_text})
+                 if verbose:
+                     display_interaction(original_prompt, response_text, markdown=markdown,
+                                         generation_time=time.time() - start_time, console=console)
+                 return response_text
+
+             if not self_reflect:
+                 if verbose:
+                     display_interaction(original_prompt, response_text, markdown=markdown,
+                                         generation_time=time.time() - start_time, console=console)
+                 return response_text
+
+             # Handle self-reflection
+             reflection_prompt = f"""
+ Reflect on your previous response: '{response_text}'.
+ Identify any flaws, improvements, or actions.
+ Provide a "satisfactory" status ('yes' or 'no').
+ Output MUST be JSON with 'reflection' and 'satisfactory'.
+ """
+
+             reflection_messages = messages + [
+                 {"role": "assistant", "content": response_text},
+                 {"role": "user", "content": reflection_prompt}
+             ]
+
+             # Get reflection response
+             reflection_text = ""
+             if verbose:
+                 async for chunk in await litellm.acompletion(
+                     model=self.model,
+                     messages=reflection_messages,
+                     temperature=temperature,
+                     stream=True,
+                     response_format={"type": "json_object"},
+                     **kwargs
+                 ):
+                     if chunk and chunk.choices and chunk.choices[0].delta.content:
+                         content = chunk.choices[0].delta.content
+                         reflection_text += content
+                         print("\033[K", end="\r")
+                         print(f"Reflecting... {time.time() - start_time:.1f}s", end="\r")
+             else:
+                 async for chunk in await litellm.acompletion(
+                     model=self.model,
+                     messages=reflection_messages,
+                     temperature=temperature,
+                     stream=True,
+                     response_format={"type": "json_object"},
+                     **kwargs
+                 ):
+                     if chunk and chunk.choices and chunk.choices[0].delta.content:
+                         reflection_text += chunk.choices[0].delta.content
+
+             while True: # Add loop for reflection handling
+                 try:
+                     reflection_data = json.loads(reflection_text)
+                     satisfactory = reflection_data.get("satisfactory", "no").lower() == "yes"
+
+                     if verbose:
+                         display_self_reflection(
+                             f"Agent {agent_name} self reflection: reflection='{reflection_data['reflection']}' satisfactory='{reflection_data['satisfactory']}'",
+                             console=console
+                         )
+
+                     if satisfactory and reflection_count >= min_reflect - 1:
+                         if verbose:
+                             display_interaction(prompt, response_text, markdown=markdown,
+                                                 generation_time=time.time() - start_time, console=console)
+                         return response_text
+
+                     if reflection_count >= max_reflect - 1:
+                         if verbose:
+                             display_interaction(prompt, response_text, markdown=markdown,
+                                                 generation_time=time.time() - start_time, console=console)
+                         return response_text
+
+                     reflection_count += 1
+                     messages.extend([
+                         {"role": "assistant", "content": response_text},
+                         {"role": "user", "content": reflection_prompt},
+                         {"role": "assistant", "content": reflection_text},
+                         {"role": "user", "content": "Now regenerate your response using the reflection you made"}
+                     ])
+                     continue # Now properly in a loop
+
+                 except json.JSONDecodeError:
+                     reflection_count += 1
+                     if reflection_count >= max_reflect:
+                         return response_text
+                     continue # Now properly in a loop
+
+         except Exception as error:
+             if LLMContextLengthExceededException(str(error))._is_context_limit_error(str(error)):
+                 raise LLMContextLengthExceededException(str(error))
+             display_error(f"Error in get_response_async: {str(error)}")
+             raise
+
+     def can_use_tools(self) -> bool:
+         """Check if this model can use tool functions"""
+         try:
+             import litellm
+             allowed_params = litellm.get_supported_openai_params(model=self.model)
+             return "response_format" in allowed_params
+         except ImportError:
+             raise ImportError(
+                 "LiteLLM is required but not installed. "
+                 "Please install it with: pip install 'praisonaiagents[llm]'"
+             )
+         except:
+             return False
+
+     def can_use_stop_words(self) -> bool:
+         """Check if this model supports stop words"""
+         try:
+             import litellm
+             allowed_params = litellm.get_supported_openai_params(model=self.model)
+             return "stop" in allowed_params
+         except ImportError:
+             raise ImportError(
+                 "LiteLLM is required but not installed. "
+                 "Please install it with: pip install 'praisonaiagents[llm]'"
+             )
+         except:
+             return False
+
+     def get_context_size(self) -> int:
+         """Get safe input size limit for this model"""
+         for model_prefix, size in self.MODEL_WINDOWS.items():
+             if self.model.startswith(model_prefix):
+                 return size
+         return 4000 # Safe default
+
+     def _setup_event_tracking(self, events: List[Any]) -> None:
+         """Setup callback functions for tracking model usage"""
+         try:
+             import litellm
+         except ImportError:
+             raise ImportError(
+                 "LiteLLM is required but not installed. "
+                 "Please install it with: pip install 'praisonaiagents[llm]'"
+             )
+
+         event_types = [type(event) for event in events]
+
+         # Remove old events of same type
+         for event in litellm.success_callback[:]:
+             if type(event) in event_types:
+                 litellm.success_callback.remove(event)
+
+         for event in litellm._async_success_callback[:]:
+             if type(event) in event_types:
+                 litellm._async_success_callback.remove(event)
+
+         litellm.callbacks = events
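
The hunk above adds a self-contained LLM wrapper class, so a minimal usage sketch may help readers of the diff. The sketch below is illustrative only and is not part of the package contents; it assumes LiteLLM is installed (pip install 'praisonaiagents[llm]'), that a provider key such as OPENAI_API_KEY is exported, and that the class is importable from the new module (the hunk does not show the module path, so the import below is hypothetical).

    # Hypothetical import path; the diff does not show where the new module is mounted.
    from praisonaiagents.llm.llm import LLM

    llm = LLM(model="gpt-4o-mini")  # LiteLLM routes the request based on the model name

    # MODEL_WINDOWS is consulted by prefix, falling back to 4000 when nothing matches.
    print(llm.get_context_size())

    answer = llm.get_response(
        prompt="Summarise what LiteLLM does in one sentence.",
        system_prompt="You are a concise assistant.",
        temperature=0.2,
        verbose=False,       # skip the Rich Live streaming display
        self_reflect=False,  # return after the first completion
    )
    print(answer)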