ai-parrot 0.8.3__cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ai-parrot might be problematic. Click here for more details.

Files changed (128) hide show
  1. ai_parrot-0.8.3.dist-info/LICENSE +21 -0
  2. ai_parrot-0.8.3.dist-info/METADATA +306 -0
  3. ai_parrot-0.8.3.dist-info/RECORD +128 -0
  4. ai_parrot-0.8.3.dist-info/WHEEL +6 -0
  5. ai_parrot-0.8.3.dist-info/top_level.txt +2 -0
  6. parrot/__init__.py +30 -0
  7. parrot/bots/__init__.py +5 -0
  8. parrot/bots/abstract.py +1115 -0
  9. parrot/bots/agent.py +492 -0
  10. parrot/bots/basic.py +9 -0
  11. parrot/bots/bose.py +17 -0
  12. parrot/bots/chatbot.py +271 -0
  13. parrot/bots/cody.py +17 -0
  14. parrot/bots/copilot.py +117 -0
  15. parrot/bots/data.py +730 -0
  16. parrot/bots/dataframe.py +103 -0
  17. parrot/bots/hrbot.py +15 -0
  18. parrot/bots/interfaces/__init__.py +1 -0
  19. parrot/bots/interfaces/retrievers.py +12 -0
  20. parrot/bots/notebook.py +619 -0
  21. parrot/bots/odoo.py +17 -0
  22. parrot/bots/prompts/__init__.py +41 -0
  23. parrot/bots/prompts/agents.py +91 -0
  24. parrot/bots/prompts/data.py +214 -0
  25. parrot/bots/retrievals/__init__.py +1 -0
  26. parrot/bots/retrievals/constitutional.py +19 -0
  27. parrot/bots/retrievals/multi.py +122 -0
  28. parrot/bots/retrievals/retrieval.py +610 -0
  29. parrot/bots/tools/__init__.py +7 -0
  30. parrot/bots/tools/eda.py +325 -0
  31. parrot/bots/tools/pdf.py +50 -0
  32. parrot/bots/tools/plot.py +48 -0
  33. parrot/bots/troc.py +16 -0
  34. parrot/conf.py +170 -0
  35. parrot/crew/__init__.py +3 -0
  36. parrot/crew/tools/__init__.py +22 -0
  37. parrot/crew/tools/bing.py +13 -0
  38. parrot/crew/tools/config.py +43 -0
  39. parrot/crew/tools/duckgo.py +62 -0
  40. parrot/crew/tools/file.py +24 -0
  41. parrot/crew/tools/google.py +168 -0
  42. parrot/crew/tools/gtrends.py +16 -0
  43. parrot/crew/tools/md2pdf.py +25 -0
  44. parrot/crew/tools/rag.py +42 -0
  45. parrot/crew/tools/search.py +32 -0
  46. parrot/crew/tools/url.py +21 -0
  47. parrot/exceptions.cpython-39-x86_64-linux-gnu.so +0 -0
  48. parrot/handlers/__init__.py +4 -0
  49. parrot/handlers/agents.py +292 -0
  50. parrot/handlers/bots.py +196 -0
  51. parrot/handlers/chat.py +192 -0
  52. parrot/interfaces/__init__.py +6 -0
  53. parrot/interfaces/database.py +27 -0
  54. parrot/interfaces/http.py +805 -0
  55. parrot/interfaces/images/__init__.py +0 -0
  56. parrot/interfaces/images/plugins/__init__.py +18 -0
  57. parrot/interfaces/images/plugins/abstract.py +58 -0
  58. parrot/interfaces/images/plugins/exif.py +709 -0
  59. parrot/interfaces/images/plugins/hash.py +52 -0
  60. parrot/interfaces/images/plugins/vision.py +104 -0
  61. parrot/interfaces/images/plugins/yolo.py +66 -0
  62. parrot/interfaces/images/plugins/zerodetect.py +197 -0
  63. parrot/llms/__init__.py +1 -0
  64. parrot/llms/abstract.py +69 -0
  65. parrot/llms/anthropic.py +58 -0
  66. parrot/llms/gemma.py +15 -0
  67. parrot/llms/google.py +44 -0
  68. parrot/llms/groq.py +67 -0
  69. parrot/llms/hf.py +45 -0
  70. parrot/llms/openai.py +61 -0
  71. parrot/llms/pipes.py +114 -0
  72. parrot/llms/vertex.py +89 -0
  73. parrot/loaders/__init__.py +9 -0
  74. parrot/loaders/abstract.py +628 -0
  75. parrot/loaders/files/__init__.py +0 -0
  76. parrot/loaders/files/abstract.py +39 -0
  77. parrot/loaders/files/text.py +63 -0
  78. parrot/loaders/txt.py +26 -0
  79. parrot/manager.py +333 -0
  80. parrot/models.py +504 -0
  81. parrot/py.typed +0 -0
  82. parrot/stores/__init__.py +11 -0
  83. parrot/stores/abstract.py +248 -0
  84. parrot/stores/chroma.py +188 -0
  85. parrot/stores/duck.py +162 -0
  86. parrot/stores/embeddings/__init__.py +10 -0
  87. parrot/stores/embeddings/abstract.py +46 -0
  88. parrot/stores/embeddings/base.py +52 -0
  89. parrot/stores/embeddings/bge.py +20 -0
  90. parrot/stores/embeddings/fastembed.py +17 -0
  91. parrot/stores/embeddings/google.py +18 -0
  92. parrot/stores/embeddings/huggingface.py +20 -0
  93. parrot/stores/embeddings/ollama.py +14 -0
  94. parrot/stores/embeddings/openai.py +26 -0
  95. parrot/stores/embeddings/transformers.py +21 -0
  96. parrot/stores/embeddings/vertexai.py +17 -0
  97. parrot/stores/empty.py +10 -0
  98. parrot/stores/faiss.py +160 -0
  99. parrot/stores/milvus.py +397 -0
  100. parrot/stores/postgres.py +653 -0
  101. parrot/stores/qdrant.py +170 -0
  102. parrot/tools/__init__.py +23 -0
  103. parrot/tools/abstract.py +68 -0
  104. parrot/tools/asknews.py +33 -0
  105. parrot/tools/basic.py +51 -0
  106. parrot/tools/bby.py +359 -0
  107. parrot/tools/bing.py +13 -0
  108. parrot/tools/docx.py +343 -0
  109. parrot/tools/duck.py +62 -0
  110. parrot/tools/execute.py +56 -0
  111. parrot/tools/gamma.py +28 -0
  112. parrot/tools/google.py +170 -0
  113. parrot/tools/gvoice.py +301 -0
  114. parrot/tools/results.py +278 -0
  115. parrot/tools/stack.py +27 -0
  116. parrot/tools/weather.py +70 -0
  117. parrot/tools/wikipedia.py +58 -0
  118. parrot/tools/zipcode.py +198 -0
  119. parrot/utils/__init__.py +2 -0
  120. parrot/utils/parsers/__init__.py +5 -0
  121. parrot/utils/parsers/toml.cpython-39-x86_64-linux-gnu.so +0 -0
  122. parrot/utils/toml.py +11 -0
  123. parrot/utils/types.cpython-39-x86_64-linux-gnu.so +0 -0
  124. parrot/utils/uv.py +11 -0
  125. parrot/version.py +10 -0
  126. resources/users/__init__.py +5 -0
  127. resources/users/handlers.py +13 -0
  128. resources/users/models.py +205 -0
parrot/bots/data.py ADDED
@@ -0,0 +1,730 @@
1
+ from pathlib import Path
2
+ from typing import Any, List, Dict, Union, Optional
3
+ import re
4
+ from datetime import datetime, timezone, timedelta
5
+ from string import Template
6
+ import redis.asyncio as aioredis
7
+ import pandas as pd
8
+ from aiohttp import web
9
+ from datamodel.typedefs import SafeDict
10
+ from datamodel.parsers.json import json_encoder, json_decoder # pylint: disable=E0611 # noqa
11
+ from langchain_core.exceptions import OutputParserException
12
+ from langchain_core.messages import HumanMessage
13
+ from langchain_experimental.tools.python.tool import PythonAstREPLTool
14
+ from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
15
+ from navconfig import BASE_DIR
16
+ from navconfig.logging import logging
17
+ from querysource.queries.qs import QS
18
+ from querysource.queries.multi import MultiQS
19
+ from ..tools import AbstractTool
20
+ from ..tools.docx import DocxGeneratorTool
21
+ from .agent import BasicAgent
22
+ from ..models import AgentResponse
23
+ from ..conf import BASE_STATIC_URL, REDIS_HISTORY_URL
24
+ from .prompts import AGENT_PROMPT_SUFFIX, FORMAT_INSTRUCTIONS
25
+ from .prompts.data import (
26
+ TOOL_CALLING_PROMPT_PREFIX,
27
+ TOOL_CALLING_PROMPT_SUFFIX,
28
+ REACT_PROMPT_PREFIX
29
+ )
30
+
31
+ ## Enable Debug:
32
+ from langchain.globals import set_debug, set_verbose
33
+
34
+ # Enable verbosity for debugging
35
+ # set_debug(True)
36
+ # set_verbose(True)
37
+
38
+
39
def brace_escape(text: str) -> str:
    """Double every curly brace found in *text*.

    Doubling braces makes the text safe to embed in a ``str.format``-style
    template, where ``{{`` and ``}}`` stand for literal braces.
    """
    doubled_braces = {ord('{'): '{{', ord('}'): '}}'}
    return text.translate(doubled_braces)
41
+
42
+ class PandasAgent(BasicAgent):
43
+ """
44
+ A simple agent that uses the pandas library to perform data analysis tasks.
45
+ TODO
46
+ - add notify tool (email, telegram, teams)
47
+ - specific teams tool to send private messages from agents
48
+ """
49
+
50
def __init__(
    self,
    name: str = 'Agent',
    agent_type: str = None,
    llm: Optional[str] = None,
    tools: List[AbstractTool] = None,
    system_prompt: str = None,
    human_prompt: str = None,
    prompt_template: str = None,
    df: Union[list[pd.DataFrame], Dict[str, pd.DataFrame]] = None,
    query: Union[List[str], dict] = None,
    **kwargs
):
    """Set up the agent state, then delegate to ``BasicAgent.__init__``.

    Args:
        name: Agent name; ``"Pandas Agent"`` is stored on the instance when
            a falsy value is given.
        agent_type: Agent flavour; defaults to ``"tool-calling"``.
        llm: Forwarded to the parent constructor.
        tools: Forwarded to the parent constructor.
        system_prompt: Forwarded to the parent constructor.
        human_prompt: Forwarded to the parent constructor.
        prompt_template: Overrides the default prompt prefix template.
        df: Data handed to ``_define_dataframe``; that method raises
            ``ValueError`` for unsupported inputs (including ``None``).
        query: Query definition kept for later retrieval via ``get_query``.
        **kwargs: ``format_instructions`` and ``capabilities`` are read
            here; the whole mapping is also forwarded to the parent.
    """
    self.chatbot_id: str = None
    self._queries = query
    self.df = self._define_dataframe(df)
    self.df_locals: dict = {}
    self._prompt_prefix = None
    # Must be one of 'tool-calling', 'openai-tools', 'openai-functions',
    # or 'zero-shot-react-description'.
    self.agent_type = agent_type or "tool-calling"
    # Choose the default prompt template and the format instructions that
    # match the agent flavour.
    if self.agent_type != "tool-calling":
        fallback_template = REACT_PROMPT_PREFIX
        instructions = kwargs.get('format_instructions', FORMAT_INSTRUCTIONS)
    else:
        fallback_template = TOOL_CALLING_PROMPT_PREFIX
        instructions = ''
    self._prompt_template = prompt_template or fallback_template
    self._format_instructions: str = instructions
    self._capabilities: str = kwargs.get('capabilities', None)
    self.name = name or "Pandas Agent"
    self.description = "A simple agent that uses the pandas library to perform data analysis tasks."
    # Location of generated reports under the static directory.
    static_root = BASE_DIR.joinpath('static')
    self._static_path = static_root
    self.agent_report_dir = static_root.joinpath('reports', 'agents')
    parent_options = dict(
        name=name,
        llm=llm,
        system_prompt=system_prompt,
        human_prompt=human_prompt,
        tools=tools,
        agent_type=self.agent_type,
    )
    super().__init__(**parent_options, **kwargs)
91
+
92
+ def _define_dataframe(
93
+ self,
94
+ df: Union[list[pd.DataFrame], Dict[str, pd.DataFrame]]
95
+ ) -> Dict[str, pd.DataFrame]:
96
+ """Define the dataframe."""
97
+ _df = {}
98
+ if isinstance(df, pd.DataFrame):
99
+ # if data is one single dataframe
100
+ _df['df1'] = df
101
+ elif isinstance(df, list):
102
+ # if data is a list of dataframes
103
+ for i, dataframe in enumerate(df):
104
+ df_name = f"df{i + 1}"
105
+ _df[df_name] = dataframe.copy()
106
+ elif isinstance(df, pd.Series):
107
+ # if data is a pandas series
108
+ # convert it to a dataframe
109
+ df = pd.DataFrame(df)
110
+ _df['df1'] = df
111
+ elif isinstance(df, dict):
112
+ _df = df
113
+ else:
114
+ raise ValueError(
115
+ f"Expected pandas DataFrame, got {type(df)}"
116
+ )
117
+ return _df
118
+
119
def get_query(self) -> Union[List[str], dict]:
    """Return the query definition stored on this agent (``self._queries``)."""
    stored_query = self._queries
    return stored_query
122
+
123
def get_capabilities(self) -> str:
    """Return the capabilities text stored on this agent (``self._capabilities``)."""
    stored_capabilities = self._capabilities
    return stored_capabilities
126
+
127
def pandas_agent(self, **kwargs):
    """Build the LangChain pandas-dataframe agent over the agent's data.

    The current LLM is bound to ``self.tools`` and the same tools are also
    supplied as ``extra_tools``. The agent is created with
    ``allow_dangerous_code=True``, at most 10 iterations, intermediate
    steps returned, and the conversation memory plus parsing-error handling
    passed through ``agent_executor_kwargs``.

    Args:
        **kwargs: Extra keyword arguments forwarded to
            ``create_pandas_dataframe_agent``.

    Returns:
        Whatever ``create_pandas_dataframe_agent`` returns.
    """
    # The factory receives a plain list of frames when the data is a mapping.
    frames = self.df
    if isinstance(frames, dict):
        frames = list(frames.values())
    bound_llm = self._llm.bind_tools(self.tools)
    # NOTE: the prompt prefix (self._prompt_prefix) is not passed to the
    # factory; the ``prefix=`` argument was left disabled by the author.
    agent_options = dict(
        verbose=True,
        agent_type=self.agent_type,
        allow_dangerous_code=True,
        max_iterations=10,
        extra_tools=self.tools,
        agent_executor_kwargs={"memory": self.memory, "handle_parsing_errors": True},
        return_intermediate_steps=True,
    )
    return create_pandas_dataframe_agent(
        bound_llm,
        frames,
        **agent_options,
        **kwargs
    )
156
+
157
async def configure(self, df: pd.DataFrame = None, app=None) -> None:
    """Wire the agent to an application and build its runtime pieces.

    Steps, in order: register the agent in the application (when one is
    available), optionally replace the dataframes, configure the LLM and
    (if enabled) the vector store, create the conversation memory, build
    the pandas agent and finally cache the dataframes for 24 hours.

    Args:
        df: Optional replacement data, normalised by ``_define_dataframe``.
        app: An ``aiohttp`` application, or an object exposing ``get_app()``.
    """
    if app:
        # aiohttp applications are used directly; any other object is
        # asked for its underlying application.
        self.app = app if isinstance(app, web.Application) else app.get_app()
    if self.app:
        # Expose this agent under "<lowercased name>_bot".
        registry_key = f"{self.name.lower()}_bot"
        self.app[registry_key] = self
    if df is not None:
        self.df = self._define_dataframe(df)
    self.configure_llm(use_chat=True)
    if self._use_vector:
        self.configure_store()
    # Conversation history.
    self.memory = self.get_memory(input_key="input", output_key="output")
    # The same agent object is kept under both attribute names.
    self.agent = self.pandas_agent()
    self._agent = self.agent
    # Only once the agent exists is the data cached.
    await self._cache_data(
        self.chatbot_id,
        self.df,
        cache_expiration=24
    )
187
+
188
def mimefromext(self, ext: str) -> Optional[str]:
    """Return the MIME type registered for a file extension.

    The lookup is case-insensitive and tolerates a missing leading dot, so
    ``'.PNG'``, ``'.png'`` and ``'png'`` all resolve to ``'image/png'``.

    Args:
        ext: File extension, typically ``Path.suffix`` (e.g. ``'.csv'``).

    Returns:
        The MIME type, or ``None`` when the extension is empty or unknown.
    """
    if not ext:
        return None
    mime_types = {
        '.csv': 'text/csv',
        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        '.xls': 'application/vnd.ms-excel',
        '.json': 'application/json',
        '.txt': 'text/plain',
        '.html': 'text/html',
        '.htm': 'text/html',
        '.pdf': 'application/pdf',
        '.png': 'image/png',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.gif': 'image/gif',
        '.svg': 'image/svg+xml',
        '.md': 'text/markdown',
        '.ogg': 'audio/ogg',
        '.wav': 'audio/wav',
        '.mp3': 'audio/mpeg',
        '.mp4': 'video/mp4',
    }
    # Normalise so that "PNG", ".Png" and ".png" hit the same entry.
    normalized = ext.strip().lower()
    if not normalized.startswith('.'):
        normalized = f'.{normalized}'
    return mime_types.get(normalized)
211
+
212
def extract_filenames(self, response: "AgentResponse") -> None:
    """Attach the files announced in the agent output to *response*.

    Every output line containing the marker ``filename:`` is inspected; the
    text after the marker is resolved as a path and, when it points to an
    existing file, described by a dict with the keys ``content_type``,
    ``file_path``, ``filename`` and ``url`` (the path with the static
    directory prefix replaced by ``BASE_STATIC_URL``).

    The resulting ``{file name: description}`` mapping is stored in
    ``response.filename``; nothing is assigned when no file is found.
    Entries whose path cannot be resolved are skipped so that one bad line
    does not discard the remaining files.

    Args:
        response: Agent response exposing a textual ``output`` attribute.
    """
    output = getattr(response, 'output', None)
    if not isinstance(output, str):
        # Nothing to scan (e.g. the agent produced no textual output).
        return
    marker = 'filename:'
    static_root = str(self._static_path)
    filenames = {}
    for line in output.splitlines():
        if marker not in line:
            continue
        candidate = line.split(marker)[1].strip()
        if not candidate:
            continue
        try:
            filename_path = Path(candidate).resolve()
            if not filename_path.is_file():
                continue
        except (AttributeError, OSError, RuntimeError, ValueError):
            # Unusable path (symlink loop, embedded NUL, ...): skip it.
            continue
        filenames[filename_path.name] = {
            'content_type': self.mimefromext(filename_path.suffix),
            'file_path': filename_path,
            'filename': filename_path.name,
            'url': str(filename_path).replace(static_root, BASE_STATIC_URL),
        }
    if filenames:
        response.filename = filenames
238
+
239
async def invoke(
    self,
    query: str, llm: Optional[Any] = None,
    llm_params: Optional[Dict[str, Any]] = None
):
    """Run the agent on *query*, optionally with a one-off LLM.

    When *llm* is given, the instance LLM and agent are swapped for the
    duration of the call and always restored afterwards, including when the
    invocation or the post-processing fails.

    NOTE: the swap mutates instance state across an ``await``; concurrent
    ``invoke`` calls with different overrides on one instance can observe
    each other's LLM.

    Args:
        query: The question to send to the agent.
        llm: Optional LLM used only for this invocation.
        llm_params: Accepted for interface compatibility; currently unused.

    Returns:
        A 2-tuple whose shape depends on the outcome:

        * ``(markdown, AgentResponse)`` on success;
        * ``(None, exception)`` when invoking the agent failed;
        * ``(raw output, None)`` when rendering the markdown failed;
        * ``(raw result, exception)`` when building the response failed.
    """
    original_llm = None
    original_agent = None
    swapped = False
    try:
        try:
            if llm is not None:
                # Remember the current pair before replacing it.
                original_llm, original_agent = self._llm, self._agent
                swapped = True
                self._llm = llm
                # pandas_agent() reads self._llm, so it must be set first.
                self._agent = self.pandas_agent()
            result = await self._agent.ainvoke(
                {"input": query}
            )
        except Exception as e:
            return None, e
        try:
            # Some models wrap the answer in a ```tool_outputs fenced JSON
            # block; unwrap it so only the tool content is reported.
            try:
                if isinstance(result, dict):
                    output = result.get('output', '')
                    if '```tool_outputs' in output:
                        tool_json_match = re.search(
                            r'```tool_outputs\n(.*?)\n```', output, re.DOTALL
                        )
                        if tool_json_match:
                            tool_data = json_decoder(tool_json_match.group(1))
                            if "python_repl_ast_response" in tool_data:
                                python_response = tool_data["python_repl_ast_response"]
                                if isinstance(python_response, dict) and "content" in python_response:
                                    result['output'] = python_response["content"].strip()
            except Exception as parse_error:
                # Best effort: keep the raw output when unwrapping fails.
                self.logger.error(
                    f"Error parsing tool output: {parse_error}"
                )
            response = AgentResponse(question=query, **result)
            # Attach any file announced in the output (best effort).
            try:
                self.extract_filenames(response)
            except Exception as exc:
                self.logger.error(
                    f"Unable to extract filenames: {exc}"
                )
            try:
                return self.as_markdown(
                    response
                ), response
            except Exception as exc:
                self.logger.exception(
                    f"Error on response: {exc}"
                )
                return result.get('output', None), None
        except Exception as e:
            return result, e
    finally:
        # Undo the temporary override on every exit path.
        if swapped:
            self._llm = original_llm
            self._agent = original_agent
321
+
322
def _configure_python_tool(self, df_locals: dict, **kwargs) -> PythonAstREPLTool:
    """Create the Python REPL tool that executes code against the dataframes.

    Side effects:
        * ``df_locals['execution_results']`` is reset to an empty dict;
        * ``PythonAstREPLTool.__init__`` is patched (once per process) so
          that every instance uses its ``locals`` as ``globals`` too;
        * a setup snippet importing pandas, numpy, matplotlib, seaborn and
          the parrot helper functions is run inside the tool. A failure
          there is logged and does not prevent the tool from being returned.

    Args:
        df_locals: Namespace exposed to the executed code.
        **kwargs: Extra ``PythonAstREPLTool`` arguments. ``globals`` and
            ``verbose`` may be overridden here; ``locals`` is always
            *df_locals*.

    Returns:
        The configured ``PythonAstREPLTool`` instance.
    """
    df_locals['execution_results'] = {}

    # Patch the class only once: re-wrapping on every call would stack one
    # more wrapper around __init__ each time this method runs.
    if not getattr(PythonAstREPLTool.__init__, '_shares_namespace', False):
        original_init = PythonAstREPLTool.__init__

        def _init_sharing_namespace(tool, *args, **init_kwargs):
            original_init(tool, *args, **init_kwargs)
            # Single namespace: globals and locals are the same mapping.
            tool.globals = tool.locals

        _init_sharing_namespace._shares_namespace = True
        PythonAstREPLTool.__init__ = _init_sharing_namespace

    # Pull out the arguments passed explicitly below so a caller-supplied
    # value cannot trigger "got multiple values for keyword argument".
    tool_kwargs = dict(kwargs)
    tool_globals = tool_kwargs.pop('globals', {})
    tool_verbose = tool_kwargs.pop('verbose', True)
    tool_kwargs.pop('locals', None)
    python_tool = PythonAstREPLTool(
        locals=df_locals,
        globals=tool_globals,
        verbose=tool_verbose,
        **tool_kwargs
    )

    # Essential imports and helpers made available to the executed code.
    setup_code = """
# Ensure essential libraries are imported
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter, defaultdict
from parrot.bots.tools import quick_eda, generate_eda_report, list_available_dataframes, create_plot, generate_pdf_from_html

# Set plotting style
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('Set2')

# Verify pandas is loaded correctly
print(f"Pandas version: {pd.__version__}")
"""
    try:
        python_tool.run(setup_code)
    except Exception as e:
        self.logger.error(
            f"Error setting up python tool: {e}"
        )
    self.logger.debug(':: PYTHON TOOL > %s', python_tool)
    return python_tool
366
+
367
+ def _metrics_guide(self, df_key: str, df_name: str, columns: list) -> str:
368
+ """Generate a guide for the dataframe columns."""
369
+ # Create a markdown table with column category, column name, type and with dataframe is present:
370
+ table = "\n| Category | Column Name | Type | DataFrame | Dataframe Name |\n"
371
+ table += "|------------------|-------------|------|-----------|\n"
372
+ for column in columns:
373
+ # Get the column name
374
+ column_name = column
375
+ # split by "_" and first element is the category (if any):
376
+ # Get the column category
377
+ try:
378
+ column_category = column.split('_')[0]
379
+ except IndexError:
380
+ column_category = df_name
381
+ # Get the type of the column
382
+ column_type = str(self.df[df_name][column].dtype)
383
+ # Add the row to the table
384
+ table += f"| {column_category} | {column_name} | {column_type} | {df_key} | {df_name} |\n\n\n"
385
+ # Add a note about the dataframe
386
+ table += f"\nNote: {df_key} is also available as {df_name}\n"
387
+ return table
388
+
389
def define_prompt(self, prompt, **kwargs) -> None:
    """Build the prompt prefix and suffix describing the agent's dataframes.

    Side effects visible in this method:
        * ``self.agent_report_dir`` is set to ``<static>/<chatbot_id>`` and
          the directory is created when missing;
        * ``self.df_locals`` receives every dataframe under both its own
          name and its positional key (``df1``, ``df2``, ...), plus
          ``<key>_row_count`` and ``agent_report_dir``;
        * a Python REPL tool is created and appended to ``self.tools``
          (NOTE(review): appended on every call - confirm this method is
          only invoked once per agent);
        * ``self._prompt_prefix`` and ``self._prompt_suffix`` are assigned.

    Args:
        prompt: Value substituted for ``$system_prompt_base`` in the template.
        **kwargs: Forwarded both to ``_configure_python_tool`` and to the
            template substitution.
    """
    # Current UTC date, rendered as YYYY-MM-DD for the prompt.
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    self.agent_report_dir = self._static_path.joinpath(str(self.chatbot_id))
    if self.agent_report_dir.exists() is False:
        self.agent_report_dir.mkdir(parents=True, exist_ok=True)
    # Add dataframe information
    num_dfs = len(self.df)
    self.df_locals['agent_report_dir'] = self.agent_report_dir
    df_info = ''
    for i, (df_name, df) in enumerate(self.df.items()):
        # Positional alias: first dataframe is df1, second df2, ...
        df_key = f"df{i + 1}"
        # The same object is exposed under both names in the REPL namespace.
        self.df_locals[df_name] = df
        self.df_locals[df_key] = df
        row_count = len(df)
        self.df_locals[f"{df_key}_row_count"] = row_count
        # Get basic dataframe info
        df_shape = f"DataFrame Shape: {df.shape[0]} rows × {df.shape[1]} columns"
        df_columns = self._metrics_guide(df_key, df_name, df.columns.tolist())
        # Generate summary statistics
        # Braces are doubled so the markdown survives later str.format-style
        # processing of the prompt.
        summary_stats = brace_escape(df.describe(include='all').to_markdown())
        # Only used by the "tool-calling" variant below.
        df_head = brace_escape(df.head(4).to_markdown())
        # Create df_info block
        if self.agent_type == "tool-calling":
            df_info += f"""
## Dataframe Name: {df_key}:
## DataFrame: {df_key} (also accessible as {df_name} in Python code)

**Shape {df_key}**: {df_shape}

**Column Details (Name, Type, Category)**:
{df_columns}

**First 4 Rows (`print({df_key}.head(4).to_markdown())`)**:
{df_head}

**Summary Statistics (`print({df_key}.describe(include='all').to_markdown())`)**:
{summary_stats}
"""
        else:
            df_info += f"""
## DataFrame {df_key}:

### {df_key} Shape:
{df_shape}

### {df_key} Info:
{df_columns}

### {df_key} Summary Statistics:
{summary_stats}
"""
    # Configure Python tool:
    python_tool = self._configure_python_tool(
        df_locals=self.df_locals,
        **kwargs
    )
    # Add the Python tool to the tools list
    self.tools.append(python_tool)
    # List of Tools:
    list_of_tools = ""
    for tool in self.tools:
        name = tool.name
        description = tool.description  # noqa  pylint: disable=E1101
        list_of_tools += f'- {name}: {description}\n'
    list_of_tools += "\n"
    tools_names = [tool.name for tool in self.tools]
    capabilities = ''
    if self._capabilities:
        capabilities = "**Your Capabilities:**\n"
        capabilities += self.sanitize_prompt_text(self._capabilities) + "\n"
    # Create the prompt
    sanitized_backstory = ''
    if self.backstory:
        sanitized_backstory = self.sanitize_prompt_text(self.backstory)
    # string.Template uses $placeholders; safe_substitute leaves any
    # placeholder without a matching value untouched instead of raising.
    tmpl = Template(self._prompt_template)
    self._prompt_prefix = tmpl.safe_substitute(
        name=self.name,
        description=self.description,
        list_of_tools=list_of_tools,
        backstory=sanitized_backstory,
        capabilities=capabilities,
        today_date=now,
        system_prompt_base=prompt,
        tools=", ".join(tools_names),
        format_instructions=self._format_instructions.format(
            tool_names=", ".join(tools_names)),
        df_info=df_info,
        num_dfs=num_dfs,
        rationale=self.rationale,
        agent_report_dir=self.agent_report_dir,
        **kwargs
    )
    # The tool-calling flavour has its own templated suffix; every other
    # agent type uses the shared suffix unchanged.
    if self.agent_type == "tool-calling":
        tmpl = Template(TOOL_CALLING_PROMPT_SUFFIX)
        self._prompt_suffix = tmpl.safe_substitute(
            df_info=df_info,
            num_dfs=num_dfs
        )
    else:
        self._prompt_suffix = AGENT_PROMPT_SUFFIX
489
+
490
def default_backstory(self) -> str:
    """Return the backstory text describing this agent as a pandas data-analysis assistant."""
    backstory = "You are a helpful assistant built to provide comprehensive guidance and support on data calculations and data analysis working with pandas dataframes."
    return backstory
492
+
493
+ @staticmethod
494
+ async def call_qs(queries: list) -> Dict[str, pd.DataFrame]:
495
+ """
496
+ call_qs.
497
+ description: Call the QuerySource queries.
498
+
499
+ This method is used to execute multiple queries and files on the QueryObject.
500
+ It returns a dictionary with the results.
501
+ """
502
+ dfs = {}
503
+ for query in queries:
504
+ if not isinstance(query, str):
505
+ raise ValueError(
506
+ f"Query {query} is not a string."
507
+ )
508
+ # now, the only query accepted is a slug:
509
+ try:
510
+ qy = QS(
511
+ slug=query
512
+ )
513
+ df, error = await qy.query(output_format='pandas')
514
+ if error:
515
+ raise ValueError(
516
+ f"Query {query} fail with error {error}."
517
+ )
518
+ if not isinstance(df, pd.DataFrame):
519
+ raise ValueError(
520
+ f"Query {query} is not returning a dataframe."
521
+ )
522
+ dfs[query] = df
523
+ except ValueError:
524
+ raise
525
+ except Exception as e:
526
+ raise ValueError(
527
+ f"Error executing Query {query}: {e}"
528
+ )
529
+ return dfs
530
+
531
+ @staticmethod
532
+ async def call_multiquery(query: dict) -> Dict[str, pd.DataFrame]:
533
+ """
534
+ call_multiquery.
535
+ description: Call the MultiQuery queries.
536
+
537
+ This method is used to execute multiple queries and files on the QueryObject.
538
+ It returns a dictionary with the results.
539
+ """
540
+ data = {}
541
+ _queries = query.pop('queries', {})
542
+ _files = query.pop('files', {})
543
+ if not _queries and not _files:
544
+ raise ValueError(
545
+ "Queries or files are required."
546
+ )
547
+ try:
548
+ ## Step 1: Running all Queries and Files on QueryObject
549
+ qs = MultiQS(
550
+ slug=[],
551
+ queries=_queries,
552
+ files=_files,
553
+ query=query,
554
+ conditions=data,
555
+ return_all=True
556
+ )
557
+ result, _ = await qs.execute()
558
+ except Exception as e:
559
+ raise ValueError(
560
+ f"Error executing MultiQuery: {e}"
561
+ )
562
+ if not isinstance(result, dict):
563
+ raise ValueError(
564
+ "MultiQuery is not returning a dictionary."
565
+ )
566
+ # MultiQuery returns a dictionary with the results
567
+ return result
568
+
569
+ @classmethod
570
+ async def gen_data(
571
+ cls,
572
+ query: Union[list, dict],
573
+ agent_name: Optional[str] = None,
574
+ refresh: bool = False,
575
+ cache_expiration: int = 48,
576
+ no_cache: bool = False
577
+ ) -> Dict[str, pd.DataFrame]:
578
+ """
579
+ gen_data.
580
+
581
+ Generate the dataframes required for the agent to work, with Redis caching support.
582
+
583
+ Parameters:
584
+ -----------
585
+ query : Union[list, dict]
586
+ The query or queries to execute to generate dataframes.
587
+ refresh : bool
588
+ If True, forces regeneration of dataframes even if cached versions exist.
589
+ cache_expiration_hours : int
590
+ Number of hours to keep the cached dataframes (default: 48).
591
+ no_cache : bool
592
+ If True, disables caching even if no agent_name is provided.
593
+
594
+ Returns:
595
+ --------
596
+ Dict[str, pd.DataFrame]
597
+ A Dictionary of named pandas DataFrames generated from the queries.
598
+ """
599
+ # If agent_name is provided, we'll use Redis caching
600
+ if not agent_name:
601
+ agent_name = cls.chatbot_id
602
+
603
+ if not refresh:
604
+ # Try to get cached dataframes
605
+ cached_dfs = await cls._get_cached_data(agent_name)
606
+ if cached_dfs:
607
+ return cached_dfs
608
+
609
+ # Generate dataframes from query if no cache exists or refresh is True
610
+ dfs = await cls._execute_query(query)
611
+
612
+ # If agent_name is provided, cache the generated dataframes
613
+ if no_cache is False:
614
+ await cls._cache_data(agent_name, dfs, cache_expiration)
615
+ return dfs
616
+
617
+ @classmethod
618
+ async def _execute_query(cls, query: Union[list, dict]) -> Dict[str, pd.DataFrame]:
619
+ """Execute the query and return the generated dataframes."""
620
+ if isinstance(query, dict):
621
+ # is a MultiQuery execution, use the MultiQS class engine to do it:
622
+ try:
623
+ return await cls.call_multiquery(query)
624
+ except ValueError as e:
625
+ raise ValueError(f"Error creating Query For Agent: {e}")
626
+ elif isinstance(query, (str, list)):
627
+ if isinstance(query, str):
628
+ query = [query]
629
+ try:
630
+ return await cls.call_qs(query)
631
+ except ValueError as e:
632
+ raise ValueError(f"Error creating Query For Agent: {e}")
633
+ else:
634
+ raise ValueError(
635
+ f"Expected a list of queries or a dictionary, got {type(query)}"
636
+ )
637
+
638
@classmethod
async def _get_redis_connection(cls):
    """Create and return an asyncio Redis client for ``REDIS_HISTORY_URL``.

    The client is created with ``decode_responses=True``.
    """
    client = aioredis.Redis.from_url(
        REDIS_HISTORY_URL,
        decode_responses=True
    )
    return await client
647
+
648
+ @classmethod
649
+ async def _get_cached_data(cls, agent_name: str) -> Optional[Dict[str, pd.DataFrame]]:
650
+ """
651
+ Retrieve cached data from Redis if they exist.
652
+
653
+ Returns None if no cache exists or on error.
654
+ """
655
+ try:
656
+ redis_conn = await cls._get_redis_connection()
657
+ # Check if the agent key exists
658
+ key = f"agent_{agent_name}"
659
+ if not await redis_conn.exists(key):
660
+ await redis_conn.close()
661
+ return None
662
+
663
+ # Get all dataframe keys stored for this agent
664
+ df_keys = await redis_conn.hkeys(key)
665
+ if not df_keys:
666
+ await redis_conn.close()
667
+ return None
668
+
669
+ # Retrieve and convert each dataframe
670
+ dataframes = {}
671
+ for df_key in df_keys:
672
+ df_json = await redis_conn.hget(key, df_key)
673
+ if df_json:
674
+ # Convert from JSON to dataframe
675
+ df_data = json_decoder(df_json)
676
+ df = pd.DataFrame.from_records(df_data)
677
+ dataframes[df_key] = df
678
+
679
+ await redis_conn.close()
680
+ return dataframes if dataframes else None
681
+
682
+ except Exception as e:
683
+ # Log the error but continue execution without cache
684
+ print(f"Error retrieving cache: {e}")
685
+ return None
686
+
687
+ @classmethod
688
+ async def _cache_data(
689
+ cls,
690
+ agent_name: str,
691
+ dataframes: Dict[str, pd.DataFrame],
692
+ cache_expiration: int
693
+ ) -> None:
694
+ """
695
+ Cache the given dataframes in Redis.
696
+
697
+ The dataframes are stored as JSON records under a hash key named after the agent.
698
+ """
699
+ try:
700
+ if not dataframes:
701
+ return
702
+
703
+ redis_conn = await cls._get_redis_connection()
704
+ key = f"agent_{agent_name}"
705
+
706
+ # Delete any existing cache for this agent
707
+ await redis_conn.delete(key)
708
+ hkeys = await redis_conn.hkeys(key)
709
+ if hkeys:
710
+ await redis_conn.hdel(key, *hkeys)
711
+
712
+ # Store each dataframe under the agent's hash
713
+ for df_key, df in dataframes.items():
714
+ # Convert DataFrame to JSON
715
+ df_json = json_encoder(df.to_dict(orient='records'))
716
+ await redis_conn.hset(key, df_key, df_json)
717
+
718
+ # Set expiration time
719
+ expiration = timedelta(hours=cache_expiration)
720
+ await redis_conn.expire(key, int(expiration.total_seconds()))
721
+
722
+ logging.info(
723
+ f"Data was cached for agent {agent_name} with expiration of {cache_expiration} hours"
724
+ )
725
+
726
+ await redis_conn.close()
727
+
728
+ except Exception as e:
729
+ # Log the error but continue execution
730
+ print(f"Error caching dataframes: {e}")