ai-parrot 0.8.3__cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ai-parrot might be problematic. Click here for more details.
- ai_parrot-0.8.3.dist-info/LICENSE +21 -0
- ai_parrot-0.8.3.dist-info/METADATA +306 -0
- ai_parrot-0.8.3.dist-info/RECORD +128 -0
- ai_parrot-0.8.3.dist-info/WHEEL +6 -0
- ai_parrot-0.8.3.dist-info/top_level.txt +2 -0
- parrot/__init__.py +30 -0
- parrot/bots/__init__.py +5 -0
- parrot/bots/abstract.py +1115 -0
- parrot/bots/agent.py +492 -0
- parrot/bots/basic.py +9 -0
- parrot/bots/bose.py +17 -0
- parrot/bots/chatbot.py +271 -0
- parrot/bots/cody.py +17 -0
- parrot/bots/copilot.py +117 -0
- parrot/bots/data.py +730 -0
- parrot/bots/dataframe.py +103 -0
- parrot/bots/hrbot.py +15 -0
- parrot/bots/interfaces/__init__.py +1 -0
- parrot/bots/interfaces/retrievers.py +12 -0
- parrot/bots/notebook.py +619 -0
- parrot/bots/odoo.py +17 -0
- parrot/bots/prompts/__init__.py +41 -0
- parrot/bots/prompts/agents.py +91 -0
- parrot/bots/prompts/data.py +214 -0
- parrot/bots/retrievals/__init__.py +1 -0
- parrot/bots/retrievals/constitutional.py +19 -0
- parrot/bots/retrievals/multi.py +122 -0
- parrot/bots/retrievals/retrieval.py +610 -0
- parrot/bots/tools/__init__.py +7 -0
- parrot/bots/tools/eda.py +325 -0
- parrot/bots/tools/pdf.py +50 -0
- parrot/bots/tools/plot.py +48 -0
- parrot/bots/troc.py +16 -0
- parrot/conf.py +170 -0
- parrot/crew/__init__.py +3 -0
- parrot/crew/tools/__init__.py +22 -0
- parrot/crew/tools/bing.py +13 -0
- parrot/crew/tools/config.py +43 -0
- parrot/crew/tools/duckgo.py +62 -0
- parrot/crew/tools/file.py +24 -0
- parrot/crew/tools/google.py +168 -0
- parrot/crew/tools/gtrends.py +16 -0
- parrot/crew/tools/md2pdf.py +25 -0
- parrot/crew/tools/rag.py +42 -0
- parrot/crew/tools/search.py +32 -0
- parrot/crew/tools/url.py +21 -0
- parrot/exceptions.cpython-311-x86_64-linux-gnu.so +0 -0
- parrot/handlers/__init__.py +4 -0
- parrot/handlers/agents.py +292 -0
- parrot/handlers/bots.py +196 -0
- parrot/handlers/chat.py +192 -0
- parrot/interfaces/__init__.py +6 -0
- parrot/interfaces/database.py +27 -0
- parrot/interfaces/http.py +805 -0
- parrot/interfaces/images/__init__.py +0 -0
- parrot/interfaces/images/plugins/__init__.py +18 -0
- parrot/interfaces/images/plugins/abstract.py +58 -0
- parrot/interfaces/images/plugins/exif.py +709 -0
- parrot/interfaces/images/plugins/hash.py +52 -0
- parrot/interfaces/images/plugins/vision.py +104 -0
- parrot/interfaces/images/plugins/yolo.py +66 -0
- parrot/interfaces/images/plugins/zerodetect.py +197 -0
- parrot/llms/__init__.py +1 -0
- parrot/llms/abstract.py +69 -0
- parrot/llms/anthropic.py +58 -0
- parrot/llms/gemma.py +15 -0
- parrot/llms/google.py +44 -0
- parrot/llms/groq.py +67 -0
- parrot/llms/hf.py +45 -0
- parrot/llms/openai.py +61 -0
- parrot/llms/pipes.py +114 -0
- parrot/llms/vertex.py +89 -0
- parrot/loaders/__init__.py +9 -0
- parrot/loaders/abstract.py +628 -0
- parrot/loaders/files/__init__.py +0 -0
- parrot/loaders/files/abstract.py +39 -0
- parrot/loaders/files/text.py +63 -0
- parrot/loaders/txt.py +26 -0
- parrot/manager.py +333 -0
- parrot/models.py +504 -0
- parrot/py.typed +0 -0
- parrot/stores/__init__.py +11 -0
- parrot/stores/abstract.py +248 -0
- parrot/stores/chroma.py +188 -0
- parrot/stores/duck.py +162 -0
- parrot/stores/embeddings/__init__.py +10 -0
- parrot/stores/embeddings/abstract.py +46 -0
- parrot/stores/embeddings/base.py +52 -0
- parrot/stores/embeddings/bge.py +20 -0
- parrot/stores/embeddings/fastembed.py +17 -0
- parrot/stores/embeddings/google.py +18 -0
- parrot/stores/embeddings/huggingface.py +20 -0
- parrot/stores/embeddings/ollama.py +14 -0
- parrot/stores/embeddings/openai.py +26 -0
- parrot/stores/embeddings/transformers.py +21 -0
- parrot/stores/embeddings/vertexai.py +17 -0
- parrot/stores/empty.py +10 -0
- parrot/stores/faiss.py +160 -0
- parrot/stores/milvus.py +397 -0
- parrot/stores/postgres.py +653 -0
- parrot/stores/qdrant.py +170 -0
- parrot/tools/__init__.py +23 -0
- parrot/tools/abstract.py +68 -0
- parrot/tools/asknews.py +33 -0
- parrot/tools/basic.py +51 -0
- parrot/tools/bby.py +359 -0
- parrot/tools/bing.py +13 -0
- parrot/tools/docx.py +343 -0
- parrot/tools/duck.py +62 -0
- parrot/tools/execute.py +56 -0
- parrot/tools/gamma.py +28 -0
- parrot/tools/google.py +170 -0
- parrot/tools/gvoice.py +301 -0
- parrot/tools/results.py +278 -0
- parrot/tools/stack.py +27 -0
- parrot/tools/weather.py +70 -0
- parrot/tools/wikipedia.py +58 -0
- parrot/tools/zipcode.py +198 -0
- parrot/utils/__init__.py +2 -0
- parrot/utils/parsers/__init__.py +5 -0
- parrot/utils/parsers/toml.cpython-311-x86_64-linux-gnu.so +0 -0
- parrot/utils/toml.py +11 -0
- parrot/utils/types.cpython-311-x86_64-linux-gnu.so +0 -0
- parrot/utils/uv.py +11 -0
- parrot/version.py +10 -0
- resources/users/__init__.py +5 -0
- resources/users/handlers.py +13 -0
- resources/users/models.py +205 -0
parrot/bots/data.py
ADDED
|
@@ -0,0 +1,730 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Any, List, Dict, Union, Optional
|
|
3
|
+
import re
|
|
4
|
+
from datetime import datetime, timezone, timedelta
|
|
5
|
+
from string import Template
|
|
6
|
+
import redis.asyncio as aioredis
|
|
7
|
+
import pandas as pd
|
|
8
|
+
from aiohttp import web
|
|
9
|
+
from datamodel.typedefs import SafeDict
|
|
10
|
+
from datamodel.parsers.json import json_encoder, json_decoder # pylint: disable=E0611 # noqa
|
|
11
|
+
from langchain_core.exceptions import OutputParserException
|
|
12
|
+
from langchain_core.messages import HumanMessage
|
|
13
|
+
from langchain_experimental.tools.python.tool import PythonAstREPLTool
|
|
14
|
+
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
|
|
15
|
+
from navconfig import BASE_DIR
|
|
16
|
+
from navconfig.logging import logging
|
|
17
|
+
from querysource.queries.qs import QS
|
|
18
|
+
from querysource.queries.multi import MultiQS
|
|
19
|
+
from ..tools import AbstractTool
|
|
20
|
+
from ..tools.docx import DocxGeneratorTool
|
|
21
|
+
from .agent import BasicAgent
|
|
22
|
+
from ..models import AgentResponse
|
|
23
|
+
from ..conf import BASE_STATIC_URL, REDIS_HISTORY_URL
|
|
24
|
+
from .prompts import AGENT_PROMPT_SUFFIX, FORMAT_INSTRUCTIONS
|
|
25
|
+
from .prompts.data import (
|
|
26
|
+
TOOL_CALLING_PROMPT_PREFIX,
|
|
27
|
+
TOOL_CALLING_PROMPT_SUFFIX,
|
|
28
|
+
REACT_PROMPT_PREFIX
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
## Enable Debug:
|
|
32
|
+
from langchain.globals import set_debug, set_verbose
|
|
33
|
+
|
|
34
|
+
# Enable verbosity for debugging
|
|
35
|
+
# set_debug(True)
|
|
36
|
+
# set_verbose(True)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def brace_escape(text: str) -> str:
    """Return ``text`` with every ``{`` and ``}`` doubled.

    Doubling the braces lets the text be embedded in a ``str.format``-style
    template without being interpreted as a placeholder.
    """
    doubled = {ord('{'): '{{', ord('}'): '}}'}
    return text.translate(doubled)
|
|
41
|
+
|
|
42
|
+
class PandasAgent(BasicAgent):
    """
    A simple agent that uses the pandas library to perform data analysis tasks.

    The agent keeps its input data as a dictionary of named pandas DataFrames,
    builds a LangChain pandas-dataframe agent on top of them and can cache the
    dataframes in Redis under a per-agent key.

    TODO
    - add notify tool (email, telegram, teams)
    - specific teams tool to send private messages from agents
    """
|
|
49
|
+
|
|
50
|
+
def __init__(
    self,
    name: str = 'Agent',
    agent_type: str = None,
    llm: Optional[str] = None,
    tools: List[AbstractTool] = None,
    system_prompt: str = None,
    human_prompt: str = None,
    prompt_template: str = None,
    df: Union[list[pd.DataFrame], Dict[str, pd.DataFrame]] = None,
    query: Union[List[str], dict] = None,
    **kwargs
):
    """Set up the agent state and delegate the rest to the parent class.

    Args:
        name: Display name of the agent.
        agent_type: Agent flavour; falls back to ``"tool-calling"``.
        llm: LLM passed through to the parent constructor.
        tools: Extra tools passed through to the parent constructor.
        system_prompt: System prompt passed through to the parent.
        human_prompt: Human prompt passed through to the parent.
        prompt_template: Overrides the default prompt prefix template.
        df: Dataframe(s) normalised by ``_define_dataframe``.
        query: Query definition kept for later use (see ``get_query``).
        **kwargs: ``format_instructions`` and ``capabilities`` are read
            here; all keyword arguments are also forwarded to the parent.
    """
    self.chatbot_id: str = None
    self._queries = query
    self.df = self._define_dataframe(df)
    self.df_locals: dict = {}
    # Agent ID:
    self._prompt_prefix = None
    # Must be one of 'tool-calling', 'openai-tools', 'openai-functions', or 'zero-shot-react-description'.
    self.agent_type = agent_type or "tool-calling"
    uses_tool_calling = self.agent_type == "tool-calling"
    fallback_template = (
        TOOL_CALLING_PROMPT_PREFIX if uses_tool_calling else REACT_PROMPT_PREFIX
    )
    self._prompt_template = prompt_template or fallback_template
    # Tool-calling agents need no textual format instructions.
    self._format_instructions: str = (
        '' if uses_tool_calling
        else kwargs.get('format_instructions', FORMAT_INSTRUCTIONS)
    )
    self._capabilities: str = kwargs.get('capabilities', None)
    self.name = name or "Pandas Agent"
    self.description = "A simple agent that uses the pandas library to perform data analysis tasks."
    static_root = BASE_DIR.joinpath('static')
    self._static_path = static_root
    self.agent_report_dir = static_root.joinpath('reports', 'agents')
    super().__init__(
        name=name,
        llm=llm,
        system_prompt=system_prompt,
        human_prompt=human_prompt,
        tools=tools,
        agent_type=self.agent_type,
        **kwargs
    )
|
|
91
|
+
|
|
92
|
+
def _define_dataframe(
    self,
    df: Union[list[pd.DataFrame], Dict[str, pd.DataFrame]]
) -> Dict[str, pd.DataFrame]:
    """Normalise the supplied data into a ``{name: DataFrame}`` mapping.

    Args:
        df: One of: ``None`` (no data yet), a single DataFrame, a list of
            DataFrames, a Series, or a dict of named DataFrames.

    Returns:
        A dictionary of DataFrames. Unnamed inputs are keyed ``df1``,
        ``df2``, ...; a dict is returned as-is; ``None`` yields ``{}``.

    Raises:
        ValueError: If ``df`` is of any other type.
    """
    if df is None:
        # The constructor defaults to ``df=None`` and ``configure()`` can
        # supply the data later, so "no data yet" must not be an error.
        return {}
    if isinstance(df, dict):
        return df
    if isinstance(df, pd.DataFrame):
        # if data is one single dataframe
        return {'df1': df}
    if isinstance(df, pd.Series):
        # a series is promoted to a single-column dataframe
        return {'df1': pd.DataFrame(df)}
    if isinstance(df, list):
        # if data is a list of dataframes, each one is copied
        return {
            f"df{position}": dataframe.copy()
            for position, dataframe in enumerate(df, start=1)
        }
    raise ValueError(
        f"Expected pandas DataFrame, got {type(df)}"
    )
|
|
118
|
+
|
|
119
|
+
def get_query(self) -> Union[List[str], dict]:
    """Return the query definition given to the constructor."""
    queries = self._queries
    return queries
|
|
122
|
+
|
|
123
|
+
def get_capabilities(self) -> str:
    """Return the capabilities text stored on the agent (may be ``None``)."""
    capabilities = self._capabilities
    return capabilities
|
|
126
|
+
|
|
127
|
+
def pandas_agent(self, **kwargs):
    """
    Build the LangChain pandas-dataframe agent for this instance.

    The agent's dataframes, bound LLM, tools and conversation memory are
    handed to ``create_pandas_dataframe_agent``; extra keyword arguments
    are forwarded unchanged.

    Returns:
        Whatever ``create_pandas_dataframe_agent`` returns for the
        configured ``agent_type``.
    """
    if isinstance(self.df, dict):
        frames = list(self.df.values())
    else:
        frames = self.df
    # bind the tools to the LLM:
    tool_aware_llm = self._llm.bind_tools(self.tools)
    executor_options = {"memory": self.memory, "handle_parsing_errors": True}
    # NOTE: the prompt prefix (self._prompt_prefix) is intentionally not passed.
    return create_pandas_dataframe_agent(
        tool_aware_llm,
        frames,
        verbose=True,
        agent_type=self.agent_type,
        allow_dangerous_code=True,
        max_iterations=10,
        extra_tools=self.tools,
        agent_executor_kwargs=executor_options,
        return_intermediate_steps=True,
        **kwargs
    )
|
|
156
|
+
|
|
157
|
+
async def configure(self, df: pd.DataFrame = None, app=None) -> None:
    """Wire the agent into the application and build its executor.

    Args:
        df: Optional dataframe(s) replacing the ones given at construction.
        app: Optional aiohttp application, or an object exposing
            ``get_app()`` that returns one.
    """
    if app:
        # An aiohttp application is stored directly; anything else is unwrapped.
        self.app = app if isinstance(app, web.Application) else app.get_app()
    # adding this configured chatbot to app:
    if self.app:
        registry_key = f"{self.name.lower()}_bot"
        self.app[registry_key] = self
    if df is not None:
        self.df = self._define_dataframe(df)
    # Configure LLM:
    self.configure_llm(use_chat=True)
    # Configure VectorStore if enabled:
    if self._use_vector:
        self.configure_store()
    # Conversation History:
    self.memory = self.get_memory(input_key="input", output_key="output")
    # The pandas agent doubles as the executor used to run queries.
    self.agent = self.pandas_agent()
    self._agent = self.agent
    # Once the agent exists, cache its data for 24 hours.
    await self._cache_data(self.chatbot_id, self.df, cache_expiration=24)
|
|
187
|
+
|
|
188
|
+
def mimefromext(self, ext: str) -> Optional[str]:
    """Get the mime type from the file extension.

    Args:
        ext: File extension including the leading dot (e.g. ``".csv"``).
            Matching is case-insensitive, so ``".PNG"`` works too.

    Returns:
        The mime type, or ``None`` when the extension is unknown or empty.
    """
    mime_types = {
        '.csv': 'text/csv',
        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        '.xls': 'application/vnd.ms-excel',
        '.json': 'application/json',
        '.txt': 'text/plain',
        '.html': 'text/html',
        '.htm': 'text/html',
        '.pdf': 'application/pdf',
        '.png': 'image/png',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.gif': 'image/gif',
        '.svg': 'image/svg+xml',
        '.md': 'text/markdown',
        '.ogg': 'audio/ogg',
        '.wav': 'audio/wav',
        '.mp3': 'audio/mpeg',
        '.mp4': 'video/mp4',
    }
    if not ext:
        return None
    # File suffixes are often upper-case ("REPORT.PDF"); normalise first.
    return mime_types.get(ext.lower(), None)
|
|
211
|
+
|
|
212
|
+
def extract_filenames(self, response: "AgentResponse") -> None:
    """Attach files announced in the agent output to the response.

    Every output line containing ``filename:`` is inspected; when the text
    after the marker resolves to an existing file, an entry with its content
    type, path, name and static URL is collected. If at least one file is
    found the mapping is stored on ``response.filename``.

    Args:
        response: Agent response whose ``output`` text is scanned and whose
            ``filename`` attribute is set in place.
    """
    marker = 'filename:'
    found = {}
    for line in (response.output or '').splitlines():
        if marker not in line:
            continue
        candidate = line.split(marker)[1].strip()
        if not candidate:
            continue
        try:
            file_path = Path(candidate).resolve()
            if not file_path.is_file():
                continue
        except (AttributeError, OSError, ValueError):
            # Best effort: a malformed path must not abort the whole scan.
            continue
        # Map the on-disk location under the static folder to its public URL.
        url = str(file_path).replace(str(self._static_path), BASE_STATIC_URL)
        found[file_path.name] = {
            'content_type': self.mimefromext(file_path.suffix),
            'file_path': file_path,
            'filename': file_path.name,
            'url': url
        }
    if found:
        response.filename = found
|
|
238
|
+
|
|
239
|
+
async def invoke(
    self,
    query: str, llm: Optional[Any] = None,
    llm_params: Optional[Dict[str, Any]] = None
):
    """Invoke the agent with optional LLM override.

    When ``llm`` is given, a temporary agent is built around it for this
    call only; the instance's own LLM and agent are always restored before
    returning, including on failure.

    Args:
        query (str): The query to ask the chatbot.
        llm (Optional[Any]): Optional LLM to use for this specific invocation.
        llm_params (Optional[Dict[str, Any]]): Accepted for API
            compatibility; this method does not currently apply it.

    Returns:
        tuple: ``(markdown, response)`` on success; ``(None, error)`` when
        the agent call fails; ``(output, None)`` when rendering markdown
        fails; ``(result, error)`` when building the response fails.
    """
    original_agent = None
    original_llm = None
    swapped = False
    try:
        try:
            # If a different LLM is provided, create a temporary agent
            if llm is not None:
                original_llm = self._llm
                original_agent = self._agent
                swapped = True
                self._llm = llm
                self._agent = self.pandas_agent()
            # Invoke the agent with the query
            result = await self._agent.ainvoke(
                {"input": query}
            )
        except Exception as e:
            return None, e
        try:
            # Parse tool outputs if present
            try:
                if isinstance(result, dict):
                    output = result.get('output', '')
                    if '```tool_outputs' in output:
                        tool_json_match = re.search(r'```tool_outputs\n(.*?)\n```', output, re.DOTALL)
                        if tool_json_match:
                            tool_data = json_decoder(tool_json_match.group(1))
                            if "python_repl_ast_response" in tool_data:
                                python_response = tool_data["python_repl_ast_response"]
                                if isinstance(python_response, dict) and "content" in python_response:
                                    # Replace the output with just the content
                                    result['output'] = python_response["content"].strip()
            except Exception as parse_error:
                self.logger.error(
                    f"Error parsing tool output: {parse_error}"
                )
            response = AgentResponse(question=query, **result)
            # check if return is a file:
            try:
                self.extract_filenames(response)
            except Exception as exc:
                self.logger.error(
                    f"Unable to extract filenames: {exc}"
                )
            try:
                return self.as_markdown(
                    response
                ), response
            except Exception as exc:
                self.logger.exception(
                    f"Error on response: {exc}"
                )
                return result.get('output', None), None
        except Exception as e:
            return result, e
    finally:
        # Undo the temporary override on every exit path so one failed call
        # cannot leave the instance bound to the caller-supplied LLM.
        if swapped:
            self._llm = original_llm
            self._agent = original_agent
|
|
321
|
+
|
|
322
|
+
def _configure_python_tool(self, df_locals: dict, **kwargs) -> PythonAstREPLTool:
    """Configure the Python tool.

    Creates a ``PythonAstREPLTool`` whose local namespace is ``df_locals``
    and runs a setup snippet that imports the analysis libraries.

    Args:
        df_locals: Namespace exposed to the tool; an empty
            ``execution_results`` entry is added to it.
        **kwargs: Forwarded to ``PythonAstREPLTool``; an optional
            ``globals`` entry is used as the tool's globals.

    Returns:
        The configured tool. Setup failures are logged, not raised.
    """
    df_locals['execution_results'] = {}
    # Patch the tool class so every instance shares one namespace for globals
    # and locals. Patch only once: re-wrapping on each call would stack one
    # more wrapper around ``__init__`` every time.
    if not getattr(PythonAstREPLTool.__init__, '_shares_scope', False):
        PythonAstREPLTool_init = PythonAstREPLTool.__init__

        def PythonAstREPLTool_init_wrapper(self, *args, **kwargs):
            PythonAstREPLTool_init(self, *args, **kwargs)
            self.globals = self.locals

        PythonAstREPLTool_init_wrapper._shares_scope = True
        PythonAstREPLTool.__init__ = PythonAstREPLTool_init_wrapper
    # Pop ``globals`` so it is not passed twice (explicitly and via kwargs).
    tool_globals = kwargs.pop('globals', {})
    python_tool = PythonAstREPLTool(
        locals=df_locals,
        globals=tool_globals,
        verbose=True,
        **kwargs
    )

    # Add essential library imports and helper functions
    setup_code = """
# Ensure essential libraries are imported
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter, defaultdict
from parrot.bots.tools import quick_eda, generate_eda_report, list_available_dataframes, create_plot, generate_pdf_from_html

# Set plotting style
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('Set2')

# Verify pandas is loaded correctly
print(f"Pandas version: {pd.__version__}")
"""
    try:
        python_tool.run(setup_code)
    except Exception as e:
        self.logger.error(
            f"Error setting up python tool: {e}"
        )
    print(':: PYTHON TOOL > ', python_tool)
    return python_tool
|
|
366
|
+
|
|
367
|
+
def _metrics_guide(self, df_key: str, df_name: str, columns: list) -> str:
    """Generate a markdown guide for the dataframe columns.

    Args:
        df_key: Positional alias of the dataframe (e.g. ``df1``).
        df_name: Name under which the dataframe is stored in ``self.df``.
        columns: Column labels to describe.

    Returns:
        A five-column markdown table (category, column name, dtype,
        dataframe alias, dataframe name) followed by a note that the alias
        and the name refer to the same dataframe.
    """
    frame = self.df[df_name]
    parts = [
        "\n| Category | Column Name | Type | DataFrame | Dataframe Name |\n",
        # One separator cell per header cell, otherwise the table is invalid.
        "|----------|-------------|------|-----------|----------------|\n",
    ]
    for column in columns:
        column_name = str(column)
        # The text before the first "_" is the category; columns without a
        # prefix fall back to the dataframe name.
        prefix, separator, _ = column_name.partition('_')
        column_category = prefix if separator and prefix else df_name
        column_type = str(frame[column].dtype)
        # Rows must be contiguous: blank lines would end the table.
        parts.append(
            f"| {column_category} | {column_name} | {column_type} | {df_key} | {df_name} |\n"
        )
    # Add a note about the dataframe
    parts.append(f"\nNote: {df_key} is also available as {df_name}\n")
    return "".join(parts)
|
|
388
|
+
|
|
389
|
+
def define_prompt(self, prompt, **kwargs):
    """Build the prompt prefix and suffix used by the pandas agent.

    The method describes every dataframe in ``self.df`` (shape, column guide,
    summary statistics and, for tool-calling agents, the first rows), exposes
    the dataframes to the Python tool, and fills the prompt template.

    Args:
        prompt: Base system prompt, substituted as ``system_prompt_base``.
        **kwargs: Forwarded both to ``_configure_python_tool`` and to the
            template substitution.

    Side effects:
        Sets ``self.agent_report_dir`` (creating the directory), fills
        ``self.df_locals``, appends the Python tool to ``self.tools`` and sets
        ``self._prompt_prefix`` / ``self._prompt_suffix``.
    """
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    # Reports for this agent live in a folder named after its chatbot id.
    self.agent_report_dir = self._static_path.joinpath(str(self.chatbot_id))
    if self.agent_report_dir.exists() is False:
        self.agent_report_dir.mkdir(parents=True, exist_ok=True)
    # Add dataframe information
    num_dfs = len(self.df)
    self.df_locals['agent_report_dir'] = self.agent_report_dir
    df_info = ''
    for i, (df_name, df) in enumerate(self.df.items()):
        df_key = f"df{i + 1}"
        # Each dataframe is reachable both by its name and by its dfN alias.
        self.df_locals[df_name] = df
        self.df_locals[df_key] = df
        row_count = len(df)
        self.df_locals[f"{df_key}_row_count"] = row_count
        # Get basic dataframe info
        df_shape = f"DataFrame Shape: {df.shape[0]} rows × {df.shape[1]} columns"
        df_columns = self._metrics_guide(df_key, df_name, df.columns.tolist())
        # Generate summary statistics
        # Braces are doubled so the tables survive later template formatting.
        summary_stats = brace_escape(df.describe(include='all').to_markdown())
        df_head = brace_escape(df.head(4).to_markdown())
        # Create df_info block
        if self.agent_type == "tool-calling":
            df_info += f"""
## Dataframe Name: {df_key}:
## DataFrame: {df_key} (also accessible as {df_name} in Python code)

**Shape {df_key}**: {df_shape}

**Column Details (Name, Type, Category)**:
{df_columns}

**First 4 Rows (`print({df_key}.head(4).to_markdown())`)**:
{df_head}

**Summary Statistics (`print({df_key}.describe(include='all').to_markdown())`)**:
{summary_stats}
"""
        else:
            df_info += f"""
## DataFrame {df_key}:

### {df_key} Shape:
{df_shape}

### {df_key} Info:
{df_columns}

### {df_key} Summary Statistics:
{summary_stats}
"""
    # Configure Python tool:
    python_tool = self._configure_python_tool(
        df_locals=self.df_locals,
        **kwargs
    )
    # Add the Python tool to the tools list
    # NOTE(review): every call appends one more tool to self.tools — confirm
    # this method is only expected to run once per agent.
    self.tools.append(python_tool)
    # List of Tools:
    list_of_tools = ""
    for tool in self.tools:
        name = tool.name
        description = tool.description  # noqa  pylint: disable=E1101
        list_of_tools += f'- {name}: {description}\n'
    list_of_tools += "\n"
    tools_names = [tool.name for tool in self.tools]
    capabilities = ''
    if self._capabilities:
        capabilities = "**Your Capabilities:**\n"
        capabilities += self.sanitize_prompt_text(self._capabilities) + "\n"
    # Create the prompt
    sanitized_backstory = ''
    if self.backstory:
        sanitized_backstory = self.sanitize_prompt_text(self.backstory)
    tmpl = Template(self._prompt_template)
    # safe_substitute leaves unknown $placeholders untouched instead of raising.
    self._prompt_prefix = tmpl.safe_substitute(
        name=self.name,
        description=self.description,
        list_of_tools=list_of_tools,
        backstory=sanitized_backstory,
        capabilities=capabilities,
        today_date=now,
        system_prompt_base=prompt,
        tools=", ".join(tools_names),
        format_instructions=self._format_instructions.format(
            tool_names=", ".join(tools_names)),
        df_info=df_info,
        num_dfs=num_dfs,
        rationale=self.rationale,
        agent_report_dir=self.agent_report_dir,
        **kwargs
    )
    # Tool-calling agents get a suffix that repeats the dataframe details;
    # other agent types use the generic agent suffix.
    if self.agent_type == "tool-calling":
        tmpl = Template(TOOL_CALLING_PROMPT_SUFFIX)
        self._prompt_suffix = tmpl.safe_substitute(
            df_info=df_info,
            num_dfs=num_dfs
        )
    else:
        self._prompt_suffix = AGENT_PROMPT_SUFFIX
|
|
489
|
+
|
|
490
|
+
def default_backstory(self) -> str:
    """Return the backstory text used when none is configured."""
    return (
        "You are a helpful assistant built to provide comprehensive guidance "
        "and support on data calculations and data analysis working with "
        "pandas dataframes."
    )
|
|
492
|
+
|
|
493
|
+
@staticmethod
async def call_qs(queries: list) -> Dict[str, pd.DataFrame]:
    """
    Run each QuerySource slug and collect the resulting dataframes.

    Args:
        queries: Slugs to execute; every entry must be a string.

    Returns:
        A dictionary mapping each slug to the DataFrame it produced.

    Raises:
        ValueError: If an entry is not a string, the query reports an
            error, the result is not a DataFrame, or execution fails.
    """
    results = {}
    for slug in queries:
        if not isinstance(slug, str):
            raise ValueError(
                f"Query {slug} is not a string."
            )
        # now, the only query accepted is a slug:
        try:
            frame, error = await QS(slug=slug).query(output_format='pandas')
            if error:
                raise ValueError(
                    f"Query {slug} fail with error {error}."
                )
            if not isinstance(frame, pd.DataFrame):
                raise ValueError(
                    f"Query {slug} is not returning a dataframe."
                )
            results[slug] = frame
        except ValueError:
            # Validation errors raised above are passed through untouched.
            raise
        except Exception as e:
            raise ValueError(
                f"Error executing Query {slug}: {e}"
            )
    return results
|
|
530
|
+
|
|
531
|
+
@staticmethod
async def call_multiquery(query: dict) -> Dict[str, pd.DataFrame]:
    """
    Execute a MultiQuery definition and return its results.

    Args:
        query: MultiQuery definition. Its ``queries`` and ``files`` entries
            are passed to ``MultiQS`` separately; the remaining keys are
            passed as the ``query`` option. The caller's dictionary is left
            untouched, so the same definition can be executed again.

    Returns:
        The dictionary of results produced by ``MultiQS.execute()``.

    Raises:
        ValueError: If neither queries nor files are given, execution
            fails, or the result is not a dictionary.
    """
    # Work on a shallow copy: popping from the caller's dict would strip the
    # definition and make a second execution fail.
    options = dict(query)
    _queries = options.pop('queries', {})
    _files = options.pop('files', {})
    if not _queries and not _files:
        raise ValueError(
            "Queries or files are required."
        )
    try:
        ## Step 1: Running all Queries and Files on QueryObject
        qs = MultiQS(
            slug=[],
            queries=_queries,
            files=_files,
            query=options,
            conditions={},
            return_all=True
        )
        result, _ = await qs.execute()
    except Exception as e:
        raise ValueError(
            f"Error executing MultiQuery: {e}"
        ) from e
    if not isinstance(result, dict):
        raise ValueError(
            "MultiQuery is not returning a dictionary."
        )
    # MultiQuery returns a dictionary with the results
    return result
|
|
568
|
+
|
|
569
|
+
@classmethod
async def gen_data(
    cls,
    query: Union[list, dict],
    agent_name: Optional[str] = None,
    refresh: bool = False,
    cache_expiration: int = 48,
    no_cache: bool = False
) -> Dict[str, pd.DataFrame]:
    """
    gen_data.

    Generate the dataframes required for the agent to work, with Redis caching support.

    Parameters:
    -----------
    query : Union[list, dict]
        The query or queries to execute to generate dataframes.
    agent_name : Optional[str]
        Name used to build the cache key. Falls back to the class
        ``chatbot_id`` when available; without any name the cache is
        skipped entirely.
    refresh : bool
        If True, forces regeneration of dataframes even if cached versions exist.
    cache_expiration : int
        Number of hours to keep the cached dataframes (default: 48).
    no_cache : bool
        If True, freshly generated dataframes are not written to the cache.
        Existing cached data is still read unless ``refresh`` is True.

    Returns:
    --------
    Dict[str, pd.DataFrame]
        A Dictionary of named pandas DataFrames generated from the queries.
    """
    # ``chatbot_id`` may not exist on the class, so look it up defensively.
    cache_name = agent_name or getattr(cls, 'chatbot_id', None)

    # Without a name every agent would share one "agent_None" cache entry,
    # so caching only happens when a name is known.
    if cache_name and not refresh:
        cached_dfs = await cls._get_cached_data(cache_name)
        if cached_dfs:
            return cached_dfs

    # Generate dataframes from query if no cache exists or refresh is True
    dfs = await cls._execute_query(query)

    if cache_name and no_cache is False:
        await cls._cache_data(cache_name, dfs, cache_expiration)
    return dfs
|
|
616
|
+
|
|
617
|
+
@classmethod
async def _execute_query(cls, query: Union[list, dict]) -> Dict[str, pd.DataFrame]:
    """Dispatch the query to the matching engine and return its dataframes.

    A dict is run through ``call_multiquery``; a string or list of slugs is
    run through ``call_qs`` (a single string is wrapped in a list).

    Raises:
        ValueError: If the query has an unsupported type, or the engine
            raised ``ValueError``.
    """
    if isinstance(query, dict):
        # is a MultiQuery execution, use the MultiQS class engine to do it:
        runner = cls.call_multiquery
        payload = query
    elif isinstance(query, (str, list)):
        runner = cls.call_qs
        payload = [query] if isinstance(query, str) else query
    else:
        raise ValueError(
            f"Expected a list of queries or a dictionary, got {type(query)}"
        )
    try:
        return await runner(payload)
    except ValueError as e:
        raise ValueError(f"Error creating Query For Agent: {e}")
|
|
637
|
+
|
|
638
|
+
@classmethod
async def _get_redis_connection(cls):
    """Create a Redis client for ``REDIS_HISTORY_URL`` and return it.

    The client is created with ``decode_responses=True`` and awaited before
    being returned.
    """
    client = aioredis.Redis.from_url(
        REDIS_HISTORY_URL,
        decode_responses=True
    )
    return await client
|
|
647
|
+
|
|
648
|
+
@classmethod
async def _get_cached_data(cls, agent_name: str) -> Optional[Dict[str, pd.DataFrame]]:
    """
    Retrieve cached data from Redis if they exist.

    The dataframes are read from the hash ``agent_<agent_name>``; each field
    holds one dataframe serialised as JSON records.

    Returns None if no cache exists or on error. The Redis connection is
    closed on every path.
    """
    redis_conn = None
    try:
        redis_conn = await cls._get_redis_connection()
        # One round trip: a missing key simply yields an empty mapping.
        cached = await redis_conn.hgetall(f"agent_{agent_name}")
        dataframes = {
            df_key: pd.DataFrame.from_records(json_decoder(df_json))
            for df_key, df_json in cached.items()
            if df_json
        }
        return dataframes if dataframes else None
    except Exception as e:
        # Log the error but continue execution without cache
        logging.error(f"Error retrieving cache: {e}")
        return None
    finally:
        if redis_conn is not None:
            try:
                await redis_conn.close()
            except Exception as e:
                # Closing is best effort; it must not mask the result.
                logging.error(f"Error closing cache connection: {e}")
|
|
686
|
+
|
|
687
|
+
@classmethod
async def _cache_data(
    cls,
    agent_name: str,
    dataframes: Dict[str, pd.DataFrame],
    cache_expiration: int
) -> None:
    """
    Cache the given dataframes in Redis.

    The dataframes are stored as JSON records under a hash key named after the agent.

    Args:
        agent_name: Name used to build the ``agent_<agent_name>`` key.
        dataframes: Dataframes to store, keyed by name. Nothing is done
            when this is empty.
        cache_expiration: Lifetime of the cache entry, in hours.

    Errors are logged, never raised, and the Redis connection is closed on
    every path.
    """
    redis_conn = None
    try:
        if not dataframes:
            return

        redis_conn = await cls._get_redis_connection()
        key = f"agent_{agent_name}"

        # Deleting the key drops every field of any previous cache entry.
        await redis_conn.delete(key)

        # Store each dataframe under the agent's hash
        for df_key, df in dataframes.items():
            df_json = json_encoder(df.to_dict(orient='records'))
            await redis_conn.hset(key, df_key, df_json)

        # Set expiration time
        expiration = timedelta(hours=cache_expiration)
        await redis_conn.expire(key, int(expiration.total_seconds()))

        logging.info(
            f"Data was cached for agent {agent_name} with expiration of {cache_expiration} hours"
        )
    except Exception as e:
        # Log the error but continue execution
        logging.error(f"Error caching dataframes: {e}")
    finally:
        if redis_conn is not None:
            try:
                await redis_conn.close()
            except Exception as e:
                # Closing is best effort; caching must never raise.
                logging.error(f"Error closing cache connection: {e}")
|