ai-parrot 0.8.3__cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ai-parrot might be problematic. Click here for more details.
- ai_parrot-0.8.3.dist-info/LICENSE +21 -0
- ai_parrot-0.8.3.dist-info/METADATA +306 -0
- ai_parrot-0.8.3.dist-info/RECORD +128 -0
- ai_parrot-0.8.3.dist-info/WHEEL +6 -0
- ai_parrot-0.8.3.dist-info/top_level.txt +2 -0
- parrot/__init__.py +30 -0
- parrot/bots/__init__.py +5 -0
- parrot/bots/abstract.py +1115 -0
- parrot/bots/agent.py +492 -0
- parrot/bots/basic.py +9 -0
- parrot/bots/bose.py +17 -0
- parrot/bots/chatbot.py +271 -0
- parrot/bots/cody.py +17 -0
- parrot/bots/copilot.py +117 -0
- parrot/bots/data.py +730 -0
- parrot/bots/dataframe.py +103 -0
- parrot/bots/hrbot.py +15 -0
- parrot/bots/interfaces/__init__.py +1 -0
- parrot/bots/interfaces/retrievers.py +12 -0
- parrot/bots/notebook.py +619 -0
- parrot/bots/odoo.py +17 -0
- parrot/bots/prompts/__init__.py +41 -0
- parrot/bots/prompts/agents.py +91 -0
- parrot/bots/prompts/data.py +214 -0
- parrot/bots/retrievals/__init__.py +1 -0
- parrot/bots/retrievals/constitutional.py +19 -0
- parrot/bots/retrievals/multi.py +122 -0
- parrot/bots/retrievals/retrieval.py +610 -0
- parrot/bots/tools/__init__.py +7 -0
- parrot/bots/tools/eda.py +325 -0
- parrot/bots/tools/pdf.py +50 -0
- parrot/bots/tools/plot.py +48 -0
- parrot/bots/troc.py +16 -0
- parrot/conf.py +170 -0
- parrot/crew/__init__.py +3 -0
- parrot/crew/tools/__init__.py +22 -0
- parrot/crew/tools/bing.py +13 -0
- parrot/crew/tools/config.py +43 -0
- parrot/crew/tools/duckgo.py +62 -0
- parrot/crew/tools/file.py +24 -0
- parrot/crew/tools/google.py +168 -0
- parrot/crew/tools/gtrends.py +16 -0
- parrot/crew/tools/md2pdf.py +25 -0
- parrot/crew/tools/rag.py +42 -0
- parrot/crew/tools/search.py +32 -0
- parrot/crew/tools/url.py +21 -0
- parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/handlers/__init__.py +4 -0
- parrot/handlers/agents.py +292 -0
- parrot/handlers/bots.py +196 -0
- parrot/handlers/chat.py +192 -0
- parrot/interfaces/__init__.py +6 -0
- parrot/interfaces/database.py +27 -0
- parrot/interfaces/http.py +805 -0
- parrot/interfaces/images/__init__.py +0 -0
- parrot/interfaces/images/plugins/__init__.py +18 -0
- parrot/interfaces/images/plugins/abstract.py +58 -0
- parrot/interfaces/images/plugins/exif.py +709 -0
- parrot/interfaces/images/plugins/hash.py +52 -0
- parrot/interfaces/images/plugins/vision.py +104 -0
- parrot/interfaces/images/plugins/yolo.py +66 -0
- parrot/interfaces/images/plugins/zerodetect.py +197 -0
- parrot/llms/__init__.py +1 -0
- parrot/llms/abstract.py +69 -0
- parrot/llms/anthropic.py +58 -0
- parrot/llms/gemma.py +15 -0
- parrot/llms/google.py +44 -0
- parrot/llms/groq.py +67 -0
- parrot/llms/hf.py +45 -0
- parrot/llms/openai.py +61 -0
- parrot/llms/pipes.py +114 -0
- parrot/llms/vertex.py +89 -0
- parrot/loaders/__init__.py +9 -0
- parrot/loaders/abstract.py +628 -0
- parrot/loaders/files/__init__.py +0 -0
- parrot/loaders/files/abstract.py +39 -0
- parrot/loaders/files/text.py +63 -0
- parrot/loaders/txt.py +26 -0
- parrot/manager.py +333 -0
- parrot/models.py +504 -0
- parrot/py.typed +0 -0
- parrot/stores/__init__.py +11 -0
- parrot/stores/abstract.py +248 -0
- parrot/stores/chroma.py +188 -0
- parrot/stores/duck.py +162 -0
- parrot/stores/embeddings/__init__.py +10 -0
- parrot/stores/embeddings/abstract.py +46 -0
- parrot/stores/embeddings/base.py +52 -0
- parrot/stores/embeddings/bge.py +20 -0
- parrot/stores/embeddings/fastembed.py +17 -0
- parrot/stores/embeddings/google.py +18 -0
- parrot/stores/embeddings/huggingface.py +20 -0
- parrot/stores/embeddings/ollama.py +14 -0
- parrot/stores/embeddings/openai.py +26 -0
- parrot/stores/embeddings/transformers.py +21 -0
- parrot/stores/embeddings/vertexai.py +17 -0
- parrot/stores/empty.py +10 -0
- parrot/stores/faiss.py +160 -0
- parrot/stores/milvus.py +397 -0
- parrot/stores/postgres.py +653 -0
- parrot/stores/qdrant.py +170 -0
- parrot/tools/__init__.py +23 -0
- parrot/tools/abstract.py +68 -0
- parrot/tools/asknews.py +33 -0
- parrot/tools/basic.py +51 -0
- parrot/tools/bby.py +359 -0
- parrot/tools/bing.py +13 -0
- parrot/tools/docx.py +343 -0
- parrot/tools/duck.py +62 -0
- parrot/tools/execute.py +56 -0
- parrot/tools/gamma.py +28 -0
- parrot/tools/google.py +170 -0
- parrot/tools/gvoice.py +301 -0
- parrot/tools/results.py +278 -0
- parrot/tools/stack.py +27 -0
- parrot/tools/weather.py +70 -0
- parrot/tools/wikipedia.py +58 -0
- parrot/tools/zipcode.py +198 -0
- parrot/utils/__init__.py +2 -0
- parrot/utils/parsers/__init__.py +5 -0
- parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/toml.py +11 -0
- parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/uv.py +11 -0
- parrot/version.py +10 -0
- resources/users/__init__.py +5 -0
- resources/users/handlers.py +13 -0
- resources/users/models.py +205 -0
parrot/bots/notebook.py
ADDED
|
@@ -0,0 +1,619 @@
|
|
|
1
|
+
"""
|
|
2
|
+
NotebookAgent - Specialized agent for handling Word documents, converting to Markdown,
|
|
3
|
+
and generating narrated summaries.
|
|
4
|
+
"""
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
import asyncio
|
|
8
|
+
import json
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from datetime import datetime, timezone
|
|
11
|
+
from typing import List, Optional, Dict, Any, Union
|
|
12
|
+
from langchain.agents import create_openai_tools_agent
|
|
13
|
+
from langchain.agents.agent import AgentExecutor
|
|
14
|
+
from langchain.tools import BaseTool
|
|
15
|
+
from langchain_experimental.tools.python.tool import PythonAstREPLTool
|
|
16
|
+
from langchain_core.messages import AIMessage
|
|
17
|
+
from langchain.prompts import SystemMessagePromptTemplate
|
|
18
|
+
|
|
19
|
+
from navconfig import BASE_DIR
|
|
20
|
+
from parrot.conf import BASE_STATIC_URL
|
|
21
|
+
from parrot.tools import WordToMarkdownTool, GoogleVoiceTool
|
|
22
|
+
from parrot.tools.abstract import AbstractTool
|
|
23
|
+
from parrot.utils import SafeDict
|
|
24
|
+
from parrot.models import AgentResponse
|
|
25
|
+
|
|
26
|
+
from .agent import BasicAgent
|
|
27
|
+
|
|
28
|
+
# Define format instructions directly instead of importing
|
|
29
|
+
FORMAT_INSTRUCTIONS = """
|
|
30
|
+
FORMAT INSTRUCTIONS:
|
|
31
|
+
When responding to user queries, follow these formatting guidelines:
|
|
32
|
+
1. Use markdown for structured responses
|
|
33
|
+
2. Use bullet points for lists
|
|
34
|
+
3. Use headers for sections (# for main headers, ## for subheaders)
|
|
35
|
+
4. Include code blocks with triple backticks when showing code
|
|
36
|
+
5. Format tables using markdown table syntax
|
|
37
|
+
6. For document analysis, highlight key findings and insights
|
|
38
|
+
7. When generating summaries, organize by main themes or sections
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
# Define system prompts for document processing
|
|
42
|
+
NOTEBOOK_PROMPT_PREFIX = """
|
|
43
|
+
You are a professional document assistant named {name}, specialized in analysis, summarization, and extraction of key information from text documents. You have access to the following tools:
|
|
44
|
+
|
|
45
|
+
**Answer the following questions as best you can. You have access to the following tools:**
|
|
46
|
+
|
|
47
|
+
- {tools}\n
|
|
48
|
+
|
|
49
|
+
Use these tools effectively to provide accurate and comprehensive document analysis:
|
|
50
|
+
{list_of_tools}
|
|
51
|
+
|
|
52
|
+
Current date: {today_date}
|
|
53
|
+
|
|
54
|
+
{system_prompt_base}
|
|
55
|
+
|
|
56
|
+
{rationale}
|
|
57
|
+
|
|
58
|
+
## Document Analysis Capabilities
|
|
59
|
+
|
|
60
|
+
When analyzing documents, follow these comprehensive guidelines:
|
|
61
|
+
|
|
62
|
+
1. **Document Conversion and Processing**
|
|
63
|
+
- Use the word_to_markdown_tool to convert Word documents to Markdown format
|
|
64
|
+
- Process the resulting markdown to identify structure, sections, and key elements
|
|
65
|
+
- Preserve document formatting and structure when relevant to understanding
|
|
66
|
+
|
|
67
|
+
2. **Content Analysis**
|
|
68
|
+
- Identify key themes, topics, and main arguments in the document
|
|
69
|
+
- Extract important facts, figures, quotes, and statistics
|
|
70
|
+
- Recognize patterns in the content and logical structure
|
|
71
|
+
- Analyze tone, style, and language used in the document
|
|
72
|
+
|
|
73
|
+
3. **Summarization Techniques**
|
|
74
|
+
- Create executive summaries capturing the essential points
|
|
75
|
+
- Develop section-by-section summaries for longer documents
|
|
76
|
+
- Use bullet points for key takeaways
|
|
77
|
+
- Preserve the author's original intent and meaning
|
|
78
|
+
- Highlight the most important insights and conclusions
|
|
79
|
+
|
|
80
|
+
4. **Audio Narration**
|
|
81
|
+
- When requested, generate clear, well-structured audio summaries
|
|
82
|
+
- Format text for natural-sounding speech using GoogleVoiceTool
|
|
83
|
+
- Structure narration with clear introduction, body, and conclusion
|
|
84
|
+
- Use transitions between major points and sections
|
|
85
|
+
- Emphasize key information through pacing and structure
|
|
86
|
+
|
|
87
|
+
5. **Special Document Elements**
|
|
88
|
+
- Properly handle tables, charts, and figures by describing their content
|
|
89
|
+
- Extract and process lists, bullet points, and numbered items
|
|
90
|
+
- Identify and analyze headers, footers, and metadata
|
|
91
|
+
- Process citations, references, and bibliographic information
|
|
92
|
+
|
|
93
|
+
6. **Output Formatting**
|
|
94
|
+
- Use markdown formatting for structured responses
|
|
95
|
+
- Organize information hierarchically with headers and subheaders
|
|
96
|
+
- Present extracted information in tables when appropriate
|
|
97
|
+
- Use code blocks for technical content or examples
|
|
98
|
+
- Highlight key quotes or important excerpts
|
|
99
|
+
|
|
100
|
+
If a document is complex or lengthy, break it down into logical sections for better analysis. Always preserve the original meaning and context of the document while making the content more accessible to the user.
|
|
101
|
+
|
|
102
|
+
To analyze a document, first convert it from Word to Markdown using the word_to_markdown_tool, then work with the markdown content to provide your analysis, summary, or narration.
|
|
103
|
+
|
|
104
|
+
When asked to generate an audio summary, follow these steps:
|
|
105
|
+
1. Create a clear, concise summary of the document
|
|
106
|
+
2. Structure the summary for verbal presentation
|
|
107
|
+
3. Use the GoogleVoiceTool to generate the audio narration
|
|
108
|
+
4. Return both the text summary and the audio file information
|
|
109
|
+
|
|
110
|
+
{format_instructions}
|
|
111
|
+
|
|
112
|
+
Always begin by understanding what the user wants to do with their document. Ask for clarification if needed.
|
|
113
|
+
Be helpful, professional, and thorough in your document analysis.
|
|
114
|
+
"""
|
|
115
|
+
|
|
116
|
+
NOTEBOOK_PROMPT_SUFFIX = """
|
|
117
|
+
Always begin by understanding what the user wants to do with their document. Ask for clarification if needed.
|
|
118
|
+
Be helpful, professional, and thorough in your document analysis.
|
|
119
|
+
"""
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class NotebookAgent(BasicAgent):
|
|
123
|
+
"""
|
|
124
|
+
An agent specialized for working with documents - converting Word docs to Markdown,
|
|
125
|
+
analyzing content, and generating narrated summaries.
|
|
126
|
+
"""
|
|
127
|
+
|
|
128
|
+
def __init__(
    self,
    name: str = 'Document Assistant',
    agent_type: Optional[str] = None,
    llm: Optional[str] = None,
    tools: Optional[List[AbstractTool]] = None,
    system_prompt: Optional[str] = None,
    human_prompt: Optional[str] = None,
    prompt_template: Optional[str] = None,
    document_url: Optional[str] = None,
    **kwargs
):
    """Set up document state, prompt pieces and output paths, then init the parent.

    Args:
        name: Display name; an empty/None value falls back to
            "Document Assistant".
        agent_type: Agent flavour used later by ``configure``; defaults to
            "react" when not given.
        llm: Forwarded unchanged to the parent constructor.
        tools: Initial tools; forwarded to the parent as a new list.
        system_prompt: Text for the system prompt template; when omitted the
            value of ``default_backstory()`` is used.
        human_prompt: Forwarded unchanged to the parent constructor.
        prompt_template: Stored on ``self._prompt_template``.
        document_url: Optional URL of the document to work on.
        **kwargs: ``capabilities`` and ``format_instructions`` are read here;
            the full mapping is also forwarded to the parent constructor.
    """
    # Per-document state, filled in later by load_document().
    self._document_url = document_url
    self._document_content = None
    self._document_metadata = {}

    # Prompt building blocks consumed by _define_prompt().
    self._prompt_prefix = NOTEBOOK_PROMPT_PREFIX
    self._prompt_suffix = NOTEBOOK_PROMPT_SUFFIX
    self._prompt_template = prompt_template
    self._capabilities: Optional[str] = kwargs.get('capabilities', None)
    self._format_instructions: str = kwargs.get('format_instructions', FORMAT_INSTRUCTIONS)

    self.name = name or "Document Assistant"
    self.description = "An agent specialized for working with documents, converting Word to Markdown, and generating narrated summaries."

    # Output directories live under <BASE_DIR>/static.
    self._static_path = BASE_DIR.joinpath('static')
    self.agent_audio_dir = self._static_path.joinpath('audio', 'agents')
    self.agent_docs_dir = self._static_path.joinpath('docs', 'agents')

    # The system prompt is kept as a template object, not a plain string.
    system_prompt_text = system_prompt or self.default_backstory()
    self.system_prompt = SystemMessagePromptTemplate.from_template(system_prompt_text)

    # Deliberately NOT passing system_prompt to the parent constructor.
    # The resolved name is forwarded so the fallback above is not undone by
    # the parent receiving an empty value, and the tools are copied so that
    # later appends in _init_tools() cannot mutate the caller's list.
    # NOTE(review): assumes the parent stores `name`/`tools` as given - confirm.
    super().__init__(
        name=self.name,
        llm=llm,
        human_prompt=human_prompt,
        tools=list(tools) if tools else [],
        **kwargs
    )
    # Set after the parent init so the parent cannot overwrite it.
    self.agent_type = agent_type or "react"
|
|
173
|
+
|
|
174
|
+
async def configure(self, document_url: str = None, app=None) -> None:
    """Run the parent configuration, register tools and build the executor.

    Args:
        document_url: When truthy, replaces the stored document URL.
        app: Passed through to the parent ``configure``.
    """
    await super().configure(app)

    # Keep the URL given at construction time unless a new one is supplied.
    if document_url:
        self._document_url = document_url

    # Register the document-processing tools that are still missing.
    self._init_tools()

    # Same approach as PandasAgent: agent_type selects the factory, and any
    # other value falls back to ReAct for compatibility with all models.
    kind = self.agent_type
    if kind == 'openai':
        build_agent = self.openai_agent
    elif kind == 'openai-tools':
        build_agent = self.openai_tools_agent
    else:
        build_agent = self.react_agent
    self.agent = build_agent()

    # Wrap the agent in an executor bound to the current tool list.
    self._agent = self.get_executor(self.agent, self.tools)
|
|
196
|
+
|
|
197
|
+
def _define_prompt(self):
    """Build the chat prompt for the agent and store it on ``self.prompt``.

    The prompt has three parts: a short system message, a human message
    produced by filling ``self._prompt_prefix``, and a placeholder named
    ``agent_scratchpad`` for intermediate steps.
    """
    from langchain.prompts import (
        ChatPromptTemplate,
        SystemMessagePromptTemplate,
        HumanMessagePromptTemplate,
        MessagesPlaceholder
    )

    now = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    # One "- name: description" line per tool, followed by a blank line.
    list_of_tools = "".join(
        f'- {tool.name}: {tool.description}\n' for tool in self.tools
    ) + "\n"
    # Comma-separated tool names, used in the prefix and as a partial below.
    tool_names = ", ".join([tool.name for tool in self.tools])

    format_instructions = self._format_instructions or FORMAT_INSTRUCTIONS
    rationale = self._capabilities or ""

    # NOTE(review): the filled text is parsed as a template again below, so
    # literal braces in tool descriptions or instructions would be read as
    # template variables - confirm inputs never contain them.
    final_prompt = self._prompt_prefix.format_map(
        SafeDict(
            today_date=now,
            list_of_tools=list_of_tools,
            system_prompt_base=self.default_backstory(),
            format_instructions=format_instructions,
            rationale=rationale,
            name=self.name,
            tools=tool_names
        )
    )

    system_message = (
        "\n"
        f"Today is {now}. You are {self.name}, a document processing assistant.\n"
        "Your job is to help users analyze documents, extract information, and generate summaries.\n"
        "\n"
        "When working with documents, first convert them using the word_to_markdown_tool,\n"
        "then analyze the content and provide insights or summaries as requested.\n"
    )

    # NOTE(review): no `{input}` placeholder is visible in either message -
    # confirm how the user's question reaches the model.
    chat_prompt = ChatPromptTemplate.from_messages([
        SystemMessagePromptTemplate.from_template(system_message),
        HumanMessagePromptTemplate.from_template(final_prompt),
        # Slot for the agent's scratchpad / intermediate steps.
        MessagesPlaceholder(variable_name="agent_scratchpad")
    ])

    self.prompt = chat_prompt.partial(
        tools=self.tools,
        tool_names=tool_names,
        name=self.name
    )
|
|
254
|
+
|
|
255
|
+
async def load_document(self, url: str) -> Dict[str, Any]:
    """Convert the Word document at ``url`` to Markdown and cache it.

    The conversion is delegated to the registered tool named
    ``word_to_markdown_tool``. On success the Markdown is stored on
    ``self._document_content`` and its metadata on
    ``self._document_metadata``.

    Args:
        url: URL of the Word document to load.

    Returns:
        ``{"content", "metadata", "success": True}`` on success, otherwise a
        dictionary with a single ``"error"`` message. This method does not
        raise for tool failures.
    """
    if not url:
        return {"error": "No document URL provided"}

    word_tool = next((tool for tool in self.tools if tool.name == "word_to_markdown_tool"), None)
    if not word_tool:
        return {"error": "WordToMarkdownTool not available"}

    try:
        result = await word_tool._arun(url)
    except Exception as e:
        # Tool failures are reported to the caller, not propagated.
        return {"error": f"Error loading document: {str(e)}"}

    # The code below needs a mapping; report anything else explicitly
    # instead of failing with an AttributeError on `.get`.
    if not isinstance(result, dict):
        return {
            "error": f"Error loading document: unexpected tool result of type {type(result).__name__}"
        }

    if not result.get("success", False):
        return {"error": result.get("error", "Unknown error loading document")}

    self._document_content = result.get("markdown", "")
    self._document_metadata = {
        "source_url": url,
        # Timezone-aware, matching the UTC date used when building prompts.
        "loaded_at": datetime.now(timezone.utc).isoformat(),
        "format": "markdown"
    }

    return {
        "content": self._document_content,
        "metadata": self._document_metadata,
        "success": True
    }
|
|
294
|
+
|
|
295
|
+
async def generate_summary_direct(self, max_length: int = 500) -> Dict[str, Any]:
    """Summarize the loaded document with the LLM directly and narrate it.

    The first 10000 characters of ``self._document_content`` are sent to
    ``self._llm.ainvoke``; the resulting text is then handed to
    ``self._generate_audio``.

    Args:
        max_length: Currently not used by this method.
            NOTE(review): decide whether it should bound the summary and in
            which unit (words/characters).

    Returns:
        ``{"summary", "audio", "success": True}`` on success, otherwise a
        dictionary with a single ``"error"`` message. Errors are reported in
        the result rather than raised.
    """
    if not self._document_content:
        print("Error: No document content available to summarize")
        return {"error": "No document content available to summarize"}

    # Debug output: size of the document being summarized.
    content_length = len(self._document_content)
    print(f"Generating summary directly with LLM for document with {content_length} characters")

    try:
        prompt = (
            "\n"
            "I need you to analyze this document and create a clear summary.\n"
            "\n"
            f"{self._document_content[:10000]}\n"
            "\n"
            "Please provide a comprehensive summary that:\n"
            "1. Captures the main points and themes\n"
            "2. Is well-structured with headers for major sections\n"
            "3. Uses bullet points for key details when appropriate\n"
            "4. Is suitable for audio narration\n"
            "\n"
            "Focus on providing value and clarity in your summary.\n"
        )

        print("Sending prompt to LLM...")
        print(f"Prompt preview: {prompt[:100]}...")

        # Call the LLM directly (not the agent).
        response = await self._llm.ainvoke(prompt)

        # Chat models answer with a message object whose text is in
        # `.content`; completion-style models answer with a plain string.
        content = getattr(response, "content", response)
        if not content:
            print("Warning: Generated summary is empty!")
            return {"error": "Failed to generate summary"}
        summary_text = content if isinstance(content, str) else str(content)

        print(f"Summary generated, length: {len(summary_text)} characters")

        print("Generating audio...")
        audio_info = await self._generate_audio(summary_text)

        return {
            "summary": summary_text,
            "audio": audio_info,
            "success": True
        }
    except Exception as e:
        import traceback
        print(f"Error generating summary: {str(e)}")
        print(traceback.format_exc())
        return {"error": f"Error generating summary: {str(e)}"}
|
|
348
|
+
|
|
349
|
+
async def _preprocess_text_for_speech(self, text: str) -> str:
    """Turn Markdown into plain, speakable text for voice synthesis.

    Formatting marks are removed while the wording is kept, so the result
    reads naturally when narrated.

    Args:
        text: Markdown-formatted text.

    Returns:
        The text without Markdown syntax, with paragraph breaks turned into
        sentence pauses and whitespace/periods normalized.
    """
    original_length = len(text)

    # Fenced code blocks are not useful in audio. They must go first: the
    # inline-code rule below would otherwise consume the fence backticks and
    # leave the code in the narration.
    text = re.sub(r'```.*?```', '', text, flags=re.DOTALL)

    # List markers are stripped before emphasis so that a leading "*" bullet
    # is never paired with an emphasis "*" later on the same line. Only
    # spaces/tabs are consumed so preceding blank lines (paragraph breaks)
    # survive.
    text = re.sub(r'^[ \t]*[\*\-\+][ \t]+', '', text, flags=re.MULTILINE)  # bullets
    text = re.sub(r'^[ \t]*\d+\.[ \t]+', '', text, flags=re.MULTILINE)  # numbered

    # Headings keep their text, without the leading "#" marks.
    text = re.sub(r'^#{1,6}\s+(.*)', r'\1', text, flags=re.MULTILINE)

    # Bold / italic marks are dropped without adding explanatory text.
    text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)  # **bold**
    text = re.sub(r'\*(.*?)\*', r'\1', text)  # *italic*
    text = re.sub(r'__(.*?)__', r'\1', text)  # __bold__
    text = re.sub(r'_(.*?)_', r'\1', text)  # _italic_

    # Other inline Markdown.
    text = re.sub(r'\[(.*?)\]\(.*?\)', r'\1', text)  # links: keep label only
    text = re.sub(r'`(.*?)`', r'\1', text)  # `code`
    text = re.sub(r'~~(.*?)~~', r'\1', text)  # ~~strikethrough~~

    # Pipes are common in tables and carry no meaning when spoken.
    text = re.sub(r'[|]', ' ', text)

    # A colon at the end of a line becomes a sentence break.
    text = re.sub(r':\s*\n', '. ', text)

    # Paragraph breaks become a pause.
    text = re.sub(r'\n{2,}', '. ', text)

    # Normalize whitespace and repeated periods.
    text = re.sub(r'\s{2,}', ' ', text)
    text = re.sub(r'\.{2,}', '.', text)
    text = re.sub(r'\.\s*\.', '.', text)

    # Exactly one space between a sentence end and the next capital letter.
    text = re.sub(r'([.!?])\s+([A-Z])', r'\1 \2', text)

    print(f"Texto preprocesado para síntesis de voz. Longitud original: {original_length}, nueva: {len(text)}")

    return text
|
|
404
|
+
|
|
405
|
+
async def _generate_audio(self, text: str, voice_gender: str = "FEMALE") -> Dict[str, Any]:
    """Narrate ``text`` with the tool registered as ``podcast_generator_tool``.

    The text is first cleaned with ``_preprocess_text_for_speech``. When the
    tool reports a ``file_path`` that exists on disk, ``url`` and
    ``filename`` entries are added to the result.

    Args:
        text: Text to convert to audio.
        voice_gender: Gender of the voice (MALE or FEMALE).
            NOTE(review): not forwarded to the tool by this method - confirm
            whether the tool accepts it.

    Returns:
        The tool's result as a dictionary (a non-JSON string reply is wrapped
        as ``{"message": ...}``), or ``{}`` when the tool is missing, the
        reply is not a mapping, or any error occurs.
    """
    try:
        voice_tool = next((tool for tool in self.tools if tool.name == "podcast_generator_tool"), None)

        if not voice_tool:
            print("Voice tool not found! Available tools: " + ", ".join([t.name for t in self.tools]))
            return {}

        # Make sure the output directory exists.
        os.makedirs(str(self.agent_audio_dir), exist_ok=True)

        # Strip Markdown so the narration reads naturally.
        print("Preprocesando texto para síntesis de voz...")
        processed_text = await self._preprocess_text_for_speech(text)

        print("Generating audio using voice tool (direct query)...")

        # The cleaned text is passed straight to the tool.
        result = await voice_tool._arun(query=processed_text)

        if isinstance(result, str):
            try:
                result = json.loads(result)
            except ValueError:
                # Not JSON (json.JSONDecodeError is a ValueError): keep the
                # raw reply as a message.
                result = {"message": result}

        if not isinstance(result, dict):
            # The lookups below need a mapping.
            print(f"Unexpected voice tool result type: {type(result).__name__}")
            return {}

        print(f"Voice tool result: {result}")

        # Only expose a URL when the reported file really exists.
        if "file_path" in result and os.path.exists(result["file_path"]):
            file_path = result["file_path"]
            # Web URL: swap the static directory prefix for the static base URL.
            url = str(file_path).replace(str(self._static_path), BASE_STATIC_URL)
            result["url"] = url
            result["filename"] = os.path.basename(file_path)
            print(f"Audio generated successfully at: {file_path}")
            print(f"Audio URL: {url}")
        else:
            print("Audio file path not found in result or file doesn't exist")
            if "file_path" in result:
                print(f"Expected path was: {result['file_path']}")
            if "error" in result:
                print(f"Error reported by tool: {result['error']}")

        return result
    except Exception as e:
        import traceback
        print(f"Error generating audio: {e}")
        print(traceback.format_exc())
        return {}
|
|
467
|
+
|
|
468
|
+
def extract_filenames(self, response: AgentResponse) -> Dict[str, Dict[str, Any]]:
    """Collect existing files announced as ``filename: <path>`` in the output.

    Each line of ``response.output`` containing ``filename:`` is inspected;
    when the path after it resolves to an existing file, an entry with its
    content type, resolved path, base name and static URL is recorded.
    If at least one file is found, the mapping is also assigned to
    ``response.filename``.

    Args:
        response: Agent response whose ``output`` text is scanned.

    Returns:
        Mapping of file base name to its details; empty when nothing is
        found or when the response has no text output.
    """
    filenames = {}

    output = getattr(response, 'output', None)
    if not isinstance(output, str):
        # Nothing to scan (e.g. the agent produced no text output).
        return filenames

    for line in output.splitlines():
        if 'filename:' not in line:
            continue
        candidate = line.split('filename:')[1].strip()
        if not candidate:
            continue
        try:
            filename_path = Path(candidate).resolve()
            exists = filename_path.is_file()
        except (OSError, ValueError, RuntimeError):
            # Malformed or unresolvable path: skip it, keep scanning.
            continue
        if not exists:
            continue
        filenames[filename_path.name] = {
            'content_type': self.mimefromext(filename_path.suffix),
            'file_path': filename_path,
            'filename': filename_path.name,
            # Web URL: swap the static directory prefix for the static base URL.
            'url': str(filename_path).replace(str(self._static_path), BASE_STATIC_URL)
        }

    if filenames:
        response.filename = filenames

    return filenames
|
|
498
|
+
|
|
499
|
+
def mimefromext(self, ext: str) -> str:
    """Return the MIME type for a file extension.

    Args:
        ext: Extension including the leading dot (e.g. ``".pdf"``); matched
            case-insensitively.

    Returns:
        The MIME type from the table below, or
        ``"application/octet-stream"`` for anything not listed.
    """
    mime_types = {
        '.csv': 'text/csv',
        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        '.xls': 'application/vnd.ms-excel',
        '.json': 'application/json',
        '.txt': 'text/plain',
        '.html': 'text/html',
        '.htm': 'text/html',
        '.pdf': 'application/pdf',
        '.png': 'image/png',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.gif': 'image/gif',
        '.svg': 'image/svg+xml',
        '.md': 'text/markdown',
        '.ogg': 'audio/ogg',
        '.wav': 'audio/wav',
        '.mp3': 'audio/mpeg',
        '.mp4': 'video/mp4',
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    }
    # File systems often carry upper-case suffixes (".PDF", ".JPG").
    return mime_types.get(ext.lower(), 'application/octet-stream')
|
|
523
|
+
|
|
524
|
+
def default_backstory(self) -> str:
    """Return the built-in backstory text used when no system prompt is given."""
    backstory = (
        "You are a helpful document assistant built to provide analysis, "
        "summaries, and insights from text documents. "
        "You can convert Word documents to Markdown and generate audio summaries."
    )
    return backstory
|
|
526
|
+
|
|
527
|
+
def _init_tools(self):
    """Register the Word-conversion and voice tools unless already present.

    Presence is decided by tool name: ``word_to_markdown_tool`` and
    ``podcast_generator_tool``. Missing tools are appended to ``self.tools``.
    """
    # Names registered before anything is added.
    registered = [tool.name for tool in self.tools]

    if "word_to_markdown_tool" not in registered:
        self.tools.append(WordToMarkdownTool())

    if "podcast_generator_tool" not in registered:
        narrator = GoogleVoiceTool()
        print(f"Added voice tool with name: {narrator.name}")
        self.tools.append(narrator)

    # Report the final tool set.
    final_names = ', '.join([tool.name for tool in self.tools])
    print(f"NotebookAgent initialized with tools: {final_names}")
|
|
547
|
+
|
|
548
|
+
async def process_document_workflow(self, document_url: str) -> Dict[str, Any]:
    """Run the whole pipeline: load the document, summarize it, narrate it.

    Args:
        document_url: URL to the Word document.

    Returns:
        If loading fails, the error dictionary from ``load_document``.
        Otherwise a dictionary with ``document`` (content preview of at most
        500 characters plus metadata), ``summary``, ``audio`` and
        ``success``. When summarization fails, ``success`` is False and the
        message is included under ``error``.
    """
    # Step 1: load and convert the document.
    document_result = await self.load_document(document_url)

    if "error" in document_result:
        return document_result

    # Step 2: summary and audio through the direct LLM path.
    try:
        summary_result = await self.generate_summary_direct()
    except Exception as e:
        print(f"Error in direct summary generation: {e}")
        summary_result = {"error": str(e), "summary": "", "audio": {}}

    # Only a short preview of the document is returned.
    content = self._document_content or ""
    preview = content[:500] + "..." if len(content) > 500 else content

    outcome = {
        "document": {
            "content": preview,
            "metadata": self._document_metadata
        },
        "summary": summary_result.get("summary", ""),
        "audio": summary_result.get("audio", {}),
        # A failed summary must not be reported as a successful run.
        "success": "error" not in summary_result
    }
    if "error" in summary_result:
        outcome["error"] = summary_result["error"]

    return outcome
|
|
584
|
+
|
|
585
|
+
def react_agent(self):
    """Build a ReAct agent from the current LLM, tools and prompt.

    Used as the fallback agent type for compatibility with different LLMs.
    """
    from langchain.agents import create_react_agent

    return create_react_agent(llm=self._llm, tools=self.tools, prompt=self.prompt)
|
|
596
|
+
|
|
597
|
+
def openai_tools_agent(self):
    """Build an OpenAI Tools agent from the current LLM, tools and prompt."""
    from langchain.agents import create_openai_tools_agent

    return create_openai_tools_agent(self._llm, self.tools, self.prompt)
|
|
607
|
+
|
|
608
|
+
def get_executor(self, agent, tools):
    """Wrap ``agent`` and ``tools`` in a verbose ``AgentExecutor``.

    Intermediate steps are returned and the output key is named explicitly.
    """
    from langchain.agents.agent import AgentExecutor

    executor_options = {
        "verbose": True,
        "return_intermediate_steps": True,
        # Explicitly declare the output key.
        "output_keys": ["output"],
    }
    return AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, **executor_options)
|
parrot/bots/odoo.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from .chatbot import Chatbot
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class OddieBot(Chatbot):
    """Chatbot preset named "Oddie", focused on Odoo and ERP topics.

    The class only overrides class-level attributes (identity, company
    contact details, role, goal and specialty area); all behaviour comes
    from ``Chatbot``.
    NOTE(review): how ``Chatbot`` consumes these attributes is not visible
    here - confirm against the base class.
    """
    # Identity and company details.
    name: str = 'Oddie'
    company: str = 'T-ROC Global'
    company_website: str = 'https://www.trocglobal.com'
    # Contact channels: an e-mail address and a survey form URL.
    contact_information = 'communications@trocglobal.com'
    contact_form = 'https://www.surveymonkey.com/r/TROC_Suggestion_Box'
    # Persona description.
    role = "Odoo and ERP Specialist and Odoo Programmer"
    goal = "To provide information and support on Odoo and ERP systems, help with troubleshooting, and answer any questions you may have about any Odoo and ERP systems implementation."
    specialty_area = 'Bring useful information about Odoo ERP, documentation, usage, samples, etc.'
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Collection of useful prompts for Chatbots.
|
|
3
|
+
"""
|
|
4
|
+
from .agents import AGENT_PROMPT, AGENT_PROMPT_SUFFIX, FORMAT_INSTRUCTIONS
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
BASIC_SYSTEM_PROMPT = """
|
|
8
|
+
Your name is $name.
|
|
9
|
+
|
|
10
|
+
You are a $role with several capabilities:
|
|
11
|
+
$capabilities
|
|
12
|
+
|
|
13
|
+
$backstory
|
|
14
|
+
|
|
15
|
+
I am here to help with $goal.
|
|
16
|
+
|
|
17
|
+
$pre_context
|
|
18
|
+
$context
|
|
19
|
+
|
|
20
|
+
$rationale
|
|
21
|
+
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
BASIC_HUMAN_PROMPT = """
|
|
25
|
+
**Chat History:**
|
|
26
|
+
{chat_history}
|
|
27
|
+
|
|
28
|
+
**Human Question:**
|
|
29
|
+
{question}
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
DEFAULT_CAPABILITIES = """
|
|
33
|
+
-Answering factual questions using your knowledge base and based on the provided context.
|
|
34
|
+
-providing explanations, and assisting with various tasks.
|
|
35
|
+
"""
|
|
36
|
+
DEFAULT_GOAL = "to assist users by providing accurate and helpful information based on the provided context and knowledge base."
|
|
37
|
+
DEFAULT_ROLE = "helpful and informative AI assistant"
|
|
38
|
+
DEFAULT_BACKHISTORY = """
|
|
39
|
+
Use the information from the provided knowledge base and provided context of documents to answer users' questions accurately.
|
|
40
|
+
Focus on answering the question directly but in detail.
|
|
41
|
+
"""
|