ai-parrot 0.8.3__cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ai-parrot might be problematic. Click here for more details.
- ai_parrot-0.8.3.dist-info/LICENSE +21 -0
- ai_parrot-0.8.3.dist-info/METADATA +306 -0
- ai_parrot-0.8.3.dist-info/RECORD +128 -0
- ai_parrot-0.8.3.dist-info/WHEEL +6 -0
- ai_parrot-0.8.3.dist-info/top_level.txt +2 -0
- parrot/__init__.py +30 -0
- parrot/bots/__init__.py +5 -0
- parrot/bots/abstract.py +1115 -0
- parrot/bots/agent.py +492 -0
- parrot/bots/basic.py +9 -0
- parrot/bots/bose.py +17 -0
- parrot/bots/chatbot.py +271 -0
- parrot/bots/cody.py +17 -0
- parrot/bots/copilot.py +117 -0
- parrot/bots/data.py +730 -0
- parrot/bots/dataframe.py +103 -0
- parrot/bots/hrbot.py +15 -0
- parrot/bots/interfaces/__init__.py +1 -0
- parrot/bots/interfaces/retrievers.py +12 -0
- parrot/bots/notebook.py +619 -0
- parrot/bots/odoo.py +17 -0
- parrot/bots/prompts/__init__.py +41 -0
- parrot/bots/prompts/agents.py +91 -0
- parrot/bots/prompts/data.py +214 -0
- parrot/bots/retrievals/__init__.py +1 -0
- parrot/bots/retrievals/constitutional.py +19 -0
- parrot/bots/retrievals/multi.py +122 -0
- parrot/bots/retrievals/retrieval.py +610 -0
- parrot/bots/tools/__init__.py +7 -0
- parrot/bots/tools/eda.py +325 -0
- parrot/bots/tools/pdf.py +50 -0
- parrot/bots/tools/plot.py +48 -0
- parrot/bots/troc.py +16 -0
- parrot/conf.py +170 -0
- parrot/crew/__init__.py +3 -0
- parrot/crew/tools/__init__.py +22 -0
- parrot/crew/tools/bing.py +13 -0
- parrot/crew/tools/config.py +43 -0
- parrot/crew/tools/duckgo.py +62 -0
- parrot/crew/tools/file.py +24 -0
- parrot/crew/tools/google.py +168 -0
- parrot/crew/tools/gtrends.py +16 -0
- parrot/crew/tools/md2pdf.py +25 -0
- parrot/crew/tools/rag.py +42 -0
- parrot/crew/tools/search.py +32 -0
- parrot/crew/tools/url.py +21 -0
- parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/handlers/__init__.py +4 -0
- parrot/handlers/agents.py +292 -0
- parrot/handlers/bots.py +196 -0
- parrot/handlers/chat.py +192 -0
- parrot/interfaces/__init__.py +6 -0
- parrot/interfaces/database.py +27 -0
- parrot/interfaces/http.py +805 -0
- parrot/interfaces/images/__init__.py +0 -0
- parrot/interfaces/images/plugins/__init__.py +18 -0
- parrot/interfaces/images/plugins/abstract.py +58 -0
- parrot/interfaces/images/plugins/exif.py +709 -0
- parrot/interfaces/images/plugins/hash.py +52 -0
- parrot/interfaces/images/plugins/vision.py +104 -0
- parrot/interfaces/images/plugins/yolo.py +66 -0
- parrot/interfaces/images/plugins/zerodetect.py +197 -0
- parrot/llms/__init__.py +1 -0
- parrot/llms/abstract.py +69 -0
- parrot/llms/anthropic.py +58 -0
- parrot/llms/gemma.py +15 -0
- parrot/llms/google.py +44 -0
- parrot/llms/groq.py +67 -0
- parrot/llms/hf.py +45 -0
- parrot/llms/openai.py +61 -0
- parrot/llms/pipes.py +114 -0
- parrot/llms/vertex.py +89 -0
- parrot/loaders/__init__.py +9 -0
- parrot/loaders/abstract.py +628 -0
- parrot/loaders/files/__init__.py +0 -0
- parrot/loaders/files/abstract.py +39 -0
- parrot/loaders/files/text.py +63 -0
- parrot/loaders/txt.py +26 -0
- parrot/manager.py +333 -0
- parrot/models.py +504 -0
- parrot/py.typed +0 -0
- parrot/stores/__init__.py +11 -0
- parrot/stores/abstract.py +248 -0
- parrot/stores/chroma.py +188 -0
- parrot/stores/duck.py +162 -0
- parrot/stores/embeddings/__init__.py +10 -0
- parrot/stores/embeddings/abstract.py +46 -0
- parrot/stores/embeddings/base.py +52 -0
- parrot/stores/embeddings/bge.py +20 -0
- parrot/stores/embeddings/fastembed.py +17 -0
- parrot/stores/embeddings/google.py +18 -0
- parrot/stores/embeddings/huggingface.py +20 -0
- parrot/stores/embeddings/ollama.py +14 -0
- parrot/stores/embeddings/openai.py +26 -0
- parrot/stores/embeddings/transformers.py +21 -0
- parrot/stores/embeddings/vertexai.py +17 -0
- parrot/stores/empty.py +10 -0
- parrot/stores/faiss.py +160 -0
- parrot/stores/milvus.py +397 -0
- parrot/stores/postgres.py +653 -0
- parrot/stores/qdrant.py +170 -0
- parrot/tools/__init__.py +23 -0
- parrot/tools/abstract.py +68 -0
- parrot/tools/asknews.py +33 -0
- parrot/tools/basic.py +51 -0
- parrot/tools/bby.py +359 -0
- parrot/tools/bing.py +13 -0
- parrot/tools/docx.py +343 -0
- parrot/tools/duck.py +62 -0
- parrot/tools/execute.py +56 -0
- parrot/tools/gamma.py +28 -0
- parrot/tools/google.py +170 -0
- parrot/tools/gvoice.py +301 -0
- parrot/tools/results.py +278 -0
- parrot/tools/stack.py +27 -0
- parrot/tools/weather.py +70 -0
- parrot/tools/wikipedia.py +58 -0
- parrot/tools/zipcode.py +198 -0
- parrot/utils/__init__.py +2 -0
- parrot/utils/parsers/__init__.py +5 -0
- parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/toml.py +11 -0
- parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/uv.py +11 -0
- parrot/version.py +10 -0
- resources/users/__init__.py +5 -0
- resources/users/handlers.py +13 -0
- resources/users/models.py +205 -0
parrot/tools/docx.py
ADDED
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import tempfile
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
import uuid
|
|
6
|
+
import asyncio
|
|
7
|
+
from typing import Optional, Dict, Any
|
|
8
|
+
from urllib.parse import urlparse
|
|
9
|
+
import aiohttp
|
|
10
|
+
import aiofiles
|
|
11
|
+
from docx import Document
|
|
12
|
+
from docx.shared import Pt, Inches
|
|
13
|
+
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
|
14
|
+
from langchain.tools import BaseTool
|
|
15
|
+
from markdownify import markdownify as md
|
|
16
|
+
import mammoth
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class DocxGeneratorTool(BaseTool):
    """Microsoft Word DOCX Generator Tool.

    Takes markdown text (typically produced by an LLM), cleans up common
    formatting artefacts and writes it out as a ``.docx`` file.

    Supported constructs: ``#`` headings, bullet and numbered lists,
    blockquotes, horizontal rules, pipe tables and inline bold/italic.
    """
    name: str = "generate_ms_word_document"
    description: str = "Use this tool for generating DOCX, provide text in markdown format with sections, headings."
    # Directory documents are written to; the current directory when unset.
    output_dir: Optional[str] = None

    def __init__(self, output_dir=None, **kwargs):
        """Initialize the DOCX generator tool.

        Args:
            output_dir: Directory for generated files (created on demand).
            **kwargs: Forwarded to ``BaseTool`` so callers can override the
                standard tool fields.
        """
        super().__init__(**kwargs)
        self.output_dir = output_dir

    def _run(self, markdown_text: str, filename: str = None) -> dict:
        """Generate a DOCX document from markdown text.

        Args:
            markdown_text: Markdown source of the document.
            filename: Optional file name; ``.docx`` is appended when missing
                and a random name is generated when omitted.

        Returns:
            On success a dict with ``filename``, ``file_path``,
            ``content_type`` and ``type``; on failure a dict with ``error``
            and ``original_markdown`` (errors are reported, not raised).
        """
        try:
            # The name usually comes from a model: keep only the last path
            # component so it cannot point outside of the output directory.
            if filename:
                filename = Path(filename.replace('\\', '/')).name

            if not filename:
                filename = f"document_{uuid.uuid4().hex[:8]}.docx"
            elif not filename.lower().endswith('.docx'):
                filename = f"{filename}.docx"

            # Get output directory, use current directory if not specified
            output_dir = self.output_dir or '.'
            os.makedirs(output_dir, exist_ok=True)
            file_path = Path(output_dir) / filename

            processed_text = self._preprocess_markdown(markdown_text)
            self._markdown_to_docx(processed_text, file_path)

            return {
                "filename": filename,
                "file_path": str(file_path),
                "content_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                "type": "docx"
            }
        except Exception as e:
            # Deliberate best-effort: report the failure to the caller
            # instead of raising.
            return {
                "error": str(e),
                "original_markdown": markdown_text
            }

    def _preprocess_markdown(self, text):
        """Normalize common defects of generated markdown.

        Removes ``{placeholder}`` variables, unwraps un-evaluated triple
        quoted f-strings, strips code fences and makes sure heading markers
        are followed by a space.
        """
        # Replace placeholder variables with empty strings
        text = re.sub(r'\{[a-zA-Z0-9_]+\}', '', text)

        # Handle f-strings that weren't evaluated
        text = re.sub(r'f"""(.*?)"""', r'\1', text, flags=re.DOTALL)
        text = re.sub(r"f'''(.*?)'''", r'\1', text, flags=re.DOTALL)

        # Remove triple backticks and language indicators
        text = re.sub(r'```[a-zA-Z]*\n', '', text)
        text = re.sub(r'```', '', text)

        # Ensure a space after the leading run of '#'. Anchored to the start
        # of a line and never splitting the run itself, so a well-formed
        # "## Title" is left alone and a mid-line "#123" is not touched.
        text = re.sub(r'^([ \t]*#+)(?=[^\s#])', r'\1 ', text, flags=re.MULTILINE)

        return text

    def _markdown_to_docx(self, markdown_text, output_path):
        """Convert markdown text to a DOCX document saved at ``output_path``."""
        doc = Document()

        # One inch margins on every side
        for section in doc.sections:
            section.top_margin = Inches(1)
            section.bottom_margin = Inches(1)
            section.left_margin = Inches(1)
            section.right_margin = Inches(1)

        lines = markdown_text.split('\n')
        total = len(lines)
        i = 0
        while i < total:
            line = lines[i].strip()

            # A run of consecutive lines starting with '|' is a table; it is
            # rendered where it occurs. When it cannot be rendered the lines
            # fall through and are written as ordinary paragraphs.
            if line.startswith('|'):
                end = i
                while end < total and lines[end].strip().startswith('|'):
                    end += 1
                if self._add_table(doc, [row.strip() for row in lines[i:end]]):
                    i = end
                    continue

            if line.startswith('#'):
                # The number of leading '#' is the heading level (capped at 9)
                level = len(line) - len(line.lstrip('#'))
                doc.add_heading(line[level:].strip(), level=min(level, 9))
            elif line.startswith(('* ', '- ')):
                self._add_styled_paragraph(doc, line[2:].strip(), 'List Bullet')
            elif re.match(r'^\d+\.\s', line):
                item = re.sub(r'^\d+\.\s', '', line).strip()
                self._add_styled_paragraph(doc, item, 'List Number')
            elif line.startswith('> '):
                self._add_styled_paragraph(doc, line[2:].strip(), 'Quote')
            elif line in ('---', '***', '___'):
                # Horizontal rule rendered as a line of underscores
                doc.add_paragraph('_' * 50)
            else:
                # Collapse runs of blank lines: a blank line followed by
                # another blank line is skipped.
                if not line and i + 1 < total and not lines[i + 1].strip():
                    i += 1
                    continue
                self._add_inline_runs(doc.add_paragraph(), line)

            i += 1

        doc.save(output_path)

    def _add_styled_paragraph(self, doc, text, style):
        """Append a paragraph with the given style name and inline formatting."""
        paragraph = doc.add_paragraph()
        paragraph.style = style
        self._add_inline_runs(paragraph, text)

    def _add_inline_runs(self, paragraph, text):
        """Write ``text`` into ``paragraph``, honouring bold/italic markers.

        A single alternation is scanned left to right so matches can never
        overlap; scanning bold and italic separately made ``**bold**`` also
        match the italic pattern and duplicated the text.
        """
        inline_pattern = (
            r'\*\*(.+?)\*\*'              # 1: **bold**
            r'|(?<!\w)__(.+?)__(?!\w)'    # 2: __bold__
            r'|\*(?!\s)(.+?)(?<!\s)\*'    # 3: *italic*
            r'|(?<!\w)_(.+?)_(?!\w)'      # 4: _italic_ (not snake_case)
        )
        position = 0
        for match in re.finditer(inline_pattern, text):
            if match.start() > position:
                paragraph.add_run(text[position:match.start()])
            # Exactly one group takes part in a match, lastindex names it.
            run = paragraph.add_run(match.group(match.lastindex))
            if match.lastindex <= 2:
                run.bold = True
            else:
                run.italic = True
            position = match.end()

        if position < len(text):
            paragraph.add_run(text[position:])

    def _split_table_row(self, row):
        """Split a markdown table row into stripped cell texts.

        The leading and trailing pipe are optional, so the last cell is kept
        when a row has no closing ``|``. A row without any content yields an
        empty list.
        """
        inner = row.strip()
        if inner.startswith('|'):
            inner = inner[1:]
        if inner.endswith('|'):
            inner = inner[:-1]
        if not inner.strip():
            return []
        return [cell.strip() for cell in inner.split('|')]

    def _add_table(self, doc, table_lines):
        """Append a table built from consecutive markdown table lines.

        Returns:
            True when a table was added, False when the lines do not form a
            table (fewer than two lines or no header cells).
        """
        if len(table_lines) < 2:
            return False
        header_cells = self._split_table_row(table_lines[0])
        if not header_cells:
            return False

        # The separator row consists of cells made of '-' and ':' only.
        separator = self._split_table_row(table_lines[1])
        has_separator = bool(separator) and all(
            '-' in cell and not cell.strip('-:') for cell in separator
        )
        data_rows = table_lines[2:] if has_separator else table_lines[1:]

        table = doc.add_table(rows=len(data_rows) + 1, cols=len(header_cells))
        table.style = 'Table Grid'

        for col_idx, cell in enumerate(header_cells):
            table.cell(0, col_idx).text = cell

        for row_idx, row in enumerate(data_rows, start=1):
            # Cells beyond the header width are dropped
            cells = self._split_table_row(row)[:len(header_cells)]
            for col_idx, cell in enumerate(cells):
                table.cell(row_idx, col_idx).text = cell
        return True

    def _try_add_tables(self, doc, markdown_text):
        """Append every table-like line of ``markdown_text`` as one table.

        No longer used by ``_markdown_to_docx`` (tables are rendered in
        place); kept so existing callers keep working.
        """
        table_lines = [line.strip() for line in markdown_text.split('\n')
                       if line.strip().startswith('|')]

        if len(table_lines) >= 2 and self._split_table_row(table_lines[0]):
            # Empty paragraph separating the table from preceding content
            doc.add_paragraph()
            self._add_table(doc, table_lines)
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
class WordToMarkdownTool(BaseTool):
    """Converts a Word document to Markdown format by downloading it from a URL."""
    name: str = "word_to_markdown_tool"
    description: str = (
        "Converts a Word document to Markdown format from a URL. "
        "This tool downloads the Word document from the provided URL, "
        "converts it to Markdown format, and returns the content. "
        "Useful for processing Word documents and making them easier to analyze by LLMs."
        "\nThe input must be the complete URL of the Word document."
    )
    return_direct: bool = False
    # Scratch directory for downloads; created lazily, removed when empty.
    _temp_dir: Optional[str] = None

    async def _download_file(self, url: str) -> str:
        """Download ``url`` into the scratch directory.

        Returns:
            Path of the downloaded file.

        Raises:
            RuntimeError: When the server does not answer with HTTP 200.
        """
        # (Re)create the scratch directory: it is deleted after each run, so
        # a remembered path may no longer exist.
        if not self._temp_dir or not os.path.isdir(self._temp_dir):
            self._temp_dir = tempfile.mkdtemp()

        # Get the filename from the URL, adding an extension if missing
        filename = os.path.basename(urlparse(url).path)
        if not filename.lower().endswith(('.docx', '.doc')):
            filename += '.docx'

        # A random prefix keeps concurrent downloads of equally named
        # documents from overwriting each other.
        file_path = os.path.join(
            self._temp_dir, f"{uuid.uuid4().hex[:8]}_{filename}"
        )

        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                if response.status != 200:
                    raise RuntimeError(
                        f"Error downloading the file: {response.status}"
                    )
                payload = await response.read()

        async with aiofiles.open(file_path, 'wb') as f:
            await f.write(payload)

        return file_path

    async def _convert_to_markdown(self, file_path: str) -> str:
        """Convert the Word document at ``file_path`` to Markdown.

        The document is converted to HTML with mammoth and the HTML to
        Markdown with markdownify. Conversion messages reported by mammoth
        are prepended as HTML comments.
        """
        with open(file_path, "rb") as docx_file:
            result = mammoth.convert_to_html(docx_file)

        markdown_text = md(result.value)

        if result.messages:
            warnings = "\n".join(
                f"<!-- Warning: {msg} -->" for msg in result.messages
            )
            markdown_text = f"{warnings}\n\n{markdown_text}"

        return markdown_text

    async def _process_word_document(self, url: str) -> Dict[str, Any]:
        """Download a Word document and convert it to Markdown.

        Returns:
            ``{"markdown", "source_url", "success": True}`` on success or
            ``{"error", "source_url", "success": False}`` on failure; errors
            are reported in the result, not raised.
        """
        file_path = None
        try:
            file_path = await self._download_file(url)
            markdown_text = await self._convert_to_markdown(file_path)
            return {
                "markdown": markdown_text,
                "source_url": url,
                "success": True
            }
        except Exception as e:
            return {
                "error": str(e),
                "source_url": url,
                "success": False
            }
        finally:
            # Clean up on success AND failure; a cleanup problem must not
            # replace the result being returned.
            try:
                if file_path and os.path.exists(file_path):
                    os.remove(file_path)
                temp_dir = self._temp_dir
                if temp_dir and os.path.isdir(temp_dir) and not os.listdir(temp_dir):
                    os.rmdir(temp_dir)
                    # Forget the removed directory so the next call creates
                    # a fresh one.
                    self._temp_dir = None
            except OSError:
                pass

    async def _arun(self, url: str) -> Dict[str, Any]:
        """Runs the tool asynchronously."""
        return await self._process_word_document(url)

    def _run(self, url: str) -> Dict[str, Any]:
        """Runs the tool synchronously.

        Works both with and without an event loop running in the calling
        thread.
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop in this thread: run the coroutine directly.
            return asyncio.run(self._process_word_document(url))

        # A loop is already running here and cannot be re-entered
        # (run_until_complete would raise), so use a private loop in a
        # worker thread and wait for its result.
        from concurrent.futures import ThreadPoolExecutor

        with ThreadPoolExecutor(max_workers=1) as pool:
            future = pool.submit(asyncio.run, self._process_word_document(url))
            return future.result()
|
parrot/tools/duck.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
from pydantic import PrivateAttr
|
|
3
|
+
from langchain_community.tools.ddg_search.tool import (
|
|
4
|
+
DuckDuckGoSearchResults,
|
|
5
|
+
DuckDuckGoSearchAPIWrapper
|
|
6
|
+
)
|
|
7
|
+
from duckduckgo_search import DDGS
|
|
8
|
+
from langchain.tools import BaseTool
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class DuckDuckGoSearchTool(BaseTool):
    """Web search tool backed by the LangChain DuckDuckGo results tool."""
    name: str = "duckduckgo_search"
    description: str = "Search the web using DuckDuckGo Search."
    # Value handed to DuckDuckGoSearchResults as ``source``.
    source: Any = None
    # Maximum number of results requested from the API wrapper.
    max_results: int = 5
    # Region code handed to the API wrapper.
    region: str = None

    def __init__(self, source: str = "news", results: int = 5, region: str = 'wt-wt', **kwargs: Any):
        """Create the tool.

        Args:
            source: Stored as ``self.source``.
            results: Stored as ``self.max_results``.
            region: Stored as ``self.region``.
            **kwargs: Forwarded to ``BaseTool``.
        """
        super().__init__(**kwargs)
        self.source, self.max_results, self.region = source, results, region

    def _run(self, query: str) -> dict:
        """Run ``query`` through DuckDuckGo and return the tool's output."""
        # The wrapper is rebuilt on every call with time="y".
        api = DuckDuckGoSearchAPIWrapper(
            region=self.region, time="y", max_results=self.max_results
        )
        return DuckDuckGoSearchResults(api_wrapper=api, source=self.source).run(query)
|
|
37
|
+
|
|
38
|
+
class DuckDuckGoRelevantSearch(BaseTool):
    """Text search tool calling the ``duckduckgo_search`` client directly."""
    name: str = "duckduckgo_relevant_search"
    description: str = "Search the web and extract most relevant information based on DuckDuckGo Search API"
    # Private state, assigned in __init__.
    _max_results: PrivateAttr
    _region: PrivateAttr

    def __init__(self, results: int = 5, region: str = 'wt-wt', **kwargs: Any):
        """Create the tool.

        Args:
            results: Maximum number of results to request.
            region: Region code passed to the search.
            **kwargs: Forwarded to ``BaseTool``.
        """
        super().__init__(**kwargs)
        self._max_results, self._region = results, region

    def _run(self, query: str, **kwargs: Any) -> Any:
        """Search Internet for relevant information based on a query."""
        search_options = {
            'keywords': query,
            'region': self._region,
            'safesearch': 'moderate',
            'max_results': self._max_results,
        }
        return DDGS().text(**search_options)
|
parrot/tools/execute.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Executable Python REPL Tool.
|
|
3
|
+
"""
|
|
4
|
+
import io
|
|
5
|
+
import base64
|
|
6
|
+
import json
|
|
7
|
+
import matplotlib.pyplot as plt
|
|
8
|
+
from langchain_experimental.tools.python.tool import PythonAstREPLTool
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ExecutablePythonREPLTool(PythonAstREPLTool):
    """
    Executable Python REPL Tool.

    Runs a snippet of Python code and, when the snippet drew a matplotlib
    figure, returns that figure as a base64 encoded PNG.
    """
    def execute_code(self, code: str) -> str:
        """
        Execute the provided Python code and report the outcome.

        Args:
            code (str): The Python code to execute.

        Returns:
            str: A markdown string echoing the executed code, followed by a
            JSON object holding the base64 PNG when the code created a
            figure; or ``"Error executing code: ..."`` when it raised.
        """
        executed = f"**Code Executed**:\n```python\n{code}\n```\n\n"
        # Figures that exist before the run do not belong to this snippet.
        figures_before = set(plt.get_fignums())
        try:
            # SECURITY: exec() runs the given code with the privileges of
            # this process. Only use this tool with trusted input or inside
            # a sandbox.
            namespace = {}
            exec(code, namespace)

            # Detect plots by the figures actually created rather than by
            # the name "plt" being bound, which missed other import styles
            # and produced a blank image when nothing was drawn.
            if not set(plt.get_fignums()) - figures_before:
                return executed

            buf = io.BytesIO()
            plt.savefig(buf, format='png')
            # Encode the image in base64
            img_str = base64.b64encode(buf.getvalue()).decode('utf-8')

            # Prepare the JSON output
            result = {
                "image": {
                    "format": "png",
                    "base64": img_str
                }
            }
            # Return both the code and the JSON result
            return f"{executed}**Result**:\n{json.dumps(result)}"

        except Exception as e:
            return f"Error executing code: {e}"
        finally:
            # Close every figure the snippet created, also on errors, so
            # figures do not pile up across calls.
            for num in set(plt.get_fignums()) - figures_before:
                plt.close(num)

    def __call__(self, code: str) -> str:
        """Shortcut for :meth:`execute_code`."""
        return self.execute_code(code)
|
parrot/tools/gamma.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import urllib.parse
|
|
2
|
+
from langchain.tools import BaseTool
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class GammaLink(BaseTool):
    """Build a Gamma.app "create" link carrying the given text."""
    name: str = "gamma_link"
    description: str = (
        "Generate a Link to Gamma.App to be used as presentation."
        " This tool is useful for creating URLs for presentations in Gamma.app."
    )

    def _run(self, query: str) -> dict:
        """
        Generate a link to Gamma.app with the provided text.

        Args:
            query (str): The text to be included in the Gamma link.

        Returns:
            dict: ``url`` holds the link with the percent-encoded text as
            its ``content`` parameter, ``text`` the original text.
        """
        host = "https://gamma.app"
        content = urllib.parse.quote(query)
        link = f"{host}/create?content={content}"
        return {"url": link, "text": query}
|
parrot/tools/google.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
import requests
|
|
3
|
+
from googleapiclient.discovery import build
|
|
4
|
+
from pydantic import PrivateAttr
|
|
5
|
+
# from crewai_tools import BaseTool
|
|
6
|
+
from langchain.tools import BaseTool
|
|
7
|
+
from navconfig import config
|
|
8
|
+
from ..conf import GOOGLE_API_KEY
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class GoogleSearchTool(BaseTool):
    """Web Search tool using Google API."""
    name: str = "Google Web Search"
    # NOTE: implicit string concatenation; a comma between the two parts
    # would turn the description into a tuple.
    description: str = (
        "Search the web using Google Search API, useful when you need to answer questions about current events."
        " Use this tool more than the Wikipedia tool if you are asked about current events, recent information, or news"
    )
    source: str = 'news'
    max_results: int = 5
    region: str = 'US'
    # Fields populated during init (not required for validation)
    cse_id: Optional[str] = None
    search_key: Optional[str] = None
    kwargs: Optional[dict] = None

    def __init__(self, source: str = "news", results: int = 5, **kwargs):
        """Create the tool.

        Args:
            source: Stored as ``self.source``.
            results: Number of results requested per query.
            **kwargs: Forwarded to ``BaseTool`` and also passed as extra
                parameters to every search request.
        """
        super().__init__(**kwargs)
        self.source = source
        self.max_results = results
        self.cse_id = config.get('GOOGLE_SEARCH_ENGINE_ID')
        self.search_key = config.get('GOOGLE_SEARCH_API_KEY')
        self.kwargs = kwargs

    def _run(self, query: str) -> list:
        """Run the Google Search Tool.

        Returns:
            A list of dicts with ``snippet``, ``title``, ``link`` and
            ``description`` (same text as ``snippet``); empty when the
            response carries no ``items``.
        """
        service = build("customsearch", "v1", developerKey=self.search_key)
        res = service.cse().list(  # pylint: disable=no-member
            q=query,
            cx=self.cse_id,
            num=self.max_results,
            **(self.kwargs or {})
        ).execute()
        # "items" may be missing from the response (no hits); treat that as
        # an empty result instead of failing with KeyError.
        return [
            {
                'snippet': item['snippet'],
                'title': item['title'],
                'link': item['link'],
                'description': item['snippet']
            }
            for item in res.get('items', [])
        ]
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class GoogleSiteSearchTool(BaseTool):
    """Web Search under a site using Google API."""
    name: str = "Google Site Search"
    description: str = "Search under a Site using Google Search API"
    # Site the search is restricted to (appended as "site:<source>").
    source: str = 'news'
    max_results: int = 5
    region: str = ''
    # Private state populated during init; declared explicitly so that
    # assigning it does not depend on how the model treats undeclared
    # underscore attributes.
    _cse_id: Optional[str] = PrivateAttr(default=None)
    _search_key: Optional[str] = PrivateAttr(default=None)
    _kwargs: dict = PrivateAttr(default_factory=dict)

    def __init__(self, site: str = "news", results: int = 5, **kwargs):
        """Create the tool.

        Args:
            site: Site to search within, stored as ``self.source``.
            results: Number of results requested per query.
            **kwargs: Forwarded to ``BaseTool`` and also passed as extra
                parameters to every search request.
        """
        super().__init__(**kwargs)
        self.source = site
        self.max_results = results
        self._cse_id = config.get('GOOGLE_SEARCH_ENGINE_ID')
        self._search_key = config.get('GOOGLE_SEARCH_API_KEY')
        self._kwargs = kwargs

    def _run(self, query: str) -> list:
        """Run the Google Search Tool restricted to the configured site.

        Returns:
            A list of dicts with ``snippet``, ``title``, ``link`` and
            ``description`` (same text as ``snippet``); empty when the
            response carries no ``items``.
        """
        service = build("customsearch", "v1", developerKey=self._search_key)
        qs = f'{query} site:{self.source}'
        res = service.cse().list(  # pylint: disable=no-member
            q=qs,
            cx=self._cse_id,
            num=self.max_results,
            **self._kwargs
        ).execute()
        # "items" may be missing from the response (no hits); treat that as
        # an empty result instead of failing with KeyError.
        return [
            {
                'snippet': item['snippet'],
                'title': item['title'],
                'link': item['link'],
                'description': item['snippet']
            }
            for item in res.get('items', [])
        ]
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class GoogleLocationFinder(BaseTool):
    """ LocationFinder class for finding locations."""
    name: str = "google_maps_location_finder"
    description: str = (
        "Search for location information, use this tool to find latitude, longitude and other geographical information from locations."
        " Provide the complete address to this tool to receive location information"
    )
    google_key: Optional[str] = None
    base_url: str = "https://maps.googleapis.com/maps/api/geocode/json"
    kwargs: Optional[dict] = None

    def __init__(self, **kwargs):
        """Create the tool.

        Args:
            **kwargs: Forwarded to ``BaseTool``; ``api_key`` overrides the
                configured ``GOOGLE_API_KEY``.
        """
        super().__init__(**kwargs)
        self.google_key = kwargs.get('api_key', GOOGLE_API_KEY)
        self.kwargs = kwargs

    def extract_location(self, data):
        """Pull city, state and zip code out of one geocoding result.

        Best effort: components that are missing or malformed are left as
        ``None``.

        Returns:
            Tuple ``(city, state, state_code, zipcode)``.
        """
        city = state = state_code = zipcode = None
        try:
            for component in data['address_components']:
                types = component['types']
                if 'locality' in types:
                    city = component['long_name']
                elif 'administrative_area_level_1' in types:
                    state_code = component['short_name']
                    state = component['long_name']
                elif 'postal_code' in types:
                    zipcode = component['long_name']
        except (KeyError, TypeError):
            # Unexpected result shape: keep whatever was collected so far.
            pass
        return city, state, state_code, zipcode

    def _run(self, query: str) -> Optional[dict]:
        """Find Location.

        Args:
            query: Address to geocode.

        Returns:
            A dict with ``latitude``, ``longitude``, ``address``,
            ``place_id``, ``zipcode``, ``city``, ``state`` and
            ``state_code`` for the first match, or ``None`` when the
            request fails or nothing is found.
        """
        params = {
            "address": query,
            "key": self.google_key
        }
        # Bound the request so a stalled connection cannot hang the caller.
        response = requests.get(
            self.base_url,
            params=params,
            timeout=30
        )
        if response.status_code != 200:
            return None

        result = response.json()
        if result.get('status') != 'OK' or not result.get('results'):
            return None

        location = result['results'][0]
        city, state, state_code, zipcode = self.extract_location(location)
        coordinates = location['geometry']['location']
        return {
            "latitude": coordinates['lat'],
            "longitude": coordinates['lng'],
            "address": location['formatted_address'],
            "place_id": location['place_id'],
            "zipcode": zipcode,
            "city": city,
            "state": state,
            "state_code": state_code
        }
|
|
156
|
+
|
|
157
|
+
class GoogleRouteSearch(BaseTool):
|
|
158
|
+
"""Route Search tool using Google Maps API."""
|
|
159
|
+
name: str = "google_maps_route_search"
|
|
160
|
+
description: str = "Search for a Route to a location using Google Maps, using this tool if answers questions about how to reach a location."
|
|
161
|
+
google_key: str = None
|
|
162
|
+
base_url: str = 'https://maps.googleapis.com/maps/api/directions/json'
|
|
163
|
+
|
|
164
|
+
def __init__(self, **kwargs):
|
|
165
|
+
super().__init__(**kwargs)
|
|
166
|
+
self._key_ = kwargs.get('api_key', GOOGLE_API_KEY)
|
|
167
|
+
self._kwargs = kwargs
|
|
168
|
+
|
|
169
|
+
def _run(self, query: str) -> dict:
|
|
170
|
+
departure_time = 'now'
|