ai-parrot 0.8.3__cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ai-parrot might be problematic. Click here for more details.
- ai_parrot-0.8.3.dist-info/LICENSE +21 -0
- ai_parrot-0.8.3.dist-info/METADATA +306 -0
- ai_parrot-0.8.3.dist-info/RECORD +128 -0
- ai_parrot-0.8.3.dist-info/WHEEL +6 -0
- ai_parrot-0.8.3.dist-info/top_level.txt +2 -0
- parrot/__init__.py +30 -0
- parrot/bots/__init__.py +5 -0
- parrot/bots/abstract.py +1115 -0
- parrot/bots/agent.py +492 -0
- parrot/bots/basic.py +9 -0
- parrot/bots/bose.py +17 -0
- parrot/bots/chatbot.py +271 -0
- parrot/bots/cody.py +17 -0
- parrot/bots/copilot.py +117 -0
- parrot/bots/data.py +730 -0
- parrot/bots/dataframe.py +103 -0
- parrot/bots/hrbot.py +15 -0
- parrot/bots/interfaces/__init__.py +1 -0
- parrot/bots/interfaces/retrievers.py +12 -0
- parrot/bots/notebook.py +619 -0
- parrot/bots/odoo.py +17 -0
- parrot/bots/prompts/__init__.py +41 -0
- parrot/bots/prompts/agents.py +91 -0
- parrot/bots/prompts/data.py +214 -0
- parrot/bots/retrievals/__init__.py +1 -0
- parrot/bots/retrievals/constitutional.py +19 -0
- parrot/bots/retrievals/multi.py +122 -0
- parrot/bots/retrievals/retrieval.py +610 -0
- parrot/bots/tools/__init__.py +7 -0
- parrot/bots/tools/eda.py +325 -0
- parrot/bots/tools/pdf.py +50 -0
- parrot/bots/tools/plot.py +48 -0
- parrot/bots/troc.py +16 -0
- parrot/conf.py +170 -0
- parrot/crew/__init__.py +3 -0
- parrot/crew/tools/__init__.py +22 -0
- parrot/crew/tools/bing.py +13 -0
- parrot/crew/tools/config.py +43 -0
- parrot/crew/tools/duckgo.py +62 -0
- parrot/crew/tools/file.py +24 -0
- parrot/crew/tools/google.py +168 -0
- parrot/crew/tools/gtrends.py +16 -0
- parrot/crew/tools/md2pdf.py +25 -0
- parrot/crew/tools/rag.py +42 -0
- parrot/crew/tools/search.py +32 -0
- parrot/crew/tools/url.py +21 -0
- parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/handlers/__init__.py +4 -0
- parrot/handlers/agents.py +292 -0
- parrot/handlers/bots.py +196 -0
- parrot/handlers/chat.py +192 -0
- parrot/interfaces/__init__.py +6 -0
- parrot/interfaces/database.py +27 -0
- parrot/interfaces/http.py +805 -0
- parrot/interfaces/images/__init__.py +0 -0
- parrot/interfaces/images/plugins/__init__.py +18 -0
- parrot/interfaces/images/plugins/abstract.py +58 -0
- parrot/interfaces/images/plugins/exif.py +709 -0
- parrot/interfaces/images/plugins/hash.py +52 -0
- parrot/interfaces/images/plugins/vision.py +104 -0
- parrot/interfaces/images/plugins/yolo.py +66 -0
- parrot/interfaces/images/plugins/zerodetect.py +197 -0
- parrot/llms/__init__.py +1 -0
- parrot/llms/abstract.py +69 -0
- parrot/llms/anthropic.py +58 -0
- parrot/llms/gemma.py +15 -0
- parrot/llms/google.py +44 -0
- parrot/llms/groq.py +67 -0
- parrot/llms/hf.py +45 -0
- parrot/llms/openai.py +61 -0
- parrot/llms/pipes.py +114 -0
- parrot/llms/vertex.py +89 -0
- parrot/loaders/__init__.py +9 -0
- parrot/loaders/abstract.py +628 -0
- parrot/loaders/files/__init__.py +0 -0
- parrot/loaders/files/abstract.py +39 -0
- parrot/loaders/files/text.py +63 -0
- parrot/loaders/txt.py +26 -0
- parrot/manager.py +333 -0
- parrot/models.py +504 -0
- parrot/py.typed +0 -0
- parrot/stores/__init__.py +11 -0
- parrot/stores/abstract.py +248 -0
- parrot/stores/chroma.py +188 -0
- parrot/stores/duck.py +162 -0
- parrot/stores/embeddings/__init__.py +10 -0
- parrot/stores/embeddings/abstract.py +46 -0
- parrot/stores/embeddings/base.py +52 -0
- parrot/stores/embeddings/bge.py +20 -0
- parrot/stores/embeddings/fastembed.py +17 -0
- parrot/stores/embeddings/google.py +18 -0
- parrot/stores/embeddings/huggingface.py +20 -0
- parrot/stores/embeddings/ollama.py +14 -0
- parrot/stores/embeddings/openai.py +26 -0
- parrot/stores/embeddings/transformers.py +21 -0
- parrot/stores/embeddings/vertexai.py +17 -0
- parrot/stores/empty.py +10 -0
- parrot/stores/faiss.py +160 -0
- parrot/stores/milvus.py +397 -0
- parrot/stores/postgres.py +653 -0
- parrot/stores/qdrant.py +170 -0
- parrot/tools/__init__.py +23 -0
- parrot/tools/abstract.py +68 -0
- parrot/tools/asknews.py +33 -0
- parrot/tools/basic.py +51 -0
- parrot/tools/bby.py +359 -0
- parrot/tools/bing.py +13 -0
- parrot/tools/docx.py +343 -0
- parrot/tools/duck.py +62 -0
- parrot/tools/execute.py +56 -0
- parrot/tools/gamma.py +28 -0
- parrot/tools/google.py +170 -0
- parrot/tools/gvoice.py +301 -0
- parrot/tools/results.py +278 -0
- parrot/tools/stack.py +27 -0
- parrot/tools/weather.py +70 -0
- parrot/tools/wikipedia.py +58 -0
- parrot/tools/zipcode.py +198 -0
- parrot/utils/__init__.py +2 -0
- parrot/utils/parsers/__init__.py +5 -0
- parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/toml.py +11 -0
- parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/uv.py +11 -0
- parrot/version.py +10 -0
- resources/users/__init__.py +5 -0
- resources/users/handlers.py +13 -0
- resources/users/models.py +205 -0
parrot/bots/tools/eda.py
ADDED
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
import matplotlib
|
|
2
|
+
matplotlib.use('Agg') # Use non-interactive backend
|
|
3
|
+
|
|
4
|
+
import matplotlib.pyplot as plt
|
|
5
|
+
# Import profiling
|
|
6
|
+
from ydata_profiling import ProfileReport
|
|
7
|
+
import seaborn as sns
|
|
8
|
+
import pandas as pd
|
|
9
|
+
import base64
|
|
10
|
+
import io
|
|
11
|
+
from html import escape
|
|
12
|
+
import os
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
import re
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def df_to_html_with_style(df_input, title=""):
    """Render a DataFrame as an HTML table carrying the ``dataframe`` CSS class.

    Args:
        df_input: Object exposing a pandas-style ``.style`` accessor.
        title: Optional table caption; skipped when empty.

    Returns:
        The HTML string produced by the Styler's ``to_html()``.
    """
    table_style = df_input.style.set_table_attributes('class="dataframe"')
    # Further styling (number formats, in-cell bars, ...) could be chained here.
    captioned = table_style.set_caption(title) if title else table_style
    return captioned.to_html()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def plot_to_base64(plt_figure):
    """Render a matplotlib figure as a base64-encoded PNG, then close the figure.

    Args:
        plt_figure: Figure object exposing ``savefig``.

    Returns:
        str: The PNG bytes encoded as base64 text (no ``data:`` URI prefix).

    Raises:
        Whatever ``savefig`` raises; the figure is closed in that case too.
    """
    try:
        # The context manager releases the buffer on every exit path.
        with io.BytesIO() as buf:
            plt_figure.savefig(buf, format='png', bbox_inches='tight')
            return base64.b64encode(buf.getvalue()).decode('utf-8')
    finally:
        # Close even when rendering fails so figures do not accumulate in pyplot.
        plt.close(plt_figure)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def generate_eda_report(dataframe=None, report_dir=None, df_name="DataFrame", minimal=False, explorative=True):
    """Generate a ydata-profiling report and save it to an HTML file.

    Args:
        dataframe: The DataFrame to profile. Required; ``None`` yields an error result.
        report_dir: Directory the report is written to (created when missing). Required.
        df_name: Name used in the report title and, sanitised, in the filename.
        minimal: Set to True for faster, less detailed reports.
        explorative: Set to True for detailed explorative analysis.

    Returns:
        dict: On success, ``type``/``file_type``/``message``/``file_path``/
        ``report_url``/``df_name``/``df_shape``. On any failure,
        ``{"type": "error", "message": ...}``; exceptions are not propagated.
    """
    try:
        # Fail with an explicit message instead of an AttributeError/TypeError deep inside.
        if dataframe is None:
            raise ValueError("no dataframe was provided")
        if report_dir is None:
            raise ValueError("no report_dir was provided")

        os.makedirs(report_dir, exist_ok=True)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Keep path separators and other odd characters out of the filename so the
        # report cannot be written outside report_dir.
        safe_name = re.sub(r"[^\w-]+", "_", str(df_name)) or "DataFrame"
        output_filename = f"profile_report_{safe_name}_{timestamp}.html"
        output_path = os.path.join(report_dir, output_filename)

        config_kwargs = {
            "title": f"Profiling Report for {df_name}",
            "progress_bar": False,
            "minimal": minimal,
            "explorative": explorative,
        }

        print(
            f"Generating profile report for dataframe with shape {dataframe.shape}..."
        )

        # Generating the report might take a while for large datasets.
        profile = ProfileReport(dataframe, **config_kwargs)
        profile.to_file(output_path)

        print(f"✅ Profile report saved to: {output_path}")

        return {
            "type": "file",
            "file_type": "html",
            "message": "Profile report generated successfully",
            "file_path": output_path,
            "report_url": f"file://{os.path.abspath(output_path)}",
            "df_name": df_name,
            "df_shape": dataframe.shape,
        }

    except Exception as e:
        # Boundary handler: callers receive a structured error instead of an exception.
        print(f"❌ Error generating profile report: {str(e)}")
        return {
            "type": "error",
            "message": f"Failed to generate profile report: {str(e)}"
        }
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def quick_eda(dataframe=None, report_dir=None):
    """Run a quick exploratory data analysis and save it as a standalone HTML report.

    The report contains the shape, dtypes, missing values, descriptive statistics,
    a correlation heatmap, distributions of the first five numerical columns and
    value counts of the first five object/category columns.

    Args:
        dataframe: pandas DataFrame to analyse.
        report_dir: Directory the report is written to; created when missing.

    Returns:
        dict with ``message``, ``file_path`` and ``report_url`` on success.
        NOTE: for backward compatibility a non-DataFrame input returns an HTML
        error *string* rather than a dict, so callers must handle both shapes.
    """
    # Validate before touching the filesystem so bad input has no side effects.
    if not isinstance(dataframe, pd.DataFrame):
        return "<p>Error: Input is not a valid pandas DataFrame.</p>"

    os.makedirs(report_dir, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_path = os.path.join(report_dir, f"eda_report_{timestamp}.html")

    numeric_cols = dataframe.select_dtypes(include=['number']).columns.tolist()
    cat_cols = dataframe.select_dtypes(include=['object', 'category']).columns.tolist()

    html_parts = [_EDA_HTML_HEAD, "<h1>📊 Quick EDA Report</h1>"]
    html_parts += _eda_section(
        "📏 Basic Information",
        [f"<p>Shape: {dataframe.shape[0]} rows, {dataframe.shape[1]} columns</p>"],
    )
    html_parts += _eda_section(
        "📋 Data Types",
        [df_to_html_with_style(dataframe.dtypes.to_frame(name='DataType'))],
    )
    html_parts += _eda_section(
        "<span class='missing-values'>🔍 Missing Values</span>",
        _eda_missing_parts(dataframe),
    )
    html_parts += _eda_section(
        "📈 Descriptive Statistics (Numerical Columns)",
        _eda_stats_parts(dataframe),
    )
    html_parts += _eda_section(
        "🔗 Correlation Matrix (Numerical Columns)",
        _eda_correlation_parts(dataframe, numeric_cols),
    )
    if numeric_cols:
        html_parts += _eda_section(
            "📊 Numerical Distributions (Sample)",
            _eda_distribution_parts(dataframe, numeric_cols),
        )
    if cat_cols:
        html_parts += _eda_section(
            "📊 Categorical Value Counts (Sample)",
            _eda_categorical_parts(dataframe, cat_cols),
        )
    else:
        html_parts += _eda_section(
            "📊 Categorical Value Counts",
            ["<p>No categorical columns found.</p>"],
        )

    html_parts.append('<hr><p style="text-align:center; color: #666; font-size: 0.9em;">✅ Quick EDA Report Generated</p>')
    html_parts.append("</body></html>")

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(html_parts))

    print(f"✅ EDA report saved to: {output_path}")
    return {
        "message": "EDA completed successfully",
        "file_path": output_path,
        "report_url": f"file://{os.path.abspath(output_path)}"
    }


# Document head and stylesheet shared by every quick EDA report.
_EDA_HTML_HEAD = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Quick EDA Report</title>
<style>
body { font-family: sans-serif; margin: 20px; }
h1, h2, h3 { color: #333; border-bottom: 1px solid #ccc; padding-bottom: 5px; }
h2 { margin-top: 30px; }
.dataframe { border-collapse: collapse; margin: 15px 0; font-size: 0.9em; }
.dataframe th, .dataframe td { border: 1px solid #ddd; padding: 8px; }
.dataframe th { background-color: #f2f2f2; text-align: left; }
.dataframe caption { caption-side: top; font-weight: bold; margin-bottom: 5px; text-align: left; font-size: 1.1em; }
img { max-width: 100%; height: auto; display: block; margin: 15px 0; border: 1px solid #eee; }
.plot-container { margin-bottom: 20px; }
.section { margin-bottom: 40px; padding: 15px; background-color: #f9f9f9; border-radius: 5px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }
.missing-values { color: #d9534f; } /* Style for missing values count */
</style>
</head>
<body>
"""

# Cap on per-column plots to avoid overly large HTML files.
_EDA_MAX_COLUMNS = 5


def _eda_section(heading_html, body_parts):
    """Wrap ``body_parts`` in a report section headed by ``heading_html``."""
    return ['<div class="section">', f"<h2>{heading_html}</h2>", *body_parts, '</div>']


def _eda_label(col):
    """Return a column label as HTML-safe text (labels may be ints, tuples, ...)."""
    return escape(str(col))


def _eda_close_figure(fig):
    """Close ``fig`` if it was created and is still registered with pyplot."""
    if fig is not None and plt.fignum_exists(fig.number):
        plt.close(fig)


def _eda_image_tag(fig, alt_text):
    """Embed ``fig`` as an inline PNG ``<img>``; ``alt_text`` must already be escaped."""
    return f'<img src="data:image/png;base64,{plot_to_base64(fig)}" alt="{alt_text}">'


def _eda_missing_parts(df):
    """Build the missing-value table, or a note when nothing is missing."""
    missing = df.isna().sum()
    missing = missing[missing > 0]
    if missing.empty:
        return ["<p>No missing values found.</p>"]
    missing_df = missing.to_frame(name='Missing Count')
    # A non-empty result implies at least one row, so len(df) is never zero here.
    missing_df['Percentage (%)'] = (missing_df['Missing Count'] / len(df) * 100).round(2)
    return [df_to_html_with_style(missing_df)]


def _eda_stats_parts(df):
    """Build the ``describe()`` table, degrading to a message on failure."""
    try:
        desc_stats = df.describe().T
        if desc_stats.empty:
            return ["<p>No numerical columns to describe.</p>"]
        return [df_to_html_with_style(desc_stats.round(3))]  # Round for clarity
    except Exception as e:
        return [f"<p>Could not generate descriptive statistics: {escape(str(e))}</p>"]


def _eda_correlation_parts(df, numeric_cols):
    """Build the correlation heatmap, or a note when fewer than two numeric columns exist."""
    if len(numeric_cols) < 2:
        return ["<p>Not enough numerical columns (need at least 2) to calculate correlations.</p>"]
    fig = None
    try:
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(df[numeric_cols].corr(), annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
        ax.set_title("Correlation Matrix")
        return [_eda_image_tag(fig, "Correlation Matrix")]
    except Exception as e:
        _eda_close_figure(fig)  # Ensure the plot is closed on error
        return [f"<p>Could not generate correlation matrix plot: {escape(str(e))}</p>"]


def _eda_distribution_parts(df, numeric_cols):
    """Build histogram and boxplot pairs for the first few numerical columns."""
    cols = numeric_cols[:_EDA_MAX_COLUMNS]
    parts = [
        f"<p>Displaying distributions for the first {len(cols)} numerical columns: "
        f"{', '.join(map(_eda_label, cols))}</p>"
    ]
    for col in cols:
        label = _eda_label(col)
        parts.append('<div class="plot-container">')
        parts.append(f"<h3>Distribution of {label}</h3>")
        fig = None
        try:
            fig, axes = plt.subplots(1, 2, figsize=(12, 4))
            values = df[col].dropna()

            # Plot titles are drawn by matplotlib, not HTML, so they use the raw label.
            sns.histplot(values, kde=True, ax=axes[0])
            axes[0].set_title(f"Histogram of {col}")

            sns.boxplot(y=values, ax=axes[1])
            axes[1].set_title(f"Boxplot of {col}")

            fig.tight_layout()
            parts.append(_eda_image_tag(fig, f"Distribution plot for {label}"))
        except Exception as e:
            parts.append(f"<p>Could not generate distribution plot for {label}: {escape(str(e))}</p>")
            _eda_close_figure(fig)
        parts.append('</div>')  # End plot-container
    return parts


def _eda_categorical_parts(df, cat_cols):
    """Build top-10 value-count tables and bar charts for the first few categorical columns."""
    cols = cat_cols[:_EDA_MAX_COLUMNS]
    parts = [
        f"<p>Displaying value counts and plots for the first {len(cols)} categorical columns: "
        f"{', '.join(map(_eda_label, cols))}</p>"
    ]
    for col in cols:
        label = _eda_label(col)
        parts.append('<div class="plot-container">')
        parts.append(f"<h3>Value Counts for: {label}</h3>")
        fig = None
        try:
            value_counts = df[col].value_counts().head(10)
            if value_counts.empty:
                parts.append("<p>No values found for this column.</p>")
            else:
                vc_df = value_counts.to_frame(name='Count')
                # Percentage of non-missing values.
                vc_df['Percentage (%)'] = (vc_df['Count'] / len(df[col].dropna()) * 100).round(2)
                parts.append(df_to_html_with_style(vc_df, title=f"Top {len(value_counts)} values"))

                fig, ax = plt.subplots(figsize=(10, 5))
                value_counts.plot(kind='bar', ax=ax)
                ax.set_title(f"Top {len(value_counts)} values in {col}")
                ax.set_ylabel("Count")
                ax.set_xlabel(str(col))
                plt.xticks(rotation=45, ha='right')
                fig.tight_layout()

                parts.append(_eda_image_tag(fig, f"Bar chart for top values in {label}"))
        except Exception as e:
            parts.append(f"<p>Could not generate value counts/plot for {label}: {escape(str(e))}</p>")
            _eda_close_figure(fig)
        parts.append('</div>')  # End plot-container
    return parts
|
|
309
|
+
|
|
310
|
+
def list_available_dataframes():
    """Print and return the names of global variables that hold pandas DataFrames.

    Only the ``globals()`` of the module this function is defined in are inspected.

    Returns:
        list: Variable names bound to a DataFrame, in namespace order.
    """
    frames = {
        var_name: value
        for var_name, value in list(globals().items())
        if isinstance(value, pd.DataFrame)
    }

    print(f"Found {len(frames)} dataframes:")
    for var_name, frame in frames.items():
        print(f"- {var_name}: {frame.shape[0]} rows × {frame.shape[1]} columns")

    return list(frames)
|
parrot/bots/tools/pdf.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from weasyprint import HTML, CSS
|
|
4
|
+
from weasyprint.text.fonts import FontConfiguration
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def generate_pdf_from_html(html_content, report_dir, header_title="Payroll Attestation Report"):
    """Generate a PDF file from HTML content.

    Args:
        html_content (str): HTML content to be converted to PDF.
        report_dir: Directory the PDF is written to; created when missing.
        header_title (str): Text printed at the top centre of every page.
            Defaults to the previously hard-coded title.

    Returns:
        str: Path of the generated ``report_<timestamp>.pdf`` file.
    """
    font_config = FontConfiguration()

    # Escape the title so it stays a valid CSS string literal.
    css_title = (
        str(header_title)
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\A ")
    )

    # Additional CSS specifically for PDF rendering. A placeholder is substituted
    # instead of using an f-string so the CSS braces need no doubling.
    pdf_css = CSS(string='''
        @page {
            size: letter;
            margin: 1cm;
            @top-center {
                content: "__HEADER_TITLE__";
                font-size: 9pt;
                color: #666;
            }
            @bottom-right {
                content: "Page " counter(page) " of " counter(pages);
                font-size: 9pt;
                color: #666;
            }
        }
        h1 { page-break-before: always; }
        table { page-break-inside: avoid; }
    '''.replace("__HEADER_TITLE__", css_title), font_config=font_config)

    os.makedirs(report_dir, exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # The payload is a PDF, so the file carries a .pdf extension (it was .html).
    output_filename = f"report_{timestamp}.pdf"
    output_path = os.path.join(report_dir, output_filename)

    # Convert HTML to PDF
    HTML(string=html_content).write_pdf(
        output_path,
        stylesheets=[pdf_css],
        font_config=font_config
    )

    return output_path
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import matplotlib as mpl
|
|
2
|
+
import matplotlib.pyplot as plt
|
|
3
|
+
|
|
4
|
+
# Adjust the Warning Threshold
|
|
5
|
+
mpl.rcParams['figure.max_open_warning'] = 50 # Default is 20
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def create_plot(
    dataframe,
    x=None,
    y=None,
    kind='line',
    title=None,
    xlabel=None,
    ylabel=None,
    figsize=(10, 6),
    save_path=None
):
    """Create a plot from a DataFrame, optionally save it, and always clean up.

    Args:
        dataframe: Object exposing a pandas-style ``.plot(x=, y=, kind=, ax=)``.
        x: Column used for the x axis (forwarded to ``dataframe.plot``).
        y: Column(s) used for the y axis (forwarded to ``dataframe.plot``).
        kind: Plot kind forwarded to ``dataframe.plot`` (``'line'``, ``'bar'``, ...).
        title: Optional axes title.
        xlabel: Optional x-axis label.
        ylabel: Optional y-axis label.
        figsize: Figure size in inches as ``(width, height)``.
        save_path: When given, the figure is written there and the path is returned.

    Returns:
        ``save_path`` when a path was provided, otherwise the figure object.
        In both cases the figure has already been closed in pyplot.
    """
    # Created outside the try block so the cleanup below never sees an unbound name.
    fig, ax = plt.subplots(figsize=figsize)
    try:
        # Every kind goes through the same call; previously only 'line' and 'bar'
        # were drawn and any other kind silently produced an empty plot.
        dataframe.plot(x=x, y=y, kind=kind, ax=ax)

        if xlabel:
            ax.set_xlabel(xlabel)
        if ylabel:
            ax.set_ylabel(ylabel)
        if title:
            ax.set_title(title)

        if save_path:
            # Save this figure explicitly rather than whichever one is current.
            fig.savefig(save_path, bbox_inches='tight')
            return save_path

        return fig
    finally:
        # Always close the figure to prevent memory leaks
        plt.close(fig)
|
parrot/bots/troc.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from .chatbot import Chatbot
|
|
2
|
+
|
|
3
|
+
class AskTROC(Chatbot):
    """Chatbot preconfigured for T-ROC Global.

    Sets the company details, the role and the goal used by this bot on
    top of the base ``Chatbot``.
    """
    # Company contact details for T-ROC Global.
    company_information: dict = {
        "company": "T-ROC Global",
        "company_website": "https://www.trocglobal.com",
        "contact_email": "communications@trocglobal.com",
        "contact_form": "https://www.surveymonkey.com/r/TROC_Suggestion_Box",
    }
    role: str = "Expert T-ROCer"
    goal = "Bring useful information about T-ROC Global to employees."
|
parrot/conf.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from navconfig import config, BASE_DIR
|
|
3
|
+
from navconfig.logging import logging
|
|
4
|
+
from navigator.conf import default_dsn, CACHE_HOST, CACHE_PORT
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# disable debug on some libraries:
|
|
8
|
+
logging.getLogger(name='httpcore').setLevel(logging.INFO)
|
|
9
|
+
logging.getLogger(name='httpx').setLevel(logging.INFO)
|
|
10
|
+
logging.getLogger(name='groq').setLevel(logging.INFO)
|
|
11
|
+
logging.getLogger(name='selenium.webdriver').setLevel(logging.WARNING)
|
|
12
|
+
logging.getLogger(name='selenium').setLevel(logging.INFO)
|
|
13
|
+
logging.getLogger(name='matplotlib').setLevel(logging.WARNING)
|
|
14
|
+
logging.getLogger(name='PIL').setLevel(logging.INFO)
|
|
15
|
+
logging.getLogger("grpc").setLevel(logging.CRITICAL)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Static directory
|
|
19
|
+
STATIC_DIR = config.get('STATIC_DIR', fallback=BASE_DIR.joinpath('static'))
|
|
20
|
+
if isinstance(STATIC_DIR, str):
|
|
21
|
+
STATIC_DIR = Path(STATIC_DIR)
|
|
22
|
+
|
|
23
|
+
# LLM Model
|
|
24
|
+
DEFAULT_LLM_MODEL_NAME = config.get('LLM_MODEL_NAME', fallback='gemini-pro')
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
## MILVUS DB ##:
|
|
28
|
+
MILVUS_HOST = config.get('MILVUS_HOST', fallback='localhost')
|
|
29
|
+
MILVUS_PROTOCOL = config.get('MILVUS_PROTOCOL', fallback='http')
|
|
30
|
+
MILVUS_PORT = config.get('MILVUS_PORT', fallback=19530)
|
|
31
|
+
MILVUS_URL = config.get('MILVUS_URL')
|
|
32
|
+
MILVUS_TOKEN = config.get('MILVUS_TOKEN')
|
|
33
|
+
MILVUS_USER = config.get('MILVUS_USER')
|
|
34
|
+
MILVUS_PASSWORD = config.get('MILVUS_PASSWORD')
|
|
35
|
+
MILVUS_SECURE = config.getboolean('MILVUS_SECURE', fallback=False)
|
|
36
|
+
MILVUS_SERVER_NAME = config.get(
|
|
37
|
+
'MILVUS_SERVER_NAME'
|
|
38
|
+
)
|
|
39
|
+
MILVUS_CA_CERT = config.get('MILVUS_CA_CERT', fallback=None)
|
|
40
|
+
MILVUS_SERVER_CERT = config.get('MILVUS_SERVER_CERT', fallback=None)
|
|
41
|
+
MILVUS_SERVER_KEY = config.get('MILVUS_SERVER_KEY', fallback=None)
|
|
42
|
+
MILVUS_USE_TLSv2 = config.getboolean('MILVUS_USE_TLSv2', fallback=False)
|
|
43
|
+
|
|
44
|
+
# ScyllaDB Database:
|
|
45
|
+
SCYLLADB_DRIVER = config.get('SCYLLADB_DRIVER', fallback='scylladb')
|
|
46
|
+
SCYLLADB_HOST = config.get('SCYLLADB_HOST', fallback='localhost')
|
|
47
|
+
SCYLLADB_PORT = int(config.get('SCYLLADB_PORT', fallback=9042))
|
|
48
|
+
SCYLLADB_USERNAME = config.get('SCYLLADB_USERNAME', fallback='navigator')
|
|
49
|
+
SCYLLADB_PASSWORD = config.get('SCYLLADB_PASSWORD', fallback='navigator')
|
|
50
|
+
SCYLLADB_KEYSPACE = config.get('SCYLLADB_KEYSPACE', fallback='navigator')
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# BigQuery Configuration:
|
|
54
|
+
BIGQUERY_CREDENTIALS = config.get('BIGQUERY_CREDENTIALS')
|
|
55
|
+
BIGQUERY_PROJECT_ID = config.get('BIGQUERY_PROJECT_ID', fallback='navigator')
|
|
56
|
+
BIGQUERY_DATASET = config.get('BIGQUERY_DATASET', fallback='navigator')
|
|
57
|
+
|
|
58
|
+
# Redis History Configuration:
|
|
59
|
+
REDIS_HISTORY_DB = config.get('REDIS_HISTORY_DB', fallback=3)
|
|
60
|
+
REDIS_HISTORY_URL = f"redis://{CACHE_HOST}:{CACHE_PORT}/{REDIS_HISTORY_DB}"
|
|
61
|
+
|
|
62
|
+
def resolve_cert(crt):
    """Return the location of a certificate file as an absolute path.

    Args:
        crt: Certificate location as a string or path-like object.

    Returns:
        Path: ``BASE_DIR / crt`` for a relative input; the resolved
        (normalised, symlink-free) path for an absolute input.
    """
    cert = Path(crt)
    if cert.is_absolute():
        # Path.resolve() returns a new object; its result used to be discarded.
        return cert.resolve()
    return BASE_DIR.joinpath(cert)
|
|
69
|
+
|
|
70
|
+
if MILVUS_SERVER_CERT:
|
|
71
|
+
MILVUS_SERVER_CERT = str(resolve_cert(MILVUS_SERVER_CERT))
|
|
72
|
+
if MILVUS_CA_CERT:
|
|
73
|
+
MILVUS_CA_CERT = str(resolve_cert(MILVUS_CA_CERT))
|
|
74
|
+
if MILVUS_SERVER_KEY:
|
|
75
|
+
MILVUS_SERVER_KEY = str(resolve_cert(MILVUS_SERVER_KEY))
|
|
76
|
+
|
|
77
|
+
# QDRANT:
|
|
78
|
+
QDRANT_PROTOCOL = config.get('QDRANT_PROTOCOL', fallback='http')
|
|
79
|
+
QDRANT_HOST = config.get('QDRANT_HOST', fallback='localhost')
|
|
80
|
+
QDRANT_PORT = config.get('QDRANT_PORT', fallback=6333)
|
|
81
|
+
QDRANT_USE_HTTPS = config.getboolean('QDRANT_USE_HTTPS', fallback=False)
|
|
82
|
+
QDRANT_URL = config.get('QDRANT_URL')
|
|
83
|
+
# QDRANT Connection Type: server or cloud
|
|
84
|
+
QDRANT_CONN_TYPE = config.get('QDRANT_CONN_TYPE', fallback='server')
|
|
85
|
+
|
|
86
|
+
# ChromaDB:
|
|
87
|
+
CHROMADB_HOST = config.get('CHROMADB_HOST', fallback='localhost')
|
|
88
|
+
CHROMADB_PORT = config.get('CHROMADB_PORT', fallback=8000)
|
|
89
|
+
|
|
90
|
+
# Embedding Device:
|
|
91
|
+
EMBEDDING_DEVICE = config.get('EMBEDDING_DEVICE', fallback='cpu')
|
|
92
|
+
EMBEDDING_DEFAULT_MODEL = config.get(
|
|
93
|
+
'EMBEDDING_DEFAULT_MODEL',
|
|
94
|
+
fallback='thenlper/gte-base'
|
|
95
|
+
)
|
|
96
|
+
MAX_VRAM_AVAILABLE = config.get('MAX_VRAM_AVAILABLE', fallback=20000)
|
|
97
|
+
RAM_AVAILABLE = config.get('RAM_AVAILABLE', fallback=819200)
|
|
98
|
+
CUDA_DEFAULT_DEVICE = config.get('CUDA_DEFAULT_DEVICE', fallback='cpu')
|
|
99
|
+
CUDA_DEFAULT_DEVICE_NUMBER = config.getint('CUDA_DEFAULT_DEVICE_NUMBER', fallback=0)
|
|
100
|
+
MAX_BATCH_SIZE = config.get('MAX_BATCH_SIZE', fallback=768)
|
|
101
|
+
|
|
102
|
+
# Enable Teams Bot:
|
|
103
|
+
ENABLE_AZURE_BOT = config.getboolean('ENABLE_AZURE_BOT', fallback=True)
|
|
104
|
+
|
|
105
|
+
## Google Services:
|
|
106
|
+
GOOGLE_API_KEY = config.get('GOOGLE_API_KEY')
|
|
107
|
+
### Google Service Credentials:
|
|
108
|
+
GA_SERVICE_ACCOUNT_NAME = config.get('GA_SERVICE_ACCOUNT_NAME', fallback="google.json")
|
|
109
|
+
GA_SERVICE_PATH = config.get('GA_SERVICE_PATH', fallback="env/google/")
|
|
110
|
+
if isinstance(GA_SERVICE_PATH, str):
|
|
111
|
+
GA_SERVICE_PATH = Path(GA_SERVICE_PATH)
|
|
112
|
+
|
|
113
|
+
GOOGLE_TTS_SERVICE = config.get(
|
|
114
|
+
'GOOGLE_TTS_SERVICE',
|
|
115
|
+
fallback=GA_SERVICE_PATH.joinpath('tts-service.json')
|
|
116
|
+
)
|
|
117
|
+
if isinstance(GOOGLE_TTS_SERVICE, str):
|
|
118
|
+
GOOGLE_TTS_SERVICE = Path(GOOGLE_TTS_SERVICE)
|
|
119
|
+
if not GOOGLE_TTS_SERVICE.is_absolute():
|
|
120
|
+
GOOGLE_TTS_SERVICE = BASE_DIR.joinpath(GOOGLE_TTS_SERVICE)
|
|
121
|
+
if not GOOGLE_TTS_SERVICE.exists():
|
|
122
|
+
GOOGLE_TTS_SERVICE = None
|
|
123
|
+
|
|
124
|
+
# BASE STATIC:
|
|
125
|
+
BASE_STATIC_URL = config.get(
|
|
126
|
+
'BASE_STATIC_URL',
|
|
127
|
+
fallback='http://localhost:5000/static'
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
# Google SerpAPI:
|
|
131
|
+
SERPAPI_API_KEY = config.get('SERPAPI_API_KEY')
|
|
132
|
+
|
|
133
|
+
# Groq API Key:
|
|
134
|
+
GROQ_API_KEY = config.get('GROQ_API_KEY')
|
|
135
|
+
|
|
136
|
+
# Ethical Principle:
|
|
137
|
+
ETHICAL_PRINCIPLE = config.get(
|
|
138
|
+
'ETHICAL_PRINCIPLE',
|
|
139
|
+
fallback='The model should only talk about ethical and legal things.'
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
# Embedding Configuration:
|
|
143
|
+
|
|
144
|
+
# VERTEX
|
|
145
|
+
VERTEX_PROJECT_ID = config.get('VERTEX_PROJECT_ID')
|
|
146
|
+
VERTEX_REGION = config.get('VERTEX_REGION')
|
|
147
|
+
|
|
148
|
+
# OpenAI:
|
|
149
|
+
OPENAI_API_KEY = config.get('OPENAI_API_KEY')
|
|
150
|
+
OPENAI_ORGANIZATION = config.get('OPENAI_ORGANIZATION')
|
|
151
|
+
|
|
152
|
+
## HTTPClient
|
|
153
|
+
HTTPCLIENT_MAX_SEMAPHORE = config.getint("HTTPCLIENT_MAX_SEMAPHORE", fallback=5)
|
|
154
|
+
HTTPCLIENT_MAX_WORKERS = config.getint("HTTPCLIENT_MAX_WORKERS", fallback=1)
|
|
155
|
+
|
|
156
|
+
## Google API:
|
|
157
|
+
GOOGLE_API_KEY = config.get('GOOGLE_API_KEY')
|
|
158
|
+
GOOGLE_SEARCH_API_KEY = config.get('GOOGLE_SEARCH_API_KEY')
|
|
159
|
+
GOOGLE_SEARCH_ENGINE_ID = config.get('GOOGLE_SEARCH_ENGINE_ID')
|
|
160
|
+
GOOGLE_PLACES_API_KEY = config.get('GOOGLE_PLACES_API_KEY')
|
|
161
|
+
GOOGLE_CREDENTIALS_FILE = Path(
|
|
162
|
+
config.get(
|
|
163
|
+
'GOOGLE_CREDENTIALS_FILE',
|
|
164
|
+
fallback=BASE_DIR.joinpath('env', 'google', 'key.json')
|
|
165
|
+
)
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
## LLM default config:
|
|
169
|
+
DEFAULT_LLM_MODEL = config.get('LLM_MODEL', fallback='gemini-1.5-pro')
|
|
170
|
+
DEFAULT_LLM_TEMPERATURE = config.get('LLM_TEMPERATURE', fallback=0.1)
|
parrot/crew/__init__.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""
|
|
2
|
+
A Directory for adding more Tools to CrewAI
|
|
3
|
+
"""
|
|
4
|
+
from .duckgo import DuckDuckGoRelevantSearch, DuckDuckGoSearchTool
|
|
5
|
+
from .rag import RagSearchTool
|
|
6
|
+
from .file import SaveFile
|
|
7
|
+
from .bing import BingSearchTool
|
|
8
|
+
from .md2pdf import MarkdownToPDFTool
|
|
9
|
+
from .google import GoogleSearchTool, GoogleSiteSearchTool, GoogleLocationFinder
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
'GoogleSearchTool',
|
|
14
|
+
'GoogleSiteSearchTool',
|
|
15
|
+
'BingSearchTool',
|
|
16
|
+
'DuckDuckGoRelevantSearch',
|
|
17
|
+
'DuckDuckGoSearchTool',
|
|
18
|
+
'RagSearchTool',
|
|
19
|
+
'SaveFile',
|
|
20
|
+
'MarkdownToPDFTool',
|
|
21
|
+
'GoogleLocationFinder',
|
|
22
|
+
]
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from langchain_community.utilities.bing_search import BingSearchAPIWrapper
|
|
2
|
+
from crewai_tools import BaseTool
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class BingSearchTool(BaseTool):
    """Microsoft Bing Search Tool."""
    name: str = "Microsoft Bing Search"
    description: str = "Search the web using Microsoft Bing Search API"

    def _run(self, query: str) -> list[dict]:
        """Search Bing for ``query`` and return up to five result entries.

        Args:
            query: Free-text search query.

        Returns:
            The result list produced by ``BingSearchAPIWrapper.results``
            (one dict per hit).
        """
        # One value drives both the wrapper default and the per-call limit.
        max_results = 5
        bing = BingSearchAPIWrapper(k=max_results)
        return bing.results(query=query, num_results=max_results)
|