insightflow-ai 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,51 @@
1
+ import pandas as pd
2
+ from .autofix import run_autofix, run_autofix_file, set_hf_token as autofix_set_token
3
+ from .autoeda import run_autoeda, run_autoeda_file, set_hf_token as autoeda_set_token
4
+ from .automodel import run_automodel, run_automodel_file, set_hf_token as automodel_set_token
5
+ from .autoreport import run_autoreport, set_hf_token as autoreport_set_token
6
+
7
def _sync_tokens(hf_token: str):
    """Hand the same HuggingFace token to each sub-module's token setter."""
    token_setters = (
        autofix_set_token,
        autoeda_set_token,
        automodel_set_token,
        autoreport_set_token,
    )
    for apply_token in token_setters:
        apply_token(hf_token)
13
+
14
def autofix(data, hf_token: str, save_path="cleaned_data.csv"):
    """
    Cleans messy data using AI.

    Args:
        data: a file path (``str`` or any ``os.PathLike`` object such as
            ``pathlib.Path``) or a pandas DataFrame.
        hf_token: HuggingFace API token; it is pushed to every sub-module
            before the run starts.
        save_path: forwarded unchanged to the underlying runner.

    Returns:
        Whatever ``run_autofix_file`` (path input) or ``run_autofix``
        (any other input) returns.
    """
    import os  # local import so this block does not depend on file-level imports

    _sync_tokens(hf_token)
    # Path objects previously fell through to the DataFrame branch. Convert
    # them to a plain string so the file runner receives what it always has.
    if isinstance(data, (str, os.PathLike)):
        return run_autofix_file(os.fspath(data), save_path=save_path)
    return run_autofix(data, save_path=save_path)
23
+
24
def autoeda(data, hf_token: str, save_path="eda_report.html"):
    """
    Generates an Exploratory Data Analysis report.

    Args:
        data: a file path (``str`` or any ``os.PathLike`` object such as
            ``pathlib.Path``) or a pandas DataFrame.
        hf_token: HuggingFace API token; it is pushed to every sub-module
            before the run starts.
        save_path: forwarded unchanged to the underlying runner.

    Returns:
        Whatever ``run_autoeda_file`` (path input) or ``run_autoeda``
        (any other input) returns.
    """
    import os  # local import so this block does not depend on file-level imports

    _sync_tokens(hf_token)
    # Path objects previously fell through to the DataFrame branch. Convert
    # them to a plain string so the file runner receives what it always has.
    if isinstance(data, (str, os.PathLike)):
        return run_autoeda_file(os.fspath(data), save_path=save_path)
    return run_autoeda(data, save_path=save_path)
33
+
34
def automodel(data, hf_token: str, target_col=None, task_type="auto", save_path="model_report.html"):
    """
    Trains multiple ML models and selects the best one.

    Args:
        data: a file path (``str`` or any ``os.PathLike`` object such as
            ``pathlib.Path``) or a pandas DataFrame.
        hf_token: HuggingFace API token; it is pushed to every sub-module
            before the run starts.
        target_col: forwarded unchanged to the underlying runner.
        task_type: forwarded unchanged to the underlying runner.
        save_path: forwarded unchanged to the underlying runner.

    Returns:
        Whatever ``run_automodel_file`` (path input) or ``run_automodel``
        (any other input) returns.
    """
    import os  # local import so this block does not depend on file-level imports

    _sync_tokens(hf_token)
    runner_options = {
        "target_col": target_col,
        "task_type": task_type,
        "save_path": save_path,
    }
    # Path objects previously fell through to the DataFrame branch. Convert
    # them to a plain string so the file runner receives what it always has.
    if isinstance(data, (str, os.PathLike)):
        return run_automodel_file(os.fspath(data), **runner_options)
    return run_automodel(data, **runner_options)
43
+
44
def autoreport(eda_results: dict, eda_insights: dict, model_results: dict, model_insights: dict, target_col: str, hf_token: str, save_path="business_report.html"):
    """
    Build the business report from the EDA and modelling outputs.

    The token is pushed to every sub-module first; the five analysis inputs
    are then passed positionally, in the order received, to
    ``run_autoreport`` together with ``save_path``.
    """
    _sync_tokens(hf_token)
    report_inputs = (
        eda_results,
        eda_insights,
        model_results,
        model_insights,
        target_col,
    )
    return run_autoreport(*report_inputs, save_path=save_path)
50
+
51
# Names exported by a star-import of this package.
__all__ = [
    "autofix",
    "autoeda",
    "automodel",
    "autoreport",
]
@@ -0,0 +1,75 @@
1
+ import os
2
+ import pandas as pd
3
+ from .autoeda_agent import AutoEDAAgent
4
+
5
+
6
+ _hf_token = None
7
+
8
+
9
def set_hf_token(token: str):
    """
    Remember the HuggingFace token and export it as ``HF_TOKEN``.

    Args:
        token: the API token string.

    Raises:
        TypeError: if ``token`` is not a string (raised by ``os.environ``).
        ValueError: if ``os.environ`` rejects the value for another reason.

    The environment is written first on purpose: ``os.environ`` validates the
    value, so when it raises, the module-level token is left untouched instead
    of holding a value the call just rejected.
    """
    global _hf_token
    os.environ["HF_TOKEN"] = token
    _hf_token = token
    print("HF token set successfully.")
14
+
15
+
16
def get_hf_token():
    """Return the module-level token when it is truthy, otherwise the ``HF_TOKEN`` environment variable (``None`` if unset)."""
    return _hf_token or os.environ.get("HF_TOKEN", None)
21
+
22
+
23
def run_autoeda(df: pd.DataFrame, save_path: str = "eda_report.html"):
    """
    Run the EDA agent on an in-memory DataFrame.

    Args:
        df: pandas DataFrame to analyze
        save_path: path handed to the agent for the HTML report

    Returns:
        The value returned by ``AutoEDAAgent.run``.

    Raises:
        RuntimeError: when ``get_hf_token()`` yields no token.
    """
    hf_token = get_hf_token()
    if hf_token:
        return AutoEDAAgent(hf_token).run(df, save_path=save_path)
    raise RuntimeError("HF token not set. Call set_hf_token('your_token') first.")
39
+
40
+
41
def run_autoeda_file(file_path: str, save_path: str = None):
    """
    Run the EDA agent on a data file.

    Args:
        file_path: path of the data file passed to the agent
        save_path: path handed to the agent for the HTML report

    Returns:
        The value returned by ``AutoEDAAgent.run_file``.

    Raises:
        RuntimeError: when ``get_hf_token()`` yields no token.
    """
    hf_token = get_hf_token()
    if hf_token:
        return AutoEDAAgent(hf_token).run_file(file_path, save_path=save_path)
    raise RuntimeError("HF token not set. Call set_hf_token('your_token') first.")
57
+
58
+
59
if __name__ == "__main__":
    # Interactive entry point: ask for a token and a data file, then run EDA.
    import sys

    divider = "=" * 50
    for banner_line in (divider, " InsightFlow Auto EDA", divider):
        print(banner_line)

    token = input("\nEnter your HuggingFace API token: ").strip()
    set_hf_token(token)

    file_path = input("Enter the path to your data file: ").strip()
    if os.path.exists(file_path):
        run_autoeda_file(file_path)
        print("\nDone!")
    else:
        print(f"Error: File '{file_path}' not found.")
        sys.exit(1)
@@ -0,0 +1,410 @@
1
+ import os
2
+ import json
3
+ import pandas as pd
4
+ import numpy as np
5
+ from openai import OpenAI
6
+ from .eda import run_full_eda, dataset_overview, column_stats
7
+ from .report_generator import build_report
8
+
9
+
10
class AutoEDAAgent:
    """
    Runs the EDA pipeline, asks an LLM for insights and writes an HTML report.

    The LLM is reached through an OpenAI-compatible client pointed at the
    HuggingFace router. The report is a single self-contained HTML page whose
    JavaScript renders the EDA results embedded in it as JSON.
    """

    def __init__(self, hf_token: str, model: str = "zai-org/GLM-5.2:novita"):
        """
        Args:
            hf_token: HuggingFace token, used as the API key of the client.
            model: model identifier sent with every chat-completion request.
        """
        self.client = OpenAI(base_url="https://router.huggingface.co/v1", api_key=hf_token)
        self.model = model

    @staticmethod
    def _normalize_correlation(eda_results):
        """Replace a correlation matrix that has ``to_dict`` with its dict form, in place."""
        correlation = eda_results.get("correlation")
        if not isinstance(correlation, dict):
            return
        matrix = correlation.get("matrix")
        if hasattr(matrix, "to_dict"):
            correlation["matrix"] = matrix.to_dict()

    @staticmethod
    def _parse_llm_json(content):
        """
        Turn the raw LLM reply into a Python object.

        Raises:
            ValueError: when the reply is empty or contains no parseable JSON
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        if not content:
            raise ValueError("LLM returned an empty response")
        raw = content.strip()
        if raw.startswith("```"):
            # Drop the markdown fence lines, keep everything between them.
            raw = "\n".join(line for line in raw.split("\n") if not line.startswith("```"))
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            # Models sometimes wrap the object in prose; salvage the outermost
            # {...} span before giving up.
            start, end = raw.find("{"), raw.rfind("}")
            if start == -1 or end <= start:
                raise
            return json.loads(raw[start:end + 1])

    def _ask_llm(self, eda_results):
        """
        Send a summary of ``eda_results`` to the LLM and return its parsed JSON reply.

        Side effect: a correlation matrix exposing ``to_dict`` is converted to a
        dict inside ``eda_results``. The HTML report reads that dict form, so
        the conversion is kept here.

        Args:
            eda_results: dict that must contain ``"overview"``; the other
                sections are optional.

        Returns:
            The object decoded from the model's JSON answer.

        Raises:
            KeyError: if ``"overview"`` is missing.
            ValueError: if the reply is empty or not valid JSON.
        """
        overview = eda_results["overview"]

        # The bulky per-column payloads are left out of the prompt.
        col_info = {
            name: {key: value for key, value in stats.items() if key != "top5"}
            for name, stats in eda_results.get("column_stats", {}).items()
        }
        dist_info = {
            name: {key: value for key, value in stats.items() if key != "histogram"}
            for name, stats in eda_results.get("distributions", {}).items()
        }

        self._normalize_correlation(eda_results)
        correlation = eda_results.get("correlation") or {}

        def dump(value):
            # default=str on every section: one value that json cannot encode
            # natively must not discard the whole AI step.
            return json.dumps(value, default=str)

        overview_json = dump(overview)
        column_json = dump(col_info)
        missing_json = dump(eda_results.get("missing", {}))
        outliers_json = dump(eda_results.get("outliers", {}))
        pairs_json = dump(correlation.get("strong_pairs", []))
        categorical_json = dump(eda_results.get("categorical", {}))
        distributions_json = dump(dist_info)

        prompt = f"""You are an EDA expert. Analyze these dataset results and provide insights.

OVERVIEW: {overview_json}
COLUMN STATS: {column_json}
MISSING: {missing_json}
OUTLIERS: {outliers_json}
STRONG CORRELATIONS: {pairs_json}
CATEGORICAL: {categorical_json}
DISTRIBUTIONS: {distributions_json}

Return a JSON object with these keys:
- "summary": 2-3 sentence overall summary
- "key_findings": list of 5-8 important findings (strings)
- "warnings": list of data quality warnings (strings)
- "recommendations": list of 3-5 actionable recommendations (strings)
- "target_suggestion": which column could be a good prediction target and why (string)

Respond ONLY with valid JSON. No markdown, no explanation."""

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,
        )
        return self._parse_llm_json(response.choices[0].message.content)

    def _build_html(self, df, eda_results, ai_insights, save_path):
        """
        Render the report page, write it to ``save_path`` and return the HTML.

        Args:
            df: unused; kept so existing callers keep working.
            eda_results: EDA dict embedded in the page as the ``eda`` constant.
            ai_insights: LLM insight dict embedded as the ``ai`` constant.
            save_path: destination file, written as UTF-8.

        Returns:
            The generated HTML string.

        Security: column names, data values and LLM text are untrusted. The
        embedded JSON has ``<``, ``>`` and ``&`` escaped so it cannot close the
        script element, and the page script passes every interpolated value
        through ``esc()`` before assigning to ``innerHTML``.
        """
        import datetime
        now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # The page script reads the matrix as a plain object; make sure it is
        # one even when this method is called without _ask_llm running first.
        self._normalize_correlation(eda_results)

        # Safely dump data to inject into Javascript
        def safe_json(obj):
            dumped = json.dumps(obj, default=str)
            # These characters only occur inside JSON strings, where the
            # \uXXXX form decodes to the same text.
            return (
                dumped.replace("<", "\\u003c")
                .replace(">", "\\u003e")
                .replace("&", "\\u0026")
            )

        eda_json = safe_json(eda_results)
        ai_json = safe_json(ai_insights)

        html = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"><meta name="viewport" content="width=device-width,initial-scale=1.0">
<title>InsightFlow AutoEDA Pro</title>
<link href="https://fonts.googleapis.com/css2?family=Outfit:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;700&display=swap" rel="stylesheet">
<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0/dist/chart.umd.min.js"></script>
<style>
:root {{
--bg: #09090b; --surface: #18181b; --surface-hover: #27272a;
--border: #3f3f46; --border-light: #27272a;
--text: #f4f4f5; --text-muted: #a1a1aa;
--primary: #3b82f6; --primary-glow: rgba(59,130,246,0.3);
--success: #10b981; --warning: #f59e0b; --danger: #ef4444; --accent: #8b5cf6;
}}
* {{ margin:0; padding:0; box-sizing:border-box; }}
body {{ font-family:'Outfit',sans-serif; background:var(--bg); color:var(--text); line-height:1.6; padding-bottom:4rem; }}
.bg-grid {{ position:fixed; inset:0; background-image:linear-gradient(var(--border-light) 1px,transparent 1px),linear-gradient(90deg,var(--border-light) 1px,transparent 1px); background-size:40px 40px; pointer-events:none; opacity:0.3; z-index:-1; }}
.bg-glow {{ position:fixed; top: -20%; left: -10%; width: 50%; height: 50%; background: radial-gradient(circle, var(--primary-glow) 0%, transparent 60%); filter: blur(100px); z-index:-1; pointer-events:none; }}
.bg-glow-2 {{ position:fixed; bottom: -20%; right: -10%; width: 50%; height: 50%; background: radial-gradient(circle, rgba(139,92,246,0.2) 0%, transparent 60%); filter: blur(100px); z-index:-1; pointer-events:none; }}
.container {{ max-width:1400px; margin:0 auto; padding:2rem; }}

header {{ text-align:center; padding:4rem 2rem; background:var(--surface); border-radius:24px; border:1px solid var(--border); position:relative; overflow:hidden; margin-bottom:2rem; box-shadow: 0 25px 50px -12px rgba(0,0,0,0.5); }}
header::after {{ content:''; position:absolute; inset:0; background: linear-gradient(180deg, transparent, rgba(0,0,0,0.4)); pointer-events:none; }}
h1 {{ font-size:3.5rem; font-weight:800; letter-spacing:-0.05em; margin-bottom:0.5rem; background:linear-gradient(to right, #60a5fa, #c084fc); -webkit-background-clip:text; -webkit-text-fill-color:transparent; }}
.subtitle {{ font-size:1.1rem; color:var(--text-muted); font-weight:400; }}
.timestamp {{ font-size:0.85rem; color:#52525b; margin-top:1rem; font-family:'JetBrains Mono',monospace; }}

.grid-metrics {{ display:grid; grid-template-columns:repeat(auto-fit,minmax(200px,1fr)); gap:1rem; margin-bottom:2rem; }}
.card {{ background:rgba(24,24,27,0.7); backdrop-filter:blur(16px); border:1px solid var(--border); border-radius:20px; padding:1.5rem; transition:transform 0.3s, border-color 0.3s; }}
.card:hover {{ transform:translateY(-5px); border-color:var(--primary); }}
.metric-val {{ font-size:2.5rem; font-weight:800; color:var(--text); line-height:1; margin-bottom:0.25rem; }}
.metric-lbl {{ font-size:0.8rem; color:var(--text-muted); text-transform:uppercase; letter-spacing:0.1em; font-weight:600; }}

.section-title {{ font-size:1.75rem; font-weight:700; margin-bottom:1.5rem; display:flex; align-items:center; gap:0.5rem; margin-top:3rem; }}
.section-title span {{ color:var(--primary); }}

.ai-box {{ background:linear-gradient(145deg, rgba(24,24,27,0.9), rgba(9,9,11,0.9)); border:1px solid rgba(139,92,246,0.3); position:relative; overflow:hidden; }}
.ai-box::before {{ content:''; position:absolute; top:0; left:0; right:0; height:2px; background:linear-gradient(90deg, #3b82f6, #8b5cf6, #ec4899); }}
.ai-summary {{ font-size:1.1rem; color:#d4d4d8; margin-bottom:1.5rem; line-height:1.7; }}
.insight-list {{ display:flex; flex-direction:column; gap:0.75rem; }}
.insight-item {{ padding:1rem; border-radius:12px; background:rgba(255,255,255,0.03); border-left:4px solid var(--primary); display:flex; gap:1rem; align-items:flex-start; font-size:0.95rem; }}
.insight-item.warn {{ border-color:var(--warning); background:rgba(245,158,11,0.05); }}
.insight-item.rec {{ border-color:var(--success); background:rgba(16,185,129,0.05); }}
.insight-item.target {{ border-color:var(--accent); background:rgba(139,92,246,0.1); padding:1.5rem; font-size:1.05rem; margin-top:1rem; }}
.icon {{ font-size:1.25rem; line-height:1; }}

.grid-charts {{ display:grid; grid-template-columns:repeat(auto-fit,minmax(400px,1fr)); gap:1.5rem; }}
.chart-wrapper {{ height:300px; width:100%; position:relative; }}

.table-wrap {{ overflow-x:auto; border-radius:16px; border:1px solid var(--border); background:rgba(24,24,27,0.5); backdrop-filter:blur(10px); }}
table {{ width:100%; border-collapse:collapse; text-align:left; font-size:0.9rem; }}
th, td {{ padding:1rem; border-bottom:1px solid var(--border-light); }}
th {{ background:rgba(255,255,255,0.02); font-weight:600; color:var(--text-muted); text-transform:uppercase; letter-spacing:0.05em; font-size:0.75rem; }}
tr:last-child td {{ border-bottom:none; }}
tr:hover td {{ background:rgba(255,255,255,0.02); }}
.mono {{ font-family:'JetBrains Mono',monospace; font-size:0.85rem; color:#94a3b8; }}

.badge {{ padding:0.25rem 0.6rem; border-radius:99px; font-size:0.7rem; font-weight:700; text-transform:uppercase; letter-spacing:0.05em; }}
.b-num {{ background:rgba(59,130,246,0.15); color:#60a5fa; }}
.b-cat {{ background:rgba(168,85,247,0.15); color:#c084fc; }}
.b-bool {{ background:rgba(245,158,11,0.15); color:#fbbf24; }}

.corr-grid {{ display:flex; flex-direction:column; gap:0.25rem; font-family:'JetBrains Mono',monospace; font-size:0.8rem; overflow-x:auto; padding:1rem; }}
.corr-row {{ display:flex; gap:0.25rem; }}
.corr-cell {{ width:60px; height:40px; display:flex; align-items:center; justify-content:center; border-radius:6px; color:#fff; font-weight:600; flex-shrink:0; }}
.corr-label {{ width:120px; justify-content:flex-end; padding-right:1rem; color:var(--text-muted); font-family:'Outfit',sans-serif; }}

@media (max-width:768px) {{ .grid-charts {{ grid-template-columns:1fr; }} }}
</style>
</head>
<body>
<div class="bg-grid"></div>
<div class="bg-glow"></div>
<div class="bg-glow-2"></div>
<div class="container">

<header>
<h1>InsightFlow AutoEDA</h1>
<div class="subtitle" id="h-subtitle"></div>
<div class="timestamp">Generated: {now}</div>
</header>

<div class="grid-metrics" id="metrics-container"></div>

<div class="card ai-box">
<div class="section-title" style="margin-top:0;"><span>✨</span> AI Insights</div>
<div class="ai-summary" id="ai-summary"></div>
<div class="insight-list" id="ai-insights"></div>
</div>

<h2 class="section-title"><span>📈</span> Auto Plots (Distributions & Frequencies)</h2>
<div class="grid-charts" id="auto-plots"></div>

<h2 class="section-title"><span>🔍</span> Correlation Matrix</h2>
<div class="card" style="padding:0;">
<div class="corr-grid" id="corr-matrix"></div>
</div>

<h2 class="section-title"><span>📊</span> Dataset Statistics</h2>
<div class="table-wrap">
<table id="stats-table">
<thead>
<tr><th>Feature</th><th>Type</th><th>Valid</th><th>Missing</th><th>Unique</th><th>Key Stats / Top Values</th></tr>
</thead>
<tbody></tbody>
</table>
</div>

</div>

<script>
const eda = {eda_json};
const ai = {ai_json};

// Format large numbers
const fmt = (n) => new Intl.NumberFormat().format(n);

// Escape untrusted text (column names, data values, LLM output) before it goes into innerHTML
const esc = (s) => String(s).replace(/[&<>"']/g, (ch) => ({{'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}})[ch]);

// 1. Header & Metrics
document.getElementById('h-subtitle').innerText = `${{fmt(eda.overview.rows)}} rows × ${{fmt(eda.overview.columns)}} columns | ${{eda.overview.memory_mb}} MB`;

const metricsHtml = `
<div class="card"><div class="metric-val">${{esc(fmt(eda.overview.rows))}}</div><div class="metric-lbl">Total Rows</div></div>
<div class="card"><div class="metric-val">${{esc(fmt(eda.overview.columns))}}</div><div class="metric-lbl">Features</div></div>
<div class="card"><div class="metric-val">${{esc(eda.overview.missing_pct)}}%</div><div class="metric-lbl">Missing Data</div></div>
<div class="card"><div class="metric-val">${{esc(fmt(eda.overview.duplicate_rows))}}</div><div class="metric-lbl">Duplicates</div></div>
`;
document.getElementById('metrics-container').innerHTML = metricsHtml;

// 2. AI Insights
document.getElementById('ai-summary').innerText = ai.summary || 'AI Analysis unavailable.';
let insightsHtml = '';
if(ai.key_findings) ai.key_findings.forEach(f => insightsHtml += `<div class="insight-item"><span class="icon">💡</span><div>${{esc(f)}}</div></div>`);
if(ai.warnings) ai.warnings.forEach(w => insightsHtml += `<div class="insight-item warn"><span class="icon">⚠️</span><div>${{esc(w)}}</div></div>`);
if(ai.recommendations) ai.recommendations.forEach(r => insightsHtml += `<div class="insight-item rec"><span class="icon">✅</span><div>${{esc(r)}}</div></div>`);
if(ai.target_suggestion) insightsHtml += `<div class="insight-item target"><span class="icon">🎯</span><div><strong>Target Column Suggestion:</strong><br>${{esc(ai.target_suggestion)}}</div></div>`;
document.getElementById('ai-insights').innerHTML = insightsHtml;

// 3. Stats Table
const tbody = document.querySelector('#stats-table tbody');
Object.keys(eda.column_stats).forEach(col => {{
const stat = eda.column_stats[col];
const isNum = stat.dtype.includes('int') || stat.dtype.includes('float');
const isBool = stat.dtype.includes('bool');
const badgeClass = isNum ? 'b-num' : (isBool ? 'b-bool' : 'b-cat');
const badgeText = isNum ? 'NUM' : (isBool ? 'BOOL' : 'CAT');

let extra = '';
if(isNum) extra = `μ = ${{stat.mean}} | σ = ${{stat.std}} | Range: [${{stat.min}} .. ${{stat.max}}]`;
else if(stat.top_value) extra = `Top: ${{stat.top_value}} (${{stat.top_freq}}x)`;

let missText = stat.missing > 0 ? `<span style="color:var(--danger)">${{esc(stat.missing)}} (${{esc(stat.missing_pct)}}%)</span>` : '0';

tbody.innerHTML += `
<tr>
<td style="font-weight:600; color:var(--text);">${{esc(col)}}</td>
<td><span class="badge ${{badgeClass}}">${{badgeText}}</span></td>
<td>${{esc(stat.count)}}</td>
<td>${{missText}}</td>
<td>${{esc(stat.unique)}}</td>
<td class="mono">${{esc(extra)}}</td>
</tr>
`;
}});

// 4. Auto Plots
const plotsContainer = document.getElementById('auto-plots');
let chartIndex = 0;

// Colors for charts
const colors = [
'rgba(59, 130, 246, 0.8)', 'rgba(139, 92, 246, 0.8)',
'rgba(16, 185, 129, 0.8)', 'rgba(245, 158, 11, 0.8)',
'rgba(236, 72, 153, 0.8)'
];

// Numeric Distributions (Histograms)
if(eda.distributions) {{
Object.keys(eda.distributions).forEach((col, i) => {{
const dist = eda.distributions[col];
if(!dist.histogram) return;

const cid = `chart-${{chartIndex++}}`;
plotsContainer.innerHTML += `
<div class="card">
<h3 style="font-size:1rem; margin-bottom:1rem;">${{esc(col)}} <span style="font-size:0.75rem; color:var(--text-muted); font-weight:normal;">(${{esc(dist.skew_label)}})</span></h3>
<div class="chart-wrapper"><canvas id="${{cid}}"></canvas></div>
</div>
`;

// Use timeout to allow DOM to render canvas elements first
setTimeout(() => {{
const ctx = document.getElementById(cid);
// Build labels from edges
const labels = [];
for(let j=0; j<dist.histogram.counts.length; j++){{
labels.push(`${{dist.histogram.edges[j].toFixed(1)}} - ${{dist.histogram.edges[j+1].toFixed(1)}}`);
}}

new Chart(ctx, {{
type: 'bar',
data: {{
labels: labels,
datasets: [{{
label: 'Count',
data: dist.histogram.counts,
backgroundColor: colors[i % colors.length],
borderRadius: 4
}}]
}},
options: {{
responsive: true, maintainAspectRatio: false,
plugins: {{ legend: {{ display: false }} }},
scales: {{
x: {{ ticks: {{ color: '#a1a1aa', maxTicksLimit: 6 }}, grid: {{ display: false }} }},
y: {{ ticks: {{ color: '#a1a1aa' }}, grid: {{ color: 'rgba(255,255,255,0.05)' }} }}
}}
}}
}});
}}, 100);
}});
}}

// Categorical Distributions (Doughnuts)
if(eda.categorical) {{
Object.keys(eda.categorical).forEach((col, i) => {{
const cat = eda.categorical[col];
if(!cat.top_values) return;

const cid = `chart-${{chartIndex++}}`;
plotsContainer.innerHTML += `
<div class="card">
<h3 style="font-size:1rem; margin-bottom:1rem;">${{esc(col)}} <span style="font-size:0.75rem; color:var(--text-muted); font-weight:normal;">(Top Values)</span></h3>
<div class="chart-wrapper"><canvas id="${{cid}}"></canvas></div>
</div>
`;

setTimeout(() => {{
const ctx = document.getElementById(cid);
const labels = Object.keys(cat.top_values);
const data = Object.values(cat.top_values);

new Chart(ctx, {{
type: 'doughnut',
data: {{
labels: labels,
datasets: [{{
data: data,
backgroundColor: colors,
borderWidth: 0,
hoverOffset: 4
}}]
}},
options: {{
responsive: true, maintainAspectRatio: false,
plugins: {{ legend: {{ position: 'right', labels: {{ color: '#a1a1aa' }} }} }},
cutout: '70%'
}}
}});
}}, 100);
}});
}}

// 5. Correlation Matrix
const corrContainer = document.getElementById('corr-matrix');
if(eda.correlation && eda.correlation.matrix) {{
const matrix = eda.correlation.matrix;
const cols = Object.keys(matrix);

// Header row
let headerHtml = `<div class="corr-row"><div class="corr-cell corr-label"></div>`;
cols.forEach(c => headerHtml += `<div class="corr-cell" style="color:var(--text-muted); font-size:0.7rem;">${{esc(c.substring(0,6))}}</div>`);
headerHtml += `</div>`;
corrContainer.innerHTML += headerHtml;

// Matrix rows
cols.forEach(r => {{
let rowHtml = `<div class="corr-row"><div class="corr-cell corr-label">${{esc(r)}}</div>`;
cols.forEach(c => {{
const val = matrix[r][c];
let bg = 'rgba(255,255,255,0.02)';
if(r !== c) {{
const intensity = Math.abs(val);
bg = val > 0 ? `rgba(59,130,246,${{intensity}})` : `rgba(239,68,68,${{intensity}})`;
}}
rowHtml += `<div class="corr-cell" style="background:${{bg}};">${{val.toFixed(2)}}</div>`;
}});
rowHtml += `</div>`;
corrContainer.innerHTML += rowHtml;
}});
}} else {{
corrContainer.innerHTML = '<div style="padding:1rem; color:var(--text-muted);">Not enough numeric columns for correlation analysis.</div>';
}}

</script>
</body>
</html>"""

        with open(save_path, "w", encoding="utf-8") as f:
            f.write(html)
        print(f" EDA report saved to: {os.path.abspath(save_path)}")
        return html

    def run(self, df: pd.DataFrame, save_path: str = "eda_report.html"):
        """
        Run the three pipeline steps: EDA, LLM insights, HTML report.

        Args:
            df: DataFrame handed to ``run_full_eda``.
            save_path: where the HTML report is written.

        Returns:
            Tuple ``(eda_results, ai_insights)``. When the LLM step fails for
            any reason, ``ai_insights`` is a placeholder dict with empty lists
            and the report is still generated.
        """
        print("\n" + "=" * 50)
        print(" InsightFlow Auto EDA Agent")
        print("=" * 50)

        print("\nStep 1: Running EDA analyses...")
        eda_results = run_full_eda(df)
        ov = eda_results["overview"]
        print(f" {ov['rows']} rows, {ov['columns']} columns")
        print(f" {len(ov['numeric_cols'])} numeric, {len(ov['categorical_cols'])} categorical")
        print(f" {ov['missing_cells']} missing cells, {ov['duplicate_rows']} duplicates")
        print(f" {len(eda_results.get('outliers', {}))} columns with outliers")

        print("\nStep 2: Asking AI for insights...")
        try:
            ai_insights = self._ask_llm(eda_results)
            print(f" Got {len(ai_insights.get('key_findings', []))} findings")
        except Exception as e:
            # Deliberate best-effort: network, auth or parsing failures must
            # not prevent the statistical report from being produced.
            print(f" AI failed: {e}")
            ai_insights = {"summary": "AI analysis unavailable.", "key_findings": [], "warnings": [], "recommendations": [], "target_suggestion": "N/A"}

        print("\nStep 3: Generating report...")
        self._build_html(df, eda_results, ai_insights, save_path)

        print("\n" + "=" * 50)
        print(f" Report: {save_path}")
        print("=" * 50)
        return eda_results, ai_insights

    def run_file(self, file_path: str, save_path: str = None):
        """
        Load ``file_path`` with ``smart_load_file`` and run the pipeline on it.

        Args:
            file_path: data file to load.
            save_path: report destination; when falsy, ``<file stem>_eda.html``
                next to the input file is used.

        Returns:
            The ``(eda_results, ai_insights)`` tuple from ``run``.
        """
        # NOTE(review): the sibling imports in this file are package-relative,
        # so ``fix`` presumably lives in this package too - confirm. The
        # original top-level import is kept as the fallback.
        try:
            from .fix import smart_load_file
        except ImportError:
            from fix import smart_load_file
        print("\nLoading file...")
        df = smart_load_file(file_path)
        if not save_path:
            base = os.path.splitext(file_path)[0]
            save_path = f"{base}_eda.html"
        return self.run(df, save_path=save_path)