khadee-eda 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. khadee_eda/__init__.py +194 -0
  2. khadee_eda/assets/script.js +137 -0
  3. khadee_eda/assets/style.css +1336 -0
  4. khadee_eda/clean.py +287 -0
  5. khadee_eda/config.py +121 -0
  6. khadee_eda/engines/__init__.py +1 -0
  7. khadee_eda/engines/correlation_engine.py +115 -0
  8. khadee_eda/engines/dim_reduction.py +152 -0
  9. khadee_eda/engines/missing_engine.py +170 -0
  10. khadee_eda/engines/outlier_engine.py +190 -0
  11. khadee_eda/engines/stats_engine.py +200 -0
  12. khadee_eda/loader.py +221 -0
  13. khadee_eda/renderers/__init__.py +1 -0
  14. khadee_eda/renderers/chart_renderer.py +547 -0
  15. khadee_eda/renderers/html_renderer.py +128 -0
  16. khadee_eda/renderers/table_renderer.py +38 -0
  17. khadee_eda/sections/__init__.py +1 -0
  18. khadee_eda/sections/advanced_stats.py +47 -0
  19. khadee_eda/sections/correlations.py +125 -0
  20. khadee_eda/sections/distributions.py +102 -0
  21. khadee_eda/sections/interactions.py +84 -0
  22. khadee_eda/sections/missing.py +102 -0
  23. khadee_eda/sections/model_readiness.py +334 -0
  24. khadee_eda/sections/outliers.py +94 -0
  25. khadee_eda/sections/overview.py +174 -0
  26. khadee_eda/sections/sample.py +109 -0
  27. khadee_eda/sections/variables.py +348 -0
  28. khadee_eda/techniques/__init__.py +2 -0
  29. khadee_eda/techniques/china.py +125 -0
  30. khadee_eda/techniques/india.py +167 -0
  31. khadee_eda/techniques/japan.py +177 -0
  32. khadee_eda/techniques/us.py +108 -0
  33. khadee_eda/type_detector.py +169 -0
  34. khadee_eda/utils.py +130 -0
  35. khadee_eda-1.0.0.dist-info/METADATA +193 -0
  36. khadee_eda-1.0.0.dist-info/RECORD +38 -0
  37. khadee_eda-1.0.0.dist-info/WHEEL +5 -0
  38. khadee_eda-1.0.0.dist-info/top_level.txt +1 -0
khadee_eda/__init__.py ADDED
@@ -0,0 +1,194 @@
1
+ """
2
+ Khadee EDA — Deep Insights Data Profiling
3
+ ==========================================
4
+
5
+ A comprehensive EDA module that generates stunning HTML profiling reports
6
+ from any dataset format. Supports CSV, Excel, JSON, Parquet, and 10+ more formats.
7
+
8
+ Usage
9
+ -----
10
+ from khadee_eda import ProfileReport
11
+
12
+ # From file (any format — auto-detected)
13
+ report = ProfileReport("train.csv", title="My EDA Report")
14
+ report = ProfileReport("data.xlsx", title="Excel Analysis")
15
+
16
+ # From DataFrame
17
+ import pandas as pd
18
+ df = pd.read_csv("train.csv")
19
+ report = ProfileReport(df, title="My EDA Report")
20
+
21
+ # Generate HTML report
22
+ report.to_html("report.html")
23
+
24
+ # Selective sections
25
+ report = ProfileReport(df, sections=["overview", "variables", "correlations"])
26
+
27
+ # Selective techniques
28
+ report = ProfileReport(df, techniques=["us", "japan"])
29
+
30
+ Sub-modules
31
+ -----------
32
+ from khadee_eda.techniques import us, india, japan, china
33
+ from khadee_eda.engines import stats_engine, correlation_engine, missing_engine, outlier_engine
34
+ """
35
+
36
+ __version__ = "1.0.0"
37
+ __author__ = "Khadee"
38
+
39
+ import sys
40
+ import time
41
+ import warnings
42
+
43
+ import pandas as pd
44
+
45
+
46
def _print(msg):
    """Print ``msg``, degrading gracefully on consoles with a limited encoding.

    Some consoles (notably legacy Windows code pages) cannot encode every
    character of a progress message, in which case ``print`` raises
    ``UnicodeEncodeError``. When that happens the message is re-encoded with
    the output stream's own encoding using ``errors="replace"``, so only the
    characters the console truly cannot represent become ``?``; everything
    else is shown unchanged.

    Parameters
    ----------
    msg : str
        Text to write to standard output.
    """
    try:
        print(msg)
    except UnicodeEncodeError:
        # Prefer the stream's real encoding; fall back to ASCII when the
        # stream does not advertise one (e.g. a replaced stdout object).
        encoding = getattr(sys.stdout, "encoding", None) or "ascii"
        try:
            safe = msg.encode(encoding, errors="replace").decode(encoding)
        except LookupError:
            # Unknown codec name reported by the stream: use plain ASCII.
            safe = msg.encode("ascii", errors="replace").decode("ascii")
        print(safe)
52
+
53
+ from .config import ALL_SECTIONS, ALL_TECHNIQUES
54
+ from .loader import load_dataset
55
+ from .type_detector import detect_types
56
+ from .renderers.html_renderer import render_html
57
+ from . import clean
58
+
59
+
60
class ProfileReport:
    """
    Generate a comprehensive EDA profiling report.

    The dataset is loaded, column types are detected and every requested
    section is rendered to HTML eagerly in the constructor; ``to_html`` and
    ``to_html_string`` only assemble the pre-computed sections.

    Parameters
    ----------
    source : str or pd.DataFrame
        File path (auto-detects format from extension) or pandas DataFrame.
    title : str, optional
        Report title. Default: "Khadee EDA Report".
    sections : list, optional
        List of section IDs to include. Default: all 10 sections.
        Options: overview, variables, distributions, correlations, missing,
        outliers, interactions, advanced_stats, model_readiness, sample
    techniques : list, optional
        List of technique IDs for the Advanced Statistics section.
        Default: all 4 techniques.
        Options: us, india, japan, china
    **kwargs : dict
        Extra arguments passed to the file reader (e.g., sheet_name for Excel).

    Raises
    ------
    ValueError
        If ``sections`` or ``techniques`` contains an unknown ID.

    Examples
    --------
    >>> from khadee_eda import ProfileReport
    >>> report = ProfileReport("train.csv", title="Profiling Report")
    >>> report.to_html("report.html")
    """

    def __init__(self, source, title="Khadee EDA Report", sections=None,
                 techniques=None, **kwargs):
        self.title = title
        # An empty or missing selection means "everything".
        self.sections = sections or ALL_SECTIONS
        self.techniques = techniques or ALL_TECHNIQUES
        # Timer covers validation, loading and analysis; it is also handed
        # to the overview section generator.
        self._start_time = time.time()

        # Validate sections
        for s in self.sections:
            if s not in ALL_SECTIONS:
                raise ValueError(
                    f"Unknown section: '{s}'. Available: {ALL_SECTIONS}"
                )

        # Validate techniques
        for t in self.techniques:
            if t not in ALL_TECHNIQUES:
                raise ValueError(
                    f"Unknown technique: '{t}'. Available: {ALL_TECHNIQUES}"
                )

        # Load data
        _print("[*] Khadee EDA -- Loading dataset...")
        self.df, self.metadata = load_dataset(source, **kwargs)
        _print(f" [+] Loaded: {self.df.shape[0]:,} rows x {self.df.shape[1]:,} columns")

        # Detect types
        _print(" [*] Detecting column types...")
        self.type_map = detect_types(self.df)

        # Pre-compute report
        _print(" [*] Analyzing data...")
        self._sections_html = self._generate_sections()

        elapsed = time.time() - self._start_time
        _print(f" [+] Analysis complete in {elapsed:.2f}s")

    def _generate_sections(self):
        """Generate HTML for all requested sections.

        Returns
        -------
        dict
            Maps each requested section ID to its HTML fragment. A section
            whose generator raises is replaced by an error card (and a
            warning is emitted) so one failing section does not abort the
            whole report.
        """
        # Local import: the module-level namespace is left untouched.
        from html import escape

        from .sections import (
            overview, variables, distributions, correlations,
            missing, outliers, interactions, advanced_stats,
            model_readiness, sample,
        )

        # Lambdas defer the work so only the requested sections are computed.
        section_generators = {
            "overview": lambda: overview.generate(
                self.df, self.type_map, self.metadata, self._start_time
            ),
            "variables": lambda: variables.generate(self.df, self.type_map),
            "distributions": lambda: distributions.generate(self.df, self.type_map),
            "correlations": lambda: correlations.generate(self.df, self.type_map),
            "missing": lambda: missing.generate(self.df, self.type_map),
            "outliers": lambda: outliers.generate(self.df, self.type_map),
            "interactions": lambda: interactions.generate(self.df, self.type_map),
            "advanced_stats": lambda: advanced_stats.generate(
                self.df, self.type_map, self.techniques
            ),
            "model_readiness": lambda: model_readiness.generate(self.df, self.type_map),
            "sample": lambda: sample.generate(self.df, self.type_map),
        }

        results = {}
        for section_id in self.sections:
            gen = section_generators.get(section_id)
            if gen:
                try:
                    results[section_id] = gen()
                    _print(f" [+] {section_id}")
                except Exception as e:
                    warnings.warn(f"Error generating section '{section_id}': {e}")
                    # The exception text can contain arbitrary data-derived
                    # content (column names, cell values); escape it so it
                    # cannot break or inject markup into the report.
                    results[section_id] = (
                        f'<div class="card"><h3 class="card-title">⚠️ Error in {section_id}</h3>'
                        f'<p class="error-message">{escape(str(e))}</p></div>'
                    )

        return results

    def to_html(self, output_path="report.html"):
        """
        Generate and save the HTML report.

        Parameters
        ----------
        output_path : str
            Path to save the HTML report.

        Returns
        -------
        str
            The ``output_path`` that was written.
        """
        _print(" [*] Generating HTML report...")

        document = self.to_html_string()

        with open(output_path, "w", encoding="utf-8") as f:
            f.write(document)

        _print(f" [+] Report saved to: {output_path}")
        return output_path

    def to_html_string(self):
        """Return the HTML report as a string."""
        return render_html(self.title, self._sections_html, self.sections)

    def __repr__(self):
        return (
            f"ProfileReport("
            f"rows={self.df.shape[0]:,}, cols={self.df.shape[1]:,}, "
            f"sections={len(self.sections)}, techniques={len(self.techniques)})"
        )
@@ -0,0 +1,137 @@
1
+ /* ============================================================
2
+ Khadee EDA — Report Interactivity
3
+ ============================================================ */
4
+
5
+ // ── Sidebar Navigation Active Tracking ──
6
(function() {
    // Sidebar links and the report sections they refer to.
    const navLinks = document.querySelectorAll('.nav-link');
    const sections = document.querySelectorAll('.report-section');

    // Smooth scroll on nav click
    navLinks.forEach(link => {
        link.addEventListener('click', function(e) {
            e.preventDefault();
            // The href is read as "#<element-id>"; tolerate a link without
            // an href instead of throwing on null.
            const href = this.getAttribute('href') || '';
            const target = document.getElementById(href.substring(1));
            if (target) {
                target.scrollIntoView({ behavior: 'smooth', block: 'start' });
            }
            // Close mobile sidebar (only when the page actually has one)
            const sidebar = document.getElementById('sidebar');
            if (sidebar) {
                sidebar.classList.remove('open');
            }
        });
    });

    // Intersection Observer for active nav highlighting
    if (sections.length > 0 && 'IntersectionObserver' in window) {
        const observer = new IntersectionObserver(function(entries) {
            entries.forEach(function(entry) {
                if (entry.isIntersecting) {
                    // Mark the link whose data-section matches the section
                    // that just entered the observed band; clear the rest.
                    const sectionId = entry.target.getAttribute('data-section');
                    navLinks.forEach(function(link) {
                        link.classList.remove('active');
                        if (link.getAttribute('data-section') === sectionId) {
                            link.classList.add('active');
                        }
                    });
                }
            });
        }, {
            // Shrinks the observed area to a band between 20% and 30%
            // from the top of the viewport.
            rootMargin: '-20% 0px -70% 0px',
            threshold: 0
        });

        sections.forEach(function(section) {
            observer.observe(section);
        });
    }

    // Set first nav link active by default
    if (navLinks.length > 0) {
        navLinks[0].classList.add('active');
    }
})();
53
+
54
+ // ── Fade-in Animation on Scroll ──
55
(function() {
    // Stagger the animation of each report section by 0.05s per
    // position in document order.
    var reportSections = document.querySelectorAll('.report-section');
    for (var position = 0; position < reportSections.length; position++) {
        reportSections[position].style.animationDelay = (position * 0.05) + 's';
    }
})();
61
+
62
+ // ── Tab Switching ──
63
/**
 * Activate one tab inside the `.tab-container` that encloses `btn`.
 *
 * @param {Element} btn   The tab button that was pressed.
 * @param {string}  tabId The id of the `.tab-content` element to show.
 */
function switchTab(btn, tabId) {
    var tabContainer = btn.closest('.tab-container');
    if (!tabContainer) {
        return;
    }

    // Reset every button and panel belonging to this container.
    var buttons = tabContainer.querySelectorAll('.tab-btn');
    for (var i = 0; i < buttons.length; i++) {
        buttons[i].classList.remove('active');
    }
    var panels = tabContainer.querySelectorAll('.tab-content');
    for (var j = 0; j < panels.length; j++) {
        panels[j].style.display = 'none';
    }

    btn.classList.add('active');

    var panel = document.getElementById(tabId);
    if (!panel) {
        return;
    }
    panel.style.display = 'block';

    // Trigger Plotly resize for charts that were hidden
    var hiddenCharts = panel.querySelectorAll('.plotly-chart');
    for (var k = 0; k < hiddenCharts.length; k++) {
        var chart = hiddenCharts[k];
        if (window.Plotly && chart.data) {
            Plotly.Plots.resize(chart);
        }
    }
}
88
+
89
+ // ── Mobile Sidebar Toggle ──
90
/**
 * Toggle the `open` class on the `#sidebar` element.
 * Does nothing when the page has no such element.
 */
function toggleSidebar() {
    var sidebar = document.getElementById('sidebar');
    if (!sidebar) {
        // No sidebar in this document: avoid a TypeError on null.
        return;
    }
    sidebar.classList.toggle('open');
}
94
+
95
+ // Close sidebar on outside click (mobile)
96
document.addEventListener('click', function(e) {
    var sidebar = document.getElementById('sidebar');
    var menuBtn = document.getElementById('mobile-menu-btn');
    if (sidebar && sidebar.classList.contains('open')) {
        // Clicks on the menu button are excluded so they do not close the
        // sidebar here. A missing button simply cannot have been clicked.
        var clickedMenuBtn = !!menuBtn && menuBtn.contains(e.target);
        if (!sidebar.contains(e.target) && !clickedMenuBtn) {
            sidebar.classList.remove('open');
        }
    }
});
105
+
106
+ // ── Window resize: trigger Plotly resize ──
107
// Handle of the pending debounce timer; each new resize event replaces it,
// so charts are resized once, 200 ms after the last resize event.
var resizeTimeout;
window.addEventListener('resize', function() {
    clearTimeout(resizeTimeout);
    resizeTimeout = setTimeout(function() {
        var allCharts = document.querySelectorAll('.plotly-chart');
        for (var i = 0; i < allCharts.length; i++) {
            var chart = allCharts[i];
            // Only elements carrying chart data are resized, and only when
            // Plotly is loaded.
            if (window.Plotly && chart.data) {
                Plotly.Plots.resize(chart);
            }
        }
    }, 200);
});
118
+
119
+ // ── Variable Card Dropdown Switcher ──
120
/**
 * Show a single variable card and hide all the others.
 *
 * @param {string} colId Suffix of the card's element id (`var-<colId>`).
 */
function showVariableCard(colId) {
    // Hide every card first; the selected one is re-shown below.
    var allCards = document.querySelectorAll('.variable-card');
    for (var i = 0; i < allCards.length; i++) {
        allCards[i].style.display = 'none';
    }

    var selected = document.getElementById('var-' + colId);
    if (!selected) {
        return;
    }
    selected.style.display = 'block';

    // Resize charts inside the newly shown card
    var cardCharts = selected.querySelectorAll('.plotly-chart');
    for (var j = 0; j < cardCharts.length; j++) {
        var chart = cardCharts[j];
        if (window.Plotly && chart.data) {
            Plotly.Plots.resize(chart);
        }
    }
}
137
+