dataquick 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 quratalvi11-dotcom
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,48 @@
1
+ Metadata-Version: 2.4
2
+ Name: dataquick
3
+ Version: 0.1.0
4
+ Summary: A fast and easy Auto EDA library for data scientists
5
+ Home-page: https://github.com/quratalvi11-dotcom/myproject1
6
+ Author: quratalvi11-dotcom
7
+ License: MIT
8
+ Requires-Python: >=3.7
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Requires-Dist: pandas
12
+ Requires-Dist: numpy
13
+ Requires-Dist: matplotlib
14
+ Requires-Dist: seaborn
15
+ Requires-Dist: scipy
16
+ Dynamic: author
17
+ Dynamic: description
18
+ Dynamic: description-content-type
19
+ Dynamic: home-page
20
+ Dynamic: license-file
21
+ Dynamic: requires-dist
22
+ Dynamic: requires-python
23
+ Dynamic: summary
24
+
25
+ # DataQuick
26
+
27
+ A fast and easy Auto EDA library for data scientists.
28
+
29
+ ## Features
30
+ - Auto data analysis
31
+ - Missing values report
32
+ - Data cleaning
33
+ - Auto visualizations
34
+
35
+ ## Installation
36
+ ```bash
37
+ pip install dataquick
38
+ ```
39
+
40
+ ## Usage
41
+ ```python
42
+ import pandas as pd
43
+ from dataquick.analyzer import DataAnalyzer
44
+
45
+ df = pd.read_csv("data.csv")
46
+ analyzer = DataAnalyzer(df)
47
+ analyzer.analyze()
48
+ ```
@@ -0,0 +1,24 @@
1
+ # DataQuick
2
+
3
+ A fast and easy Auto EDA library for data scientists.
4
+
5
+ ## Features
6
+ - Auto data analysis
7
+ - Missing values report
8
+ - Data cleaning
9
+ - Auto visualizations
10
+
11
+ ## Installation
12
+ ```bash
13
+ pip install dataquick
14
+ ```
15
+
16
+ ## Usage
17
+ ```python
18
+ import pandas as pd
19
+ from dataquick.analyzer import DataAnalyzer
20
+
21
+ df = pd.read_csv("data.csv")
22
+ analyzer = DataAnalyzer(df)
23
+ analyzer.analyze()
24
+ ```
@@ -0,0 +1,3 @@
1
+ [build-system]
2
+ requires = ["setuptools>=42", "wheel"]
3
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1,25 @@
1
+ [metadata]
2
+ name = dataquick
3
+ version = 0.1.0
4
+ author = quratalvi11-dotcom
5
+ description = A fast and easy Auto EDA library for data scientists
6
+ long_description = file: README.md
7
+ long_description_content_type = text/markdown
8
+ url = https://github.com/quratalvi11-dotcom/myproject1
9
+ license = MIT
10
+
11
+ [options]
12
+ package_dir = = src
13
+ packages = find:
14
+ python_requires = >=3.7
15
+ install_requires =
16
+ pandas
17
+ numpy
18
+ matplotlib
19
+ seaborn
20
+ scipy
21
+
22
+ [egg_info]
23
+ tag_build =
24
+ tag_date = 0
25
+
@@ -0,0 +1,21 @@
1
+ from setuptools import setup, find_packages
2
+
3
# Read the long description inside a context manager so the file handle is
# closed promptly, and pin the encoding so the build does not depend on the
# platform's default locale.
with open("README.md", encoding="utf-8") as readme_file:
    long_description = readme_file.read()

setup(
    name="dataquick",
    version="0.1.0",
    author="quratalvi11-dotcom",
    description="A fast and easy Auto EDA library for data scientists",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/quratalvi11-dotcom/myproject1",
    # The importable package lives under src/ (src layout).
    package_dir={"": "src"},
    packages=find_packages(where="src"),
    python_requires=">=3.7",
    install_requires=[
        "pandas",
        "numpy",
        "matplotlib",
        "seaborn",
        "scipy",
    ],
)
@@ -0,0 +1,18 @@
1
+ from .analyzer import DataAnalyzer
2
+ from .cleaner import DataCleaner
3
+ from .visualizer import DataVisualizer
4
+
5
def analyze(df):
    """Run the full ``DataAnalyzer`` report on ``df``.

    Wraps ``df`` in a ``DataAnalyzer`` and calls its ``analyze()`` method.
    Returns ``None``.
    """
    DataAnalyzer(df).analyze()
8
+
9
def clean(df, strategy="mean"):
    """Clean ``df`` with ``DataCleaner`` and return the result.

    Args:
        df: The DataFrame handed to ``DataCleaner``.
        strategy: Forwarded unchanged to ``DataCleaner.clean``.

    Returns:
        Whatever ``DataCleaner.clean`` returns for the given strategy.
    """
    return DataCleaner(df).clean(strategy=strategy)
12
+
13
def visualize(df):
    """Run every ``DataVisualizer`` plot on ``df``.

    Wraps ``df`` in a ``DataVisualizer`` and calls its ``visualize()``
    method. Returns ``None``.
    """
    DataVisualizer(df).visualize()
16
+
17
+ __version__ = "0.1.0"
18
+ __author__ = "quratalvi11-dotcom"
@@ -0,0 +1,62 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+
4
class DataAnalyzer:
    """Print exploratory summaries of a pandas DataFrame.

    Every report method writes to stdout and returns ``None``. The wrapped
    DataFrame is stored by reference and only read, never modified.
    """

    # Horizontal rule printed around every report section.
    _RULE = "=" * 50

    def __init__(self, df):
        """Store ``df`` for later reporting.

        Args:
            df: The DataFrame to analyze.

        Raises:
            ValueError: If ``df`` is not a ``pandas.DataFrame``.
        """
        if not isinstance(df, pd.DataFrame):
            raise ValueError("Input must be a pandas DataFrame")
        self.df = df

    def basic_info(self):
        """Print the shape, cell count, column names and dtypes."""
        n_rows, n_cols = self.df.shape
        print(self._RULE)
        print("BASIC DATASET INFO")
        print(self._RULE)
        print(f"Shape : {n_rows} rows, {n_cols} columns")
        print(f"Total Cells : {self.df.size}")
        print(f"\nColumn Names : {list(self.df.columns)}")
        print("\nData Types:")
        print(self.df.dtypes)
        print(self._RULE)

    def missing_values(self):
        """Print the count and percentage of missing values per column.

        Only columns with at least one missing value are listed.
        """
        print("\n" + self._RULE)
        print(" MISSING VALUES REPORT")
        print(self._RULE)
        missing = self.df.isnull().sum()
        # With zero rows this divides 0 by 0 and yields NaN, which is
        # harmless: such columns are filtered out just below.
        percent = (missing / len(self.df)) * 100
        report = pd.DataFrame({
            "Missing Count": missing,
            "Missing %": percent.round(2),
        })
        report = report[report["Missing Count"] > 0]
        if report.empty:
            print(" No missing values found!")
        else:
            print(report)
        print(self._RULE)

    def duplicates(self):
        """Print how many rows are exact duplicates of an earlier row."""
        print("\n" + self._RULE)
        print(" DUPLICATE ROWS REPORT")
        print(self._RULE)
        dup_count = self.df.duplicated().sum()
        print(f"Duplicate Rows: {dup_count}")
        if dup_count > 0:
            print(" Consider removing duplicates!")
        else:
            print(" No duplicate rows found!")
        print(self._RULE)

    def statistics(self):
        """Print ``DataFrame.describe()`` for the wrapped DataFrame."""
        print("\n" + self._RULE)
        print("STATISTICAL SUMMARY")
        print(self._RULE)
        if self.df.shape[1] == 0:
            # describe() raises ValueError on a DataFrame without columns;
            # report that instead of aborting the whole analysis.
            print("No columns to summarize!")
        else:
            print(self.df.describe())
        print(self._RULE)

    def analyze(self):
        """Run every report in order: info, missing, duplicates, statistics."""
        self.basic_info()
        self.missing_values()
        self.duplicates()
        self.statistics()
        print("\n Analysis Complete!")
@@ -0,0 +1,61 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+
4
class DataCleaner:
    """Apply simple cleaning steps to a private copy of a DataFrame.

    ``remove_duplicates``, ``fill_missing`` and ``fix_dtypes`` each print a
    short progress message and return ``self`` so calls can be chained.
    ``clean`` runs all three and returns the cleaned DataFrame.
    """

    # Strategies accepted by fill_missing() and clean().
    _STRATEGIES = ("mean", "median", "mode", "drop")

    def __init__(self, df):
        """Copy ``df`` so the caller's DataFrame is never mutated.

        Args:
            df: The DataFrame to clean.

        Raises:
            ValueError: If ``df`` is not a ``pandas.DataFrame``.
        """
        if not isinstance(df, pd.DataFrame):
            raise ValueError("Input must be a pandas DataFrame")
        self.df = df.copy()

    @classmethod
    def _validate_strategy(cls, strategy):
        """Raise ``ValueError`` unless ``strategy`` is a supported name."""
        if strategy not in cls._STRATEGIES:
            raise ValueError(
                f"Unknown strategy {strategy!r}; expected one of {cls._STRATEGIES}"
            )

    def remove_duplicates(self):
        """Drop duplicated rows and print how many were removed."""
        before = len(self.df)
        self.df = self.df.drop_duplicates()
        print(f"Removed {before - len(self.df)} duplicate rows")
        return self

    def fill_missing(self, strategy="mean"):
        """Handle missing values according to ``strategy``.

        Args:
            strategy: One of ``'mean'``, ``'median'``, ``'mode'``, ``'drop'``.
                ``'mean'`` and ``'median'`` fill numeric columns only,
                ``'mode'`` fills every column that has a mode, and ``'drop'``
                removes rows containing any missing value.

        Returns:
            ``self``, to allow chaining.

        Raises:
            ValueError: If ``strategy`` is not one of the supported names.
        """
        # Fail loudly rather than silently doing nothing on a typo.
        self._validate_strategy(strategy)

        if strategy == "drop":
            before = len(self.df)
            self.df = self.df.dropna()
            print(f"Dropped {before - len(self.df)} rows with missing values")

        elif strategy == "mean":
            numeric_cols = self.df.select_dtypes(include=np.number).columns
            if not numeric_cols.empty:
                self.df[numeric_cols] = self.df[numeric_cols].fillna(
                    self.df[numeric_cols].mean()
                )
            print("Filled missing values with mean")

        elif strategy == "median":
            numeric_cols = self.df.select_dtypes(include=np.number).columns
            if not numeric_cols.empty:
                self.df[numeric_cols] = self.df[numeric_cols].fillna(
                    self.df[numeric_cols].median()
                )
            print("Filled missing values with median")

        else:  # strategy == "mode"
            for col in self.df.columns:
                modes = self.df[col].mode()
                # A column that is entirely missing has no mode; leave it
                # untouched instead of failing on an empty result.
                if not modes.empty:
                    self.df[col] = self.df[col].fillna(modes.iloc[0])
            print("Filled missing values with mode")

        return self

    def fix_dtypes(self):
        """Convert columns to numeric dtypes where every value parses.

        Columns that cannot be converted are left unchanged. Datetime and
        timedelta columns are skipped because ``pd.to_numeric`` would
        reinterpret them as integer nanosecond counts.

        Returns:
            ``self``, to allow chaining.
        """
        candidates = self.df.select_dtypes(
            exclude=["datetime", "datetimetz", "timedelta"]
        ).columns
        for col in candidates:
            try:
                self.df[col] = pd.to_numeric(self.df[col])
            except (ValueError, TypeError):
                # Not convertible: keep the column as it is.
                continue
        print("Fixed data types where possible")
        return self

    def clean(self, strategy="mean"):
        """Run duplicate removal, missing-value handling and dtype fixing.

        Args:
            strategy: Missing-value strategy, as accepted by ``fill_missing``.

        Returns:
            The cleaned DataFrame (the cleaner's internal copy).

        Raises:
            ValueError: If ``strategy`` is not one of the supported names.
        """
        # Validate before doing any work so a bad strategy fails up front.
        self._validate_strategy(strategy)
        print("=" * 50)
        print("AUTO CLEANING STARTED")
        print("=" * 50)
        self.remove_duplicates()
        self.fill_missing(strategy=strategy)
        self.fix_dtypes()
        print("=" * 50)
        print("Cleaning Complete!")
        return self.df
@@ -0,0 +1,87 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+
6
class DataVisualizer:
    """Draw a standard set of exploratory plots for a pandas DataFrame.

    Each ``plot_*`` method prints a short status line, shows its figure with
    ``plt.show()`` and returns ``None``. When there is nothing to plot the
    method prints a message and returns without creating a figure.
    """

    def __init__(self, df):
        """Store ``df`` for later plotting.

        Args:
            df: The DataFrame to visualize.

        Raises:
            ValueError: If ``df`` is not a ``pandas.DataFrame``.
        """
        if not isinstance(df, pd.DataFrame):
            raise ValueError("Input must be a pandas DataFrame")
        self.df = df

    def plot_histograms(self):
        """Show one histogram per numeric column."""
        numeric_cols = self.df.select_dtypes(include=np.number).columns
        if len(numeric_cols) == 0:
            print("No numeric columns found for histogram!")
            return
        print("Plotting Histograms...")
        self.df[numeric_cols].hist(
            figsize=(15, 10),
            bins=20,
            color="steelblue",
            edgecolor="black",
        )
        plt.suptitle("Histograms of Numeric Columns", fontsize=16)
        plt.tight_layout()
        plt.show()

    def plot_correlation(self):
        """Show an annotated heatmap of the numeric columns' correlations."""
        numeric_df = self.df.select_dtypes(include=np.number)
        if numeric_df.empty:
            print("No numeric columns found for correlation!")
            return
        print("Plotting Correlation Heatmap...")
        plt.figure(figsize=(12, 8))
        sns.heatmap(
            numeric_df.corr(),
            annot=True,
            fmt=".2f",
            cmap="coolwarm",
            linewidths=0.5,
        )
        plt.title("Correlation Heatmap", fontsize=16)
        plt.tight_layout()
        plt.show()

    def plot_missing(self):
        """Show a bar chart of missing-value counts per affected column."""
        missing = self.df.isnull().sum()
        missing = missing[missing > 0]
        if missing.empty:
            print("No missing values to plot!")
            return
        print("Plotting Missing Values...")
        plt.figure(figsize=(10, 6))
        sns.barplot(x=missing.index, y=missing.values, color="tomato")
        plt.title("Missing Values Per Column", fontsize=16)
        plt.xlabel("Columns")
        plt.ylabel("Missing Count")
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.show()

    def plot_boxplots(self):
        """Show box plots of every numeric column on a single figure."""
        numeric_cols = self.df.select_dtypes(include=np.number).columns
        if len(numeric_cols) == 0:
            print("No numeric columns found for boxplot!")
            return
        print("Plotting Boxplots...")
        # Create one figure and draw onto its Axes. Calling plt.figure() and
        # then DataFrame.plot(figsize=...) would open two figures and leave
        # the first one blank.
        _, ax = plt.subplots(figsize=(15, 8))
        self.df[numeric_cols].plot(
            kind="box",
            ax=ax,
            patch_artist=True,
        )
        ax.set_title("Boxplots for Outlier Detection", fontsize=16)
        plt.tight_layout()
        plt.show()

    def visualize(self):
        """Run every plot: histograms, correlation, missing values, boxplots."""
        print("=" * 50)
        print("AUTO VISUALIZATION STARTED")
        print("=" * 50)
        self.plot_histograms()
        self.plot_correlation()
        self.plot_missing()
        self.plot_boxplots()
        print("=" * 50)
        print("Visualization Complete!")
@@ -0,0 +1,48 @@
1
+ Metadata-Version: 2.4
2
+ Name: dataquick
3
+ Version: 0.1.0
4
+ Summary: A fast and easy Auto EDA library for data scientists
5
+ Home-page: https://github.com/quratalvi11-dotcom/myproject1
6
+ Author: quratalvi11-dotcom
7
+ License: MIT
8
+ Requires-Python: >=3.7
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Requires-Dist: pandas
12
+ Requires-Dist: numpy
13
+ Requires-Dist: matplotlib
14
+ Requires-Dist: seaborn
15
+ Requires-Dist: scipy
16
+ Dynamic: author
17
+ Dynamic: description
18
+ Dynamic: description-content-type
19
+ Dynamic: home-page
20
+ Dynamic: license-file
21
+ Dynamic: requires-dist
22
+ Dynamic: requires-python
23
+ Dynamic: summary
24
+
25
+ # DataQuick
26
+
27
+ A fast and easy Auto EDA library for data scientists.
28
+
29
+ ## Features
30
+ - Auto data analysis
31
+ - Missing values report
32
+ - Data cleaning
33
+ - Auto visualizations
34
+
35
+ ## Installation
36
+ ```bash
37
+ pip install dataquick
38
+ ```
39
+
40
+ ## Usage
41
+ ```python
42
+ import pandas as pd
43
+ from dataquick.analyzer import DataAnalyzer
44
+
45
+ df = pd.read_csv("data.csv")
46
+ analyzer = DataAnalyzer(df)
47
+ analyzer.analyze()
48
+ ```
@@ -0,0 +1,14 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ setup.cfg
5
+ setup.py
6
+ src/dataquick/__init__.py
7
+ src/dataquick/analyzer.py
8
+ src/dataquick/cleaner.py
9
+ src/dataquick/visualizer.py
10
+ src/dataquick.egg-info/PKG-INFO
11
+ src/dataquick.egg-info/SOURCES.txt
12
+ src/dataquick.egg-info/dependency_links.txt
13
+ src/dataquick.egg-info/requires.txt
14
+ src/dataquick.egg-info/top_level.txt
@@ -0,0 +1,5 @@
1
+ pandas
2
+ numpy
3
+ matplotlib
4
+ seaborn
5
+ scipy
@@ -0,0 +1 @@
1
+ dataquick