dataquick 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataquick-0.1.0/LICENSE +21 -0
- dataquick-0.1.0/PKG-INFO +48 -0
- dataquick-0.1.0/README.md +24 -0
- dataquick-0.1.0/pyproject.toml +3 -0
- dataquick-0.1.0/setup.cfg +25 -0
- dataquick-0.1.0/setup.py +21 -0
- dataquick-0.1.0/src/dataquick/__init__.py +18 -0
- dataquick-0.1.0/src/dataquick/analyzer.py +62 -0
- dataquick-0.1.0/src/dataquick/cleaner.py +61 -0
- dataquick-0.1.0/src/dataquick/visualizer.py +87 -0
- dataquick-0.1.0/src/dataquick.egg-info/PKG-INFO +48 -0
- dataquick-0.1.0/src/dataquick.egg-info/SOURCES.txt +14 -0
- dataquick-0.1.0/src/dataquick.egg-info/dependency_links.txt +1 -0
- dataquick-0.1.0/src/dataquick.egg-info/requires.txt +5 -0
- dataquick-0.1.0/src/dataquick.egg-info/top_level.txt +1 -0
dataquick-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 quratalvi11-dotcom
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
dataquick-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dataquick
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A fast and easy Auto EDA library for data scientists
|
|
5
|
+
Home-page: https://github.com/quratalvi11-dotcom/myproject1
|
|
6
|
+
Author: quratalvi11-dotcom
|
|
7
|
+
License: MIT
|
|
8
|
+
Requires-Python: >=3.7
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Requires-Dist: pandas
|
|
12
|
+
Requires-Dist: numpy
|
|
13
|
+
Requires-Dist: matplotlib
|
|
14
|
+
Requires-Dist: seaborn
|
|
15
|
+
Requires-Dist: scipy
|
|
16
|
+
Dynamic: author
|
|
17
|
+
Dynamic: description
|
|
18
|
+
Dynamic: description-content-type
|
|
19
|
+
Dynamic: home-page
|
|
20
|
+
Dynamic: license-file
|
|
21
|
+
Dynamic: requires-dist
|
|
22
|
+
Dynamic: requires-python
|
|
23
|
+
Dynamic: summary
|
|
24
|
+
|
|
25
|
+
# DataQuick
|
|
26
|
+
|
|
27
|
+
A fast and easy Auto EDA library for data scientists.
|
|
28
|
+
|
|
29
|
+
## Features
|
|
30
|
+
- Auto data analysis
|
|
31
|
+
- Missing values report
|
|
32
|
+
- Data cleaning
|
|
33
|
+
- Auto visualizations
|
|
34
|
+
|
|
35
|
+
## Installation
|
|
36
|
+
```bash
|
|
37
|
+
pip install dataquick
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Usage
|
|
41
|
+
```python
|
|
42
|
+
import pandas as pd
|
|
43
|
+
from dataquick.analyzer import DataAnalyzer
|
|
44
|
+
|
|
45
|
+
df = pd.read_csv("data.csv")
|
|
46
|
+
analyzer = DataAnalyzer(df)
|
|
47
|
+
analyzer.analyze()
|
|
48
|
+
```
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# DataQuick
|
|
2
|
+
|
|
3
|
+
A fast and easy Auto EDA library for data scientists.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
- Auto data analysis
|
|
7
|
+
- Missing values report
|
|
8
|
+
- Data cleaning
|
|
9
|
+
- Auto visualizations
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
```bash
|
|
13
|
+
pip install dataquick
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Usage
|
|
17
|
+
```python
|
|
18
|
+
import pandas as pd
|
|
19
|
+
from dataquick.analyzer import DataAnalyzer
|
|
20
|
+
|
|
21
|
+
df = pd.read_csv("data.csv")
|
|
22
|
+
analyzer = DataAnalyzer(df)
|
|
23
|
+
analyzer.analyze()
|
|
24
|
+
```
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
[metadata]
|
|
2
|
+
name = dataquick
|
|
3
|
+
version = 0.1.0
|
|
4
|
+
author = quratalvi11-dotcom
|
|
5
|
+
description = A fast and easy Auto EDA library for data scientists
|
|
6
|
+
long_description = file: README.md
|
|
7
|
+
long_description_content_type = text/markdown
|
|
8
|
+
url = https://github.com/quratalvi11-dotcom/myproject1
|
|
9
|
+
license = MIT
|
|
10
|
+
|
|
11
|
+
[options]
|
|
12
|
+
package_dir = = src
|
|
13
|
+
packages = find:
|
|
14
|
+
python_requires = >=3.7
|
|
15
|
+
install_requires =
|
|
16
|
+
pandas
|
|
17
|
+
numpy
|
|
18
|
+
matplotlib
|
|
19
|
+
seaborn
|
|
20
|
+
scipy
|
|
21
|
+
|
|
22
|
+
[egg_info]
|
|
23
|
+
tag_build =
|
|
24
|
+
tag_date = 0
|
|
25
|
+
|
dataquick-0.1.0/setup.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Packaging script for the dataquick distribution."""
from pathlib import Path

from setuptools import find_packages, setup

# Resolve the README relative to this script so the build does not depend on
# the current working directory, and decode it explicitly as UTF-8 so the
# result does not depend on the platform's default encoding.
README_PATH = Path(__file__).resolve().parent / "README.md"

setup(
    name="dataquick",
    version="0.1.0",
    author="quratalvi11-dotcom",
    description="A fast and easy Auto EDA library for data scientists",
    # read_text() opens and closes the file itself, so no handle is leaked.
    long_description=README_PATH.read_text(encoding="utf-8"),
    long_description_content_type="text/markdown",
    url="https://github.com/quratalvi11-dotcom/myproject1",
    package_dir={"": "src"},
    packages=find_packages(where="src"),
    python_requires=">=3.7",
    install_requires=[
        "pandas",
        "numpy",
        "matplotlib",
        "seaborn",
        "scipy",
    ],
)
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from .analyzer import DataAnalyzer
|
|
2
|
+
from .cleaner import DataCleaner
|
|
3
|
+
from .visualizer import DataVisualizer
|
|
4
|
+
|
|
5
|
+
def analyze(df):
    """Run the complete DataAnalyzer report on *df*.

    Builds a ``DataAnalyzer`` around *df* and calls its ``analyze()`` method.
    Nothing is returned.
    """
    DataAnalyzer(df).analyze()
|
|
8
|
+
|
|
9
|
+
def clean(df, strategy="mean"):
    """Clean *df* with a DataCleaner and return the result.

    *strategy* is forwarded unchanged to ``DataCleaner.clean`` and the value
    that method returns is handed back to the caller.
    """
    return DataCleaner(df).clean(strategy=strategy)
|
|
12
|
+
|
|
13
|
+
def visualize(df):
    """Run every DataVisualizer plot on *df*.

    Builds a ``DataVisualizer`` around *df* and calls its ``visualize()``
    method. Nothing is returned.
    """
    DataVisualizer(df).visualize()
|
|
16
|
+
|
|
17
|
+
__version__ = "0.1.0"
|
|
18
|
+
__author__ = "quratalvi11-dotcom"
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import numpy as np
|
|
3
|
+
|
|
4
|
+
class DataAnalyzer:
    """Print a plain-text exploratory summary of a pandas DataFrame.

    The analyzer stores a reference to the frame it is given (no copy is
    made) and only reads from it. Every report is written to standard output
    with ``print``; none of the methods return a value.
    """

    # Horizontal rule printed above and below every report section.
    _RULE = "=" * 50

    def __init__(self, df):
        """Remember *df* for the report methods.

        Raises:
            ValueError: If *df* is not a ``pandas.DataFrame``.
        """
        if not isinstance(df, pd.DataFrame):
            raise ValueError("Input must be a pandas DataFrame")
        self.df = df

    def basic_info(self):
        """Print the shape, cell count, column names and dtypes."""
        rows, cols = self.df.shape
        print(self._RULE)
        print("BASIC DATASET INFO")
        print(self._RULE)
        print(f"Shape : {rows} rows, {cols} columns")
        print(f"Total Cells : {self.df.size}")
        print(f"\nColumn Names : {list(self.df.columns)}")
        print("\nData Types:")
        print(self.df.dtypes)
        print(self._RULE)

    def missing_values(self):
        """Print the count and percentage of nulls for columns that have any."""
        print("\n" + self._RULE)
        print(" MISSING VALUES REPORT")
        print(self._RULE)
        missing = self.df.isnull().sum()
        # With zero rows this division yields NaN rather than raising; those
        # rows are removed by the "Missing Count" > 0 filter below anyway.
        percent = (missing / len(self.df)) * 100
        report = pd.DataFrame({
            "Missing Count": missing,
            "Missing %": percent.round(2),
        })
        report = report[report["Missing Count"] > 0]
        if report.empty:
            print(" No missing values found!")
        else:
            print(report)
        print(self._RULE)

    def duplicates(self):
        """Print how many rows are flagged by ``DataFrame.duplicated()``."""
        print("\n" + self._RULE)
        print(" DUPLICATE ROWS REPORT")
        print(self._RULE)
        dup_count = self.df.duplicated().sum()
        print(f"Duplicate Rows: {dup_count}")
        if dup_count > 0:
            print(" Consider removing duplicates!")
        else:
            print(" No duplicate rows found!")
        print(self._RULE)

    def statistics(self):
        """Print ``DataFrame.describe()`` for the stored frame.

        A frame without columns cannot be described (pandas raises
        ``ValueError``), so that case prints a short notice instead of
        aborting the whole report.
        """
        print("\n" + self._RULE)
        print("STATISTICAL SUMMARY")
        print(self._RULE)
        if self.df.shape[1] == 0:
            print("No columns to summarize")
        else:
            print(self.df.describe())
        print(self._RULE)

    def analyze(self):
        """Run every report in order: info, missing, duplicates, statistics."""
        self.basic_info()
        self.missing_values()
        self.duplicates()
        self.statistics()
        print("\n Analysis Complete!")
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import numpy as np
|
|
3
|
+
|
|
4
|
+
class DataCleaner:
    """Apply simple clean-up steps to a private copy of a DataFrame.

    The constructor copies the input, so the caller's frame is never
    modified. Each step updates ``self.df``, prints a one-line progress
    message and returns ``self`` so that steps can be chained.
    """

    def __init__(self, df):
        """Copy *df* for cleaning.

        Raises:
            ValueError: If *df* is not a ``pandas.DataFrame``.
        """
        if not isinstance(df, pd.DataFrame):
            raise ValueError("Input must be a pandas DataFrame")
        self.df = df.copy()

    def remove_duplicates(self):
        """Drop duplicate rows and report how many were removed."""
        before = len(self.df)
        self.df = self.df.drop_duplicates()
        print(f"Removed {before - len(self.df)} duplicate rows")
        return self

    def fill_missing(self, strategy="mean"):
        """Handle missing values according to *strategy*.

        strategy: 'mean', 'median', 'mode', 'drop'

        * ``'drop'`` removes every row that contains a null.
        * ``'mean'`` / ``'median'`` fill numeric columns only; other columns
          are left untouched.
        * ``'mode'`` fills each column with its first modal value. A column
          with no non-null values has no mode and is left unchanged.

        Any other value leaves the data unchanged and prints a notice; it
        does not raise.
        """
        if strategy == "drop":
            before = len(self.df)
            self.df = self.df.dropna()
            print(f"Dropped {before - len(self.df)} rows with missing values")

        elif strategy in ("mean", "median"):
            numeric_cols = self.df.select_dtypes(include=np.number).columns
            numeric = self.df[numeric_cols]
            fill_values = numeric.mean() if strategy == "mean" else numeric.median()
            self.df[numeric_cols] = numeric.fillna(fill_values)
            print(f"Filled missing values with {strategy}")

        elif strategy == "mode":
            for col in self.df.columns:
                modes = self.df[col].mode()
                # mode() is empty when the column has no non-null values;
                # indexing it would raise, so there is nothing to fill with.
                if modes.empty:
                    continue
                self.df[col] = self.df[col].fillna(modes.iloc[0])
            print("Filled missing values with mode")

        else:
            # Surface the ignored argument instead of failing silently.
            print(f"Unknown strategy {strategy!r}; missing values left unchanged")

        return self

    def fix_dtypes(self):
        """Convert each column to a numeric dtype where pandas can do so."""
        for col in self.df.columns:
            try:
                self.df[col] = pd.to_numeric(self.df[col])
            except (ValueError, TypeError):
                # Column holds values that cannot be parsed as numbers; keep
                # it as it is. Only conversion errors are suppressed, so
                # KeyboardInterrupt and similar still propagate.
                continue
        print("Fixed data types where possible")
        return self

    def clean(self, strategy="mean"):
        """Run deduplication, missing-value handling and dtype fixing.

        Returns:
            The cleaned DataFrame (``self.df``).
        """
        print("=" * 50)
        print("AUTO CLEANING STARTED")
        print("=" * 50)
        self.remove_duplicates()
        self.fill_missing(strategy=strategy)
        self.fix_dtypes()
        print("=" * 50)
        print("Cleaning Complete!")
        return self.df
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import numpy as np
|
|
3
|
+
import matplotlib.pyplot as plt
|
|
4
|
+
import seaborn as sns
|
|
5
|
+
|
|
6
|
+
class DataVisualizer:
    """Draw a standard set of exploratory plots for a pandas DataFrame.

    The visualizer stores a reference to the frame it is given (no copy is
    made). Each ``plot_*`` method prints a short status line, then either
    returns early when there is nothing to plot or shows a figure with
    ``plt.show()``.
    """

    def __init__(self, df):
        """Remember *df* for the plot methods.

        Raises:
            ValueError: If *df* is not a ``pandas.DataFrame``.
        """
        if not isinstance(df, pd.DataFrame):
            raise ValueError("Input must be a pandas DataFrame")
        self.df = df

    def plot_histograms(self):
        """Show one histogram per numeric column."""
        numeric_cols = self.df.select_dtypes(include=np.number).columns
        if len(numeric_cols) == 0:
            print("No numeric columns found for histogram!")
            return
        print("Plotting Histograms...")
        self.df[numeric_cols].hist(
            figsize=(15, 10),
            bins=20,
            color="steelblue",
            edgecolor="black",
        )
        plt.suptitle("Histograms of Numeric Columns", fontsize=16)
        plt.tight_layout()
        plt.show()

    def plot_correlation(self):
        """Show an annotated heatmap of the numeric columns' correlations."""
        numeric_frame = self.df.select_dtypes(include=np.number)
        if numeric_frame.empty:
            print("No numeric columns found for correlation!")
            return
        print("Plotting Correlation Heatmap...")
        plt.figure(figsize=(12, 8))
        sns.heatmap(
            numeric_frame.corr(),
            annot=True,
            fmt=".2f",
            cmap="coolwarm",
            linewidths=0.5,
        )
        plt.title("Correlation Heatmap", fontsize=16)
        plt.tight_layout()
        plt.show()

    def plot_missing(self):
        """Show a bar chart of null counts for columns that have any."""
        missing = self.df.isnull().sum()
        missing = missing[missing > 0]
        if missing.empty:
            print("No missing values to plot!")
            return
        print("Plotting Missing Values...")
        plt.figure(figsize=(10, 6))
        sns.barplot(x=missing.index, y=missing.values, color="tomato")
        plt.title("Missing Values Per Column", fontsize=16)
        plt.xlabel("Columns")
        plt.ylabel("Missing Count")
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.show()

    def plot_boxplots(self):
        """Show box plots of every numeric column on a single axes."""
        numeric_cols = self.df.select_dtypes(include=np.number).columns
        if len(numeric_cols) == 0:
            print("No numeric columns found for boxplot!")
            return
        print("Plotting Boxplots...")
        # DataFrame.plot creates its own figure when no ``ax`` is passed, so
        # no separate plt.figure() call is made here; an extra call would
        # leave a second, empty figure for plt.show() to display.
        self.df[numeric_cols].plot(
            kind="box",
            figsize=(15, 8),
            patch_artist=True,
        )
        plt.title("Boxplots for Outlier Detection", fontsize=16)
        plt.tight_layout()
        plt.show()

    def visualize(self):
        """Run every plot in order: histograms, correlation, missing, boxes."""
        print("=" * 50)
        print("AUTO VISUALIZATION STARTED")
        print("=" * 50)
        self.plot_histograms()
        self.plot_correlation()
        self.plot_missing()
        self.plot_boxplots()
        print("=" * 50)
        print("Visualization Complete!")
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dataquick
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A fast and easy Auto EDA library for data scientists
|
|
5
|
+
Home-page: https://github.com/quratalvi11-dotcom/myproject1
|
|
6
|
+
Author: quratalvi11-dotcom
|
|
7
|
+
License: MIT
|
|
8
|
+
Requires-Python: >=3.7
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Requires-Dist: pandas
|
|
12
|
+
Requires-Dist: numpy
|
|
13
|
+
Requires-Dist: matplotlib
|
|
14
|
+
Requires-Dist: seaborn
|
|
15
|
+
Requires-Dist: scipy
|
|
16
|
+
Dynamic: author
|
|
17
|
+
Dynamic: description
|
|
18
|
+
Dynamic: description-content-type
|
|
19
|
+
Dynamic: home-page
|
|
20
|
+
Dynamic: license-file
|
|
21
|
+
Dynamic: requires-dist
|
|
22
|
+
Dynamic: requires-python
|
|
23
|
+
Dynamic: summary
|
|
24
|
+
|
|
25
|
+
# DataQuick
|
|
26
|
+
|
|
27
|
+
A fast and easy Auto EDA library for data scientists.
|
|
28
|
+
|
|
29
|
+
## Features
|
|
30
|
+
- Auto data analysis
|
|
31
|
+
- Missing values report
|
|
32
|
+
- Data cleaning
|
|
33
|
+
- Auto visualizations
|
|
34
|
+
|
|
35
|
+
## Installation
|
|
36
|
+
```bash
|
|
37
|
+
pip install dataquick
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Usage
|
|
41
|
+
```python
|
|
42
|
+
import pandas as pd
|
|
43
|
+
from dataquick.analyzer import DataAnalyzer
|
|
44
|
+
|
|
45
|
+
df = pd.read_csv("data.csv")
|
|
46
|
+
analyzer = DataAnalyzer(df)
|
|
47
|
+
analyzer.analyze()
|
|
48
|
+
```
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
setup.cfg
|
|
5
|
+
setup.py
|
|
6
|
+
src/dataquick/__init__.py
|
|
7
|
+
src/dataquick/analyzer.py
|
|
8
|
+
src/dataquick/cleaner.py
|
|
9
|
+
src/dataquick/visualizer.py
|
|
10
|
+
src/dataquick.egg-info/PKG-INFO
|
|
11
|
+
src/dataquick.egg-info/SOURCES.txt
|
|
12
|
+
src/dataquick.egg-info/dependency_links.txt
|
|
13
|
+
src/dataquick.egg-info/requires.txt
|
|
14
|
+
src/dataquick.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
dataquick
|