qdesc 0.1.7.1__py3-none-any.whl → 0.1.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qdesc might be problematic. Click here for more details.

qdesc/__init__.py CHANGED
@@ -106,3 +106,51 @@ def freqdist_to_excel(df, output_path, sort_by='Percentage', ascending=False, to
106
106
  used_names.add(sheet_name.lower())
107
107
  distribution.to_excel(writer, sheet_name=sheet_name, index=False)
108
108
  print(f"Frequency distributions written to {output_path}")
109
+
110
def normcheck_dashboard(df, significance_level=0.05, figsize=(18, 5)):
    """Print an Anderson-Darling normality check and plot each numeric column.

    For every numeric column of ``df`` the missing values are dropped, the
    Anderson-Darling statistic is compared with the tabulated critical value
    whose significance level is closest to ``significance_level``, the
    outcome is printed, and a three-panel figure (QQ plot, histogram,
    boxplot) is shown.

    Columns with fewer than 8 non-missing values, or with a single distinct
    value, are reported and skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to inspect; only columns selected by ``np.number`` are used.
    significance_level : float, default 0.05
        Desired significance level as a fraction. It is multiplied by 100 and
        matched to the nearest level returned by ``scipy.stats.anderson``.
    figsize : tuple, default (18, 5)
        Size passed to ``matplotlib.pyplot.subplots`` for each figure.

    Returns
    -------
    None
        Results are printed and plotted, not returned.
    """
    import numpy as np

    numeric_cols = df.select_dtypes(include=[np.number]).columns
    if len(numeric_cols) == 0:
        print("No numeric columns to analyze.")
        return

    for col in numeric_cols:
        data = df[col].dropna()
        print(f"\n--- Variable: {col} ---")

        if len(data) < 8:
            print("Not enough data to perform Anderson-Darling test or meaningful plots.")
            continue

        # With zero variance the standardised sample is 0/0, so the statistic
        # is NaN and the comparison below would always report "Reject Null".
        if data.nunique() < 2:
            print("Constant data: Anderson-Darling test is undefined for zero variance.")
            continue

        stat, used_sig, crit_val, decision = _anderson_darling_decision(
            data, significance_level
        )
        print(f" Anderson-Darling Statistic : {stat:.4f}")
        print(f" Critical Value (@ {used_sig}%) : {crit_val:.4f}")
        print(f" Decision : {decision}")

        _plot_normality_panels(data, col, figsize)


def _anderson_darling_decision(data, significance_level):
    """Return (statistic, matched level in %, critical value, decision text)."""
    from scipy.stats import anderson

    result = anderson(data, dist='norm')
    sig_levels = result.significance_level
    target_pct = significance_level * 100

    # Only a fixed set of levels is tabulated, so use the nearest one;
    # ``min`` keeps the first index on ties, like ``list.index(min(...))``.
    closest_index = min(
        range(len(sig_levels)),
        key=lambda i: abs(sig_levels[i] - target_pct),
    )
    used_sig = sig_levels[closest_index]
    crit_val = result.critical_values[closest_index]
    stat = result.statistic

    decision = "Fail to Reject Null" if stat <= crit_val else "Reject Null"
    return stat, used_sig, crit_val, decision


def _plot_normality_panels(data, col, figsize):
    """Show a QQ plot, histogram and boxplot of ``data`` in one figure."""
    # Imported here so the validation-only paths of the entry point do not
    # need the plotting stack.
    import matplotlib.pyplot as plt
    import seaborn as sns
    import statsmodels.api as sm

    fig, axes = plt.subplots(1, 3, figsize=figsize)

    # QQ Plot
    sm.qqplot(data, line='s', ax=axes[0])
    axes[0].set_title(f"QQ Plot - {col}")

    # Histogram (No KDE)
    sns.histplot(data, bins=30, kde=False, color='gray', alpha=0.3, ax=axes[1])
    axes[1].set_title(f"Histogram - {col}")

    # Boxplot
    sns.boxplot(x=data, ax=axes[2], color='lightblue')
    axes[2].set_title(f"Boxplot - {col}")
    axes[2].set_xlabel(col)

    fig.suptitle(f"Normality Assessment - {col}", fontsize=14, y=1.05)
    fig.tight_layout()
    plt.show()
    # Release the figure; otherwise one stays open per column when the
    # backend does not close figures on show().
    plt.close(fig)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: qdesc
3
- Version: 0.1.7.1
3
+ Version: 0.1.8.1
4
4
  Summary: Quick and Easy way to do descriptive analysis.
5
5
  Author: Paolo Hilado
6
6
  Author-email: datasciencepgh@proton.me
@@ -53,15 +53,36 @@ Run the function qd.freqdist_to_excel(df, "Name of file.xlsx", ascending = FALSE
53
53
  * Counts - the number of observations
54
54
  * Percentage - percentage of observations from total.
55
55
 
56
+ ## qd.normcheck_dashboard Function
57
+ Run the function qd.normcheck_dashboard(df) to check the distribution of each numeric variable for normality. It will compute the Anderson-Darling statistic and
58
+ create visualizations (a Q-Q plot, a histogram, and a boxplot) for checking whether the distribution is approximately normal.
59
+
56
60
 
57
61
  Later versions will include data visualizations handy for exploring the distribution of the data set.
58
62
 
59
63
  ## Installation
60
64
  pip install qdesc
61
65
 
62
- ## Usage - doing descriptive analysis using qdesc
63
- ### import qdesc as qd
64
- ### qd.desc(df)
66
+ ## Sample use of qdesc functions
67
+ # Creating a sample dataframe
68
+ import pandas as pd
69
+ import numpy as np
70
+
71
+ # Set seed for reproducibility
72
+ np.random.seed(21)
73
+
74
+ # Create two continuous variables
75
+ var1 = np.random.normal(loc=0, scale=1, size=1000) # Normal distribution
76
+ var2 = np.random.uniform(low=10, high=50, size=1000) # Uniform distribution
77
+
78
+ # Create DataFrame
79
+ df = pd.DataFrame({
80
+ 'Normal_Variable': var1,
81
+ 'Uniform_Variable': var2
82
+ })
83
+ # Using the qdesc function
84
+ import qdesc as qd
85
+ qd.desc(df)
65
86
 
66
87
  ## License
67
88
  This project is licensed under the GPL-3 License. See the LICENSE file for more details.
@@ -0,0 +1,6 @@
1
+ qdesc/__init__.py,sha256=YtYahB-neaYOG3DvVXweSuFExqVZFNR0lAivaPp9_SA,6599
2
+ qdesc-0.1.8.1.dist-info/LICENCE.txt,sha256=xdFo-Rt6I7EP7C_qrVeIBIcH_7mRGUh8sciJs2R8VmY,9684
3
+ qdesc-0.1.8.1.dist-info/METADATA,sha256=qXseXkIM1Ynrx9kralmBs9B1jtpnwb8-kHUKVMLjk9Y,4552
4
+ qdesc-0.1.8.1.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
5
+ qdesc-0.1.8.1.dist-info/top_level.txt,sha256=JuSs1wWRGN77DVuq-SX-5P7m_mIZF0ikEVgPTBOrHb0,6
6
+ qdesc-0.1.8.1.dist-info/RECORD,,
@@ -1,6 +0,0 @@
1
- qdesc/__init__.py,sha256=eT5JOnELIhQy3K453z3r22_xmf47uW1AOlUiWlSWgSM,4513
2
- qdesc-0.1.7.1.dist-info/LICENCE.txt,sha256=xdFo-Rt6I7EP7C_qrVeIBIcH_7mRGUh8sciJs2R8VmY,9684
3
- qdesc-0.1.7.1.dist-info/METADATA,sha256=OScoZdD8due5ilEz-waSAdaHcYqeLHDOQquLGRzMfAs,3782
4
- qdesc-0.1.7.1.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
5
- qdesc-0.1.7.1.dist-info/top_level.txt,sha256=JuSs1wWRGN77DVuq-SX-5P7m_mIZF0ikEVgPTBOrHb0,6
6
- qdesc-0.1.7.1.dist-info/RECORD,,