qdesc 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qdesc might be problematic. Click here for more details.
- qdesc/__init__.py +49 -0
- {qdesc-0.1.7.dist-info → qdesc-0.1.8.dist-info}/METADATA +1 -1
- qdesc-0.1.8.dist-info/RECORD +6 -0
- qdesc-0.1.7.dist-info/RECORD +0 -6
- {qdesc-0.1.7.dist-info → qdesc-0.1.8.dist-info}/LICENCE.txt +0 -0
- {qdesc-0.1.7.dist-info → qdesc-0.1.8.dist-info}/WHEEL +0 -0
- {qdesc-0.1.7.dist-info → qdesc-0.1.8.dist-info}/top_level.txt +0 -0
qdesc/__init__.py
CHANGED
|
@@ -72,6 +72,7 @@ def freqdist_a(df, ascending=False):
|
|
|
72
72
|
return final_df
|
|
73
73
|
|
|
74
74
|
def clean_sheet_name(name):
|
|
75
|
+
import re
|
|
75
76
|
# Remove invalid characters
|
|
76
77
|
name = re.sub(r'[:\\/?*\[\]]', '', name)
|
|
77
78
|
# Limit to 31 characters
|
|
@@ -105,3 +106,51 @@ def freqdist_to_excel(df, output_path, sort_by='Percentage', ascending=False, to
|
|
|
105
106
|
used_names.add(sheet_name.lower())
|
|
106
107
|
distribution.to_excel(writer, sheet_name=sheet_name, index=False)
|
|
107
108
|
print(f"Frequency distributions written to {output_path}")
|
|
109
|
+
|
|
110
|
+
def normcheck_dashboard(df, significance_level=0.05, figsize=(18, 5)):
    """Print an Anderson-Darling normality summary and show plots per numeric column.

    For every numeric column of ``df`` (missing values dropped) the function
    prints the Anderson-Darling statistic, the critical value it was compared
    against and the resulting decision, then shows a three-panel figure
    (QQ plot, histogram, boxplot).

    Args:
        df: DataFrame whose numeric columns are assessed.
        significance_level: Desired significance level as a fraction
            (0.05 means 5%). The test only tabulates a fixed set of levels,
            so the tabulated level closest to ``significance_level * 100``
            is the one actually used; it is echoed in the printed summary.
        figsize: Size of each three-panel figure, passed to
            ``matplotlib.pyplot.subplots``.

    Returns:
        None. All results are printed or displayed.
    """
    import numpy as np
    from scipy.stats import anderson

    numeric_cols = df.select_dtypes(include=[np.number]).columns
    if len(numeric_cols) == 0:
        print("No numeric columns to analyze.")
        return

    # The test reports its levels in percent, so convert the fraction once.
    target_pct = significance_level * 100

    for col in numeric_cols:
        data = df[col].dropna()
        print(f"\n--- Variable: {col} ---")
        if len(data) < 8:
            print("Not enough data to perform Anderson-Darling test or meaningful plots.")
            continue

        # Anderson-Darling test against the normal distribution.
        test_result = anderson(data, dist='norm')
        stat = test_result.statistic
        sig_levels = np.asarray(test_result.significance_level)
        crit_values = test_result.critical_values

        # argmin returns the first minimum, i.e. the same index the
        # list-based `.index(min(...))` lookup would pick on ties.
        closest_index = int(np.argmin(np.abs(sig_levels - target_pct)))
        used_sig = sig_levels[closest_index]
        crit_val = crit_values[closest_index]
        decision = "Fail to Reject Null" if stat <= crit_val else "Reject Null"

        # Print summary.
        print(f" Anderson-Darling Statistic : {stat:.4f}")
        print(f" Critical Value (@ {used_sig}%) : {crit_val:.4f}")
        print(f" Decision : {decision}")

        _plot_normality_panels(data, col, figsize)


def _plot_normality_panels(data, col, figsize):
    """Show the QQ plot, histogram and boxplot for one variable, then close the figure."""
    # Imported here so the plotting stack is only loaded when a figure is
    # actually drawn (repeat imports are served from the module cache).
    import matplotlib.pyplot as plt
    import seaborn as sns
    import statsmodels.api as sm

    fig, axes = plt.subplots(1, 3, figsize=figsize)

    # QQ plot with a standardized reference line.
    sm.qqplot(data, line='s', ax=axes[0])
    axes[0].set_title(f"QQ Plot - {col}")

    # Histogram (no KDE overlay).
    sns.histplot(data, bins=30, kde=False, color='gray', alpha=0.3, ax=axes[1])
    axes[1].set_title(f"Histogram - {col}")

    # Boxplot.
    sns.boxplot(x=data, ax=axes[2], color='lightblue')
    axes[2].set_title(f"Boxplot - {col}")
    axes[2].set_xlabel(col)

    # Address the figure explicitly instead of relying on pyplot's
    # "current figure" state.
    fig.suptitle(f"Normality Assessment - {col}", fontsize=14, y=1.05)
    fig.tight_layout()
    plt.show()
    # Without this, pyplot keeps every figure alive, so a wide DataFrame
    # accumulates one open figure per numeric column.
    plt.close(fig)
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
qdesc/__init__.py,sha256=YtYahB-neaYOG3DvVXweSuFExqVZFNR0lAivaPp9_SA,6599
|
|
2
|
+
qdesc-0.1.8.dist-info/LICENCE.txt,sha256=xdFo-Rt6I7EP7C_qrVeIBIcH_7mRGUh8sciJs2R8VmY,9684
|
|
3
|
+
qdesc-0.1.8.dist-info/METADATA,sha256=k7IeXnqHYiaoz999aax8qWGlOD2khStdVVSRrRzFmsc,3780
|
|
4
|
+
qdesc-0.1.8.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
5
|
+
qdesc-0.1.8.dist-info/top_level.txt,sha256=JuSs1wWRGN77DVuq-SX-5P7m_mIZF0ikEVgPTBOrHb0,6
|
|
6
|
+
qdesc-0.1.8.dist-info/RECORD,,
|
qdesc-0.1.7.dist-info/RECORD
DELETED
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
qdesc/__init__.py,sha256=zmaI_ancCfg7Vuy8opqg8H9j6iwCElfIbGwBM15OJPU,4498
|
|
2
|
-
qdesc-0.1.7.dist-info/LICENCE.txt,sha256=xdFo-Rt6I7EP7C_qrVeIBIcH_7mRGUh8sciJs2R8VmY,9684
|
|
3
|
-
qdesc-0.1.7.dist-info/METADATA,sha256=RtfEWvWuIvU9BxXASizXho7YTYjMt_jzDEWCzfuXZZI,3780
|
|
4
|
-
qdesc-0.1.7.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
5
|
-
qdesc-0.1.7.dist-info/top_level.txt,sha256=JuSs1wWRGN77DVuq-SX-5P7m_mIZF0ikEVgPTBOrHb0,6
|
|
6
|
-
qdesc-0.1.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|