qdesc 0.1.7.1__tar.gz → 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qdesc might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: qdesc
3
- Version: 0.1.7.1
3
+ Version: 0.1.8
4
4
  Summary: Quick and Easy way to do descriptive analysis.
5
5
  Author: Paolo Hilado
6
6
  Author-email: datasciencepgh@proton.me
@@ -106,3 +106,51 @@ def freqdist_to_excel(df, output_path, sort_by='Percentage', ascending=False, to
106
106
  used_names.add(sheet_name.lower())
107
107
  distribution.to_excel(writer, sheet_name=sheet_name, index=False)
108
108
  print(f"Frequency distributions written to {output_path}")
109
+
110
def normcheck_dashboard(df, significance_level=0.05, figsize=(18, 5)):
    """Print an Anderson-Darling normality summary and show plots per numeric column.

    For every numeric column of ``df`` the missing values are dropped, the
    Anderson-Darling test against the normal distribution is run, a short
    summary is printed, and a QQ plot, histogram and boxplot are displayed
    side by side.

    Args:
        df: DataFrame whose numeric columns are analysed.
        significance_level: Requested significance level as a fraction
            (0.05 means 5%). The test result only tabulates a fixed set of
            levels, so the tabulated level closest to the request is used and
            reported in the printed summary.
        figsize: Size passed to ``plt.subplots`` for the three-panel figure.

    Returns:
        None. All results are printed or plotted.

    Notes:
        Columns with fewer than 8 non-missing values are reported and skipped.
        The plotting libraries are imported only once a column actually
        reaches the plotting step, so the early-exit paths stay lightweight.
    """
    import numpy as np

    numeric_cols = df.select_dtypes(include=[np.number]).columns
    if numeric_cols.empty:
        print("No numeric columns to analyze.")
        return

    for col in numeric_cols:
        data = df[col].dropna()
        print(f"\n--- Variable: {col} ---")
        if len(data) < 8:
            print("Not enough data to perform Anderson-Darling test or meaningful plots.")
            continue

        stat, used_sig, crit_val, decision = _anderson_darling_decision(
            data, significance_level
        )

        # Print Summary
        print(f" Anderson-Darling Statistic : {stat:.4f}")
        print(f" Critical Value (@ {used_sig}%) : {crit_val:.4f}")
        print(f" Decision : {decision}")

        _show_normality_plots(data, col, figsize)


def _anderson_darling_decision(data, significance_level):
    """Run the Anderson-Darling normality test and pick the nearest critical value.

    Returns a ``(statistic, used_level_percent, critical_value, decision)`` tuple.
    """
    import numpy as np
    from scipy.stats import anderson

    test_result = anderson(data, dist='norm')
    stat = test_result.statistic
    sig_levels = test_result.significance_level
    crit_values = test_result.critical_values

    # Tabulated levels are expressed in percent, so scale the requested
    # fraction before looking for the closest one (first match wins on ties).
    gaps = np.abs(np.asarray(sig_levels) - significance_level * 100)
    closest_index = int(np.argmin(gaps))
    used_sig = sig_levels[closest_index]
    crit_val = crit_values[closest_index]

    if np.isnan(stat):
        # Any comparison with NaN is False, which would otherwise be reported
        # as a confident "Reject Null" (presumably arises for zero-variance
        # data -- verify against the SciPy version in use).
        decision = "Undetermined (statistic is NaN)"
    elif stat <= crit_val:
        decision = "Fail to Reject Null"
    else:
        decision = "Reject Null"
    return stat, used_sig, crit_val, decision


def _show_normality_plots(data, col, figsize):
    """Show a QQ plot, histogram and boxplot for one variable, then free the figure."""
    import matplotlib.pyplot as plt
    import seaborn as sns
    import statsmodels.api as sm

    fig, axes = plt.subplots(1, 3, figsize=figsize)
    try:
        # QQ Plot
        sm.qqplot(data, line='s', ax=axes[0])
        axes[0].set_title(f"QQ Plot - {col}")
        # Histogram (No KDE)
        sns.histplot(data, bins=30, kde=False, color='gray', alpha=0.3, ax=axes[1])
        axes[1].set_title(f"Histogram - {col}")
        # Boxplot
        sns.boxplot(x=data, ax=axes[2], color='lightblue')
        axes[2].set_title(f"Boxplot - {col}")
        axes[2].set_xlabel(col)
        plt.suptitle(f"Normality Assessment - {col}", fontsize=14, y=1.05)
        plt.tight_layout()
        plt.show()
    finally:
        # One figure is created per column; close it so figures do not pile
        # up in memory when many columns are analysed.
        plt.close(fig)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: qdesc
3
- Version: 0.1.7.1
3
+ Version: 0.1.8
4
4
  Summary: Quick and Easy way to do descriptive analysis.
5
5
  Author: Paolo Hilado
6
6
  Author-email: datasciencepgh@proton.me
@@ -7,7 +7,7 @@ long_description = (this_directory / "README.md").read_text()
7
7
 
8
8
  setup(
9
9
  name='qdesc',
10
- version='0.1.7.1',
10
+ version='0.1.8',
11
11
  packages=find_packages(),
12
12
  install_requires=[
13
13
  # List your dependencies here, e.g., pandas if your function requires it
File without changes
File without changes
File without changes