qdesc 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qdesc might be problematic. Click here for more details.
- qdesc/__init__.py +38 -5
- {qdesc-0.1.4.dist-info → qdesc-0.1.6.dist-info}/METADATA +13 -3
- qdesc-0.1.6.dist-info/RECORD +6 -0
- qdesc-0.1.4.dist-info/RECORD +0 -6
- {qdesc-0.1.4.dist-info → qdesc-0.1.6.dist-info}/LICENCE.txt +0 -0
- {qdesc-0.1.4.dist-info → qdesc-0.1.6.dist-info}/WHEEL +0 -0
- {qdesc-0.1.4.dist-info → qdesc-0.1.6.dist-info}/top_level.txt +0 -0
qdesc/__init__.py
CHANGED
|
@@ -52,19 +52,52 @@ def freqdist(df, column_name):
|
|
|
52
52
|
return freq_dist
|
|
53
53
|
|
|
54
54
|
|
|
55
|
-
def freqdist_a(df):
|
|
56
|
-
results = []
|
|
55
|
+
def freqdist_a(df, ascending=False):
    """Return frequency tables for every categorical column, stacked together.

    Each column of ``df`` selected by
    ``select_dtypes(include=['object', 'category'])`` contributes one row per
    distinct value reported by ``value_counts()``.

    Args:
        df: pandas DataFrame to summarise.
        ascending: When true, sort each column's rows by ``Percentage`` in
            ascending order; the default sorts in descending order.

    Returns:
        A DataFrame with the columns ``Column``, ``Value``, ``Count`` and
        ``Percentage`` and a fresh 0..n-1 index. When ``df`` has no
        object/category columns, an empty DataFrame with those same columns
        is returned instead of letting ``pd.concat`` raise on an empty list.
    """
    output_columns = ['Column', 'Value', 'Count', 'Percentage']
    results = []
    for column in df.select_dtypes(include=['object', 'category']).columns:
        frequency_table = df[column].value_counts()
        # Derive the percentages from the same counts so the two positional
        # ``.values`` arrays below are guaranteed to line up row for row.
        percentage_table = frequency_table / frequency_table.sum() * 100

        distribution = pd.DataFrame({
            'Column': column,
            'Value': frequency_table.index,
            'Count': frequency_table.values,
            'Percentage': percentage_table.values,
        })
        distribution = distribution.sort_values(by='Percentage', ascending=ascending)
        results.append(distribution)

    # pd.concat refuses an empty list, so handle "no categorical columns" here.
    if not results:
        return pd.DataFrame(columns=output_columns)

    # Combine all distributions into a single DataFrame
    final_df = pd.concat(results, ignore_index=True)
    return final_df
|
|
71
|
+
|
|
72
|
+
def clean_sheet_name(name):
    """Turn a column label into a string usable as an Excel sheet name.

    Args:
        name: Column label. Non-string labels (for example integers) are
            converted with ``str()`` first.

    Returns:
        The label with the characters ``: \\ / ? * [ ]`` removed, surrounding
        whitespace stripped, truncated to 31 characters, and without leading
        or trailing apostrophes. The result may be an empty string.
    """
    # str.translate removes the forbidden characters in one pass and keeps
    # this helper free of any module-level import.
    forbidden = str.maketrans('', '', ':\\/?*[]')
    cleaned = str(name).translate(forbidden)
    # Limit to 31 characters
    cleaned = cleaned.strip()[:31]
    # A sheet name may not begin or end with an apostrophe; strip after the
    # truncation because cutting the name can expose a new trailing one.
    return cleaned.strip("'")
|
|
78
|
+
|
|
79
|
+
def freqdist_to_excel(df, output_path, sort_by='Percentage', ascending=False, top_n=None):
    """Write one frequency table per categorical column to an Excel workbook.

    Each column of ``df`` selected by
    ``select_dtypes(include=['object', 'category'])`` gets its own sheet with
    the columns ``Value``, ``Count`` and ``Percentage``.

    Args:
        df: pandas DataFrame to summarise.
        output_path: Destination passed to ``pd.ExcelWriter`` (opened with the
            ``xlsxwriter`` engine).
        sort_by: Name of the table column to sort each sheet by.
        ascending: Sort direction handed to ``sort_values``.
        top_n: When not ``None``, keep only the first ``top_n`` rows of each
            sorted table.

    Returns:
        None. A confirmation line is printed once the workbook is closed.
    """
    max_len = 31  # Excel's sheet-name length limit
    used_names = set()
    with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
        for column in df.select_dtypes(include=['object', 'category']).columns:
            frequency_table = df[column].value_counts()
            # Derive the percentages from the same counts so the positional
            # ``.values`` arrays below cannot get out of step.
            percentage_table = frequency_table / frequency_table.sum() * 100

            distribution = pd.DataFrame({
                'Value': frequency_table.index,
                'Count': frequency_table.values,
                'Percentage': percentage_table.values,
            })
            distribution = distribution.sort_values(by=sort_by, ascending=ascending)
            if top_n is not None:
                distribution = distribution.head(top_n)

            # Generate safe sheet name
            base_name = clean_sheet_name(column)
            sheet_name = base_name
            count = 1
            # Names are compared lower-cased so two sheets never differ only
            # by letter case.
            while sheet_name.lower() in used_names:
                suffix = f"_{count}"
                # Keep the historical 28-character prefix for short suffixes,
                # but shrink it once the suffix grows (e.g. "_100") so the
                # full name never exceeds the 31-character limit.
                keep = min(28, max_len - len(suffix))
                sheet_name = f"{base_name[:keep]}{suffix}"
                count += 1
            used_names.add(sheet_name.lower())

            distribution.to_excel(writer, sheet_name=sheet_name, index=False)
    print(f"Frequency distributions written to {output_path}")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: qdesc
|
|
3
|
-
Version: 0.1.4
|
|
3
|
+
Version: 0.1.6
|
|
4
4
|
Summary: Quick and Easy way to do descriptive analysis.
|
|
5
5
|
Author: Paolo Hilado
|
|
6
6
|
Author-email: datasciencepgh@proton.me
|
|
@@ -21,6 +21,7 @@ Be sure to run the following prior to using the "qd.desc" function:
|
|
|
21
21
|
|
|
22
22
|
The qdesc package provides a quick and easy approach to do descriptive analysis for quantitative data.
|
|
23
23
|
|
|
24
|
+
## qd.desc Function
|
|
24
25
|
Run the function qd.desc(df) to get the following statistics:
|
|
25
26
|
* count - number of observations
|
|
26
27
|
* mean - measure of central tendency for normal distribution
|
|
@@ -33,17 +34,26 @@ Run the function qd.desc(df) to get the following statistics:
|
|
|
33
34
|
* 5% crit_value - critical value for a 5% Significance Level
|
|
34
35
|
* 1% crit_value - critical value for a 1% Significance Level
|
|
35
36
|
|
|
37
|
+
## qd.freqdist Function
|
|
36
38
|
Run the function qd.freqdist(df, "Variable Name") to easily create a frequency distribution for your chosen categorical variable with the following:
|
|
37
39
|
* Variable Levels (i.e., for Sex Variable: Male and Female)
|
|
38
40
|
* Counts - the number of observations
|
|
39
41
|
* Percentage - percentage of observations from total.
|
|
40
42
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
+
## qd.freqdist_a Function
|
|
44
|
+
Run the function qd.freqdist_a(df, ascending=False) to easily create frequency distribution tables, sorted by percentage in descending order (default) or in ascending order (ascending=True), for all
|
|
45
|
+
the categorical variables in your data frame. The resulting table will include columns such as:
|
|
43
46
|
* Variable levels (i.e., for Satisfaction: Very Low, Low, Moderate, High, Very High)
|
|
44
47
|
* Counts - the number of observations
|
|
45
48
|
* Percentage - percentage of observations from total.
|
|
46
49
|
|
|
50
|
+
## qd.freqdist_to_excel Function
|
|
51
|
+
Run the function qd.freqdist_to_excel(df, "Name of file.xlsx", ascending=False) to easily create frequency distribution tables, sorted in descending order (default) or in ascending order (ascending=True), for all the categorical variables in your data frame, each saved as a separate sheet in the .xlsx file. The resulting table will include columns such as:
|
|
52
|
+
* Variable levels (i.e., for Satisfaction: Very Low, Low, Moderate, High, Very High)
|
|
53
|
+
* Counts - the number of observations
|
|
54
|
+
* Percentage - percentage of observations from total.
|
|
55
|
+
|
|
56
|
+
|
|
47
57
|
Later versions will include data visualizations handy for exploring the distribution of the data set.
|
|
48
58
|
|
|
49
59
|
## Installation
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
qdesc/__init__.py,sha256=NPRlg4X8wabfKVQ25JbRZLoTEL0f9KwfEPEPLP3I870,4445
|
|
2
|
+
qdesc-0.1.6.dist-info/LICENCE.txt,sha256=xdFo-Rt6I7EP7C_qrVeIBIcH_7mRGUh8sciJs2R8VmY,9684
|
|
3
|
+
qdesc-0.1.6.dist-info/METADATA,sha256=0PQwiWOmpIXzABYsEu52vnnNKgxNH0Fc3Oyc-IaBDy8,3780
|
|
4
|
+
qdesc-0.1.6.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
5
|
+
qdesc-0.1.6.dist-info/top_level.txt,sha256=JuSs1wWRGN77DVuq-SX-5P7m_mIZF0ikEVgPTBOrHb0,6
|
|
6
|
+
qdesc-0.1.6.dist-info/RECORD,,
|
qdesc-0.1.4.dist-info/RECORD
DELETED
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
qdesc/__init__.py,sha256=4DVLq35LmNvg5Tq1Mz43h_YIWvS-MbwscbRpuqGKFUY,2933
|
|
2
|
-
qdesc-0.1.4.dist-info/LICENCE.txt,sha256=xdFo-Rt6I7EP7C_qrVeIBIcH_7mRGUh8sciJs2R8VmY,9684
|
|
3
|
-
qdesc-0.1.4.dist-info/METADATA,sha256=MHz5mWEIWOTp47oSPV3BBw_FZX5L4UgGIm1ROYQbRsI,3061
|
|
4
|
-
qdesc-0.1.4.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
5
|
-
qdesc-0.1.4.dist-info/top_level.txt,sha256=JuSs1wWRGN77DVuq-SX-5P7m_mIZF0ikEVgPTBOrHb0,6
|
|
6
|
-
qdesc-0.1.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|