qdesc 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qdesc might be problematic. Click here for more details.

qdesc/__init__.py CHANGED
@@ -49,4 +49,22 @@ def freqdist(df, column_name):
49
49
  freq_dist = df[column_name].value_counts().reset_index()
50
50
  freq_dist.columns = [column_name, 'Count']
51
51
  freq_dist['Percentage'] = (freq_dist['Count'] / len(df)) * 100
52
- return freq_dist
52
+ return freq_dist
53
+
54
+
55
def freqdist_a(df, ascending=False):
    """Build a combined frequency-distribution table for all categorical columns.

    Every column of ``df`` whose dtype is ``object`` or ``category`` is
    tabulated with ``value_counts()``; the per-column tables are stacked
    into one long DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Data frame to summarize.
    ascending : bool, default False
        Sort direction applied to the ``Percentage`` column within each
        source column's block of rows.

    Returns
    -------
    pandas.DataFrame
        Columns ``Column`` (source column name), ``Value`` (level),
        ``Count`` and ``Percentage``. Percentages are relative to the
        values counted by ``value_counts()`` (missing values are dropped
        by its default), not to ``len(df)``. If ``df`` has no
        object/category columns, an empty frame with these four columns
        is returned.
    """
    output_columns = ['Column', 'Value', 'Count', 'Percentage']
    results = []
    for column in df.select_dtypes(include=['object', 'category']).columns:
        # Count once and derive the percentages from the same Series so the
        # two value arrays are guaranteed to line up row for row.
        frequency_table = df[column].value_counts()
        percentage_table = frequency_table / frequency_table.sum() * 100

        distribution = pd.DataFrame({
            'Column': column,
            'Value': frequency_table.index,
            'Count': frequency_table.values,
            'Percentage': percentage_table.values,
        })
        distribution = distribution.sort_values(by='Percentage', ascending=ascending)
        results.append(distribution)

    # pd.concat raises ValueError on an empty list; return an empty table
    # with the documented schema when there is nothing categorical to report.
    if not results:
        return pd.DataFrame(columns=output_columns)

    return pd.concat(results, ignore_index=True)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: qdesc
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: Quick and Easy way to do descriptive analysis.
5
5
  Author: Paolo Hilado
6
6
  Author-email: datasciencepgh@proton.me
@@ -38,6 +38,13 @@ Run the function qd.freqdist(df, "Variable Name") to easily create a frequency d
38
38
  * Counts - the number of observations
39
39
  * Percentage - percentage of observations from total.
40
40
 
41
+ Run the function qd.freqdist_a(df, ascending=False) to easily create frequency distribution tables, sorted in descending order (the default) or in ascending order (ascending=True), for all
42
+ the categorical variables in your data frame. The resulting table will include columns such as:
43
+ * Variable levels (i.e., for Satisfaction: Very Low, Low, Moderate, High, Very High)
44
+ * Counts - the number of observations
45
+ * Percentage - each level's share of the non-missing observations in that variable.
46
+
47
+
41
48
  Later versions will include data visualizations handy for exploring the distribution of the data set.
42
49
 
43
50
  ## Installation
@@ -0,0 +1,6 @@
1
+ qdesc/__init__.py,sha256=W8y3xolTQA7a6DaDRzuYDWk3qa89b1HEudXyKjwV6S8,2953
2
+ qdesc-0.1.5.dist-info/LICENCE.txt,sha256=xdFo-Rt6I7EP7C_qrVeIBIcH_7mRGUh8sciJs2R8VmY,9684
3
+ qdesc-0.1.5.dist-info/METADATA,sha256=FNjZ7v_Bv_hEvNmPyGUqGyfhE1CaFGILUFXVSz1-48A,3145
4
+ qdesc-0.1.5.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
5
+ qdesc-0.1.5.dist-info/top_level.txt,sha256=JuSs1wWRGN77DVuq-SX-5P7m_mIZF0ikEVgPTBOrHb0,6
6
+ qdesc-0.1.5.dist-info/RECORD,,
@@ -1,6 +0,0 @@
1
- qdesc/__init__.py,sha256=o1uyXPq2AR6BG9a9KeXFtsmkmat4k4ZNNxU49GBUNec,2259
2
- qdesc-0.1.3.dist-info/LICENCE.txt,sha256=xdFo-Rt6I7EP7C_qrVeIBIcH_7mRGUh8sciJs2R8VmY,9684
3
- qdesc-0.1.3.dist-info/METADATA,sha256=buMXfELadWsJzcez0Pe8o4YnLuQ66BI_nmY4sbE0u3Q,2690
4
- qdesc-0.1.3.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
5
- qdesc-0.1.3.dist-info/top_level.txt,sha256=JuSs1wWRGN77DVuq-SX-5P7m_mIZF0ikEVgPTBOrHb0,6
6
- qdesc-0.1.3.dist-info/RECORD,,
File without changes