qdesc 0.1.8.4__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qdesc might be problematic. Click here for more details.

qdesc/__init__.py CHANGED
@@ -40,6 +40,47 @@ def desc(df):
40
40
  x_df = np.round(pd.concat([xl, mad_df, xr, anderson_df], axis=1),2)
41
41
  return x_df
42
42
 
43
+
44
def grp_desc(df, numeric_col, group_col):
    """Summarise one numeric column separately for each group.

    For every group produced by ``df.groupby(group_col)`` the non-missing
    values of ``numeric_col`` are described by count, mean, standard
    deviation, median, median absolute deviation, minimum, maximum, and the
    Anderson-Darling normality statistic together with its 5% and 1%
    critical values.

    Args:
        df: pandas DataFrame holding both columns.
        numeric_col: Name of the numeric column to describe.
        group_col: Name of the column whose values define the groups.

    Returns:
        A DataFrame with one row per group, rounded to two decimals.
        Columns, in order: ``group_col``, ``count``, ``mean``, ``std``,
        ``median``, ``mad``, ``min``, ``max``, ``anderson_stat``,
        ``crit_5%``, ``crit_1%``. Groups with fewer than two non-missing
        observations keep their count but report NaN for every other
        statistic.
    """
    import pandas as pd
    import numpy as np
    from scipy.stats import median_abs_deviation, anderson

    # For dist='norm', anderson() reports critical values at the
    # significance levels 15%, 10%, 5%, 2.5% and 1% (in that order).
    # Index 3 is therefore the 2.5% level; the 1% level is index 4.
    crit_5_idx = 2
    crit_1_idx = 4

    # Statistics that are left undefined when a group is too small.
    stat_names = (
        'mean', 'std', 'median', 'mad', 'min', 'max',
        'anderson_stat', 'crit_5%', 'crit_1%',
    )

    results = []
    for group, group_df in df.groupby(group_col):
        data = group_df[numeric_col].dropna()
        stats = {group_col: group, 'count': len(data)}
        if len(data) < 2:
            # Not enough data for stats like std or the Anderson-Darling test.
            stats.update(dict.fromkeys(stat_names, np.nan))
        else:
            ad_result = anderson(data, dist='norm')
            stats.update({
                'mean': data.mean(),
                'std': data.std(),
                'median': data.median(),
                'mad': median_abs_deviation(data),
                'min': data.min(),
                'max': data.max(),
                'anderson_stat': ad_result.statistic,
                'crit_5%': ad_result.critical_values[crit_5_idx],
                'crit_1%': ad_result.critical_values[crit_1_idx],
            })
        results.append(stats)
    return np.round(pd.DataFrame(results), 2)
83
+
43
84
  def freqdist(df, column_name):
44
85
  import pandas as pd
45
86
  if column_name not in df.columns:
@@ -55,6 +96,7 @@ def freqdist(df, column_name):
55
96
 
56
97
 
57
98
  def freqdist_a(df, ascending=False):
99
+ import pandas as pd
58
100
  results = []
59
101
  for column in df.select_dtypes(include=['object', 'category']).columns:
60
102
  frequency_table = df[column].value_counts()
@@ -1,11 +1,17 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: qdesc
3
- Version: 0.1.8.4
3
+ Version: 0.1.9
4
4
  Summary: Quick and Easy way to do descriptive analysis.
5
5
  Author: Paolo Hilado
6
6
  Author-email: datasciencepgh@proton.me
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENCE.txt
9
+ Dynamic: author
10
+ Dynamic: author-email
11
+ Dynamic: description
12
+ Dynamic: description-content-type
13
+ Dynamic: license-file
14
+ Dynamic: summary
9
15
 
10
16
  # qdesc - Quick and Easy Descriptive Analysis
11
17
 
@@ -0,0 +1,6 @@
1
+ qdesc/__init__.py,sha256=5EZcXkVluxvCnCmKqOty4lRmcyMJRDD0pz6wuTbnpzQ,8120
2
+ qdesc-0.1.9.dist-info/licenses/LICENCE.txt,sha256=xdFo-Rt6I7EP7C_qrVeIBIcH_7mRGUh8sciJs2R8VmY,9684
3
+ qdesc-0.1.9.dist-info/METADATA,sha256=iRg9Fy_IWrgvGeVFar5ATbv8eIAaso_GoY9Wr77fgKA,4543
4
+ qdesc-0.1.9.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
5
+ qdesc-0.1.9.dist-info/top_level.txt,sha256=JuSs1wWRGN77DVuq-SX-5P7m_mIZF0ikEVgPTBOrHb0,6
6
+ qdesc-0.1.9.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.38.4)
2
+ Generator: setuptools (80.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,6 +0,0 @@
1
- qdesc/__init__.py,sha256=YtYahB-neaYOG3DvVXweSuFExqVZFNR0lAivaPp9_SA,6599
2
- qdesc-0.1.8.4.dist-info/LICENCE.txt,sha256=xdFo-Rt6I7EP7C_qrVeIBIcH_7mRGUh8sciJs2R8VmY,9684
3
- qdesc-0.1.8.4.dist-info/METADATA,sha256=QnEaxbPAB45PhoNcTHHdMa1G7n3JyqCVYxsmyDPFtTo,4407
4
- qdesc-0.1.8.4.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
5
- qdesc-0.1.8.4.dist-info/top_level.txt,sha256=JuSs1wWRGN77DVuq-SX-5P7m_mIZF0ikEVgPTBOrHb0,6
6
- qdesc-0.1.8.4.dist-info/RECORD,,