funcguard 0.2.40__tar.gz → 0.2.41__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {funcguard-0.2.40 → funcguard-0.2.41}/PKG-INFO +4 -2
- {funcguard-0.2.40 → funcguard-0.2.41}/README.md +3 -1
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard/__init__.py +4 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard/pd_utils/__init__.py +19 -2
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard/pd_utils/statistics/__init__.py +3 -0
- funcguard-0.2.41/funcguard/pd_utils/statistics/agg_utils.py +83 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard/pd_utils/statistics/df_statistics.py +49 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard.egg-info/PKG-INFO +4 -2
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard.egg-info/SOURCES.txt +1 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/setup.py +1 -1
- {funcguard-0.2.40 → funcguard-0.2.41}/LICENSE +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard/calculate.py +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard/core.py +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard/data_models/__init__.py +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard/data_models/request_models.py +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard/ip_utils.py +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard/log_utils.py +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard/pd_utils/convert_utils.py +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard/pd_utils/date_utils.py +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard/pd_utils/fill_round.py +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard/pd_utils/filter.py +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard/pd_utils/json_utils/__init__.py +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard/pd_utils/json_utils/json_parser.py +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard/pd_utils/statistics/count_utils.py +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard/pd_utils/statistics/mask_utils.py +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard/printer.py +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard/time_utils.py +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard/tools.py +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard.egg-info/dependency_links.txt +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard.egg-info/not-zip-safe +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard.egg-info/requires.txt +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/funcguard.egg-info/top_level.txt +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/setup.cfg +0 -0
- {funcguard-0.2.40 → funcguard-0.2.41}/tests/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: funcguard
|
|
3
|
-
Version: 0.2.40
|
|
3
|
+
Version: 0.2.41
|
|
4
4
|
Summary: FuncGuard是一个Python库,提供函数执行超时控制、重试机制、HTTP请求封装和格式化打印工具。
|
|
5
5
|
Home-page: https://github.com/tinycen/funcguard
|
|
6
6
|
Author: tinycen
|
|
@@ -544,7 +544,9 @@ print(f"当前价格: {current_price}, 变化: {price_change}") # 输出: 当
|
|
|
544
544
|
| `pd_fill_na` / `pd_fill_nat` | 数据填充 | [查看](docs/pandas/fill.md) |
|
|
545
545
|
| `pd_convert_columns` / `pd_convert_decimal` / `pd_convert_numeric_series` / `pd_convert_str_datetime` / `pd_convert_datetime_str` | 类型转换 | [查看](docs/pandas/convert.md) |
|
|
546
546
|
| `pd_load_json` | JSON解析 | [查看](docs/pandas/json.md) |
|
|
547
|
-
| `pd_filter`
|
|
547
|
+
| `pd_filter` | 数据筛选 | [查看](docs/pandas/filter.md) |
|
|
548
|
+
| `pd_count` / `pd_value_counts` | 条件计数统计 | [查看](docs/pandas/count.md) |
|
|
549
|
+
| `pd_group_agg` | 分组聚合统计 | [查看](docs/pandas/agg.md) |
|
|
548
550
|
| `pd_build_mask` / `pd_build_masks` / `pd_combine_masks` | 掩码构建 | [查看](docs/pandas/mask.md) |
|
|
549
551
|
| `DataFrameStatistics` | 统计分析 | [查看](docs/pandas/statistics.md) |
|
|
550
552
|
| `pd_cal_date_diff` / `pd_round_columns` | 日期计算和数值舍入 | [查看](docs/pandas/date.md) |
|
|
@@ -517,7 +517,9 @@ print(f"当前价格: {current_price}, 变化: {price_change}") # 输出: 当
|
|
|
517
517
|
| `pd_fill_na` / `pd_fill_nat` | 数据填充 | [查看](docs/pandas/fill.md) |
|
|
518
518
|
| `pd_convert_columns` / `pd_convert_decimal` / `pd_convert_numeric_series` / `pd_convert_str_datetime` / `pd_convert_datetime_str` | 类型转换 | [查看](docs/pandas/convert.md) |
|
|
519
519
|
| `pd_load_json` | JSON解析 | [查看](docs/pandas/json.md) |
|
|
520
|
-
| `pd_filter`
|
|
520
|
+
| `pd_filter` | 数据筛选 | [查看](docs/pandas/filter.md) |
|
|
521
|
+
| `pd_count` / `pd_value_counts` | 条件计数统计 | [查看](docs/pandas/count.md) |
|
|
522
|
+
| `pd_group_agg` | 分组聚合统计 | [查看](docs/pandas/agg.md) |
|
|
521
523
|
| `pd_build_mask` / `pd_build_masks` / `pd_combine_masks` | 掩码构建 | [查看](docs/pandas/mask.md) |
|
|
522
524
|
| `DataFrameStatistics` | 统计分析 | [查看](docs/pandas/statistics.md) |
|
|
523
525
|
| `pd_cal_date_diff` / `pd_round_columns` | 日期计算和数值舍入 | [查看](docs/pandas/date.md) |
|
|
@@ -30,6 +30,8 @@ from .pd_utils import (
|
|
|
30
30
|
pd_build_masks,
|
|
31
31
|
pd_combine_masks,
|
|
32
32
|
pd_count,
|
|
33
|
+
pd_value_counts,
|
|
34
|
+
pd_group_agg,
|
|
33
35
|
DataFrameStatistics,
|
|
34
36
|
|
|
35
37
|
)
|
|
@@ -101,6 +103,8 @@ __all__ = [
|
|
|
101
103
|
"pd_build_masks",
|
|
102
104
|
"pd_combine_masks",
|
|
103
105
|
"pd_count",
|
|
106
|
+
"pd_value_counts",
|
|
107
|
+
"pd_group_agg",
|
|
104
108
|
"DataFrameStatistics",
|
|
105
109
|
|
|
106
110
|
# 计算工具
|
|
@@ -1,8 +1,23 @@
|
|
|
1
1
|
import pandas as pd
|
|
2
2
|
from .fill_round import fill_na, round_columns
|
|
3
3
|
from .date_utils import fill_nat, cal_date_diff
|
|
4
|
-
from .convert_utils import
|
|
5
|
-
|
|
4
|
+
from .convert_utils import (
|
|
5
|
+
convert_columns,
|
|
6
|
+
convert_decimal,
|
|
7
|
+
convert_numeric_series,
|
|
8
|
+
load_json,
|
|
9
|
+
convert_str_datetime,
|
|
10
|
+
convert_datetime_str
|
|
11
|
+
)
|
|
12
|
+
from .statistics import (
|
|
13
|
+
pd_build_mask,
|
|
14
|
+
pd_build_masks,
|
|
15
|
+
pd_combine_masks,
|
|
16
|
+
pd_count,
|
|
17
|
+
pd_value_counts,
|
|
18
|
+
pd_group_agg,
|
|
19
|
+
DataFrameStatistics
|
|
20
|
+
)
|
|
6
21
|
from .filter import pd_filter
|
|
7
22
|
|
|
8
23
|
# 启用未来行为:禁止静默降级
|
|
@@ -32,5 +47,7 @@ __all__ = [
|
|
|
32
47
|
"pd_build_masks",
|
|
33
48
|
"pd_combine_masks",
|
|
34
49
|
"pd_count",
|
|
50
|
+
"pd_value_counts",
|
|
51
|
+
"pd_group_agg",
|
|
35
52
|
"DataFrameStatistics",
|
|
36
53
|
]
|
|
@@ -5,6 +5,7 @@ from .mask_utils import (
|
|
|
5
5
|
build_base_mask as pd_build_masks,
|
|
6
6
|
combine_masks as pd_combine_masks,
|
|
7
7
|
)
|
|
8
|
+
from .agg_utils import group_agg as pd_group_agg
|
|
8
9
|
|
|
9
10
|
__all__ = [
|
|
10
11
|
# 掩码构建函数
|
|
@@ -14,6 +15,8 @@ __all__ = [
|
|
|
14
15
|
# 统计函数
|
|
15
16
|
"pd_count",
|
|
16
17
|
"pd_value_counts",
|
|
18
|
+
# 聚合函数
|
|
19
|
+
"pd_group_agg",
|
|
17
20
|
# 统计分析类
|
|
18
21
|
"DataFrameStatistics",
|
|
19
22
|
]
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
|
|
2
|
+
"""
|
|
3
|
+
聚合(aggregation)操作工具模块。
|
|
4
|
+
|
|
5
|
+
提供DataFrame分组聚合统计功能,支持按指定列分组后对另一列进行
|
|
6
|
+
sum、mean、max、min、count、median、std、var等聚合计算。
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import pandas as pd
|
|
10
|
+
from typing import Any, Dict, Optional, Union, List, Tuple
|
|
11
|
+
from .mask_utils import build_single_mask, build_base_mask
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def group_agg(
|
|
15
|
+
df: pd.DataFrame,
|
|
16
|
+
group_col: str,
|
|
17
|
+
agg_col: str,
|
|
18
|
+
agg_func: str = "sum",
|
|
19
|
+
sort: Optional[str] = None,
|
|
20
|
+
conditions: Optional[Union[Tuple, List[Tuple]]] = None,
|
|
21
|
+
logic: str = "and",
|
|
22
|
+
true_mask: Optional[pd.Series] = None,
|
|
23
|
+
false_mask: Optional[pd.Series] = None
|
|
24
|
+
) -> Dict[Any, Union[int, float]]:
|
|
25
|
+
"""
|
|
26
|
+
按指定列分组,对另一列进行聚合统计。
|
|
27
|
+
|
|
28
|
+
参数:
|
|
29
|
+
- df (pd.DataFrame):输入的DataFrame。
|
|
30
|
+
- group_col (str):分组列名(如A列)。
|
|
31
|
+
- agg_col (str):聚合列名(如B列)。
|
|
32
|
+
- agg_func (str):聚合函数,支持 "sum"、"mean"、"max"、"min"、"count"、"median"、"std"、"var",
|
|
33
|
+
默认为 "sum"。
|
|
34
|
+
- sort (Optional[str]):排序方式,"asc" 表示升序,"desc" 表示降序,
|
|
35
|
+
默认为None表示按分组列的原始顺序。
|
|
36
|
+
- conditions (Optional[Union[Tuple, List[Tuple]]]):可选的过滤条件,
|
|
37
|
+
格式与count函数相同。如果提供,则只统计符合条件的行。
|
|
38
|
+
- logic (str):逻辑操作类型,"and" 或 "or",默认为 "and"。
|
|
39
|
+
- true_mask (pd.Series):初始True掩码,默认为None。
|
|
40
|
+
- false_mask (pd.Series):初始False掩码,默认为None。
|
|
41
|
+
|
|
42
|
+
返回:
|
|
43
|
+
- Dict[Any, Union[int, float]]:以分组值为键,聚合结果为值的字典。
|
|
44
|
+
|
|
45
|
+
示例:
|
|
46
|
+
>>> group_agg(df, "category", "amount", "sum")
|
|
47
|
+
{'A': 1000, 'B': 2000, 'C': 1500}
|
|
48
|
+
>>> group_agg(df, "category", "amount", "mean")
|
|
49
|
+
{'A': 100.0, 'B': 200.0, 'C': 150.0}
|
|
50
|
+
>>> group_agg(df, "category", "amount", "sum", sort="desc")
|
|
51
|
+
{'B': 2000, 'C': 1500, 'A': 1000}
|
|
52
|
+
>>> group_agg(df, "category", "amount", "sum", conditions=[("status", "==", "active")])
|
|
53
|
+
{'A': 800, 'B': 1500}
|
|
54
|
+
"""
|
|
55
|
+
# 参数校验
|
|
56
|
+
valid_agg_funcs = ("sum", "mean", "max", "min", "count", "median", "std", "var")
|
|
57
|
+
if agg_func not in valid_agg_funcs:
|
|
58
|
+
raise ValueError(f"agg_func 参数必须是 {valid_agg_funcs} 之一,当前值: {agg_func}")
|
|
59
|
+
if sort is not None and sort not in ("asc", "desc"):
|
|
60
|
+
raise ValueError(f"sort 参数必须是 'asc'、'desc' 或 None,当前值: {sort}")
|
|
61
|
+
|
|
62
|
+
# 如果有过滤条件,先应用条件筛选
|
|
63
|
+
if conditions is not None:
|
|
64
|
+
if isinstance(conditions, tuple):
|
|
65
|
+
mask = build_single_mask(df, conditions)
|
|
66
|
+
else:
|
|
67
|
+
mask = build_base_mask(df, conditions, logic, true_mask, false_mask)
|
|
68
|
+
filtered_df = df[mask]
|
|
69
|
+
else:
|
|
70
|
+
filtered_df = df
|
|
71
|
+
|
|
72
|
+
# 使用pandas的groupby进行聚合
|
|
73
|
+
grouped = filtered_df.groupby(group_col)[agg_col]
|
|
74
|
+
result_series = grouped.agg(agg_func)
|
|
75
|
+
|
|
76
|
+
# 排序处理
|
|
77
|
+
if sort == "asc":
|
|
78
|
+
result_series = result_series.sort_values(ascending=True)
|
|
79
|
+
elif sort == "desc":
|
|
80
|
+
result_series = result_series.sort_values(ascending=False)
|
|
81
|
+
|
|
82
|
+
# 转换为字典返回
|
|
83
|
+
return result_series.to_dict()
|
|
@@ -2,6 +2,7 @@ import pandas as pd
|
|
|
2
2
|
from typing import Any, Dict, List, Tuple, Union, Optional, Mapping
|
|
3
3
|
from .mask_utils import build_single_mask as _original_build_single_mask, build_base_mask as _original_build_base_mask, combine_masks
|
|
4
4
|
from .count_utils import count as _original_count, value_counts as _original_value_counts
|
|
5
|
+
from .agg_utils import group_agg as _original_group_agg
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
|
|
@@ -154,6 +155,54 @@ class DataFrameStatistics:
|
|
|
154
155
|
)
|
|
155
156
|
|
|
156
157
|
|
|
158
|
+
def group_agg(
|
|
159
|
+
self,
|
|
160
|
+
group_col: str,
|
|
161
|
+
agg_col: str,
|
|
162
|
+
agg_func: str = "sum",
|
|
163
|
+
sort: Optional[str] = None,
|
|
164
|
+
conditions: Optional[Union[Tuple, List[Tuple]]] = None,
|
|
165
|
+
logic: str = "and",
|
|
166
|
+
true_mask: Optional[pd.Series] = None,
|
|
167
|
+
false_mask: Optional[pd.Series] = None
|
|
168
|
+
) -> Dict[Any, Union[int, float]]:
|
|
169
|
+
"""
|
|
170
|
+
按指定列分组,对另一列进行聚合统计,自动使用内部掩码参数
|
|
171
|
+
|
|
172
|
+
参数:
|
|
173
|
+
- group_col (str):分组列名(如A列)
|
|
174
|
+
- agg_col (str):聚合列名(如B列)
|
|
175
|
+
- agg_func (str):聚合函数,支持 "sum"、"mean"、"max"、"min"、"count"、"median"、"std"、"var",
|
|
176
|
+
默认为 "sum"
|
|
177
|
+
- sort (Optional[str]):排序方式,"asc" 表示升序,"desc" 表示降序,
|
|
178
|
+
默认为None表示按分组列的原始顺序
|
|
179
|
+
- conditions (Optional[Union[Tuple, List[Tuple]]]):可选的过滤条件
|
|
180
|
+
- logic (str):逻辑操作类型,"and" 或 "or",默认为 "and"
|
|
181
|
+
- true_mask (pd.Series):初始True掩码,默认为None(使用内部缓存)
|
|
182
|
+
- false_mask (pd.Series):初始False掩码,默认为None(使用内部缓存)
|
|
183
|
+
|
|
184
|
+
返回:
|
|
185
|
+
- Dict[Any, Union[int, float]]:以分组值为键,聚合结果为值的字典
|
|
186
|
+
|
|
187
|
+
示例:
|
|
188
|
+
>>> stats.group_agg("category", "amount", "sum")
|
|
189
|
+
{'A': 1000, 'B': 2000, 'C': 1500}
|
|
190
|
+
>>> stats.group_agg("category", "amount", "mean")
|
|
191
|
+
{'A': 100.0, 'B': 200.0, 'C': 150.0}
|
|
192
|
+
>>> stats.group_agg("category", "amount", "sum", sort="desc")
|
|
193
|
+
{'B': 2000, 'C': 1500, 'A': 1000}
|
|
194
|
+
"""
|
|
195
|
+
# 如果没有提供外部掩码,使用内部缓存的掩码
|
|
196
|
+
if true_mask is None:
|
|
197
|
+
true_mask = self._true_mask
|
|
198
|
+
if false_mask is None:
|
|
199
|
+
false_mask = self._false_mask
|
|
200
|
+
return _original_group_agg(
|
|
201
|
+
self._df, group_col, agg_col, agg_func, sort,
|
|
202
|
+
conditions, logic, true_mask, false_mask
|
|
203
|
+
)
|
|
204
|
+
|
|
205
|
+
|
|
157
206
|
def dataframe_info(self) -> Dict[str, Any]:
|
|
158
207
|
"""获取DataFrame的基本信息"""
|
|
159
208
|
return {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: funcguard
|
|
3
|
-
Version: 0.2.40
|
|
3
|
+
Version: 0.2.41
|
|
4
4
|
Summary: FuncGuard是一个Python库,提供函数执行超时控制、重试机制、HTTP请求封装和格式化打印工具。
|
|
5
5
|
Home-page: https://github.com/tinycen/funcguard
|
|
6
6
|
Author: tinycen
|
|
@@ -544,7 +544,9 @@ print(f"当前价格: {current_price}, 变化: {price_change}") # 输出: 当
|
|
|
544
544
|
| `pd_fill_na` / `pd_fill_nat` | 数据填充 | [查看](docs/pandas/fill.md) |
|
|
545
545
|
| `pd_convert_columns` / `pd_convert_decimal` / `pd_convert_numeric_series` / `pd_convert_str_datetime` / `pd_convert_datetime_str` | 类型转换 | [查看](docs/pandas/convert.md) |
|
|
546
546
|
| `pd_load_json` | JSON解析 | [查看](docs/pandas/json.md) |
|
|
547
|
-
| `pd_filter`
|
|
547
|
+
| `pd_filter` | 数据筛选 | [查看](docs/pandas/filter.md) |
|
|
548
|
+
| `pd_count` / `pd_value_counts` | 条件计数统计 | [查看](docs/pandas/count.md) |
|
|
549
|
+
| `pd_group_agg` | 分组聚合统计 | [查看](docs/pandas/agg.md) |
|
|
548
550
|
| `pd_build_mask` / `pd_build_masks` / `pd_combine_masks` | 掩码构建 | [查看](docs/pandas/mask.md) |
|
|
549
551
|
| `DataFrameStatistics` | 统计分析 | [查看](docs/pandas/statistics.md) |
|
|
550
552
|
| `pd_cal_date_diff` / `pd_round_columns` | 日期计算和数值舍入 | [查看](docs/pandas/date.md) |
|
|
@@ -25,6 +25,7 @@ funcguard/pd_utils/filter.py
|
|
|
25
25
|
funcguard/pd_utils/json_utils/__init__.py
|
|
26
26
|
funcguard/pd_utils/json_utils/json_parser.py
|
|
27
27
|
funcguard/pd_utils/statistics/__init__.py
|
|
28
|
+
funcguard/pd_utils/statistics/agg_utils.py
|
|
28
29
|
funcguard/pd_utils/statistics/count_utils.py
|
|
29
30
|
funcguard/pd_utils/statistics/df_statistics.py
|
|
30
31
|
funcguard/pd_utils/statistics/mask_utils.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|