mdbq 0.0.7__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +11 -12
- mdbq/aggregation/query_data.py +59 -4
- {mdbq-0.0.7.dist-info → mdbq-0.0.8.dist-info}/METADATA +1 -1
- {mdbq-0.0.7.dist-info → mdbq-0.0.8.dist-info}/RECORD +6 -6
- {mdbq-0.0.7.dist-info → mdbq-0.0.8.dist-info}/WHEEL +0 -0
- {mdbq-0.0.7.dist-info → mdbq-0.0.8.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -118,11 +118,13 @@ class DataTypes:
|
|
118
118
|
"""
|
119
119
|
# dtypes = df.dtypes.apply(str).to_dict() # 将 dataframe 数据类型转为字典形式
|
120
120
|
df.replace([np.inf, -np.inf], 0, inplace=True) # 清理一些非法值
|
121
|
-
|
122
|
-
cols = df.columns.tolist()
|
123
121
|
df.replace(to_replace=['\\N', '-', '--', '', 'nan'], value=0, regex=False, inplace=True) # 替换掉特殊字符
|
124
122
|
df.replace(to_replace=[','], value='', regex=True, inplace=True)
|
123
|
+
df.replace(to_replace=['="'], value='', regex=True, inplace=True) # ="和"不可以放在一起清洗, 因为有: id=86785565
|
124
|
+
df.replace(to_replace=['"'], value='', regex=True, inplace=True)
|
125
|
+
cols = df.columns.tolist()
|
125
126
|
for col in cols:
|
127
|
+
# df[col] = df[col].apply(lambda x: re.sub('[="]', '', str(x)) if '="' in str(x) else x)
|
126
128
|
# 百分比在某些数据库中不兼容, 转换百分比为小数
|
127
129
|
df[col] = df[col].apply(lambda x: float(float((str(x).rstrip("%"))) / 100) if str(x).endswith('%') and '~' not in str(x) else x)
|
128
130
|
# 尝试转换合适的数据类型
|
@@ -976,9 +978,9 @@ def update_dtypte():
|
|
976
978
|
|
977
979
|
def upload():
|
978
980
|
""" 上传一个文件夹到数据库 """
|
979
|
-
path = '/Users/xigua/数据中心/原始文件2
|
980
|
-
db_name = '
|
981
|
-
collection_name = '
|
981
|
+
path = '/Users/xigua/数据中心/原始文件2/生意经/宝贝指标'
|
982
|
+
db_name = '生意经2'
|
983
|
+
collection_name = '宝贝指标'
|
982
984
|
|
983
985
|
username, password, host, port = get_myconf.select_config_values(
|
984
986
|
target_service='home_lx',
|
@@ -1018,10 +1020,7 @@ def upload():
|
|
1018
1020
|
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
|
1019
1021
|
if len(df) == 0:
|
1020
1022
|
continue
|
1021
|
-
|
1022
|
-
df[col] = df[col].apply(lambda x: re.sub('[="]', '', str(x)) if '="' in str(x) else x)
|
1023
|
-
# df.replace(to_replace=['--'], value='', regex=False, inplace=True)
|
1024
|
-
df = dt.convert_df_cols(df=df)
|
1023
|
+
df = dt.convert_df_cols(df=df) # 清理列名和 df 中的非法字符
|
1025
1024
|
try:
|
1026
1025
|
df = df.astype(dtypes)
|
1027
1026
|
except Exception as e:
|
@@ -1044,8 +1043,8 @@ def upload():
|
|
1044
1043
|
|
1045
1044
|
|
1046
1045
|
if __name__ == '__main__':
|
1047
|
-
username, password, host, port = get_myconf.select_config_values(target_service='aliyun', database='mongodb')
|
1048
|
-
print(username, password, host, port)
|
1046
|
+
# username, password, host, port = get_myconf.select_config_values(target_service='aliyun', database='mongodb')
|
1047
|
+
# print(username, password, host, port)
|
1049
1048
|
|
1050
1049
|
# main()
|
1051
|
-
|
1050
|
+
upload()
|
mdbq/aggregation/query_data.py
CHANGED
@@ -102,6 +102,30 @@ class MysqlDatasQuery:
|
|
102
102
|
)
|
103
103
|
return df
|
104
104
|
|
105
|
+
def syj(self):
|
106
|
+
start_date, end_date = self.months_data(num=self.months)
|
107
|
+
projection = {
|
108
|
+
'日期': 1,
|
109
|
+
'宝贝id': 1,
|
110
|
+
'商家编码': 1,
|
111
|
+
'行业类目': 1,
|
112
|
+
'销售额': 1,
|
113
|
+
'销售量': 1,
|
114
|
+
'订单数': 1,
|
115
|
+
'退货量': 1,
|
116
|
+
'退款额': 1,
|
117
|
+
'退货量_发货后_': 1,
|
118
|
+
}
|
119
|
+
df = self.download.data_to_df(
|
120
|
+
db_name='生意经2',
|
121
|
+
tabel_name='宝贝指标',
|
122
|
+
start_date=start_date,
|
123
|
+
end_date=end_date,
|
124
|
+
projection=projection,
|
125
|
+
)
|
126
|
+
return df
|
127
|
+
|
128
|
+
|
105
129
|
@staticmethod
|
106
130
|
def months_data(num=0, end_date=None):
|
107
131
|
""" 读取近 num 个月的数据, 0 表示读取当月的数据 """
|
@@ -127,6 +151,7 @@ class GroupBy:
|
|
127
151
|
"""
|
128
152
|
self.is_maximize: 是否最大转化数据
|
129
153
|
"""
|
154
|
+
|
130
155
|
if '宝贝主体报表' in tabel_name:
|
131
156
|
df.rename(columns={
|
132
157
|
'场景名字': '营销场景',
|
@@ -169,6 +194,31 @@ class GroupBy:
|
|
169
194
|
)
|
170
195
|
df.insert(loc=1, column='推广渠道', value='万相台无界版') # df中插入新列
|
171
196
|
return df
|
197
|
+
if '宝贝指标' in tabel_name:
|
198
|
+
df.fillna(0, inplace=True)
|
199
|
+
df = df[(df['销售额'] != 0) | (df['退款额'] != 0)]
|
200
|
+
df = df.groupby(['日期', '宝贝id', '商家编码', '行业类目'], as_index=False).agg(
|
201
|
+
**{'销售额': ('销售额', np.min),
|
202
|
+
'销售量': ('销售量', np.min),
|
203
|
+
'订单数': ('订单数', np.min),
|
204
|
+
'退货量': ('退货量', np.max),
|
205
|
+
'退款额': ('退款额', np.max),
|
206
|
+
'退货量_发货后_': ('退货量_发货后_', np.max),
|
207
|
+
}
|
208
|
+
)
|
209
|
+
df['件均价'] = df.apply(lambda x: x['销售额'] / x['销售量'] if x['销售量'] > 0 else 0, axis=1).round(
|
210
|
+
0) # 两列运算, 避免除以0
|
211
|
+
df['价格带'] = df['件均价'].apply(
|
212
|
+
lambda x: '2000+' if x >= 2000
|
213
|
+
else '1000+' if x >= 1000
|
214
|
+
else '500+' if x >= 500
|
215
|
+
else '300+' if x >= 300
|
216
|
+
else '300以下'
|
217
|
+
)
|
218
|
+
return df
|
219
|
+
else:
|
220
|
+
print(f'<{tabel_name}>: Groupby 类尚未配置,数据为空')
|
221
|
+
return pd.DataFrame({})
|
172
222
|
|
173
223
|
def as_csv(self, df, filename, path=None, encoding='utf-8_sig',
|
174
224
|
index=False, header=True, st_ascend=None, ascend=None, freq=None):
|
@@ -255,11 +305,16 @@ class GroupBy:
|
|
255
305
|
def main():
|
256
306
|
sdq = MysqlDatasQuery(target_service='home_lx')
|
257
307
|
sdq.months = 0
|
258
|
-
df = sdq.tg_wxt() # 从数据库中获取数据并转为 df
|
259
308
|
|
260
|
-
|
261
|
-
|
262
|
-
g.
|
309
|
+
# df = sdq.tg_wxt() # 从数据库中获取数据并转为 df
|
310
|
+
# g = GroupBy() # 数据聚合
|
311
|
+
# df = g.groupby(df=df, tabel_name='推广数据_宝贝主体报表', is_maximize=True)
|
312
|
+
# g.as_csv(df=df, filename='推广数据_宝贝主体报表') # 数据导出
|
313
|
+
|
314
|
+
df = sdq.syj()
|
315
|
+
g = GroupBy()
|
316
|
+
df = g.groupby(df=df, tabel_name='宝贝指标', is_maximize=True)
|
317
|
+
g.as_csv(df=df, filename='宝贝指标')
|
263
318
|
|
264
319
|
|
265
320
|
if __name__ == '__main__':
|
@@ -1,8 +1,8 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
2
|
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/aggregation.py,sha256=
|
5
|
-
mdbq/aggregation/query_data.py,sha256=
|
4
|
+
mdbq/aggregation/aggregation.py,sha256=7Id0cZ5QR7rPR9srbNRv78lGBwQ7pQre65qYIhu89Ms,59498
|
5
|
+
mdbq/aggregation/query_data.py,sha256=9E4dbeQAq7r1srlszP27X3jFiKtMmFc6lP965GHjQms,13368
|
6
6
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
7
7
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
8
8
|
mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
|
@@ -28,7 +28,7 @@ mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
|
|
28
28
|
mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
|
29
29
|
mdbq/pbix/refresh_all.py,sha256=wulHs4rivf4Mi0Pii2QR5Nk9-TBcvSwnCB_WH9QULKE,5939
|
30
30
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
31
|
-
mdbq-0.0.
|
32
|
-
mdbq-0.0.
|
33
|
-
mdbq-0.0.
|
34
|
-
mdbq-0.0.
|
31
|
+
mdbq-0.0.8.dist-info/METADATA,sha256=dVu6kA6RtVWnfsNTWXnOv_d1TmKhM4Gw6PrypX9MF0E,245
|
32
|
+
mdbq-0.0.8.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
|
33
|
+
mdbq-0.0.8.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
34
|
+
mdbq-0.0.8.dist-info/RECORD,,
|
File without changes
|
File without changes
|