mdbq 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/query_data.py +118 -20
- mdbq/company/copysh.py +5 -1
- mdbq/mysql/s_query.py +32 -1
- {mdbq-0.1.5.dist-info → mdbq-0.1.7.dist-info}/METADATA +1 -1
- {mdbq-0.1.5.dist-info → mdbq-0.1.7.dist-info}/RECORD +7 -7
- {mdbq-0.1.5.dist-info → mdbq-0.1.7.dist-info}/WHEEL +0 -0
- {mdbq-0.1.5.dist-info → mdbq-0.1.7.dist-info}/top_level.txt +0 -0
mdbq/aggregation/query_data.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# -*- coding: UTF-8 –*-
|
2
2
|
from mdbq.mongo import mongo
|
3
|
+
from mdbq.mysql import mysql
|
3
4
|
from mdbq.mysql import s_query
|
4
5
|
from mdbq.config import get_myconf
|
5
6
|
import datetime
|
@@ -117,7 +118,7 @@ class MysqlDatasQuery:
|
|
117
118
|
'订单数': 1,
|
118
119
|
'退货量': 1,
|
119
120
|
'退款额': 1,
|
120
|
-
'退货量_发货后
|
121
|
+
'退货量_发货后': 1,
|
121
122
|
}
|
122
123
|
df = self.download.data_to_df(
|
123
124
|
db_name='生意经2',
|
@@ -128,6 +129,26 @@ class MysqlDatasQuery:
|
|
128
129
|
)
|
129
130
|
return df
|
130
131
|
|
132
|
+
def idbm(self):
    """Load the raw rows for the product-id / merchant-code lookup table.

    Reads the '宝贝id', '商家编码' and '行业类目' columns from the
    '宝贝指标' table of the '生意经2' database through
    ``self.download.columns_to_list`` and wraps the result in a DataFrame.

    Returns:
        pandas.DataFrame built from the rows returned by the query helper.
    """
    wanted_columns = ['宝贝id', '商家编码', '行业类目']
    rows = self.download.columns_to_list(
        db_name='生意经2',
        tabel_name='宝贝指标',
        columns_name=wanted_columns,
    )
    return pd.DataFrame(data=rows)
|
141
|
+
|
142
|
+
def sp_picture(self):
    """Load the raw rows for the product-id / product-image lookup table.

    Reads the '日期', '商品id', '商品白底图' and '方版场景图' columns from
    the '商品素材导出' table of the '属性设置2' database through
    ``self.download.columns_to_list`` and wraps the result in a DataFrame.

    Returns:
        pandas.DataFrame built from the rows returned by the query helper.
    """
    wanted_columns = ['日期', '商品id', '商品白底图', '方版场景图']
    rows = self.download.columns_to_list(
        db_name='属性设置2',
        tabel_name='商品素材导出',
        columns_name=wanted_columns,
    )
    return pd.DataFrame(data=rows)
|
151
|
+
|
131
152
|
def dplyd(self):
|
132
153
|
start_date, end_date = self.months_data(num=self.months)
|
133
154
|
projection = {
|
@@ -150,6 +171,16 @@ class MysqlDatasQuery:
|
|
150
171
|
)
|
151
172
|
return df
|
152
173
|
|
174
|
+
def sp_cost(self):
    """Load the e-commerce pricing rows.

    Reads the pricing columns listed below from the '电商定价' table of the
    '属性设置2' database through ``self.download.columns_to_list`` and wraps
    the result in a DataFrame.

    Returns:
        pandas.DataFrame built from the rows returned by the query helper.
    """
    wanted_columns = ['日期', '款号', '年份季节', '吊牌价', '商家平台', '成本价', '天猫页面价', '天猫中促价']
    rows = self.download.columns_to_list(
        db_name='属性设置2',
        tabel_name='电商定价',
        columns_name=wanted_columns,
    )
    return pd.DataFrame(data=rows)
|
183
|
+
|
153
184
|
@staticmethod
|
154
185
|
def months_data(num=0, end_date=None):
|
155
186
|
""" 读取近 num 个月的数据, 0 表示读取当月的数据 """
|
@@ -229,7 +260,7 @@ class GroupBy:
|
|
229
260
|
'订单数': ('订单数', np.min),
|
230
261
|
'退货量': ('退货量', np.max),
|
231
262
|
'退款额': ('退款额', np.max),
|
232
|
-
'退货量_发货后
|
263
|
+
'退货量_发货后': ('退货量_发货后', np.max),
|
233
264
|
}
|
234
265
|
)
|
235
266
|
df['件均价'] = df.apply(lambda x: x['销售额'] / x['销售量'] if x['销售量'] > 0 else 0, axis=1).round(
|
@@ -244,6 +275,41 @@ class GroupBy:
|
|
244
275
|
return df
|
245
276
|
elif '店铺来源_日数据' in tabel_name:
|
246
277
|
return df
|
278
|
+
elif '商品id编码表' in tabel_name:
|
279
|
+
df.drop_duplicates(subset='宝贝id', keep='last', inplace=True, ignore_index=True)
|
280
|
+
# df['行业类目'] = df['行业类目'].apply(lambda x: re.sub(' ', '', x))
|
281
|
+
try:
|
282
|
+
df[['一级类目', '二级类目', '三级类目']] = df['行业类目'].str.split(' -> ', expand=True).loc[:, 0:2]
|
283
|
+
except:
|
284
|
+
try:
|
285
|
+
df[['一级类目', '二级类目']] = df['行业类目'].str.split(' -> ', expand=True).loc[:, 0:1]
|
286
|
+
except:
|
287
|
+
df['一级类目'] = df['行业类目']
|
288
|
+
df.drop('行业类目', axis=1, inplace=True)
|
289
|
+
df.sort_values('宝贝id', ascending=False, inplace=True)
|
290
|
+
df = df[(df['宝贝id'] != '973') & (df['宝贝id'] != '973')]
|
291
|
+
return df
|
292
|
+
elif '商品id图片对照表' in tabel_name:
|
293
|
+
df['商品id'] = df['商品id'].astype('int64')
|
294
|
+
df['日期'] = df['日期'].astype('datetime64[ns]')
|
295
|
+
df = df[(df['商品白底图'] != '0') | (df['方版场景图'] != '0')]
|
296
|
+
# 白底图优先
|
297
|
+
df['商品图片'] = df[['商品白底图', '方版场景图']].apply(
|
298
|
+
lambda x: x['商品白底图'] if x['商品白底图'] !='0' else x['方版场景图'], axis=1)
|
299
|
+
# # 方版场景图优先
|
300
|
+
# df['商品图片'] = df[['商品白底图', '方版场景图']].apply(
|
301
|
+
# lambda x: x['方版场景图'] if x['方版场景图'] != '0' else x['商品白底图'], axis=1)
|
302
|
+
df.sort_values(by=['商品id', '日期'], ascending=[False, True], ignore_index=True, inplace=True)
|
303
|
+
df.drop_duplicates(subset=['商品id'], keep='last', inplace=True, ignore_index=True)
|
304
|
+
df = df[['商品id', '商品图片', '日期']]
|
305
|
+
df['商品图片'] = df['商品图片'].apply(lambda x: x if 'http' in x else None) # 检查是否是 http 链接
|
306
|
+
df.dropna(how='all', subset=['商品图片'], axis=0, inplace=True) # 删除指定列含有空值的行
|
307
|
+
df.sort_values(by='商品id', ascending=False, ignore_index=True, inplace=True) # ascending=False 降序排列
|
308
|
+
return df
|
309
|
+
elif '商品成本' in tabel_name:
|
310
|
+
df.sort_values(by=['款号', '日期'], ascending=[False, True], ignore_index=True, inplace=True)
|
311
|
+
df.drop_duplicates(subset=['款号'], keep='last', inplace=True, ignore_index=True)
|
312
|
+
return df
|
247
313
|
else:
|
248
314
|
print(f'<{tabel_name}>: Groupby 类尚未配置,数据为空')
|
249
315
|
return pd.DataFrame({})
|
@@ -330,25 +396,57 @@ class GroupBy:
|
|
330
396
|
index=index, header=header, engine=engine, freeze_panes=freeze_panes)
|
331
397
|
|
332
398
|
|
333
|
-
def
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
#
|
343
|
-
|
344
|
-
|
345
|
-
# g.as_csv(df=df, filename='宝贝指标')
|
399
|
+
def data_aggregation():
    """Read the source tables, aggregate them and write the results to MySQL.

    Steps:
        1. Read the source data from the database.
        2. Clean / aggregate each dataset with ``GroupBy.groupby``.
        3. Upload every result to the '聚合数据' database (results are no
           longer exported to files).
    """
    query = MysqlDatasQuery(target_service='company')  # data query helper
    query.months = 0  # data period passed on to the date-ranged queries
    grouper = GroupBy()  # data aggregation helper
    # Credentials for the upload connection.
    username, password, host, port = get_myconf.select_config_values(target_service='company', database='mysql')
    uploader = mysql.MysqlUpload(username=username, password=password, host=host, port=port)

    # All queries run up front, in this order, before anything is uploaded.
    sources = (
        ('推广数据_宝贝主体报表', query.tg_wxt()),
        ('天猫生意经_宝贝指标', query.syj()),
        ('天猫_店铺来源_日数据', query.dplyd()),
        ('商品id编码表', query.idbm()),
        ('商品id图片对照表', query.sp_picture()),
        ('商品成本', query.sp_cost()),
    )
    data_dict = [
        {'数据库名': '聚合数据', '集合名': name, '数据主体': frame}
        for name, frame in sources
    ]

    for entry in data_dict:
        db_name = entry['数据库名']
        tabel_name = entry['集合名']
        # Step 2: aggregate; the table name selects the groupby branch.
        aggregated = grouper.groupby(df=entry['数据主体'], tabel_name=tabel_name, is_maximize=True)
        # Step 3: write the aggregated frame back to the database.
        uploader.df_to_mysql(df=aggregated, db_name=db_name, tabel_name=tabel_name)
|
351
449
|
|
352
450
|
|
353
451
|
if __name__ == '__main__':
|
354
|
-
|
452
|
+
data_aggregation()
|
mdbq/company/copysh.py
CHANGED
mdbq/mysql/s_query.py
CHANGED
@@ -86,6 +86,33 @@ class QueryDatas:
|
|
86
86
|
print(f'database: {db_name}, table: {tabel_name} 查询的数据为空')
|
87
87
|
return df
|
88
88
|
|
89
|
+
def columns_to_list(self, db_name, tabel_name, columns_name) -> list:
    """Fetch the requested columns of a table and return the rows.

    Example result:
        [{'视频bv号': 'BV1Dm4y1S7BU', '下载进度': 1},
         {'视频bv号': 'BV1ov411c7US', '下载进度': 1}]

    Args:
        db_name: Name of the database to query.
        tabel_name: Name of the table to query.
        columns_name: Column names to fetch; names that do not exist in the
            table are silently dropped.

    Returns:
        The rows as returned by ``cursor.fetchall()``. An empty list when
        ``check_infos`` rejects the database/table or when none of the
        requested columns exist.
    """
    if not self.check_infos(db_name, tabel_name):  # database or table missing
        return []

    def quote(identifier):
        # Backtick-quote an identifier so names containing spaces, reserved
        # words or other special characters stay valid SQL.
        return '`' + str(identifier).replace('`', '``') + '`'

    self.config.update({'database': db_name})
    connection = pymysql.connect(**self.config)  # reconnect against db_name
    try:
        with connection.cursor() as cursor:
            # Look up which columns the table really has.
            sql = 'SELECT COLUMN_NAME FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
            cursor.execute(sql, (db_name, tabel_name))
            # NOTE(review): rows are indexed by key, so self.config is
            # presumably set up with a dict cursor -- confirm.
            cols_exist = {col['COLUMN_NAME'] for col in cursor.fetchall()}
            columns_name = [item for item in columns_name if item in cols_exist]
            if not columns_name:
                return []
            columns_in = ', '.join(quote(item) for item in columns_name)
            sql = f"SELECT {columns_in} FROM {quote(db_name)}.{quote(tabel_name)}"
            cursor.execute(sql)
            return cursor.fetchall()  # with a dict cursor: [dict, dict, ...]
    finally:
        # Close on every path, including the early return and errors.
        connection.close()
|
115
|
+
|
89
116
|
def check_infos(self, db_name, tabel_name) -> bool:
|
90
117
|
""" 检查数据库、数据表是否存在 """
|
91
118
|
connection = pymysql.connect(**self.config) # 连接数据库
|
@@ -119,5 +146,9 @@ class QueryDatas:
|
|
119
146
|
|
120
147
|
|
121
148
|
if __name__ == '__main__':
|
122
|
-
username, password, host, port = get_myconf.select_config_values(target_service='
|
149
|
+
username, password, host, port = get_myconf.select_config_values(target_service='home_lx', database='mysql')
|
123
150
|
print(username, password, host, port)
|
151
|
+
|
152
|
+
q = QueryDatas(username, password, host, port)
|
153
|
+
res = q.columns_to_list(db_name='视频数据', tabel_name='bilibili视频', columns_name=['视频bv号', '下载进度'])
|
154
|
+
print(res)
|
@@ -2,13 +2,13 @@ mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
|
2
2
|
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
4
|
mdbq/aggregation/aggregation.py,sha256=n2MoKwN1ltJD3juu59zkhc7PGUMDTkn0zCcZGs8RnXI,56775
|
5
|
-
mdbq/aggregation/query_data.py,sha256=
|
5
|
+
mdbq/aggregation/query_data.py,sha256=w_C29vHZJpmj_TMjpNgb_a-7eZ9pO0PixPkWFyLr50I,19476
|
6
6
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
7
7
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
8
8
|
mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
|
9
9
|
mdbq/clean/data_clean.py,sha256=33OmeQFl9AW21P5EOay52W_S8DF96H5oHwCg4fSuBxA,85359
|
10
10
|
mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
|
11
|
-
mdbq/company/copysh.py,sha256=
|
11
|
+
mdbq/company/copysh.py,sha256=4cfFnxuYhfVAdROX59I-pzood3T3ebPPHDYxyFs8NHk,15819
|
12
12
|
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
13
13
|
mdbq/config/get_myconf.py,sha256=9v3xebfcS1tptxpvk3_tGxfXjAehGVCveYe4iRUzLQQ,6372
|
14
14
|
mdbq/config/update_conf.py,sha256=YjGjjRchu5BcrmLJkoLjHEF2TbGOmsgCWX4LroXOYWQ,3455
|
@@ -20,7 +20,7 @@ mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
|
|
20
20
|
mdbq/mongo/mongo.py,sha256=q0B4wXDSTtXg_vMN7MPh6zdxl6tT68tM74LmdVNQQek,31892
|
21
21
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
22
22
|
mdbq/mysql/mysql.py,sha256=w_FDE4ulOnHdfIjjXB1Ff_cnXyx2PuBAQWGjTYyVm3U,30629
|
23
|
-
mdbq/mysql/s_query.py,sha256=
|
23
|
+
mdbq/mysql/s_query.py,sha256=mNrdyMeiQ5wQVD_9IDTrk6jrMtszQftYQg3AwNWhROc,6997
|
24
24
|
mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
|
25
25
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
26
26
|
mdbq/other/porxy.py,sha256=UHfgEyXugogvXgsG68a7QouUCKaohTKKkI4RN-kYSdQ,4961
|
@@ -30,7 +30,7 @@ mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
|
|
30
30
|
mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
|
31
31
|
mdbq/pbix/refresh_all.py,sha256=wulHs4rivf4Mi0Pii2QR5Nk9-TBcvSwnCB_WH9QULKE,5939
|
32
32
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
33
|
-
mdbq-0.1.
|
34
|
-
mdbq-0.1.
|
35
|
-
mdbq-0.1.
|
36
|
-
mdbq-0.1.
|
33
|
+
mdbq-0.1.7.dist-info/METADATA,sha256=6IJUwkrTSwOhf6lTMUtOXI5JpiDGHVTVo3C06u7yUrg,245
|
34
|
+
mdbq-0.1.7.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
35
|
+
mdbq-0.1.7.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
36
|
+
mdbq-0.1.7.dist-info/RECORD,,
|
File without changes
|
File without changes
|