mdbq 2.9.3__py3-none-any.whl → 2.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +39 -21
- mdbq/aggregation/query_data.py +52 -28
- mdbq/config/products.py +15 -11
- mdbq/mysql/mysql.py +113 -159
- mdbq/mysql/recheck_mysql.py +1 -1
- mdbq/req_post/req_tb.py +1 -1
- {mdbq-2.9.3.dist-info → mdbq-2.9.5.dist-info}/METADATA +1 -1
- {mdbq-2.9.3.dist-info → mdbq-2.9.5.dist-info}/RECORD +10 -12
- {mdbq-2.9.3.dist-info → mdbq-2.9.5.dist-info}/WHEEL +1 -1
- mdbq/company/copysh_bak.py +0 -417
- mdbq/company/home_sh.py +0 -386
- {mdbq-2.9.3.dist-info → mdbq-2.9.5.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
```diff
@@ -919,8 +919,8 @@ class DatabaseUpdateBak:
 move_insert=True, # 先删除,再插入
 df_sql=False, # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
 drop_duplicates=False, # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
+count=None,
 filename=rt_filename, # 用来追踪处理进度
-service_database=service_database, # 字典
 )
 df_to_json.as_json_file() # 写入 json 文件, 包含数据的 dtypes 信息

@@ -1083,8 +1083,8 @@ class DatabaseUpdateBak:
 move_insert=True, # 先删除,再插入
 df_sql=False, # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
 drop_duplicates=False, # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
+count=None,
 filename=None, # 用来追踪处理进度
-service_database=service_database, # 用来追踪处理进度
 )
 # return df

@@ -1116,8 +1116,8 @@ class DatabaseUpdateBak:
 move_insert=False, # 先删除,再插入
 df_sql=False, # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
 drop_duplicates=True, # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
+count=None,
 filename=None, # 用来追踪处理进度
-service_database=service_database, # 用来追踪处理进度
 )

@@ -1189,7 +1189,8 @@ def upload_dir(path, db_name, collection_name, json_path=None):
 move_insert=False, # 先删除,再插入
 df_sql = True,
 drop_duplicates=False,
-
+count=f'{i}/{count}',
+filename=name,
 )
 # nas.df_to_mysql(df=df, db_name=db_name, table_name=collection_name, drop_duplicates=True,)
 except Exception as e:

@@ -1210,15 +1211,16 @@ def one_file_to_mysql(file, db_name, table_name):
 df=df,
 db_name=db_name,
 table_name=table_name,
-filename=filename,
 move_insert=False,
 df_sql=True,
 drop_duplicates=False,
+count=None,
+filename=filename,
 )

 def test():
-path = r'
+path = r'/Users/xigua/数据中心/原始文件3/达摩盘/dmp人群报表'

 results = []
 for root, dirs, files in os.walk(path, topdown=False):

@@ -1226,12 +1228,18 @@ def test():
 if name.endswith('.csv') and 'baidu' not in name and '~' not in name:
 # df = pd.read_excel(os.path.join(root, name), header=0)
 df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
+results.append(df)
 # print(name)
-
-
-
-
-
+if len(df) == 0:
+continue
+if '达摩盘消耗占比' in df.columns.tolist():
+df.pop('达摩盘消耗占比')
+if '更新时间' not in df.columns.tolist():
+print(name)
+df['更新时间'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+# df.insert(loc=1, column='店铺名称', value='万里马官方旗舰店')
+# df['更新时间'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+df.to_csv(os.path.join(root, name), encoding='utf-8_sig', index=False, header=True)
 # for col in ['更新时间']:
 # if col not in df.columns.tolist():
 # print(name)

@@ -1243,12 +1251,23 @@ def test():
 # os.remove(os.path.join(root, name))
 # results.append(df)
 # df = pd.concat(results)
-#
-#
+# df.drop_duplicates(
+# subset=[
+# '日期',
+# '店铺名称',
+# '报表类型',
+# '消耗',
+# '展现量',
+# '点击量',
+# ], keep='last', inplace=True, ignore_index=True)
+# df.fillna(0, inplace=True)
+# for col in df.columns.tolist():
+# df[col] = df[col].apply(lambda x: 0 if str(x) == '' else x)
+# path = '/Users/xigua/Downloads'
+# filename = '品销宝_2024年_合并.csv'
 # df.to_csv(os.path.join(path, filename), encoding='utf-8_sig', index=False, header=True)

-
 if __name__ == '__main__':
 # username = 'root'
 # password = ''

@@ -1257,20 +1276,19 @@ if __name__ == '__main__':
 #
 # # 上传 1 个文件到数据库
 # one_file_to_mysql(
-# file=r'/Users/
-# db_name='
-# table_name='
+# file=r'/Users/xigua/Downloads/DMP报表_2024-10-23_2024-10-29.csv',
+# db_name='达摩盘3',
+# table_name='dmp人群报表',
 # )

 # test()
-
-col = 1
+col = 0
 if col:
 # 上传一个目录到指定数据库
 db_name = '生意参谋3'
-table_name = '
+table_name = '店铺流量来源构成'
 upload_dir(
-path=r'
+path=r'/Users/xigua/数据中心/原始文件3/生意参谋/店铺流量来源',
 db_name=db_name,
 collection_name=table_name,
 )
```
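The pattern across these hunks is consistent: every `df_to_mysql` call drops the old `service_database` argument and gains explicit `count` and `filename` keywords (`upload_dir` passes `count=f'{i}/{count}'` as a progress marker). Below is a minimal sketch of a single-file upload after this change, modeled on `one_file_to_mysql`; the credentials and CSV path are placeholders, not values taken from the diff.

```python
import pandas as pd
from mdbq.mysql import mysql

# Hypothetical credentials; the real values come from the package's own config (myconfig).
m = mysql.MysqlUpload(username='user', password='pass', host='127.0.0.1', port=3306)

# Placeholder file; any DataFrame works the same way.
df = pd.read_csv('dmp_report_example.csv', encoding='utf-8_sig', header=0, na_filter=False)

m.df_to_mysql(
    df=df,
    db_name='达摩盘3',
    table_name='dmp人群报表',
    move_insert=False,
    df_sql=True,               # upload the whole frame via df.to_sql, no dedup
    drop_duplicates=False,
    count=None,                # new in 2.9.5: optional progress marker such as '3/10'
    filename='dmp_report_example.csv',  # passed explicitly to trace processing progress
)
```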
mdbq/aggregation/query_data.py
CHANGED
```diff
@@ -6,6 +6,7 @@ from mdbq.mysql import mysql
 from mdbq.mysql import s_query
 from mdbq.aggregation import optimize_data
 from mdbq.config import myconfig
+from mdbq.config import products
 import datetime
 from dateutil.relativedelta import relativedelta
 import pandas as pd

@@ -15,6 +16,7 @@ import platform
 import getpass
 import json
 import os
+import time

 """
 程序用于下载数据库(调用 s_query.py 下载并清洗), 并对数据进行聚合清洗, 不会更新数据库信息;

@@ -855,8 +857,7 @@ class MysqlDatasQuery:
 df_crowd.sort_values('日期', ascending=True, ignore_index=True, inplace=True)
 df_crowd.drop_duplicates(subset=['人群id',], keep='last', inplace=True, ignore_index=True)
 df_crowd.pop('日期')
-
-
+df_crowd = df_crowd.astype({'人群id': 'int64'}, errors='ignore')
 projection = {}
 df_dmp = self.download.data_to_df(
 db_name='达摩盘3',

@@ -865,6 +866,7 @@
 end_date=end_date,
 projection=projection,
 )
+df_dmp = df_dmp.astype({'人群id': 'int64'}, errors='ignore')
 df_dmp.sort_values('日期', ascending=True, ignore_index=True, inplace=True)
 df_dmp.drop_duplicates(subset=['日期', '人群id', '消耗_元'], keep='last', inplace=True, ignore_index=True)
 df = pd.merge(df_dmp, df_crowd, left_on=['人群id'], right_on=['人群id'], how='left')
```
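The two new `astype` lines normalize the join key before the merge: `人群id` can come back from the two tables with different dtypes (string in one, integer in the other), in which case `pd.merge` would silently match nothing. A standalone sketch of the pattern, with made-up sample data:

```python
import pandas as pd

# Sample frames standing in for the 达摩盘 crowd table and the DMP report.
df_crowd = pd.DataFrame({'人群id': ['1001', '1002'], '人群名称': ['A', 'B']})
df_dmp = pd.DataFrame({'日期': ['2024-10-23'], '人群id': [1001], '消耗_元': [12.5]})

# Cast the join key to int64 on both sides; errors='ignore' leaves the column
# untouched if it cannot be converted (e.g. contains empty strings).
df_crowd = df_crowd.astype({'人群id': 'int64'}, errors='ignore')
df_dmp = df_dmp.astype({'人群id': 'int64'}, errors='ignore')

df = pd.merge(df_dmp, df_crowd, left_on=['人群id'], right_on=['人群id'], how='left')
print(df)  # the row for 人群id 1001 now picks up 人群名称 'A'
```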
```diff
@@ -1959,7 +1961,7 @@ class GroupBy:
 )
 return df

-@try_except
+# @try_except
 def performance_jd(self, jd_tg=True):
 jdtg, sku_sales = self.data_jdtg['京东_京准通'], self.data_jdtg['京东_sku_商品明细']
 jdtg = jdtg.groupby(['日期', '跟单sku_id'],

@@ -1973,6 +1975,7 @@ class GroupBy:
 df = df[['日期', '商品id', '货号', '成交单量', '成交金额', '成本价']]
 df['商品id'] = df['商品id'].astype(str)
 jdtg['跟单sku_id'] = jdtg['跟单sku_id'].astype(str)
+jdtg = jdtg.astype({'日期': 'datetime64[ns]'}, errors='raise')
 if jd_tg is True:
 # 完整的数据表,包含全店所有推广、销售数据
 df = pd.merge(df, jdtg, how='left', left_on=['日期', '商品id'], right_on=['日期', '跟单sku_id']) # df 合并推广表

@@ -2116,8 +2119,9 @@ def date_table():
 move_insert=True, # 先删除,再插入
 df_sql=False, # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
 drop_duplicates=False, # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
+count=None,
 filename=None, # 用来追踪处理进度
-
+set_typ={},
 )

@@ -2281,7 +2285,9 @@ def data_aggregation(months=1, is_juhe=True, less_dict=[]):
 # df_sql=True,
 drop_duplicates=False,
 icm_update=['商品id'],
-
+count=None,
+filename=None,
+set_typ={},
 )
 g.sp_index_datas = pd.DataFrame() # 重置,不然下个循环会继续刷入数据库
 # g.as_csv(df=df, filename=table_name + '.csv') # 导出 csv

@@ -2294,7 +2300,9 @@ def data_aggregation(months=1, is_juhe=True, less_dict=[]):
 # df_sql=True,
 # drop_duplicates=False,
 # icm_update=unique_key_list,
-
+count=None,
+filename=None,
+set_typ={},
 ) # 3. 回传数据库
 else: # 没有日期列的就用主键排重
 m.df_to_mysql(

@@ -2305,7 +2313,9 @@ def data_aggregation(months=1, is_juhe=True, less_dict=[]):
 # df_sql=True,
 drop_duplicates=False,
 icm_update=unique_key_list,
-
+count=None,
+filename=None,
+set_typ={},
 ) # 3. 回传数据库
 if is_juhe:
 res = g.performance(bb_tg=True) # 盈亏表,依赖其他表,单独做

@@ -2317,7 +2327,9 @@ def data_aggregation(months=1, is_juhe=True, less_dict=[]):
 # df_sql=True,
 # drop_duplicates=False,
 # icm_update=['日期', '商品id'], # 设置唯一主键
-
+count=None,
+filename=None,
+set_typ={},
 )
 res = g.performance(bb_tg=False) # 盈亏表,依赖其他表,单独做
 m.df_to_mysql(

@@ -2328,7 +2340,9 @@ def data_aggregation(months=1, is_juhe=True, less_dict=[]):
 # df_sql=True,
 # drop_duplicates=False,
 # icm_update=['日期', '商品id'], # 设置唯一主键
-
+count=None,
+filename=None,
+set_typ={},
 )
 res = g.performance_concat(bb_tg=False) # 推广主体合并直播表,依赖其他表,单独做
 m.df_to_mysql(

@@ -2339,7 +2353,9 @@ def data_aggregation(months=1, is_juhe=True, less_dict=[]):
 # df_sql=True,
 # drop_duplicates=False,
 # icm_update=['日期', '推广渠道', '营销场景', '商品id', '花费', '展现量', '点击量'], # 设置唯一主键
-
+count=None,
+filename=None,
+set_typ={},
 )
 res = g.performance_jd(jd_tg=False) # 盈亏表,依赖其他表,单独做
 m.df_to_mysql(

@@ -2350,46 +2366,54 @@ def data_aggregation(months=1, is_juhe=True, less_dict=[]):
 # df_sql=True,
 # drop_duplicates=False,
 # icm_update=['日期', '跟单sku_id', '货号', '花费'], # 设置唯一主键
-
+count=None,
+filename=None,
+set_typ={},
 )

-def main():
-# 更新日期表
+def main(days=100, months=3):
+# 1. 更新日期表 更新货品年份基准表, 属性设置 3 - 货品年份基准
 date_table()
-# 更新货品年份基准表, 属性设置 3 - 货品年份基准
 p = products.Products()
 p.to_mysql()

+# 2. 清理非聚合数据库
 system = platform.system() # 本机系统
 host_name = socket.gethostname() # 本机名
 conf = myconfig.main()
 db_list = conf[system][host_name]['mysql']['数据库集']
-
-# 清理所有非聚合数据的库
+not_juhe_db_list = [item for item in db_list if item != '聚合数据']
 optimize_data.op_data(
-db_name_lists=
-days=
-is_mongo=
+db_name_lists=not_juhe_db_list,
+days=31, # 原始数据不需要设置清理太长
+is_mongo=False,
 is_mysql=True,
 )

-# 数据聚合
-
+# 3. 数据聚合
+data_aggregation(
+months=months,
+is_juhe=True, # 生成聚合表
+# less_dict=['天猫_品销宝账户报表'], # 单独聚合某一个数据库
+)
 time.sleep(60)

-#
+# 4. 清理聚合数据
 optimize_data.op_data(
 db_name_lists=['聚合数据'],
-days=
+days=days,
 is_mongo=False,
 is_mysql=True,
 )

 if __name__ == '__main__':
-
-
-
-
-
+main(days=100, months=3)
+
+# data_aggregation(
+# months=3,
+# is_juhe=True, # 生成聚合表
+# # less_dict=['天猫_品销宝账户报表'], # 单独聚合某一个数据库
+# )
+
```
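The rewritten `main()` now takes `days` and `months` and runs four steps in order: refresh the date and 货品年份基准 tables, trim the raw (non-聚合数据) databases to 31 days, run `data_aggregation`, then trim 聚合数据 to `days`. Assuming the package is importable, the new `__main__` block is equivalent to the following sketch:

```python
from mdbq.aggregation import query_data

# Aggregate the last 3 months and keep 100 days of history in 聚合数据,
# exactly as the new __main__ block does.
query_data.main(days=100, months=3)

# To aggregate without the cleanup passes, call data_aggregation directly:
# query_data.data_aggregation(months=3, is_juhe=True)
```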
mdbq/config/products.py
CHANGED
```diff
@@ -124,25 +124,29 @@ class Products:
 ]
 self.datas += my_datas

+
 def to_mysql(self):
 self.update_my_datas()
 df = pd.DataFrame(self.datas)
-
+m_engine = mysql.MysqlUpload(
 username=username,
 password=password,
 host=host,
 port=port,
 )
-
-
-
-
-
-
-
-
-
-
+for dict_data in df.to_dict(orient='records'):
+m_engine.dict_to_mysql(
+db_name='属性设置3',
+table_name='货品年份基准',
+dict_data=dict_data,
+# icm_update=['日期', '店铺名称', '宝贝id'], # 唯一组合键
+unique_main_key=['商品id'],
+set_type={
+'商品id': 'mediumtext',
+'平台': 'mediumtext',
+'上市年份': 'mediumtext',
+},
+)

 def market_date(self, product_id: int):
 try:
```
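`Products.to_mysql` now iterates the DataFrame as records and writes each one through `dict_to_mysql`, with `unique_main_key` presumably deduplicating on 商品id and `set_type` pinning the MySQL column types. A sketch of that per-row pattern; the credentials and the sample row are placeholders, not values from the diff:

```python
from mdbq.mysql import mysql

# Hypothetical credentials; the real ones are read from the package's config.
m_engine = mysql.MysqlUpload(username='user', password='pass', host='127.0.0.1', port=3306)

rows = [
    {'商品id': '123456789', '平台': '淘宝', '上市年份': '2024年'},  # made-up sample record
]

for dict_data in rows:
    m_engine.dict_to_mysql(
        db_name='属性设置3',
        table_name='货品年份基准',
        dict_data=dict_data,
        unique_main_key=['商品id'],  # single-column key, replacing the commented icm_update combo
        set_type={
            '商品id': 'mediumtext',
            '平台': 'mediumtext',
            '上市年份': 'mediumtext',
        },
    )
```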