mdbq 2.9.3__py3-none-any.whl → 2.9.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +30 -15
- mdbq/aggregation/query_data.py +25 -24
- mdbq/config/products.py +25 -11
- mdbq/mysql/mysql.py +13 -1
- {mdbq-2.9.3.dist-info → mdbq-2.9.4.dist-info}/METADATA +1 -1
- {mdbq-2.9.3.dist-info → mdbq-2.9.4.dist-info}/RECORD +8 -8
- {mdbq-2.9.3.dist-info → mdbq-2.9.4.dist-info}/WHEEL +1 -1
- {mdbq-2.9.3.dist-info → mdbq-2.9.4.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -1218,7 +1218,7 @@ def one_file_to_mysql(file, db_name, table_name):
|
|
1218
1218
|
|
1219
1219
|
|
1220
1220
|
def test():
|
1221
|
-
path = r'
|
1221
|
+
path = r'/Users/xigua/Downloads/DMP报表'
|
1222
1222
|
|
1223
1223
|
results = []
|
1224
1224
|
for root, dirs, files in os.walk(path, topdown=False):
|
@@ -1226,12 +1226,16 @@ def test():
|
|
1226
1226
|
if name.endswith('.csv') and 'baidu' not in name and '~' not in name:
|
1227
1227
|
# df = pd.read_excel(os.path.join(root, name), header=0)
|
1228
1228
|
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
|
1229
|
+
results.append(df)
|
1229
1230
|
# print(name)
|
1230
|
-
|
1231
|
-
|
1232
|
-
|
1233
|
-
|
1234
|
-
|
1231
|
+
if len(df) == 0:
|
1232
|
+
continue
|
1233
|
+
if '达摩盘消耗占比' in df.columns.tolist():
|
1234
|
+
print(name)
|
1235
|
+
df.pop('达摩盘消耗占比')
|
1236
|
+
# df.insert(loc=1, column='店铺名称', value='万里马官方旗舰店')
|
1237
|
+
# df['更新时间'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
1238
|
+
df.to_csv(os.path.join(root, name), encoding='utf-8_sig', index=False, header=True)
|
1235
1239
|
# for col in ['更新时间']:
|
1236
1240
|
# if col not in df.columns.tolist():
|
1237
1241
|
# print(name)
|
@@ -1243,12 +1247,23 @@ def test():
|
|
1243
1247
|
# os.remove(os.path.join(root, name))
|
1244
1248
|
# results.append(df)
|
1245
1249
|
# df = pd.concat(results)
|
1246
|
-
#
|
1247
|
-
#
|
1250
|
+
# df.drop_duplicates(
|
1251
|
+
# subset=[
|
1252
|
+
# '日期',
|
1253
|
+
# '店铺名称',
|
1254
|
+
# '报表类型',
|
1255
|
+
# '消耗',
|
1256
|
+
# '展现量',
|
1257
|
+
# '点击量',
|
1258
|
+
# ], keep='last', inplace=True, ignore_index=True)
|
1259
|
+
# df.fillna(0, inplace=True)
|
1260
|
+
# for col in df.columns.tolist():
|
1261
|
+
# df[col] = df[col].apply(lambda x: 0 if str(x) == '' else x)
|
1262
|
+
# path = '/Users/xigua/Downloads'
|
1263
|
+
# filename = '品销宝_2024年_合并.csv'
|
1248
1264
|
# df.to_csv(os.path.join(path, filename), encoding='utf-8_sig', index=False, header=True)
|
1249
1265
|
|
1250
1266
|
|
1251
|
-
|
1252
1267
|
if __name__ == '__main__':
|
1253
1268
|
# username = 'root'
|
1254
1269
|
# password = ''
|
@@ -1257,9 +1272,9 @@ if __name__ == '__main__':
|
|
1257
1272
|
#
|
1258
1273
|
# # 上传 1 个文件到数据库
|
1259
1274
|
# one_file_to_mysql(
|
1260
|
-
# file=r'/Users/
|
1261
|
-
# db_name='
|
1262
|
-
# table_name='
|
1275
|
+
# file=r'/Users/xigua/Downloads/DMP报表_2024-10-23_2024-10-29.csv',
|
1276
|
+
# db_name='达摩盘3',
|
1277
|
+
# table_name='dmp人群报表',
|
1263
1278
|
# )
|
1264
1279
|
|
1265
1280
|
# test()
|
@@ -1267,10 +1282,10 @@ if __name__ == '__main__':
|
|
1267
1282
|
col = 1
|
1268
1283
|
if col:
|
1269
1284
|
# 上传一个目录到指定数据库
|
1270
|
-
db_name = '
|
1271
|
-
table_name = '
|
1285
|
+
db_name = '达摩盘3'
|
1286
|
+
table_name = 'dmp人群报表'
|
1272
1287
|
upload_dir(
|
1273
|
-
path=r'
|
1288
|
+
path=r'/Users/xigua/Downloads/DMP报表',
|
1274
1289
|
db_name=db_name,
|
1275
1290
|
collection_name=table_name,
|
1276
1291
|
)
|
mdbq/aggregation/query_data.py
CHANGED
@@ -6,6 +6,7 @@ from mdbq.mysql import mysql
|
|
6
6
|
from mdbq.mysql import s_query
|
7
7
|
from mdbq.aggregation import optimize_data
|
8
8
|
from mdbq.config import myconfig
|
9
|
+
from mdbq.config import products
|
9
10
|
import datetime
|
10
11
|
from dateutil.relativedelta import relativedelta
|
11
12
|
import pandas as pd
|
@@ -15,6 +16,7 @@ import platform
|
|
15
16
|
import getpass
|
16
17
|
import json
|
17
18
|
import os
|
19
|
+
import time
|
18
20
|
|
19
21
|
"""
|
20
22
|
程序用于下载数据库(调用 s_query.py 下载并清洗), 并对数据进行聚合清洗, 不会更新数据库信息;
|
@@ -855,8 +857,7 @@ class MysqlDatasQuery:
|
|
855
857
|
df_crowd.sort_values('日期', ascending=True, ignore_index=True, inplace=True)
|
856
858
|
df_crowd.drop_duplicates(subset=['人群id',], keep='last', inplace=True, ignore_index=True)
|
857
859
|
df_crowd.pop('日期')
|
858
|
-
|
859
|
-
|
860
|
+
df_crowd = df_crowd.astype({'人群id': 'int64'}, errors='ignore')
|
860
861
|
projection = {}
|
861
862
|
df_dmp = self.download.data_to_df(
|
862
863
|
db_name='达摩盘3',
|
@@ -865,6 +866,7 @@ class MysqlDatasQuery:
|
|
865
866
|
end_date=end_date,
|
866
867
|
projection=projection,
|
867
868
|
)
|
869
|
+
df_dmp = df_dmp.astype({'人群id': 'int64'}, errors='ignore')
|
868
870
|
df_dmp.sort_values('日期', ascending=True, ignore_index=True, inplace=True)
|
869
871
|
df_dmp.drop_duplicates(subset=['日期', '人群id', '消耗_元'], keep='last', inplace=True, ignore_index=True)
|
870
872
|
df = pd.merge(df_dmp, df_crowd, left_on=['人群id'], right_on=['人群id'], how='left')
|
@@ -1959,7 +1961,7 @@ class GroupBy:
|
|
1959
1961
|
)
|
1960
1962
|
return df
|
1961
1963
|
|
1962
|
-
@try_except
|
1964
|
+
# @try_except
|
1963
1965
|
def performance_jd(self, jd_tg=True):
|
1964
1966
|
jdtg, sku_sales = self.data_jdtg['京东_京准通'], self.data_jdtg['京东_sku_商品明细']
|
1965
1967
|
jdtg = jdtg.groupby(['日期', '跟单sku_id'],
|
@@ -1973,6 +1975,7 @@ class GroupBy:
|
|
1973
1975
|
df = df[['日期', '商品id', '货号', '成交单量', '成交金额', '成本价']]
|
1974
1976
|
df['商品id'] = df['商品id'].astype(str)
|
1975
1977
|
jdtg['跟单sku_id'] = jdtg['跟单sku_id'].astype(str)
|
1978
|
+
jdtg = jdtg.astype({'日期': 'datetime64[ns]'}, errors='raise')
|
1976
1979
|
if jd_tg is True:
|
1977
1980
|
# 完整的数据表,包含全店所有推广、销售数据
|
1978
1981
|
df = pd.merge(df, jdtg, how='left', left_on=['日期', '商品id'], right_on=['日期', '跟单sku_id']) # df 合并推广表
|
@@ -2354,42 +2357,40 @@ def data_aggregation(months=1, is_juhe=True, less_dict=[]):
|
|
2354
2357
|
)
|
2355
2358
|
|
2356
2359
|
|
2357
|
-
def main():
|
2360
|
+
def main(days=100, months=3):
|
2358
2361
|
# 更新日期表
|
2359
2362
|
date_table()
|
2360
2363
|
# 更新货品年份基准表, 属性设置 3 - 货品年份基准
|
2361
2364
|
p = products.Products()
|
2362
2365
|
p.to_mysql()
|
2363
2366
|
|
2367
|
+
# 数据聚合
|
2368
|
+
data_aggregation(
|
2369
|
+
months=months,
|
2370
|
+
is_juhe=True, # 生成聚合表
|
2371
|
+
# less_dict=['天猫_品销宝账户报表'], # 单独聚合某一个数据库
|
2372
|
+
)
|
2373
|
+
time.sleep(60)
|
2374
|
+
|
2364
2375
|
system = platform.system() # 本机系统
|
2365
2376
|
host_name = socket.gethostname() # 本机名
|
2366
2377
|
conf = myconfig.main()
|
2367
2378
|
db_list = conf[system][host_name]['mysql']['数据库集']
|
2368
|
-
|
2369
|
-
# 清理所有非聚合数据的库
|
2379
|
+
# 清理所有库
|
2370
2380
|
optimize_data.op_data(
|
2371
2381
|
db_name_lists=db_list,
|
2372
|
-
days=
|
2373
|
-
is_mongo=True,
|
2374
|
-
is_mysql=True,
|
2375
|
-
)
|
2376
|
-
|
2377
|
-
# 数据聚合
|
2378
|
-
query_data.data_aggregation(months=3)
|
2379
|
-
time.sleep(60)
|
2380
|
-
|
2381
|
-
# 清理聚合数据, mongodb 中没有聚合数据,所以只需要清理 mysql 即可
|
2382
|
-
optimize_data.op_data(
|
2383
|
-
db_name_lists=['聚合数据'],
|
2384
|
-
days=100,
|
2382
|
+
days=days,
|
2385
2383
|
is_mongo=False,
|
2386
2384
|
is_mysql=True,
|
2387
2385
|
)
|
2388
2386
|
|
2389
2387
|
|
2390
2388
|
if __name__ == '__main__':
|
2391
|
-
|
2392
|
-
|
2393
|
-
|
2394
|
-
|
2395
|
-
|
2389
|
+
main(days=100, months=3)
|
2390
|
+
|
2391
|
+
# data_aggregation(
|
2392
|
+
# months=3,
|
2393
|
+
# is_juhe=True, # 生成聚合表
|
2394
|
+
# # less_dict=['天猫_品销宝账户报表'], # 单独聚合某一个数据库
|
2395
|
+
# )
|
2396
|
+
|
mdbq/config/products.py
CHANGED
@@ -124,25 +124,39 @@ class Products:
|
|
124
124
|
]
|
125
125
|
self.datas += my_datas
|
126
126
|
|
127
|
+
|
127
128
|
def to_mysql(self):
|
128
129
|
self.update_my_datas()
|
129
130
|
df = pd.DataFrame(self.datas)
|
130
|
-
|
131
|
+
m_engine = mysql.MysqlUpload(
|
131
132
|
username=username,
|
132
133
|
password=password,
|
133
134
|
host=host,
|
134
135
|
port=port,
|
135
136
|
)
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
137
|
+
for dict_data in df.to_dict(orient='records'):
|
138
|
+
m_engine.dict_to_mysql(
|
139
|
+
db_name='属性设置3',
|
140
|
+
table_name='货品年份基准',
|
141
|
+
dict_data=dict_data,
|
142
|
+
# icm_update=['日期', '店铺名称', '宝贝id'], # 唯一组合键
|
143
|
+
unique_main_key=['商品id'],
|
144
|
+
set_type={
|
145
|
+
'商品id': 'mediumtext',
|
146
|
+
'平台': 'mediumtext',
|
147
|
+
'上市年份': 'mediumtext',
|
148
|
+
},
|
149
|
+
)
|
150
|
+
# m.df_to_mysql(
|
151
|
+
# df=df,
|
152
|
+
# db_name='属性设置3',
|
153
|
+
# table_name='货品年份基准',
|
154
|
+
# move_insert = False,
|
155
|
+
# df_sql=False, # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
|
156
|
+
# drop_duplicates=True, # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
|
157
|
+
# icm_update=[],
|
158
|
+
# service_database=service_database, # 用来追踪处理进度
|
159
|
+
# )
|
146
160
|
|
147
161
|
def market_date(self, product_id: int):
|
148
162
|
try:
|
mdbq/mysql/mysql.py
CHANGED
@@ -132,6 +132,7 @@ class MysqlUpload:
|
|
132
132
|
|
133
133
|
return wrapper
|
134
134
|
|
135
|
+
@try_except
|
135
136
|
def dict_to_mysql(self, db_name, table_name, dict_data, icm_update=None, main_key=None, unique_main_key=None, index_length=100, set_type=None):
|
136
137
|
"""
|
137
138
|
插入字典数据
|
@@ -294,10 +295,12 @@ class MysqlUpload:
|
|
294
295
|
if unique_main_key:
|
295
296
|
for col in unique_main_key:
|
296
297
|
del dict_data[col]
|
297
|
-
|
298
|
+
# 涉及列名务必使用反引号
|
299
|
+
update_datas = ', '.join([f'`{k}` = VALUES(`{k}`)' for k, v in dict_data.items()])
|
298
300
|
|
299
301
|
# 构建 sql
|
300
302
|
sql = f"INSERT INTO %s (%s) VALUES (%s) ON DUPLICATE KEY UPDATE %s" % (table_name, keys_data, values_data, update_datas)
|
303
|
+
# print(sql)
|
301
304
|
cursor.execute(sql)
|
302
305
|
connection.commit() # 提交数据库
|
303
306
|
connection.close()
|
@@ -329,6 +332,10 @@ class MysqlUpload:
|
|
329
332
|
count_int, count_float = count_decimal_places(v) # 判断小数,返回小数位数
|
330
333
|
if result1: # 京东sku/spu商品信息
|
331
334
|
__res_dict.update({k: 'mediumtext'})
|
335
|
+
elif k == '日期':
|
336
|
+
__res_dict.update({k: 'DATE'})
|
337
|
+
elif k == '更新时间':
|
338
|
+
__res_dict.update({k: 'TIMESTAMP'})
|
332
339
|
elif str(v) == '':
|
333
340
|
__res_dict.update({k: 'mediumtext'})
|
334
341
|
elif result2: # 小数
|
@@ -418,6 +425,11 @@ class MysqlUpload:
|
|
418
425
|
for service_name, database in service_database.items():
|
419
426
|
# 2. 列数据类型转换,将 df 数据类型转换为 mysql 的数据类型
|
420
427
|
dtypes, cl, db_n, tb_n = self.convert_dtypes(df=df, db_name=db_name, table_name=table_name, path=json_path, service_name=service_name)
|
428
|
+
for dy in dtypes.keys():
|
429
|
+
if '日期' == dy:
|
430
|
+
dtypes.update({'日期': 'DATE'})
|
431
|
+
if '更新时间' == dy:
|
432
|
+
dtypes.update({'更新时间': 'TIMESTAMP'})
|
421
433
|
|
422
434
|
# 有特殊字符不需转义
|
423
435
|
sql = "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s;"
|
@@ -1,11 +1,11 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
2
|
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/aggregation.py,sha256=
|
4
|
+
mdbq/aggregation/aggregation.py,sha256=IJS5ILEmYlrepj2oX6TDuMjab5rYEOpQuYyTpgfRbR0,73747
|
5
5
|
mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
|
6
6
|
mdbq/aggregation/mysql_types.py,sha256=YTGyrF9vcRgfkQbpT-e-JdJ7c7VF1dDHgyx9YZRES8w,10934
|
7
7
|
mdbq/aggregation/optimize_data.py,sha256=79uwiM2WqNNFxGpE2wKz742PRq-ZGgFjdOV0vgptHdY,3513
|
8
|
-
mdbq/aggregation/query_data.py,sha256=
|
8
|
+
mdbq/aggregation/query_data.py,sha256=w_p013oMdF6YovQwP6RY6wiPTKuuTfSn53Wo1RC_CD0,103372
|
9
9
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
10
10
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
11
11
|
mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
|
@@ -18,7 +18,7 @@ mdbq/company/home_sh.py,sha256=42CZ2tZIXHLl2mOl2gk2fZnjH2IHh1VJ1s3qHABjonY,18021
|
|
18
18
|
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
19
19
|
mdbq/config/get_myconf.py,sha256=cmNvsyoNa0RbZ9FOTjSd3jyyGwkxjUo0phvdHbGlrms,6010
|
20
20
|
mdbq/config/myconfig.py,sha256=EGymTlAimtHIDJ9egCtOehBEPOj6rea504kvsEZu64o,854
|
21
|
-
mdbq/config/products.py,sha256=
|
21
|
+
mdbq/config/products.py,sha256=sC4ctAiHR7ydkEXuIlvwvTPDLJXwengkG0hFWSQRFz0,6808
|
22
22
|
mdbq/config/set_support.py,sha256=xkZCX6y9Bq1ppBpJAofld4B2YtchA7fl0eT3dx3CrSI,777
|
23
23
|
mdbq/config/update_conf.py,sha256=taL3ZqKgiVWwUrDFuaYhim9a72Hm4BHRhhDscJTziR8,4535
|
24
24
|
mdbq/dataframe/__init__.py,sha256=2HtCN8AdRj53teXDqzysC1h8aPL-mMFy561ESmhehGQ,22
|
@@ -28,7 +28,7 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
|
|
28
28
|
mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
|
29
29
|
mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
|
30
30
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
31
|
-
mdbq/mysql/mysql.py,sha256=
|
31
|
+
mdbq/mysql/mysql.py,sha256=5l5wFS6AFjXuZtb54JSnoqh_DlB3-tQd9LNac0CSYjQ,62538
|
32
32
|
mdbq/mysql/recheck_mysql.py,sha256=jHQSlQy0PlQ_EYICQv_2nairUX3t6OIwPtSELKIpjkY,8702
|
33
33
|
mdbq/mysql/s_query.py,sha256=bgNNIqYLDCHjD5KTFcm6x4u74selpAGs5ouJYuqX86k,8447
|
34
34
|
mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
|
@@ -45,7 +45,7 @@ mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
|
45
45
|
mdbq/req_post/req_tb.py,sha256=PexWSCPJNM6Tv0ol4lAWIhlOwsAr_frnjtcdSHCFiek,36179
|
46
46
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
47
47
|
mdbq/spider/aikucun.py,sha256=jHrdGWBJQaSywx7V-U4YuM6vWkwC5SR5tTOOdB3YU_c,17306
|
48
|
-
mdbq-2.9.
|
49
|
-
mdbq-2.9.
|
50
|
-
mdbq-2.9.
|
51
|
-
mdbq-2.9.
|
48
|
+
mdbq-2.9.4.dist-info/METADATA,sha256=1xFHayCMA4H3FNPMIEdut-xWBAWiwooZZrD6p7blLQU,243
|
49
|
+
mdbq-2.9.4.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
50
|
+
mdbq-2.9.4.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
51
|
+
mdbq-2.9.4.dist-info/RECORD,,
|
File without changes
|