mdbq-2.9.6-py3-none-any.whl → mdbq-2.9.8-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +34 -46
- mdbq/aggregation/query_data.py +1287 -1176
- mdbq/aggregation/query_data_bak.py +2423 -0
- mdbq/dataframe/converter.py +3 -3
- mdbq/mysql/mysql.py +60 -54
- mdbq/mysql/s_query.py +1 -0
- mdbq/spider/aikucun.py +41 -0
- {mdbq-2.9.6.dist-info → mdbq-2.9.8.dist-info}/METADATA +1 -1
- {mdbq-2.9.6.dist-info → mdbq-2.9.8.dist-info}/RECORD +11 -10
- {mdbq-2.9.6.dist-info → mdbq-2.9.8.dist-info}/WHEEL +1 -1
- {mdbq-2.9.6.dist-info → mdbq-2.9.8.dist-info}/top_level.txt +0 -0
mdbq/dataframe/converter.py
CHANGED
@@ -71,8 +71,6 @@ class DataFrameConverter(object):
|
|
71
71
|
pass
|
72
72
|
if df[col].dtype == 'float' or df[col].dtype == 'float64': # 对于小数类型, 保留 6 位小数
|
73
73
|
df[col] = df[col].fillna(0.0).apply(lambda x: round(x, 6))
|
74
|
-
# df[col] = df[col].fillna(0.0).apply(lambda x: "{:.6f}".format(x))
|
75
|
-
# df[col] = df[col].apply('float64')
|
76
74
|
|
77
75
|
# 转换日期样式的列为日期类型
|
78
76
|
value = df.loc[0, col]
|
@@ -84,7 +82,9 @@ class DataFrameConverter(object):
|
|
84
82
|
df[col] = df[col].apply(lambda x: pd.to_datetime(x))
|
85
83
|
except:
|
86
84
|
pass
|
87
|
-
new_col =
|
85
|
+
new_col = col.lower()
|
86
|
+
new_col = re.sub(r'[()\-,,&~^、 ()\"\'“”=·/。》《><!!`]', '_', new_col, re.IGNORECASE)
|
87
|
+
new_col = new_col.replace(')', '')
|
88
88
|
new_col = re.sub(r'_{2,}', '_', new_col)
|
89
89
|
new_col = re.sub(r'_+$', '', new_col)
|
90
90
|
df.rename(columns={col: new_col}, inplace=True)
|
mdbq/mysql/mysql.py
CHANGED
@@ -376,17 +376,17 @@ class MysqlUpload:
|
|
376
376
|
cols = df.columns.tolist()
|
377
377
|
for col in cols:
|
378
378
|
df[col] = df[col].apply(lambda x: float(re.sub(r'%$', '', str(x))) / 100 if (
|
379
|
-
str(x) != '' and str(x).endswith('%')) else '0.0' if str(x) == '0%' else x)
|
379
|
+
str(x) != '' and str(x).endswith('%')) and not re.findall('[\\u4e00-\\u9fa5]', str(x)) else '0.0' if str(x) == '0%' else x)
|
380
380
|
try:
|
381
381
|
df[col] = df[col].apply(
|
382
382
|
lambda x: int(x) if '_' not in str(x) and '.' not in str(x) else x) # 不含小数点尝试转整数
|
383
383
|
except:
|
384
384
|
pass
|
385
|
-
|
386
|
-
|
385
|
+
try:
|
386
|
+
if df[col].dtype == 'object': # 有些列没有被 pandas 识别数据类型,会没有 dtype 属性
|
387
387
|
df[col] = df[col].apply(lambda x: float(x) if '.' in str(x) and '_' not in str(x) else x)
|
388
|
-
|
389
|
-
|
388
|
+
except:
|
389
|
+
pass
|
390
390
|
new_col = col.lower()
|
391
391
|
new_col = re.sub(r'[()\-,,&~^、 ()\"\'“”=·/。》《><!!`]', '_', new_col, re.IGNORECASE)
|
392
392
|
new_col = new_col.replace(')', '')
|
@@ -432,7 +432,7 @@ class MysqlUpload:
|
|
432
432
|
__res_dict.update({k: 'varchar(255)'})
|
433
433
|
return __res_dict, df
|
434
434
|
|
435
|
-
@try_except
|
435
|
+
# @try_except
|
436
436
|
def df_to_mysql(self, df, db_name, table_name, set_typ=None, icm_update=[], move_insert=False, df_sql=False, drop_duplicates=False,
|
437
437
|
filename=None, count=None, reset_id=False):
|
438
438
|
"""
|
@@ -532,18 +532,19 @@ class MysqlUpload:
|
|
532
532
|
chunksize=1000
|
533
533
|
)
|
534
534
|
if reset_id:
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
535
|
+
pass
|
536
|
+
# # 6. 重置自增列
|
537
|
+
# try:
|
538
|
+
# cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
|
539
|
+
# result = cursor.fetchone()
|
540
|
+
# if result:
|
541
|
+
# cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;") # 删除 id 列
|
542
|
+
# cursor.execute(
|
543
|
+
# f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
|
544
|
+
# cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1") # 设置自增从 1 开始
|
545
|
+
# except Exception as e:
|
546
|
+
# print(f'{e}')
|
547
|
+
# connection.rollback()
|
547
548
|
connection.commit() # 提交事务
|
548
549
|
connection.close()
|
549
550
|
return
|
@@ -570,17 +571,18 @@ class MysqlUpload:
|
|
570
571
|
)
|
571
572
|
# 6. 重置自增列
|
572
573
|
if reset_id:
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
574
|
+
pass
|
575
|
+
# try:
|
576
|
+
# cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
|
577
|
+
# result = cursor.fetchone()
|
578
|
+
# if result:
|
579
|
+
# cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;") # 删除 id 列
|
580
|
+
# cursor.execute(
|
581
|
+
# f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
|
582
|
+
# cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1") # 设置自增从 1 开始
|
583
|
+
# except Exception as e:
|
584
|
+
# print(f'{e}')
|
585
|
+
# connection.rollback()
|
584
586
|
connection.close()
|
585
587
|
return
|
586
588
|
|
@@ -589,6 +591,9 @@ class MysqlUpload:
|
|
589
591
|
# data 是传进来待处理的数据, 不是数据库数据
|
590
592
|
# data 示例: {'日期': Timestamp('2024-08-27 00:00:00'), '推广费余额': 33299, '品销宝余额': 2930.73, '短信剩余': 67471}
|
591
593
|
try:
|
594
|
+
cols = ', '.join(f"`{item}`" for item in data.keys()) # 列名需要转义
|
595
|
+
# data.update({item: f"{data[item]}" for item in data.keys()}) # 全部值转字符, 不是必须的
|
596
|
+
values = ', '.join([f'"{item}"' for item in data.values()]) # 值要加引号
|
592
597
|
condition = []
|
593
598
|
for k, v in data.items():
|
594
599
|
condition += [f'`{k}` = "{v}"']
|
@@ -665,23 +670,24 @@ class MysqlUpload:
|
|
665
670
|
sql = f"INSERT INTO `{table_name}` ({cols}) VALUES (%s);" % (values)
|
666
671
|
cursor.execute(sql)
|
667
672
|
except Exception as e:
|
673
|
+
pass
|
668
674
|
# print(data)
|
669
675
|
# print(values)
|
670
|
-
print(f'mysql -> df_to_mysql 报错: {e}, {self.filename}')
|
676
|
+
# print(f'mysql -> df_to_mysql 报错: {e}, {self.filename}')
|
671
677
|
# breakpoint()
|
672
678
|
|
673
|
-
# 6. 重置自增列
|
674
|
-
try:
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
except Exception as e:
|
683
|
-
|
684
|
-
|
679
|
+
# # 6. 重置自增列
|
680
|
+
# try:
|
681
|
+
# cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
|
682
|
+
# result = cursor.fetchone()
|
683
|
+
# if result:
|
684
|
+
# cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;") # 删除 id 列
|
685
|
+
# cursor.execute(
|
686
|
+
# f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
|
687
|
+
# cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1") # 设置自增从 1 开始
|
688
|
+
# except Exception as e:
|
689
|
+
# print(f'{table_name}, -> {e}')
|
690
|
+
# connection.rollback()
|
685
691
|
connection.commit() # 提交事务
|
686
692
|
connection.close()
|
687
693
|
|
@@ -911,18 +917,18 @@ class OptimizeDatas:
|
|
911
917
|
else: # 不存在日期列的情况
|
912
918
|
self.delete_duplicate2(table_name=table_name)
|
913
919
|
|
914
|
-
# 5. 重置自增列 (id 列)
|
915
|
-
try:
|
916
|
-
|
917
|
-
|
918
|
-
|
919
|
-
|
920
|
-
|
921
|
-
|
922
|
-
|
923
|
-
except Exception as e:
|
924
|
-
|
925
|
-
|
920
|
+
# # 5. 重置自增列 (id 列)
|
921
|
+
# try:
|
922
|
+
# cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
|
923
|
+
# result = cursor.fetchone()
|
924
|
+
# if result:
|
925
|
+
# cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;") # 删除 id 列
|
926
|
+
# cursor.execute(
|
927
|
+
# f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
|
928
|
+
# cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1") # 设置自增从 1 开始
|
929
|
+
# except Exception as e:
|
930
|
+
# print(f'{e}')
|
931
|
+
# self.connection.rollback()
|
926
932
|
self.connection.close()
|
927
933
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
|
928
934
|
print(f'{now}mysql({self.host}: {self.port}) {self.db_name} 数据库优化完成!')
|
mdbq/mysql/s_query.py
CHANGED
mdbq/spider/aikucun.py
CHANGED
@@ -20,6 +20,9 @@ from selenium.webdriver.common.keys import Keys
|
|
20
20
|
from mdbq.aggregation import aggregation
|
21
21
|
from mdbq.clean import data_clean
|
22
22
|
from mdbq.other import ua_sj
|
23
|
+
from mdbq.mysql import mysql
|
24
|
+
from mdbq.config import myconfig
|
25
|
+
import socket
|
23
26
|
|
24
27
|
warnings.filterwarnings('ignore')
|
25
28
|
|
@@ -44,6 +47,23 @@ else:
|
|
44
47
|
Share_Path = str(pathlib.Path('/Volumes/时尚事业部/01.运营部/天猫报表')) # 共享文件根目录
|
45
48
|
Source_Path = str(pathlib.Path(Data_Path, '原始文件2'))
|
46
49
|
upload_path = os.path.join(D_PATH, '数据上传中心') # 此目录位于下载文件夹
|
50
|
+
if socket.gethostname() == 'company' or socket.gethostname() == 'Mac2.local':
|
51
|
+
conf = myconfig.main()
|
52
|
+
conf_data = conf['Windows']['company']['mysql']['local']
|
53
|
+
username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data[
|
54
|
+
'port']
|
55
|
+
else:
|
56
|
+
conf = myconfig.main()
|
57
|
+
conf_data = conf['Windows']['xigua_lx']['mysql']['local']
|
58
|
+
username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data[
|
59
|
+
'port']
|
60
|
+
m_engine = mysql.MysqlUpload(
|
61
|
+
username=username,
|
62
|
+
password=password,
|
63
|
+
host=host,
|
64
|
+
port=port,
|
65
|
+
charset='utf8mb4'
|
66
|
+
)
|
47
67
|
|
48
68
|
|
49
69
|
def get_cookie_aikucun():
|
@@ -262,6 +282,12 @@ class AikuCun:
|
|
262
282
|
_driver.quit()
|
263
283
|
|
264
284
|
def clean_data(self, date):
|
285
|
+
set_typ = {
|
286
|
+
'店铺名称': 'varchar(100)',
|
287
|
+
'spu_id': 'varchar(100)',
|
288
|
+
'图片': 'varchar(255)',
|
289
|
+
'数据更新时间': 'timestamp',
|
290
|
+
}
|
265
291
|
for root, dirs, files in os.walk(upload_path, topdown=False):
|
266
292
|
for name in files:
|
267
293
|
if '~$' in name or 'DS_Store' in name:
|
@@ -285,6 +311,21 @@ class AikuCun:
|
|
285
311
|
# df['数据更新时间'] = df['数据更新时间'].apply(lambda x: re.sub(' ', ' ', str(x)) if x else x)
|
286
312
|
# print(df['数据更新时间'])
|
287
313
|
# breakpoint()
|
314
|
+
|
315
|
+
m_engine.df_to_mysql(
|
316
|
+
df=df,
|
317
|
+
db_name='爱库存2',
|
318
|
+
table_name='商品spu榜单',
|
319
|
+
icm_update=[], # 增量更新, 在聚合数据中使用,其他不要用
|
320
|
+
move_insert=False, # 先删除,再插入
|
321
|
+
df_sql=True, # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
|
322
|
+
drop_duplicates=False, # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
|
323
|
+
count=None,
|
324
|
+
filename=None, # 用来追踪处理进度
|
325
|
+
reset_id=False, # 是否重置自增列
|
326
|
+
set_typ=set_typ,
|
327
|
+
)
|
328
|
+
|
288
329
|
new_name = f'爱库存_商品榜单_spu_{date}_{date}.csv'
|
289
330
|
df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False)
|
290
331
|
os.remove(os.path.join(root, name))
|
@@ -1,11 +1,12 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
2
|
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/aggregation.py,sha256=
|
4
|
+
mdbq/aggregation/aggregation.py,sha256=2KCVXZygQt4xVxGbFcDMBpL3PukY4yQF_uI-qLSTWaU,73460
|
5
5
|
mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
|
6
6
|
mdbq/aggregation/mysql_types.py,sha256=YTGyrF9vcRgfkQbpT-e-JdJ7c7VF1dDHgyx9YZRES8w,10934
|
7
7
|
mdbq/aggregation/optimize_data.py,sha256=79uwiM2WqNNFxGpE2wKz742PRq-ZGgFjdOV0vgptHdY,3513
|
8
|
-
mdbq/aggregation/query_data.py,sha256=
|
8
|
+
mdbq/aggregation/query_data.py,sha256=e6vb4hPYZL5KWE6O-MtDoY13GWhx5YMDvTyD3rdgy3c,111441
|
9
|
+
mdbq/aggregation/query_data_bak.py,sha256=r1FU0C4zjXln7oVSrRkElh4Ehl-9mYhGcq57jLbViUA,104071
|
9
10
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
10
11
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
11
12
|
mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
|
@@ -20,15 +21,15 @@ mdbq/config/products.py,sha256=ykvoQiA4OvFEYQ35wmCkREECdz0xIJzIs-Xix9mFpYI,6295
|
|
20
21
|
mdbq/config/set_support.py,sha256=xkZCX6y9Bq1ppBpJAofld4B2YtchA7fl0eT3dx3CrSI,777
|
21
22
|
mdbq/config/update_conf.py,sha256=taL3ZqKgiVWwUrDFuaYhim9a72Hm4BHRhhDscJTziR8,4535
|
22
23
|
mdbq/dataframe/__init__.py,sha256=2HtCN8AdRj53teXDqzysC1h8aPL-mMFy561ESmhehGQ,22
|
23
|
-
mdbq/dataframe/converter.py,sha256=
|
24
|
+
mdbq/dataframe/converter.py,sha256=doWRcFMqf0_RQz7w5BxRNB8JeLetFSsNld43GWlhXXc,4600
|
24
25
|
mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
|
25
26
|
mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
|
26
27
|
mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
|
27
28
|
mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
|
28
29
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
29
|
-
mdbq/mysql/mysql.py,sha256=
|
30
|
+
mdbq/mysql/mysql.py,sha256=_geeu7LP-Ur76kr1ka7_jdifnwrnJJjWnUBzlPXOQOQ,60119
|
30
31
|
mdbq/mysql/recheck_mysql.py,sha256=rgTpvDMWYTyEn7UQdlig-pdXDluTgiU8JG6lkMh8DV0,8665
|
31
|
-
mdbq/mysql/s_query.py,sha256=
|
32
|
+
mdbq/mysql/s_query.py,sha256=fnXncwSmA7CB0ELn1a-YxYZDrYkC2Bcgnj2J4dcQ8X8,8481
|
32
33
|
mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
|
33
34
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
34
35
|
mdbq/other/porxy.py,sha256=UHfgEyXugogvXgsG68a7QouUCKaohTKKkI4RN-kYSdQ,4961
|
@@ -42,8 +43,8 @@ mdbq/pbix/refresh_all_old.py,sha256=_pq3WSQ728GPtEG5pfsZI2uTJhU8D6ra-htIk1JXYzw,
|
|
42
43
|
mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
43
44
|
mdbq/req_post/req_tb.py,sha256=qg7pet73IgKGmCwxaeUyImJIoeK_pBQT9BBKD7fkBNg,36160
|
44
45
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
45
|
-
mdbq/spider/aikucun.py,sha256=
|
46
|
-
mdbq-2.9.
|
47
|
-
mdbq-2.9.
|
48
|
-
mdbq-2.9.
|
49
|
-
mdbq-2.9.
|
46
|
+
mdbq/spider/aikucun.py,sha256=01qJo_Di5Kmi2lG5_HKb0OI283b1-Pgqh-nnA0pX4TY,19038
|
47
|
+
mdbq-2.9.8.dist-info/METADATA,sha256=-YqbseryNUHGqmaRj1Brh9FejzA62uUxgav_hmn14CQ,243
|
48
|
+
mdbq-2.9.8.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
|
49
|
+
mdbq-2.9.8.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
50
|
+
mdbq-2.9.8.dist-info/RECORD,,
|
File without changes
|