mdbq 3.6.13__py3-none-any.whl → 3.6.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/optimize_data.py +7 -5
- mdbq/aggregation/query_data.py +41 -39
- mdbq/mysql/mysql.py +119 -140
- mdbq/mysql/s_query.py +9 -7
- mdbq/redis/getredis.py +0 -1
- {mdbq-3.6.13.dist-info → mdbq-3.6.15.dist-info}/METADATA +1 -1
- {mdbq-3.6.13.dist-info → mdbq-3.6.15.dist-info}/RECORD +9 -9
- {mdbq-3.6.13.dist-info → mdbq-3.6.15.dist-info}/WHEEL +0 -0
- {mdbq-3.6.13.dist-info → mdbq-3.6.15.dist-info}/top_level.txt +0 -0
mdbq/mysql/mysql.py
CHANGED
@@ -16,6 +16,7 @@ import calendar
from mdbq.config import set_support
from mdbq.config import myconfig
import traceback
+ import logging

warnings.filterwarnings('ignore')
"""
@@ -29,6 +30,7 @@ warnings.filterwarnings('ignore')

"""
error_file = os.path.join(set_support.SetSupport(dirname='support').dirname, 'error.log')
+ logger = logging.getLogger(__name__)


def is_valid_date(date_string):
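Note on the two additions above: logging.getLogger(__name__) creates a module-level logger but attaches no handler, so the logger.info(...) calls introduced throughout this release are silent unless the consuming application configures logging itself. A minimal sketch of such a setup (the level and format here are illustrative choices, not part of mdbq):

    import logging

    # Send INFO-level records, including mdbq's, to the console.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(name)s %(levelname)s %(message)s',
    )

    # Or adjust only this module's logger:
    logging.getLogger('mdbq.mysql.mysql').setLevel(logging.INFO)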
@@ -119,14 +121,14 @@ class MysqlUpload:
try:
return func(*args, **kwargs)
except Exception as e:
-
+ logger.info(f'{func.__name__}, {e}') # 将异常信息返回
with open(error_file, 'a') as f:
now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
f.write(f'\n{now} \n')
f.write(f'函数注释内容(用于定位函数): {func.__doc__} \n')
# f.write(f'报错的文件:\n{e.__traceback__.tb_frame.f_globals["__file__"]}\n') # 发生异常所在的文件
traceback.print_exc(file=open(error_file, 'a')) # 返回完整的堆栈信息
-
+ logger.info(f'更多信息请查看日志文件: {error_file}')

return wrapper

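The hunk above shows only the except branch of the error-handling decorator; the overall pattern is: log a one-line summary, append the full traceback to error.log, and continue instead of re-raising. A standalone sketch of that shape (names and messages here are illustrative, not mdbq's exact code):

    import functools
    import logging
    import traceback

    logger = logging.getLogger(__name__)

    def log_errors(error_file):
        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    logger.info(f'{func.__name__}, {e}')      # one-line summary
                    with open(error_file, 'a') as f:
                        traceback.print_exc(file=f)           # full stack trace to the file
                    logger.info(f'see {error_file} for details')
            return wrapper
        return decorator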
@@ -137,16 +139,16 @@ class MysqlUpload:
connection = pymysql.connect(**_config) # 连接数据库
return connection
except Exception as e:
-
+ logger.info(f'连接失败,正在重试: {attempts}/{max_try} {e}')
attempts += 1
time.sleep(30)
-
+ logger.info(f'{_db_name}: 连接失败,重试次数超限,当前设定次数: {max_try}')
return None

def cover_doc_dtypes(self, dict_data):
""" 清理字典键值 并转换数据类型 """
if not dict_data:
-
+ logger.info(f'mysql.py -> MysqlUpload -> cover_dict_dtypes -> 传入的字典不能为空')
return
__res_dict = {}
new_dict_data = {}
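keep_connect (partially visible above) wraps pymysql.connect in a bounded retry loop: on failure it logs the attempt, sleeps 30 seconds, and after max_try failures logs once more and returns None. A condensed sketch of that retry shape, assuming a pymysql-style config dict:

    import logging
    import time

    import pymysql

    logger = logging.getLogger(__name__)

    def keep_connect(config, db_name='', max_try=10):
        attempts = 1
        while attempts <= max_try:
            try:
                return pymysql.connect(**config)   # success: return the live connection
            except Exception as e:
                logger.info(f'connect failed, retrying: {attempts}/{max_try} {e}')
                attempts += 1
                time.sleep(30)
        logger.info(f'{db_name}: giving up after {max_try} attempts')
        return None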
@@ -213,7 +215,7 @@ class MysqlUpload:
if not self.config:
return
if '数据主体' not in dict_data.keys():
-
+ logger.info(f'dict_data 中"数据主体"键不能为空')
return

# connection = pymysql.connect(**self.config) # 连接数据库
@@ -234,7 +236,7 @@ class MysqlUpload:
sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
cursor.execute(sql)
connection.commit()
-
+ logger.info(f"创建Database: {db_name}")

self.config.update({'database': db_name}) # 添加更新 config 字段
# connection = pymysql.connect(**self.config) # 重新连接数据库
@@ -248,7 +250,7 @@ class MysqlUpload:
if not cursor.fetchone():
sql = f"CREATE TABLE IF NOT EXISTS `{table_name}` (id INT AUTO_INCREMENT PRIMARY KEY);"
cursor.execute(sql)
-
+ logger.info(f'创建 mysql 表: {table_name}')

new_dict = {}
[new_dict.update({k: v}) for k, v in dict_data.items() if k != '数据主体']
@@ -275,11 +277,11 @@ class MysqlUpload:
else:
sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {set_typ[col]} NOT NULL;"
cursor.execute(sql)
-
+ logger.info(f"添加列: {col}({set_typ[col]})") # 添加列并指定数据类型

if col == '日期':
sql = f"CREATE INDEX index_name ON `{table_name}`(`{col}`);"
-
+ logger.info(f"设置为索引: {col}({set_typ[col]})")
cursor.execute(sql)
connection.commit() # 提交事务

@@ -290,7 +292,7 @@ class MysqlUpload:
for up_col in remove_by_key:
condition += [f'`{up_col}` = "{dict_data[up_col]}"']
condition = ' AND '.join(condition)
- #
+ # logger.info(condition)
sql = f"SELECT {se_key} FROM `{table_name}` WHERE {condition}"
cursor.execute(sql)
result = cursor.fetchall()
@@ -300,15 +302,14 @@ class MysqlUpload:

# 插入数据到数据库
# 有数据格式错误问题,所以分开处理,将数据主体移到最后面用占位符
-
- print(f'{now} 正在更新: mysql ({self.host}:{self.port}) {db_name}/{table_name} -> {filename}')
+ logger.info(f'正在更新: mysql ({self.host}:{self.port}) {db_name}/{table_name} -> {filename}')
if new_dict:
cols = ', '.join(f"`{item}`" for item in new_dict.keys()) # 列名需要转义
values = ', '.join([f'"{item}"' for item in new_dict.values()]) # 值要加引号
cols = ', '.join([cols, '数据主体'])
binary_data = dict_data['数据主体']
sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({values}, %s)"
- #
+ # logger.info(sql)
cursor.execute(sql, binary_data)
else:
sql = f"""INSERT INTO `{table_name}` (数据主体) VALUES (%s);"""
@@ -338,11 +339,11 @@ class MysqlUpload:
cursor.execute(
f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
- #
+ # logger.info(f'重置自增id')
else:
-
+ logger.info(f'{table_name} 存在复合主键: 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
except Exception as e:
-
+ logger.info(f'333 {table_name} {e}')
connection.rollback()
connection.commit()

@@ -362,7 +363,7 @@ class MysqlUpload:
return
if icm_update:
if main_key or unique_main_key:
-
+ logger.info(f'icm_update/unique_main_key/unique_main_key 参数不能同时设定')
return
if not main_key:
main_key = []
@@ -370,7 +371,7 @@ class MysqlUpload:
unique_main_key = []

if not dict_data_list:
-
+ logger.info(f'dict_data_list 不能为空 ')
return
dict_data = dict_data_list[0]
if cut_data:
@@ -383,9 +384,9 @@ class MysqlUpload:
elif str(cut_data).lower() == 'month':
table_name = f'{table_name}_{__y_m}'
else:
-
+ logger.info(f'参数不正确,cut_data应为 year 或 month ')
except Exception as e:
-
+ logger.info(f'{table_name} 将数据按年/月保存(cut_data),但在转换日期时报错 -> {e}')

# connection = pymysql.connect(**self.config) # 连接数据库
connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
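cut_data shards the target table by date: 'year' appends the year to table_name and 'month' appends a year-month suffix; any other value is logged as invalid, and a date that cannot be parsed is also logged rather than raised. A small illustration of deriving such a suffix with pandas (the exact suffix format used by mdbq may differ):

    import pandas as pd

    def sharded_table_name(table_name, date_value, cut_data):
        ts = pd.to_datetime(date_value)
        if str(cut_data).lower() == 'year':
            return f'{table_name}_{ts.year}'
        if str(cut_data).lower() == 'month':
            return f'{table_name}_{ts.year}-{ts.month:02d}'   # e.g. sales_2024-07
        raise ValueError("cut_data should be 'year' or 'month'")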
@@ -405,7 +406,7 @@ class MysqlUpload:
sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
cursor.execute(sql)
connection.commit()
-
+ logger.info(f"创建Database: {db_name}")

self.config.update({'database': db_name}) # 添加更新 config 字段
# connection = pymysql.connect(**self.config) # 重新连接数据库
@@ -419,7 +420,7 @@ class MysqlUpload:
if not cursor.fetchone():
sql = f"CREATE TABLE IF NOT EXISTS `{table_name}` (id INT AUTO_INCREMENT PRIMARY KEY);"
cursor.execute(sql)
-
+ logger.info(f'创建 mysql 表: {table_name}')

# 根据 dict_data 的值添加指定的数据类型
dtypes, dict_data = self.cover_dict_dtypes(dict_data=dict_data) # {'店铺名称': 'varchar(100)',...}
@@ -443,13 +444,13 @@ class MysqlUpload:
else:
sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
# sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
- #
+ # logger.info(sql)
cursor.execute(sql)
-
+ logger.info(f"添加列: {col}({dtypes[col]})") # 添加列并指定数据类型

if col in main_key or col == '日期':
sql = f"CREATE INDEX index_name ON `{table_name}`(`{col}`);"
-
+ logger.info(f"设置为索引: {col}({dtypes[col]})")
cursor.execute(sql)
if col in unique_main_key:
if dtypes[col] == 'mediumtext':
@@ -464,22 +465,22 @@ class MysqlUpload:
# if not result:
# if col in main_key:
# sql = f"CREATE INDEX index_name ON `{table_name}`(`{col}`);"
- #
+ # logger.info(f"设置为索引: {col}({dtypes[col]})")
# cursor.execute(sql)
# elif col in unique_main_key:
# if dtypes[col] == 'mediumtext':
# sql = f"CREATE INDEX UNIQUE index_name ON `{table_name}` (`{col}`({index_length}));"
# else:
# sql = f"CREATE INDEX UNIQUE index_name ON `{table_name}` (`{col}`);"
- #
- #
+ # logger.info(f"设置唯一索引: {col}({dtypes[col]})")
+ # logger.info(sql)
# cursor.execute(sql)
connection.commit() # 提交事务
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# 处理插入的数据
for dict_data in dict_data_list:
- #
+ # logger.info(dict_data)
dtypes, dict_data = self.cover_dict_dtypes(dict_data=dict_data) # {'店铺名称': 'varchar(100)',...}
if icm_update:
""" 使用增量更新: 需确保 icm_update['主键'] 传进来的列组合是数据表中唯一,值不会发生变化且不会重复,否则可能产生覆盖 """
@@ -496,7 +497,7 @@ class MysqlUpload:
condition += [f'`{up_col}` = "{dict_data[up_col]}"']
condition = ' AND '.join(condition) # condition值示例: `品销宝余额` = '2930.73' AND `短信剩余` = '67471'
sql = f"SELECT {unique_keys} FROM `{table_name}` WHERE {condition}"
- #
+ # logger.info(sql)
# sql = f"SELECT {unique_keys} FROM `{table_name}` WHERE `创建时间` = '2014-09-19 14:32:33'"
cursor.execute(sql)
results = cursor.fetchall() # results 是数据库取出的数据
@@ -515,7 +516,7 @@ class MysqlUpload:
mysql_value = re.sub(r'0+$', '', mysql_value)
mysql_value = re.sub(r'\.$', '', mysql_value)
if df_value != mysql_value: # 传进来的数据和数据库比较, 有变化
- #
+ # logger.info(f'{dict_data['日期']}{dict_data['商品id']}{col} 列的值有变化,{str(dict_data[col])} != {str(result[col])}')
change_values += [f"`{col}` = \"{str(dict_data[col])}\""]
change_col.append(col)
not_change_col = [item for item in update_col if item not in change_col]
@@ -525,11 +526,11 @@ class MysqlUpload:
not_change_values = [f'`{col}` = "{str(dict_data[col])}"' for col in not_change_col]
not_change_values = ' AND '.join(
not_change_values) # 示例: `短信剩余` = '888' AND `test1` = '93'
- #
+ # logger.info(change_values, not_change_values)
condition += f' AND {not_change_values}' # 重新构建完整的查询条件,将未发生变化的列加进查询条件
change_values = ', '.join(f"{item}" for item in change_values) # 注意这里 item 外面没有反引号
sql = "UPDATE `%s` SET %s WHERE %s" % (table_name, change_values, condition)
- #
+ # logger.info(sql)
cursor.execute(sql)
else: # 没有数据返回,则直接插入数据
cols = ', '.join(f"`{item}`" for item in dict_data.keys()) # 列名需要转义
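In the icm_update branch above, incoming values are compared against what MySQL already holds only after stripping trailing zeros and a trailing decimal point, so that '2930.7300' stored in the database matches '2930.73' from the incoming data and does not trigger an UPDATE. A minimal illustration of that normalization:

    import re

    def normalize(value):
        s = str(value)
        if '.' in s:
            s = re.sub(r'0+$', '', s)    # '2930.7300' -> '2930.73'
            s = re.sub(r'\.$', '', s)    # '88.'       -> '88'
        return s

    assert normalize('2930.7300') == normalize('2930.73')
    assert normalize('88.00') == '88'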
@@ -556,7 +557,7 @@ class MysqlUpload:

# 构建 sql
sql = f"INSERT INTO %s (%s) VALUES (%s) ON DUPLICATE KEY UPDATE %s" % (table_name, keys_data, values_data, update_datas)
- #
+ # logger.info(sql)
cursor.execute(sql)
connection.commit() # 提交数据库
connection.close()
@@ -577,7 +578,7 @@ class MysqlUpload:
return
if icm_update:
if main_key or unique_main_key:
-
+ logger.info(f'icm_update/unique_main_key/unique_main_key 参数不能同时设定')
return
if not main_key:
main_key = []
@@ -594,9 +595,9 @@ class MysqlUpload:
elif str(cut_data).lower() == 'month':
table_name = f'{table_name}_{__y_m}'
else:
-
+ logger.info(f'参数不正确,cut_data应为 year 或 month ')
except Exception as e:
-
+ logger.info(f'{table_name} 将数据按年/月保存(cut_data),但在转换日期时报错 -> {e}')

# connection = pymysql.connect(**self.config) # 连接数据库
connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
@@ -616,7 +617,7 @@ class MysqlUpload:
sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
cursor.execute(sql)
connection.commit()
-
+ logger.info(f"创建Database: {db_name}")

self.config.update({'database': db_name}) # 添加更新 config 字段
# connection = pymysql.connect(**self.config) # 重新连接数据库
@@ -630,7 +631,7 @@ class MysqlUpload:
if not cursor.fetchone():
sql = f"CREATE TABLE IF NOT EXISTS `{table_name}` (id INT AUTO_INCREMENT PRIMARY KEY);"
cursor.execute(sql)
-
+ logger.info(f'创建 mysql 表: {table_name}')

# 根据 dict_data 的值添加指定的数据类型
dtypes, dict_data = self.cover_dict_dtypes(dict_data=dict_data) # {'店铺名称': 'varchar(100)',...}
@@ -654,13 +655,13 @@ class MysqlUpload:
else:
sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
# sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
- #
+ # logger.info(sql)
cursor.execute(sql)
-
+ logger.info(f"添加列: {col}({dtypes[col]})") # 添加列并指定数据类型

if col in main_key or col == '日期':
sql = f"CREATE INDEX index_name ON `{table_name}`(`{col}`);"
-
+ logger.info(f"设置为索引: {col}({dtypes[col]})")
cursor.execute(sql)
if col in unique_main_key:
if dtypes[col] == 'mediumtext':
@@ -675,15 +676,15 @@ class MysqlUpload:
# if not result:
# if col in main_key:
# sql = f"CREATE INDEX index_name ON `{table_name}`(`{col}`);"
- #
+ # logger.info(f"设置为索引: {col}({dtypes[col]})")
# cursor.execute(sql)
# elif col in unique_main_key:
# if dtypes[col] == 'mediumtext':
# sql = f"CREATE INDEX UNIQUE index_name ON `{table_name}` (`{col}`({index_length}));"
# else:
# sql = f"CREATE INDEX UNIQUE index_name ON `{table_name}` (`{col}`);"
- #
- #
+ # logger.info(f"设置唯一索引: {col}({dtypes[col]})")
+ # logger.info(sql)
# cursor.execute(sql)
connection.commit() # 提交事务
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
@@ -704,7 +705,7 @@ class MysqlUpload:
condition += [f'`{up_col}` = "{dict_data[up_col]}"']
condition = ' AND '.join(condition) # condition值示例: `品销宝余额` = '2930.73' AND `短信剩余` = '67471'
sql = f"SELECT {unique_keys} FROM `{table_name}` WHERE {condition}"
- #
+ # logger.info(sql)
# sql = f"SELECT {unique_keys} FROM `{table_name}` WHERE `创建时间` = '2014-09-19 14:32:33'"
cursor.execute(sql)
results = cursor.fetchall() # results 是数据库取出的数据
@@ -723,7 +724,7 @@ class MysqlUpload:
mysql_value = re.sub(r'0+$', '', mysql_value)
mysql_value = re.sub(r'\.$', '', mysql_value)
if df_value != mysql_value: # 传进来的数据和数据库比较, 有变化
- #
+ # logger.info(f'{dict_data['日期']}{dict_data['商品id']}{col} 列的值有变化,{str(dict_data[col])} != {str(result[col])}')
change_values += [f"`{col}` = \"{str(dict_data[col])}\""]
change_col.append(col)
not_change_col = [item for item in update_col if item not in change_col]
@@ -733,11 +734,11 @@ class MysqlUpload:
not_change_values = [f'`{col}` = "{str(dict_data[col])}"' for col in not_change_col]
not_change_values = ' AND '.join(
not_change_values) # 示例: `短信剩余` = '888' AND `test1` = '93'
- #
+ # logger.info(change_values, not_change_values)
condition += f' AND {not_change_values}' # 重新构建完整的查询条件,将未发生变化的列加进查询条件
change_values = ', '.join(f"{item}" for item in change_values) # 注意这里 item 外面没有反引号
sql = "UPDATE `%s` SET %s WHERE %s" % (table_name, change_values, condition)
- #
+ # logger.info(sql)
cursor.execute(sql)
else: # 没有数据返回,则直接插入数据
cols = ', '.join(f"`{item}`" for item in dict_data.keys()) # 列名需要转义
@@ -765,7 +766,7 @@ class MysqlUpload:

# 构建 sql
sql = f"INSERT INTO %s (%s) VALUES (%s) ON DUPLICATE KEY UPDATE %s" % (table_name, keys_data, values_data, update_datas)
- #
+ # logger.info(sql)
cursor.execute(sql)
connection.commit() # 提交数据库
connection.close()
@@ -773,7 +774,7 @@ class MysqlUpload:
def cover_dict_dtypes(self, dict_data):
""" 清理字典键值 并转换数据类型 """
if not dict_data:
-
+ logger.info(f'mysql.py -> MysqlUpload -> cover_dict_dtypes -> 传入的字典不能为空')
return
__res_dict = {}
new_dict_data = {}
@@ -924,23 +925,23 @@ class MysqlUpload:
return
if icm_update:
if move_insert or df_sql or drop_duplicates:
-
+ logger.info(f'icm_update/move_insert/df_sql/drop_duplicates 参数不能同时设定')
return
if move_insert:
if icm_update or df_sql or drop_duplicates:
-
+ logger.info(f'icm_update/move_insert/df_sql/drop_duplicates 参数不能同时设定')
return

self.filename = filename
if isinstance(df, pd.DataFrame):
if len(df) == 0:
-
+ logger.info(f'{db_name}: {table_name} 传入的 df 数据长度为0, {self.filename}')
return
else:
-
+ logger.info(f'{db_name}: {table_name} 传入的 df 不是有效的 dataframe 结构, {self.filename}')
return
if not db_name or db_name == 'None':
-
+ logger.info(f'{db_name} 不能为 None')
return

if cut_data:
@@ -956,9 +957,9 @@ class MysqlUpload:
elif str(cut_data).lower() == 'month':
table_name = f'{table_name}_{min_year}-{min_month}'
else:
-
+ logger.info(f'参数不正确,cut_data应为 year 或 month ')
except Exception as e:
-
+ logger.info(f'{table_name} 将数据按年/月保存(cut_data),但在转换日期时报错 -> {e}')
# 清理 dataframe 非法值,并转换获取数据类型
dtypes, df = self.convert_df_dtypes(df)
if set_typ:
@@ -985,7 +986,7 @@ class MysqlUpload:
sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
cursor.execute(sql)
connection.commit()
-
+ logger.info(f"创建Database: {db_name}")

self.config.update({'database': db_name}) # 添加更新 config 字段
# connection = pymysql.connect(**self.config) # 重新连接数据库
@@ -999,7 +1000,7 @@ class MysqlUpload:
if not cursor.fetchone():
sql = f"CREATE TABLE IF NOT EXISTS `{table_name}` (id INT AUTO_INCREMENT PRIMARY KEY);"
cursor.execute(sql)
-
+ logger.info(f'创建 mysql 表: {table_name}')

# 有特殊字符不需转义
sql = "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s;"
@@ -1017,7 +1018,7 @@ class MysqlUpload:
else:
sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
cursor.execute(sql)
-
+ logger.info(f"添加列: {col}({dtypes[col]})") # 添加列并指定数据类型

# 创建索引
if col == '日期':
@@ -1029,9 +1030,7 @@ class MysqlUpload:
connection.commit() # 提交事务

if df_sql:
-
- print(
- f'{now} 正在更新: mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count}, {self.filename}')
+ logger.info(f'正在更新: mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count}, {self.filename}')
engine = create_engine(
f"mysql+pymysql://{self.username}:{self.password}@{self.host}:{self.port}/{db_name}") # 创建数据库引擎
# df.to_csv('/Users/xigua/Downloads/mysql.csv', index=False, header=True, encoding='utf-8_sig')
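In the df_sql branch the whole DataFrame is written through SQLAlchemy rather than row by row; the hunk shows the engine being built from the connection credentials. The write that typically follows such an engine looks like the sketch below (if_exists and chunksize are illustrative values, not necessarily mdbq's; df, table_name and the credential variables are assumed to be defined as in the surrounding code):

    from sqlalchemy import create_engine

    engine = create_engine(
        f"mysql+pymysql://{username}:{password}@{host}:{port}/{db_name}")
    df.to_sql(
        name=table_name,
        con=engine,
        if_exists='append',   # keep existing rows, append the new ones
        index=False,
        chunksize=1000,
    )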
@@ -1067,11 +1066,11 @@ class MysqlUpload:
cursor.execute(
f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
- #
+ # logger.info(f'重置自增id')
else:
-
+ logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
except Exception as e:
-
+ logger.info(f'333 {table_name} {e}')
connection.rollback()
connection.commit() # 提交事务
connection.close()
@@ -1081,7 +1080,7 @@ class MysqlUpload:
if move_insert and '日期' in df.columns.tolist():
# 移除数据
dates = df['日期'].values.tolist()
- #
+ # logger.info(dates)
dates = [pd.to_datetime(item) for item in dates] # 需要先转换类型才能用 min, max
start_date = pd.to_datetime(min(dates)).strftime('%Y-%m-%d')
end_date = (pd.to_datetime(max(dates)) + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
@@ -1123,11 +1122,11 @@ class MysqlUpload:
cursor.execute(
f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
- #
+ # logger.info(f'重置自增id')
else:
-
+ logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
except Exception as e:
-
+ logger.info(f'333 {table_name} {e}')
connection.rollback()
connection.close()
return
@@ -1144,20 +1143,20 @@ class MysqlUpload:
for k, v in data.items():
condition += [f'`{k}` = "{v}"']
condition = ' AND '.join(condition) # 构建查询条件
- #
+ # logger.info(condition)

if drop_duplicates: # 查重插入
sql = "SELECT %s FROM %s WHERE %s" % (cols, table_name, condition)
# sql = f"SELECT {cols} FROM `{table_name}` WHERE `创建时间` = '2014-09-19 14:32:33'"
cursor.execute(sql)
result = cursor.fetchall() # 获取查询结果, 有结果返回 list 表示数据已存在(不重复插入),没有则返回空 tuple
- #
+ # logger.info(result)
if not result: # 数据不存在则插入
sql = f"INSERT INTO `{table_name}` ({cols}) VALUES (%s);" % (values)
- #
+ # logger.info(sql)
cursor.execute(sql)
# else:
- #
+ # logger.info(f'重复数据不插入: {condition[:50]}...')
elif icm_update: # 增量更新, 专门用于聚合数据,其他库不要调用
""" 使用增量更新: 需确保 icm_update['主键'] 传进来的列必须是数据表中唯一主键,值不会发生变化且不会重复,否则可能产生覆盖情况 """
sql = 'SELECT COLUMN_NAME FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
@@ -1174,7 +1173,7 @@ class MysqlUpload:
condition += [f'`{up_col}` = "{data[up_col]}"']
condition = ' AND '.join(condition) # condition值示例: `品销宝余额` = '2930.73' AND `短信剩余` = '67471'
sql = f"SELECT {unique_keys} FROM `{table_name}` WHERE {condition}"
- #
+ # logger.info(sql)
# sql = f"SELECT {unique_keys} FROM `{table_name}` WHERE `创建时间` = '2014-09-19 14:32:33'"
cursor.execute(sql)
results = cursor.fetchall() # results 是数据库取出的数据
@@ -1193,7 +1192,7 @@ class MysqlUpload:
mysql_value = re.sub(r'0+$', '', mysql_value)
mysql_value = re.sub(r'\.$', '', mysql_value)
if df_value != mysql_value: # 传进来的数据和数据库比较, 有变化
- #
+ # logger.info(f'{data['日期']}{data['商品id']}{col} 列的值有变化,{str(data[col])} != {str(result[col])}')
change_values += [f"`{col}` = \"{str(data[col])}\""]
change_col.append(col)
not_change_col = [item for item in update_col if item not in change_col]
@@ -1203,11 +1202,11 @@ class MysqlUpload:
not_change_values = [f'`{col}` = "{str(data[col])}"' for col in not_change_col]
not_change_values = ' AND '.join(
not_change_values) # 示例: `短信剩余` = '888' AND `test1` = '93'
- #
+ # logger.info(change_values, not_change_values)
condition += f' AND {not_change_values}' # 重新构建完整的查询条件,将未发生变化的列加进查询条件
change_values = ', '.join(f"{item}" for item in change_values) # 注意这里 item 外面没有反引号
sql = "UPDATE `%s` SET %s WHERE %s" % (table_name, change_values, condition)
- #
+ # logger.info(sql)
cursor.execute(sql)
else: # 没有数据返回,则直接插入数据
sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({values});"
@@ -1217,9 +1216,9 @@ class MysqlUpload:
cursor.execute(sql)
except Exception as e:
pass
- #
- #
- #
+ # logger.info(data)
+ # logger.info(values)
+ # logger.info(f'mysql -> df_to_mysql 报错: {e}, {self.filename}')
# breakpoint()

if reset_id:
@@ -1246,11 +1245,11 @@ class MysqlUpload:
cursor.execute(
f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
- #
+ # logger.info(f'重置自增id')
else:
-
+ logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
except Exception as e:
-
+ logger.info(f'333 {table_name} {e}')
connection.rollback()
connection.commit() # 提交事务
connection.close()
@@ -1265,7 +1264,7 @@ class MysqlUpload:
save_path: 保存位置
"""
if not filename:
-
+ logger.info(f'未指定文件名: filename')
return
# connection = pymysql.connect(**self.config) # 连接数据库
connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
@@ -1276,7 +1275,7 @@ class MysqlUpload:
cursor.execute(f"SHOW DATABASES LIKE '{db_name}'") # 检查数据库是否存在
database_exists = cursor.fetchone()
if not database_exists:
-
+ logger.info(f"Database {db_name} 数据库不存在")
return
self.config.update({'database': db_name})
# connection = pymysql.connect(**self.config) # 重新连接数据库
@@ -1288,7 +1287,7 @@ class MysqlUpload:
sql = "SHOW TABLES LIKE %s;" # 有特殊字符不需转义
cursor.execute(sql, (table_name))
if not cursor.fetchone():
-
+ logger.info(f'{table_name} -> 数据表不存在')
return

# 读取数据
@@ -1301,8 +1300,7 @@ class MysqlUpload:
# 将二进制数据写入到文件
with open(os.path.join(save_path, filename), 'wb') as f:
f.write(result['数据主体'])
-
- print(f'{now} 写入本地文件: ({self.host}:{self.port}) {db_name}/{table_name} -> {os.path.join(save_path, filename)}')
+ logger.info(f'写入本地文件: ({self.host}:{self.port}) {db_name}/{table_name} -> {os.path.join(save_path, filename)}')
connection.close()

def read_mysql(self, table_name, start_date, end_date, db_name='远程数据源', date_name='日期'):
@@ -1320,11 +1318,10 @@ class MysqlUpload:
cursor.execute(f"SHOW DATABASES LIKE '{db_name}'") # 检查数据库是否存在
database_exists = cursor.fetchone()
if not database_exists:
-
+ logger.info(f"Database {db_name} 数据库不存在")
return df
else:
-
- print(f'{now} mysql 正在查询表: {table_name}, 范围: {start_date}~{end_date}')
+ logger.info(f'mysql 正在查询表: {table_name}, 范围: {start_date}~{end_date}')
except:
return df
finally:
@@ -1346,19 +1343,18 @@ class MysqlUpload:
columns = [desc[0] for desc in cursor.description]
df = pd.DataFrame(rows, columns=columns) # 转为 df
except Exception as e:
-
+ logger.info(f'{e} {db_name} -> {table_name} 表不存在')
return df
finally:
connection.close()

if len(df) == 0:
-
+ logger.info(f'database: {db_name}, table: {table_name} 查询的数据为空')
else:
- now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
cost_time = int(time.time() - before_time)
if cost_time < 1:
cost_time = round(time.time() - before_time, 2)
-
+ logger.info(f'mysql ({self.host}) 表: {table_name} 获取数据长度: {len(df)}, 用时: {cost_time} 秒')
return df

def upload_pandas(self, update_path, db_name, days=None):
@@ -1385,10 +1381,6 @@ class MysqlUpload:
for name in files:
if name.endswith('.csv') and 'baidu' not in name:
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
- # if '日期' not in df.columns.tolist():
- # now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
- # print(f'{now} {root_file} 缺少日期列, 不支持上传 mysql')
- # continue
if '日期' in df.columns.tolist():
df['日期'] = df['日期'].apply(lambda x: pd.to_datetime(x) if x else x)
df = df[df['日期'] >= start_date]
@@ -1398,10 +1390,6 @@ class MysqlUpload:
elif os.path.isfile(f_path):
if f_path.endswith('.csv') and 'baidu' not in f_path:
df = pd.read_csv(f_path, encoding='utf-8_sig', header=0, na_filter=False)
- # if '日期' not in df.columns.tolist():
- # now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
- # print(f'{now} {root_file} 缺少日期列, 不支持上传 mysql')
- # continue
if '日期' not in df.columns.tolist():
df['日期'] = df['日期'].apply(lambda x: pd.to_datetime(x) if x else x)
df = df[df['日期'] >= start_date]
@@ -1449,14 +1437,14 @@ class OptimizeDatas:
try:
return func(*args, **kwargs)
except Exception as e:
-
+ logger.info(f'{func.__name__}, {e}') # 将异常信息返回
with open(error_file, 'a') as f:
now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
f.write(f'\n{now} \n')
f.write(f'函数注释内容(用于定位函数): {func.__doc__} \n')
# f.write(f'报错的文件:\n{e.__traceback__.tb_frame.f_globals["__file__"]}\n') # 发生异常所在的文件
traceback.print_exc(file=open(error_file, 'a')) # 返回完整的堆栈信息
-
+ logger.info(f'更多信息请查看日志文件: {error_file}')

return wrapper

@@ -1467,10 +1455,10 @@ class OptimizeDatas:
connection = pymysql.connect(**_config) # 连接数据库
return connection
except Exception as e:
-
+ logger.info(f'连接失败,正在重试: {attempts}/{max_try} {e}')
attempts += 1
time.sleep(30)
-
+ logger.info(f'{_db_name}: 连接失败,重试次数超限,当前设定次数: {max_try}')
return None

def optimize_list(self):
@@ -1479,8 +1467,7 @@ class OptimizeDatas:
需要设置 self.db_name_lists
"""
if not self.db_name_lists:
-
- print(f'{now} 尚未设置参数: self.db_name_lists')
+ logger.info(f'尚未设置参数: self.db_name_lists')
return
for db_name in self.db_name_lists:
self.db_name = db_name
@@ -1489,13 +1476,11 @@ class OptimizeDatas:
def optimize(self, except_key=['更新时间']):
""" 更新一个数据库 移除冗余数据 """
if not self.db_name:
-
- print(f'{now} 尚未设置参数: self.db_name')
+ logger.info(f'尚未设置参数: self.db_name')
return
tables = self.table_list(db_name=self.db_name)
if not tables:
-
- print(f'{now} {self.db_name} -> 数据表不存在')
+ logger.info(f'{self.db_name} -> 数据表不存在')
return

# 日期初始化
@@ -1512,8 +1497,7 @@ class OptimizeDatas:
start_date_before = self.start_date
end_date_before = self.end_date

-
- print(f'{now} mysql({self.host}: {self.port}) {self.db_name} 数据库优化中(日期长度: {self.days} 天)...')
+ logger.info(f'mysql({self.host}: {self.port}) {self.db_name} 数据库优化中(日期长度: {self.days} 天)...')
for table_dict in tables:
for key, table_name in table_dict.items():
# if '店铺指标' not in table_name:
@@ -1525,12 +1509,11 @@ class OptimizeDatas:
return
with self.connection.cursor() as cursor:
sql = f"SELECT 1 FROM `{table_name}` LIMIT 1"
- #
+ # logger.info(sql)
cursor.execute(sql)
result = cursor.fetchone()
if not result:
-
- print(f'{now} 数据表: {table_name}, 数据长度为 0')
+ logger.info(f'数据表: {table_name}, 数据长度为 0')
continue # 检查数据表是否为空

cursor.execute(f"SHOW FULL COLUMNS FROM `{table_name}`") # 查询数据表的列信息
@@ -1547,7 +1530,7 @@ class OptimizeDatas:
max_result = cursor.fetchone()
cursor.execute(sql_min)
min_result = cursor.fetchone()
- #
+ # logger.info(min_result['min_date'], max_result['max_date'])
# 匹配修改为合适的起始和结束日期
if self.start_date < pd.to_datetime(min_result['min_date']):
self.start_date = pd.to_datetime(min_result['min_date'])
@@ -1585,15 +1568,14 @@ class OptimizeDatas:
cursor.execute(
f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
- #
+ # logger.info(f'重置自增id')
else:
-
+ logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
except Exception as e:
-
+ logger.info(f'333 {table_name} {e}')
self.connection.rollback()
self.connection.close()
-
- print(f'{now} mysql({self.host}: {self.port}) {self.db_name} 数据库优化完成!')
+ logger.info(f'mysql({self.host}: {self.port}) {self.db_name} 数据库优化完成!')

def delete_duplicate(self, table_name, date, except_key=['更新时间']):
datas = self.table_datas(db_name=self.db_name, table_name=str(table_name), date=date)
@@ -1614,7 +1596,7 @@ class OptimizeDatas:
continue
all_datas.append(data) # 数据没有重复
except Exception as e:
-
+ logger.info(f'{table_name} 函数: mysql - > OptimizeDatas -> delete_duplicate -> {e}')
del all_datas

if not duplicate_id: # 如果没有重复数据,则跳过该数据表
@@ -1626,11 +1608,10 @@ class OptimizeDatas:
# 移除冗余数据
sql = f"DELETE FROM `{table_name}` WHERE id IN ({placeholders})"
cursor.execute(sql, duplicate_id)
-
- print(f"{now} {table_name} -> {date.strftime('%Y-%m-%d')} before: {len(datas)}, remove: {cursor.rowcount}")
+ logger.info(f"{table_name} -> {date.strftime('%Y-%m-%d')} before: {len(datas)}, remove: {cursor.rowcount}")
self.connection.commit() # 提交事务
except Exception as e:
-
+ logger.info(f'{self.db_name}/{table_name}, {e}')
self.connection.rollback() # 异常则回滚

def delete_duplicate2(self, table_name, except_key=['更新时间']):
@@ -1664,12 +1645,11 @@ class OptimizeDatas:
# 移除冗余数据
sql = f"DELETE FROM `{table_name}` WHERE id IN ({placeholders})"
cursor.execute(sql, duplicate_id)
-
- print(f"{now} {table_name} -> before: {len(datas)}, "
+ logger.info(f"{table_name} -> before: {len(datas)}, "
f"remove: {cursor.rowcount}")
self.connection.commit() # 提交事务
except Exception as e:
-
+ logger.info(f'{self.db_name}/{table_name}, {e}')
self.connection.rollback() # 异常则回滚

def database_list(self):
@@ -1695,11 +1675,10 @@ class OptimizeDatas:
cursor.execute(f"SHOW DATABASES LIKE '{db_name}'") # 检查数据库是否存在
database_exists = cursor.fetchone()
if not database_exists:
-
- print(f'{now} {db_name}: 数据表不存在!')
+ logger.info(f'{db_name}: 数据表不存在!')
return
except Exception as e:
-
+ logger.info(f'002 {e}')
return
finally:
connection.close() # 断开连接
@@ -1730,7 +1709,7 @@ class OptimizeDatas:
cursor.execute(sql)
results = cursor.fetchall()
except Exception as e:
-
+ logger.info(f'001 {e}')
finally:
connection.close()
return results
@@ -1784,7 +1763,7 @@ def year_month_day_bak(start_date, end_date):
try:
start_date = f'{pd.to_datetime(start_date).year}-{pd.to_datetime(start_date).month}-01'
except Exception as e:
-
+ logger.info(e)
return []
# 使用pandas的date_range创建一个日期范围,频率为'MS'代表每月开始
date_range = pd.date_range(start=start_date, end=end_date, freq='MS')
@@ -1805,4 +1784,4 @@ if __name__ == '__main__':
conf = myconfig.main()
data = conf['Windows']['xigua_lx']['mysql']['local']
username, password, host, port = data['username'], data['password'], data['host'], data['port']
-
+ logger.info(username, password, host, port)