mdbq 3.6.13__py3-none-any.whl → 3.6.14__py3-none-any.whl
This diff shows the changes between two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- mdbq/aggregation/optimize_data.py +7 -5
- mdbq/aggregation/query_data.py +41 -39
- mdbq/mysql/mysql.py +121 -119
- mdbq/mysql/s_query.py +9 -7
- mdbq/redis/getredis.py +0 -1
- {mdbq-3.6.13.dist-info → mdbq-3.6.14.dist-info}/METADATA +1 -1
- {mdbq-3.6.13.dist-info → mdbq-3.6.14.dist-info}/RECORD +9 -9
- {mdbq-3.6.13.dist-info → mdbq-3.6.14.dist-info}/WHEEL +0 -0
- {mdbq-3.6.13.dist-info → mdbq-3.6.14.dist-info}/top_level.txt +0 -0
mdbq/mysql/mysql.py
CHANGED
@@ -16,6 +16,7 @@ import calendar
from mdbq.config import set_support
from mdbq.config import myconfig
import traceback
+ import logging

warnings.filterwarnings('ignore')
"""
@@ -29,6 +30,7 @@ warnings.filterwarnings('ignore')

"""
error_file = os.path.join(set_support.SetSupport(dirname='support').dirname, 'error.log')
+ logger = logging.getLogger(__name__)


def is_valid_date(date_string):
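The two hunks above are the core of this release: mysql.py now imports the standard logging module and creates a module-level logger, and the hunks below route the module's messages through logger.info() calls. Nothing in this diff configures a handler, so those messages stay silent until the consuming application sets logging up. A minimal usage sketch, assuming an application that imports mdbq and wants to see the new INFO-level output (the level and format below are illustrative choices, not part of the package):

    import logging

    # Route INFO-level records to stderr; without some configuration like this,
    # logger.info() calls on an unconfigured logger are dropped (the default
    # effective level is WARNING).
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(name)s %(levelname)s %(message)s",
    )

    # The logger added in this release is named after the module path, so it can
    # also be tuned on its own instead of globally:
    logging.getLogger("mdbq.mysql.mysql").setLevel(logging.INFO)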
@@ -119,14 +121,14 @@ class MysqlUpload:
try:
return func(*args, **kwargs)
except Exception as e:
-
+ logger.info(f'{func.__name__}, {e}') # 将异常信息返回
with open(error_file, 'a') as f:
now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
f.write(f'\n{now} \n')
f.write(f'函数注释内容(用于定位函数): {func.__doc__} \n')
# f.write(f'报错的文件:\n{e.__traceback__.tb_frame.f_globals["__file__"]}\n') # 发生异常所在的文件
traceback.print_exc(file=open(error_file, 'a')) # 返回完整的堆栈信息
-
+ logger.info(f'更多信息请查看日志文件: {error_file}')

return wrapper

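This hunk only shows the interior of MysqlUpload's error-handling wrapper; the decorator that surrounds it is not part of the diff. A rough sketch of the pattern, assuming a conventional functools.wraps-based decorator (the name log_exceptions and the hard-coded error_file path are placeholders, not the package's API):

    import datetime
    import functools
    import logging
    import traceback

    logger = logging.getLogger(__name__)
    error_file = 'error.log'  # the package builds this path via set_support; simplified here

    def log_exceptions(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                # short summary goes to the logger, full details to the error file
                logger.info(f'{func.__name__}, {e}')
                with open(error_file, 'a') as f:
                    now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                    f.write(f'\n{now} \n')
                    f.write(f'docstring (to locate the function): {func.__doc__} \n')
                    traceback.print_exc(file=f)  # full stack trace
                logger.info(f'see the log file for details: {error_file}')
        return wrapper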
@@ -137,16 +139,16 @@ class MysqlUpload:
connection = pymysql.connect(**_config) # 连接数据库
return connection
except Exception as e:
-
+ logger.info(f'连接失败,正在重试: {attempts}/{max_try} {e}')
attempts += 1
time.sleep(30)
-
+ logger.info(f'{_db_name}: 连接失败,重试次数超限,当前设定次数: {max_try}')
return None

def cover_doc_dtypes(self, dict_data):
""" 清理字典键值 并转换数据类型 """
if not dict_data:
-
+ logger.info(f'mysql.py -> MysqlUpload -> cover_dict_dtypes -> 传入的字典不能为空')
return
__res_dict = {}
new_dict_data = {}
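The keep_connect hunk above shows the retry messages but not the loop around them. A hedged reconstruction of that retry pattern, with the signature and loop structure inferred from the visible lines (only the logging calls and the 30-second sleep are confirmed by the diff):

    import logging
    import time
    import pymysql

    logger = logging.getLogger(__name__)

    def keep_connect(_db_name, _config, max_try=10):
        attempts = 1
        while attempts <= max_try:
            try:
                return pymysql.connect(**_config)  # return on the first successful connection
            except Exception as e:
                logger.info(f'connection failed, retrying: {attempts}/{max_try} {e}')
                attempts += 1
                time.sleep(30)
        logger.info(f'{_db_name}: connection failed, retry limit reached: {max_try}')
        return None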
@@ -213,7 +215,7 @@ class MysqlUpload:
if not self.config:
return
if '数据主体' not in dict_data.keys():
-
+ logger.info(f'dict_data 中"数据主体"键不能为空')
return

# connection = pymysql.connect(**self.config) # 连接数据库
@@ -234,7 +236,7 @@ class MysqlUpload:
sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
cursor.execute(sql)
connection.commit()
-
+ logger.info(f"创建Database: {db_name}")

self.config.update({'database': db_name}) # 添加更新 config 字段
# connection = pymysql.connect(**self.config) # 重新连接数据库
@@ -248,7 +250,7 @@ class MysqlUpload:
if not cursor.fetchone():
sql = f"CREATE TABLE IF NOT EXISTS `{table_name}` (id INT AUTO_INCREMENT PRIMARY KEY);"
cursor.execute(sql)
-
+ logger.info(f'创建 mysql 表: {table_name}')

new_dict = {}
[new_dict.update({k: v}) for k, v in dict_data.items() if k != '数据主体']
@@ -275,11 +277,11 @@ class MysqlUpload:
else:
sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {set_typ[col]} NOT NULL;"
cursor.execute(sql)
-
+ logger.info(f"添加列: {col}({set_typ[col]})") # 添加列并指定数据类型

if col == '日期':
sql = f"CREATE INDEX index_name ON `{table_name}`(`{col}`);"
-
+ logger.info(f"设置为索引: {col}({set_typ[col]})")
cursor.execute(sql)
connection.commit() # 提交事务

@@ -290,7 +292,7 @@ class MysqlUpload:
for up_col in remove_by_key:
condition += [f'`{up_col}` = "{dict_data[up_col]}"']
condition = ' AND '.join(condition)
- #
+ # logger.info(condition)
sql = f"SELECT {se_key} FROM `{table_name}` WHERE {condition}"
cursor.execute(sql)
result = cursor.fetchall()
@@ -301,14 +303,14 @@ class MysqlUpload:
# 插入数据到数据库
# 有数据格式错误问题,所以分开处理,将数据主体移到最后面用占位符
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
+ logger.info(f'{now} 正在更新: mysql ({self.host}:{self.port}) {db_name}/{table_name} -> {filename}')
if new_dict:
cols = ', '.join(f"`{item}`" for item in new_dict.keys()) # 列名需要转义
values = ', '.join([f'"{item}"' for item in new_dict.values()]) # 值要加引号
cols = ', '.join([cols, '数据主体'])
binary_data = dict_data['数据主体']
sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({values}, %s)"
- #
+ # logger.info(sql)
cursor.execute(sql, binary_data)
else:
sql = f"""INSERT INTO `{table_name}` (数据主体) VALUES (%s);"""
@@ -338,11 +340,11 @@ class MysqlUpload:
cursor.execute(
f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
- #
+ # logger.info(f'重置自增id')
else:
-
+ logger.info(f'{table_name} 存在复合主键: 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
except Exception as e:
-
+ logger.info(f'333 {table_name} {e}')
connection.rollback()
connection.commit()

@@ -362,7 +364,7 @@ class MysqlUpload:
return
if icm_update:
if main_key or unique_main_key:
-
+ logger.info(f'icm_update/unique_main_key/unique_main_key 参数不能同时设定')
return
if not main_key:
main_key = []
@@ -370,7 +372,7 @@ class MysqlUpload:
unique_main_key = []

if not dict_data_list:
-
+ logger.info(f'dict_data_list 不能为空 ')
return
dict_data = dict_data_list[0]
if cut_data:
@@ -383,9 +385,9 @@ class MysqlUpload:
elif str(cut_data).lower() == 'month':
table_name = f'{table_name}_{__y_m}'
else:
-
+ logger.info(f'参数不正确,cut_data应为 year 或 month ')
except Exception as e:
-
+ logger.info(f'{table_name} 将数据按年/月保存(cut_data),但在转换日期时报错 -> {e}')

# connection = pymysql.connect(**self.config) # 连接数据库
connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
@@ -405,7 +407,7 @@ class MysqlUpload:
sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
cursor.execute(sql)
connection.commit()
-
+ logger.info(f"创建Database: {db_name}")

self.config.update({'database': db_name}) # 添加更新 config 字段
# connection = pymysql.connect(**self.config) # 重新连接数据库
@@ -419,7 +421,7 @@ class MysqlUpload:
if not cursor.fetchone():
sql = f"CREATE TABLE IF NOT EXISTS `{table_name}` (id INT AUTO_INCREMENT PRIMARY KEY);"
cursor.execute(sql)
-
+ logger.info(f'创建 mysql 表: {table_name}')

# 根据 dict_data 的值添加指定的数据类型
dtypes, dict_data = self.cover_dict_dtypes(dict_data=dict_data) # {'店铺名称': 'varchar(100)',...}
@@ -443,13 +445,13 @@ class MysqlUpload:
else:
sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
# sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
- #
+ # logger.info(sql)
cursor.execute(sql)
-
+ logger.info(f"添加列: {col}({dtypes[col]})") # 添加列并指定数据类型

if col in main_key or col == '日期':
sql = f"CREATE INDEX index_name ON `{table_name}`(`{col}`);"
-
+ logger.info(f"设置为索引: {col}({dtypes[col]})")
cursor.execute(sql)
if col in unique_main_key:
if dtypes[col] == 'mediumtext':
@@ -464,22 +466,22 @@ class MysqlUpload:
# if not result:
# if col in main_key:
# sql = f"CREATE INDEX index_name ON `{table_name}`(`{col}`);"
- #
+ # logger.info(f"设置为索引: {col}({dtypes[col]})")
# cursor.execute(sql)
# elif col in unique_main_key:
# if dtypes[col] == 'mediumtext':
# sql = f"CREATE INDEX UNIQUE index_name ON `{table_name}` (`{col}`({index_length}));"
# else:
# sql = f"CREATE INDEX UNIQUE index_name ON `{table_name}` (`{col}`);"
- #
- #
+ # logger.info(f"设置唯一索引: {col}({dtypes[col]})")
+ # logger.info(sql)
# cursor.execute(sql)
connection.commit() # 提交事务
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# 处理插入的数据
for dict_data in dict_data_list:
- #
+ # logger.info(dict_data)
dtypes, dict_data = self.cover_dict_dtypes(dict_data=dict_data) # {'店铺名称': 'varchar(100)',...}
if icm_update:
""" 使用增量更新: 需确保 icm_update['主键'] 传进来的列组合是数据表中唯一,值不会发生变化且不会重复,否则可能产生覆盖 """
@@ -496,7 +498,7 @@ class MysqlUpload:
condition += [f'`{up_col}` = "{dict_data[up_col]}"']
condition = ' AND '.join(condition) # condition值示例: `品销宝余额` = '2930.73' AND `短信剩余` = '67471'
sql = f"SELECT {unique_keys} FROM `{table_name}` WHERE {condition}"
- #
+ # logger.info(sql)
# sql = f"SELECT {unique_keys} FROM `{table_name}` WHERE `创建时间` = '2014-09-19 14:32:33'"
cursor.execute(sql)
results = cursor.fetchall() # results 是数据库取出的数据
@@ -515,7 +517,7 @@ class MysqlUpload:
mysql_value = re.sub(r'0+$', '', mysql_value)
mysql_value = re.sub(r'\.$', '', mysql_value)
if df_value != mysql_value: # 传进来的数据和数据库比较, 有变化
- #
+ # logger.info(f'{dict_data['日期']}{dict_data['商品id']}{col} 列的值有变化,{str(dict_data[col])} != {str(result[col])}')
change_values += [f"`{col}` = \"{str(dict_data[col])}\""]
change_col.append(col)
not_change_col = [item for item in update_col if item not in change_col]
@@ -525,11 +527,11 @@ class MysqlUpload:
not_change_values = [f'`{col}` = "{str(dict_data[col])}"' for col in not_change_col]
not_change_values = ' AND '.join(
not_change_values) # 示例: `短信剩余` = '888' AND `test1` = '93'
- #
+ # logger.info(change_values, not_change_values)
condition += f' AND {not_change_values}' # 重新构建完整的查询条件,将未发生变化的列加进查询条件
change_values = ', '.join(f"{item}" for item in change_values) # 注意这里 item 外面没有反引号
sql = "UPDATE `%s` SET %s WHERE %s" % (table_name, change_values, condition)
- #
+ # logger.info(sql)
cursor.execute(sql)
else: # 没有数据返回,则直接插入数据
cols = ', '.join(f"`{item}`" for item in dict_data.keys()) # 列名需要转义
@@ -556,7 +558,7 @@ class MysqlUpload:

# 构建 sql
sql = f"INSERT INTO %s (%s) VALUES (%s) ON DUPLICATE KEY UPDATE %s" % (table_name, keys_data, values_data, update_datas)
- #
+ # logger.info(sql)
cursor.execute(sql)
connection.commit() # 提交数据库
connection.close()
@@ -577,7 +579,7 @@ class MysqlUpload:
return
if icm_update:
if main_key or unique_main_key:
-
+ logger.info(f'icm_update/unique_main_key/unique_main_key 参数不能同时设定')
return
if not main_key:
main_key = []
@@ -594,9 +596,9 @@ class MysqlUpload:
elif str(cut_data).lower() == 'month':
table_name = f'{table_name}_{__y_m}'
else:
-
+ logger.info(f'参数不正确,cut_data应为 year 或 month ')
except Exception as e:
-
+ logger.info(f'{table_name} 将数据按年/月保存(cut_data),但在转换日期时报错 -> {e}')

# connection = pymysql.connect(**self.config) # 连接数据库
connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
@@ -616,7 +618,7 @@ class MysqlUpload:
sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
cursor.execute(sql)
connection.commit()
-
+ logger.info(f"创建Database: {db_name}")

self.config.update({'database': db_name}) # 添加更新 config 字段
# connection = pymysql.connect(**self.config) # 重新连接数据库
@@ -630,7 +632,7 @@ class MysqlUpload:
if not cursor.fetchone():
sql = f"CREATE TABLE IF NOT EXISTS `{table_name}` (id INT AUTO_INCREMENT PRIMARY KEY);"
cursor.execute(sql)
-
+ logger.info(f'创建 mysql 表: {table_name}')

# 根据 dict_data 的值添加指定的数据类型
dtypes, dict_data = self.cover_dict_dtypes(dict_data=dict_data) # {'店铺名称': 'varchar(100)',...}
@@ -654,13 +656,13 @@ class MysqlUpload:
else:
sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
# sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
- #
+ # logger.info(sql)
cursor.execute(sql)
-
+ logger.info(f"添加列: {col}({dtypes[col]})") # 添加列并指定数据类型

if col in main_key or col == '日期':
sql = f"CREATE INDEX index_name ON `{table_name}`(`{col}`);"
-
+ logger.info(f"设置为索引: {col}({dtypes[col]})")
cursor.execute(sql)
if col in unique_main_key:
if dtypes[col] == 'mediumtext':
@@ -675,15 +677,15 @@ class MysqlUpload:
# if not result:
# if col in main_key:
# sql = f"CREATE INDEX index_name ON `{table_name}`(`{col}`);"
- #
+ # logger.info(f"设置为索引: {col}({dtypes[col]})")
# cursor.execute(sql)
# elif col in unique_main_key:
# if dtypes[col] == 'mediumtext':
# sql = f"CREATE INDEX UNIQUE index_name ON `{table_name}` (`{col}`({index_length}));"
# else:
# sql = f"CREATE INDEX UNIQUE index_name ON `{table_name}` (`{col}`);"
- #
- #
+ # logger.info(f"设置唯一索引: {col}({dtypes[col]})")
+ # logger.info(sql)
# cursor.execute(sql)
connection.commit() # 提交事务
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
@@ -704,7 +706,7 @@ class MysqlUpload:
condition += [f'`{up_col}` = "{dict_data[up_col]}"']
condition = ' AND '.join(condition) # condition值示例: `品销宝余额` = '2930.73' AND `短信剩余` = '67471'
sql = f"SELECT {unique_keys} FROM `{table_name}` WHERE {condition}"
- #
+ # logger.info(sql)
# sql = f"SELECT {unique_keys} FROM `{table_name}` WHERE `创建时间` = '2014-09-19 14:32:33'"
cursor.execute(sql)
results = cursor.fetchall() # results 是数据库取出的数据
@@ -723,7 +725,7 @@ class MysqlUpload:
mysql_value = re.sub(r'0+$', '', mysql_value)
mysql_value = re.sub(r'\.$', '', mysql_value)
if df_value != mysql_value: # 传进来的数据和数据库比较, 有变化
- #
+ # logger.info(f'{dict_data['日期']}{dict_data['商品id']}{col} 列的值有变化,{str(dict_data[col])} != {str(result[col])}')
change_values += [f"`{col}` = \"{str(dict_data[col])}\""]
change_col.append(col)
not_change_col = [item for item in update_col if item not in change_col]
@@ -733,11 +735,11 @@ class MysqlUpload:
not_change_values = [f'`{col}` = "{str(dict_data[col])}"' for col in not_change_col]
not_change_values = ' AND '.join(
not_change_values) # 示例: `短信剩余` = '888' AND `test1` = '93'
- #
+ # logger.info(change_values, not_change_values)
condition += f' AND {not_change_values}' # 重新构建完整的查询条件,将未发生变化的列加进查询条件
change_values = ', '.join(f"{item}" for item in change_values) # 注意这里 item 外面没有反引号
sql = "UPDATE `%s` SET %s WHERE %s" % (table_name, change_values, condition)
- #
+ # logger.info(sql)
cursor.execute(sql)
else: # 没有数据返回,则直接插入数据
cols = ', '.join(f"`{item}`" for item in dict_data.keys()) # 列名需要转义
@@ -765,7 +767,7 @@ class MysqlUpload:

# 构建 sql
sql = f"INSERT INTO %s (%s) VALUES (%s) ON DUPLICATE KEY UPDATE %s" % (table_name, keys_data, values_data, update_datas)
- #
+ # logger.info(sql)
cursor.execute(sql)
connection.commit() # 提交数据库
connection.close()
@@ -773,7 +775,7 @@ class MysqlUpload:
def cover_dict_dtypes(self, dict_data):
""" 清理字典键值 并转换数据类型 """
if not dict_data:
-
+ logger.info(f'mysql.py -> MysqlUpload -> cover_dict_dtypes -> 传入的字典不能为空')
return
__res_dict = {}
new_dict_data = {}
@@ -924,23 +926,23 @@ class MysqlUpload:
return
if icm_update:
if move_insert or df_sql or drop_duplicates:
-
+ logger.info(f'icm_update/move_insert/df_sql/drop_duplicates 参数不能同时设定')
return
if move_insert:
if icm_update or df_sql or drop_duplicates:
-
+ logger.info(f'icm_update/move_insert/df_sql/drop_duplicates 参数不能同时设定')
return

self.filename = filename
if isinstance(df, pd.DataFrame):
if len(df) == 0:
-
+ logger.info(f'{db_name}: {table_name} 传入的 df 数据长度为0, {self.filename}')
return
else:
-
+ logger.info(f'{db_name}: {table_name} 传入的 df 不是有效的 dataframe 结构, {self.filename}')
return
if not db_name or db_name == 'None':
-
+ logger.info(f'{db_name} 不能为 None')
return

if cut_data:
@@ -956,9 +958,9 @@ class MysqlUpload:
elif str(cut_data).lower() == 'month':
table_name = f'{table_name}_{min_year}-{min_month}'
else:
-
+ logger.info(f'参数不正确,cut_data应为 year 或 month ')
except Exception as e:
-
+ logger.info(f'{table_name} 将数据按年/月保存(cut_data),但在转换日期时报错 -> {e}')
# 清理 dataframe 非法值,并转换获取数据类型
dtypes, df = self.convert_df_dtypes(df)
if set_typ:
@@ -985,7 +987,7 @@ class MysqlUpload:
sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
cursor.execute(sql)
connection.commit()
-
+ logger.info(f"创建Database: {db_name}")

self.config.update({'database': db_name}) # 添加更新 config 字段
# connection = pymysql.connect(**self.config) # 重新连接数据库
@@ -999,7 +1001,7 @@ class MysqlUpload:
if not cursor.fetchone():
sql = f"CREATE TABLE IF NOT EXISTS `{table_name}` (id INT AUTO_INCREMENT PRIMARY KEY);"
cursor.execute(sql)
-
+ logger.info(f'创建 mysql 表: {table_name}')

# 有特殊字符不需转义
sql = "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s;"
@@ -1017,7 +1019,7 @@ class MysqlUpload:
else:
sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
cursor.execute(sql)
-
+ logger.info(f"添加列: {col}({dtypes[col]})") # 添加列并指定数据类型

# 创建索引
if col == '日期':
@@ -1030,7 +1032,7 @@ class MysqlUpload:

if df_sql:
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
+ logger.info(
f'{now} 正在更新: mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count}, {self.filename}')
engine = create_engine(
f"mysql+pymysql://{self.username}:{self.password}@{self.host}:{self.port}/{db_name}") # 创建数据库引擎
@@ -1067,11 +1069,11 @@ class MysqlUpload:
cursor.execute(
f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
- #
+ # logger.info(f'重置自增id')
else:
-
+ logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
except Exception as e:
-
+ logger.info(f'333 {table_name} {e}')
connection.rollback()
connection.commit() # 提交事务
connection.close()
@@ -1081,7 +1083,7 @@ class MysqlUpload:
if move_insert and '日期' in df.columns.tolist():
# 移除数据
dates = df['日期'].values.tolist()
- #
+ # logger.info(dates)
dates = [pd.to_datetime(item) for item in dates] # 需要先转换类型才能用 min, max
start_date = pd.to_datetime(min(dates)).strftime('%Y-%m-%d')
end_date = (pd.to_datetime(max(dates)) + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
@@ -1123,11 +1125,11 @@ class MysqlUpload:
cursor.execute(
f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
- #
+ # logger.info(f'重置自增id')
else:
-
+ logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
except Exception as e:
-
+ logger.info(f'333 {table_name} {e}')
connection.rollback()
connection.close()
return
@@ -1144,20 +1146,20 @@ class MysqlUpload:
for k, v in data.items():
condition += [f'`{k}` = "{v}"']
condition = ' AND '.join(condition) # 构建查询条件
- #
+ # logger.info(condition)

if drop_duplicates: # 查重插入
sql = "SELECT %s FROM %s WHERE %s" % (cols, table_name, condition)
# sql = f"SELECT {cols} FROM `{table_name}` WHERE `创建时间` = '2014-09-19 14:32:33'"
cursor.execute(sql)
result = cursor.fetchall() # 获取查询结果, 有结果返回 list 表示数据已存在(不重复插入),没有则返回空 tuple
- #
+ # logger.info(result)
if not result: # 数据不存在则插入
sql = f"INSERT INTO `{table_name}` ({cols}) VALUES (%s);" % (values)
- #
+ # logger.info(sql)
cursor.execute(sql)
# else:
- #
+ # logger.info(f'重复数据不插入: {condition[:50]}...')
elif icm_update: # 增量更新, 专门用于聚合数据,其他库不要调用
""" 使用增量更新: 需确保 icm_update['主键'] 传进来的列必须是数据表中唯一主键,值不会发生变化且不会重复,否则可能产生覆盖情况 """
sql = 'SELECT COLUMN_NAME FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
@@ -1174,7 +1176,7 @@ class MysqlUpload:
condition += [f'`{up_col}` = "{data[up_col]}"']
condition = ' AND '.join(condition) # condition值示例: `品销宝余额` = '2930.73' AND `短信剩余` = '67471'
sql = f"SELECT {unique_keys} FROM `{table_name}` WHERE {condition}"
- #
+ # logger.info(sql)
# sql = f"SELECT {unique_keys} FROM `{table_name}` WHERE `创建时间` = '2014-09-19 14:32:33'"
cursor.execute(sql)
results = cursor.fetchall() # results 是数据库取出的数据
@@ -1193,7 +1195,7 @@ class MysqlUpload:
mysql_value = re.sub(r'0+$', '', mysql_value)
mysql_value = re.sub(r'\.$', '', mysql_value)
if df_value != mysql_value: # 传进来的数据和数据库比较, 有变化
- #
+ # logger.info(f'{data['日期']}{data['商品id']}{col} 列的值有变化,{str(data[col])} != {str(result[col])}')
change_values += [f"`{col}` = \"{str(data[col])}\""]
change_col.append(col)
not_change_col = [item for item in update_col if item not in change_col]
@@ -1203,11 +1205,11 @@ class MysqlUpload:
not_change_values = [f'`{col}` = "{str(data[col])}"' for col in not_change_col]
not_change_values = ' AND '.join(
not_change_values) # 示例: `短信剩余` = '888' AND `test1` = '93'
- #
+ # logger.info(change_values, not_change_values)
condition += f' AND {not_change_values}' # 重新构建完整的查询条件,将未发生变化的列加进查询条件
change_values = ', '.join(f"{item}" for item in change_values) # 注意这里 item 外面没有反引号
sql = "UPDATE `%s` SET %s WHERE %s" % (table_name, change_values, condition)
- #
+ # logger.info(sql)
cursor.execute(sql)
else: # 没有数据返回,则直接插入数据
sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({values});"
@@ -1217,9 +1219,9 @@ class MysqlUpload:
cursor.execute(sql)
except Exception as e:
pass
- #
- #
- #
+ # logger.info(data)
+ # logger.info(values)
+ # logger.info(f'mysql -> df_to_mysql 报错: {e}, {self.filename}')
# breakpoint()

if reset_id:
@@ -1246,11 +1248,11 @@ class MysqlUpload:
cursor.execute(
f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
- #
+ # logger.info(f'重置自增id')
else:
-
+ logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
except Exception as e:
-
+ logger.info(f'333 {table_name} {e}')
connection.rollback()
connection.commit() # 提交事务
connection.close()
@@ -1265,7 +1267,7 @@ class MysqlUpload:
save_path: 保存位置
"""
if not filename:
-
+ logger.info(f'未指定文件名: filename')
return
# connection = pymysql.connect(**self.config) # 连接数据库
connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
@@ -1276,7 +1278,7 @@ class MysqlUpload:
cursor.execute(f"SHOW DATABASES LIKE '{db_name}'") # 检查数据库是否存在
database_exists = cursor.fetchone()
if not database_exists:
-
+ logger.info(f"Database {db_name} 数据库不存在")
return
self.config.update({'database': db_name})
# connection = pymysql.connect(**self.config) # 重新连接数据库
@@ -1288,7 +1290,7 @@ class MysqlUpload:
sql = "SHOW TABLES LIKE %s;" # 有特殊字符不需转义
cursor.execute(sql, (table_name))
if not cursor.fetchone():
-
+ logger.info(f'{table_name} -> 数据表不存在')
return

# 读取数据
@@ -1302,7 +1304,7 @@ class MysqlUpload:
with open(os.path.join(save_path, filename), 'wb') as f:
f.write(result['数据主体'])
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
+ logger.info(f'{now} 写入本地文件: ({self.host}:{self.port}) {db_name}/{table_name} -> {os.path.join(save_path, filename)}')
connection.close()

def read_mysql(self, table_name, start_date, end_date, db_name='远程数据源', date_name='日期'):
@@ -1320,11 +1322,11 @@ class MysqlUpload:
cursor.execute(f"SHOW DATABASES LIKE '{db_name}'") # 检查数据库是否存在
database_exists = cursor.fetchone()
if not database_exists:
-
+ logger.info(f"Database {db_name} 数据库不存在")
return df
else:
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
+ logger.info(f'{now} mysql 正在查询表: {table_name}, 范围: {start_date}~{end_date}')
except:
return df
finally:
@@ -1346,19 +1348,19 @@ class MysqlUpload:
columns = [desc[0] for desc in cursor.description]
df = pd.DataFrame(rows, columns=columns) # 转为 df
except Exception as e:
-
+ logger.info(f'{e} {db_name} -> {table_name} 表不存在')
return df
finally:
connection.close()

if len(df) == 0:
-
+ logger.info(f'database: {db_name}, table: {table_name} 查询的数据为空')
else:
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
cost_time = int(time.time() - before_time)
if cost_time < 1:
cost_time = round(time.time() - before_time, 2)
-
+ logger.info(f'{now} mysql ({self.host}) 表: {table_name} 获取数据长度: {len(df)}, 用时: {cost_time} 秒')
return df

def upload_pandas(self, update_path, db_name, days=None):
@@ -1387,7 +1389,7 @@ class MysqlUpload:
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
# if '日期' not in df.columns.tolist():
# now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
- #
+ # logger.info(f'{now} {root_file} 缺少日期列, 不支持上传 mysql')
# continue
if '日期' in df.columns.tolist():
df['日期'] = df['日期'].apply(lambda x: pd.to_datetime(x) if x else x)
@@ -1400,7 +1402,7 @@ class MysqlUpload:
df = pd.read_csv(f_path, encoding='utf-8_sig', header=0, na_filter=False)
# if '日期' not in df.columns.tolist():
# now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
- #
+ # logger.info(f'{now} {root_file} 缺少日期列, 不支持上传 mysql')
# continue
if '日期' not in df.columns.tolist():
df['日期'] = df['日期'].apply(lambda x: pd.to_datetime(x) if x else x)
@@ -1449,14 +1451,14 @@ class OptimizeDatas:
try:
return func(*args, **kwargs)
except Exception as e:
-
+ logger.info(f'{func.__name__}, {e}') # 将异常信息返回
with open(error_file, 'a') as f:
now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
f.write(f'\n{now} \n')
f.write(f'函数注释内容(用于定位函数): {func.__doc__} \n')
# f.write(f'报错的文件:\n{e.__traceback__.tb_frame.f_globals["__file__"]}\n') # 发生异常所在的文件
traceback.print_exc(file=open(error_file, 'a')) # 返回完整的堆栈信息
-
+ logger.info(f'更多信息请查看日志文件: {error_file}')

return wrapper

@@ -1467,10 +1469,10 @@ class OptimizeDatas:
connection = pymysql.connect(**_config) # 连接数据库
return connection
except Exception as e:
-
+ logger.info(f'连接失败,正在重试: {attempts}/{max_try} {e}')
attempts += 1
time.sleep(30)
-
+ logger.info(f'{_db_name}: 连接失败,重试次数超限,当前设定次数: {max_try}')
return None

def optimize_list(self):
@@ -1480,7 +1482,7 @@ class OptimizeDatas:
"""
if not self.db_name_lists:
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
+ logger.info(f'{now} 尚未设置参数: self.db_name_lists')
return
for db_name in self.db_name_lists:
self.db_name = db_name
@@ -1490,12 +1492,12 @@ class OptimizeDatas:
""" 更新一个数据库 移除冗余数据 """
if not self.db_name:
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
+ logger.info(f'{now} 尚未设置参数: self.db_name')
return
tables = self.table_list(db_name=self.db_name)
if not tables:
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
+ logger.info(f'{now} {self.db_name} -> 数据表不存在')
return

# 日期初始化
@@ -1513,7 +1515,7 @@ class OptimizeDatas:
end_date_before = self.end_date

now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
+ logger.info(f'{now} mysql({self.host}: {self.port}) {self.db_name} 数据库优化中(日期长度: {self.days} 天)...')
for table_dict in tables:
for key, table_name in table_dict.items():
# if '店铺指标' not in table_name:
@@ -1525,12 +1527,12 @@ class OptimizeDatas:
return
with self.connection.cursor() as cursor:
sql = f"SELECT 1 FROM `{table_name}` LIMIT 1"
- #
+ # logger.info(sql)
cursor.execute(sql)
result = cursor.fetchone()
if not result:
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
+ logger.info(f'{now} 数据表: {table_name}, 数据长度为 0')
continue # 检查数据表是否为空

cursor.execute(f"SHOW FULL COLUMNS FROM `{table_name}`") # 查询数据表的列信息
@@ -1547,7 +1549,7 @@ class OptimizeDatas:
max_result = cursor.fetchone()
cursor.execute(sql_min)
min_result = cursor.fetchone()
- #
+ # logger.info(min_result['min_date'], max_result['max_date'])
# 匹配修改为合适的起始和结束日期
if self.start_date < pd.to_datetime(min_result['min_date']):
self.start_date = pd.to_datetime(min_result['min_date'])
@@ -1585,15 +1587,15 @@ class OptimizeDatas:
cursor.execute(
f"ALTER TABLE `{table_name}` ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
cursor.execute(f"ALTER TABLE `{table_name}` AUTO_INCREMENT = 1") # 设置自增从 1 开始
- #
+ # logger.info(f'重置自增id')
else:
-
+ logger.info(f'{table_name} 存在复合主键: {[item['PrimaryKey'] for item in result]}, 无法重置自增id')
except Exception as e:
-
+ logger.info(f'333 {table_name} {e}')
self.connection.rollback()
self.connection.close()
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
+ logger.info(f'{now} mysql({self.host}: {self.port}) {self.db_name} 数据库优化完成!')

def delete_duplicate(self, table_name, date, except_key=['更新时间']):
datas = self.table_datas(db_name=self.db_name, table_name=str(table_name), date=date)
@@ -1614,7 +1616,7 @@ class OptimizeDatas:
continue
all_datas.append(data) # 数据没有重复
except Exception as e:
-
+ logger.info(f'{table_name} 函数: mysql - > OptimizeDatas -> delete_duplicate -> {e}')
del all_datas

if not duplicate_id: # 如果没有重复数据,则跳过该数据表
@@ -1627,10 +1629,10 @@ class OptimizeDatas:
sql = f"DELETE FROM `{table_name}` WHERE id IN ({placeholders})"
cursor.execute(sql, duplicate_id)
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
+ logger.info(f"{now} {table_name} -> {date.strftime('%Y-%m-%d')} before: {len(datas)}, remove: {cursor.rowcount}")
self.connection.commit() # 提交事务
except Exception as e:
-
+ logger.info(f'{self.db_name}/{table_name}, {e}')
self.connection.rollback() # 异常则回滚

def delete_duplicate2(self, table_name, except_key=['更新时间']):
@@ -1665,11 +1667,11 @@ class OptimizeDatas:
sql = f"DELETE FROM `{table_name}` WHERE id IN ({placeholders})"
cursor.execute(sql, duplicate_id)
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
+ logger.info(f"{now} {table_name} -> before: {len(datas)}, "
f"remove: {cursor.rowcount}")
self.connection.commit() # 提交事务
except Exception as e:
-
+ logger.info(f'{self.db_name}/{table_name}, {e}')
self.connection.rollback() # 异常则回滚

def database_list(self):
@@ -1696,10 +1698,10 @@ class OptimizeDatas:
database_exists = cursor.fetchone()
if not database_exists:
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
+ logger.info(f'{now} {db_name}: 数据表不存在!')
return
except Exception as e:
-
+ logger.info(f'002 {e}')
return
finally:
connection.close() # 断开连接
@@ -1730,7 +1732,7 @@ class OptimizeDatas:
cursor.execute(sql)
results = cursor.fetchall()
except Exception as e:
-
+ logger.info(f'001 {e}')
finally:
connection.close()
return results
@@ -1784,7 +1786,7 @@ def year_month_day_bak(start_date, end_date):
try:
start_date = f'{pd.to_datetime(start_date).year}-{pd.to_datetime(start_date).month}-01'
except Exception as e:
-
+ logger.info(e)
return []
# 使用pandas的date_range创建一个日期范围,频率为'MS'代表每月开始
date_range = pd.date_range(start=start_date, end=end_date, freq='MS')
@@ -1805,4 +1807,4 @@ if __name__ == '__main__':
conf = myconfig.main()
data = conf['Windows']['xigua_lx']['mysql']['local']
username, password, host, port = data['username'], data['password'], data['host'], data['port']
-
+ logger.info(username, password, host, port)