mdbq 3.8.12__py3-none-any.whl → 3.8.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/aggregation/query_data.py +0 -64
- mdbq/mysql/mysql.py +173 -542
- {mdbq-3.8.12.dist-info → mdbq-3.8.14.dist-info}/METADATA +1 -1
- {mdbq-3.8.12.dist-info → mdbq-3.8.14.dist-info}/RECORD +7 -7
- {mdbq-3.8.12.dist-info → mdbq-3.8.14.dist-info}/WHEEL +0 -0
- {mdbq-3.8.12.dist-info → mdbq-3.8.14.dist-info}/top_level.txt +0 -0
mdbq/mysql/mysql.py
CHANGED
@@ -141,135 +141,17 @@ class MysqlUpload:
|
|
141
141
|
return __res_dict, new_dict_data
|
142
142
|
|
143
143
|
@try_except
|
144
|
-
def
|
145
|
-
"""
|
146
|
-
db_name:
|
147
|
-
table_name:
|
148
|
-
remove_by_key: 设置时先删除数据再插入,不设置则直接添加
|
149
|
-
dict_data:
|
150
|
-
set_typ:
|
151
|
-
allow_not_null:
|
152
|
-
filename:
|
153
|
-
reset_id:
|
154
|
-
"""
|
155
|
-
if not self.config:
|
156
|
-
return
|
157
|
-
if '数据主体' not in dict_data.keys():
|
158
|
-
logger.info(f'dict_data 中"数据主体"键不能为空')
|
159
|
-
return
|
160
|
-
connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
|
161
|
-
if not connection:
|
162
|
-
return
|
163
|
-
with connection.cursor() as cursor:
|
164
|
-
cursor.execute(f"SHOW DATABASES LIKE '{db_name}'") # 检查数据库是否存在
|
165
|
-
database_exists = cursor.fetchone()
|
166
|
-
if not database_exists:
|
167
|
-
# 如果数据库不存在,则新建
|
168
|
-
sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
|
169
|
-
cursor.execute(sql)
|
170
|
-
connection.commit()
|
171
|
-
logger.info(f"创建Database: {db_name}")
|
172
|
-
|
173
|
-
self.config.update({'database': db_name}) # 添加更新 config 字段
|
174
|
-
connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
|
175
|
-
if not connection:
|
176
|
-
return
|
177
|
-
with connection.cursor() as cursor:
|
178
|
-
# 1. 查询表, 不存在则创建一个空表
|
179
|
-
sql = "SHOW TABLES LIKE %s;" # 有特殊字符不需转义
|
180
|
-
cursor.execute(sql, (table_name))
|
181
|
-
if not cursor.fetchone():
|
182
|
-
sql = f"CREATE TABLE IF NOT EXISTS `{table_name}` (id INT AUTO_INCREMENT PRIMARY KEY);"
|
183
|
-
cursor.execute(sql)
|
184
|
-
logger.info(f'创建 mysql 表: {table_name}')
|
185
|
-
|
186
|
-
new_dict = {}
|
187
|
-
[new_dict.update({k: v}) for k, v in dict_data.items() if k != '数据主体']
|
188
|
-
# 清理列名中的非法字符
|
189
|
-
dtypes, new_dict = self.cover_doc_dtypes(new_dict)
|
190
|
-
if set_typ:
|
191
|
-
# 更新自定义的列数据类型
|
192
|
-
for k, v in dtypes.items():
|
193
|
-
# 确保传进来的 set_typ 键存在于实际的 df 列才 update
|
194
|
-
[dtypes.update({k: inside_v}) for inside_k, inside_v in set_typ.items() if k == inside_k]
|
195
|
-
|
196
|
-
# 检查列
|
197
|
-
sql = "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s;"
|
198
|
-
cursor.execute(sql, (db_name, table_name))
|
199
|
-
col_exist = [item['COLUMN_NAME'] for item in cursor.fetchall()] # 已存在的所有列
|
200
|
-
|
201
|
-
col_not_exist = [col for col in set_typ.keys() if col not in col_exist] # 不存在的列
|
202
|
-
# 不存在则新建列
|
203
|
-
if col_not_exist: # 数据表中不存在的列
|
204
|
-
for col in col_not_exist:
|
205
|
-
# 创建列,需转义
|
206
|
-
if allow_not_null:
|
207
|
-
sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {set_typ[col]};"
|
208
|
-
else:
|
209
|
-
sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {set_typ[col]} NOT NULL;"
|
210
|
-
cursor.execute(sql)
|
211
|
-
logger.info(f"添加列: {col}({set_typ[col]})") # 添加列并指定数据类型
|
212
|
-
|
213
|
-
if col == '日期':
|
214
|
-
sql = f"CREATE INDEX index_name ON `{table_name}`(`{col}`);"
|
215
|
-
logger.info(f"设置为索引: {col}({set_typ[col]})")
|
216
|
-
cursor.execute(sql)
|
217
|
-
connection.commit() # 提交事务
|
218
|
-
|
219
|
-
if remove_by_key:
|
220
|
-
# 删除数据
|
221
|
-
se_key = ', '.join(remove_by_key)
|
222
|
-
condition = []
|
223
|
-
for up_col in remove_by_key:
|
224
|
-
condition += [f'`{up_col}` = "{dict_data[up_col]}"']
|
225
|
-
condition = ' AND '.join(condition)
|
226
|
-
sql = f"SELECT {se_key} FROM `{table_name}` WHERE {condition}"
|
227
|
-
cursor.execute(sql)
|
228
|
-
result = cursor.fetchall()
|
229
|
-
if result:
|
230
|
-
sql = f'DELETE FROM `{table_name}` WHERE {condition};'
|
231
|
-
cursor.execute(sql)
|
232
|
-
|
233
|
-
# 插入数据到数据库
|
234
|
-
# 有数据格式错误问题,所以分开处理,将数据主体移到最后面用占位符
|
235
|
-
logger.info(f'正在更新: mysql ({self.host}:{self.port}) {db_name}/{table_name} -> {filename}')
|
236
|
-
if new_dict:
|
237
|
-
cols = ', '.join(f"`{item}`" for item in new_dict.keys()) # 列名需要转义
|
238
|
-
values = ', '.join([f'"{item}"' for item in new_dict.values()]) # 值要加引号
|
239
|
-
cols = ', '.join([cols, '数据主体'])
|
240
|
-
binary_data = dict_data['数据主体']
|
241
|
-
sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({values}, %s)"
|
242
|
-
cursor.execute(sql, binary_data)
|
243
|
-
else:
|
244
|
-
sql = f"""INSERT INTO `{table_name}` (数据主体) VALUES (%s);"""
|
245
|
-
cursor.execute(sql, dict_data['数据主体'])
|
246
|
-
|
247
|
-
if reset_id:
|
248
|
-
pass
|
249
|
-
connection.commit()
|
250
|
-
|
251
|
-
@try_except
|
252
|
-
def insert_many_dict(self, db_name, table_name, dict_data_list, icm_update=None, main_key=None, unique_main_key=None, index_length=100, set_typ=None, allow_not_null=False, cut_data=None):
|
144
|
+
def insert_many_dict(self, db_name, table_name, dict_data_list, icm_update=None, index_length=100, set_typ=None, allow_not_null=False, cut_data=None):
|
253
145
|
"""
|
254
146
|
插入字典数据
|
255
147
|
dict_data: 字典
|
256
|
-
main_key: 指定索引列, 通常用日期列,默认会设置日期为索引
|
257
|
-
unique_main_key: 指定唯一索引列
|
258
148
|
index_length: 索引长度
|
259
|
-
icm_update:
|
149
|
+
icm_update: 增量更正
|
260
150
|
set_typ: {}
|
261
151
|
allow_not_null: 创建允许插入空值的列,正常情况下不允许空值
|
262
152
|
"""
|
263
153
|
if not self.config:
|
264
154
|
return
|
265
|
-
if icm_update:
|
266
|
-
if main_key or unique_main_key:
|
267
|
-
logger.info(f'icm_update/unique_main_key/unique_main_key 参数不能同时设定')
|
268
|
-
return
|
269
|
-
if not main_key:
|
270
|
-
main_key = []
|
271
|
-
if not unique_main_key:
|
272
|
-
unique_main_key = []
|
273
155
|
|
274
156
|
if not dict_data_list:
|
275
157
|
logger.info(f'dict_data_list 不能为空 ')
|
@@ -289,7 +171,6 @@ class MysqlUpload:
|
|
289
171
|
except Exception as e:
|
290
172
|
logger.error(f'{table_name} 将数据按年/月保存(cut_data),但在转换日期时报错 -> {e}')
|
291
173
|
|
292
|
-
# connection = pymysql.connect(**self.config) # 连接数据库
|
293
174
|
connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
|
294
175
|
if not connection:
|
295
176
|
return
|
@@ -298,14 +179,12 @@ class MysqlUpload:
|
|
298
179
|
database_exists = cursor.fetchone()
|
299
180
|
if not database_exists:
|
300
181
|
# 如果数据库不存在,则新建
|
301
|
-
|
302
182
|
sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
|
303
183
|
cursor.execute(sql)
|
304
184
|
connection.commit()
|
305
185
|
logger.info(f"创建Database: {db_name}")
|
306
186
|
|
307
187
|
self.config.update({'database': db_name}) # 添加更新 config 字段
|
308
|
-
# connection = pymysql.connect(**self.config) # 重新连接数据库
|
309
188
|
connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
|
310
189
|
if not connection:
|
311
190
|
return
|
@@ -339,64 +218,38 @@ class MysqlUpload:
|
|
339
218
|
sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]};"
|
340
219
|
else:
|
341
220
|
sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
|
342
|
-
|
343
|
-
# logger.info(sql)
|
221
|
+
|
344
222
|
cursor.execute(sql)
|
345
223
|
logger.info(f"添加列: {col}({dtypes[col]})") # 添加列并指定数据类型
|
346
224
|
|
347
|
-
if col
|
225
|
+
if col == '日期':
|
348
226
|
sql = f"CREATE INDEX index_name ON `{table_name}`(`{col}`);"
|
349
227
|
logger.info(f"设置为索引: {col}({dtypes[col]})")
|
350
228
|
cursor.execute(sql)
|
351
|
-
|
352
|
-
if dtypes[col] == 'mediumtext':
|
353
|
-
sql = f"ALTER TABLE `{table_name}` ADD UNIQUE (`{col}`({index_length}))"
|
354
|
-
else:
|
355
|
-
sql = f"ALTER TABLE `{table_name}` ADD UNIQUE (`{col}`)"
|
356
|
-
cursor.execute(sql)
|
357
|
-
# if col in main_key or col in unique_main_key:
|
358
|
-
# sql = f"SHOW INDEXES FROM `{table_name}` WHERE `Column_name` = %s"
|
359
|
-
# cursor.execute(sql, (col))
|
360
|
-
# result = cursor.fetchone() # 检查索引是否存在
|
361
|
-
# if not result:
|
362
|
-
# if col in main_key:
|
363
|
-
# sql = f"CREATE INDEX index_name ON `{table_name}`(`{col}`);"
|
364
|
-
# logger.info(f"设置为索引: {col}({dtypes[col]})")
|
365
|
-
# cursor.execute(sql)
|
366
|
-
# elif col in unique_main_key:
|
367
|
-
# if dtypes[col] == 'mediumtext':
|
368
|
-
# sql = f"CREATE INDEX UNIQUE index_name ON `{table_name}` (`{col}`({index_length}));"
|
369
|
-
# else:
|
370
|
-
# sql = f"CREATE INDEX UNIQUE index_name ON `{table_name}` (`{col}`);"
|
371
|
-
# logger.info(f"设置唯一索引: {col}({dtypes[col]})")
|
372
|
-
# logger.info(sql)
|
373
|
-
# cursor.execute(sql)
|
229
|
+
|
374
230
|
connection.commit() # 提交事务
|
375
231
|
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
376
232
|
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
377
233
|
# 处理插入的数据
|
378
234
|
for dict_data in dict_data_list:
|
379
|
-
# logger.info(dict_data)
|
380
235
|
dtypes, dict_data = self.cover_dict_dtypes(dict_data=dict_data) # {'店铺名称': 'varchar(100)',...}
|
381
236
|
if icm_update:
|
382
237
|
""" 使用增量更新: 需确保 icm_update['主键'] 传进来的列组合是数据表中唯一,值不会发生变化且不会重复,否则可能产生覆盖 """
|
383
238
|
sql = 'SELECT COLUMN_NAME FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
|
384
|
-
cursor.execute(sql, (db_name,
|
239
|
+
cursor.execute(sql, (db_name, table_name))
|
385
240
|
columns = cursor.fetchall()
|
386
241
|
cols_exist = [col['COLUMN_NAME'] for col in columns] # 数据表的所有列, 返回 list
|
387
242
|
update_col = [item for item in cols_exist if item not in icm_update and item != 'id'] # 除了主键外的其他列
|
388
243
|
|
389
|
-
#
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
sql
|
396
|
-
|
397
|
-
|
398
|
-
cursor.execute(sql)
|
399
|
-
results = cursor.fetchall() # results 是数据库取出的数据
|
244
|
+
# 构造查询条件(参数化)
|
245
|
+
condition = ' AND '.join([f'`{up_col}` = %s' for up_col in icm_update])
|
246
|
+
condition_values = [dict_data[up_col] for up_col in icm_update]
|
247
|
+
|
248
|
+
# 执行查询(参数化)
|
249
|
+
sql = f"SELECT {','.join([f'`{col}`' for col in update_col])} FROM `{table_name}` WHERE {condition}"
|
250
|
+
cursor.execute(sql, condition_values)
|
251
|
+
results = cursor.fetchall()
|
252
|
+
|
400
253
|
if results: # 有数据返回,再进行增量检查
|
401
254
|
for result in results: # results 是数据库数据, dict_data 是传进来的数据
|
402
255
|
change_col = [] # 发生变化的列名
|
@@ -412,74 +265,61 @@ class MysqlUpload:
|
|
412
265
|
mysql_value = re.sub(r'0+$', '', mysql_value)
|
413
266
|
mysql_value = re.sub(r'\.$', '', mysql_value)
|
414
267
|
if df_value != mysql_value: # 传进来的数据和数据库比较, 有变化
|
415
|
-
# logger.info(f'{dict_data['日期']}{dict_data['商品id']}{col} 列的值有变化,{str(dict_data[col])} != {str(result[col])}')
|
416
268
|
change_values += [f"`{col}` = \"{str(dict_data[col])}\""]
|
417
269
|
change_col.append(col)
|
418
270
|
not_change_col = [item for item in update_col if item not in change_col]
|
419
|
-
|
420
|
-
|
271
|
+
|
272
|
+
# 构造更新语句(参数化)
|
273
|
+
if change_values:
|
274
|
+
set_clause = ', '.join([f'`{col}` = %s' for col in change_col])
|
275
|
+
update_values = [dict_data[col] for col in change_col]
|
276
|
+
# 添加未变化列的查询条件
|
421
277
|
if not_change_col:
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
sql = "UPDATE `%s` SET %s WHERE %s" % (table_name, change_values, condition)
|
429
|
-
# logger.info(sql)
|
430
|
-
cursor.execute(sql)
|
278
|
+
not_change_condition = ' AND '.join([f'`{col}` = %s' for col in not_change_col])
|
279
|
+
condition += f' AND {not_change_condition}'
|
280
|
+
condition_values += [dict_data[col] for col in not_change_col]
|
281
|
+
# 执行更新
|
282
|
+
sql = f"UPDATE `{table_name}` SET {set_clause} WHERE {condition}"
|
283
|
+
cursor.execute(sql, update_values + condition_values)
|
431
284
|
else: # 没有数据返回,则直接插入数据
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({
|
436
|
-
cursor.execute(sql)
|
285
|
+
# 参数化插入
|
286
|
+
cols = ', '.join([f'`{k}`' for k in dict_data.keys()])
|
287
|
+
placeholders = ', '.join(['%s'] * len(dict_data))
|
288
|
+
sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({placeholders})"
|
289
|
+
cursor.execute(sql, tuple(dict_data.values()))
|
437
290
|
connection.commit() # 提交数据库
|
438
291
|
continue
|
439
292
|
|
440
|
-
#
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
# logger.info(sql)
|
457
|
-
cursor.execute(sql)
|
458
|
-
connection.commit() # 提交数据库
|
293
|
+
# 标准插入逻辑(参数化修改)
|
294
|
+
# 构造更新列(排除主键)
|
295
|
+
update_cols = [k for k in dict_data.keys()]
|
296
|
+
# 构建SQL
|
297
|
+
cols = ', '.join([f'`{k}`' for k in dict_data.keys()])
|
298
|
+
placeholders = ', '.join(['%s'] * len(dict_data))
|
299
|
+
update_clause = ', '.join([f'`{k}` = VALUES(`{k}`)' for k in update_cols]) or 'id=id'
|
300
|
+
|
301
|
+
sql = f"""INSERT INTO `{table_name}` ({cols}) VALUES ({placeholders}) ON DUPLICATE KEY UPDATE {update_clause}"""
|
302
|
+
# 执行参数化查询
|
303
|
+
try:
|
304
|
+
cursor.execute(sql, tuple(dict_data.values()))
|
305
|
+
connection.commit()
|
306
|
+
except pymysql.Error as e:
|
307
|
+
logger.error(f"插入失败: {e}\nSQL: {cursor.mogrify(sql, tuple(dict_data.values()))}")
|
308
|
+
connection.rollback()
|
459
309
|
connection.close()
|
460
310
|
|
461
311
|
@try_except
|
462
|
-
def dict_to_mysql(self, db_name, table_name, dict_data, icm_update=None,
|
312
|
+
def dict_to_mysql(self, db_name, table_name, dict_data, icm_update=None, index_length=100, set_typ=None, allow_not_null=False, cut_data=None):
|
463
313
|
"""
|
464
314
|
插入字典数据
|
465
315
|
dict_data: 字典
|
466
|
-
main_key: 指定索引列, 通常用日期列,默认会设置日期为索引
|
467
|
-
unique_main_key: 指定唯一索引列
|
468
316
|
index_length: 索引长度
|
469
|
-
icm_update:
|
317
|
+
icm_update: 增量更新
|
470
318
|
set_typ: {}
|
471
319
|
allow_not_null: 创建允许插入空值的列,正常情况下不允许空值
|
472
320
|
"""
|
473
321
|
if not self.config:
|
474
322
|
return
|
475
|
-
if icm_update:
|
476
|
-
if main_key or unique_main_key:
|
477
|
-
logger.info(f'icm_update/unique_main_key/unique_main_key 参数不能同时设定')
|
478
|
-
return
|
479
|
-
if not main_key:
|
480
|
-
main_key = []
|
481
|
-
if not unique_main_key:
|
482
|
-
unique_main_key = []
|
483
323
|
|
484
324
|
if cut_data:
|
485
325
|
if '日期' in dict_data.keys():
|
@@ -495,7 +335,6 @@ class MysqlUpload:
|
|
495
335
|
except Exception as e:
|
496
336
|
logger.error(f'{table_name} 将数据按年/月保存(cut_data),但在转换日期时报错 -> {e}')
|
497
337
|
|
498
|
-
# connection = pymysql.connect(**self.config) # 连接数据库
|
499
338
|
connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
|
500
339
|
if not connection:
|
501
340
|
return
|
@@ -510,7 +349,6 @@ class MysqlUpload:
|
|
510
349
|
logger.info(f"创建Database: {db_name}")
|
511
350
|
|
512
351
|
self.config.update({'database': db_name}) # 添加更新 config 字段
|
513
|
-
# connection = pymysql.connect(**self.config) # 重新连接数据库
|
514
352
|
connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
|
515
353
|
if not connection:
|
516
354
|
return
|
@@ -544,61 +382,34 @@ class MysqlUpload:
|
|
544
382
|
sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]};"
|
545
383
|
else:
|
546
384
|
sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
|
547
|
-
# sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
|
548
|
-
# logger.info(sql)
|
549
385
|
cursor.execute(sql)
|
550
386
|
logger.info(f"添加列: {col}({dtypes[col]})") # 添加列并指定数据类型
|
551
387
|
|
552
|
-
if col
|
388
|
+
if col == '日期':
|
553
389
|
sql = f"CREATE INDEX index_name ON `{table_name}`(`{col}`);"
|
554
390
|
logger.info(f"设置为索引: {col}({dtypes[col]})")
|
555
391
|
cursor.execute(sql)
|
556
|
-
if col in unique_main_key:
|
557
|
-
if dtypes[col] == 'mediumtext':
|
558
|
-
sql = f"ALTER TABLE `{table_name}` ADD UNIQUE (`{col}`({index_length}))"
|
559
|
-
else:
|
560
|
-
sql = f"ALTER TABLE `{table_name}` ADD UNIQUE (`{col}`)"
|
561
|
-
cursor.execute(sql)
|
562
|
-
# if col in main_key or col in unique_main_key:
|
563
|
-
# sql = f"SHOW INDEXES FROM `{table_name}` WHERE `Column_name` = %s"
|
564
|
-
# cursor.execute(sql, (col))
|
565
|
-
# result = cursor.fetchone() # 检查索引是否存在
|
566
|
-
# if not result:
|
567
|
-
# if col in main_key:
|
568
|
-
# sql = f"CREATE INDEX index_name ON `{table_name}`(`{col}`);"
|
569
|
-
# logger.info(f"设置为索引: {col}({dtypes[col]})")
|
570
|
-
# cursor.execute(sql)
|
571
|
-
# elif col in unique_main_key:
|
572
|
-
# if dtypes[col] == 'mediumtext':
|
573
|
-
# sql = f"CREATE INDEX UNIQUE index_name ON `{table_name}` (`{col}`({index_length}));"
|
574
|
-
# else:
|
575
|
-
# sql = f"CREATE INDEX UNIQUE index_name ON `{table_name}` (`{col}`);"
|
576
|
-
# logger.info(f"设置唯一索引: {col}({dtypes[col]})")
|
577
|
-
# logger.info(sql)
|
578
|
-
# cursor.execute(sql)
|
579
392
|
connection.commit() # 提交事务
|
580
393
|
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
581
394
|
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
582
395
|
# 处理插入的数据
|
583
396
|
if icm_update:
|
584
397
|
""" 使用增量更新: 需确保 icm_update['主键'] 传进来的列组合是数据表中唯一,值不会发生变化且不会重复,否则可能产生覆盖 """
|
585
|
-
sql =
|
586
|
-
cursor.execute(sql, (db_name,
|
587
|
-
|
588
|
-
cols_exist = [col['COLUMN_NAME'] for col in columns] # 数据表的所有列, 返回 list
|
398
|
+
sql = """SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s"""
|
399
|
+
cursor.execute(sql, (db_name, table_name))
|
400
|
+
cols_exist = [col['COLUMN_NAME'] for col in cursor.fetchall()] # 数据表的所有列, 返回 list
|
589
401
|
update_col = [item for item in cols_exist if item not in icm_update and item != 'id'] # 除了主键外的其他列
|
590
402
|
|
591
|
-
#
|
592
|
-
|
593
|
-
|
403
|
+
# 参数化构建查询条件
|
404
|
+
condition_params = []
|
405
|
+
condition_parts = []
|
594
406
|
for up_col in icm_update:
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
cursor.
|
601
|
-
results = cursor.fetchall() # results 是数据库取出的数据
|
407
|
+
condition_parts.append(f"`{up_col}` = %s")
|
408
|
+
condition_params.append(dict_data[up_col])
|
409
|
+
|
410
|
+
sql = f"""SELECT `{','.join(update_col)}` FROM `{table_name}` WHERE {' AND '.join(condition_parts)}"""
|
411
|
+
cursor.execute(sql, condition_params)
|
412
|
+
results = cursor.fetchall()
|
602
413
|
if results: # 有数据返回,再进行增量检查
|
603
414
|
for result in results: # results 是数据库数据, dict_data 是传进来的数据
|
604
415
|
change_col = [] # 发生变化的列名
|
@@ -614,51 +425,35 @@ class MysqlUpload:
|
|
614
425
|
mysql_value = re.sub(r'0+$', '', mysql_value)
|
615
426
|
mysql_value = re.sub(r'\.$', '', mysql_value)
|
616
427
|
if df_value != mysql_value: # 传进来的数据和数据库比较, 有变化
|
617
|
-
# logger.info(f'{dict_data['日期']}{dict_data['商品id']}{col} 列的值有变化,{str(dict_data[col])} != {str(result[col])}')
|
618
428
|
change_values += [f"`{col}` = \"{str(dict_data[col])}\""]
|
619
429
|
change_col.append(col)
|
620
|
-
not_change_col = [item for item in update_col if item not in change_col]
|
621
430
|
# change_values 是 df 传进来且和数据库对比后,发生了变化的数据,值示例: [`品销宝余额` = '9999.0', `短信剩余` = '888']
|
622
431
|
if change_values: # change_values 有数据返回,表示值需要更新
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
sql = "UPDATE `%s` SET %s WHERE %s" % (table_name, change_values, condition)
|
631
|
-
# logger.info(sql)
|
632
|
-
cursor.execute(sql)
|
432
|
+
set_params = [dict_data[col] for col in change_col]
|
433
|
+
full_params = set_params + condition_params # 正确顺序
|
434
|
+
|
435
|
+
sql = f"""UPDATE `{table_name}`
|
436
|
+
SET {','.join(set_parts)}
|
437
|
+
WHERE {' AND '.join(condition_parts)}"""
|
438
|
+
cursor.execute(sql, full_params)
|
633
439
|
else: # 没有数据返回,则直接插入数据
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
440
|
+
# 参数化插入语句
|
441
|
+
keys = [f"`{k}`" for k in dict_data.keys()]
|
442
|
+
placeholders = ','.join(['%s'] * len(dict_data))
|
443
|
+
update_clause = ','.join([f"`{k}`=VALUES(`{k}`)" for k in dict_data.keys()])
|
444
|
+
sql = f"""INSERT INTO `{table_name}` ({','.join(keys)}) VALUES ({placeholders}) ON DUPLICATE KEY UPDATE {update_clause}"""
|
445
|
+
cursor.execute(sql, tuple(dict_data.values()))
|
639
446
|
connection.commit() # 提交数据库
|
640
447
|
connection.close()
|
641
448
|
return
|
642
449
|
|
643
|
-
#
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
del dict_data[col]
|
651
|
-
if unique_main_key:
|
652
|
-
for col in unique_main_key:
|
653
|
-
del dict_data[col]
|
654
|
-
# 涉及列名务必使用反引号
|
655
|
-
update_datas = ', '.join([f'`{k}` = VALUES(`{k}`)' for k, v in dict_data.items()])
|
656
|
-
|
657
|
-
# 构建 sql
|
658
|
-
sql = f"INSERT INTO %s (%s) VALUES (%s) ON DUPLICATE KEY UPDATE %s" % (table_name, keys_data, values_data, update_datas)
|
659
|
-
# logger.info(sql)
|
660
|
-
cursor.execute(sql)
|
661
|
-
connection.commit() # 提交数据库
|
450
|
+
# 常规插入处理(参数化)
|
451
|
+
keys = [f"`{k}`" for k in dict_data.keys()]
|
452
|
+
placeholders = ','.join(['%s'] * len(dict_data))
|
453
|
+
update_clause = ','.join([f"`{k}`=VALUES(`{k}`)" for k in dict_data.keys()])
|
454
|
+
sql = f"""INSERT INTO `{table_name}` ({','.join(keys)}) VALUES ({placeholders}) ON DUPLICATE KEY UPDATE {update_clause}"""
|
455
|
+
cursor.execute(sql, tuple(dict_data.values()))
|
456
|
+
connection.commit()
|
662
457
|
connection.close()
|
663
458
|
|
664
459
|
def cover_dict_dtypes(self, dict_data):
|
@@ -677,9 +472,8 @@ class MysqlUpload:
|
|
677
472
|
if str(v) == '':
|
678
473
|
v = 0
|
679
474
|
v = str(v)
|
680
|
-
# v = re.sub('^-$|^--$|^nan$|^null$', '0', v, re.I)
|
681
|
-
# v = re.sub(',|="|"', '', v, re.I)
|
682
475
|
v = re.sub('^="|"$', '', v, re.I)
|
476
|
+
v = re.sub(r'[\x00-\x1F\x7F-\x9F]', '', str(v)) # 移除控制字符
|
683
477
|
if re.findall(r'^[-+]?\d+\.?\d*%$', v):
|
684
478
|
v = str(float(v.rstrip("%")) / 100)
|
685
479
|
|
@@ -761,17 +555,15 @@ class MysqlUpload:
|
|
761
555
|
return __res_dict, df
|
762
556
|
|
763
557
|
@try_except
|
764
|
-
def df_to_mysql(self, df, db_name, table_name, set_typ=None, icm_update=[], move_insert=False, df_sql=False,
|
765
|
-
filename=None, count=None,
|
558
|
+
def df_to_mysql(self, df, db_name, table_name, set_typ=None, icm_update=[], move_insert=False, df_sql=False,
|
559
|
+
filename=None, count=None, allow_not_null=False, cut_data=None):
|
766
560
|
"""
|
767
561
|
db_name: 数据库名
|
768
562
|
table_name: 表名
|
769
|
-
move_insert: 根据df 的日期,先移除数据库数据,再插入, df_sql,
|
563
|
+
move_insert: 根据df 的日期,先移除数据库数据,再插入, df_sql, icm_update 都要设置为 False
|
770
564
|
原则上只限于聚合数据使用,原始数据插入时不要设置
|
771
|
-
|
772
565
|
df_sql: 这是一个临时参数, 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重,初创表大量上传数据的时候使用
|
773
|
-
|
774
|
-
icm_update: 增量更新, 在聚合数据中使用,原始文件不要使用,设置此参数时需将 drop_duplicates 改为 False
|
566
|
+
icm_update: 增量更新, 在聚合数据中使用,原始文件不要使用
|
775
567
|
使用增量更新: 必须确保 icm_update 传进来的列必须是数据表中唯一主键,值不会发生变化,不会重复,否则可能产生错乱覆盖情况
|
776
568
|
filename: 用来追踪处理进度,传这个参数是方便定位产生错误的文件
|
777
569
|
allow_not_null: 创建允许插入空值的列,正常情况下不允许空值
|
@@ -779,12 +571,12 @@ class MysqlUpload:
|
|
779
571
|
if not self.config:
|
780
572
|
return
|
781
573
|
if icm_update:
|
782
|
-
if move_insert or df_sql
|
783
|
-
logger.info(f'icm_update/move_insert/df_sql
|
574
|
+
if move_insert or df_sql:
|
575
|
+
logger.info(f'icm_update/move_insert/df_sql 参数不能同时设定')
|
784
576
|
return
|
785
577
|
if move_insert:
|
786
|
-
if icm_update or df_sql
|
787
|
-
logger.info(f'icm_update/move_insert/df_sql
|
578
|
+
if icm_update or df_sql:
|
579
|
+
logger.info(f'icm_update/move_insert/df_sql 参数不能同时设定')
|
788
580
|
return
|
789
581
|
|
790
582
|
self.filename = filename
|
@@ -823,12 +615,11 @@ class MysqlUpload:
|
|
823
615
|
# 确保传进来的 set_typ 键存在于实际的 df 列才 update
|
824
616
|
[dtypes.update({k: inside_v}) for inside_k, inside_v in set_typ.items() if k == inside_k]
|
825
617
|
|
826
|
-
# connection = pymysql.connect(**self.config) # 连接数据库
|
827
618
|
connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
|
828
619
|
if not connection:
|
829
620
|
return
|
830
621
|
with connection.cursor() as cursor:
|
831
|
-
cursor.execute(
|
622
|
+
cursor.execute("SHOW DATABASES LIKE %s", (db_name,)) # 检查数据库是否存在
|
832
623
|
database_exists = cursor.fetchone()
|
833
624
|
if not database_exists:
|
834
625
|
# 如果数据库不存在,则新建
|
@@ -838,7 +629,6 @@ class MysqlUpload:
|
|
838
629
|
logger.info(f"创建Database: {db_name}")
|
839
630
|
|
840
631
|
self.config.update({'database': db_name}) # 添加更新 config 字段
|
841
|
-
# connection = pymysql.connect(**self.config) # 重新连接数据库
|
842
632
|
connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
|
843
633
|
if not connection:
|
844
634
|
return
|
@@ -847,8 +637,8 @@ class MysqlUpload:
|
|
847
637
|
sql = "SHOW TABLES LIKE %s;" # 有特殊字符不需转义
|
848
638
|
cursor.execute(sql, (table_name))
|
849
639
|
if not cursor.fetchone():
|
850
|
-
|
851
|
-
cursor.execute(
|
640
|
+
create_table_sql = f"CREATE TABLE IF NOT EXISTS `{table_name}` (id INT AUTO_INCREMENT PRIMARY KEY)"
|
641
|
+
cursor.execute(create_table_sql)
|
852
642
|
logger.info(f'创建 mysql 表: {table_name}')
|
853
643
|
|
854
644
|
# 有特殊字符不需转义
|
@@ -862,11 +652,10 @@ class MysqlUpload:
|
|
862
652
|
if col_not_exist: # 数据表中不存在的列
|
863
653
|
for col in col_not_exist:
|
864
654
|
# 创建列,需转义
|
865
|
-
|
866
|
-
|
867
|
-
|
868
|
-
|
869
|
-
cursor.execute(sql)
|
655
|
+
ialter_sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]}"
|
656
|
+
if not allow_not_null:
|
657
|
+
alter_sql += " NOT NULL"
|
658
|
+
cursor.execute(alter_sql)
|
870
659
|
logger.info(f"添加列: {col}({dtypes[col]})") # 添加列并指定数据类型
|
871
660
|
|
872
661
|
# 创建索引
|
@@ -882,17 +671,14 @@ class MysqlUpload:
|
|
882
671
|
logger.info(f'正在更新: mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count}, {self.filename}')
|
883
672
|
engine = create_engine(
|
884
673
|
f"mysql+pymysql://{self.username}:{self.password}@{self.host}:{self.port}/{db_name}") # 创建数据库引擎
|
885
|
-
# df.to_csv('/Users/xigua/Downloads/mysql.csv', index=False, header=True, encoding='utf-8_sig')
|
886
|
-
# df.to_excel('/Users/xigua/Downloads/mysql.xlsx', index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
|
887
674
|
df.to_sql(
|
888
675
|
name=table_name,
|
889
676
|
con=engine,
|
890
677
|
if_exists='append',
|
891
678
|
index=False,
|
892
|
-
chunksize=1000
|
679
|
+
chunksize=1000,
|
680
|
+
method='multi'
|
893
681
|
)
|
894
|
-
if reset_id:
|
895
|
-
pass
|
896
682
|
connection.commit() # 提交事务
|
897
683
|
connection.close()
|
898
684
|
return
|
@@ -901,13 +687,15 @@ class MysqlUpload:
|
|
901
687
|
if move_insert and '日期' in df.columns.tolist():
|
902
688
|
# 移除数据
|
903
689
|
dates = df['日期'].values.tolist()
|
904
|
-
# logger.info(dates)
|
905
690
|
dates = [pd.to_datetime(item) for item in dates] # 需要先转换类型才能用 min, max
|
906
691
|
start_date = pd.to_datetime(min(dates)).strftime('%Y-%m-%d')
|
907
692
|
end_date = (pd.to_datetime(max(dates)) + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
|
908
693
|
|
909
|
-
|
910
|
-
|
694
|
+
delete_sql = f"""
|
695
|
+
DELETE FROM `{table_name}`
|
696
|
+
WHERE 日期 BETWEEN %s AND %s
|
697
|
+
"""
|
698
|
+
cursor.execute(delete_sql, (start_date, end_date))
|
911
699
|
connection.commit()
|
912
700
|
|
913
701
|
# 插入数据
|
@@ -918,7 +706,8 @@ class MysqlUpload:
|
|
918
706
|
con=engine,
|
919
707
|
if_exists='append',
|
920
708
|
index=False,
|
921
|
-
chunksize=1000
|
709
|
+
chunksize=1000,
|
710
|
+
method='multi'
|
922
711
|
)
|
923
712
|
return
|
924
713
|
|
@@ -927,236 +716,78 @@ class MysqlUpload:
|
|
927
716
|
# data 是传进来待处理的数据, 不是数据库数据
|
928
717
|
# data 示例: {'日期': Timestamp('2024-08-27 00:00:00'), '推广费余额': 33299, '品销宝余额': 2930.73, '短信剩余': 67471}
|
929
718
|
try:
|
930
|
-
|
931
|
-
|
932
|
-
values = ', '.join([f'"{item}"' for item in data.values()]) # 值要加引号
|
933
|
-
condition = []
|
719
|
+
# 预处理数据:转换非字符串类型
|
720
|
+
processed_data = {}
|
934
721
|
for k, v in data.items():
|
935
|
-
|
936
|
-
|
937
|
-
|
938
|
-
|
939
|
-
|
940
|
-
|
941
|
-
|
942
|
-
|
943
|
-
|
944
|
-
|
945
|
-
|
946
|
-
|
947
|
-
|
948
|
-
|
949
|
-
|
950
|
-
|
951
|
-
|
952
|
-
|
953
|
-
|
954
|
-
|
955
|
-
|
956
|
-
|
957
|
-
|
958
|
-
|
959
|
-
|
960
|
-
#
|
961
|
-
|
962
|
-
|
963
|
-
for
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
#
|
968
|
-
|
969
|
-
|
970
|
-
|
971
|
-
|
972
|
-
|
973
|
-
|
974
|
-
|
975
|
-
|
976
|
-
|
977
|
-
|
978
|
-
|
979
|
-
|
980
|
-
|
981
|
-
|
982
|
-
|
983
|
-
|
984
|
-
|
985
|
-
|
986
|
-
|
987
|
-
|
988
|
-
|
989
|
-
|
990
|
-
|
991
|
-
|
992
|
-
|
993
|
-
|
994
|
-
|
995
|
-
|
996
|
-
# logger.info(change_values, not_change_values)
|
997
|
-
condition += f' AND {not_change_values}' # 重新构建完整的查询条件,将未发生变化的列加进查询条件
|
998
|
-
change_values = ', '.join(f"{item}" for item in change_values) # 注意这里 item 外面没有反引号
|
999
|
-
sql = "UPDATE `%s` SET %s WHERE %s" % (table_name, change_values, condition)
|
1000
|
-
# logger.info(sql)
|
1001
|
-
cursor.execute(sql)
|
1002
|
-
else: # 没有数据返回,则直接插入数据
|
1003
|
-
sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({values});"
|
1004
|
-
cursor.execute(sql)
|
722
|
+
if isinstance(v, (int, float)):
|
723
|
+
processed_data[k] = float(v)
|
724
|
+
elif isinstance(v, pd.Timestamp):
|
725
|
+
processed_data[k] = v.strftime('%Y-%m-%d')
|
726
|
+
else:
|
727
|
+
processed_data[k] = str(v)
|
728
|
+
|
729
|
+
# 构建基础SQL要素
|
730
|
+
columns = [f'`{k}`' for k in processed_data.keys()]
|
731
|
+
placeholders = ', '.join(['%s'] * len(processed_data))
|
732
|
+
values = list(processed_data.values())
|
733
|
+
|
734
|
+
# 构建基本INSERT语句
|
735
|
+
insert_sql = f"INSERT INTO `{table_name}` ({', '.join(columns)}) VALUES ({placeholders})"
|
736
|
+
|
737
|
+
if icm_update: # 增量更新, 专门用于聚合数据,其他库不要调用
|
738
|
+
# 获取数据表结构
|
739
|
+
cursor.execute(
|
740
|
+
"SELECT COLUMN_NAME FROM information_schema.columns "
|
741
|
+
"WHERE table_schema = %s AND table_name = %s",
|
742
|
+
(db_name, table_name)
|
743
|
+
)
|
744
|
+
cols_exist = [row['COLUMN_NAME'] for row in cursor.fetchall()]
|
745
|
+
update_columns = [col for col in cols_exist if col not in icm_update and col != 'id']
|
746
|
+
|
747
|
+
# 构建WHERE条件
|
748
|
+
where_conditions = []
|
749
|
+
where_values = []
|
750
|
+
for col in icm_update:
|
751
|
+
where_conditions.append(f"`{col}` = %s")
|
752
|
+
where_values.append(processed_data[col])
|
753
|
+
|
754
|
+
# 查询现有数据
|
755
|
+
select_sql = f"SELECT {', '.join([f'`{col}`' for col in update_columns])} " \
|
756
|
+
f"FROM `{table_name}` WHERE {' AND '.join(where_conditions)}"
|
757
|
+
cursor.execute(select_sql, where_values)
|
758
|
+
existing_data = cursor.fetchone()
|
759
|
+
|
760
|
+
if existing_data:
|
761
|
+
# 比较并构建更新语句
|
762
|
+
update_set = []
|
763
|
+
update_values = []
|
764
|
+
for col in update_columns:
|
765
|
+
db_value = existing_data[col]
|
766
|
+
new_value = processed_data[col]
|
767
|
+
|
768
|
+
# 处理数值类型的精度差异
|
769
|
+
if isinstance(db_value, float) and isinstance(new_value, float):
|
770
|
+
if not math.isclose(db_value, new_value, rel_tol=1e-9):
|
771
|
+
update_set.append(f"`{col}` = %s")
|
772
|
+
update_values.append(new_value)
|
773
|
+
elif db_value != new_value:
|
774
|
+
update_set.append(f"`{col}` = %s")
|
775
|
+
update_values.append(new_value)
|
776
|
+
|
777
|
+
if update_set:
|
778
|
+
update_sql = f"UPDATE `{table_name}` SET {', '.join(update_set)} " \
|
779
|
+
f"WHERE {' AND '.join(where_conditions)}"
|
780
|
+
cursor.execute(update_sql, update_values + where_values)
|
781
|
+
else:
|
782
|
+
cursor.execute(insert_sql, values)
|
1005
783
|
else:
|
1006
|
-
|
1007
|
-
cursor.execute(
|
784
|
+
# 普通插入
|
785
|
+
cursor.execute(insert_sql, values)
|
1008
786
|
except Exception as e:
|
1009
787
|
pass
|
1010
|
-
|
1011
|
-
if reset_id:
|
1012
|
-
pass
|
1013
788
|
connection.commit() # 提交事务
|
1014
789
|
connection.close()
|
1015
790
|
|
1016
|
-
@try_except
def read_doc_data(self, table_name, db_name='pdf文件', column='文件名', filename=None, save_path='/Users/xigua/Downloads'):
    """
    Read the binary payload stored for ``filename`` and write it to a local file.

    table_name: table to read from
    db_name: database that holds the table
    column: column that is matched against ``filename``
    filename: value to look up; also used as the name of the output file
    save_path: directory the output file is written to

    Returns None. Nothing is written when ``filename`` is empty, when no
    connection can be obtained, or when the database or table does not exist.
    """
    if not filename:
        logger.info('未指定文件名: filename')
        return

    connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
    if not connection:
        return
    try:
        with connection.cursor() as cursor:
            # Bind the name as a parameter instead of splicing it into the SQL text.
            cursor.execute("SHOW DATABASES LIKE %s", (db_name,))
            if not cursor.fetchone():
                logger.info(f"Database {db_name} 数据库不存在")
                return
    finally:
        # Release the probing connection on every path, including early returns.
        connection.close()

    self.config.update({'database': db_name})
    connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
    if not connection:
        return
    try:
        with connection.cursor() as cursor:
            # 1. Make sure the table exists.
            cursor.execute("SHOW TABLES LIKE %s;", (table_name,))
            if not cursor.fetchone():
                logger.info(f'{table_name} -> 数据表不存在')
                return

            # 2. Fetch the rows. Identifiers cannot be bound as parameters, so
            # they stay backtick-quoted; any literal '%' in them is doubled
            # because the driver applies %-formatting once parameters are passed.
            select_head = f"SELECT `{column}`, `数据主体` FROM `{table_name}` WHERE `{column}` = "
            sql = select_head.replace('%', '%%') + "%s"
            cursor.execute(sql, (filename,))

            target = os.path.join(save_path, filename)
            for result in cursor.fetchall():
                # Write the binary payload to the local file.
                with open(target, 'wb') as f:
                    f.write(result['数据主体'])
                logger.info(f'写入本地文件: ({self.host}:{self.port}) {db_name}/{table_name} -> {target}')
    finally:
        connection.close()
|
1064
|
-
|
1065
|
-
def read_mysql(self, table_name, start_date, end_date, db_name='远程数据源', date_name='日期'):
    """
    Read the rows of a table whose date column falls inside a date range.

    table_name: table to query
    start_date: first day of the range (anything ``pd.to_datetime`` accepts)
    end_date: last day of the range, inclusive (SQL ``BETWEEN``)
    db_name: database that holds the table
    date_name: name of the date column used for filtering

    Returns a DataFrame. It is empty when no connection can be obtained, when
    the database does not exist, or when the query fails or matches no rows.
    """
    start_date = pd.to_datetime(start_date).strftime('%Y-%m-%d')
    end_date = pd.to_datetime(end_date).strftime('%Y-%m-%d')
    df = pd.DataFrame()

    connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
    if not connection:
        # Same result type as every other failure path.
        return df
    try:
        with connection.cursor() as cursor:
            # Check that the database exists; the name is bound as a parameter.
            cursor.execute("SHOW DATABASES LIKE %s", (db_name,))
            if not cursor.fetchone():
                logger.info(f"Database {db_name} 数据库不存在")
                return df
            logger.info(f'mysql 正在查询表: {table_name}, 范围: {start_date}~{end_date}')
    except Exception as e:
        # Still best-effort, but the failure is now logged instead of hidden.
        logger.error(f'{e} {db_name} 数据库检查失败')
        return df
    finally:
        connection.close()  # drop the probing connection

    before_time = time.time()
    # Reconnect with the target database selected.
    self.config.update({'database': db_name})
    connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
    if not connection:
        return df
    try:
        with connection.cursor() as cursor:
            # Identifiers cannot be bound, so they stay backtick-quoted and any
            # literal '%' in them is doubled; the date bounds are bound.
            select_head = f"SELECT * FROM `{db_name}`.`{table_name}` WHERE `{date_name}` "
            sql = select_head.replace('%', '%%') + "BETWEEN %s AND %s"
            cursor.execute(sql, (start_date, end_date))
            rows = cursor.fetchall()
            columns = [desc[0] for desc in cursor.description]
            df = pd.DataFrame(rows, columns=columns)
    except Exception as e:
        logger.error(f'{e} {db_name} -> {table_name} 表不存在')
        return df
    finally:
        connection.close()

    if len(df) == 0:
        logger.info(f'database: {db_name}, table: {table_name} 查询的数据为空')
    else:
        elapsed = time.time() - before_time
        # Whole seconds normally; two decimals for sub-second queries.
        cost_time = int(elapsed) if elapsed >= 1 else round(elapsed, 2)
        logger.info(f'mysql ({self.host}) 表: {table_name} 获取数据长度: {len(df)}, 用时: {cost_time} 秒')
    return df
|
1118
|
-
|
1119
|
-
def upload_pandas(self, update_path, db_name, days=None):
    """
    Upload every CSV file found under ``update_path``.

    update_path: root directory of the CSV data source
    db_name: target database name
    days: only upload rows from the last ``days`` days; when not set,
        everything dated 2000-01-01 or later is uploaded

    CSV files inside a sub-directory go to a table named after that
    sub-directory. A CSV file directly under ``update_path`` goes to a table
    named ``<file stem>_f``. Entries whose name contains '其他数据', '年.csv'
    or '京东数据集' are skipped, as are CSV files with 'baidu' in their name
    (for top-level files the check is made against the full path).
    """
    if days:
        start_date = pd.to_datetime(datetime.date.today() - datetime.timedelta(days=days))
    else:
        start_date = pd.to_datetime('2000-01-01')

    def _load_recent(csv_path):
        # Read one CSV; if it has a '日期' column, keep rows on/after start_date.
        df = pd.read_csv(csv_path, encoding='utf-8_sig', header=0, na_filter=False)
        if '日期' in df.columns:
            df['日期'] = df['日期'].apply(lambda x: pd.to_datetime(x) if x else x)
            df = df[df['日期'] >= start_date]
        return df

    skip_markers = ('其他数据', '年.csv', '京东数据集')
    for root_file in os.listdir(update_path):
        if any(marker in root_file for marker in skip_markers):
            continue  # entries that are never uploaded
        f_path = os.path.join(update_path, root_file)

        if os.path.isdir(f_path):
            for root, _dirs, files in os.walk(f_path, topdown=False):
                for name in files:
                    if not name.endswith('.csv') or 'baidu' in name:
                        continue
                    df = _load_recent(os.path.join(root, name))
                    if len(df) == 0:
                        continue
                    self.df_to_mysql(df=df, db_name=db_name, table_name=root_file)
        elif os.path.isfile(f_path):
            if not f_path.endswith('.csv') or 'baidu' in f_path:
                continue
            # The original tested "'日期' not in columns" here, which raised
            # KeyError for files without the column and skipped the date filter
            # for files that had it; the shared helper uses the correct test.
            df = _load_recent(f_path)
            if len(df) == 0:
                continue
            table = f'{os.path.splitext(root_file)[0]}_f'  # file-level tables get the _f suffix
            self.df_to_mysql(df=df, db_name=db_name, table_name=table)
|
1159
|
-
|
1160
791
|
|
1161
792
|
class OptimizeDatas:
|
1162
793
|
"""
|