mdbq 3.8.13__py3-none-any.whl → 3.8.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/mysql/mysql.py CHANGED
@@ -141,135 +141,17 @@ class MysqlUpload:
141
141
  return __res_dict, new_dict_data
142
142
 
143
143
  @try_except
144
- def doc_to_sql(self, db_name, table_name, dict_data, set_typ={}, remove_by_key=None, allow_not_null=False, filename=None, reset_id=False):
145
- """
146
- db_name:
147
- table_name:
148
- remove_by_key: 设置时先删除数据再插入,不设置则直接添加
149
- dict_data:
150
- set_typ:
151
- allow_not_null:
152
- filename:
153
- reset_id:
154
- """
155
- if not self.config:
156
- return
157
- if '数据主体' not in dict_data.keys():
158
- logger.info(f'dict_data 中"数据主体"键不能为空')
159
- return
160
- connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
161
- if not connection:
162
- return
163
- with connection.cursor() as cursor:
164
- cursor.execute(f"SHOW DATABASES LIKE '{db_name}'") # 检查数据库是否存在
165
- database_exists = cursor.fetchone()
166
- if not database_exists:
167
- # 如果数据库不存在,则新建
168
- sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
169
- cursor.execute(sql)
170
- connection.commit()
171
- logger.info(f"创建Database: {db_name}")
172
-
173
- self.config.update({'database': db_name}) # 添加更新 config 字段
174
- connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
175
- if not connection:
176
- return
177
- with connection.cursor() as cursor:
178
- # 1. 查询表, 不存在则创建一个空表
179
- sql = "SHOW TABLES LIKE %s;" # 有特殊字符不需转义
180
- cursor.execute(sql, (table_name))
181
- if not cursor.fetchone():
182
- sql = f"CREATE TABLE IF NOT EXISTS `{table_name}` (id INT AUTO_INCREMENT PRIMARY KEY);"
183
- cursor.execute(sql)
184
- logger.info(f'创建 mysql 表: {table_name}')
185
-
186
- new_dict = {}
187
- [new_dict.update({k: v}) for k, v in dict_data.items() if k != '数据主体']
188
- # 清理列名中的非法字符
189
- dtypes, new_dict = self.cover_doc_dtypes(new_dict)
190
- if set_typ:
191
- # 更新自定义的列数据类型
192
- for k, v in dtypes.items():
193
- # 确保传进来的 set_typ 键存在于实际的 df 列才 update
194
- [dtypes.update({k: inside_v}) for inside_k, inside_v in set_typ.items() if k == inside_k]
195
-
196
- # 检查列
197
- sql = "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s;"
198
- cursor.execute(sql, (db_name, table_name))
199
- col_exist = [item['COLUMN_NAME'] for item in cursor.fetchall()] # 已存在的所有列
200
-
201
- col_not_exist = [col for col in set_typ.keys() if col not in col_exist] # 不存在的列
202
- # 不存在则新建列
203
- if col_not_exist: # 数据表中不存在的列
204
- for col in col_not_exist:
205
- # 创建列,需转义
206
- if allow_not_null:
207
- sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {set_typ[col]};"
208
- else:
209
- sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {set_typ[col]} NOT NULL;"
210
- cursor.execute(sql)
211
- logger.info(f"添加列: {col}({set_typ[col]})") # 添加列并指定数据类型
212
-
213
- if col == '日期':
214
- sql = f"CREATE INDEX index_name ON `{table_name}`(`{col}`);"
215
- logger.info(f"设置为索引: {col}({set_typ[col]})")
216
- cursor.execute(sql)
217
- connection.commit() # 提交事务
218
-
219
- if remove_by_key:
220
- # 删除数据
221
- se_key = ', '.join(remove_by_key)
222
- condition = []
223
- for up_col in remove_by_key:
224
- condition += [f'`{up_col}` = "{dict_data[up_col]}"']
225
- condition = ' AND '.join(condition)
226
- sql = f"SELECT {se_key} FROM `{table_name}` WHERE {condition}"
227
- cursor.execute(sql)
228
- result = cursor.fetchall()
229
- if result:
230
- sql = f'DELETE FROM `{table_name}` WHERE {condition};'
231
- cursor.execute(sql)
232
-
233
- # 插入数据到数据库
234
- # 有数据格式错误问题,所以分开处理,将数据主体移到最后面用占位符
235
- logger.info(f'正在更新: mysql ({self.host}:{self.port}) {db_name}/{table_name} -> {filename}')
236
- if new_dict:
237
- cols = ', '.join(f"`{item}`" for item in new_dict.keys()) # 列名需要转义
238
- values = ', '.join([f'"{item}"' for item in new_dict.values()]) # 值要加引号
239
- cols = ', '.join([cols, '数据主体'])
240
- binary_data = dict_data['数据主体']
241
- sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({values}, %s)"
242
- cursor.execute(sql, binary_data)
243
- else:
244
- sql = f"""INSERT INTO `{table_name}` (数据主体) VALUES (%s);"""
245
- cursor.execute(sql, dict_data['数据主体'])
246
-
247
- if reset_id:
248
- pass
249
- connection.commit()
250
-
251
- @try_except
252
- def insert_many_dict(self, db_name, table_name, dict_data_list, icm_update=None, main_key=None, unique_main_key=None, index_length=100, set_typ=None, allow_not_null=False, cut_data=None):
144
+ def insert_many_dict(self, db_name, table_name, dict_data_list, icm_update=None, index_length=100, set_typ=None, allow_not_null=False, cut_data=None):
253
145
  """
254
146
  插入字典数据
255
147
  dict_data: 字典
256
- main_key: 指定索引列, 通常用日期列,默认会设置日期为索引
257
- unique_main_key: 指定唯一索引列
258
148
  index_length: 索引长度
259
- icm_update: 增量更正,指定后 main_key 只用于检查/创建列,不能更新数据
149
+ icm_update: 增量更正
260
150
  set_typ: {}
261
151
  allow_not_null: 创建允许插入空值的列,正常情况下不允许空值
262
152
  """
263
153
  if not self.config:
264
154
  return
265
- if icm_update:
266
- if main_key or unique_main_key:
267
- logger.info(f'icm_update/unique_main_key/unique_main_key 参数不能同时设定')
268
- return
269
- if not main_key:
270
- main_key = []
271
- if not unique_main_key:
272
- unique_main_key = []
273
155
 
274
156
  if not dict_data_list:
275
157
  logger.info(f'dict_data_list 不能为空 ')
@@ -289,7 +171,6 @@ class MysqlUpload:
289
171
  except Exception as e:
290
172
  logger.error(f'{table_name} 将数据按年/月保存(cut_data),但在转换日期时报错 -> {e}')
291
173
 
292
- # connection = pymysql.connect(**self.config) # 连接数据库
293
174
  connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
294
175
  if not connection:
295
176
  return
@@ -298,14 +179,12 @@ class MysqlUpload:
298
179
  database_exists = cursor.fetchone()
299
180
  if not database_exists:
300
181
  # 如果数据库不存在,则新建
301
-
302
182
  sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
303
183
  cursor.execute(sql)
304
184
  connection.commit()
305
185
  logger.info(f"创建Database: {db_name}")
306
186
 
307
187
  self.config.update({'database': db_name}) # 添加更新 config 字段
308
- # connection = pymysql.connect(**self.config) # 重新连接数据库
309
188
  connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
310
189
  if not connection:
311
190
  return
@@ -339,47 +218,38 @@ class MysqlUpload:
339
218
  sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]};"
340
219
  else:
341
220
  sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
342
- # sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
343
- # logger.info(sql)
221
+
344
222
  cursor.execute(sql)
345
223
  logger.info(f"添加列: {col}({dtypes[col]})") # 添加列并指定数据类型
346
224
 
347
- if col in main_key or col == '日期':
225
+ if col == '日期':
348
226
  sql = f"CREATE INDEX index_name ON `{table_name}`(`{col}`);"
349
227
  logger.info(f"设置为索引: {col}({dtypes[col]})")
350
228
  cursor.execute(sql)
351
- if col in unique_main_key:
352
- if dtypes[col] == 'mediumtext':
353
- sql = f"ALTER TABLE `{table_name}` ADD UNIQUE (`{col}`({index_length}))"
354
- else:
355
- sql = f"ALTER TABLE `{table_name}` ADD UNIQUE (`{col}`)"
356
- cursor.execute(sql)
229
+
357
230
  connection.commit() # 提交事务
358
231
  """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
359
232
  """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
360
233
  # 处理插入的数据
361
234
  for dict_data in dict_data_list:
362
- # logger.info(dict_data)
363
235
  dtypes, dict_data = self.cover_dict_dtypes(dict_data=dict_data) # {'店铺名称': 'varchar(100)',...}
364
236
  if icm_update:
365
237
  """ 使用增量更新: 需确保 icm_update['主键'] 传进来的列组合是数据表中唯一,值不会发生变化且不会重复,否则可能产生覆盖 """
366
238
  sql = 'SELECT COLUMN_NAME FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
367
- cursor.execute(sql, (db_name, {table_name}))
239
+ cursor.execute(sql, (db_name, table_name))
368
240
  columns = cursor.fetchall()
369
241
  cols_exist = [col['COLUMN_NAME'] for col in columns] # 数据表的所有列, 返回 list
370
242
  update_col = [item for item in cols_exist if item not in icm_update and item != 'id'] # 除了主键外的其他列
371
243
 
372
- # unique_keys 示例: `日期`, `余额`
373
- unique_keys = ', '.join(f"`{item}`" for item in update_col) # 列名需要转义
374
- condition = []
375
- for up_col in icm_update:
376
- condition += [f'`{up_col}` = "{dict_data[up_col]}"']
377
- condition = ' AND '.join(condition) # condition值示例: `品销宝余额` = '2930.73' AND `短信剩余` = '67471'
378
- sql = f"SELECT {unique_keys} FROM `{table_name}` WHERE {condition}"
379
- # logger.info(sql)
380
- # sql = f"SELECT {unique_keys} FROM `{table_name}` WHERE `创建时间` = '2014-09-19 14:32:33'"
381
- cursor.execute(sql)
382
- results = cursor.fetchall() # results 是数据库取出的数据
244
+ # 构造查询条件(参数化)
245
+ condition = ' AND '.join([f'`{up_col}` = %s' for up_col in icm_update])
246
+ condition_values = [dict_data[up_col] for up_col in icm_update]
247
+
248
+ # 执行查询(参数化)
249
+ sql = f"SELECT {','.join([f'`{col}`' for col in update_col])} FROM `{table_name}` WHERE {condition}"
250
+ cursor.execute(sql, condition_values)
251
+ results = cursor.fetchall()
252
+
383
253
  if results: # 有数据返回,再进行增量检查
384
254
  for result in results: # results 是数据库数据, dict_data 是传进来的数据
385
255
  change_col = [] # 发生变化的列名
@@ -395,86 +265,61 @@ class MysqlUpload:
395
265
  mysql_value = re.sub(r'0+$', '', mysql_value)
396
266
  mysql_value = re.sub(r'\.$', '', mysql_value)
397
267
  if df_value != mysql_value: # 传进来的数据和数据库比较, 有变化
398
- # logger.info(f'{dict_data['日期']}{dict_data['商品id']}{col} 列的值有变化,{str(dict_data[col])} != {str(result[col])}')
399
268
  change_values += [f"`{col}` = \"{str(dict_data[col])}\""]
400
269
  change_col.append(col)
401
270
  not_change_col = [item for item in update_col if item not in change_col]
402
- # change_values 是 df 传进来且和数据库对比后,发生了变化的数据,值示例: [`品销宝余额` = '9999.0', `短信剩余` = '888']
403
- if change_values: # change_values 有数据返回,表示值需要更新
271
+
272
+ # 构造更新语句(参数化)
273
+ if change_values:
274
+ set_clause = ', '.join([f'`{col}` = %s' for col in change_col])
275
+ update_values = [dict_data[col] for col in change_col]
276
+ # 添加未变化列的查询条件
404
277
  if not_change_col:
405
- not_change_values = [f'`{col}` = "{str(dict_data[col])}"' for col in not_change_col]
406
- not_change_values = ' AND '.join(
407
- not_change_values) # 示例: `短信剩余` = '888' AND `test1` = '93'
408
- # logger.info(change_values, not_change_values)
409
- condition += f' AND {not_change_values}' # 重新构建完整的查询条件,将未发生变化的列加进查询条件
410
- change_values = ', '.join(f"{item}" for item in change_values) # 注意这里 item 外面没有反引号
411
- sql = "UPDATE `%s` SET %s WHERE %s" % (table_name, change_values, condition)
412
- # logger.info(sql)
413
- cursor.execute(sql)
278
+ not_change_condition = ' AND '.join([f'`{col}` = %s' for col in not_change_col])
279
+ condition += f' AND {not_change_condition}'
280
+ condition_values += [dict_data[col] for col in not_change_col]
281
+ # 执行更新
282
+ sql = f"UPDATE `{table_name}` SET {set_clause} WHERE {condition}"
283
+ cursor.execute(sql, update_values + condition_values)
414
284
  else: # 没有数据返回,则直接插入数据
415
- # cols = ', '.join(f"`{item}`" for item in dict_data.keys()) # 列名需要转义
416
- # # data.update({item: f"{data[item]}" for item in data.keys()}) # 全部值转字符, 不是必须的
417
- # values = ', '.join([f'"{item}"' for item in dict_data.values()]) # 值要加引号
418
- # sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({values});"
419
- # cursor.execute(sql)
420
-
421
- # 清理和验证列名
422
- safe_columns = [f"`{escape_string(str(col))}`" for col in dict_data.keys()]
423
- cols = ", ".join(safe_columns)
424
- # 使用参数化查询
425
- placeholders = ", ".join(["%s"] * len(dict_data))
285
+ # 参数化插入
286
+ cols = ', '.join([f'`{k}`' for k in dict_data.keys()])
287
+ placeholders = ', '.join(['%s'] * len(dict_data))
426
288
  sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({placeholders})"
427
- # 转义值并作为参数传递
428
- escaped_values = [escape_string(str(v)) if isinstance(v, str) else v for v in
429
- dict_data.values()]
430
- cursor.execute(sql, tuple(escaped_values))
431
-
289
+ cursor.execute(sql, tuple(dict_data.values()))
432
290
  connection.commit() # 提交数据库
433
291
  continue
434
292
 
435
- # 构建 keys
436
- keys_data = ', '.join([f'`{str(item)}`' for item in dict_data.keys()])
437
- # 构建 values
438
- values_data = ', '.join(f'"{str(item)}"' for item in dict_data.values())
439
- # 构建其他键值,重复时要更新的其他键
440
- if main_key:
441
- for col in main_key:
442
- del dict_data[col]
443
- if unique_main_key:
444
- for col in unique_main_key:
445
- del dict_data[col]
446
- # 涉及列名务必使用反引号
447
- update_datas = ', '.join([f'`{k}` = VALUES(`{k}`)' for k, v in dict_data.items()])
448
-
449
- # 构建 sql
450
- sql = f"INSERT INTO %s (%s) VALUES (%s) ON DUPLICATE KEY UPDATE %s" % (table_name, keys_data, values_data, update_datas)
451
- # logger.info(sql)
452
- cursor.execute(sql)
453
- connection.commit() # 提交数据库
293
+ # 标准插入逻辑(参数化修改)
294
+ # 构造更新列(排除主键)
295
+ update_cols = [k for k in dict_data.keys()]
296
+ # 构建SQL
297
+ cols = ', '.join([f'`{k}`' for k in dict_data.keys()])
298
+ placeholders = ', '.join(['%s'] * len(dict_data))
299
+ update_clause = ', '.join([f'`{k}` = VALUES(`{k}`)' for k in update_cols]) or 'id=id'
300
+
301
+ sql = f"""INSERT INTO `{table_name}` ({cols}) VALUES ({placeholders}) ON DUPLICATE KEY UPDATE {update_clause}"""
302
+ # 执行参数化查询
303
+ try:
304
+ cursor.execute(sql, tuple(dict_data.values()))
305
+ connection.commit()
306
+ except pymysql.Error as e:
307
+ logger.error(f"插入失败: {e}\nSQL: {cursor.mogrify(sql, tuple(dict_data.values()))}")
308
+ connection.rollback()
454
309
  connection.close()
455
310
 
456
311
  @try_except
457
- def dict_to_mysql(self, db_name, table_name, dict_data, icm_update=None, main_key=None, unique_main_key=None, index_length=100, set_typ=None, allow_not_null=False, cut_data=None):
312
+ def dict_to_mysql(self, db_name, table_name, dict_data, icm_update=None, index_length=100, set_typ=None, allow_not_null=False, cut_data=None):
458
313
  """
459
314
  插入字典数据
460
315
  dict_data: 字典
461
- main_key: 指定索引列, 通常用日期列,默认会设置日期为索引
462
- unique_main_key: 指定唯一索引列
463
316
  index_length: 索引长度
464
- icm_update: 增量更正,指定后 main_key 只用于检查/创建列,不能更新数据
317
+ icm_update: 增量更新
465
318
  set_typ: {}
466
319
  allow_not_null: 创建允许插入空值的列,正常情况下不允许空值
467
320
  """
468
321
  if not self.config:
469
322
  return
470
- if icm_update:
471
- if main_key or unique_main_key:
472
- logger.info(f'icm_update/unique_main_key/unique_main_key 参数不能同时设定')
473
- return
474
- if not main_key:
475
- main_key = []
476
- if not unique_main_key:
477
- unique_main_key = []
478
323
 
479
324
  if cut_data:
480
325
  if '日期' in dict_data.keys():
@@ -490,7 +335,6 @@ class MysqlUpload:
490
335
  except Exception as e:
491
336
  logger.error(f'{table_name} 将数据按年/月保存(cut_data),但在转换日期时报错 -> {e}')
492
337
 
493
- # connection = pymysql.connect(**self.config) # 连接数据库
494
338
  connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
495
339
  if not connection:
496
340
  return
@@ -505,7 +349,6 @@ class MysqlUpload:
505
349
  logger.info(f"创建Database: {db_name}")
506
350
 
507
351
  self.config.update({'database': db_name}) # 添加更新 config 字段
508
- # connection = pymysql.connect(**self.config) # 重新连接数据库
509
352
  connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
510
353
  if not connection:
511
354
  return
@@ -539,44 +382,34 @@ class MysqlUpload:
539
382
  sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]};"
540
383
  else:
541
384
  sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
542
- # sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
543
- # logger.info(sql)
544
385
  cursor.execute(sql)
545
386
  logger.info(f"添加列: {col}({dtypes[col]})") # 添加列并指定数据类型
546
387
 
547
- if col in main_key or col == '日期':
388
+ if col == '日期':
548
389
  sql = f"CREATE INDEX index_name ON `{table_name}`(`{col}`);"
549
390
  logger.info(f"设置为索引: {col}({dtypes[col]})")
550
391
  cursor.execute(sql)
551
- if col in unique_main_key:
552
- if dtypes[col] == 'mediumtext':
553
- sql = f"ALTER TABLE `{table_name}` ADD UNIQUE (`{col}`({index_length}))"
554
- else:
555
- sql = f"ALTER TABLE `{table_name}` ADD UNIQUE (`{col}`)"
556
- cursor.execute(sql)
557
392
  connection.commit() # 提交事务
558
393
  """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
559
394
  """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
560
395
  # 处理插入的数据
561
396
  if icm_update:
562
397
  """ 使用增量更新: 需确保 icm_update['主键'] 传进来的列组合是数据表中唯一,值不会发生变化且不会重复,否则可能产生覆盖 """
563
- sql = 'SELECT COLUMN_NAME FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
564
- cursor.execute(sql, (db_name, {table_name}))
565
- columns = cursor.fetchall()
566
- cols_exist = [col['COLUMN_NAME'] for col in columns] # 数据表的所有列, 返回 list
398
+ sql = """SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s"""
399
+ cursor.execute(sql, (db_name, table_name))
400
+ cols_exist = [col['COLUMN_NAME'] for col in cursor.fetchall()] # 数据表的所有列, 返回 list
567
401
  update_col = [item for item in cols_exist if item not in icm_update and item != 'id'] # 除了主键外的其他列
568
402
 
569
- # unique_keys 示例: `日期`, `余额`
570
- unique_keys = ', '.join(f"`{item}`" for item in update_col) # 列名需要转义
571
- condition = []
403
+ # 参数化构建查询条件
404
+ condition_params = []
405
+ condition_parts = []
572
406
  for up_col in icm_update:
573
- condition += [f'`{up_col}` = "{dict_data[up_col]}"']
574
- condition = ' AND '.join(condition) # condition值示例: `品销宝余额` = '2930.73' AND `短信剩余` = '67471'
575
- sql = f"SELECT {unique_keys} FROM `{table_name}` WHERE {condition}"
576
- # logger.info(sql)
577
- # sql = f"SELECT {unique_keys} FROM `{table_name}` WHERE `创建时间` = '2014-09-19 14:32:33'"
578
- cursor.execute(sql)
579
- results = cursor.fetchall() # results 是数据库取出的数据
407
+ condition_parts.append(f"`{up_col}` = %s")
408
+ condition_params.append(dict_data[up_col])
409
+
410
+ sql = f"""SELECT `{','.join(update_col)}` FROM `{table_name}` WHERE {' AND '.join(condition_parts)}"""
411
+ cursor.execute(sql, condition_params)
412
+ results = cursor.fetchall()
580
413
  if results: # 有数据返回,再进行增量检查
581
414
  for result in results: # results 是数据库数据, dict_data 是传进来的数据
582
415
  change_col = [] # 发生变化的列名
@@ -592,62 +425,35 @@ class MysqlUpload:
592
425
  mysql_value = re.sub(r'0+$', '', mysql_value)
593
426
  mysql_value = re.sub(r'\.$', '', mysql_value)
594
427
  if df_value != mysql_value: # 传进来的数据和数据库比较, 有变化
595
- # logger.info(f'{dict_data['日期']}{dict_data['商品id']}{col} 列的值有变化,{str(dict_data[col])} != {str(result[col])}')
596
428
  change_values += [f"`{col}` = \"{str(dict_data[col])}\""]
597
429
  change_col.append(col)
598
- not_change_col = [item for item in update_col if item not in change_col]
599
430
  # change_values 是 df 传进来且和数据库对比后,发生了变化的数据,值示例: [`品销宝余额` = '9999.0', `短信剩余` = '888']
600
431
  if change_values: # change_values 有数据返回,表示值需要更新
601
- if not_change_col:
602
- not_change_values = [f'`{col}` = "{str(dict_data[col])}"' for col in not_change_col]
603
- not_change_values = ' AND '.join(
604
- not_change_values) # 示例: `短信剩余` = '888' AND `test1` = '93'
605
- # logger.info(change_values, not_change_values)
606
- condition += f' AND {not_change_values}' # 重新构建完整的查询条件,将未发生变化的列加进查询条件
607
- change_values = ', '.join(f"{item}" for item in change_values) # 注意这里 item 外面没有反引号
608
- sql = "UPDATE `%s` SET %s WHERE %s" % (table_name, change_values, condition)
609
- # logger.info(sql)
610
- cursor.execute(sql)
611
- else: # 没有数据返回,则直接插入数据
612
- # cols = ', '.join(f"`{item}`" for item in dict_data.keys()) # 列名需要转义
613
- # # data.update({item: f"{data[item]}" for item in data.keys()}) # 全部值转字符, 不是必须的
614
- # values = ', '.join([f'"{item}"' for item in dict_data.values()]) # 值要加引号
615
- # sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({values});"
616
- # cursor.execute(sql)
617
-
618
- # 构建安全的INSERT语句
619
- safe_keys = [f"`{escape_string(str(k))}`" for k in dict_data.keys()]
620
- keys_data = ", ".join(safe_keys)
621
- placeholders = ", ".join(["%s"] * len(dict_data))
622
-
623
- # 使用参数化查询
624
- sql = f"INSERT INTO `{table_name}` ({keys_data}) VALUES ({placeholders}) ON DUPLICATE KEY UPDATE {update_datas}"
625
- escaped_values = [escape_string(str(v)) if isinstance(v, str) else v for v in dict_data.values()]
626
- cursor.execute(sql, tuple(escaped_values))
432
+ set_params = [dict_data[col] for col in change_col]
433
+ full_params = set_params + condition_params # 正确顺序
627
434
 
435
+ sql = f"""UPDATE `{table_name}`
436
+ SET {','.join(set_parts)}
437
+ WHERE {' AND '.join(condition_parts)}"""
438
+ cursor.execute(sql, full_params)
439
+ else: # 没有数据返回,则直接插入数据
440
+ # 参数化插入语句
441
+ keys = [f"`{k}`" for k in dict_data.keys()]
442
+ placeholders = ','.join(['%s'] * len(dict_data))
443
+ update_clause = ','.join([f"`{k}`=VALUES(`{k}`)" for k in dict_data.keys()])
444
+ sql = f"""INSERT INTO `{table_name}` ({','.join(keys)}) VALUES ({placeholders}) ON DUPLICATE KEY UPDATE {update_clause}"""
445
+ cursor.execute(sql, tuple(dict_data.values()))
628
446
  connection.commit() # 提交数据库
629
447
  connection.close()
630
448
  return
631
449
 
632
- # 构建 keys
633
- keys_data = ', '.join([f'`{str(item)}`' for item in dict_data.keys()])
634
- # 构建 values
635
- values_data = ', '.join(f'"{str(item)}"' for item in dict_data.values())
636
- # 构建其他键值,重复时要更新的其他键
637
- if main_key:
638
- for col in main_key:
639
- del dict_data[col]
640
- if unique_main_key:
641
- for col in unique_main_key:
642
- del dict_data[col]
643
- # 涉及列名务必使用反引号
644
- update_datas = ', '.join([f'`{k}` = VALUES(`{k}`)' for k, v in dict_data.items()])
645
-
646
- # 构建 sql
647
- sql = f"INSERT INTO %s (%s) VALUES (%s) ON DUPLICATE KEY UPDATE %s" % (table_name, keys_data, values_data, update_datas)
648
- # logger.info(sql)
649
- cursor.execute(sql)
650
- connection.commit() # 提交数据库
450
+ # 常规插入处理(参数化)
451
+ keys = [f"`{k}`" for k in dict_data.keys()]
452
+ placeholders = ','.join(['%s'] * len(dict_data))
453
+ update_clause = ','.join([f"`{k}`=VALUES(`{k}`)" for k in dict_data.keys()])
454
+ sql = f"""INSERT INTO `{table_name}` ({','.join(keys)}) VALUES ({placeholders}) ON DUPLICATE KEY UPDATE {update_clause}"""
455
+ cursor.execute(sql, tuple(dict_data.values()))
456
+ connection.commit()
651
457
  connection.close()
652
458
 
653
459
  def cover_dict_dtypes(self, dict_data):
@@ -749,17 +555,15 @@ class MysqlUpload:
749
555
  return __res_dict, df
750
556
 
751
557
  @try_except
752
- def df_to_mysql(self, df, db_name, table_name, set_typ=None, icm_update=[], move_insert=False, df_sql=False, drop_duplicates=False,
753
- filename=None, count=None, reset_id=False, allow_not_null=False, cut_data=None):
558
+ def df_to_mysql(self, df, db_name, table_name, set_typ=None, icm_update=[], move_insert=False, df_sql=False,
559
+ filename=None, count=None, allow_not_null=False, cut_data=None):
754
560
  """
755
561
  db_name: 数据库名
756
562
  table_name: 表名
757
- move_insert: 根据df 的日期,先移除数据库数据,再插入, df_sql, drop_duplicates, icm_update 都要设置为 False
563
+ move_insert: 根据df 的日期,先移除数据库数据,再插入, df_sql, icm_update 都要设置为 False
758
564
  原则上只限于聚合数据使用,原始数据插入时不要设置
759
-
760
565
  df_sql: 这是一个临时参数, 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重,初创表大量上传数据的时候使用
761
- drop_duplicates: 值为 True 时检查重复数据再插入,反之直接上传,数据量大时会比较慢
762
- icm_update: 增量更新, 在聚合数据中使用,原始文件不要使用,设置此参数时需将 drop_duplicates 改为 False
566
+ icm_update: 增量更新, 在聚合数据中使用,原始文件不要使用
763
567
  使用增量更新: 必须确保 icm_update 传进来的列必须是数据表中唯一主键,值不会发生变化,不会重复,否则可能产生错乱覆盖情况
764
568
  filename: 用来追踪处理进度,传这个参数是方便定位产生错误的文件
765
569
  allow_not_null: 创建允许插入空值的列,正常情况下不允许空值
@@ -767,12 +571,12 @@ class MysqlUpload:
767
571
  if not self.config:
768
572
  return
769
573
  if icm_update:
770
- if move_insert or df_sql or drop_duplicates:
771
- logger.info(f'icm_update/move_insert/df_sql/drop_duplicates 参数不能同时设定')
574
+ if move_insert or df_sql:
575
+ logger.info(f'icm_update/move_insert/df_sql 参数不能同时设定')
772
576
  return
773
577
  if move_insert:
774
- if icm_update or df_sql or drop_duplicates:
775
- logger.info(f'icm_update/move_insert/df_sql/drop_duplicates 参数不能同时设定')
578
+ if icm_update or df_sql:
579
+ logger.info(f'icm_update/move_insert/df_sql 参数不能同时设定')
776
580
  return
777
581
 
778
582
  self.filename = filename
@@ -811,12 +615,11 @@ class MysqlUpload:
811
615
  # 确保传进来的 set_typ 键存在于实际的 df 列才 update
812
616
  [dtypes.update({k: inside_v}) for inside_k, inside_v in set_typ.items() if k == inside_k]
813
617
 
814
- # connection = pymysql.connect(**self.config) # 连接数据库
815
618
  connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
816
619
  if not connection:
817
620
  return
818
621
  with connection.cursor() as cursor:
819
- cursor.execute(f"SHOW DATABASES LIKE '{db_name}'") # 检查数据库是否存在
622
+ cursor.execute("SHOW DATABASES LIKE %s", (db_name,)) # 检查数据库是否存在
820
623
  database_exists = cursor.fetchone()
821
624
  if not database_exists:
822
625
  # 如果数据库不存在,则新建
@@ -826,7 +629,6 @@ class MysqlUpload:
826
629
  logger.info(f"创建Database: {db_name}")
827
630
 
828
631
  self.config.update({'database': db_name}) # 添加更新 config 字段
829
- # connection = pymysql.connect(**self.config) # 重新连接数据库
830
632
  connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
831
633
  if not connection:
832
634
  return
@@ -835,8 +637,8 @@ class MysqlUpload:
835
637
  sql = "SHOW TABLES LIKE %s;" # 有特殊字符不需转义
836
638
  cursor.execute(sql, (table_name))
837
639
  if not cursor.fetchone():
838
- sql = f"CREATE TABLE IF NOT EXISTS `{table_name}` (id INT AUTO_INCREMENT PRIMARY KEY);"
839
- cursor.execute(sql)
640
+ create_table_sql = f"CREATE TABLE IF NOT EXISTS `{table_name}` (id INT AUTO_INCREMENT PRIMARY KEY)"
641
+ cursor.execute(create_table_sql)
840
642
  logger.info(f'创建 mysql 表: {table_name}')
841
643
 
842
644
  # 有特殊字符不需转义
@@ -850,11 +652,10 @@ class MysqlUpload:
850
652
  if col_not_exist: # 数据表中不存在的列
851
653
  for col in col_not_exist:
852
654
  # 创建列,需转义
853
- if allow_not_null:
854
- sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]};"
855
- else:
856
- sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
857
- cursor.execute(sql)
655
+ ialter_sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]}"
656
+ if not allow_not_null:
657
+ alter_sql += " NOT NULL"
658
+ cursor.execute(alter_sql)
858
659
  logger.info(f"添加列: {col}({dtypes[col]})") # 添加列并指定数据类型
859
660
 
860
661
  # 创建索引
@@ -870,17 +671,14 @@ class MysqlUpload:
870
671
  logger.info(f'正在更新: mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count}, {self.filename}')
871
672
  engine = create_engine(
872
673
  f"mysql+pymysql://{self.username}:{self.password}@{self.host}:{self.port}/{db_name}") # 创建数据库引擎
873
- # df.to_csv('/Users/xigua/Downloads/mysql.csv', index=False, header=True, encoding='utf-8_sig')
874
- # df.to_excel('/Users/xigua/Downloads/mysql.xlsx', index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
875
674
  df.to_sql(
876
675
  name=table_name,
877
676
  con=engine,
878
677
  if_exists='append',
879
678
  index=False,
880
- chunksize=1000
679
+ chunksize=1000,
680
+ method='multi'
881
681
  )
882
- if reset_id:
883
- pass
884
682
  connection.commit() # 提交事务
885
683
  connection.close()
886
684
  return
@@ -889,13 +687,15 @@ class MysqlUpload:
889
687
  if move_insert and '日期' in df.columns.tolist():
890
688
  # 移除数据
891
689
  dates = df['日期'].values.tolist()
892
- # logger.info(dates)
893
690
  dates = [pd.to_datetime(item) for item in dates] # 需要先转换类型才能用 min, max
894
691
  start_date = pd.to_datetime(min(dates)).strftime('%Y-%m-%d')
895
692
  end_date = (pd.to_datetime(max(dates)) + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
896
693
 
897
- sql = f"DELETE FROM `{table_name}` WHERE {'日期'} BETWEEN '%s' AND '%s'" % (start_date, end_date)
898
- cursor.execute(sql)
694
+ delete_sql = f"""
695
+ DELETE FROM `{table_name}`
696
+ WHERE 日期 BETWEEN %s AND %s
697
+ """
698
+ cursor.execute(delete_sql, (start_date, end_date))
899
699
  connection.commit()
900
700
 
901
701
  # 插入数据
@@ -906,7 +706,8 @@ class MysqlUpload:
906
706
  con=engine,
907
707
  if_exists='append',
908
708
  index=False,
909
- chunksize=1000
709
+ chunksize=1000,
710
+ method='multi'
910
711
  )
911
712
  return
912
713
 
@@ -915,236 +716,78 @@ class MysqlUpload:
915
716
  # data 是传进来待处理的数据, 不是数据库数据
916
717
  # data 示例: {'日期': Timestamp('2024-08-27 00:00:00'), '推广费余额': 33299, '品销宝余额': 2930.73, '短信剩余': 67471}
917
718
  try:
918
- cols = ', '.join(f"`{item}`" for item in data.keys()) # 列名需要转义
919
- # data.update({item: f"{data[item]}" for item in data.keys()}) # 全部值转字符, 不是必须的
920
- values = ', '.join([f'"{item}"' for item in data.values()]) # 值要加引号
921
- condition = []
719
+ # 预处理数据:转换非字符串类型
720
+ processed_data = {}
922
721
  for k, v in data.items():
923
- condition += [f'`{k}` = "{v}"']
924
- condition = ' AND '.join(condition) # 构建查询条件
925
- # logger.info(condition)
926
-
927
- if drop_duplicates: # 查重插入
928
- sql = "SELECT %s FROM %s WHERE %s" % (cols, table_name, condition)
929
- # sql = f"SELECT {cols} FROM `{table_name}` WHERE `创建时间` = '2014-09-19 14:32:33'"
930
- cursor.execute(sql)
931
- result = cursor.fetchall() # 获取查询结果, 有结果返回 list 表示数据已存在(不重复插入),没有则返回空 tuple
932
- # logger.info(result)
933
- if not result: # 数据不存在则插入
934
- sql = f"INSERT INTO `{table_name}` ({cols}) VALUES (%s);" % (values)
935
- # logger.info(sql)
936
- cursor.execute(sql)
937
- # else:
938
- # logger.info(f'重复数据不插入: {condition[:50]}...')
939
- elif icm_update: # 增量更新, 专门用于聚合数据,其他库不要调用
940
- """ 使用增量更新: 需确保 icm_update['主键'] 传进来的列必须是数据表中唯一主键,值不会发生变化且不会重复,否则可能产生覆盖情况 """
941
- sql = 'SELECT COLUMN_NAME FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
942
- cursor.execute(sql, (db_name, {table_name}))
943
- columns = cursor.fetchall()
944
- cols_exist = [col['COLUMN_NAME'] for col in columns] # 数据表的所有列, 返回 list
945
- update_col = [item for item in cols_exist if
946
- item not in icm_update and item != 'id'] # 除了主键外的其他列
947
-
948
- # unique_keys 示例: `日期`, `余额`
949
- unique_keys = ', '.join(f"`{item}`" for item in update_col) # 列名需要转义
950
- condition = []
951
- for up_col in icm_update:
952
- condition += [f'`{up_col}` = "{data[up_col]}"']
953
- condition = ' AND '.join(condition) # condition值示例: `品销宝余额` = '2930.73' AND `短信剩余` = '67471'
954
- sql = f"SELECT {unique_keys} FROM `{table_name}` WHERE {condition}"
955
- # logger.info(sql)
956
- # sql = f"SELECT {unique_keys} FROM `{table_name}` WHERE `创建时间` = '2014-09-19 14:32:33'"
957
- cursor.execute(sql)
958
- results = cursor.fetchall() # results 是数据库取出的数据
959
- if results: # 有数据返回,再进行增量检查
960
- for result in results: # results 是数据库数据, data 是传进来的数据
961
- change_col = [] # 发生变化的列名
962
- change_values = [] # 发生变化的数据
963
- for col in update_col:
964
- # 因为 mysql 里面有 decimal 数据类型,要移除末尾的 0 再做比较(df 默认将 5.00 小数截断为 5.0)
965
- df_value = str(data[col])
966
- mysql_value = str(result[col])
967
- if '.' in df_value:
968
- df_value = re.sub(r'0+$', '', df_value)
969
- df_value = re.sub(r'\.$', '', df_value)
970
- if '.' in mysql_value:
971
- mysql_value = re.sub(r'0+$', '', mysql_value)
972
- mysql_value = re.sub(r'\.$', '', mysql_value)
973
- if df_value != mysql_value: # 传进来的数据和数据库比较, 有变化
974
- # logger.info(f'{data['日期']}{data['商品id']}{col} 列的值有变化,{str(data[col])} != {str(result[col])}')
975
- change_values += [f"`{col}` = \"{str(data[col])}\""]
976
- change_col.append(col)
977
- not_change_col = [item for item in update_col if item not in change_col]
978
- # change_values 是 df 传进来且和数据库对比后,发生了变化的数据,值示例: [`品销宝余额` = '9999.0', `短信剩余` = '888']
979
- if change_values: # change_values 有数据返回,表示值需要更新
980
- if not_change_col:
981
- not_change_values = [f'`{col}` = "{str(data[col])}"' for col in not_change_col]
982
- not_change_values = ' AND '.join(
983
- not_change_values) # 示例: `短信剩余` = '888' AND `test1` = '93'
984
- # logger.info(change_values, not_change_values)
985
- condition += f' AND {not_change_values}' # 重新构建完整的查询条件,将未发生变化的列加进查询条件
986
- change_values = ', '.join(f"{item}" for item in change_values) # 注意这里 item 外面没有反引号
987
- sql = "UPDATE `%s` SET %s WHERE %s" % (table_name, change_values, condition)
988
- # logger.info(sql)
989
- cursor.execute(sql)
990
- else: # 没有数据返回,则直接插入数据
991
- sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({values});"
992
- cursor.execute(sql)
722
+ if isinstance(v, (int, float)):
723
+ processed_data[k] = float(v)
724
+ elif isinstance(v, pd.Timestamp):
725
+ processed_data[k] = v.strftime('%Y-%m-%d')
726
+ else:
727
+ processed_data[k] = str(v)
728
+
729
+ # 构建基础SQL要素
730
+ columns = [f'`{k}`' for k in processed_data.keys()]
731
+ placeholders = ', '.join(['%s'] * len(processed_data))
732
+ values = list(processed_data.values())
733
+
734
+ # 构建基本INSERT语句
735
+ insert_sql = f"INSERT INTO `{table_name}` ({', '.join(columns)}) VALUES ({placeholders})"
736
+
737
+ if icm_update: # 增量更新, 专门用于聚合数据,其他库不要调用
738
+ # 获取数据表结构
739
+ cursor.execute(
740
+ "SELECT COLUMN_NAME FROM information_schema.columns "
741
+ "WHERE table_schema = %s AND table_name = %s",
742
+ (db_name, table_name)
743
+ )
744
+ cols_exist = [row['COLUMN_NAME'] for row in cursor.fetchall()]
745
+ update_columns = [col for col in cols_exist if col not in icm_update and col != 'id']
746
+
747
+ # 构建WHERE条件
748
+ where_conditions = []
749
+ where_values = []
750
+ for col in icm_update:
751
+ where_conditions.append(f"`{col}` = %s")
752
+ where_values.append(processed_data[col])
753
+
754
+ # 查询现有数据
755
+ select_sql = f"SELECT {', '.join([f'`{col}`' for col in update_columns])} " \
756
+ f"FROM `{table_name}` WHERE {' AND '.join(where_conditions)}"
757
+ cursor.execute(select_sql, where_values)
758
+ existing_data = cursor.fetchone()
759
+
760
+ if existing_data:
761
+ # 比较并构建更新语句
762
+ update_set = []
763
+ update_values = []
764
+ for col in update_columns:
765
+ db_value = existing_data[col]
766
+ new_value = processed_data[col]
767
+
768
+ # 处理数值类型的精度差异
769
+ if isinstance(db_value, float) and isinstance(new_value, float):
770
+ if not math.isclose(db_value, new_value, rel_tol=1e-9):
771
+ update_set.append(f"`{col}` = %s")
772
+ update_values.append(new_value)
773
+ elif db_value != new_value:
774
+ update_set.append(f"`{col}` = %s")
775
+ update_values.append(new_value)
776
+
777
+ if update_set:
778
+ update_sql = f"UPDATE `{table_name}` SET {', '.join(update_set)} " \
779
+ f"WHERE {' AND '.join(where_conditions)}"
780
+ cursor.execute(update_sql, update_values + where_values)
781
+ else:
782
+ cursor.execute(insert_sql, values)
993
783
  else:
994
- sql = f"INSERT INTO `{table_name}` ({cols}) VALUES (%s);" % (values)
995
- cursor.execute(sql)
784
+ # 普通插入
785
+ cursor.execute(insert_sql, values)
996
786
  except Exception as e:
997
787
  pass
998
-
999
- if reset_id:
1000
- pass
1001
788
  connection.commit() # 提交事务
1002
789
  connection.close()
1003
790
 
1004
- @try_except
1005
- def read_doc_data(self, table_name, db_name='pdf文件', column='文件名', filename=None, save_path='/Users/xigua/Downloads'):
1006
- """
1007
- db_name:
1008
- table_name:
1009
- column: 读取哪一列
1010
- filename: 文件名称
1011
- save_path: 保存位置
1012
- """
1013
- if not filename:
1014
- logger.info(f'未指定文件名: filename')
1015
- return
1016
- # connection = pymysql.connect(**self.config) # 连接数据库
1017
- connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
1018
- if not connection:
1019
- return
1020
- # try:
1021
- with connection.cursor() as cursor:
1022
- cursor.execute(f"SHOW DATABASES LIKE '{db_name}'") # 检查数据库是否存在
1023
- database_exists = cursor.fetchone()
1024
- if not database_exists:
1025
- logger.info(f"Database {db_name} 数据库不存在")
1026
- return
1027
- self.config.update({'database': db_name})
1028
- # connection = pymysql.connect(**self.config) # 重新连接数据库
1029
- connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
1030
- if not connection:
1031
- return
1032
- with connection.cursor() as cursor:
1033
- # 1. 查询表
1034
- sql = "SHOW TABLES LIKE %s;" # 有特殊字符不需转义
1035
- cursor.execute(sql, (table_name))
1036
- if not cursor.fetchone():
1037
- logger.info(f'{table_name} -> 数据表不存在')
1038
- return
1039
-
1040
- # 读取数据
1041
- condition = f'`{column}` = "{filename}"'
1042
- sql = f"SELECT `{column}`, `数据主体` FROM `{table_name}` WHERE {condition}"
1043
- cursor.execute(sql)
1044
- results = cursor.fetchall()
1045
- if results:
1046
- for result in results:
1047
- # 将二进制数据写入到文件
1048
- with open(os.path.join(save_path, filename), 'wb') as f:
1049
- f.write(result['数据主体'])
1050
- logger.info(f'写入本地文件: ({self.host}:{self.port}) {db_name}/{table_name} -> {os.path.join(save_path, filename)}')
1051
- connection.close()
1052
-
1053
- def read_mysql(self, table_name, start_date, end_date, db_name='远程数据源', date_name='日期'):
1054
- """ 读取指定数据表,可指定日期范围,返回结果: df """
1055
- start_date = pd.to_datetime(start_date).strftime('%Y-%m-%d')
1056
- end_date = pd.to_datetime(end_date).strftime('%Y-%m-%d')
1057
- df = pd.DataFrame()
1058
-
1059
- # connection = pymysql.connect(**self.config) # 连接数据库
1060
- connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
1061
- if not connection:
1062
- return
1063
- try:
1064
- with connection.cursor() as cursor:
1065
- cursor.execute(f"SHOW DATABASES LIKE '{db_name}'") # 检查数据库是否存在
1066
- database_exists = cursor.fetchone()
1067
- if not database_exists:
1068
- logger.info(f"Database {db_name} 数据库不存在")
1069
- return df
1070
- else:
1071
- logger.info(f'mysql 正在查询表: {table_name}, 范围: {start_date}~{end_date}')
1072
- except:
1073
- return df
1074
- finally:
1075
- connection.close() # 断开连接
1076
-
1077
- before_time = time.time()
1078
- # 读取数据
1079
- self.config.update({'database': db_name})
1080
- # connection = pymysql.connect(**self.config) # 重新连接数据库
1081
- connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=10)
1082
- if not connection:
1083
- return
1084
- try:
1085
- with connection.cursor() as cursor:
1086
- # 获取指定日期范围的数据
1087
- sql = f"SELECT * FROM `{db_name}`.`{table_name}` WHERE `{date_name}` BETWEEN '%s' AND '%s'" % (start_date, end_date)
1088
- cursor.execute(sql)
1089
- rows = cursor.fetchall() # 获取查询结果
1090
- columns = [desc[0] for desc in cursor.description]
1091
- df = pd.DataFrame(rows, columns=columns) # 转为 df
1092
- except Exception as e:
1093
- logger.error(f'{e} {db_name} -> {table_name} 表不存在')
1094
- return df
1095
- finally:
1096
- connection.close()
1097
-
1098
- if len(df) == 0:
1099
- logger.info(f'database: {db_name}, table: {table_name} 查询的数据为空')
1100
- else:
1101
- cost_time = int(time.time() - before_time)
1102
- if cost_time < 1:
1103
- cost_time = round(time.time() - before_time, 2)
1104
- logger.info(f'mysql ({self.host}) 表: {table_name} 获取数据长度: {len(df)}, 用时: {cost_time} 秒')
1105
- return df
1106
-
1107
- def upload_pandas(self, update_path, db_name, days=None):
1108
- """
1109
- 专门用来上传 pandas数据源的全部文件
1110
- db_name: 数据库名: pandas数据源
1111
- update_path: pandas数据源所在路径
1112
- days: 更新近期数据,单位: 天, 不设置则全部更新
1113
- """
1114
- if days:
1115
- today = datetime.date.today()
1116
- start_date = pd.to_datetime(today - datetime.timedelta(days=days))
1117
- else:
1118
- start_date = pd.to_datetime('2000-01-01')
1119
-
1120
- root_files = os.listdir(update_path)
1121
- for root_file in root_files:
1122
- if '其他数据' in root_file or '年.csv' in root_file or '京东数据集' in root_file:
1123
- continue # 跳过的文件夹
1124
- f_path = os.path.join(update_path, root_file)
1125
-
1126
- if os.path.isdir(f_path):
1127
- for root, dirs, files in os.walk(f_path, topdown=False):
1128
- for name in files:
1129
- if name.endswith('.csv') and 'baidu' not in name:
1130
- df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
1131
- if '日期' in df.columns.tolist():
1132
- df['日期'] = df['日期'].apply(lambda x: pd.to_datetime(x) if x else x)
1133
- df = df[df['日期'] >= start_date]
1134
- if len(df) == 0:
1135
- continue
1136
- self.df_to_mysql(df=df, db_name=db_name, table_name=root_file)
1137
- elif os.path.isfile(f_path):
1138
- if f_path.endswith('.csv') and 'baidu' not in f_path:
1139
- df = pd.read_csv(f_path, encoding='utf-8_sig', header=0, na_filter=False)
1140
- if '日期' not in df.columns.tolist():
1141
- df['日期'] = df['日期'].apply(lambda x: pd.to_datetime(x) if x else x)
1142
- df = df[df['日期'] >= start_date]
1143
- if len(df) == 0:
1144
- continue
1145
- table = f'{os.path.splitext(root_file)[0]}_f' # 这里定义了文件表会加 _f 后缀
1146
- self.df_to_mysql(df=df, db_name=db_name, table_name=table)
1147
-
1148
791
 
1149
792
  class OptimizeDatas:
1150
793
  """