mdbq 3.6.14__py3-none-any.whl → 3.6.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/mysql/mysql.py
CHANGED
@@ -302,8 +302,7 @@ class MysqlUpload:
|
|
302
302
|
|
303
303
|
# 插入数据到数据库
|
304
304
|
# 有数据格式错误问题,所以分开处理,将数据主体移到最后面用占位符
|
305
|
-
|
306
|
-
logger.info(f'{now} 正在更新: mysql ({self.host}:{self.port}) {db_name}/{table_name} -> {filename}')
|
305
|
+
logger.info(f'正在更新: mysql ({self.host}:{self.port}) {db_name}/{table_name} -> {filename}')
|
307
306
|
if new_dict:
|
308
307
|
cols = ', '.join(f"`{item}`" for item in new_dict.keys()) # 列名需要转义
|
309
308
|
values = ', '.join([f'"{item}"' for item in new_dict.values()]) # 值要加引号
|
@@ -1031,9 +1030,7 @@ class MysqlUpload:
|
|
1031
1030
|
connection.commit() # 提交事务
|
1032
1031
|
|
1033
1032
|
if df_sql:
|
1034
|
-
|
1035
|
-
logger.info(
|
1036
|
-
f'{now} 正在更新: mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count}, {self.filename}')
|
1033
|
+
logger.info(f'正在更新: mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count}, {self.filename}')
|
1037
1034
|
engine = create_engine(
|
1038
1035
|
f"mysql+pymysql://{self.username}:{self.password}@{self.host}:{self.port}/{db_name}") # 创建数据库引擎
|
1039
1036
|
# df.to_csv('/Users/xigua/Downloads/mysql.csv', index=False, header=True, encoding='utf-8_sig')
|
@@ -1303,8 +1300,7 @@ class MysqlUpload:
|
|
1303
1300
|
# 将二进制数据写入到文件
|
1304
1301
|
with open(os.path.join(save_path, filename), 'wb') as f:
|
1305
1302
|
f.write(result['数据主体'])
|
1306
|
-
|
1307
|
-
logger.info(f'{now} 写入本地文件: ({self.host}:{self.port}) {db_name}/{table_name} -> {os.path.join(save_path, filename)}')
|
1303
|
+
logger.info(f'写入本地文件: ({self.host}:{self.port}) {db_name}/{table_name} -> {os.path.join(save_path, filename)}')
|
1308
1304
|
connection.close()
|
1309
1305
|
|
1310
1306
|
def read_mysql(self, table_name, start_date, end_date, db_name='远程数据源', date_name='日期'):
|
@@ -1325,8 +1321,7 @@ class MysqlUpload:
|
|
1325
1321
|
logger.info(f"Database {db_name} 数据库不存在")
|
1326
1322
|
return df
|
1327
1323
|
else:
|
1328
|
-
|
1329
|
-
logger.info(f'{now} mysql 正在查询表: {table_name}, 范围: {start_date}~{end_date}')
|
1324
|
+
logger.info(f'mysql 正在查询表: {table_name}, 范围: {start_date}~{end_date}')
|
1330
1325
|
except:
|
1331
1326
|
return df
|
1332
1327
|
finally:
|
@@ -1356,11 +1351,10 @@ class MysqlUpload:
|
|
1356
1351
|
if len(df) == 0:
|
1357
1352
|
logger.info(f'database: {db_name}, table: {table_name} 查询的数据为空')
|
1358
1353
|
else:
|
1359
|
-
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
1360
1354
|
cost_time = int(time.time() - before_time)
|
1361
1355
|
if cost_time < 1:
|
1362
1356
|
cost_time = round(time.time() - before_time, 2)
|
1363
|
-
logger.info(f'{now} mysql ({self.host}) 表: {table_name} 获取数据长度: {len(df)}, 用时: {cost_time} 秒')
|
1357
|
+
logger.info(f'mysql ({self.host}) 表: {table_name} 获取数据长度: {len(df)}, 用时: {cost_time} 秒')
|
1364
1358
|
return df
|
1365
1359
|
|
1366
1360
|
def upload_pandas(self, update_path, db_name, days=None):
|
@@ -1387,10 +1381,6 @@ class MysqlUpload:
|
|
1387
1381
|
for name in files:
|
1388
1382
|
if name.endswith('.csv') and 'baidu' not in name:
|
1389
1383
|
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
|
1390
|
-
# if '日期' not in df.columns.tolist():
|
1391
|
-
# now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
1392
|
-
# logger.info(f'{now} {root_file} 缺少日期列, 不支持上传 mysql')
|
1393
|
-
# continue
|
1394
1384
|
if '日期' in df.columns.tolist():
|
1395
1385
|
df['日期'] = df['日期'].apply(lambda x: pd.to_datetime(x) if x else x)
|
1396
1386
|
df = df[df['日期'] >= start_date]
|
@@ -1400,10 +1390,6 @@ class MysqlUpload:
|
|
1400
1390
|
elif os.path.isfile(f_path):
|
1401
1391
|
if f_path.endswith('.csv') and 'baidu' not in f_path:
|
1402
1392
|
df = pd.read_csv(f_path, encoding='utf-8_sig', header=0, na_filter=False)
|
1403
|
-
# if '日期' not in df.columns.tolist():
|
1404
|
-
# now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
1405
|
-
# logger.info(f'{now} {root_file} 缺少日期列, 不支持上传 mysql')
|
1406
|
-
# continue
|
1407
1393
|
if '日期' not in df.columns.tolist():
|
1408
1394
|
df['日期'] = df['日期'].apply(lambda x: pd.to_datetime(x) if x else x)
|
1409
1395
|
df = df[df['日期'] >= start_date]
|
@@ -1481,8 +1467,7 @@ class OptimizeDatas:
|
|
1481
1467
|
需要设置 self.db_name_lists
|
1482
1468
|
"""
|
1483
1469
|
if not self.db_name_lists:
|
1484
|
-
|
1485
|
-
logger.info(f'{now} 尚未设置参数: self.db_name_lists')
|
1470
|
+
logger.info(f'尚未设置参数: self.db_name_lists')
|
1486
1471
|
return
|
1487
1472
|
for db_name in self.db_name_lists:
|
1488
1473
|
self.db_name = db_name
|
@@ -1491,13 +1476,11 @@ class OptimizeDatas:
|
|
1491
1476
|
def optimize(self, except_key=['更新时间']):
|
1492
1477
|
""" 更新一个数据库 移除冗余数据 """
|
1493
1478
|
if not self.db_name:
|
1494
|
-
|
1495
|
-
logger.info(f'{now} 尚未设置参数: self.db_name')
|
1479
|
+
logger.info(f'尚未设置参数: self.db_name')
|
1496
1480
|
return
|
1497
1481
|
tables = self.table_list(db_name=self.db_name)
|
1498
1482
|
if not tables:
|
1499
|
-
|
1500
|
-
logger.info(f'{now} {self.db_name} -> 数据表不存在')
|
1483
|
+
logger.info(f'{self.db_name} -> 数据表不存在')
|
1501
1484
|
return
|
1502
1485
|
|
1503
1486
|
# 日期初始化
|
@@ -1514,8 +1497,7 @@ class OptimizeDatas:
|
|
1514
1497
|
start_date_before = self.start_date
|
1515
1498
|
end_date_before = self.end_date
|
1516
1499
|
|
1517
|
-
|
1518
|
-
logger.info(f'{now} mysql({self.host}: {self.port}) {self.db_name} 数据库优化中(日期长度: {self.days} 天)...')
|
1500
|
+
logger.info(f'mysql({self.host}: {self.port}) {self.db_name} 数据库优化中(日期长度: {self.days} 天)...')
|
1519
1501
|
for table_dict in tables:
|
1520
1502
|
for key, table_name in table_dict.items():
|
1521
1503
|
# if '店铺指标' not in table_name:
|
@@ -1531,8 +1513,7 @@ class OptimizeDatas:
|
|
1531
1513
|
cursor.execute(sql)
|
1532
1514
|
result = cursor.fetchone()
|
1533
1515
|
if not result:
|
1534
|
-
|
1535
|
-
logger.info(f'{now} 数据表: {table_name}, 数据长度为 0')
|
1516
|
+
logger.info(f'数据表: {table_name}, 数据长度为 0')
|
1536
1517
|
continue # 检查数据表是否为空
|
1537
1518
|
|
1538
1519
|
cursor.execute(f"SHOW FULL COLUMNS FROM `{table_name}`") # 查询数据表的列信息
|
@@ -1594,8 +1575,7 @@ class OptimizeDatas:
|
|
1594
1575
|
logger.info(f'333 {table_name} {e}')
|
1595
1576
|
self.connection.rollback()
|
1596
1577
|
self.connection.close()
|
1597
|
-
|
1598
|
-
logger.info(f'{now} mysql({self.host}: {self.port}) {self.db_name} 数据库优化完成!')
|
1578
|
+
logger.info(f'mysql({self.host}: {self.port}) {self.db_name} 数据库优化完成!')
|
1599
1579
|
|
1600
1580
|
def delete_duplicate(self, table_name, date, except_key=['更新时间']):
|
1601
1581
|
datas = self.table_datas(db_name=self.db_name, table_name=str(table_name), date=date)
|
@@ -1628,8 +1608,7 @@ class OptimizeDatas:
|
|
1628
1608
|
# 移除冗余数据
|
1629
1609
|
sql = f"DELETE FROM `{table_name}` WHERE id IN ({placeholders})"
|
1630
1610
|
cursor.execute(sql, duplicate_id)
|
1631
|
-
|
1632
|
-
logger.info(f"{now} {table_name} -> {date.strftime('%Y-%m-%d')} before: {len(datas)}, remove: {cursor.rowcount}")
|
1611
|
+
logger.info(f"{table_name} -> {date.strftime('%Y-%m-%d')} before: {len(datas)}, remove: {cursor.rowcount}")
|
1633
1612
|
self.connection.commit() # 提交事务
|
1634
1613
|
except Exception as e:
|
1635
1614
|
logger.info(f'{self.db_name}/{table_name}, {e}')
|
@@ -1666,8 +1645,7 @@ class OptimizeDatas:
|
|
1666
1645
|
# 移除冗余数据
|
1667
1646
|
sql = f"DELETE FROM `{table_name}` WHERE id IN ({placeholders})"
|
1668
1647
|
cursor.execute(sql, duplicate_id)
|
1669
|
-
|
1670
|
-
logger.info(f"{now} {table_name} -> before: {len(datas)}, "
|
1648
|
+
logger.info(f"{table_name} -> before: {len(datas)}, "
|
1671
1649
|
f"remove: {cursor.rowcount}")
|
1672
1650
|
self.connection.commit() # 提交事务
|
1673
1651
|
except Exception as e:
|
@@ -1697,8 +1675,7 @@ class OptimizeDatas:
|
|
1697
1675
|
cursor.execute(f"SHOW DATABASES LIKE '{db_name}'") # 检查数据库是否存在
|
1698
1676
|
database_exists = cursor.fetchone()
|
1699
1677
|
if not database_exists:
|
1700
|
-
|
1701
|
-
logger.info(f'{now} {db_name}: 数据表不存在!')
|
1678
|
+
logger.info(f'{db_name}: 数据表不存在!')
|
1702
1679
|
return
|
1703
1680
|
except Exception as e:
|
1704
1681
|
logger.info(f'002 {e}')
|
@@ -18,7 +18,7 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
|
|
18
18
|
mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
|
19
19
|
mdbq/mongo/mongo.py,sha256=M9DUeUCMPDngkwn9-ui0uTiFrvfNU1kLs22s5SmoNm0,31899
|
20
20
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
21
|
-
mdbq/mysql/mysql.py,sha256=
|
21
|
+
mdbq/mysql/mysql.py,sha256=ZB552VAMn7tz-Z0MPZKwY2LL2mAms0ATb5y68OBL9Tg,98712
|
22
22
|
mdbq/mysql/mysql_bak.py,sha256=_jFo2_OC1BNm5wEmoYiBG_TcuNNA2xUWKNhMBfgDiAM,99699
|
23
23
|
mdbq/mysql/recheck_mysql.py,sha256=ppBTfBLgkRWirMVZ31e_ZPULiGPJU7K3PP9G6QBZ3QI,8605
|
24
24
|
mdbq/mysql/s_query.py,sha256=CL2Ayo2sL11RbLnh9nE-GXA-NpA815-rrlFo24TipKY,8792
|
@@ -38,7 +38,7 @@ mdbq/redis/getredis.py,sha256=4rYk9lMRvvlpY7cV3VNQcSnbDWlZIsZZ-tSq--YqfSQ,26638
|
|
38
38
|
mdbq/redis/getredis_优化hash.py,sha256=q7omKJCPw_6Zr_r6WwTv4RGSXzZzpLPkIaqJ22svJhE,29104
|
39
39
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
40
40
|
mdbq/spider/aikucun.py,sha256=v7VO5gtEXR6_4Q6ujbTyu1FHu7TXHcwSQ6hIO249YH0,22208
|
41
|
-
mdbq-3.6.
|
42
|
-
mdbq-3.6.
|
43
|
-
mdbq-3.6.
|
44
|
-
mdbq-3.6.
|
41
|
+
mdbq-3.6.15.dist-info/METADATA,sha256=Zb_mvyFxjvFYudvISR7D9HMXnBj34_ZvWJXHDo9_pOY,244
|
42
|
+
mdbq-3.6.15.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
|
43
|
+
mdbq-3.6.15.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
44
|
+
mdbq-3.6.15.dist-info/RECORD,,
|
File without changes
|
File without changes
|