mdbq 3.3.5__py3-none-any.whl → 3.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/query_data.py +9 -40
- mdbq/mongo/mongo.py +24 -22
- mdbq/mysql/mysql.py +215 -1
- mdbq/mysql/recheck_mysql.py +1 -1
- mdbq/mysql/s_query.py +0 -1
- mdbq/spider/aikucun.py +9 -55
- {mdbq-3.3.5.dist-info → mdbq-3.3.8.dist-info}/METADATA +1 -1
- {mdbq-3.3.5.dist-info → mdbq-3.3.8.dist-info}/RECORD +10 -21
- mdbq/aggregation/df_types.py +0 -188
- mdbq/aggregation/mysql_types.py +0 -240
- mdbq/clean/__init__.py +0 -4
- mdbq/clean/clean_upload.py +0 -1350
- mdbq/clean/data_clean.py +0 -1551
- mdbq/company/__init__.py +0 -4
- mdbq/company/copysh.py +0 -447
- mdbq/config/get_myconf.py +0 -131
- mdbq/config/update_conf.py +0 -102
- mdbq/req_post/__init__.py +0 -4
- mdbq/req_post/req_tb.py +0 -624
- {mdbq-3.3.5.dist-info → mdbq-3.3.8.dist-info}/WHEEL +0 -0
- {mdbq-3.3.5.dist-info → mdbq-3.3.8.dist-info}/top_level.txt +0 -0
mdbq/aggregation/query_data.py
CHANGED
@@ -2088,6 +2088,8 @@ class MysqlDatasQuery:
|
|
2088
2088
|
}
|
2089
2089
|
min_date = df['日期'].min()
|
2090
2090
|
max_date = df['日期'].max()
|
2091
|
+
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
2092
|
+
print(f'{now} 正在更新: mysql ({host}:{port}) {db_name}/{table_name} -> {min_date}~{max_date}')
|
2091
2093
|
new_dict = {
|
2092
2094
|
'日期': '',
|
2093
2095
|
'店铺名称': '',
|
@@ -2121,23 +2123,24 @@ class MysqlDatasQuery:
|
|
2121
2123
|
'预售定金支付金额': '',
|
2122
2124
|
'预售预估总金额': '',
|
2123
2125
|
}
|
2124
|
-
|
2125
|
-
print(f'{now} 正在更新: mysql ({host}:{port}) {db_name}/{table_name} -> {min_date}~{max_date}')
|
2126
|
+
_results = []
|
2126
2127
|
for dict_data in df.to_dict(orient='records'):
|
2127
2128
|
new_dict.update(dict_data)
|
2128
|
-
|
2129
|
+
_results.append(new_dict)
|
2130
|
+
if _results:
|
2131
|
+
m_engine.insert_many_dict(
|
2129
2132
|
db_name=db_name,
|
2130
2133
|
table_name=table_name,
|
2131
|
-
|
2134
|
+
dict_data_list=_results,
|
2132
2135
|
unique_main_key=None,
|
2133
2136
|
icm_update=['场次id'], # 唯一组合键
|
2134
2137
|
main_key=None, # 指定索引列, 通常用日期列,默认会设置日期为索引
|
2135
2138
|
set_typ=set_typ, # 指定数据类型
|
2136
2139
|
)
|
2137
|
-
company_engine.
|
2140
|
+
company_engine.insert_many_dict(
|
2138
2141
|
db_name=db_name,
|
2139
2142
|
table_name=table_name,
|
2140
|
-
|
2143
|
+
dict_data_list=_results,
|
2141
2144
|
unique_main_key=None,
|
2142
2145
|
icm_update=['场次id'], # 唯一组合键
|
2143
2146
|
main_key=None, # 指定索引列, 通常用日期列,默认会设置日期为索引
|
@@ -2564,40 +2567,6 @@ class MysqlDatasQuery:
|
|
2564
2567
|
max_date = df['日期'].max()
|
2565
2568
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
2566
2569
|
print(f'{now} 正在更新: mysql ({host}:{port}) {db_name}/{table_name} -> {min_date}~{max_date}')
|
2567
|
-
# new_dict = {
|
2568
|
-
# '日期': '',
|
2569
|
-
# '店铺名称': '',
|
2570
|
-
# '序号': '',
|
2571
|
-
# '商品名称': '',
|
2572
|
-
# 'spu_id': '',
|
2573
|
-
# '商品款号': '',
|
2574
|
-
# '一级类目名称': '',
|
2575
|
-
# '二级类目名称': '',
|
2576
|
-
# '三级类目名称': '',
|
2577
|
-
# '访客量': '',
|
2578
|
-
# '浏览量': '',
|
2579
|
-
# '下单gmv': '',
|
2580
|
-
# '成交gmv': '',
|
2581
|
-
# '支付人数_成交': '',
|
2582
|
-
# }
|
2583
|
-
# for dict_data in df.to_dict(orient='records'):
|
2584
|
-
# new_dict.update(dict_data)
|
2585
|
-
# m_engine.dict_to_mysql(
|
2586
|
-
# db_name=db_name,
|
2587
|
-
# table_name=table_name,
|
2588
|
-
# dict_data=new_dict,
|
2589
|
-
# icm_update=['日期', '店铺名称', 'spu_id', '商品款号'],
|
2590
|
-
# unique_main_key=None,
|
2591
|
-
# set_typ=set_typ,
|
2592
|
-
# )
|
2593
|
-
# company_engine.dict_to_mysql(
|
2594
|
-
# db_name=db_name,
|
2595
|
-
# table_name=table_name,
|
2596
|
-
# dict_data=new_dict,
|
2597
|
-
# icm_update=['日期', '店铺名称', 'spu_id', '商品款号'],
|
2598
|
-
# unique_main_key=None,
|
2599
|
-
# set_typ=set_typ,
|
2600
|
-
# )
|
2601
2570
|
m_engine.df_to_mysql(
|
2602
2571
|
df=df,
|
2603
2572
|
db_name=db_name,
|
mdbq/mongo/mongo.py
CHANGED
@@ -8,11 +8,23 @@ import pandas as pd
|
|
8
8
|
import numpy as np
|
9
9
|
import pymongo
|
10
10
|
from functools import wraps
|
11
|
+
import socket
|
12
|
+
import platform
|
11
13
|
from concurrent.futures import ThreadPoolExecutor
|
12
|
-
from mdbq.config import
|
14
|
+
from mdbq.config import myconfig
|
13
15
|
from mdbq.dataframe import converter
|
14
16
|
|
15
17
|
warnings.filterwarnings('ignore')
|
18
|
+
# Choose which service section of the shared config supplies the module-level
# connection settings: the two named hosts read the 'xigua_lx' section, every
# other machine reads the 'company' section.
if socket.gethostname() in ('company', 'Mac2.local'):
    _service_key = 'xigua_lx'
else:
    _service_key = 'company'

conf = myconfig.main()
conf_data = conf['Windows'][_service_key]['mysql']['remoto']
username = conf_data['username']
password = conf_data['password']
host = conf_data['host']
port = conf_data['port']
|
16
28
|
|
17
29
|
|
18
30
|
def rename_col(username, password, host, db_name, collection_name, old_name, new_name, port: int = 27017,):
|
@@ -679,30 +691,21 @@ class OptimizeDatas:
|
|
679
691
|
|
680
692
|
|
681
693
|
def upload_one_dir():
    """Build an UploadMongo client from the host-specific config and announce the run.

    The two named hosts read the 'xigua_lx' section of the config; any other
    machine reads the 'company' section.
    """
    # NOTE(review): the credentials come from the ['mysql'] section although they
    # are handed to a Mongo uploader -- confirm this is intended.
    if socket.gethostname() in ('company', 'Mac2.local'):
        service_key = 'xigua_lx'
    else:
        service_key = 'company'

    settings = myconfig.main()
    credentials = settings['Windows'][service_key]['mysql']['remoto']
    username = credentials['username']
    password = credentials['password']
    host = credentials['host']
    port = credentials['port']

    p = UploadMongo(
        username=username,
        password=password,
        host=host,
        port=port,
        drop_duplicates=False,
    )
    stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
    print(f'{stamp}数据处理中...')
|
686
708
|
|
687
|
-
p.db_name = ''
|
688
|
-
p.collection_name = f''
|
689
|
-
path = os.path.join('C:\\同步空间', 'BaiduSyncdisk', '原始文件2', r'京东报表', 'JD流量来源')
|
690
|
-
|
691
|
-
for root, dirs, files in os.walk(path, topdown=False):
|
692
|
-
for name in files:
|
693
|
-
if '按天_' not in name:
|
694
|
-
continue
|
695
|
-
if name.endswith('.csv') and 'baidu' not in name:
|
696
|
-
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
|
697
|
-
for col in df.columns.tolist():
|
698
|
-
if '日期' in col:
|
699
|
-
df[col] = df[col].apply(lambda x: pd.to_datetime(x) if x else pd.to_datetime('2099-01-01'))
|
700
|
-
p.df_to_mongo(df=df)
|
701
|
-
if p.client:
|
702
|
-
p.client.close()
|
703
|
-
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
|
704
|
-
print(f'{now}数据完成!')
|
705
|
-
|
706
709
|
|
707
710
|
def main():
|
708
711
|
pass
|
@@ -710,7 +713,6 @@ def main():
|
|
710
713
|
|
711
714
|
if __name__ == '__main__':
|
712
715
|
# main()
|
713
|
-
username, password, host, port = get_myconf.select_config_values(target_service='home_lx', database='mongodb')
|
714
716
|
print(username, password, host, port)
|
715
717
|
|
716
718
|
# for db_name in [
|
mdbq/mysql/mysql.py
CHANGED
@@ -344,6 +344,221 @@ class MysqlUpload:
|
|
344
344
|
connection.rollback()
|
345
345
|
connection.commit()
|
346
346
|
|
347
|
+
@try_except
def insert_many_dict(self, db_name, table_name, dict_data_list, icm_update=None, main_key=None, unique_main_key=None, index_length=100, set_typ=None, allow_not_null=False, cut_data=None):
    """
    Insert a list of row dicts into a MySQL table, creating what is missing.

    The database, the table and any missing columns are created on the fly.
    The schema is derived from the first record only, so every record is
    expected to carry the same keys.

    dict_data_list: list of dicts, one dict per row
    main_key: columns that get a plain index when they are newly created
              (a newly created '日期' column is always indexed)
    unique_main_key: columns that get a UNIQUE index when they are newly created
    index_length: prefix length used for a UNIQUE index on a mediumtext column
    icm_update: incremental update -- the listed columns identify an existing
                row; when a match is found only the columns whose value changed
                are updated, otherwise the record is inserted. Cannot be
                combined with main_key / unique_main_key.
    set_typ: {column: sql type} overrides for the inferred column types
    allow_not_null: when true new columns are created nullable, otherwise NOT NULL
    cut_data: 'year' or 'month' -- suffix the table name with the year (or
              year-month) of the first record's '日期' value

    Returns None. Progress and problems are reported with print().
    """
    if not self.config:
        return
    if icm_update and (main_key or unique_main_key):
        print('icm_update/main_key/unique_main_key 参数不能同时设定')
        return
    main_key = main_key or []
    unique_main_key = unique_main_key or []

    if not dict_data_list:
        print(f'dict_data_list 不能为空 ')
        return

    def ident(name):
        # Backtick-quote an identifier. '%' is doubled because every statement
        # built with this helper is executed with an args sequence, which makes
        # the driver apply %-formatting to the query text.
        return '`' + str(name).replace('`', '``').replace('%', '%%') + '`'

    def comparable(value):
        # MySQL decimals come back as e.g. 5.00 while the incoming data says 5.0:
        # strip trailing zeros (and a dangling dot) before comparing as text.
        text = str(value)
        if '.' in text:
            text = re.sub(r'0+$', '', text)
            text = re.sub(r'\.$', '', text)
        return text

    dict_data = dict_data_list[0]
    if cut_data and '日期' in dict_data:
        try:
            first_date = pd.to_datetime(dict_data['日期'])
            cut_mode = str(cut_data).lower()
            if cut_mode == 'year':
                table_name = f"{table_name}_{first_date.strftime('%Y')}"
            elif cut_mode == 'month':
                table_name = f"{table_name}_{first_date.strftime('%Y-%m')}"
            else:
                print(f'参数不正确,cut_data应为 year 或 month ')
        except Exception as e:
            print(f'{table_name} 将数据按年/月保存(cut_data),但在转换日期时报错 -> {e}')

    # NOTE(review): this first connection is never closed explicitly (same as
    # before) -- confirm whether keep_connect hands out a fresh connection.
    connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=5)
    if not connection:
        return
    with connection.cursor() as cursor:
        cursor.execute("SHOW DATABASES LIKE %s", (db_name,))  # does the database exist?
        if not cursor.fetchone():
            host = str(self.host)
            # Older MySQL builds on Aliyun do not support the 0900 collation.
            legacy_collation = '8.138.27' in host or platform.system() == "Linux"
            if legacy_collation:
                # NOTE(review): 'utf8mb4_unicode_ci' is a collation name, not a
                # charset -- confirm the reconnect below accepts it.
                self.config.update({'charset': 'utf8mb4_unicode_ci'})
            # An if/elif chain: previously the trailing else overwrote the
            # utf8mb4_unicode_ci statement with the 0900 one.
            if '192.168.1.100' in host:
                sql = f"CREATE DATABASE `{db_name}`"
            elif legacy_collation:
                sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_unicode_ci"
            else:
                sql = f"CREATE DATABASE `{db_name}` COLLATE utf8mb4_0900_ai_ci"
            cursor.execute(sql)
            connection.commit()
            print(f"创建Database: {db_name}")

    self.config.update({'database': db_name})  # reconnect with the database selected
    connection = self.keep_connect(_db_name=db_name, _config=self.config, max_try=5)
    if not connection:
        return
    try:
        with connection.cursor() as cursor:
            # 1. Create an empty table when it does not exist yet.
            cursor.execute("SHOW TABLES LIKE %s;", (table_name,))
            if not cursor.fetchone():
                cursor.execute(f"CREATE TABLE IF NOT EXISTS `{table_name}` (id INT AUTO_INCREMENT PRIMARY KEY);")
                print(f'创建 mysql 表: {table_name}')

            # 2. Infer column types from the first record, then apply overrides.
            dtypes, dict_data = self.cover_dict_dtypes(dict_data=dict_data)  # {'店铺名称': 'varchar(100)',...}
            if set_typ:
                for col in dtypes:
                    if col in set_typ:  # only override columns that really exist in the data
                        dtypes[col] = set_typ[col]

            # 3. Add the columns the table is missing.
            columns_sql = "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s;"
            cursor.execute(columns_sql, (db_name, table_name))
            col_exist = [item['COLUMN_NAME'] for item in cursor.fetchall()]
            col_not_exist = [col for col in dict_data.keys() if col not in col_exist]
            null_clause = '' if allow_not_null else ' NOT NULL'
            for col in col_not_exist:
                cursor.execute(f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]}{null_clause};")
                print(f"添加列: {col}({dtypes[col]})")

                if col in main_key or col == '日期':
                    # NOTE(review): every index is created under the same name
                    # 'index_name'; a second indexed column on one table will
                    # probably be rejected -- confirm and rename if so.
                    print(f"设置为索引: {col}({dtypes[col]})")
                    cursor.execute(f"CREATE INDEX index_name ON `{table_name}`(`{col}`);")
                if col in unique_main_key:
                    if dtypes[col] == 'mediumtext':
                        sql = f"ALTER TABLE `{table_name}` ADD UNIQUE (`{col}`({index_length}))"
                    else:
                        sql = f"ALTER TABLE `{table_name}` ADD UNIQUE (`{col}`)"
                    cursor.execute(sql)
            connection.commit()

            # 4. Write the records.
            update_col = []
            if icm_update:
                # The table layout is fixed from here on, so read it once
                # instead of once per record.
                cursor.execute(columns_sql, (db_name, table_name))
                cols_exist = [item['COLUMN_NAME'] for item in cursor.fetchall()]
                # Everything except the identifying columns and the auto id.
                update_col = [item for item in cols_exist if item not in icm_update and item != 'id']

            for record in dict_data_list:
                dtypes, dict_data = self.cover_dict_dtypes(dict_data=record)
                columns = ', '.join(ident(col) for col in dict_data)
                placeholders = ', '.join(['%s'] * len(dict_data))
                # Values travel as parameters (as text, which is how they were
                # embedded before) so quotes inside a value cannot break the SQL.
                values = [str(value) for value in dict_data.values()]

                if icm_update:
                    # Incremental update: the icm_update columns must identify
                    # a row uniquely and must never change, otherwise rows may
                    # be overwritten.
                    key_clause = ' AND '.join(f'{ident(col)} = %s' for col in icm_update)
                    key_args = [str(dict_data[col]) for col in icm_update]
                    select_cols = ', '.join(ident(col) for col in update_col)
                    cursor.execute(
                        f"SELECT {select_cols} FROM {ident(table_name)} WHERE {key_clause}",
                        key_args,
                    )
                    results = cursor.fetchall()
                    if not results:
                        # No existing row for this key: plain insert.
                        cursor.execute(
                            f"INSERT INTO {ident(table_name)} ({columns}) VALUES ({placeholders});",
                            values,
                        )
                    for result in results:
                        changed = [
                            col for col in update_col
                            if comparable(dict_data[col]) != comparable(result[col])
                        ]
                        if not changed:
                            continue
                        unchanged = [col for col in update_col if col not in changed]
                        # The WHERE clause is rebuilt for every row; it used to
                        # be appended to in place and grew across iterations.
                        where_cols = list(icm_update) + unchanged
                        set_clause = ', '.join(f'{ident(col)} = %s' for col in changed)
                        where_clause = ' AND '.join(f'{ident(col)} = %s' for col in where_cols)
                        cursor.execute(
                            f"UPDATE {ident(table_name)} SET {set_clause} WHERE {where_clause}",
                            [str(dict_data[col]) for col in changed + where_cols],
                        )
                    # Move on to the next record; returning here used to drop
                    # every record after the first one.
                    connection.commit()
                    continue

                # Upsert: on a duplicate key refresh every non-key column. The
                # key columns are filtered out instead of deleted so the record
                # is not mutated and a missing key cannot raise.
                refresh_cols = [
                    col for col in dict_data
                    if col not in main_key and col not in unique_main_key
                ]
                update_datas = ', '.join(f'{ident(col)} = VALUES({ident(col)})' for col in refresh_cols)
                cursor.execute(
                    f"INSERT INTO {ident(table_name)} ({columns}) VALUES ({placeholders}) "
                    f"ON DUPLICATE KEY UPDATE {update_datas}",
                    values,
                )
                connection.commit()
    finally:
        # Release the connection on errors too.
        connection.close()
|
561
|
+
|
347
562
|
@try_except
|
348
563
|
def dict_to_mysql(self, db_name, table_name, dict_data, icm_update=None, main_key=None, unique_main_key=None, index_length=100, set_typ=None, allow_not_null=False, cut_data=None):
|
349
564
|
"""
|
@@ -1086,7 +1301,6 @@ class MysqlUpload:
|
|
1086
1301
|
print(f'{now} 写入本地文件: ({self.host}:{self.port}) {db_name}/{table_name} -> {os.path.join(save_path, filename)}')
|
1087
1302
|
connection.close()
|
1088
1303
|
|
1089
|
-
|
1090
1304
|
def read_mysql(self, table_name, start_date, end_date, db_name='远程数据源', date_name='日期'):
|
1091
1305
|
""" 读取指定数据表,可指定日期范围,返回结果: df """
|
1092
1306
|
start_date = pd.to_datetime(start_date).strftime('%Y-%m-%d')
|
mdbq/mysql/recheck_mysql.py
CHANGED
@@ -156,7 +156,7 @@ class ReCheckMysql:
|
|
156
156
|
|
157
157
|
|
158
158
|
def recheck_csv():
|
159
|
-
path = '
|
159
|
+
path = ''
|
160
160
|
for root, dirs, files in os.walk(path, topdown=False):
|
161
161
|
for name in files:
|
162
162
|
if '~' in name or 'baidu' in name or 'Ds_' in name or 'xunlei' in name:
|
mdbq/mysql/s_query.py
CHANGED
mdbq/spider/aikucun.py
CHANGED
@@ -17,8 +17,6 @@ from selenium.webdriver.support import expected_conditions as EC
|
|
17
17
|
from selenium.webdriver.chrome.service import Service
|
18
18
|
from mdbq.config import set_support
|
19
19
|
from selenium.webdriver.common.keys import Keys
|
20
|
-
from mdbq.aggregation import aggregation
|
21
|
-
from mdbq.clean import data_clean
|
22
20
|
from mdbq.other import ua_sj
|
23
21
|
from mdbq.mysql import mysql
|
24
22
|
from mdbq.config import myconfig
|
@@ -46,7 +44,7 @@ else:
|
|
46
44
|
D_PATH = str(pathlib.Path(f'/Users/{getpass.getuser()}/Downloads'))
|
47
45
|
Share_Path = str(pathlib.Path('/Volumes/时尚事业部/01.运营部/天猫报表')) # 共享文件根目录
|
48
46
|
Source_Path = str(pathlib.Path(Data_Path, '原始文件2'))
|
49
|
-
upload_path = os.path.join(D_PATH, '数据上传中心') # 此目录位于下载文件夹
|
47
|
+
upload_path = os.path.join(D_PATH, '数据上传中心', '爱库存') # 此目录位于下载文件夹
|
50
48
|
|
51
49
|
m_engine = mysql.MysqlUpload(username='', password='', host='', port=0, charset='utf8mb4')
|
52
50
|
company_engine = mysql.MysqlUpload(username='', password='', host='', port=0, charset='utf8mb4')
|
@@ -403,53 +401,29 @@ class AikuCun:
|
|
403
401
|
'成交gmv': '',
|
404
402
|
'支付人数_成交': '',
|
405
403
|
}
|
404
|
+
_results = []
|
406
405
|
for dict_data in df.to_dict(orient='records'):
|
407
406
|
new_dict.update(dict_data)
|
408
407
|
new_dict.update({'更新时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')})
|
409
|
-
|
408
|
+
_results.append(new_dict)
|
409
|
+
if _results:
|
410
|
+
m_engine.insert_many_dict(
|
410
411
|
db_name='爱库存2',
|
411
412
|
table_name='商品spu榜单',
|
412
|
-
|
413
|
+
dict_data_list=_results,
|
413
414
|
icm_update=['日期', '店铺名称', 'spu_id', '商品款号'],
|
414
415
|
unique_main_key=None,
|
415
416
|
set_typ=set_typ,
|
416
417
|
)
|
417
|
-
company_engine.
|
418
|
+
company_engine.insert_many_dict(
|
418
419
|
db_name='爱库存2',
|
419
420
|
table_name='商品spu榜单',
|
420
|
-
|
421
|
+
dict_data_list=_results,
|
421
422
|
icm_update=['日期', '店铺名称', 'spu_id', '商品款号'],
|
422
423
|
unique_main_key=None,
|
423
424
|
set_typ=set_typ,
|
424
425
|
)
|
425
426
|
|
426
|
-
# m_engine.df_to_mysql(
|
427
|
-
# df=df,
|
428
|
-
# db_name='爱库存2',
|
429
|
-
# table_name='商品spu榜单',
|
430
|
-
# icm_update=[], # 增量更新, 在聚合数据中使用,其他不要用
|
431
|
-
# move_insert=False, # 先删除,再插入
|
432
|
-
# df_sql=True, # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
|
433
|
-
# drop_duplicates=False, # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
|
434
|
-
# count=None,
|
435
|
-
# filename=None, # 用来追踪处理进度
|
436
|
-
# reset_id=False, # 是否重置自增列
|
437
|
-
# set_typ=set_typ,
|
438
|
-
# )
|
439
|
-
# company_engine.df_to_mysql(
|
440
|
-
# df=df,
|
441
|
-
# db_name='爱库存2',
|
442
|
-
# table_name='商品spu榜单',
|
443
|
-
# icm_update=[], # 增量更新, 在聚合数据中使用,其他不要用
|
444
|
-
# move_insert=False, # 先删除,再插入
|
445
|
-
# df_sql=True, # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
|
446
|
-
# drop_duplicates=False, # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
|
447
|
-
# count=None,
|
448
|
-
# filename=None, # 用来追踪处理进度
|
449
|
-
# reset_id=False, # 是否重置自增列
|
450
|
-
# set_typ=set_typ,
|
451
|
-
# )
|
452
|
-
|
453
427
|
new_name = f'爱库存_商品榜单_spu_{date}_{date}.csv'
|
454
428
|
df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False)
|
455
429
|
os.remove(os.path.join(root, name))
|
@@ -458,26 +432,6 @@ class AikuCun:
|
|
458
432
|
def akucun(headless=True, date_num=10):
    """Download recent data with a fresh AikuCun instance.

    headless: forwarded unchanged to AikuCun.get_data
    date_num: how many recent days to fetch; 0 means today only
    """
    spider = AikuCun()
    spider.get_data(
        shop_name='aikucun',
        date_num=date_num,
        headless=headless,
    )
|
461
|
-
# akc.clean_data()
|
462
|
-
|
463
|
-
# # 新版 数据分类
|
464
|
-
# dp = aggregation.DatabaseUpdate(path=upload_path)
|
465
|
-
# dp.new_unzip(is_move=True)
|
466
|
-
# dp.cleaning(is_move=False, is_except=['临时文件']) # 清洗数据, 存入 self.datas, 不需要立即移除文件,仍保留文件到原始文件中
|
467
|
-
# # 将 self.datas 更新至数据库
|
468
|
-
# dp.upload_df(service_databases=[
|
469
|
-
# # {'home_lx': 'mongodb'},
|
470
|
-
# # {'home_lx': 'mysql'},
|
471
|
-
# {'company': 'mysql'},
|
472
|
-
# # {'nas': 'mysql'},
|
473
|
-
# ])
|
474
|
-
# # 数据分类
|
475
|
-
# c = data_clean.DataClean(path=upload_path, source_path=Source_Path)
|
476
|
-
# c.set_up_to_mogo = False # 不再使用 data_clean 更新数据库,改为 aggregation.py
|
477
|
-
# c.set_up_to_mysql = False # 不再使用 data_clean 更新数据库,改为 aggregation.py
|
478
|
-
# c.new_unzip(is_move=True, ) # 解压文件
|
479
|
-
# c.change_and_sort(is_except=['临时文件'])
|
480
|
-
# c.move_all(is_except=['临时文件']) # 移到文件到原始文件夹
|
481
435
|
|
482
436
|
|
483
437
|
class AikuCunNew:
|
@@ -529,7 +483,7 @@ class AikuCunNew:
|
|
529
483
|
|
530
484
|
if __name__ == '__main__':
|
531
485
|
get_cookie_aikucun() # 登录并获取 cookies
|
532
|
-
akucun(date_num=
|
486
|
+
akucun(date_num=5, headless=True) # 下载数据
|
533
487
|
|
534
488
|
# a = AikuCunNew(shop_name='aikucun')
|
535
489
|
# a.akc()
|
@@ -2,33 +2,24 @@ mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
|
2
2
|
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
4
|
mdbq/aggregation/aggregation.py,sha256=-yzApnlqSN2L0E1YMu5ml-W827qpKQvWPCOI7jj2kzY,80264
|
5
|
-
mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
|
6
|
-
mdbq/aggregation/mysql_types.py,sha256=YTGyrF9vcRgfkQbpT-e-JdJ7c7VF1dDHgyx9YZRES8w,10934
|
7
5
|
mdbq/aggregation/optimize_data.py,sha256=RXIv7cACCgYyehAxMjUYi_S7rVyjIwXKWMaM3nduGtA,3068
|
8
|
-
mdbq/aggregation/query_data.py,sha256=
|
6
|
+
mdbq/aggregation/query_data.py,sha256=CE5oKG9FNZ2I3kNuJtvcbGFFJqyKvO9T15XeXWCIc8s,166441
|
9
7
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
10
8
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
11
|
-
mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
|
12
|
-
mdbq/clean/clean_upload.py,sha256=yMAb6tV9XHhFJbRrCOeaPfszApJ9y5M4-hQGuBSXNqE,67799
|
13
|
-
mdbq/clean/data_clean.py,sha256=ucfslhqXVZoH2QaXHSAWDky0GhIvH9f4GeNaHg4SrFE,104790
|
14
|
-
mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
|
15
|
-
mdbq/company/copysh.py,sha256=eFu6focRqm2Njn_XN1KW2ZYJiTv6EYgsdBCLokobyxQ,21572
|
16
9
|
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
17
|
-
mdbq/config/get_myconf.py,sha256=cmNvsyoNa0RbZ9FOTjSd3jyyGwkxjUo0phvdHbGlrms,6010
|
18
10
|
mdbq/config/myconfig.py,sha256=EGymTlAimtHIDJ9egCtOehBEPOj6rea504kvsEZu64o,854
|
19
11
|
mdbq/config/products.py,sha256=Ab6eaAUMUtjRL8z9NvYukyCjp3nAi4OYISY_IdPhAJ0,6279
|
20
12
|
mdbq/config/set_support.py,sha256=xkZCX6y9Bq1ppBpJAofld4B2YtchA7fl0eT3dx3CrSI,777
|
21
|
-
mdbq/config/update_conf.py,sha256=taL3ZqKgiVWwUrDFuaYhim9a72Hm4BHRhhDscJTziR8,4535
|
22
13
|
mdbq/dataframe/__init__.py,sha256=2HtCN8AdRj53teXDqzysC1h8aPL-mMFy561ESmhehGQ,22
|
23
14
|
mdbq/dataframe/converter.py,sha256=lETYhT7KXlWzWwqguqhk6vI6kj4rnOBEW1lhqKy2Abc,5035
|
24
15
|
mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
|
25
16
|
mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
|
26
17
|
mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
|
27
|
-
mdbq/mongo/mongo.py,sha256=
|
18
|
+
mdbq/mongo/mongo.py,sha256=M9DUeUCMPDngkwn9-ui0uTiFrvfNU1kLs22s5SmoNm0,31899
|
28
19
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
29
|
-
mdbq/mysql/mysql.py,sha256=
|
30
|
-
mdbq/mysql/recheck_mysql.py,sha256=
|
31
|
-
mdbq/mysql/s_query.py,sha256=
|
20
|
+
mdbq/mysql/mysql.py,sha256=Ekqg-svOi2kbPIBBoEtL9Eg4-LJqyqoLprSSUy17MWU,99360
|
21
|
+
mdbq/mysql/recheck_mysql.py,sha256=ppBTfBLgkRWirMVZ31e_ZPULiGPJU7K3PP9G6QBZ3QI,8605
|
22
|
+
mdbq/mysql/s_query.py,sha256=6L5Cp90zq13noZHjzSA5mqms_hD01c8GO1_NfbYDu6w,9252
|
32
23
|
mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
|
33
24
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
34
25
|
mdbq/other/download_sku_picture.py,sha256=Z2Crtwsv69nMGN8uw7qea_1HghRwil3vfnczBM6Ab80,45879
|
@@ -40,11 +31,9 @@ mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
|
|
40
31
|
mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
|
41
32
|
mdbq/pbix/refresh_all.py,sha256=OBT9EewSZ0aRS9vL_FflVn74d4l2G00wzHiikCC4TC0,5926
|
42
33
|
mdbq/pbix/refresh_all_old.py,sha256=_pq3WSQ728GPtEG5pfsZI2uTJhU8D6ra-htIk1JXYzw,7192
|
43
|
-
mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
44
|
-
mdbq/req_post/req_tb.py,sha256=qg7pet73IgKGmCwxaeUyImJIoeK_pBQT9BBKD7fkBNg,36160
|
45
34
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
46
|
-
mdbq/spider/aikucun.py,sha256=
|
47
|
-
mdbq-3.3.
|
48
|
-
mdbq-3.3.
|
49
|
-
mdbq-3.3.
|
50
|
-
mdbq-3.3.
|
35
|
+
mdbq/spider/aikucun.py,sha256=zOacjrJ3MvToyuugA68xB-oN6RKj8K3GxMKudnln9EA,22207
|
36
|
+
mdbq-3.3.8.dist-info/METADATA,sha256=MAs4cq1MyNeUOHWp9orMnoBXaLmXGK_jryKf_-G_134,243
|
37
|
+
mdbq-3.3.8.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
38
|
+
mdbq-3.3.8.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
39
|
+
mdbq-3.3.8.dist-info/RECORD,,
|