mdbq 2.3.0__tar.gz → 2.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdbq-2.3.0 → mdbq-2.3.1}/PKG-INFO +1 -1
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/aggregation/aggregation.py +8 -8
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/req_post/req_tb.py +100 -2
- mdbq-2.3.1/mdbq/spider/aikucun.py +293 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq.egg-info/PKG-INFO +1 -1
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq.egg-info/SOURCES.txt +2 -1
- {mdbq-2.3.0 → mdbq-2.3.1}/setup.py +1 -1
- {mdbq-2.3.0 → mdbq-2.3.1}/README.txt +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/__init__.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/__version__.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/aggregation/__init__.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/aggregation/df_types.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/aggregation/mysql_types.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/aggregation/optimize_data.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/aggregation/query_data.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/bdup/__init__.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/bdup/bdup.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/clean/__init__.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/clean/data_clean.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/company/__init__.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/company/copysh.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/company/home_sh.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/config/__init__.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/config/get_myconf.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/config/products.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/config/set_support.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/config/update_conf.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/dataframe/__init__.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/dataframe/converter.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/log/__init__.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/log/mylogger.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/mongo/__init__.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/mongo/mongo.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/mysql/__init__.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/mysql/mysql.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/mysql/s_query.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/mysql/year_month_day.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/other/__init__.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/other/porxy.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/other/pov_city.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/other/sku_picture.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/other/ua_sj.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/pbix/__init__.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/pbix/refresh_all_old.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/req_post/__init__.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/spider/__init__.py +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-2.3.0 → mdbq-2.3.1}/setup.cfg +0 -0
@@ -1295,14 +1295,14 @@ def test2():
|
|
1295
1295
|
if __name__ == '__main__':
|
1296
1296
|
username, password, host, port = get_myconf.select_config_values(target_service='nas', database='mysql')
|
1297
1297
|
print(username, password, host, port)
|
1298
|
-
file_dir(one_file=False, target_service='company')
|
1299
|
-
|
1300
|
-
|
1301
|
-
|
1302
|
-
|
1303
|
-
|
1304
|
-
|
1305
|
-
|
1298
|
+
# file_dir(one_file=False, target_service='company')
|
1299
|
+
one_file_to_mysql(
|
1300
|
+
file='/Users/xigua/Downloads/爱库存_商品榜单_spu_2024-10-17_2024-10-17.csv',
|
1301
|
+
db_name='爱库存2',
|
1302
|
+
table_name='商品spu榜单',
|
1303
|
+
target_service='company',
|
1304
|
+
database='mysql'
|
1305
|
+
)
|
1306
1306
|
|
1307
1307
|
# db_name = '推广数据2'
|
1308
1308
|
# table_name = '权益报表'
|
@@ -135,6 +135,58 @@ class RequestData:
|
|
135
135
|
with open(os.path.join(self.path, f'{self.filename}.json'), 'w') as f:
|
136
136
|
json.dump(self.datas, f, ensure_ascii=False, sort_keys=True, indent=4)
|
137
137
|
|
138
|
+
def hd_sp(self, date, url, headers, cookies, path, filename, pages=5):
    """Fetch the per-item results of the presale activity page.

    Requests ``pages`` consecutive result pages (1..pages) from ``url`` and
    appends one dict per item to ``self.datas``. When ``self.is_json_file``
    is truthy the accumulated ``self.datas`` is also dumped to
    ``<path>/<filename>.json``.

    Args:
        date: Report date as a ``'YYYYMMDD'`` string; stored on each row as
            ``'YYYY-MM-DD'``. An empty string is stored unchanged.
        url: Request URL without the ``page`` query parameter.
        headers: HTTP headers passed to ``requests.get``.
        cookies: Cookies passed to ``requests.get``.
        path: Directory used for the optional JSON dump.
        filename: File name (without extension) of the optional JSON dump.
        pages: Number of result pages to request, starting at page 1.
    """
    self.date = date
    self.url = url
    self.headers = headers
    self.cookies = cookies
    self.path = path
    self.filename = filename

    # Keep the page-less URL separately: appending to self.url on every
    # iteration used to produce '...&page=1&page=2&page=3'.
    base_url = url

    # Format the date once, for the rows added by this call only.
    # Re-slicing every row of self.datas afterwards would corrupt rows whose
    # date is already 'YYYY-MM-DD', and the nested same-quote f-string that
    # did it is a SyntaxError before Python 3.12.
    row_date = f'{date[0:4]}-{date[4:6]}-{date[6:8]}' if date != '' else date

    for page in range(1, pages + 1):
        self.url = f'{base_url}&page={page}'
        result = requests.get(
            self.url,
            headers=self.headers,
            cookies=self.cookies,
        )
        m_data = json.loads(result.text)
        update_time = m_data['data']['updateTime']
        time_stamp = m_data['data']['timestamp']
        for all_data in m_data['data']['data']['data']:
            item = all_data['item']
            self.datas.append({
                'activityItemDepUv': all_data['activityItemDepUv']['value'],
                '商品链接': item['detailUrl'],
                '商品id': item['itemId'],
                '商品图片': item['pictUrl'],
                'startDate': item['startDate'],
                '商品标题': item['title'],
                '预售订单金额': all_data['presaleOrdAmt']['value'],
                '定金支付件数': all_data['presalePayItemCnt']['value'],
                '预售访客人数': all_data['presaleUv']['value'],
                '定金支付金额': all_data['sumPayDepositAmt']['value'],
                '定金支付买家数': all_data['sumPayDepositByrCnt']['value'],
                '支付转化率': all_data['uvPayRate']['value'],
                '日期': row_date,
                '时间戳': time_stamp,
                '更新时间': update_time,
                '促销活动': '2024双11预售',
                '类型': '分商品效果',
            })
        # Random pause between page requests.
        time.sleep(random.randint(5, 10))

    if self.is_json_file:
        with open(os.path.join(self.path, f'{self.filename}.json'), 'w') as f:
            json.dump(self.datas, f, ensure_ascii=False, sort_keys=True, indent=4)
|
189
|
+
|
138
190
|
def request_jd(self, date, url, headers, cookies, path, filename):
|
139
191
|
""" 京东 """
|
140
192
|
self.date = date
|
@@ -224,9 +276,55 @@ def company_run():
|
|
224
276
|
while True:
|
225
277
|
tb_data(service_databases=[{'company': 'mysql'}], db_name='生意参谋2',
|
226
278
|
table_name='2024双11预售实时流量分析')
|
227
|
-
time.sleep(random.
|
279
|
+
time.sleep(random.randint(1500, 2000))
|
280
|
+
|
281
|
+
|
282
|
+
def hd_sp_data(service_databases=None, db_name=None, table_name=None, pages=5):
    """Download today's per-item presale results and save them as a CSV file.

    Builds the request URL for today's date, runs ``RequestData.hd_sp`` for
    ``pages`` result pages and writes the collected rows to
    ``<path>/test.csv``.

    Args:
        service_databases: Accepted for signature parity with the other
            entry points; not used by this function. ``None`` is treated as
            an empty list.
        db_name: Not used by this function.
        table_name: Not used by this function.
        pages: Number of result pages to request.
    """
    # A list literal as default would be shared between calls.
    service_databases = [] if service_databases is None else service_databases

    date = datetime.date.today().strftime('%Y%m%d')
    url = (
        f'https://sycm.taobao.com/datawar/v7/presaleActivity/itemCoreIndex/getItemListLive.json?'
        f'activityId=94040472'
        f'&itemType=0'  # required: 0 all items, 1 activity items, 2 cross-shop discount items, 3 official discount (no data)
        f'&device=1'
        f'&dateRange={date}%7C{date}'
        f'&dateType=today'
        f'&pageSize=10'  # required
        # '&page=N' is required as well; hd_sp() appends it for each page.
    )
    headers = {
        'User-Agent': ua_sj.get_ua(),
    }
    # SECURITY NOTE(review): a session cookie is hard-coded in source that is
    # published to a public registry. It should be revoked and loaded from
    # private configuration instead; it is kept here only to preserve behaviour.
    cookies = {
        'session': 't=c198527347800dafa75165f084784668; thw=cn; xlly_s=1; _tb_token_=rPWSGun4nUou9aKxviPg; _samesite_flag_=true; 3PcFlag=1729054801593; cookie2=130befc055eed2df29935197bd2b514b; sgcookie=E100aLOltfWHqLLH1qtyH3it%2BLrGH2v3MAnIBdSfu7xwjEpSyh101lblDVcj3zGpAOLv%2FXcrVNbT%2FN%2BI8KZeCoE4HBzHQk0ANtSqjOG5gIzdKamfirBxGWJyVEccitvvDZhK; unb=2210244713719; sn=%E4%B8%87%E9%87%8C%E9%A9%AC%E5%AE%98%E6%96%B9%E6%97%97%E8%88%B0%E5%BA%97%3A%E6%8E%A8%E5%B9%BF; uc1=cookie21=W5iHLLyFfoaZ&cookie14=UoYcCoAfJ7pSQA%3D%3D; csg=1e2bdb8a; _cc_=Vq8l%2BKCLiw%3D%3D; cancelledSubSites=empty; skt=f813f8478f7318f8; v=0; cna=8+iAHxeojXcCAXjsc5Mt+BAV; mtop_partitioned_detect=1; _m_h5_tk=88c56a84a93c1199f8abe086a132c7eb_1729068459392; _m_h5_tk_enc=4b0ed8316f46edae303547d3863982a4; XSRF-TOKEN=4ef3d151-14c4-445a-9249-595e9a24df75; JSESSIONID=9EE8C8DCF6162DCA2FE0187C29BF0B8A; tfstk=gyaEdSAx842sxMbj1f3rgEWrJ50LN2XbxzMSZ7VoOvDheWNubSerd_IKRlkzIRk3O76JzQqgCk9QZzGuzR3n2kMSdYuzw-51hZ_b9W3--t6flZ3LgJuxZBYHFAYiG40ZtLV_9W3J6C9lclVpUV2YVJ0uEVmiwj0kr00l_ccjZ4YnqexMIAhor4YoqVDiwjvkr80l_5DttHciSWVk7jihGd0FW1QAcqH0tA8kuIhKxg2JVH-emXiZncbekEC-TDk0tAWAnqwo4JoU5wJxTlV4BXyRke3n4kqm-zWV8VVYfJcaEt-rIozLzmaF3nH3JYeq-lWM840Kg7obf_xqCuVT7czFcQhTR74KcqbvKYZ_gzlzyTQa3W2Umm4HLgz6efAQOzEeE3on6fkf_1ySvoccWpB-m3K-jqhZh6GB23nnhfkf_1-J2cDo_x1IO; isg=BLm5J8RI-qdgDKdAgF_DSgcFyCOTxq14BgKdB9vjgONeYsD0IReUSUT05GaUWkWw'}
    # NOTE(review): absolute path of one developer's machine; the call fails
    # wherever this directory does not exist.
    path = '/Users/xigua/Downloads'
    filename = 'test'
    r = RequestData()
    r.is_json_file = False
    r.hd_sp(
        date=date,
        url=url,
        headers=headers,
        cookies=cookies,
        path=path,
        filename=filename,
        pages=pages,
    )
    df = pd.DataFrame(r.datas)
    df.to_csv(os.path.join(path, 'test.csv'), index=False, header=True, encoding='utf-8_sig')
|
228
321
|
|
229
322
|
|
230
323
|
if __name__ == '__main__':
|
231
324
|
company_run()
|
232
|
-
tb_data(service_databases=[{'company': 'mysql'}], db_name='生意参谋2', table_name='2024双11预售实时流量分析')
|
325
|
+
# tb_data(service_databases=[{'company': 'mysql'}], db_name='生意参谋2', table_name='2024双11预售实时流量分析')
|
326
|
+
hd_sp_data(
|
327
|
+
service_databases=[{'company': 'mysql'}],
|
328
|
+
# db_name='生意参谋2',
|
329
|
+
# table_name='2024双11预售实时流量分析',
|
330
|
+
)
|
@@ -0,0 +1,293 @@
|
|
1
|
+
# -*- coding:utf-8 -*-
|
2
|
+
import datetime
|
3
|
+
import getpass
|
4
|
+
import json
|
5
|
+
import os
|
6
|
+
import pathlib
|
7
|
+
import platform
|
8
|
+
import re
|
9
|
+
import time
|
10
|
+
import warnings
|
11
|
+
import pandas as pd
|
12
|
+
from selenium import webdriver
|
13
|
+
from selenium.webdriver.support.wait import WebDriverWait
|
14
|
+
from selenium.webdriver.common.by import By
|
15
|
+
from selenium.webdriver.support import expected_conditions as EC
|
16
|
+
from selenium.webdriver.chrome.service import Service
|
17
|
+
from mdbq.config import set_support
|
18
|
+
from selenium.webdriver.common.keys import Keys
|
19
|
+
from mdbq.aggregation import aggregation
|
20
|
+
from mdbq.clean import data_clean
|
21
|
+
|
22
|
+
# Silence every warning raised while this module is in use.
warnings.filterwarnings('ignore')

# Platform-dependent locations:
#   Data_Path  - root of the data centre directory
#   D_PATH     - download directory that the browser saves reports into
#   Share_Path - root of the shared report folder ('' when no share is used)
_system_name = platform.system()
if _system_name == 'Linux':
    Data_Path = '数据中心'
    D_PATH = 'Downloads'
    # Relative to the current working directory; created on first import.
    if not os.path.exists(D_PATH):
        os.makedirs(D_PATH)
    Share_Path = ''  # Linux is usually a remote server; no share access needed
elif _system_name == 'Windows':
    Data_Path = r'C:\同步空间\BaiduSyncdisk'
    D_PATH = str(pathlib.Path(f'C:\\Users\\{getpass.getuser()}\\Downloads'))
    Share_Path = str(pathlib.Path(r'\\192.168.1.198\时尚事业部\01.运营部\天猫报表'))
else:
    # Any other system is given macOS-style paths.
    Data_Path = f'/Users/{getpass.getuser()}/数据中心'
    D_PATH = str(pathlib.Path(f'/Users/{getpass.getuser()}/Downloads'))
    Share_Path = str(pathlib.Path('/Volumes/时尚事业部/01.运营部/天猫报表'))
|
41
|
+
|
42
|
+
|
43
|
+
def test():
    """Open the aikucun merchant site, pause for a manual login and save the cookies.

    Execution stops in the debugger (``breakpoint()``) after the page has
    loaded; once it is resumed, the browser cookies are written as JSON to
    ``cookie_.txt`` inside ``cookie_path`` and the browser is closed.
    """
    _url = 'https://gray-merc.aikucun.com/index.html'
    cookie_path = '/Users/xigua/Downloads'
    print(_url)

    # Browser start-up options
    option = webdriver.ChromeOptions()
    option.headless = True
    for flag in ("--disable-gpu", "--no-sandbox", "--disable-dev-shm-usage"):
        option.add_argument(flag)
    option.add_experimental_option("excludeSwitches", ["enable-automation"])
    option.add_experimental_option("useAutomationExtension", False)

    on_windows = platform.system() == 'Windows'
    if on_windows:
        # Chrome and chromedriver live under the user's home directory
        user_home = f'C:\\Users\\{getpass.getuser()}'
        chrome_path = os.path.join(user_home, 'chrome\\chrome_win64\\chrome.exe')
        chromedriver_path = os.path.join(user_home, 'chrome\\chromedriver.exe')
        option.binary_location = chrome_path  # only set on Windows
    else:
        chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
        chromedriver_path = '/usr/local/chrome/chromedriver'
        os.environ["webdriver.chrome.driver"] = chrome_path
    service = Service(chromedriver_path)
    _driver = webdriver.Chrome(service=service, options=option)

    print('yes')
    # Log in
    _driver.get(_url)
    time.sleep(0.1)
    _driver.maximize_window()
    breakpoint()

    d_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(f'{d_time} 登录成功,正在获取cookie...')
    time.sleep(0.1)

    _file = os.path.join(cookie_path, 'cookie_.txt')
    with open(_file, 'w') as f:
        # Store the cookies as JSON; 'expiry' is dropped from every cookie
        cookies_list = _driver.get_cookies()
        for cookie in cookies_list:
            cookie.pop('expiry', None)
        f.write(json.dumps(cookies_list))
    print(f'cookie已保存: {_file}')
    _driver.quit()
|
105
|
+
|
106
|
+
|
107
|
+
class AikuCun:
    """Download the aikucun merchant product-ranking report through Chrome.

    ``login`` opens the merchant site and restores a saved cookie session,
    ``get_data`` downloads one report per day and ``clean_data`` normalises
    the downloaded CSV files found under the module-level ``D_PATH``.
    """

    # File names containing CJK characters are skipped by clean_data(); the
    # cleaned output name contains CJK characters, so it is never re-processed.
    _CJK_RE = re.compile('[\u4e00-\u9fff]+')
    # Only files whose name starts with four dash-separated alphanumeric groups
    # are treated as freshly downloaded reports.
    _RAW_NAME_RE = re.compile('^[0-9a-zA-Z_]{5,}-[0-9a-zA-Z_]+-[0-9a-zA-Z_]+-[0-9a-zA-Z_]+')
    # Number of BACKSPACE key presses sent to clear a date input ('YY-MM-DD').
    _DATE_FIELD_LEN = 8

    def __init__(self):
        self.url = 'https://gray-merc.aikucun.com/index.html'
        self.cookie_path = os.path.join(set_support.SetSupport(dirname='support').dirname, 'cookies')

    def login(self, shop_name='aikucun'):
        """Start Chrome, open ``self.url`` and restore the saved cookies.

        Every ``*.txt`` file in ``self.cookie_path`` whose name contains
        ``shop_name`` (and neither ``~`` nor ``.DS``) is read as a JSON list
        of cookies and added to the browser session.

        Args:
            shop_name: Substring that selects the cookie file(s) to load.

        Returns:
            The running Chrome webdriver. The caller must ``quit()`` it.
        """
        option = webdriver.ChromeOptions()
        # option.add_argument("--headless")  # headless mode
        option.add_argument("--disable-gpu")
        option.add_argument("--no-sandbox")
        option.add_argument("--disable-dev-shm-usage")
        # add_experimental_option() replaces the value stored under a key, so
        # both switches must be excluded in ONE call; separate calls left only
        # the last list in effect and 'enable-logging' was silently dropped.
        option.add_experimental_option('excludeSwitches', ['enable-automation', 'enable-logging'])
        option.add_experimental_option("useAutomationExtension", False)
        option.add_argument('--ignore-ssl-error')
        prefs = {
            'profile.default_content_settings.popups': 0,
            "browser.download.manager. showAlertOnComplete": False,
            "profile.default_content_setting_values.automatic_downloads": 1,
        }

        option.add_experimental_option('perfLoggingPrefs', {
            'enableNetwork': True,
            'enablePage': False,
        })
        option.set_capability("goog:loggingPrefs", {
            'browser': 'ALL',
            'performance': 'ALL',
        })
        option.set_capability("goog:perfLoggingPrefs", {
            'enableNetwork': True,
            'enablePage': False,
            'enableTimeline': False
        })
        option.add_experimental_option('prefs', prefs)

        if platform.system() == 'Windows':
            # chrome and chromedriver live under the user's home directory
            user_home = f'C:\\Users\\{getpass.getuser()}'
            chrome_path = os.path.join(user_home, 'chrome\\chrome_win64\\chrome.exe')
            chromedriver_path = os.path.join(user_home, 'chrome\\chromedriver.exe')
            option.binary_location = chrome_path  # only set on Windows
        else:
            # macOS and every other system share the same locations;
            # binary_location is deliberately not set here.
            chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
            chromedriver_path = '/usr/local/chrome/chromedriver'
            os.environ["webdriver.chrome.driver"] = chrome_path
        service = Service(chromedriver_path)
        _driver = webdriver.Chrome(options=option, service=service)
        try:
            _driver.maximize_window()

            # Log in by replacing the browser's cookies with the saved ones
            _driver.get(self.url)
            _driver.delete_all_cookies()
            for name in os.listdir(self.cookie_path):
                if shop_name in name and name.endswith('.txt') and '~' not in name and '.DS' not in name:
                    with open(os.path.join(self.cookie_path, name), 'r') as f:
                        cookies_list = json.load(f)
                    for cookie in cookies_list:
                        _driver.add_cookie(cookie)
            _driver.refresh()
            time.sleep(3)
        except Exception:
            # Do not leave a browser process behind when the login fails.
            _driver.quit()
            raise
        return _driver

    @staticmethod
    def _retype(wait, xpath, text):
        """Wait for the input at ``xpath``, clear it with BACKSPACE and type ``text``."""
        input_box = wait.until(EC.element_to_be_clickable((By.XPATH, xpath)))
        for _ in range(AikuCun._DATE_FIELD_LEN):
            input_box.send_keys(Keys.BACKSPACE)
        input_box.send_keys(text)
        return input_box

    @staticmethod
    def _js_click(driver, wait, xpath):
        """Wait until the first element matching ``xpath`` exists and click it via JavaScript."""
        element = wait.until(EC.presence_of_element_located((By.XPATH, xpath)))
        driver.execute_script("arguments[0].click();", element)

    def get_data(self, shop_name='aikucun', date_num=1):
        """Download the product-ranking report day by day and clean each download.

        Args:
            shop_name: Passed to ``login`` to select the cookie file.
            date_num: Number of most recent days to download, counting back
                from today: 1 downloads today only, 3 downloads today and
                the two previous days, 0 downloads nothing.
        """
        _driver = self.login(shop_name=shop_name)
        try:
            _url = 'https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant?LS=true&shopId=1814114991487782914&from=menu&v=0.1936043279838604'
            _driver.get(_url)
            time.sleep(3)

            wait = WebDriverWait(_driver, timeout=15)
            today = datetime.date.today()
            for date_s in range(date_num):
                new_date = today - datetime.timedelta(days=date_s)  # also used in the file name
                str_date = str(new_date)[2:]  # 'YYYY-MM-DD' -> 'YY-MM-DD'

                # Start and end date are both set to the same day
                self._retype(wait, '//input[@placeholder="开始日期"]', str_date)
                time.sleep(1)
                end_box = self._retype(wait, '//input[@placeholder="结束日期"]', str_date)
                time.sleep(2)
                end_box.send_keys(Keys.ENTER)
                time.sleep(2)

                # Query, then download
                self._js_click(_driver, wait, '//button/span[contains(text(), "查询")]')
                time.sleep(3)
                self._js_click(
                    _driver, wait,
                    '//button[@class="el-button el-button--primary el-button--small is-plain"]/span[contains(text(), "下载数据")]')
                time.sleep(3)
                self.clean_data(date=new_date)
        finally:
            # Always close the browser, also when a wait times out.
            _driver.quit()

    def clean_data(self, date):
        """Normalise freshly downloaded report CSV files found under ``D_PATH``.

        Each matching file is read as gb2312, gets a leading ``日期`` column
        holding ``date``, has ``spuId`` renamed to ``spu_id``, is written as
        ``爱库存_商品榜单_spu_<date>_<date>.csv`` (utf-8 with BOM) next to
        the original, and the original file is removed.

        Args:
            date: Date the downloaded data belongs to.
        """
        for root, dirs, files in os.walk(D_PATH, topdown=False):
            for name in files:
                if '~$' in name or 'DS_Store' in name:
                    continue
                if not name.endswith('csv'):
                    continue
                if self._CJK_RE.search(name):
                    continue
                if not self._RAW_NAME_RE.match(name):
                    continue
                df = pd.read_csv(os.path.join(root, name), encoding='gb2312', header=0, na_filter=False)
                df.insert(loc=0, column='日期', value=date)
                df.rename(columns={'spuId': 'spu_id'}, inplace=True)
                # Same outcome as the deprecated errors='ignore': keep the
                # original values when they cannot be parsed.
                try:
                    df['数据更新时间'] = pd.to_datetime(df['数据更新时间'], format='%Y-%m-%d %H:%M:%S')
                except (ValueError, TypeError):
                    pass
                # NOTE(review): the name depends only on `date`, so two raw
                # files in the same directory overwrite each other's output.
                new_name = f'爱库存_商品榜单_spu_{date}_{date}.csv'
                df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False)
                os.remove(os.path.join(root, name))
|
263
|
+
|
264
|
+
|
265
|
+
def akucun():
    """Download the last three days of aikucun reports, upload them and sort the files.

    Steps, in order: scrape via ``AikuCun.get_data``; unzip, clean and upload
    the files under ``D_PATH`` with ``aggregation.DatabaseUpdate``; then
    classify and move the files with ``data_clean.DataClean``.
    """
    spider = AikuCun()
    spider.get_data(shop_name='aikucun', date_num=3)

    # New-style pipeline: clean into the updater's own data set and upload.
    updater = aggregation.DatabaseUpdate(path=D_PATH)
    updater.new_unzip(is_move=True)
    # Files are not moved here; they stay in place for the classification step.
    updater.cleaning(is_move=False, is_except=['临时文件'])
    upload_targets = [
        {'company': 'mysql'},
    ]
    updater.upload_df(service_databases=upload_targets)

    # File classification.
    # NOTE(review): `Source_Path` is not defined anywhere in this module, so
    # the next line raises NameError as written. Define it before relying on
    # this step.
    sorter = data_clean.DataClean(path=D_PATH, source_path=Source_Path)
    # Database updates are handled by aggregation above, not by data_clean.
    sorter.set_up_to_mogo = False
    sorter.set_up_to_mysql = False
    sorter.new_unzip(is_move=True)  # unzip archives
    sorter.change_and_sort(is_except=['临时文件'])
    sorter.move_all()  # move files to the original-files folder
|
288
|
+
|
289
|
+
|
290
|
+
if __name__ == '__main__':
    # test()  # manual cookie capture, disabled
    akucun()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|