mdbq 2.3.0__tar.gz → 2.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {mdbq-2.3.0 → mdbq-2.3.1}/PKG-INFO +1 -1
  2. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/aggregation/aggregation.py +8 -8
  3. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/req_post/req_tb.py +100 -2
  4. mdbq-2.3.1/mdbq/spider/aikucun.py +293 -0
  5. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq.egg-info/PKG-INFO +1 -1
  6. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq.egg-info/SOURCES.txt +2 -1
  7. {mdbq-2.3.0 → mdbq-2.3.1}/setup.py +1 -1
  8. {mdbq-2.3.0 → mdbq-2.3.1}/README.txt +0 -0
  9. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/__init__.py +0 -0
  10. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/__version__.py +0 -0
  11. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/aggregation/__init__.py +0 -0
  12. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/aggregation/df_types.py +0 -0
  13. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/aggregation/mysql_types.py +0 -0
  14. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/aggregation/optimize_data.py +0 -0
  15. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/aggregation/query_data.py +0 -0
  16. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/bdup/__init__.py +0 -0
  17. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/bdup/bdup.py +0 -0
  18. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/clean/__init__.py +0 -0
  19. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/clean/data_clean.py +0 -0
  20. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/company/__init__.py +0 -0
  21. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/company/copysh.py +0 -0
  22. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/company/home_sh.py +0 -0
  23. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/config/__init__.py +0 -0
  24. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/config/get_myconf.py +0 -0
  25. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/config/products.py +0 -0
  26. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/config/set_support.py +0 -0
  27. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/config/update_conf.py +0 -0
  28. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/dataframe/__init__.py +0 -0
  29. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/dataframe/converter.py +0 -0
  30. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/log/__init__.py +0 -0
  31. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/log/mylogger.py +0 -0
  32. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/mongo/__init__.py +0 -0
  33. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/mongo/mongo.py +0 -0
  34. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/mysql/__init__.py +0 -0
  35. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/mysql/mysql.py +0 -0
  36. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/mysql/s_query.py +0 -0
  37. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/mysql/year_month_day.py +0 -0
  38. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/other/__init__.py +0 -0
  39. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/other/porxy.py +0 -0
  40. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/other/pov_city.py +0 -0
  41. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/other/sku_picture.py +0 -0
  42. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/other/ua_sj.py +0 -0
  43. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/pbix/__init__.py +0 -0
  44. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/pbix/pbix_refresh.py +0 -0
  45. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/pbix/refresh_all.py +0 -0
  46. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/pbix/refresh_all_old.py +0 -0
  47. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/req_post/__init__.py +0 -0
  48. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq/spider/__init__.py +0 -0
  49. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq.egg-info/dependency_links.txt +0 -0
  50. {mdbq-2.3.0 → mdbq-2.3.1}/mdbq.egg-info/top_level.txt +0 -0
  51. {mdbq-2.3.0 → mdbq-2.3.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: mdbq
- Version: 2.3.0
+ Version: 2.3.1
  Home-page: https://pypi.org/project/mdbsql
  Author: xigua,
  Author-email: 2587125111@qq.com
@@ -1295,14 +1295,14 @@ def test2():
  if __name__ == '__main__':
      username, password, host, port = get_myconf.select_config_values(target_service='nas', database='mysql')
      print(username, password, host, port)
-     file_dir(one_file=False, target_service='company')
-     # one_file_to_mysql(
-     #     file='/Users/xigua/数据中心/原始文件2/京东报表/JD推广_全站营销报表/2024-08/万里马箱包推广1_营销概况_全站营销_2024-08-19_2024-09-02.csv',
-     #     db_name='京东数据2',
-     #     table_name='推广数据_全站营销',
-     #     target_service='company',
-     #     database='mysql'
-     # )
+     # file_dir(one_file=False, target_service='company')
+     one_file_to_mysql(
+         file='/Users/xigua/Downloads/爱库存_商品榜单_spu_2024-10-17_2024-10-17.csv',
+         db_name='爱库存2',
+         table_name='商品spu榜单',
+         target_service='company',
+         database='mysql'
+     )

      # db_name = '推广数据2'
      # table_name = '权益报表'
@@ -135,6 +135,58 @@ class RequestData:
          with open(os.path.join(self.path, f'{self.filename}.json'), 'w') as f:
              json.dump(self.datas, f, ensure_ascii=False, sort_keys=True, indent=4)

+     def hd_sp(self, date, url, headers, cookies, path, filename, pages=5):
+         """ 活动预售页面 分商品效果 """
+
+         self.date = date
+         self.url = url
+         self.headers = headers
+         self.cookies = cookies
+         self.path = path
+         self.filename = filename
+         for page in range(1, pages + 1):
+             self.url = f'{self.url}&page={page}'
+             result = requests.get(
+                 self.url,
+                 headers=self.headers,
+                 cookies=self.cookies,
+             )
+             m_data = json.loads(result.text)
+             # print(m_data)
+             # with open(os.path.join(self.path, f'{self.filename}.json'), 'w') as f:
+             #     json.dump(m_data, f, ensure_ascii=False, sort_keys=True, indent=4)
+             update_time = m_data['data']['updateTime']
+             time_stamp = m_data['data']['timestamp']
+             # pt_data = data['data']['data'][0]  # 平台流量
+             # gg_data = data['data']['data'][1]  # 广告流量
+             for all_data in m_data['data']['data']['data']:
+                 self.datas.append({
+                     'activityItemDepUv': all_data['activityItemDepUv']['value'],
+                     '商品链接': all_data['item']['detailUrl'],
+                     '商品id': all_data['item']['itemId'],
+                     '商品图片': all_data['item']['pictUrl'],
+                     'startDate': all_data['item']['startDate'],
+                     '商品标题': all_data['item']['title'],
+                     '预售订单金额': all_data['presaleOrdAmt']['value'],
+                     '定金支付件数': all_data['presalePayItemCnt']['value'],
+                     '预售访客人数': all_data['presaleUv']['value'],
+                     '定金支付金额': all_data['sumPayDepositAmt']['value'],
+                     '定金支付买家数': all_data['sumPayDepositByrCnt']['value'],
+                     '支付转化率': all_data['uvPayRate']['value'],
+                     '日期': date,
+                     '时间戳': time_stamp,
+                     '更新时间': update_time,
+                     '促销活动': '2024双11预售',
+                     '类型': '分商品效果',
+                 })
+             time.sleep(random.randint(5, 10))
+         for item in self.datas:
+             if item['日期'] != '':
+                 item.update({'日期': f'{item['日期'][0:4]}-{item['日期'][4:6]}-{item['日期'][6:8]}'})
+         if self.is_json_file:
+             with open(os.path.join(self.path, f'{self.filename}.json'), 'w') as f:
+                 json.dump(self.datas, f, ensure_ascii=False, sort_keys=True, indent=4)
+
      def request_jd(self, date, url, headers, cookies, path, filename):
          """ 京东 """
          self.date = date
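A note on the date reformatting in the hd_sp method added above: reusing single quotes inside an f-string expression (item['日期'] nested in f'…') only parses on Python 3.12 and later; older interpreters raise a SyntaxError on that line. A minimal, version-portable sketch of the same conversion follows (the helper name fmt_date is hypothetical, not part of the package):

# Hypothetical helper doing the same '20241017' -> '2024-10-17' conversion as hd_sp,
# without nesting the same quote character inside the f-string (parses before Python 3.12).
def fmt_date(raw):
    return f'{raw[0:4]}-{raw[4:6]}-{raw[6:8]}' if raw else raw

print(fmt_date('20241017'))  # 2024-10-17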
@@ -224,9 +276,55 @@ def company_run():
      while True:
          tb_data(service_databases=[{'company': 'mysql'}], db_name='生意参谋2',
                  table_name='2024双11预售实时流量分析')
-         time.sleep(random.uniform(1500, 2000))
+         time.sleep(random.randint(1500, 2000))
+
+
+ def hd_sp_data(service_databases=[], db_name=None, table_name=None, pages=5):
+     """ 2024双11预售 分商品效果 """
+     date = datetime.date.today().strftime('%Y%m%d')
+     url = (
+         f'https://sycm.taobao.com/datawar/v7/presaleActivity/itemCoreIndex/getItemListLive.json?'
+         f'activityId=94040472'
+         f'&itemType=0'  # 必传, 查看全部商品 0, 活动商品 1, 跨店满减商品 2, 官方立减 3(无数据)
+         f'&device=1'
+         f'&dateRange={date}%7C{date}'
+         f'&dateType=today'
+         f'&pageSize=10'  # 必传
+         # f'&page=1'  # 必传
+         # f'&order=desc'
+         # f'&orderBy=presaleOrdAmt'
+         # f'&indexCode=presaleOrdAmt%2CsumPayDepositByrCnt%2CpresalePayItemCnt'
+         # f'&_=1729133575797'
+     )
+     headers = {
+         # "referer": "https://dmp.taobao.com/index_new.html",
+         'User-Agent': ua_sj.get_ua(),
+     }
+     cookies = {
+         'session': 't=c198527347800dafa75165f084784668; thw=cn; xlly_s=1; _tb_token_=rPWSGun4nUou9aKxviPg; _samesite_flag_=true; 3PcFlag=1729054801593; cookie2=130befc055eed2df29935197bd2b514b; sgcookie=E100aLOltfWHqLLH1qtyH3it%2BLrGH2v3MAnIBdSfu7xwjEpSyh101lblDVcj3zGpAOLv%2FXcrVNbT%2FN%2BI8KZeCoE4HBzHQk0ANtSqjOG5gIzdKamfirBxGWJyVEccitvvDZhK; unb=2210244713719; sn=%E4%B8%87%E9%87%8C%E9%A9%AC%E5%AE%98%E6%96%B9%E6%97%97%E8%88%B0%E5%BA%97%3A%E6%8E%A8%E5%B9%BF; uc1=cookie21=W5iHLLyFfoaZ&cookie14=UoYcCoAfJ7pSQA%3D%3D; csg=1e2bdb8a; _cc_=Vq8l%2BKCLiw%3D%3D; cancelledSubSites=empty; skt=f813f8478f7318f8; v=0; cna=8+iAHxeojXcCAXjsc5Mt+BAV; mtop_partitioned_detect=1; _m_h5_tk=88c56a84a93c1199f8abe086a132c7eb_1729068459392; _m_h5_tk_enc=4b0ed8316f46edae303547d3863982a4; XSRF-TOKEN=4ef3d151-14c4-445a-9249-595e9a24df75; JSESSIONID=9EE8C8DCF6162DCA2FE0187C29BF0B8A; tfstk=gyaEdSAx842sxMbj1f3rgEWrJ50LN2XbxzMSZ7VoOvDheWNubSerd_IKRlkzIRk3O76JzQqgCk9QZzGuzR3n2kMSdYuzw-51hZ_b9W3--t6flZ3LgJuxZBYHFAYiG40ZtLV_9W3J6C9lclVpUV2YVJ0uEVmiwj0kr00l_ccjZ4YnqexMIAhor4YoqVDiwjvkr80l_5DttHciSWVk7jihGd0FW1QAcqH0tA8kuIhKxg2JVH-emXiZncbekEC-TDk0tAWAnqwo4JoU5wJxTlV4BXyRke3n4kqm-zWV8VVYfJcaEt-rIozLzmaF3nH3JYeq-lWM840Kg7obf_xqCuVT7czFcQhTR74KcqbvKYZ_gzlzyTQa3W2Umm4HLgz6efAQOzEeE3on6fkf_1ySvoccWpB-m3K-jqhZh6GB23nnhfkf_1-J2cDo_x1IO; isg=BLm5J8RI-qdgDKdAgF_DSgcFyCOTxq14BgKdB9vjgONeYsD0IReUSUT05GaUWkWw'}
+     path = '/Users/xigua/Downloads'
+     filename = 'test'
+     r = RequestData()
+     r.is_json_file = False
+     r.hd_sp(
+         date=date,
+         url=url,
+         headers=headers,
+         cookies=cookies,
+         path=path,
+         filename=filename,
+         pages = pages,
+     )
+     # print(r.datas)
+     df = pd.DataFrame(r.datas)
+     df.to_csv(os.path.join(path, 'test.csv'), index=False, header=True, encoding='utf-8_sig')


  if __name__ == '__main__':
      company_run()
-     tb_data(service_databases=[{'company': 'mysql'}], db_name='生意参谋2', table_name='2024双11预售实时流量分析')
+     # tb_data(service_databases=[{'company': 'mysql'}], db_name='生意参谋2', table_name='2024双11预售实时流量分析')
+     hd_sp_data(
+         service_databases=[{'company': 'mysql'}],
+         # db_name='生意参谋2',
+         # table_name='2024双11预售实时流量分析',
+     )
@@ -0,0 +1,293 @@
+ # -*- coding:utf-8 -*-
+ import datetime
+ import getpass
+ import json
+ import os
+ import pathlib
+ import platform
+ import re
+ import time
+ import warnings
+ import pandas as pd
+ from selenium import webdriver
+ from selenium.webdriver.support.wait import WebDriverWait
+ from selenium.webdriver.common.by import By
+ from selenium.webdriver.support import expected_conditions as EC
+ from selenium.webdriver.chrome.service import Service
+ from mdbq.config import set_support
+ from selenium.webdriver.common.keys import Keys
+ from mdbq.aggregation import aggregation
+ from mdbq.clean import data_clean
+
+ warnings.filterwarnings('ignore')
+
+
+ if platform.system() == 'Windows':
+     # windows版本
+     Data_Path = r'C:\同步空间\BaiduSyncdisk'
+     D_PATH = str(pathlib.Path(f'C:\\Users\\{getpass.getuser()}\\Downloads'))
+     Share_Path = str(pathlib.Path(r'\\192.168.1.198\时尚事业部\01.运营部\天猫报表'))  # 共享文件根目录
+ elif platform.system() == 'Linux':
+     Data_Path = '数据中心'
+     D_PATH = 'Downloads'
+     if not os.path.exists(D_PATH):
+         os.makedirs(D_PATH)
+     Share_Path = ''  # linux 通常是远程服务器,不需要访问共享
+ else:
+     Data_Path = f'/Users/{getpass.getuser()}/数据中心'  # 使用Mac独立网络时
+     # Data_Path = '/Volumes'  # 直接使用共享连接台式机时的配置, 后面接 + 自动0备份/***
+     D_PATH = str(pathlib.Path(f'/Users/{getpass.getuser()}/Downloads'))
+     Share_Path = str(pathlib.Path('/Volumes/时尚事业部/01.运营部/天猫报表'))  # 共享文件根目录
+
+
+ def test():
+     """
+     """
+     _url = 'https://gray-merc.aikucun.com/index.html'
+     cookie_path = '/Users/xigua/Downloads'
+     print(_url)
+
+     option = webdriver.ChromeOptions()  # 浏览器启动选项
+     option.headless = True  # False指定为无界面模式
+     # 调整chrome启动配置
+     option.add_argument("--disable-gpu")
+     option.add_argument("--no-sandbox")
+     option.add_argument("--disable-dev-shm-usage")
+     option.add_experimental_option("excludeSwitches", ["enable-automation"])
+     option.add_experimental_option("useAutomationExtension", False)
+     # if platform.system() == 'Windows':
+     #     service = Service(os.path.join(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe'))
+     # else:
+     #     service = Service('/usr/local/bin/chromedriver')
+     if platform.system() == 'Windows':
+         # 设置Chrome的路径
+         chrome_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chrome_win64\\chrome.exe')
+         chromedriver_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chromedriver.exe')
+         # os.environ["webdriver.chrome.driver"] = chrome_path
+         option.binary_location = chrome_path  # windows 设置此参数有效
+         service = Service(chromedriver_path)
+         # service = Service(str(pathlib.Path(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe')))  # 旧路径
+     else:
+         # 设置Chrome的路径
+         chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
+         chromedriver_path = '/usr/local/chrome/chromedriver'
+         os.environ["webdriver.chrome.driver"] = chrome_path
+
+         service = Service(chromedriver_path)
+     _driver = webdriver.Chrome(service=service, options=option)  # 创建Chrome驱动程序实例
+
+     print('yes')
+     # 登录
+     _driver.get(_url)
+     time.sleep(0.1)
+     _driver.maximize_window()  # 窗口最大化 方便后续加载数据
+     breakpoint()
+
+     d_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+     print(f'{d_time} 登录成功,正在获取cookie...')
+     time.sleep(0.1)
+
+
+     _file = os.path.join(cookie_path, f'cookie_.txt')
+     with open(_file, 'w') as f:
+         # 将cookies保存为json格式
+         cookies_list = _driver.get_cookies()
+         for cookie in cookies_list:
+             # 该字段有问题所以删除就可以
+             if 'expiry' in cookie:
+                 del cookie['expiry']
+             # if 'domain' in cookie:
+             #     cookie['domain'] = '.taobao.com'
+         cookies_list = json.dumps(cookies_list)
+         f.write(cookies_list)
+     print(f'cookie已保存: {_file}')
+     _driver.quit()
+
+
+ class AikuCun:
+     def __init__(self):
+         self.url = 'https://gray-merc.aikucun.com/index.html'
+         self.cookie_path = os.path.join(set_support.SetSupport(dirname='support').dirname, 'cookies')
+
+     def login(self, shop_name='aikucun'):
+         option = webdriver.ChromeOptions()
+         # option.add_argument("--headless")  # 设置无界面模式
+         # 调整chrome启动配置
+         option.add_argument("--disable-gpu")
+         option.add_argument("--no-sandbox")
+         option.add_argument("--disable-dev-shm-usage")
+         option.add_experimental_option("excludeSwitches", ["enable-automation"])
+         option.add_experimental_option('excludeSwitches', ['enable-logging'])  # 禁止日志输出,减少控制台干扰
+         option.add_experimental_option("useAutomationExtension", False)
+         option.add_argument('--ignore-ssl-error')  # 忽略ssl错误
+         prefs = {
+             'profile.default_content_settings.popups': 0,  # 禁止弹出所有窗口
+             "browser.download.manager. showAlertOnComplete": False,  # 下载完成后不显示下载完成提示框
+             "profile.default_content_setting_values.automatic_downloads": 1,  # 允许自动下载多个文件
+         }
+
+         option.add_experimental_option('perfLoggingPrefs', {
+             'enableNetwork': True,
+             'enablePage': False,
+         })
+         option.set_capability("goog:loggingPrefs", {
+             'browser': 'ALL',
+             'performance': 'ALL',
+         })
+         option.set_capability("goog:perfLoggingPrefs", {
+             'enableNetwork': True,
+             'enablePage': False,
+             'enableTimeline': False
+         })
+
+         option.add_experimental_option('prefs', prefs)
+         option.add_experimental_option('excludeSwitches', ['enable-automation'])  # 实验性参数, 左上角小字
+
+         # # 通过excludeSwitches参数禁用默认的启动路径
+         # option.add_experimental_option('excludeSwitches', ['enable-automation'])
+
+         if platform.system() == 'Windows':
+             # 设置 chrome 和 chromedriver 启动路径
+             chrome_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chrome_win64\\chrome.exe')
+             chromedriver_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chromedriver.exe')
+             # os.environ["webdriver.chrome.driver"] = chrome_path
+             option.binary_location = chrome_path  # windows 设置此参数有效
+             service = Service(chromedriver_path)
+             # service = Service(str(pathlib.Path(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe')))  # 旧路径
+         elif platform.system() == 'Darwin':
+             chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
+             chromedriver_path = '/usr/local/chrome/chromedriver'
+             os.environ["webdriver.chrome.driver"] = chrome_path
+             # option.binary_location = chrome_path  # Macos 设置此参数报错
+             service = Service(chromedriver_path)
+         else:
+             chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
+             chromedriver_path = '/usr/local/chrome/chromedriver'
+             os.environ["webdriver.chrome.driver"] = chrome_path
+             # option.binary_location = chrome_path  # macos 设置此参数报错
+             service = Service(chromedriver_path)
+         _driver = webdriver.Chrome(options=option, service=service)  # 创建Chrome驱动程序实例
+         _driver.maximize_window()  # 窗口最大化 方便后续加载数据
+
+         # 登录
+         _driver.get(self.url)
+         _driver.delete_all_cookies()  # 首先清除浏览器打开已有的cookies
+         name_lists = os.listdir(self.cookie_path)  # cookie 放在主目录下的 cookies 文件夹
+         for name in name_lists:
+             if shop_name in name and name.endswith('.txt') and '~' not in name and '.DS' not in name:
+                 with open(os.path.join(self.cookie_path, name), 'r') as f:
+                     cookies_list = json.load(f)  # 使用json读取cookies 注意读取的是文件 所以用load而不是loads
+                     for cookie in cookies_list:
+                         _driver.add_cookie(cookie)  # 添加cookies信息
+         _driver.refresh()
+         time.sleep(3)
+         return _driver
+
+     def get_data(self, shop_name='aikucun', date_num=1):
+         """
+         date_num: 获取最近 N 天数据,0表示今天
+         所有数据都是逐日下载
+         """
+
+         _driver = self.login(shop_name=shop_name)
+         _url = 'https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant?LS=true&shopId=1814114991487782914&from=menu&v=0.1936043279838604'
+         _driver.get(_url)
+         time.sleep(3)
+
+         today = datetime.date.today()
+         for date_s in range(date_num):
+             new_date = today - datetime.timedelta(days=date_s)  # 会用作文件名
+             str_date = str(new_date)[2:]
+             wait = WebDriverWait(_driver, timeout=15)  #
+             elements = _driver.find_elements(
+                 By.XPATH, '//input[@placeholder="开始日期"]')
+             # _driver.execute_script("arguments[0].click();", elements[0])  # 点击
+
+             input_box = wait.until(
+                 EC.element_to_be_clickable(
+                     (By.XPATH, '//input[@placeholder="开始日期"]')))  #
+
+             # from selenium.webdriver.common.keys import Keys
+             for i in range(8):
+                 input_box.send_keys(Keys.BACKSPACE)
+             input_box.send_keys(str_date)
+             time.sleep(1)
+             input_box = wait.until(
+                 EC.element_to_be_clickable(
+                     (By.XPATH, '//input[@placeholder="结束日期"]')))  # 文件名输入框
+
+             for i in range(8):
+                 input_box.send_keys(Keys.BACKSPACE)
+             input_box.send_keys(str_date)
+             time.sleep(2)
+             input_box.send_keys(Keys.ENTER)
+             time.sleep(2)
+             wait.until(EC.presence_of_element_located((By.XPATH, '//button/span[contains(text(), "查询")]')))
+             elements = _driver.find_elements(
+                 By.XPATH, '//button/span[contains(text(), "查询")]')
+             _driver.execute_script("arguments[0].click();", elements[0])  # 点击
+             time.sleep(3)
+             wait.until(EC.presence_of_element_located(
+                 (By.XPATH,
+                  '//button[@class="el-button el-button--primary el-button--small is-plain"]/span[contains(text(), "下载数据")]')))
+             elements = _driver.find_elements(
+                 By.XPATH,
+                 '//button[@class="el-button el-button--primary el-button--small is-plain"]/span[contains(text(), "下载数据")]')
+             _driver.execute_script("arguments[0].click();", elements[0])  # 点击
+             time.sleep(3)
+             self.clean_data(date=new_date)
+         _driver.quit()
+
+     def clean_data(self, date):
+         for root, dirs, files in os.walk(D_PATH, topdown=False):
+             for name in files:
+                 if '~$' in name or 'DS_Store' in name:
+                     continue
+                 if name.endswith('csv'):
+                     pattern = re.findall('[\u4e00-\u9fff]+', name)
+                     if pattern:
+                         continue
+                     pattern = re.findall('^[0-9a-zA-Z_]{5,}-[0-9a-zA-Z_]+-[0-9a-zA-Z_]+-[0-9a-zA-Z_]+', name)
+                     if not pattern:
+                         continue
+                     df = pd.read_csv(os.path.join(root, name), encoding='gb2312', header=0, na_filter=False)
+                     df.insert(loc=0, column='日期', value=date)  # df中插入新列
+                     df.rename(columns={'spuId': 'spu_id'}, inplace=True)
+                     df['数据更新时间'] = pd.to_datetime(df['数据更新时间'], format='%Y-%m-%d %H:%M:%S', errors='ignore')
+                     # df['数据更新时间'] = df['数据更新时间'].apply(lambda x: re.sub(' ', ' ', str(x)) if x else x)
+                     # print(df['数据更新时间'])
+                     # breakpoint()
+                     new_name = f'爱库存_商品榜单_spu_{date}_{date}.csv'
+                     df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False)
+                     os.remove(os.path.join(root, name))
+
+
+ def akucun():
+     akc = AikuCun()
+     akc.get_data(shop_name='aikucun', date_num=3)
+     # akc.clean_data()
+
+     # 新版 数据分类
+     dp = aggregation.DatabaseUpdate(path=D_PATH)
+     dp.new_unzip(is_move=True)
+     dp.cleaning(is_move=False, is_except=['临时文件'])  # 清洗数据, 存入 self.datas, 不需要立即移除文件,仍保留文件到原始文件中
+     # 将 self.datas 更新至数据库
+     dp.upload_df(service_databases=[
+         # {'home_lx': 'mongodb'},
+         # {'home_lx': 'mysql'},
+         {'company': 'mysql'},
+         # {'nas': 'mysql'},
+     ])
+     # 数据分类
+     c = data_clean.DataClean(path=D_PATH, source_path=Source_Path)
+     c.set_up_to_mogo = False  # 不再使用 data_clean 更新数据库,改为 aggregation.py
+     c.set_up_to_mysql = False  # 不再使用 data_clean 更新数据库,改为 aggregation.py
+     c.new_unzip(is_move=True, )  # 解压文件
+     c.change_and_sort(is_except=['临时文件'])
+     c.move_all()  # 移到文件到原始文件夹
+
+
+ if __name__ == '__main__':
+     pass
+     # test()
+     akucun()
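The spider's clean_data step renames each download to 爱库存_商品榜单_spu_<date>_<date>.csv, which matches the file that the aggregation.py __main__ change above loads with one_file_to_mysql. A minimal sketch of that hand-off, assuming mdbq 2.3.1 is installed, one_file_to_mysql is importable from mdbq.aggregation.aggregation as its __main__ block suggests, and the macOS Downloads path shown in the diff is where the CSV lands:

# Illustrative only: drives the new spider, then loads its renamed CSV the same way
# the aggregation.py __main__ block does. Paths and service names are assumptions.
import datetime
from mdbq.spider import aikucun
from mdbq.aggregation.aggregation import one_file_to_mysql

date = datetime.date.today()
akc = aikucun.AikuCun()
akc.get_data(shop_name='aikucun', date_num=1)  # downloads and renames today's report

one_file_to_mysql(
    file=f'/Users/xigua/Downloads/爱库存_商品榜单_spu_{date}_{date}.csv',  # assumed download dir
    db_name='爱库存2',
    table_name='商品spu榜单',
    target_service='company',
    database='mysql',
)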
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: mdbq
- Version: 2.3.0
+ Version: 2.3.1
  Home-page: https://pypi.org/project/mdbsql
  Author: xigua,
  Author-email: 2587125111@qq.com
@@ -45,4 +45,5 @@ mdbq/pbix/refresh_all.py
  mdbq/pbix/refresh_all_old.py
  mdbq/req_post/__init__.py
  mdbq/req_post/req_tb.py
- mdbq/spider/__init__.py
+ mdbq/spider/__init__.py
+ mdbq/spider/aikucun.py
@@ -3,7 +3,7 @@
  from setuptools import setup, find_packages

  setup(name='mdbq',
-       version='2.3.0',
+       version='2.3.1',
        author='xigua, ',
        author_email="2587125111@qq.com",
        url='https://pypi.org/project/mdbsql',
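After upgrading, a quick way to confirm which release is actually installed (standard library only; nothing here beyond the package name comes from this diff):

# Prints the installed distribution version; expected to be 2.3.1 once this release is installed.
from importlib.metadata import version
print(version('mdbq'))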