mdbq 3.7.1__py3-none-any.whl → 3.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/log/spider_logging.py +55 -0
- mdbq/other/download_sku_picture.py +1 -1
- mdbq/spider/aikucun.py +73 -3
- {mdbq-3.7.1.dist-info → mdbq-3.7.3.dist-info}/METADATA +1 -1
- {mdbq-3.7.1.dist-info → mdbq-3.7.3.dist-info}/RECORD +8 -7
- /mdbq/other/{sku_picture.py → sku_picture_bak.py} +0 -0
- {mdbq-3.7.1.dist-info → mdbq-3.7.3.dist-info}/WHEEL +0 -0
- {mdbq-3.7.1.dist-info → mdbq-3.7.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,55 @@
|
|
1
|
+
import logging
|
2
|
+
from logging.handlers import RotatingFileHandler
|
3
|
+
import platform
|
4
|
+
import os
|
5
|
+
import getpass
|
6
|
+
|
7
|
+
|
8
|
+
def setup_logging(reMoveOldHandler=True):
    """
    Configure the root logger to log to a rotating file and to the terminal.

    The log file is ``<downloads dir>/logfile/spider_tg.log`` where the
    downloads directory depends on ``platform.system()``:

    * ``Windows``: ``C:\\Users\\<user>\\Downloads``
    * ``Linux``:   ``Downloads`` (relative to the current working directory)
    * otherwise:   ``/Users/<user>/Downloads``

    Missing directories are created. Both handlers use the same
    ``[time] LEVEL message`` format and the INFO level; the root logger
    level is set to INFO as well.

    Args:
        reMoveOldHandler: when true, every handler currently attached to the
            root logger is removed and closed before the new ones are added.
            When false, the new handlers are added next to the existing ones,
            so repeated calls will produce duplicated output.

    Returns:
        The root ``logging.Logger`` instance.
    """
    system = platform.system()
    if system == 'Windows':
        D_PATH = f'C:\\Users\\{getpass.getuser()}\\Downloads'
    elif system == 'Linux':
        D_PATH = 'Downloads'
    else:
        D_PATH = f'/Users/{getpass.getuser()}/Downloads'

    # exist_ok avoids the check-then-create race and also creates D_PATH
    # itself when it is missing.
    log_dir = os.path.join(D_PATH, 'logfile')
    os.makedirs(log_dir, exist_ok=True)

    file_handler = RotatingFileHandler(
        filename=os.path.join(log_dir, 'spider_tg.log'),
        maxBytes=3 * 1024 * 1024,  # 3 MB per file
        backupCount=10,
        encoding='utf-8',  # explicit encoding (avoids mojibake on Windows)
    )
    stream_handler = logging.StreamHandler()  # terminal output

    formatter = logging.Formatter(
        fmt='[%(asctime)s] %(levelname)s %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
    )
    for handler in (file_handler, stream_handler):
        handler.setFormatter(formatter)  # same format for file and terminal
        handler.setLevel(logging.INFO)

    logger = logging.getLogger()  # root logger
    if reMoveOldHandler:
        # Iterate over a copy because removeHandler mutates logger.handlers.
        for old_handler in logger.handlers[:]:
            logger.removeHandler(old_handler)
            # Close as well as remove (same as logging.basicConfig(force=True)),
            # otherwise every call leaks an open log-file descriptor.
            old_handler.close()

    logger.addHandler(file_handler)
    logger.addHandler(stream_handler)
    logger.setLevel(logging.INFO)  # root log level
    return logger
|
52
|
+
|
53
|
+
|
54
|
+
if __name__ == '__main__':
|
55
|
+
pass
|
@@ -19,7 +19,7 @@ from selenium.webdriver.common.by import By
|
|
19
19
|
from selenium.webdriver.support import expected_conditions as EC
|
20
20
|
from selenium.webdriver.chrome.service import Service
|
21
21
|
from mdbq.config import set_support
|
22
|
-
from mdbq.config import
|
22
|
+
from mdbq.config import default
|
23
23
|
from mdbq.mysql import mysql
|
24
24
|
from mdbq.mysql import s_query
|
25
25
|
from mdbq.other import ua_sj
|
mdbq/spider/aikucun.py
CHANGED
@@ -19,7 +19,8 @@ from mdbq.config import set_support
|
|
19
19
|
from selenium.webdriver.common.keys import Keys
|
20
20
|
from mdbq.other import ua_sj
|
21
21
|
from mdbq.mysql import mysql
|
22
|
-
from mdbq.
|
22
|
+
from mdbq.mysql import s_query
|
23
|
+
from mdbq.config import default
|
23
24
|
import socket
|
24
25
|
|
25
26
|
warnings.filterwarnings('ignore')
|
@@ -37,6 +38,9 @@ else:
|
|
37
38
|
upload_path = os.path.join(D_PATH, '数据上传中心', '爱库存') # 此目录位于下载文件夹
|
38
39
|
|
39
40
|
m_engine, username, password, host, port = default.get_mysql_engine(platform='Windows', hostname='xigua_lx', sql='mysql', local='remoto', config_file=None)
|
41
|
+
print(username, password, host, port)
|
42
|
+
# 实例化一个数据查询类,用来获取 cookies 表数据
|
43
|
+
download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
|
40
44
|
|
41
45
|
|
42
46
|
def get_cookie_aikucun():
|
@@ -118,6 +122,40 @@ def get_cookie_aikucun():
|
|
118
122
|
continue
|
119
123
|
else:
|
120
124
|
new_cookies_list.append(cookie)
|
125
|
+
|
126
|
+
######### 新增 写入 mysql #########
|
127
|
+
set_typ = {
|
128
|
+
'日期': 'date',
|
129
|
+
'domain': 'varchar(100)',
|
130
|
+
'expiry': 'int',
|
131
|
+
'httpOnly': 'varchar(20)',
|
132
|
+
'name': 'varchar(50)',
|
133
|
+
'path': 'varchar(50)',
|
134
|
+
'sameSite': 'varchar(50)',
|
135
|
+
'secure': 'varchar(50)',
|
136
|
+
'value': 'text',
|
137
|
+
'更新时间': 'timestamp'
|
138
|
+
}
|
139
|
+
_cookies_list = []
|
140
|
+
for item in cookies_list:
|
141
|
+
new_dict = {'日期': datetime.datetime.today().strftime('%Y-%m-%d'), }
|
142
|
+
for k, v in item.items():
|
143
|
+
if v is None:
|
144
|
+
v = 'None'
|
145
|
+
new_dict.update({k: v})
|
146
|
+
if 'expiry' not in new_dict:
|
147
|
+
new_dict.update({'expiry': 0})
|
148
|
+
new_dict.update({'更新时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')})
|
149
|
+
_cookies_list.append(new_dict)
|
150
|
+
m_engine.insert_many_dict(
|
151
|
+
db_name='cookie文件',
|
152
|
+
table_name='main_aikucun',
|
153
|
+
dict_data_list=_cookies_list,
|
154
|
+
set_typ=set_typ,
|
155
|
+
allow_not_null=True, # 允许插入空值
|
156
|
+
)
|
157
|
+
#############################################
|
158
|
+
|
121
159
|
json_file = os.path.join(cookie_path, filename_aikucun)
|
122
160
|
with open(json_file, 'w', encoding='utf-8') as f:
|
123
161
|
json.dump(new_cookies_list, f, ensure_ascii=False, sort_keys=True, indent=4)
|
@@ -219,6 +257,38 @@ class AikuCun:
|
|
219
257
|
cookies_list = json.load(f) # 使用json读取cookies 注意读取的是文件 所以用load而不是loads
|
220
258
|
for cookie in cookies_list:
|
221
259
|
_driver.add_cookie(cookie) # 添加cookies信息
|
260
|
+
# print(cookie)
|
261
|
+
db_name = 'cookie文件'
|
262
|
+
table_name = f'main_{shop_name}'
|
263
|
+
df = download.data_to_df(
|
264
|
+
db_name=db_name,
|
265
|
+
table_name=table_name,
|
266
|
+
start_date='2025-01-01',
|
267
|
+
end_date='2030-12-11',
|
268
|
+
projection={
|
269
|
+
'domain': 1,
|
270
|
+
'expiry': 1,
|
271
|
+
'httpOnly': 1,
|
272
|
+
'name': 1,
|
273
|
+
'path': 1,
|
274
|
+
'sameSite': 1,
|
275
|
+
'secure': 1,
|
276
|
+
'value': 1,
|
277
|
+
'更新时间': 1
|
278
|
+
},
|
279
|
+
)
|
280
|
+
# 仅保留最新日期的数据
|
281
|
+
idx = df.groupby('name')['更新时间'].idxmax()
|
282
|
+
df = df.loc[idx]
|
283
|
+
df.pop('更新时间')
|
284
|
+
for item in df.to_dict('records'):
|
285
|
+
new_dict = {}
|
286
|
+
for k, v in item.items():
|
287
|
+
if v == 'False':
|
288
|
+
v = False
|
289
|
+
new_dict.update({k: v})
|
290
|
+
# _driver.add_cookie(new_dict) # 添加cookies信息
|
291
|
+
|
222
292
|
_driver.refresh()
|
223
293
|
time.sleep(3)
|
224
294
|
return _driver
|
@@ -417,8 +487,8 @@ class AikuCunNew:
|
|
417
487
|
|
418
488
|
|
419
489
|
if __name__ == '__main__':
|
420
|
-
get_cookie_aikucun() # 登录并获取 cookies
|
421
|
-
akucun(date_num=
|
490
|
+
# get_cookie_aikucun() # 登录并获取 cookies
|
491
|
+
akucun(date_num=30, headless=True) # 下载数据
|
422
492
|
|
423
493
|
# a = AikuCunNew(shop_name='aikucun')
|
424
494
|
# a.akc()
|
@@ -16,6 +16,7 @@ mdbq/dataframe/__init__.py,sha256=2HtCN8AdRj53teXDqzysC1h8aPL-mMFy561ESmhehGQ,22
|
|
16
16
|
mdbq/dataframe/converter.py,sha256=lETYhT7KXlWzWwqguqhk6vI6kj4rnOBEW1lhqKy2Abc,5035
|
17
17
|
mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
|
18
18
|
mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
|
19
|
+
mdbq/log/spider_logging.py,sha256=CzQIflfbMSBSBjHSO3fEhcG9Oiyj6cR-YU0tObMMyxM,1975
|
19
20
|
mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
|
20
21
|
mdbq/mongo/mongo.py,sha256=M9DUeUCMPDngkwn9-ui0uTiFrvfNU1kLs22s5SmoNm0,31899
|
21
22
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
@@ -23,10 +24,10 @@ mdbq/mysql/mysql.py,sha256=YgmSLkwjIUpjiGH3S-bTiaJCKe8As0WvHDOS6_ePyYs,98591
|
|
23
24
|
mdbq/mysql/s_query.py,sha256=pj5ioJfUT81Su9S-km9G49gF5F2MmXXfw_oAIUzhN28,8794
|
24
25
|
mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
|
25
26
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
26
|
-
mdbq/other/download_sku_picture.py,sha256
|
27
|
+
mdbq/other/download_sku_picture.py,sha256=-ia7KIu7Ij3huWF7tXVrWEE7N4QwJfWDbBIP4pVzcQ4,44655
|
27
28
|
mdbq/other/porxy.py,sha256=UHfgEyXugogvXgsG68a7QouUCKaohTKKkI4RN-kYSdQ,4961
|
28
29
|
mdbq/other/pov_city.py,sha256=AEOmCOzOwyjHi9LLZWPKi6DUuSC-_M163664I52u9qw,21050
|
29
|
-
mdbq/other/
|
30
|
+
mdbq/other/sku_picture_bak.py,sha256=JwSXYlzamVqcKCD2tRH2VqYVZNr8fM6f--kcGlTVRnM,50026
|
30
31
|
mdbq/other/ua_sj.py,sha256=JuVYzc_5QZ9s_oQSrTHVKkQv4S_7-CWx4oIKOARn_9U,22178
|
31
32
|
mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
|
32
33
|
mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
|
@@ -35,8 +36,8 @@ mdbq/pbix/refresh_all_old.py,sha256=_pq3WSQ728GPtEG5pfsZI2uTJhU8D6ra-htIk1JXYzw,
|
|
35
36
|
mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
36
37
|
mdbq/redis/getredis.py,sha256=oyFwE-8c6uErSGYNIO0z2ng93mH0zstRLD86MWqF6M8,25636
|
37
38
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
38
|
-
mdbq/spider/aikucun.py,sha256=
|
39
|
-
mdbq-3.7.
|
40
|
-
mdbq-3.7.
|
41
|
-
mdbq-3.7.
|
42
|
-
mdbq-3.7.
|
39
|
+
mdbq/spider/aikucun.py,sha256=qMU29gb72OK8GLBD-zENo1EmxuojRnuPyEdk5S2KDKw,21767
|
40
|
+
mdbq-3.7.3.dist-info/METADATA,sha256=p1jDAm3j6h9URD3CFONyKgTu0lFAgpy3dxWkn0Q5tnA,243
|
41
|
+
mdbq-3.7.3.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
42
|
+
mdbq-3.7.3.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
43
|
+
mdbq-3.7.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|