mdbq 4.0.11__py3-none-any.whl → 4.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/aggregation/query_data.py +7 -4
- mdbq/mysql/deduplicator.py +7 -3
- mdbq/mysql/s_query.py +7 -3
- mdbq/mysql/unique_.py +7 -3
- mdbq/mysql/uploader.py +7 -3
- mdbq/other/download_sku_picture.py +8 -5
- mdbq/spider/aikucun.py +7 -4
- {mdbq-4.0.11.dist-info → mdbq-4.0.12.dist-info}/METADATA +1 -1
- {mdbq-4.0.11.dist-info → mdbq-4.0.12.dist-info}/RECORD +12 -14
- mdbq/config/__init__.py +0 -4
- mdbq/config/config.py +0 -95
- {mdbq-4.0.11.dist-info → mdbq-4.0.12.dist-info}/WHEEL +0 -0
- {mdbq-4.0.11.dist-info → mdbq-4.0.12.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '4.0.11'
|
1
|
+
VERSION = '4.0.12'
|
mdbq/aggregation/query_data.py
CHANGED
@@ -3,7 +3,7 @@ import re
|
|
3
3
|
# from mdbq.mysql import mysql
|
4
4
|
from mdbq.mysql import uploader
|
5
5
|
from mdbq.mysql import s_query
|
6
|
-
from mdbq.
|
6
|
+
from mdbq.conf import conf
|
7
7
|
from mdbq.log import mylogger
|
8
8
|
import datetime
|
9
9
|
from dateutil.relativedelta import relativedelta
|
@@ -18,9 +18,12 @@ from collections.abc import Mapping, Sequence
|
|
18
18
|
import inspect
|
19
19
|
|
20
20
|
dir_path = os.path.expanduser("~")
|
21
|
-
|
22
|
-
|
23
|
-
|
21
|
+
parser = conf.ConfigParser()
|
22
|
+
host, port, username, password = parser.get_section_values(
|
23
|
+
file_path=os.path.join(dir_path, 'spd.txt'),
|
24
|
+
section='mysql',
|
25
|
+
keys=['host', 'port', 'username', 'password'],
|
26
|
+
)
|
24
27
|
host = 'localhost'
|
25
28
|
uld = uploader.MySQLUploader(username=username, password=password, host=host, port=int(port), pool_size=10)
|
26
29
|
|
mdbq/mysql/deduplicator.py
CHANGED
@@ -6,7 +6,7 @@ import warnings
|
|
6
6
|
import pymysql
|
7
7
|
import os
|
8
8
|
from mdbq.log import mylogger
|
9
|
-
from mdbq.
|
9
|
+
from mdbq.conf import conf
|
10
10
|
from typing import List, Dict, Optional, Any, Tuple
|
11
11
|
from dbutils.pooled_db import PooledDB
|
12
12
|
import threading
|
@@ -1351,8 +1351,12 @@ class MySQLDeduplicator:
|
|
1351
1351
|
def main():
|
1352
1352
|
logger.info('去重任务开始')
|
1353
1353
|
dir_path = os.path.expanduser("~")
|
1354
|
-
|
1355
|
-
|
1354
|
+
parser = conf.ConfigParser()
|
1355
|
+
host, port, username, password = parser.get_section_values(
|
1356
|
+
file_path=os.path.join(dir_path, 'spd.txt'),
|
1357
|
+
section='mysql',
|
1358
|
+
keys=['host', 'port', 'username', 'password'],
|
1359
|
+
)
|
1356
1360
|
# host = 'localhost'
|
1357
1361
|
|
1358
1362
|
deduplicator = MySQLDeduplicator(
|
mdbq/mysql/s_query.py
CHANGED
@@ -7,7 +7,7 @@ from decimal import Decimal
|
|
7
7
|
from contextlib import closing
|
8
8
|
from mdbq.log import mylogger
|
9
9
|
import os
|
10
|
-
from mdbq.
|
10
|
+
from mdbq.conf import conf
|
11
11
|
from typing import Optional, Dict, List, Set, Tuple, Union, Any, Literal
|
12
12
|
from dbutils.pooled_db import PooledDB
|
13
13
|
import time
|
@@ -949,8 +949,12 @@ class QueryDatas:
|
|
949
949
|
|
950
950
|
def main():
|
951
951
|
dir_path = os.path.expanduser("~")
|
952
|
-
|
953
|
-
|
952
|
+
parser = conf.ConfigParser()
|
953
|
+
host, port, username, password = parser.get_section_values(
|
954
|
+
file_path=os.path.join(dir_path, 'spd.txt'),
|
955
|
+
section='mysql',
|
956
|
+
keys=['host', 'port', 'username', 'password'],
|
957
|
+
)
|
954
958
|
host = 'localhost'
|
955
959
|
|
956
960
|
qd = QueryDatas(username=username, password=password, host=host, port=port)
|
mdbq/mysql/unique_.py
CHANGED
@@ -2,7 +2,7 @@ import re
|
|
2
2
|
import pymysql
|
3
3
|
from typing import List, Dict, Any, Tuple
|
4
4
|
from mdbq.log import mylogger
|
5
|
-
from mdbq.
|
5
|
+
from mdbq.conf import conf
|
6
6
|
from dbutils.pooled_db import PooledDB
|
7
7
|
import os
|
8
8
|
|
@@ -274,8 +274,12 @@ class UniqueManager:
|
|
274
274
|
|
275
275
|
def main():
|
276
276
|
dir_path = os.path.expanduser("~")
|
277
|
-
|
278
|
-
|
277
|
+
parser = conf.ConfigParser()
|
278
|
+
host, port, username, password = parser.get_section_values(
|
279
|
+
file_path=os.path.join(dir_path, 'spd.txt'),
|
280
|
+
section='mysql',
|
281
|
+
keys=['host', 'port', 'username', 'password'],
|
282
|
+
)
|
279
283
|
# host = 'localhost'
|
280
284
|
|
281
285
|
my_databases = [
|
mdbq/mysql/uploader.py
CHANGED
@@ -8,7 +8,7 @@ import pymysql
|
|
8
8
|
import pandas as pd
|
9
9
|
import os
|
10
10
|
from mdbq.log import mylogger
|
11
|
-
from mdbq.
|
11
|
+
from mdbq.conf import conf
|
12
12
|
from typing import Union, List, Dict, Optional, Any, Tuple, Set
|
13
13
|
from dbutils.pooled_db import PooledDB
|
14
14
|
import json
|
@@ -1737,8 +1737,12 @@ class MySQLUploader:
|
|
1737
1737
|
|
1738
1738
|
def main():
|
1739
1739
|
dir_path = os.path.expanduser("~")
|
1740
|
-
|
1741
|
-
|
1740
|
+
parser = conf.ConfigParser()
|
1741
|
+
host, port, username, password = parser.get_section_values(
|
1742
|
+
file_path=os.path.join(dir_path, 'spd.txt'),
|
1743
|
+
section='mysql',
|
1744
|
+
keys=['host', 'port', 'username', 'password'],
|
1745
|
+
)
|
1742
1746
|
host = 'localhost'
|
1743
1747
|
|
1744
1748
|
uploader = MySQLUploader(
|
mdbq/other/download_sku_picture.py
CHANGED
@@ -17,8 +17,7 @@ from selenium.webdriver.support.wait import WebDriverWait
|
|
17
17
|
from selenium.webdriver.common.by import By
|
18
18
|
from selenium.webdriver.support import expected_conditions as EC
|
19
19
|
from selenium.webdriver.chrome.service import Service
|
20
|
-
from mdbq.
|
21
|
-
from mdbq.config import config
|
20
|
+
from mdbq.conf import conf
|
22
21
|
from mdbq.mysql import mysql
|
23
22
|
from mdbq.mysql import s_query
|
24
23
|
from mdbq.other import ua_sj
|
@@ -49,8 +48,12 @@ if not os.path.exists(upload_path): # 数据中心根目录
|
|
49
48
|
|
50
49
|
dir_path = os.path.expanduser("~")
|
51
50
|
config_file = os.path.join(dir_path, 'spd.txt')
|
52
|
-
|
53
|
-
|
51
|
+
parser = conf.ConfigParser()
|
52
|
+
host, port, username, password = parser.get_section_values(
|
53
|
+
file_path=config_file,
|
54
|
+
section='mysql',
|
55
|
+
keys=['host', 'port', 'username', 'password'],
|
56
|
+
)
|
54
57
|
m_engine = mysql.MysqlUpload(username=username, password=password, host=host, port=port, charset='utf8mb4')
|
55
58
|
|
56
59
|
if not username:
|
@@ -62,7 +65,7 @@ class LoadAccount:
|
|
62
65
|
|
63
66
|
def __init__(self):
|
64
67
|
self.url = 'https://login.taobao.com/' # 默认登录淘宝
|
65
|
-
self.cookie_path =
|
68
|
+
self.cookie_path = None
|
66
69
|
|
67
70
|
def __call__(self, *args, **kwargs):
|
68
71
|
self.check_cookie() # 检测cookie有效期, 但不阻断任务
|
mdbq/spider/aikucun.py
CHANGED
@@ -15,15 +15,18 @@ from selenium.webdriver.chrome.service import Service
|
|
15
15
|
import pymysql
|
16
16
|
from mdbq.mysql import uploader
|
17
17
|
from mdbq.mysql import s_query
|
18
|
-
from mdbq.
|
18
|
+
from mdbq.conf import conf
|
19
19
|
from mdbq.other import ua_sj
|
20
20
|
from mdbq.other import otk
|
21
21
|
from mdbq.log import mylogger
|
22
22
|
|
23
23
|
dir_path = os.path.expanduser("~")
|
24
|
-
|
25
|
-
|
26
|
-
|
24
|
+
parser = conf.ConfigParser()
|
25
|
+
host, port, username, password = parser.get_section_values(
|
26
|
+
file_path=os.path.join(dir_path, 'spd.txt'),
|
27
|
+
section='mysql',
|
28
|
+
keys=['host', 'port', 'username', 'password'],
|
29
|
+
)
|
27
30
|
|
28
31
|
# 实例化一个数据查询类,用来获取 cookies 表数据
|
29
32
|
logger = mylogger.MyLogger(
|
{mdbq-4.0.11.dist-info → mdbq-4.0.12.dist-info}/RECORD
CHANGED
@@ -1,19 +1,17 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
|
-
mdbq/__version__.py,sha256=
|
2
|
+
mdbq/__version__.py,sha256=BLmcVZfSHZLuZIQc5xXg2DeVBx612a2GO50fSRm0lvs,18
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/query_data.py,sha256=
|
5
|
-
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
6
|
-
mdbq/config/config.py,sha256=eaTfrfXQ65xLqjr5I8-HkZd_jEY1JkGinEgv3TSLeoQ,3170
|
4
|
+
mdbq/aggregation/query_data.py,sha256=Cpdtd2rteO0ZufsAeKnG73E7TgOLPX1xoPsSEZwR9zw,166780
|
7
5
|
mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
|
8
6
|
mdbq/log/mylogger.py,sha256=9w_o5mYB3FooIxobq_lSa6oCYTKIhPxDFox-jeLtUHI,21714
|
9
7
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
10
|
-
mdbq/mysql/deduplicator.py,sha256=
|
8
|
+
mdbq/mysql/deduplicator.py,sha256=h1tpWnfPwDgOyhblippsl8saPIi4f4e1bZvtML6pRt0,73209
|
11
9
|
mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
|
12
|
-
mdbq/mysql/s_query.py,sha256=
|
13
|
-
mdbq/mysql/unique_.py,sha256=
|
14
|
-
mdbq/mysql/uploader.py,sha256=
|
10
|
+
mdbq/mysql/s_query.py,sha256=pcqUSJXla9xCITiBpcsPcPeduUcZt_RQ9r0x4f22vqc,42919
|
11
|
+
mdbq/mysql/unique_.py,sha256=d9bEdwhlxc6oZs3IR-9vd__1FOzGxLWwGUSCHoppcZg,21111
|
12
|
+
mdbq/mysql/uploader.py,sha256=cJ_Bs_AYlLPcMh1TevkGfzFSACUDgsM8lQXE799a6Q8,81090
|
15
13
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
16
|
-
mdbq/other/download_sku_picture.py,sha256=
|
14
|
+
mdbq/other/download_sku_picture.py,sha256=Pf_w7j6dBbTKS00Vt4qceG1v5KaEHvz3FBFeG3BQbGE,44752
|
17
15
|
mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
|
18
16
|
mdbq/other/pov_city.py,sha256=AEOmCOzOwyjHi9LLZWPKi6DUuSC-_M163664I52u9qw,21050
|
19
17
|
mdbq/other/ua_sj.py,sha256=JuVYzc_5QZ9s_oQSrTHVKkQv4S_7-CWx4oIKOARn_9U,22178
|
@@ -23,8 +21,8 @@ mdbq/pbix/refresh_all.py,sha256=OBT9EewSZ0aRS9vL_FflVn74d4l2G00wzHiikCC4TC0,5926
|
|
23
21
|
mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
24
22
|
mdbq/redis/getredis.py,sha256=vpBuNc22uj9Vr-_Dh25_wpwWM1e-072EAAIBdB_IpL0,23494
|
25
23
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
26
|
-
mdbq/spider/aikucun.py,sha256=
|
27
|
-
mdbq-4.0.
|
28
|
-
mdbq-4.0.
|
29
|
-
mdbq-4.0.
|
30
|
-
mdbq-4.0.
|
24
|
+
mdbq/spider/aikucun.py,sha256=Ld46z_29n1XgDwyESY_kCHHjEF3M3LLJxiv0mFhT4sw,21546
|
25
|
+
mdbq-4.0.12.dist-info/METADATA,sha256=qvOi7h9Vx7l7BUweXU6-vvYZVQg2sSbQKrbmmSIFU2U,364
|
26
|
+
mdbq-4.0.12.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
27
|
+
mdbq-4.0.12.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
28
|
+
mdbq-4.0.12.dist-info/RECORD,,
|
mdbq/config/__init__.py
DELETED
mdbq/config/config.py
DELETED
@@ -1,95 +0,0 @@
|
|
1
|
-
import os.path
|
2
|
-
import re
|
3
|
-
|
4
|
-
|
5
|
-
def read_config(file_path):
|
6
|
-
"""读取配置文件,返回字典"""
|
7
|
-
if not os.path.isfile(file_path):
|
8
|
-
print(f'配置文件不存在: {file_path}')
|
9
|
-
return
|
10
|
-
config = {}
|
11
|
-
with open(file_path, 'r', encoding='utf-8') as file:
|
12
|
-
for line in file:
|
13
|
-
stripped_line = line.strip()
|
14
|
-
# 跳过空行和注释行(以 # 或 // 开头)
|
15
|
-
if not stripped_line or stripped_line.startswith(('#', '//')):
|
16
|
-
continue
|
17
|
-
# 处理行内注释(# 或 // 前有空格)
|
18
|
-
comment_match = re.search(r'\s+[#//]', line)
|
19
|
-
if comment_match:
|
20
|
-
line = line[:comment_match.start()].strip()
|
21
|
-
else:
|
22
|
-
line = line.strip()
|
23
|
-
# 解析键值对
|
24
|
-
if '=' in line:
|
25
|
-
key, value = line.split('=', 1)
|
26
|
-
config[key.strip()] = value.strip()
|
27
|
-
return config
|
28
|
-
|
29
|
-
|
30
|
-
def write_config(file_path, rewrite):
|
31
|
-
"""
|
32
|
-
更新配置文件中的键值对,保留注释和其他内容,修复等号空格问题
|
33
|
-
示例:
|
34
|
-
write_config('spd.txt', {'is_spider': True})
|
35
|
-
"""
|
36
|
-
# 读取所有行到内存
|
37
|
-
try:
|
38
|
-
with open(file_path, 'r', encoding='utf-8') as file:
|
39
|
-
lines = file.readlines()
|
40
|
-
except FileNotFoundError:
|
41
|
-
with open(file_path, 'w', encoding='utf-8') as file:
|
42
|
-
lines = []
|
43
|
-
|
44
|
-
new_lines = []
|
45
|
-
found_keys = set()
|
46
|
-
|
47
|
-
for line in lines:
|
48
|
-
stripped = line.strip()
|
49
|
-
if not stripped or stripped.startswith(('#', '//')):
|
50
|
-
new_lines.append(line)
|
51
|
-
continue
|
52
|
-
|
53
|
-
# 使用 partition 保留等号格式
|
54
|
-
key_part, sep, value_part = line.partition('=')
|
55
|
-
if not sep: # 没有等号的行直接保留
|
56
|
-
new_lines.append(line)
|
57
|
-
continue
|
58
|
-
|
59
|
-
key = key_part.strip()
|
60
|
-
if key in rewrite:
|
61
|
-
# 处理值部分和注释
|
62
|
-
comment_match = re.search(r'\s+([#//].*)$', value_part)
|
63
|
-
if comment_match:
|
64
|
-
comment = comment_match.group(0)
|
65
|
-
raw_value = value_part[:comment_match.start()].rstrip()
|
66
|
-
else:
|
67
|
-
comment = ''
|
68
|
-
raw_value = value_part.strip()
|
69
|
-
|
70
|
-
# 保留原值前导空格
|
71
|
-
leading_space = re.match(r'^(\s*)', value_part).group(1)
|
72
|
-
new_value = f"{leading_space}{rewrite[key]}{comment}"
|
73
|
-
|
74
|
-
# 构建新行(保留原等号格式)
|
75
|
-
new_line = f"{key_part}{sep}{new_value}\n"
|
76
|
-
new_lines.append(new_line)
|
77
|
-
found_keys.add(key)
|
78
|
-
else:
|
79
|
-
new_lines.append(line)
|
80
|
-
|
81
|
-
# 添加新键值对
|
82
|
-
for key in rewrite:
|
83
|
-
if key not in found_keys:
|
84
|
-
new_lines.append(f"{key} = {rewrite[key]}\n")
|
85
|
-
|
86
|
-
# 写入文件
|
87
|
-
with open(file_path, 'w', encoding='utf-8') as file:
|
88
|
-
file.writelines(new_lines)
|
89
|
-
|
90
|
-
|
91
|
-
if __name__ == '__main__':
|
92
|
-
res = read_config('/Users/xigua/数据中心2/spider/spd.txt')
|
93
|
-
print(res)
|
94
|
-
# write_config('spd.txt', {'is_spider': False})
|
95
|
-
|
File without changes
|
File without changes
|