mdbq 2.2.8__py3-none-any.whl → 2.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +1 -1
- mdbq/req_post/__init__.py +4 -0
- mdbq/req_post/req_tb.py +232 -0
- {mdbq-2.2.8.dist-info → mdbq-2.3.0.dist-info}/METADATA +1 -1
- {mdbq-2.2.8.dist-info → mdbq-2.3.0.dist-info}/RECORD +7 -5
- {mdbq-2.2.8.dist-info → mdbq-2.3.0.dist-info}/WHEEL +0 -0
- {mdbq-2.2.8.dist-info → mdbq-2.3.0.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -362,7 +362,7 @@ class DatabaseUpdate:
|
|
362
362
|
date = re.findall(r's-(\d{4})(\d{2})(\d{2})\.', str(name))
|
363
363
|
if not date: # 阻止月数据及已转换的表格
|
364
364
|
print(f'{name} 不支持或是已转换的表格')
|
365
|
-
|
365
|
+
os.remove(os.path.join(root, name)) # 直接删掉,避免被分到原始文件, encoding 不同会引发错误
|
366
366
|
check_remove_file = True
|
367
367
|
continue
|
368
368
|
df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
|
mdbq/req_post/req_tb.py
ADDED
@@ -0,0 +1,232 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
import os
|
3
|
+
import time
|
4
|
+
import datetime
|
5
|
+
import pandas as pd
|
6
|
+
import warnings
|
7
|
+
import requests
|
8
|
+
from mdbq.other import ua_sj
|
9
|
+
from mdbq.config import get_myconf
|
10
|
+
from mdbq.mysql import mysql
|
11
|
+
import json
|
12
|
+
import socket
|
13
|
+
import platform
|
14
|
+
import random
|
15
|
+
|
16
|
+
warnings.filterwarnings('ignore')
|
17
|
+
|
18
|
+
|
19
|
+
class RequestData:
    """Download traffic-source statistics and flatten them into row dicts.

    ``request_data`` fetches a JSON document whose ``data.data`` list holds
    first-level sources; each may carry ``children`` (second level), which in
    turn may carry ``children`` (third level). Every node becomes one flat
    dict appended to ``self.datas``. Rows accumulate across calls on the same
    instance.
    """

    # Value written to the '促销活动' (promotion) column of every row.
    promotion = '2024双11预售'
    # Seconds to wait for the server; without a timeout a stalled connection
    # would block the caller forever.
    timeout = 30

    def __init__(self):
        self.date = datetime.date.today().strftime('%Y%m%d')
        self.url = None
        self.headers = None
        self.cookies = None
        self.datas = []  # flattened rows collected so far
        self.path = None
        self.filename = None
        self.is_json_file = False  # when True, request_data also dumps rows to <path>/<filename>.json

    @staticmethod
    def _format_date(raw):
        """Turn a ``YYYYMMDD`` string into ``YYYY-MM-DD``; empty strings pass through."""
        if raw == '':
            return raw
        return f'{raw[0:4]}-{raw[4:6]}-{raw[6:8]}'

    @classmethod
    def _make_row(cls, node, ids, names, update_time):
        """Build one output row from a payload node.

        Args:
            node: payload dict whose metric fields are ``{'value': ...}`` dicts.
            ids: (level-1, level-2, level-3) source ids; '' for unused levels.
            names: (level-1, level-2, level-3) source names; '' for unused levels.
            update_time: the payload-wide ``updateTime`` value.
        """
        return {
            'frontPayByrCnt': node['frontPayByrCnt']['value'],
            '一级标识id': ids[0],
            '二级标识id': ids[1],
            '三级标识id': ids[2],
            '一级来源': names[0],
            '二级来源': names[1],
            '三级来源': names[2],
            '活动商品访客数(定金期)': node['frontPreheatUv']['value'],
            '定金支付买家数': node['frontPayByrCnt']['value'],
            '定金支付转化率': node['frontPayRate']['value'],
            '日期': cls._format_date(node['statDateStr']['value']),
            '更新时间': update_time,
            '促销活动': cls.promotion,
        }

    @classmethod
    def _flatten_payload(cls, m_data):
        """Flatten the decoded JSON payload into a list of row dicts.

        Row order is: a first-level source, then each of its second-level
        children, each immediately followed by its own third-level children.

        Raises:
            KeyError: if the payload lacks an expected field.
        """
        update_time = m_data['data']['updateTime']
        rows = []
        for top in m_data['data']['data']:
            rows.append(cls._make_row(
                top,
                (top['pageId']['value'], '', ''),
                (top['pageName']['value'], '', ''),
                update_time,
            ))
            # A missing/empty 'children' means the node has no finer breakdown.
            for child in top.get('children') or []:
                one_source_id = child['pPageId']['value']
                one_source_name = child['pPageName']['value']
                rows.append(cls._make_row(
                    child,
                    (one_source_id, child['pageId']['value'], ''),
                    (one_source_name, child['pageName']['value'], ''),
                    update_time,
                ))
                # Some second-level sources have no third-level breakdown.
                for grandchild in child.get('children') or []:
                    rows.append(cls._make_row(
                        grandchild,
                        (one_source_id,
                         grandchild['pPageId']['value'],
                         grandchild['pageId']['value']),
                        (one_source_name,
                         grandchild['pPageName']['value'],
                         grandchild['pageName']['value']),
                        update_time,
                    ))
        return rows

    def request_data(self, date, url, headers, cookies, path, filename):
        """Fetch the activity presale traffic-source page and collect its rows.

        The arguments are stored on the instance, the URL is fetched with GET,
        and the flattened rows are appended to ``self.datas``. Dates are
        formatted once, when a row is created, so rows gathered by an earlier
        call are never re-formatted. When ``self.is_json_file`` is true, all
        accumulated rows are also written to ``<path>/<filename>.json``.

        Raises:
            requests.exceptions.RequestException: on network failure/timeout.
            json.JSONDecodeError: if the response body is not JSON.
            KeyError: if the JSON lacks the expected fields.
        """
        self.date = date
        self.url = url
        self.headers = headers
        self.cookies = cookies
        self.path = path
        self.filename = filename
        result = requests.get(
            self.url,
            headers=self.headers,
            cookies=self.cookies,
            timeout=self.timeout,
        )
        m_data = json.loads(result.text)
        self.datas.extend(self._flatten_payload(m_data))
        if self.is_json_file:
            # Explicit UTF-8: ensure_ascii=False emits non-ASCII text, which
            # the platform default encoding may be unable to represent.
            target = os.path.join(self.path, f'{self.filename}.json')
            with open(target, 'w', encoding='utf-8') as f:
                json.dump(self.datas, f, ensure_ascii=False, sort_keys=True, indent=4)

    def request_jd(self, date, url, headers, cookies, path, filename):
        """JD.com: POST to ``url`` and print the raw response body.

        The arguments are stored on the instance; nothing is parsed or saved.
        """
        self.date = date
        self.url = url
        self.headers = headers
        self.cookies = cookies
        self.path = path
        self.filename = filename
        result = requests.post(
            url,
            headers=headers,
            cookies=cookies,
            timeout=self.timeout,
        )
        print(result.text)
|
152
|
+
|
153
|
+
|
154
|
+
def tb_data(service_databases=None, db_name=None, table_name=None, cookie=None):
    """Fetch today's 2024 Double-11 presale real-time traffic data and upload it to MySQL.

    Args:
        service_databases: list of ``{service_name: database}`` dicts naming
            the upload targets. ``None`` or empty means there is nowhere to
            upload, so the function returns without making any request.
        db_name: target database name; required.
        table_name: target table name; required.
        cookie: raw cookie string of a logged-in session. When omitted it is
            read from the ``SYCM_COOKIE`` environment variable, so the
            credential is never stored in source control.

    Raises:
        ValueError: if no cookie is supplied and ``SYCM_COOKIE`` is unset.
    """
    # Validate first: there is no point hitting the network when the result
    # would be thrown away.
    if not service_databases:
        return
    if not db_name or not table_name:
        print('尚未指定 db_name/table_name 参数')
        return
    if cookie is None:
        cookie = os.environ.get('SYCM_COOKIE', '')
    if not cookie:
        raise ValueError('no session cookie: pass cookie= or set the SYCM_COOKIE environment variable')

    date = datetime.date.today().strftime('%Y%m%d')
    # Optional query parameters seen on this endpoint but not needed here:
    # activityStatus, device, indexCode, _, token.
    url = (f'https://sycm.taobao.com/datawar/v6/activity/detail/guide/chl/presale/online/v4.json?'
           f'dateRange={date}%7C{date}'
           f'&dateType=today'
           f'&pageSize=10'
           f'&page=1'
           f'&order=desc'
           f'&orderBy=frontPreheatUv'  # required parameter
           f'&activityId=94040472'  # key parameter, required
           )
    headers = {
        "referer": "https://dmp.taobao.com/index_new.html",
        'User-Agent': ua_sj.get_ua(),
    }
    cookies = {'session': cookie}

    r = RequestData()
    r.is_json_file = False  # no JSON dump, so path/filename are unused
    r.request_data(
        date=date,
        url=url,
        headers=headers,
        cookies=cookies,
        path=None,
        filename=None,
    )
    df = pd.DataFrame(r.datas)

    for dt in service_databases:
        for service_name, database in dt.items():
            username, password, host, port = get_myconf.select_config_values(
                target_service=service_name,
                database=database,
            )
            m = mysql.MysqlUpload(
                username=username,
                password=password,
                host=host,
                port=port,
            )
            m.df_to_mysql(
                df=df,
                db_name=db_name,
                table_name=table_name,
                move_insert=False,  # delete first, then insert
                df_sql=False,  # when True, upload the whole table via df.to_sql without de-duplication
                drop_duplicates=False,  # when True, check for duplicates before inserting (slower); otherwise upload directly
                filename=None,  # used to track processing progress
                service_database=dt,  # dict
            )
|
220
|
+
|
221
|
+
|
222
|
+
def company_run():
    """Poll and upload the presale traffic data forever on the company host.

    Does nothing unless the machine runs Windows and its hostname is
    'company'. On that machine it never returns: it calls ``tb_data`` and
    then sleeps a random 1500-2000 seconds, repeatedly.
    """
    on_company_host = platform.system() == 'Windows' and socket.gethostname() == 'company'
    if not on_company_host:
        return
    while True:
        tb_data(
            service_databases=[{'company': 'mysql'}],
            db_name='生意参谋2',
            table_name='2024双11预售实时流量分析',
        )
        # Randomised pause between polls.
        time.sleep(random.uniform(1500, 2000))
|
228
|
+
|
229
|
+
|
230
|
+
if __name__ == '__main__':
    # On the company host company_run() loops forever; on any other machine
    # it returns immediately and a single upload is performed below.
    company_run()
    tb_data(
        service_databases=[{'company': 'mysql'}],
        db_name='生意参谋2',
        table_name='2024双11预售实时流量分析',
    )
|
@@ -1,7 +1,7 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
2
|
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/aggregation.py,sha256=
|
4
|
+
mdbq/aggregation/aggregation.py,sha256=DvK0ElEV-fxZb10JHrJFrOyj3oplOUJyYC0J9pDIxyE,75870
|
5
5
|
mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
|
6
6
|
mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
|
7
7
|
mdbq/aggregation/optimize_data.py,sha256=Wis40oL04M7E1pkvgNPjyVFAUe-zgjimjIVAikxYY8Y,4418
|
@@ -37,8 +37,10 @@ mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
|
|
37
37
|
mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
|
38
38
|
mdbq/pbix/refresh_all.py,sha256=viOlLCmz9zg61Q2nzjgl8dChfQxnxRd1A_jmQMb2oDM,5918
|
39
39
|
mdbq/pbix/refresh_all_old.py,sha256=_pq3WSQ728GPtEG5pfsZI2uTJhU8D6ra-htIk1JXYzw,7192
|
40
|
+
mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
41
|
+
mdbq/req_post/req_tb.py,sha256=8zQIk2QaNQ1KcpfoyTTLD7SwviDCAgw3Mmo1-W5ECxg,12682
|
40
42
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
41
|
-
mdbq-2.
|
42
|
-
mdbq-2.
|
43
|
-
mdbq-2.
|
44
|
-
mdbq-2.
|
43
|
+
mdbq-2.3.0.dist-info/METADATA,sha256=7-w_kfcAAlgXHvye7ttiUejkIg3FrFlfrvdqTWurkOE,245
|
44
|
+
mdbq-2.3.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
45
|
+
mdbq-2.3.0.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
46
|
+
mdbq-2.3.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|