mdbq 3.3.5__py3-none-any.whl → 3.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/mongo/mongo.py +24 -22
- mdbq/mysql/recheck_mysql.py +1 -1
- mdbq/spider/aikucun.py +2 -24
- {mdbq-3.3.5.dist-info → mdbq-3.3.7.dist-info}/METADATA +1 -1
- {mdbq-3.3.5.dist-info → mdbq-3.3.7.dist-info}/RECORD +7 -18
- mdbq/aggregation/df_types.py +0 -188
- mdbq/aggregation/mysql_types.py +0 -240
- mdbq/clean/__init__.py +0 -4
- mdbq/clean/clean_upload.py +0 -1350
- mdbq/clean/data_clean.py +0 -1551
- mdbq/company/__init__.py +0 -4
- mdbq/company/copysh.py +0 -447
- mdbq/config/get_myconf.py +0 -131
- mdbq/config/update_conf.py +0 -102
- mdbq/req_post/__init__.py +0 -4
- mdbq/req_post/req_tb.py +0 -624
- {mdbq-3.3.5.dist-info → mdbq-3.3.7.dist-info}/WHEEL +0 -0
- {mdbq-3.3.5.dist-info → mdbq-3.3.7.dist-info}/top_level.txt +0 -0
mdbq/req_post/req_tb.py
DELETED
@@ -1,624 +0,0 @@
|
|
1
|
-
# -*- coding: UTF-8 -*-
|
2
|
-
import os
|
3
|
-
import time
|
4
|
-
import datetime
|
5
|
-
import pandas as pd
|
6
|
-
import warnings
|
7
|
-
import requests
|
8
|
-
from mdbq.other import ua_sj
|
9
|
-
from mdbq.config import get_myconf
|
10
|
-
from mdbq.mysql import mysql
|
11
|
-
import json
|
12
|
-
import socket
|
13
|
-
import platform
|
14
|
-
import random
|
15
|
-
|
16
|
-
warnings.filterwarnings('ignore')
|
17
|
-
|
18
|
-
|
19
|
-
class RequestData:
    """Download activity reports from sycm.taobao.com and flatten them into rows.

    Each public ``*_data`` / ``qxg_ll`` method issues HTTP requests with
    ``requests``, parses the JSON body and returns
    ``(database_name, table_name, DataFrame)``.  ``hd_sp`` and ``qxg_ll_data``
    instead accumulate their results in ``self.datas``.
    """

    # NOTE(review): hard-coded session cookie taken verbatim from the original
    # module.  It is a credential and should presumably come from configuration
    # rather than source control -- confirm with the owner before relying on it.
    _COOKIES = {
        'session': 't=c198527347800dafa75165f084784668; thw=cn; cc_gray=1; 2210244713719_euacm_ac_c_uid_=713197610; 2210244713719_euacm_ac_rs_uid_=713197610; _portal_version_=new; xlly_s=1; _euacm_ac_l_uid_=2210244713719; _tb_token_=GzT2Grwtrep02E5awyhr; _samesite_flag_=true; 3PcFlag=1729299229095; cookie2=15f3dfc1aa68e07b05043bf7f8fb5565; sgcookie=E100r7l2QLYERk5SKLinmW40F%2BbdvBhfP7ZwSPi%2BjxeXI6Y%2B%2BraqfGzS%2BKX3ME%2FRfXZKeLBwECj63B245VuW%2FZBpg5X3Ydq2WK05z0QvsUxuyJQNNaVJTDy8WSQXRpKhFDHF; unb=2210244713719; sn=%E4%B8%87%E9%87%8C%E9%A9%AC%E5%AE%98%E6%96%B9%E6%97%97%E8%88%B0%E5%BA%97%3A%E6%8E%A8%E5%B9%BF; uc1=cookie14=UoYcCoJCtZ6mUg%3D%3D&cookie21=UtASsssmfufd; csg=7d17ab64; _cc_=V32FPkk%2Fhw%3D%3D; cancelledSubSites=empty; skt=214c26d846e4ece2; cna=8+iAHxeojXcCAXjsc5Mt+BAV; v=0; XSRF-TOKEN=a4816e90-82aa-4743-b438-67e826b8ebbe; datawar_version=new; mtop_partitioned_detect=1; _m_h5_tk=c1140ed9be58a574cf0740ca0fad2f9c_1729340693031; _m_h5_tk_enc=2a93813f4e75d7928cc79cc6bc9db5d7; _euacm_ac_rs_sid_=67090549; JSESSIONID=3DBCB84C04569B30741EF0263731963E; tfstk=gbRSfuVwPHdqd3wyBX3VCcGDfR5IVHGN9y_pSeFzJ_CRvXKpc9FEE_WCOnI2ag8Pww1BSHZrr95ROMTMVJ8PY8-XJet_aL-eYzvDbFFyaYfUO_fh9coZ_fzkr6fKIj10GzfA-NE-TgF-MUjmccen_f8kyz7-7Ehwagr3NGjd9TBLkZIcSMQpvTeYHibhJuQd2qTAmie8ygBRHsQP-7Cd9HLxlwcOeP_IFgYSohIbYoEQeUSb9WdfkDj9PrNGyQ_5FGLJNLJkGlX5XUIb9cAVGNjA5In34MAXkQQHDjP5OEQBf_pjD7tBydxhW3hb2a9J5FWXtcFCoKKl3a9jJ-IJWgfRcB03tgpyJBWBwcUF2QxyGOA3VmSeQERRhhn4GHXBedCJOcGA4FVNfUSadr6gOZsZlqw3KbGMRi1XJjWceZb53qgb2pXRoZ_mlqw3KTQcPagjluph.; isg=BOrqXtnOebYXhfQ1b9KgdzAAO1aMW2618WeuUnSgRz1Qp45hXO-pxEuRN9O7V-ZN'}

    def __init__(self):
        """Initialise request state; nothing is fetched until a method is called."""
        self.date = datetime.date.today().strftime('%Y%m%d')
        self.url = None
        self.headers = None
        self.cookies = None
        self.datas = []  # filled by hd_sp (dict rows) and qxg_ll_data (DataFrames)
        self.path = None
        self.filename = None
        self.is_json_file = False  # when True, ys_ll_data / hd_sp also dump rows to JSON
        self.df = pd.DataFrame()

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _dash_date(raw):
        """Return 'YYYYMMDD' as 'YYYY-MM-DD'; an empty string is returned unchanged."""
        if raw == '':
            return raw
        return f'{raw[0:4]}-{raw[4:6]}-{raw[6:8]}'

    @staticmethod
    def _with_page(url, page):
        """Return *url* with a single ``&page=<page>`` parameter appended."""
        return f'{url}&page={page}'

    @classmethod
    def _presale_row(cls, node, source_ids, source_names, update_time):
        """Build one presale traffic-source row from *node*.

        *source_ids* / *source_names* are 3-tuples for source levels 1-3;
        unused levels are passed as ''.
        """
        return {
            'frontPayByrCnt': node['frontPayByrCnt']['value'],
            '一级标识id': source_ids[0],
            '二级标识id': source_ids[1],
            '三级标识id': source_ids[2],
            '一级来源': source_names[0],
            '二级来源': source_names[1],
            '三级来源': source_names[2],
            '活动商品访客数(定金期)': node['frontPreheatUv']['value'],
            '定金支付买家数': node['frontPayByrCnt']['value'],
            '定金支付转化率': node['frontPayRate']['value'],
            '日期': cls._dash_date(node['statDateStr']['value']),
            '更新时间': update_time,
            '促销活动': '2024双11预售',
            '版块': '流量来源',
        }

    @classmethod
    def _flatten_presale_tree(cls, nodes, update_time):
        """Flatten the (up to three-level) presale source tree into a list of rows."""
        rows = []
        for first in nodes:
            rows.append(cls._presale_row(
                first,
                (first['pageId']['value'], '', ''),
                (first['pageName']['value'], '', ''),
                update_time,
            ))
            for second in first.get('children') or []:
                # Level-1 identity is read from the child's parent fields.
                first_id = second['pPageId']['value']
                first_name = second['pPageName']['value']
                rows.append(cls._presale_row(
                    second,
                    (first_id, second['pageId']['value'], ''),
                    (first_name, second['pageName']['value'], ''),
                    update_time,
                ))
                # Some level-2 sources have no level-3 breakdown.
                for third in second.get('children') or []:
                    rows.append(cls._presale_row(
                        third,
                        (first_id, third['pPageId']['value'], third['pageId']['value']),
                        (first_name, third['pPageName']['value'], third['pageName']['value']),
                        update_time,
                    ))
        return rows

    @staticmethod
    def _flow_row(node, source_names, update_time, flow_biz_type, page_type):
        """Build one early-access traffic row from *node*.

        A fresh dict is created per row.  Every non-'value' attribute of every
        metric cell of *node* is copied first (later metrics overwrite earlier
        ones on key collisions), then the named columns are set.
        """
        row = {}
        for field, cell in node.items():
            if field == 'children' or not isinstance(cell, dict):
                continue
            for attr, attr_value in cell.items():
                if attr != 'value':
                    row[attr] = attr_value
        row.update({
            'guideToShortVideoUv': node['guideToShortVideoUv']['value'],
            'hiddenIndexgroup': node['hiddenIndexgroup']['value'],
            '访客数': node['uv']['value'],
            '访客数占比': node['uv']['ratio'],
            '支付买家数': node['payByrCnt']['value'],
            '详情页访客数': node['ipvUvRelate']['value'],
            '支付转化率': node['payRate']['value'],
            'orderByrCnt': node['orderByrCnt']['value'],
            'showDesc': node['showDesc']['value'],
            'showChannel': node['showChannel']['value'],
            '来源等级': node['pageLevel']['value'],
            'channelType': node['channelType']['value'],
            'orderAmt': node['orderAmt']['value'],
            'pageId': node['pageId']['value'],
            'pPageId': node['pPageId']['value'],
            'payAmt': node['payAmt']['value'],
            '一级来源': source_names[0],
            '二级来源': source_names[1],
            '三级来源': source_names[2],
            'showDetailChannel': node['showDetailChannel']['value'],
            'pageDesc': node['pageDesc']['value'],
            'payPct': node['payPct']['value'],
            'crtRate': node['crtRate']['value'],
            '日期': update_time,
            '更新时间': update_time,
            '促销活动': '2024双11抢先购',
            '版块': '流量来源',
            '来源分类': flow_biz_type,
            '流量类型': page_type,
        })
        return row

    @classmethod
    def _flatten_flow_tree(cls, nodes, update_time, flow_biz_type, page_type):
        """Flatten the early-access traffic-source tree into a list of rows.

        For the live ('直播流量') and content ('内容流量') page types the top
        level is emitted as compact "level 0" rows and the tree proper starts
        at the first node's children.
        """
        rows = []
        if page_type == '直播流量' or page_type == '内容流量':
            for item in nodes:
                rows.append({
                    '访客数': item['uv']['value'],
                    'pageId': item['pageId']['value'],
                    '0级来源': item['pageName']['value'],
                    'pPageId': item['pPageId']['value'],
                    '日期': update_time,
                    '更新时间': update_time,
                    '促销活动': '2024双11抢先购',
                    '版块': '流量来源',
                    '来源分类': flow_biz_type,
                    '流量类型': page_type,
                })
            # An empty response has no first node to descend into.
            nodes = (nodes[0].get('children') or []) if nodes else []

        for first in nodes:
            first_name = first['pageName']['value']
            rows.append(cls._flow_row(
                first, (first_name, '', ''), update_time, flow_biz_type, page_type))
            for second in first.get('children') or []:
                second_name = second['pageName']['value']
                rows.append(cls._flow_row(
                    second, (first_name, second_name, ''),
                    update_time, flow_biz_type, page_type))
                # Some level-2 sources have no level-3 breakdown.
                for third in second.get('children') or []:
                    rows.append(cls._flow_row(
                        third, (first_name, second_name, third['pageName']['value']),
                        update_time, flow_biz_type, page_type))
        return rows

    # ------------------------------------------------------------ public methods

    def qxg_hx_data(self):
        """Fetch the early-access (抢先购) warm-up core metrics for today.

        Returns:
            tuple: ``('活动分析2', '2024双11抢先购预热期核心指标', DataFrame)``;
            the two strings are the actual database and table names.
        """
        date = datetime.date.today().strftime('%Y-%m-%d')
        url = (f'https://sycm.taobao.com/datawar/v4/activity/detail/kpi/coreIndex/live.json?'
               f'activityId=92072444'
               f'&status=1'
               f'&dateType=today'
               f'&dateRange={date}%7C{date}'
               f'&_=1729216673692'
               f'&token=0939158d0'
               )
        headers = {'User-Agent': ua_sj.get_ua()}
        result = requests.get(
            url=url,
            headers=headers,
            cookies=dict(self._COOKIES),
        )
        m_data = json.loads(result.text)
        update_time = m_data['data']['updateTime']
        all_data = m_data['data']['data']
        # Milliseconds -> seconds, otherwise time.localtime cannot convert it.
        timestamp = all_data['statDate']['value'] // 1000

        datas = [{
            '日期': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(timestamp))),
            '预热加购人数': all_data['preheatCartByrCnt']['value'],
            '加购转化率': all_data['cartRate']['value'],
            '预热加购件数': all_data['preheatCartItmCnt']['value'],
            '预热访客数': all_data['preheatUv']['value'],
            '收藏转化率': all_data['cltRate']['value'],
            '预热收藏次数': all_data['preheatCltItmCnt']['value'],
            '预热收藏人数': all_data['preheatCltByrCnt']['value'],
            '更新时间': update_time,
            '促销活动': '2024双11抢先购',
            '版块': '预热期核心指标',
        }]
        df = pd.DataFrame(datas)
        df = df.astype({
            '预热加购人数': int,
            '预热加购件数': int,
            '预热访客数': int,
            '预热收藏次数': int,
            '预热收藏人数': int,
            '促销活动': str,
            '版块': str,
        }, errors='raise')
        return '活动分析2', '2024双11抢先购预热期核心指标', df

    def ys_ll_data(self):
        """Fetch today's presale-page traffic sources and flatten the source tree.

        When ``self.is_json_file`` is set and both ``self.path`` and
        ``self.filename`` are given, the rows are also written as UTF-8 JSON.

        Returns:
            tuple: ``('活动分析2', '2024双11预售实时流量分析', DataFrame)``;
            the DataFrame is empty when the response contains no rows.
        """
        date = datetime.date.today().strftime('%Y%m%d')
        url = (f'https://sycm.taobao.com/datawar/v6/activity/detail/guide/chl/presale/online/v4.json?'
               f'dateRange={date}%7C{date}'
               f'&dateType=today'
               f'&pageSize=10'
               f'&page=1'
               f'&order=desc'
               f'&orderBy=frontPreheatUv'  # required parameter
               f'&activityId=94040472'  # key, required parameter
               )
        headers = {
            'User-Agent': ua_sj.get_ua(),
        }
        result = requests.get(
            url=url,
            headers=headers,
            cookies=dict(self._COOKIES),
        )
        m_data = json.loads(result.text)
        update_time = m_data['data']['updateTime']
        datas = self._flatten_presale_tree(m_data['data']['data'], update_time)

        if self.is_json_file:
            if self.path and self.filename:
                # ensure_ascii=False emits raw Chinese text, so pin the encoding.
                with open(os.path.join(self.path, f'{self.filename}.json'), 'w',
                          encoding='utf-8') as f:
                    json.dump(datas, f, ensure_ascii=False, sort_keys=True, indent=4)
            else:
                print(f'尚未指定 self.path/ self.filename')

        df = pd.DataFrame(datas)
        if df.empty:
            # astype() would raise KeyError on the missing columns.
            return '活动分析2', '2024双11预售实时流量分析', df
        df.fillna('0', inplace=True)
        df = df.astype(
            {
                'frontPayByrCnt': int,
                '一级标识id': str,
                '二级标识id': str,
                '三级标识id': str,
                '一级来源': str,
                '二级来源': str,
                '三级来源': str,
                '活动商品访客数(定金期)': int,
                '定金支付买家数': int,
                '促销活动': str,
                '版块': str,
            }, errors='raise')
        return '活动分析2', '2024双11预售实时流量分析', df

    def qxg_ll(self):
        """Fetch early-access traffic sources for every flow-type / page-type pair.

        Calls ``qxg_ll_data`` once per valid combination, sleeping 5-10 s after
        each request, then concatenates everything in ``self.datas``.

        Returns:
            tuple: ``('活动分析2', '2024双11抢先购预热期流量来源', DataFrame)``.
        """
        flow_biz_types = {
            'classic': '非全站推广期',
            'qzt': '全站推广期',
        }
        page_types = {
            'item': '商品流量',
            'shop': '店铺流量',
            'live': '直播流量',
            'content': '内容流量',
        }
        for k_flow, v_flow in flow_biz_types.items():
            for k_page, v_page in page_types.items():
                if v_flow == '全站推广期' and v_page != '商品流量':
                    continue  # only item traffic accepts the qzt value
                now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
                print(f'{now} {v_flow} -> {v_page}: 正在获取数据...')
                date = datetime.date.today().strftime('%Y%m%d')
                url = (
                    f'https://sycm.taobao.com/flow/v5/live/shop/source/tree/v4.json?'
                    f'dateRange={date}%7C{date}'
                    f'&dateType=today'
                    f'&order=desc'
                    f'&orderBy=uv'
                    f'&flowBizType={k_flow}'
                    f'&pageType={k_page}'
                    f'&crowdType=all'
                    f'&activityId=92072444'
                    f'&indexCode=uv'
                )
                headers = {
                    'User-Agent': ua_sj.get_ua(),
                }
                self.qxg_ll_data(
                    url=url,
                    headers=headers,
                    cookies=dict(self._COOKIES),
                    flow_biz_type=v_flow,
                    page_type=v_page,
                )
                time.sleep(random.randint(5, 10))
        df = pd.concat(self.datas)
        if df.empty:
            # astype() would raise KeyError on the missing columns.
            return '活动分析2', '2024双11抢先购预热期流量来源', df
        df.fillna(0, inplace=True)
        df = df.astype(
            {
                '支付买家数': int,
                '详情页访客数': int,
                '来源等级': int,
                'showDetailChannel': int,
            }, errors='raise')
        return '活动分析2', '2024双11抢先购预热期流量来源', df

    def qxg_ll_data(self, url, headers, cookies, flow_biz_type, page_type):
        """Fetch one early-access traffic-source tree and append it to ``self.datas``.

        Args:
            url: Fully built request URL.
            headers: Request headers.
            cookies: Request cookies.
            flow_biz_type: Label stored in the '来源分类' column.
            page_type: Label stored in the '流量类型' column; '直播流量' and
                '内容流量' responses carry one extra top level.

        The flattened rows are appended to ``self.datas`` as one DataFrame.
        """
        result = requests.get(
            url=url,
            headers=headers,
            cookies=cookies,
        )
        payload = json.loads(result.text)
        update_time = payload['data']['updateTime']
        rows = self._flatten_flow_tree(
            payload['data']['data'], update_time, flow_biz_type, page_type)
        self.datas.append(pd.DataFrame(rows))

    def hd_sp(self, date, url, headers, cookies, path, filename, pages=5):
        """Fetch per-item presale results page by page into ``self.datas``.

        Args:
            date: Report date as 'YYYYMMDD'; stored in rows as 'YYYY-MM-DD'.
            url: Base URL without a ``page`` parameter; kept in ``self.url``.
            headers: Request headers.
            cookies: Request cookies.
            path: Directory for the optional JSON dump.
            filename: Base name (no extension) for the optional JSON dump.
            pages: Number of pages to request, starting at 1.

        Sleeps 5-10 s after each page.  When ``self.is_json_file`` is set, all
        of ``self.datas`` is written to ``<path>/<filename>.json`` as UTF-8.
        """
        self.date = date
        self.url = url
        self.headers = headers
        self.cookies = cookies
        self.path = path
        self.filename = filename
        for page in range(1, pages + 1):
            # Build each page URL from the base URL so parameters do not pile up.
            result = requests.get(
                self._with_page(url, page),
                headers=self.headers,
                cookies=self.cookies,
            )
            m_data = json.loads(result.text)
            update_time = m_data['data']['updateTime']
            time_stamp = m_data['data']['timestamp']
            for all_data in m_data['data']['data']['data']:
                self.datas.append({
                    'activityItemDepUv': all_data['activityItemDepUv']['value'],
                    '商品链接': all_data['item']['detailUrl'],
                    '商品id': all_data['item']['itemId'],
                    '商品图片': all_data['item']['pictUrl'],
                    'startDate': all_data['item']['startDate'],
                    '商品标题': all_data['item']['title'],
                    '预售订单金额': all_data['presaleOrdAmt']['value'],
                    '定金支付件数': all_data['presalePayItemCnt']['value'],
                    '预售访客人数': all_data['presaleUv']['value'],
                    '定金支付金额': all_data['sumPayDepositAmt']['value'],
                    '定金支付买家数': all_data['sumPayDepositByrCnt']['value'],
                    '支付转化率': all_data['uvPayRate']['value'],
                    # Formatted once here so rows from earlier calls are not
                    # re-formatted (and garbled) on a later call.
                    '日期': self._dash_date(date),
                    '时间戳': time_stamp,
                    '更新时间': update_time,
                    '促销活动': '2024双11预售',
                    '类型': '分商品效果',
                })
            time.sleep(random.randint(5, 10))
        if self.is_json_file:
            with open(os.path.join(self.path, f'{self.filename}.json'), 'w',
                      encoding='utf-8') as f:
                json.dump(self.datas, f, ensure_ascii=False, sort_keys=True, indent=4)

    def request_jd(self, date, url, headers, cookies, path, filename):
        """POST to *url* (JD / 京东) and print the raw response text.

        The arguments are also stored on the instance; nothing is returned.
        """
        self.date = date
        self.url = url
        self.headers = headers
        self.cookies = cookies
        self.path = path
        self.filename = filename
        result = requests.post(
            url,
            headers=headers,
            cookies=cookies,
        )
        print(result.text)
|
523
|
-
|
524
|
-
|
525
|
-
def hd_sp_data(service_databases=None, db_name=None, table_name=None, pages=5):
    """Download the 2024 Double-11 presale per-item results and save them as CSV.

    Builds today's request, lets ``RequestData.hd_sp`` collect *pages* pages
    of rows, and writes them to ``/Users/xigua/Downloads/test.csv``.

    Args:
        service_databases: Not used by this function.  The default is ``None``
            rather than a shared mutable list.
        db_name: Not used by this function.
        table_name: Not used by this function.
        pages: Number of result pages to request.
    """
    date = datetime.date.today().strftime('%Y%m%d')
    url = (
        f'https://sycm.taobao.com/datawar/v7/presaleActivity/itemCoreIndex/getItemListLive.json?'
        f'activityId=94040472'
        # Required: 0 = all items, 1 = activity items, 2 = cross-shop discount
        # items, 3 = official discount (no data).
        f'&itemType=0'
        f'&device=1'
        f'&dateRange={date}%7C{date}'
        f'&dateType=today'
        f'&pageSize=10'  # required
        # The required `page` parameter is appended by RequestData.hd_sp.
    )
    headers = {
        'User-Agent': ua_sj.get_ua(),
    }
    # NOTE(review): hard-coded session cookie kept verbatim from the original;
    # it is a credential and should presumably come from configuration.
    cookies = {
        'session': 't=c198527347800dafa75165f084784668; thw=cn; cc_gray=1; 2210244713719_euacm_ac_c_uid_=713197610; 2210244713719_euacm_ac_rs_uid_=713197610; _portal_version_=new; xlly_s=1; _euacm_ac_l_uid_=2210244713719; _tb_token_=GzT2Grwtrep02E5awyhr; _samesite_flag_=true; 3PcFlag=1729299229095; cookie2=15f3dfc1aa68e07b05043bf7f8fb5565; sgcookie=E100r7l2QLYERk5SKLinmW40F%2BbdvBhfP7ZwSPi%2BjxeXI6Y%2B%2BraqfGzS%2BKX3ME%2FRfXZKeLBwECj63B245VuW%2FZBpg5X3Ydq2WK05z0QvsUxuyJQNNaVJTDy8WSQXRpKhFDHF; unb=2210244713719; sn=%E4%B8%87%E9%87%8C%E9%A9%AC%E5%AE%98%E6%96%B9%E6%97%97%E8%88%B0%E5%BA%97%3A%E6%8E%A8%E5%B9%BF; uc1=cookie14=UoYcCoJCtZ6mUg%3D%3D&cookie21=UtASsssmfufd; csg=7d17ab64; _cc_=V32FPkk%2Fhw%3D%3D; cancelledSubSites=empty; skt=214c26d846e4ece2; cna=8+iAHxeojXcCAXjsc5Mt+BAV; v=0; XSRF-TOKEN=a4816e90-82aa-4743-b438-67e826b8ebbe; datawar_version=new; mtop_partitioned_detect=1; _m_h5_tk=c1140ed9be58a574cf0740ca0fad2f9c_1729340693031; _m_h5_tk_enc=2a93813f4e75d7928cc79cc6bc9db5d7; _euacm_ac_rs_sid_=67090549; JSESSIONID=3DBCB84C04569B30741EF0263731963E; tfstk=gbRSfuVwPHdqd3wyBX3VCcGDfR5IVHGN9y_pSeFzJ_CRvXKpc9FEE_WCOnI2ag8Pww1BSHZrr95ROMTMVJ8PY8-XJet_aL-eYzvDbFFyaYfUO_fh9coZ_fzkr6fKIj10GzfA-NE-TgF-MUjmccen_f8kyz7-7Ehwagr3NGjd9TBLkZIcSMQpvTeYHibhJuQd2qTAmie8ygBRHsQP-7Cd9HLxlwcOeP_IFgYSohIbYoEQeUSb9WdfkDj9PrNGyQ_5FGLJNLJkGlX5XUIb9cAVGNjA5In34MAXkQQHDjP5OEQBf_pjD7tBydxhW3hb2a9J5FWXtcFCoKKl3a9jJ-IJWgfRcB03tgpyJBWBwcUF2QxyGOA3VmSeQERRhhn4GHXBedCJOcGA4FVNfUSadr6gOZsZlqw3KbGMRi1XJjWceZb53qgb2pXRoZ_mlqw3KTQcPagjluph.; isg=BOrqXtnOebYXhfQ1b9KgdzAAO1aMW2618WeuUnSgRz1Qp45hXO-pxEuRN9O7V-ZN'}

    path = '/Users/xigua/Downloads'
    filename = 'test'
    r = RequestData()
    r.is_json_file = False
    r.hd_sp(
        date=date,
        url=url,
        headers=headers,
        cookies=cookies,
        path=path,
        filename=filename,
        pages=pages,
    )
    df = pd.DataFrame(r.datas)
    df.to_csv(os.path.join(path, 'test.csv'), index=False, header=True, encoding='utf-8_sig')
|
565
|
-
|
566
|
-
|
567
|
-
def company_run(service_databases=None):
    """Poll the early-access traffic report and upload it to MySQL in a loop.

    Each cycle creates a fresh ``RequestData``, collects the enabled reports,
    uploads every non-empty one to each configured service, then sleeps
    1500-2000 seconds.

    Args:
        service_databases: List of ``{service_name: database}`` dicts used to
            look up connection settings via ``get_myconf``.  When empty or
            ``None`` the function returns after the first fetch without
            uploading.  The default is ``None`` rather than a shared mutable
            list.
    """
    # Disabled host guards kept from the original:
    # if platform.system() != 'Windows':
    #     return
    # if socket.gethostname() != 'company':
    #     return
    while True:
        r = RequestData()
        r.is_json_file = False

        my_data_list = [
            # r.ys_ll_data(),  # Double-11 presale real-time traffic analysis
            # r.qxg_hx_data(),  # early-access warm-up core metrics
            r.qxg_ll(),  # early-access traffic sources
        ]

        results = []
        for my_data in my_data_list:
            db_name, table_name, df = my_data
            if len(df) == 0:
                print(f'{db_name} -> {table_name} has no data')
                continue
            results.append([db_name, table_name, df])

        if not service_databases:
            return
        for dt in service_databases:
            for service_name, database in dt.items():
                username, password, host, port = get_myconf.select_config_values(
                    target_service=service_name,
                    database=database,
                )
                m = mysql.MysqlUpload(
                    username=username,
                    password=password,
                    host=host,
                    port=port,
                )
                for result in results:
                    db_name, table_name, df = result
                    # Flag meanings below are translated from the original
                    # comments; confirm against MysqlUpload.df_to_mysql.
                    m.df_to_mysql(
                        df=df,
                        db_name=db_name,
                        table_name=table_name,
                        move_insert=False,  # delete first, then insert
                        df_sql=False,  # True: upload whole table via df.to_sql, no de-duplication
                        drop_duplicates=False,  # True: check for duplicates before inserting (slower)
                        count=None,
                        filename=None,  # used to track processing progress
                    )
                    now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
                    print(f'{now} {db_name} -> {table_name}: 已入库')

        time.sleep(random.randint(1500, 2000))
|
621
|
-
|
622
|
-
|
623
|
-
if __name__ == '__main__':
    # Script entry point: run the polling loop against the 'company' service's
    # 'mysql' database entry.
    target_databases = [{'company': 'mysql'}]
    company_run(service_databases=target_databases)
|
File without changes
|
File without changes
|