mdbq 2.2.9__tar.gz → 2.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdbq-2.2.9 → mdbq-2.3.1}/PKG-INFO +1 -1
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/aggregation/aggregation.py +8 -8
- mdbq-2.3.1/mdbq/req_post/__init__.py +4 -0
- mdbq-2.3.1/mdbq/req_post/req_tb.py +330 -0
- mdbq-2.3.1/mdbq/spider/aikucun.py +293 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq.egg-info/PKG-INFO +1 -1
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq.egg-info/SOURCES.txt +4 -1
- {mdbq-2.2.9 → mdbq-2.3.1}/setup.py +1 -1
- {mdbq-2.2.9 → mdbq-2.3.1}/README.txt +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/__init__.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/__version__.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/aggregation/__init__.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/aggregation/df_types.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/aggregation/mysql_types.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/aggregation/optimize_data.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/aggregation/query_data.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/bdup/__init__.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/bdup/bdup.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/clean/__init__.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/clean/data_clean.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/company/__init__.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/company/copysh.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/company/home_sh.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/config/__init__.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/config/get_myconf.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/config/products.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/config/set_support.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/config/update_conf.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/dataframe/__init__.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/dataframe/converter.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/log/__init__.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/log/mylogger.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/mongo/__init__.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/mongo/mongo.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/mysql/__init__.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/mysql/mysql.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/mysql/s_query.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/mysql/year_month_day.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/other/__init__.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/other/porxy.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/other/pov_city.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/other/sku_picture.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/other/ua_sj.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/pbix/__init__.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/pbix/refresh_all_old.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq/spider/__init__.py +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-2.2.9 → mdbq-2.3.1}/setup.cfg +0 -0
--- mdbq-2.2.9/mdbq/aggregation/aggregation.py
+++ mdbq-2.3.1/mdbq/aggregation/aggregation.py
@@ -1295,14 +1295,14 @@ def test2():
 if __name__ == '__main__':
     username, password, host, port = get_myconf.select_config_values(target_service='nas', database='mysql')
     print(username, password, host, port)
-    file_dir(one_file=False, target_service='company')
-
-
-
-
-
-
-
+    # file_dir(one_file=False, target_service='company')
+    one_file_to_mysql(
+        file='/Users/xigua/Downloads/爱库存_商品榜单_spu_2024-10-17_2024-10-17.csv',
+        db_name='爱库存2',
+        table_name='商品spu榜单',
+        target_service='company',
+        database='mysql'
+    )
 
     # db_name = '推广数据2'
     # table_name = '权益报表'
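For reference, a minimal sketch of driving the same upload from another script, assuming one_file_to_mysql is a module-level function of mdbq.aggregation.aggregation that accepts the keyword arguments shown in the hunk above; the CSV path is a placeholder.

from mdbq.aggregation import aggregation

# Upload one exported CSV into MySQL (placeholder path; real exports follow the
# 爱库存_商品榜单_spu_<start>_<end>.csv naming used by the new aikucun spider).
aggregation.one_file_to_mysql(
    file='/path/to/爱库存_商品榜单_spu_2024-10-17_2024-10-17.csv',  # placeholder path
    db_name='爱库存2',
    table_name='商品spu榜单',
    target_service='company',
    database='mysql',
)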
--- /dev/null
+++ mdbq-2.3.1/mdbq/req_post/req_tb.py
@@ -0,0 +1,330 @@
+# -*- coding: UTF-8 –*-
+import os
+import time
+import datetime
+import pandas as pd
+import warnings
+import requests
+from mdbq.other import ua_sj
+from mdbq.config import get_myconf
+from mdbq.mysql import mysql
+import json
+import socket
+import platform
+import random
+
+warnings.filterwarnings('ignore')
+
+
+class RequestData:
+    def __init__(self):
+        self.date = datetime.date.today().strftime('%Y%m%d')
+        self.url = None
+        self.headers = None
+        self.cookies = None
+        self.datas = []
+        self.path = None
+        self.filename = None
+        self.is_json_file = False
+
+    def request_data(self, date, url, headers, cookies, path, filename):
+        """ 活动预售页面 流量来源 """
+        # date = datetime.date.today().strftime('%Y%m%d')
+        # url = (f'https://sycm.taobao.com/datawar/v6/activity/detail/guide/chl/presale/online/v4.json?'
+        # f'dateRange={date}%7C{date}'
+        # f'&dateType=today'
+        # f'&pageSize=10'
+        # f'&page=1'
+        # f'&order=desc'
+        # f'&orderBy=frontPreheatUv' # 必传参数
+        # f'&activityId=94040472' # 关键,必传参数
+        # # f'&activityStatus=3'
+        # # f'&device=2'
+        # # f'&indexCode=frontPreheatUv%2CfrontPayByrCnt%2CfrontPayRate'
+        # # f'&_=1729079731795'
+        # # f'&token=7e94ba030'
+        # )
+        # headers = {
+        # "referer": "https://dmp.taobao.com/index_new.html",
+        # 'User-Agent': ua_sj.get_ua(),
+        # }
+        # cookies = {
+        # 'session': 't=c198527347800dafa75165f084784668; thw=cn; xlly_s=1; _tb_token_=rPWSGun4nUou9aKxviPg; _samesite_flag_=true; 3PcFlag=1729054801593; cookie2=130befc055eed2df29935197bd2b514b; sgcookie=E100aLOltfWHqLLH1qtyH3it%2BLrGH2v3MAnIBdSfu7xwjEpSyh101lblDVcj3zGpAOLv%2FXcrVNbT%2FN%2BI8KZeCoE4HBzHQk0ANtSqjOG5gIzdKamfirBxGWJyVEccitvvDZhK; unb=2210244713719; sn=%E4%B8%87%E9%87%8C%E9%A9%AC%E5%AE%98%E6%96%B9%E6%97%97%E8%88%B0%E5%BA%97%3A%E6%8E%A8%E5%B9%BF; uc1=cookie21=W5iHLLyFfoaZ&cookie14=UoYcCoAfJ7pSQA%3D%3D; csg=1e2bdb8a; _cc_=Vq8l%2BKCLiw%3D%3D; cancelledSubSites=empty; skt=f813f8478f7318f8; v=0; cna=8+iAHxeojXcCAXjsc5Mt+BAV; mtop_partitioned_detect=1; _m_h5_tk=88c56a84a93c1199f8abe086a132c7eb_1729068459392; _m_h5_tk_enc=4b0ed8316f46edae303547d3863982a4; XSRF-TOKEN=4ef3d151-14c4-445a-9249-595e9a24df75; JSESSIONID=9EE8C8DCF6162DCA2FE0187C29BF0B8A; tfstk=gyaEdSAx842sxMbj1f3rgEWrJ50LN2XbxzMSZ7VoOvDheWNubSerd_IKRlkzIRk3O76JzQqgCk9QZzGuzR3n2kMSdYuzw-51hZ_b9W3--t6flZ3LgJuxZBYHFAYiG40ZtLV_9W3J6C9lclVpUV2YVJ0uEVmiwj0kr00l_ccjZ4YnqexMIAhor4YoqVDiwjvkr80l_5DttHciSWVk7jihGd0FW1QAcqH0tA8kuIhKxg2JVH-emXiZncbekEC-TDk0tAWAnqwo4JoU5wJxTlV4BXyRke3n4kqm-zWV8VVYfJcaEt-rIozLzmaF3nH3JYeq-lWM840Kg7obf_xqCuVT7czFcQhTR74KcqbvKYZ_gzlzyTQa3W2Umm4HLgz6efAQOzEeE3on6fkf_1ySvoccWpB-m3K-jqhZh6GB23nnhfkf_1-J2cDo_x1IO; isg=BLm5J8RI-qdgDKdAgF_DSgcFyCOTxq14BgKdB9vjgONeYsD0IReUSUT05GaUWkWw'}
+
+        self.date = date
+        self.url = url
+        self.headers = headers
+        self.cookies = cookies
+        self.path = path
+        self.filename = filename
+        result = requests.get(
+            self.url,
+            headers=self.headers,
+            cookies=self.cookies,
+        )
+        m_data = json.loads(result.text)
+        update_time = m_data['data']['updateTime']
+        # pt_data = data['data']['data'][0] # 平台流量
+        # gg_data = data['data']['data'][1] # 广告流量
+        for all_data in m_data['data']['data']:
+            self.datas.append({
+                'frontPayByrCnt': all_data['frontPayByrCnt']['value'],
+                '一级标识id': all_data['pageId']['value'],
+                '二级标识id': '',
+                '三级标识id': '',
+                '一级来源': all_data['pageName']['value'],
+                '二级来源': '',
+                '三级来源': '',
+                '活动商品访客数(定金期)': all_data['frontPreheatUv']['value'],
+                '定金支付买家数': all_data['frontPayByrCnt']['value'],
+                '定金支付转化率': all_data['frontPayRate']['value'],
+                '日期': all_data['statDateStr']['value'],
+                '更新时间': update_time,
+                '促销活动': '2024双11预售',
+            })
+            if 'children' not in all_data.keys(): # 这一句有点多余,因为一级来源必定细分有二级来源
+                continue
+            for children_data in all_data['children']:
+                one_source_id = children_data['pPageId']['value']
+                one_source_name = children_data['pPageName']['value']
+                self.datas.append(
+                    {
+                        'frontPayByrCnt': children_data['frontPayByrCnt']['value'],
+                        '一级标识id': children_data['pPageId']['value'],
+                        '二级标识id': children_data['pageId']['value'],
+                        '三级标识id': '',
+                        '一级来源': children_data['pPageName']['value'],
+                        '二级来源': children_data['pageName']['value'],
+                        '三级来源': '',
+                        '活动商品访客数(定金期)': children_data['frontPreheatUv']['value'],
+                        '定金支付买家数': children_data['frontPayByrCnt']['value'],
+                        '定金支付转化率': children_data['frontPayRate']['value'],
+                        '日期': children_data['statDateStr']['value'],
+                        '更新时间': update_time,
+                        '促销活动': '2024双11预售',
+                    }
+                )
+                # print(children_data['children'])
+                # print(children_data)
+                if 'children' not in children_data.keys(): # 部分二级来源没有细分的三级来源,因为需要跳过 children 字段
+                    continue
+                for children_children_data in children_data['children']:
+                    # print(children_children_data)
+                    # print(one_source_name)
+                    self.datas.append(
+                        {
+                            'frontPayByrCnt': children_children_data['frontPayByrCnt']['value'],
+                            '一级标识id': one_source_id,
+                            '二级标识id': children_children_data['pPageId']['value'],
+                            '三级标识id': children_children_data['pageId']['value'],
+                            '一级来源': one_source_name,
+                            '二级来源': children_children_data['pPageName']['value'],
+                            '三级来源': children_children_data['pageName']['value'],
+                            '活动商品访客数(定金期)': children_children_data['frontPreheatUv']['value'],
+                            '定金支付买家数': children_children_data['frontPayByrCnt']['value'],
+                            '定金支付转化率': children_children_data['frontPayRate']['value'],
+                            '日期': children_children_data['statDateStr']['value'],
+                            '更新时间': update_time,
+                            '促销活动': '2024双11预售',
+                        }
+                    )
+        for item in self.datas:
+            if item['日期'] != '':
+                item.update({'日期': f'{item['日期'][0:4]}-{item['日期'][4:6]}-{item['日期'][6:8]}'})
+        if self.is_json_file:
+            with open(os.path.join(self.path, f'{self.filename}.json'), 'w') as f:
+                json.dump(self.datas, f, ensure_ascii=False, sort_keys=True, indent=4)
+
+    def hd_sp(self, date, url, headers, cookies, path, filename, pages=5):
+        """ 活动预售页面 分商品效果 """
+
+        self.date = date
+        self.url = url
+        self.headers = headers
+        self.cookies = cookies
+        self.path = path
+        self.filename = filename
+        for page in range(1, pages + 1):
+            self.url = f'{self.url}&page={page}'
+            result = requests.get(
+                self.url,
+                headers=self.headers,
+                cookies=self.cookies,
+            )
+            m_data = json.loads(result.text)
+            # print(m_data)
+            # with open(os.path.join(self.path, f'{self.filename}.json'), 'w') as f:
+            # json.dump(m_data, f, ensure_ascii=False, sort_keys=True, indent=4)
+            update_time = m_data['data']['updateTime']
+            time_stamp = m_data['data']['timestamp']
+            # pt_data = data['data']['data'][0] # 平台流量
+            # gg_data = data['data']['data'][1] # 广告流量
+            for all_data in m_data['data']['data']['data']:
+                self.datas.append({
+                    'activityItemDepUv': all_data['activityItemDepUv']['value'],
+                    '商品链接': all_data['item']['detailUrl'],
+                    '商品id': all_data['item']['itemId'],
+                    '商品图片': all_data['item']['pictUrl'],
+                    'startDate': all_data['item']['startDate'],
+                    '商品标题': all_data['item']['title'],
+                    '预售订单金额': all_data['presaleOrdAmt']['value'],
+                    '定金支付件数': all_data['presalePayItemCnt']['value'],
+                    '预售访客人数': all_data['presaleUv']['value'],
+                    '定金支付金额': all_data['sumPayDepositAmt']['value'],
+                    '定金支付买家数': all_data['sumPayDepositByrCnt']['value'],
+                    '支付转化率': all_data['uvPayRate']['value'],
+                    '日期': date,
+                    '时间戳': time_stamp,
+                    '更新时间': update_time,
+                    '促销活动': '2024双11预售',
+                    '类型': '分商品效果',
+                })
+            time.sleep(random.randint(5, 10))
+        for item in self.datas:
+            if item['日期'] != '':
+                item.update({'日期': f'{item['日期'][0:4]}-{item['日期'][4:6]}-{item['日期'][6:8]}'})
+        if self.is_json_file:
+            with open(os.path.join(self.path, f'{self.filename}.json'), 'w') as f:
+                json.dump(self.datas, f, ensure_ascii=False, sort_keys=True, indent=4)
+
+    def request_jd(self, date, url, headers, cookies, path, filename):
+        """ 京东 """
+        self.date = date
+        self.url = url
+        self.headers = headers
+        self.cookies = cookies
+        self.path = path
+        self.filename = filename
+        result = requests.post(
+            url,
+            headers=headers,
+            cookies=cookies,
+        )
+        print(result.text)
+
+
+def tb_data(service_databases=[], db_name=None, table_name=None):
+    """ 2024双11预售实时流量分析 """
+    date = datetime.date.today().strftime('%Y%m%d')
+    url = (f'https://sycm.taobao.com/datawar/v6/activity/detail/guide/chl/presale/online/v4.json?'
+           f'dateRange={date}%7C{date}'
+           f'&dateType=today'
+           f'&pageSize=10'
+           f'&page=1'
+           f'&order=desc'
+           f'&orderBy=frontPreheatUv' # 必传参数
+           f'&activityId=94040472' # 关键,必传参数
+           # f'&activityStatus=3'
+           # f'&device=2'
+           # f'&indexCode=frontPreheatUv%2CfrontPayByrCnt%2CfrontPayRate'
+           # f'&_=1729079731795'
+           # f'&token=7e94ba030'
+           )
+    headers = {
+        "referer": "https://dmp.taobao.com/index_new.html",
+        'User-Agent': ua_sj.get_ua(),
+    }
+    cookies = {
+        'session': 't=c198527347800dafa75165f084784668; thw=cn; xlly_s=1; _tb_token_=rPWSGun4nUou9aKxviPg; _samesite_flag_=true; 3PcFlag=1729054801593; cookie2=130befc055eed2df29935197bd2b514b; sgcookie=E100aLOltfWHqLLH1qtyH3it%2BLrGH2v3MAnIBdSfu7xwjEpSyh101lblDVcj3zGpAOLv%2FXcrVNbT%2FN%2BI8KZeCoE4HBzHQk0ANtSqjOG5gIzdKamfirBxGWJyVEccitvvDZhK; unb=2210244713719; sn=%E4%B8%87%E9%87%8C%E9%A9%AC%E5%AE%98%E6%96%B9%E6%97%97%E8%88%B0%E5%BA%97%3A%E6%8E%A8%E5%B9%BF; uc1=cookie21=W5iHLLyFfoaZ&cookie14=UoYcCoAfJ7pSQA%3D%3D; csg=1e2bdb8a; _cc_=Vq8l%2BKCLiw%3D%3D; cancelledSubSites=empty; skt=f813f8478f7318f8; v=0; cna=8+iAHxeojXcCAXjsc5Mt+BAV; mtop_partitioned_detect=1; _m_h5_tk=88c56a84a93c1199f8abe086a132c7eb_1729068459392; _m_h5_tk_enc=4b0ed8316f46edae303547d3863982a4; XSRF-TOKEN=4ef3d151-14c4-445a-9249-595e9a24df75; JSESSIONID=9EE8C8DCF6162DCA2FE0187C29BF0B8A; tfstk=gyaEdSAx842sxMbj1f3rgEWrJ50LN2XbxzMSZ7VoOvDheWNubSerd_IKRlkzIRk3O76JzQqgCk9QZzGuzR3n2kMSdYuzw-51hZ_b9W3--t6flZ3LgJuxZBYHFAYiG40ZtLV_9W3J6C9lclVpUV2YVJ0uEVmiwj0kr00l_ccjZ4YnqexMIAhor4YoqVDiwjvkr80l_5DttHciSWVk7jihGd0FW1QAcqH0tA8kuIhKxg2JVH-emXiZncbekEC-TDk0tAWAnqwo4JoU5wJxTlV4BXyRke3n4kqm-zWV8VVYfJcaEt-rIozLzmaF3nH3JYeq-lWM840Kg7obf_xqCuVT7czFcQhTR74KcqbvKYZ_gzlzyTQa3W2Umm4HLgz6efAQOzEeE3on6fkf_1ySvoccWpB-m3K-jqhZh6GB23nnhfkf_1-J2cDo_x1IO; isg=BLm5J8RI-qdgDKdAgF_DSgcFyCOTxq14BgKdB9vjgONeYsD0IReUSUT05GaUWkWw'}
+    path = '/Users/xigua/Downloads'
+    filename = 'test'
+    r = RequestData()
+    r.is_json_file = False
+    r.request_data(
+        date=date,
+        url=url,
+        headers=headers,
+        cookies=cookies,
+        path=path,
+        filename=filename,
+    )
+    # print(r.datas)
+    df = pd.DataFrame(r.datas)
+    # df.to_csv(os.path.join(path, 'test.csv'), index=False, header=True, encoding='utf-8_sig')
+
+    if not service_databases:
+        return
+    if not db_name or not table_name:
+        print(f'尚未指定 db_name/table_name 参数')
+        return
+    for dt in service_databases:
+        for service_name, database in dt.items():
+            username, password, host, port = get_myconf.select_config_values(
+                target_service=service_name,
+                database=database,
+            )
+            m = mysql.MysqlUpload(
+                username=username,
+                password=password,
+                host=host,
+                port=port,
+            )
+            m.df_to_mysql(
+                df=df,
+                db_name=db_name,
+                table_name=table_name,
+                move_insert=False, # 先删除,再插入
+                df_sql=False, # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
+                drop_duplicates=False, # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
+                filename=None, # 用来追踪处理进度
+                service_database=dt, # 字典
+            )
+
+
+def company_run():
+    if platform.system() == 'Windows' and socket.gethostname() == 'company':
+        while True:
+            tb_data(service_databases=[{'company': 'mysql'}], db_name='生意参谋2',
+                    table_name='2024双11预售实时流量分析')
+            time.sleep(random.randint(1500, 2000))
+
+
+def hd_sp_data(service_databases=[], db_name=None, table_name=None, pages=5):
+    """ 2024双11预售 分商品效果 """
+    date = datetime.date.today().strftime('%Y%m%d')
+    url = (
+        f'https://sycm.taobao.com/datawar/v7/presaleActivity/itemCoreIndex/getItemListLive.json?'
+        f'activityId=94040472'
+        f'&itemType=0' # 必传, 查看全部商品 0, 活动商品 1 , 跨店满减商品 2 ,官方立减 3(无数据)
+        f'&device=1'
+        f'&dateRange={date}%7C{date}'
+        f'&dateType=today'
+        f'&pageSize=10' # 必传
+        # f'&page=1' # 必传
+        # f'&order=desc'
+        # f'&orderBy=presaleOrdAmt'
+        # f'&indexCode=presaleOrdAmt%2CsumPayDepositByrCnt%2CpresalePayItemCnt'
+        # f'&_=1729133575797'
+    )
+    headers = {
+        # "referer": "https://dmp.taobao.com/index_new.html",
+        'User-Agent': ua_sj.get_ua(),
+    }
+    cookies = {
+        'session': 't=c198527347800dafa75165f084784668; thw=cn; xlly_s=1; _tb_token_=rPWSGun4nUou9aKxviPg; _samesite_flag_=true; 3PcFlag=1729054801593; cookie2=130befc055eed2df29935197bd2b514b; sgcookie=E100aLOltfWHqLLH1qtyH3it%2BLrGH2v3MAnIBdSfu7xwjEpSyh101lblDVcj3zGpAOLv%2FXcrVNbT%2FN%2BI8KZeCoE4HBzHQk0ANtSqjOG5gIzdKamfirBxGWJyVEccitvvDZhK; unb=2210244713719; sn=%E4%B8%87%E9%87%8C%E9%A9%AC%E5%AE%98%E6%96%B9%E6%97%97%E8%88%B0%E5%BA%97%3A%E6%8E%A8%E5%B9%BF; uc1=cookie21=W5iHLLyFfoaZ&cookie14=UoYcCoAfJ7pSQA%3D%3D; csg=1e2bdb8a; _cc_=Vq8l%2BKCLiw%3D%3D; cancelledSubSites=empty; skt=f813f8478f7318f8; v=0; cna=8+iAHxeojXcCAXjsc5Mt+BAV; mtop_partitioned_detect=1; _m_h5_tk=88c56a84a93c1199f8abe086a132c7eb_1729068459392; _m_h5_tk_enc=4b0ed8316f46edae303547d3863982a4; XSRF-TOKEN=4ef3d151-14c4-445a-9249-595e9a24df75; JSESSIONID=9EE8C8DCF6162DCA2FE0187C29BF0B8A; tfstk=gyaEdSAx842sxMbj1f3rgEWrJ50LN2XbxzMSZ7VoOvDheWNubSerd_IKRlkzIRk3O76JzQqgCk9QZzGuzR3n2kMSdYuzw-51hZ_b9W3--t6flZ3LgJuxZBYHFAYiG40ZtLV_9W3J6C9lclVpUV2YVJ0uEVmiwj0kr00l_ccjZ4YnqexMIAhor4YoqVDiwjvkr80l_5DttHciSWVk7jihGd0FW1QAcqH0tA8kuIhKxg2JVH-emXiZncbekEC-TDk0tAWAnqwo4JoU5wJxTlV4BXyRke3n4kqm-zWV8VVYfJcaEt-rIozLzmaF3nH3JYeq-lWM840Kg7obf_xqCuVT7czFcQhTR74KcqbvKYZ_gzlzyTQa3W2Umm4HLgz6efAQOzEeE3on6fkf_1ySvoccWpB-m3K-jqhZh6GB23nnhfkf_1-J2cDo_x1IO; isg=BLm5J8RI-qdgDKdAgF_DSgcFyCOTxq14BgKdB9vjgONeYsD0IReUSUT05GaUWkWw'}
+    path = '/Users/xigua/Downloads'
+    filename = 'test'
+    r = RequestData()
+    r.is_json_file = False
+    r.hd_sp(
+        date=date,
+        url=url,
+        headers=headers,
+        cookies=cookies,
+        path=path,
+        filename=filename,
+        pages = pages,
+    )
+    # print(r.datas)
+    df = pd.DataFrame(r.datas)
+    df.to_csv(os.path.join(path, 'test.csv'), index=False, header=True, encoding='utf-8_sig')
+
+
+if __name__ == '__main__':
+    company_run()
+    # tb_data(service_databases=[{'company': 'mysql'}], db_name='生意参谋2', table_name='2024双11预售实时流量分析')
+    hd_sp_data(
+        service_databases=[{'company': 'mysql'}],
+        # db_name='生意参谋2',
+        # table_name='2024双11预售实时流量分析',
+    )
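A minimal usage sketch for the new module, assuming it is importable as mdbq.req_post.req_tb and that the function signatures match the hunk above. Note that url, headers and cookies are hard-coded inside tb_data() and hd_sp_data(), so a valid sycm.taobao.com session cookie has to be edited into the module before either call returns data.

from mdbq.req_post import req_tb

# Realtime presale traffic breakdown -> MySQL (arguments taken from company_run() above).
req_tb.tb_data(
    service_databases=[{'company': 'mysql'}],  # one dict per target: {service_name: database}
    db_name='生意参谋2',
    table_name='2024双11预售实时流量分析',
)

# Per-item presale report for the first 5 result pages; per the hunk above this
# writes test.csv to the hard-coded download path rather than uploading to MySQL.
req_tb.hd_sp_data(service_databases=[{'company': 'mysql'}], pages=5)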
--- /dev/null
+++ mdbq-2.3.1/mdbq/spider/aikucun.py
@@ -0,0 +1,293 @@
+# -*- coding:utf-8 -*-
+import datetime
+import getpass
+import json
+import os
+import pathlib
+import platform
+import re
+import time
+import warnings
+import pandas as pd
+from selenium import webdriver
+from selenium.webdriver.support.wait import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.chrome.service import Service
+from mdbq.config import set_support
+from selenium.webdriver.common.keys import Keys
+from mdbq.aggregation import aggregation
+from mdbq.clean import data_clean
+
+warnings.filterwarnings('ignore')
+
+
+if platform.system() == 'Windows':
+    # windows版本
+    Data_Path = r'C:\同步空间\BaiduSyncdisk'
+    D_PATH = str(pathlib.Path(f'C:\\Users\\{getpass.getuser()}\\Downloads'))
+    Share_Path = str(pathlib.Path(r'\\192.168.1.198\时尚事业部\01.运营部\天猫报表')) # 共享文件根目录
+elif platform.system() == 'Linux':
+    Data_Path = '数据中心'
+    D_PATH = 'Downloads'
+    if not os.path.exists(D_PATH):
+        os.makedirs(D_PATH)
+    Share_Path = '' # linux 通常是远程服务器,不需要访问共享
+else:
+    Data_Path = f'/Users/{getpass.getuser()}/数据中心' # 使用Mac独立网络时
+    # Data_Path = '/Volumes' # 直接使用共享连接台式机时的配置, 后面接 + 自动0备份/***
+    D_PATH = str(pathlib.Path(f'/Users/{getpass.getuser()}/Downloads'))
+    Share_Path = str(pathlib.Path('/Volumes/时尚事业部/01.运营部/天猫报表')) # 共享文件根目录
+
+
+def test():
+    """
+    """
+    _url = 'https://gray-merc.aikucun.com/index.html'
+    cookie_path = '/Users/xigua/Downloads'
+    print(_url)
+
+    option = webdriver.ChromeOptions() # 浏览器启动选项
+    option.headless = True # False指定为无界面模式
+    # 调整chrome启动配置
+    option.add_argument("--disable-gpu")
+    option.add_argument("--no-sandbox")
+    option.add_argument("--disable-dev-shm-usage")
+    option.add_experimental_option("excludeSwitches", ["enable-automation"])
+    option.add_experimental_option("useAutomationExtension", False)
+    # if platform.system() == 'Windows':
+    #     service = Service(os.path.join(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe'))
+    # else:
+    #     service = Service('/usr/local/bin/chromedriver')
+    if platform.system() == 'Windows':
+        # 设置Chrome的路径
+        chrome_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chrome_win64\\chrome.exe')
+        chromedriver_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chromedriver.exe')
+        # os.environ["webdriver.chrome.driver"] = chrome_path
+        option.binary_location = chrome_path # windows 设置此参数有效
+        service = Service(chromedriver_path)
+        # service = Service(str(pathlib.Path(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe'))) # 旧路径
+    else:
+        # 设置Chrome的路径
+        chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
+        chromedriver_path = '/usr/local/chrome/chromedriver'
+        os.environ["webdriver.chrome.driver"] = chrome_path
+
+        service = Service(chromedriver_path)
+    _driver = webdriver.Chrome(service=service, options=option) # 创建Chrome驱动程序实例
+
+    print('yes')
+    # 登录
+    _driver.get(_url)
+    time.sleep(0.1)
+    _driver.maximize_window() # 窗口最大化 方便后续加载数据
+    breakpoint()
+
+    d_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+    print(f'{d_time} 登录成功,正在获取cookie...')
+    time.sleep(0.1)
+
+
+    _file = os.path.join(cookie_path, f'cookie_.txt')
+    with open(_file, 'w') as f:
+        # 将cookies保存为json格式
+        cookies_list = _driver.get_cookies()
+        for cookie in cookies_list:
+            # 该字段有问题所以删除就可以
+            if 'expiry' in cookie:
+                del cookie['expiry']
+            # if 'domain' in cookie:
+            #     cookie['domain'] = '.taobao.com'
+        cookies_list = json.dumps(cookies_list)
+        f.write(cookies_list)
+        print(f'cookie已保存: {_file}')
+    _driver.quit()
+
+
+class AikuCun:
+    def __init__(self):
+        self.url = 'https://gray-merc.aikucun.com/index.html'
+        self.cookie_path = os.path.join(set_support.SetSupport(dirname='support').dirname, 'cookies')
+
+    def login(self, shop_name='aikucun'):
+        option = webdriver.ChromeOptions()
+        # option.add_argument("--headless") # 设置无界面模式
+        # 调整chrome启动配置
+        option.add_argument("--disable-gpu")
+        option.add_argument("--no-sandbox")
+        option.add_argument("--disable-dev-shm-usage")
+        option.add_experimental_option("excludeSwitches", ["enable-automation"])
+        option.add_experimental_option('excludeSwitches', ['enable-logging']) # 禁止日志输出,减少控制台干扰
+        option.add_experimental_option("useAutomationExtension", False)
+        option.add_argument('--ignore-ssl-error') # 忽略ssl错误
+        prefs = {
+            'profile.default_content_settings.popups': 0, # 禁止弹出所有窗口
+            "browser.download.manager. showAlertOnComplete": False, # 下载完成后不显示下载完成提示框
+            "profile.default_content_setting_values.automatic_downloads": 1, # 允许自动下载多个文件
+        }
+
+        option.add_experimental_option('perfLoggingPrefs', {
+            'enableNetwork': True,
+            'enablePage': False,
+        })
+        option.set_capability("goog:loggingPrefs", {
+            'browser': 'ALL',
+            'performance': 'ALL',
+        })
+        option.set_capability("goog:perfLoggingPrefs", {
+            'enableNetwork': True,
+            'enablePage': False,
+            'enableTimeline': False
+        })
+
+        option.add_experimental_option('prefs', prefs)
+        option.add_experimental_option('excludeSwitches', ['enable-automation']) # 实验性参数, 左上角小字
+
+        # # 通过excludeSwitches参数禁用默认的启动路径
+        # option.add_experimental_option('excludeSwitches', ['enable-automation'])
+
+        if platform.system() == 'Windows':
+            # 设置 chrome 和 chromedriver 启动路径
+            chrome_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chrome_win64\\chrome.exe')
+            chromedriver_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chromedriver.exe')
+            # os.environ["webdriver.chrome.driver"] = chrome_path
+            option.binary_location = chrome_path # windows 设置此参数有效
+            service = Service(chromedriver_path)
+            # service = Service(str(pathlib.Path(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe'))) # 旧路径
+        elif platform.system() == 'Darwin':
+            chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
+            chromedriver_path = '/usr/local/chrome/chromedriver'
+            os.environ["webdriver.chrome.driver"] = chrome_path
+            # option.binary_location = chrome_path # Macos 设置此参数报错
+            service = Service(chromedriver_path)
+        else:
+            chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
+            chromedriver_path = '/usr/local/chrome/chromedriver'
+            os.environ["webdriver.chrome.driver"] = chrome_path
+            # option.binary_location = chrome_path # macos 设置此参数报错
+            service = Service(chromedriver_path)
+        _driver = webdriver.Chrome(options=option, service=service) # 创建Chrome驱动程序实例
+        _driver.maximize_window() # 窗口最大化 方便后续加载数据
+
+        # 登录
+        _driver.get(self.url)
+        _driver.delete_all_cookies() # 首先清除浏览器打开已有的cookies
+        name_lists = os.listdir(self.cookie_path) # cookie 放在主目录下的 cookies 文件夹
+        for name in name_lists:
+            if shop_name in name and name.endswith('.txt') and '~' not in name and '.DS' not in name:
+                with open(os.path.join(self.cookie_path, name), 'r') as f:
+                    cookies_list = json.load(f) # 使用json读取cookies 注意读取的是文件 所以用load而不是loads
+                    for cookie in cookies_list:
+                        _driver.add_cookie(cookie) # 添加cookies信息
+        _driver.refresh()
+        time.sleep(3)
+        return _driver
+
+    def get_data(self, shop_name='aikucun', date_num=1):
+        """
+        date_num: 获取最近 N 天数据,0表示今天
+        所有数据都是逐日下载
+        """
+
+        _driver = self.login(shop_name=shop_name)
+        _url = 'https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant?LS=true&shopId=1814114991487782914&from=menu&v=0.1936043279838604'
+        _driver.get(_url)
+        time.sleep(3)
+
+        today = datetime.date.today()
+        for date_s in range(date_num):
+            new_date = today - datetime.timedelta(days=date_s) # 会用作文件名
+            str_date = str(new_date)[2:]
+            wait = WebDriverWait(_driver, timeout=15) #
+            elements = _driver.find_elements(
+                By.XPATH, '//input[@placeholder="开始日期"]')
+            # _driver.execute_script("arguments[0].click();", elements[0]) # 点击
+
+            input_box = wait.until(
+                EC.element_to_be_clickable(
+                    (By.XPATH, '//input[@placeholder="开始日期"]'))) #
+
+            # from selenium.webdriver.common.keys import Keys
+            for i in range(8):
+                input_box.send_keys(Keys.BACKSPACE)
+            input_box.send_keys(str_date)
+            time.sleep(1)
+            input_box = wait.until(
+                EC.element_to_be_clickable(
+                    (By.XPATH, '//input[@placeholder="结束日期"]'))) # 文件名输入框
+
+            for i in range(8):
+                input_box.send_keys(Keys.BACKSPACE)
+            input_box.send_keys(str_date)
+            time.sleep(2)
+            input_box.send_keys(Keys.ENTER)
+            time.sleep(2)
+            wait.until(EC.presence_of_element_located((By.XPATH, '//button/span[contains(text(), "查询")]')))
+            elements = _driver.find_elements(
+                By.XPATH, '//button/span[contains(text(), "查询")]')
+            _driver.execute_script("arguments[0].click();", elements[0]) # 点击
+            time.sleep(3)
+            wait.until(EC.presence_of_element_located(
+                (By.XPATH,
+                 '//button[@class="el-button el-button--primary el-button--small is-plain"]/span[contains(text(), "下载数据")]')))
+            elements = _driver.find_elements(
+                By.XPATH,
+                '//button[@class="el-button el-button--primary el-button--small is-plain"]/span[contains(text(), "下载数据")]')
+            _driver.execute_script("arguments[0].click();", elements[0]) # 点击
+            time.sleep(3)
+            self.clean_data(date=new_date)
+        _driver.quit()
+
+    def clean_data(self, date):
+        for root, dirs, files in os.walk(D_PATH, topdown=False):
+            for name in files:
+                if '~$' in name or 'DS_Store' in name:
+                    continue
+                if name.endswith('csv'):
+                    pattern = re.findall('[\u4e00-\u9fff]+', name)
+                    if pattern:
+                        continue
+                    pattern = re.findall('^[0-9a-zA-Z_]{5,}-[0-9a-zA-Z_]+-[0-9a-zA-Z_]+-[0-9a-zA-Z_]+', name)
+                    if not pattern:
+                        continue
+                    df = pd.read_csv(os.path.join(root, name), encoding='gb2312', header=0, na_filter=False)
+                    df.insert(loc=0, column='日期', value=date) # df中插入新列
+                    df.rename(columns={'spuId': 'spu_id'}, inplace=True)
+                    df['数据更新时间'] = pd.to_datetime(df['数据更新时间'], format='%Y-%m-%d %H:%M:%S', errors='ignore')
+                    # df['数据更新时间'] = df['数据更新时间'].apply(lambda x: re.sub(' ', ' ', str(x)) if x else x)
+                    # print(df['数据更新时间'])
+                    # breakpoint()
+                    new_name = f'爱库存_商品榜单_spu_{date}_{date}.csv'
+                    df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False)
+                    os.remove(os.path.join(root, name))
+
+
+def akucun():
+    akc = AikuCun()
+    akc.get_data(shop_name='aikucun', date_num=3)
+    # akc.clean_data()
+
+    # 新版 数据分类
+    dp = aggregation.DatabaseUpdate(path=D_PATH)
+    dp.new_unzip(is_move=True)
+    dp.cleaning(is_move=False, is_except=['临时文件']) # 清洗数据, 存入 self.datas, 不需要立即移除文件,仍保留文件到原始文件中
+    # 将 self.datas 更新至数据库
+    dp.upload_df(service_databases=[
+        # {'home_lx': 'mongodb'},
+        # {'home_lx': 'mysql'},
+        {'company': 'mysql'},
+        # {'nas': 'mysql'},
+    ])
+    # 数据分类
+    c = data_clean.DataClean(path=D_PATH, source_path=Source_Path)
+    c.set_up_to_mogo = False # 不再使用 data_clean 更新数据库,改为 aggregation.py
+    c.set_up_to_mysql = False # 不再使用 data_clean 更新数据库,改为 aggregation.py
+    c.new_unzip(is_move=True, ) # 解压文件
+    c.change_and_sort(is_except=['临时文件'])
+    c.move_all() # 移到文件到原始文件夹
+
+
+if __name__ == '__main__':
+    pass
+    # test()
+    akucun()
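A minimal usage sketch for the new spider, assuming it is importable as mdbq.spider.aikucun and that the class interface matches the hunk above. get_data() expects a cookie file whose name contains the shop name under the support/cookies directory resolved in AikuCun.__init__; the test() helper above can be run once to capture such a cookie file.

from mdbq.spider import aikucun

spider = aikucun.AikuCun()
spider.get_data(shop_name='aikucun', date_num=3)  # download and normalise the last 3 days of SPU rankings

# akucun() chains the download with the aggregation upload and data_clean sorting,
# but note that it references Source_Path, which is not defined in this file.
# aikucun.akucun()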