mdbq 3.2.8__py3-none-any.whl → 3.2.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +60 -19
- mdbq/aggregation/query_data.py +79 -17
- mdbq/mysql/mysql.py +13 -0
- mdbq/spider/aikucun.py +99 -29
- {mdbq-3.2.8.dist-info → mdbq-3.2.10.dist-info}/METADATA +1 -1
- {mdbq-3.2.8.dist-info → mdbq-3.2.10.dist-info}/RECORD +8 -8
- {mdbq-3.2.8.dist-info → mdbq-3.2.10.dist-info}/WHEEL +0 -0
- {mdbq-3.2.8.dist-info → mdbq-3.2.10.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -1171,15 +1171,56 @@ def upload_dir(path, db_name, collection_name, json_path=None):
|
|
1171
1171
|
for col in df.columns.tolist():
|
1172
1172
|
df[col] = df[col].apply(lambda x: 0 if str(x) == '' else x)
|
1173
1173
|
|
1174
|
-
|
1175
|
-
|
1176
|
-
|
1174
|
+
if '更新时间' not in df.columns.tolist():
|
1175
|
+
df['更新时间'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
1176
|
+
|
1177
|
+
# set_typ = {
|
1178
|
+
# '日期': 'date',
|
1179
|
+
# '店铺名称': 'varchar(100)',
|
1180
|
+
# 'spu_id': 'varchar(100)',
|
1181
|
+
# '图片': 'varchar(255)',
|
1182
|
+
# '序号': 'smallint',
|
1183
|
+
# '商品名称': 'varchar(255)',
|
1184
|
+
# '商品款号': 'varchar(255)',
|
1185
|
+
# '一级类目名称': 'varchar(255)',
|
1186
|
+
# '二级类目名称': 'varchar(255)',
|
1187
|
+
# '三级类目名称': 'varchar(255)',
|
1188
|
+
# '数据更新时间': 'timestamp',
|
1189
|
+
# '更新时间': 'timestamp',
|
1190
|
+
# }
|
1191
|
+
# new_dict = {
|
1192
|
+
# '日期': '',
|
1193
|
+
# '店铺名称': '',
|
1194
|
+
# '序号': '',
|
1195
|
+
# '商品名称': '',
|
1196
|
+
# 'spu_id': '',
|
1197
|
+
# '商品款号': '',
|
1198
|
+
# '一级类目名称': '',
|
1199
|
+
# '二级类目名称': '',
|
1200
|
+
# '三级类目名称': '',
|
1201
|
+
# '访客量': '',
|
1202
|
+
# '浏览量': '',
|
1203
|
+
# '下单gmv': '',
|
1204
|
+
# '成交gmv': '',
|
1205
|
+
# '支付人数_成交': '',
|
1206
|
+
# }
|
1207
|
+
# for dict_data in df.to_dict(orient='records'):
|
1208
|
+
# new_dict.update(dict_data)
|
1209
|
+
# m.dict_to_mysql(
|
1210
|
+
# db_name=db_name,
|
1211
|
+
# table_name=collection_name,
|
1212
|
+
# dict_data=new_dict,
|
1213
|
+
# # icm_update=['日期', '店铺名称', 'spu_id', '商品款号'],
|
1214
|
+
# unique_main_key=None,
|
1215
|
+
# set_typ=set_typ,
|
1216
|
+
# )
|
1177
1217
|
m.df_to_mysql(df=df, db_name=db_name, table_name=collection_name,
|
1178
1218
|
move_insert=False, # 先删除,再插入
|
1179
1219
|
df_sql = True,
|
1180
1220
|
drop_duplicates=False,
|
1181
1221
|
count=f'{i}/{count}',
|
1182
1222
|
filename=name,
|
1223
|
+
set_typ={},
|
1183
1224
|
)
|
1184
1225
|
# nas.df_to_mysql(df=df, db_name=db_name, table_name=collection_name, drop_duplicates=True,)
|
1185
1226
|
|
@@ -1279,24 +1320,24 @@ if __name__ == '__main__':
|
|
1279
1320
|
# host = ''
|
1280
1321
|
# port = ''
|
1281
1322
|
#
|
1282
|
-
# 上传 1 个文件到数据库
|
1283
|
-
one_file_to_mysql(
|
1284
|
-
|
1285
|
-
|
1286
|
-
|
1287
|
-
)
|
1323
|
+
# # 上传 1 个文件到数据库
|
1324
|
+
# one_file_to_mysql(
|
1325
|
+
# file=r'/Users/xigua/Downloads/城市等级.csv',
|
1326
|
+
# db_name='属性设置3',
|
1327
|
+
# table_name='城市等级',
|
1328
|
+
# )
|
1288
1329
|
|
1289
1330
|
|
1290
|
-
|
1291
|
-
|
1292
|
-
|
1293
|
-
|
1294
|
-
|
1295
|
-
|
1296
|
-
|
1297
|
-
|
1298
|
-
|
1299
|
-
|
1331
|
+
col = 1
|
1332
|
+
if col:
|
1333
|
+
# 上传一个目录到指定数据库
|
1334
|
+
db_name = '爱库存2'
|
1335
|
+
table_name = '商品spu榜单'
|
1336
|
+
upload_dir(
|
1337
|
+
path=r'/Users/xigua/Downloads/数据上传中心',
|
1338
|
+
db_name=db_name,
|
1339
|
+
collection_name=table_name,
|
1340
|
+
)
|
1300
1341
|
|
1301
1342
|
|
1302
1343
|
|
mdbq/aggregation/query_data.py
CHANGED
@@ -1834,22 +1834,52 @@ class MysqlDatasQuery:
|
|
1834
1834
|
}
|
1835
1835
|
min_date = df['日期'].min()
|
1836
1836
|
max_date = df['日期'].max()
|
1837
|
+
new_dict = {
|
1838
|
+
'日期': '',
|
1839
|
+
'店铺名称': '',
|
1840
|
+
'场次信息': '',
|
1841
|
+
'场次id': '',
|
1842
|
+
'直播开播时间': '',
|
1843
|
+
'开播时长': '',
|
1844
|
+
'封面图点击率': '',
|
1845
|
+
'观看人数': '',
|
1846
|
+
'观看次数': '',
|
1847
|
+
'新增粉丝数': '',
|
1848
|
+
'流量券消耗': '',
|
1849
|
+
'观看总时长': '',
|
1850
|
+
'人均观看时长': '',
|
1851
|
+
'次均观看时长': '',
|
1852
|
+
'商品点击人数': '',
|
1853
|
+
'商品点击次数': '',
|
1854
|
+
'商品点击率': '',
|
1855
|
+
'加购人数': '',
|
1856
|
+
'加购件数': '',
|
1857
|
+
'加购次数': '',
|
1858
|
+
'成交金额': '',
|
1859
|
+
'成交人数': '',
|
1860
|
+
'成交件数': '',
|
1861
|
+
'成交笔数': '',
|
1862
|
+
'成交转化率': '',
|
1863
|
+
'退款人数': '',
|
1864
|
+
'退款笔数': '',
|
1865
|
+
'退款件数': '',
|
1866
|
+
'退款金额': '',
|
1867
|
+
'预售定金支付金额': '',
|
1868
|
+
'预售预估总金额': '',
|
1869
|
+
}
|
1837
1870
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
1838
1871
|
print(f'{now} 正在更新: mysql ({host}:{port}) {db_name}/{table_name} -> {min_date}~{max_date}')
|
1839
|
-
|
1840
|
-
|
1841
|
-
|
1842
|
-
|
1843
|
-
|
1844
|
-
|
1845
|
-
|
1846
|
-
|
1847
|
-
|
1848
|
-
|
1849
|
-
|
1850
|
-
set_typ=set_typ,
|
1851
|
-
|
1852
|
-
)
|
1872
|
+
for dict_data in df.to_dict(orient='records'):
|
1873
|
+
new_dict.update(dict_data)
|
1874
|
+
m_engine.dict_to_mysql(
|
1875
|
+
db_name=db_name,
|
1876
|
+
table_name=table_name,
|
1877
|
+
dict_data=new_dict,
|
1878
|
+
unique_main_key=None,
|
1879
|
+
icm_update=['场次id'], # 唯一组合键
|
1880
|
+
main_key=None, # 指定索引列, 通常用日期列,默认会设置日期为索引
|
1881
|
+
set_typ=set_typ, # 指定数据类型
|
1882
|
+
)
|
1853
1883
|
return True
|
1854
1884
|
|
1855
1885
|
# @try_except
|
@@ -2243,15 +2273,47 @@ class MysqlDatasQuery:
|
|
2243
2273
|
set_typ = {
|
2244
2274
|
'日期': 'date',
|
2245
2275
|
'店铺名称': 'varchar(100)',
|
2246
|
-
'序号': 'int',
|
2247
2276
|
'spu_id': 'varchar(100)',
|
2248
2277
|
'图片': 'varchar(255)',
|
2278
|
+
'序号': 'smallint',
|
2279
|
+
'商品名称': 'varchar(255)',
|
2280
|
+
'商品款号': 'varchar(255)',
|
2281
|
+
'一级类目名称': 'varchar(255)',
|
2282
|
+
'二级类目名称': 'varchar(255)',
|
2283
|
+
'三级类目名称': 'varchar(255)',
|
2249
2284
|
'数据更新时间': 'timestamp',
|
2285
|
+
'更新时间': 'timestamp',
|
2250
2286
|
}
|
2251
2287
|
min_date = df['日期'].min()
|
2252
2288
|
max_date = df['日期'].max()
|
2253
2289
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
2254
2290
|
print(f'{now} 正在更新: mysql ({host}:{port}) {db_name}/{table_name} -> {min_date}~{max_date}')
|
2291
|
+
# new_dict = {
|
2292
|
+
# '日期': '',
|
2293
|
+
# '店铺名称': '',
|
2294
|
+
# '序号': '',
|
2295
|
+
# '商品名称': '',
|
2296
|
+
# 'spu_id': '',
|
2297
|
+
# '商品款号': '',
|
2298
|
+
# '一级类目名称': '',
|
2299
|
+
# '二级类目名称': '',
|
2300
|
+
# '三级类目名称': '',
|
2301
|
+
# '访客量': '',
|
2302
|
+
# '浏览量': '',
|
2303
|
+
# '下单gmv': '',
|
2304
|
+
# '成交gmv': '',
|
2305
|
+
# '支付人数_成交': '',
|
2306
|
+
# }
|
2307
|
+
# for dict_data in df.to_dict(orient='records'):
|
2308
|
+
# new_dict.update(dict_data)
|
2309
|
+
# m_engine.dict_to_mysql(
|
2310
|
+
# db_name=db_name,
|
2311
|
+
# table_name=table_name,
|
2312
|
+
# dict_data=new_dict,
|
2313
|
+
# icm_update=['日期', '店铺名称', 'spu_id', '商品款号'],
|
2314
|
+
# unique_main_key=None,
|
2315
|
+
# set_typ=set_typ,
|
2316
|
+
# )
|
2255
2317
|
m_engine.df_to_mysql(
|
2256
2318
|
df=df,
|
2257
2319
|
db_name=db_name,
|
@@ -2264,7 +2326,7 @@ class MysqlDatasQuery:
|
|
2264
2326
|
filename=None, # 用来追踪处理进度
|
2265
2327
|
reset_id=False, # 是否重置自增列
|
2266
2328
|
set_typ=set_typ,
|
2267
|
-
|
2329
|
+
|
2268
2330
|
)
|
2269
2331
|
return True
|
2270
2332
|
|
@@ -3256,7 +3318,7 @@ if __name__ == '__main__':
|
|
3256
3318
|
# 3. 清理聚合数据
|
3257
3319
|
optimize_data.op_data(
|
3258
3320
|
db_name_lists=['聚合数据'],
|
3259
|
-
days=
|
3321
|
+
days=100, # 清理聚合数据的日期长度
|
3260
3322
|
is_mongo=False,
|
3261
3323
|
is_mysql=True,
|
3262
3324
|
)
|
mdbq/mysql/mysql.py
CHANGED
@@ -136,6 +136,10 @@ class MysqlUpload:
|
|
136
136
|
set_typ: {}
|
137
137
|
allow_not_null: 创建允许插入空值的列,正常情况下不允许空值
|
138
138
|
"""
|
139
|
+
if icm_update:
|
140
|
+
if main_key or unique_main_key:
|
141
|
+
print(f'icm_update/unique_main_key/unique_main_key 参数不能同时设定')
|
142
|
+
return
|
139
143
|
if not main_key:
|
140
144
|
main_key = []
|
141
145
|
if not unique_main_key:
|
@@ -468,6 +472,15 @@ class MysqlUpload:
|
|
468
472
|
filename: 用来追踪处理进度,传这个参数是方便定位产生错误的文件
|
469
473
|
allow_not_null: 创建允许插入空值的列,正常情况下不允许空值
|
470
474
|
"""
|
475
|
+
if icm_update:
|
476
|
+
if move_insert or df_sql or drop_duplicates:
|
477
|
+
print(f'icm_update/move_insert/df_sql/drop_duplicates 参数不能同时设定')
|
478
|
+
return
|
479
|
+
if move_insert:
|
480
|
+
if icm_update or df_sql or drop_duplicates:
|
481
|
+
print(f'icm_update/move_insert/df_sql/drop_duplicates 参数不能同时设定')
|
482
|
+
return
|
483
|
+
|
471
484
|
self.filename = filename
|
472
485
|
if isinstance(df, pd.DataFrame):
|
473
486
|
if len(df) == 0:
|
mdbq/spider/aikucun.py
CHANGED
@@ -108,7 +108,29 @@ def get_cookie_aikucun():
|
|
108
108
|
time.sleep(0.1)
|
109
109
|
_driver.maximize_window() # 窗口最大化 方便后续加载数据
|
110
110
|
print(f'请登录并切换到百宝箱,再保存 cookies: \n https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant?LS=true&shopId=1814114991487782914&from=menu&v=0.1936043279838604')
|
111
|
-
|
111
|
+
wait = WebDriverWait(_driver, timeout=15)
|
112
|
+
input_box = wait.until(
|
113
|
+
EC.element_to_be_clickable(
|
114
|
+
(By.XPATH, '//input[@placeholder="请输入用户名"]'))) #
|
115
|
+
input_box.send_keys('广东万里马实业股份有限公司')
|
116
|
+
input_box = wait.until(
|
117
|
+
EC.element_to_be_clickable(
|
118
|
+
(By.XPATH, '//input[@placeholder="请输入密码"]'))) #
|
119
|
+
input_box.send_keys('wlm123$$$')
|
120
|
+
time.sleep(0.1)
|
121
|
+
elements = _driver.find_elements(
|
122
|
+
By.XPATH, '//button[@class="merchant_login_btn" and contains(text(), "登录")]')
|
123
|
+
_driver.execute_script("arguments[0].click();", elements[0])
|
124
|
+
for i in range(100):
|
125
|
+
try:
|
126
|
+
wait.until(
|
127
|
+
EC.element_to_be_clickable(
|
128
|
+
(By.XPATH, '//div[@class="user-info nav-user-slider"]')))
|
129
|
+
_driver.get(' https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant?LS=true&shopId=1814114991487782914&from=menu&v=0.1936043279838604')
|
130
|
+
time.sleep(3)
|
131
|
+
break
|
132
|
+
except:
|
133
|
+
time.sleep(5)
|
112
134
|
|
113
135
|
d_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
114
136
|
print(f'{d_time} 登录成功,正在获取cookie...')
|
@@ -116,13 +138,16 @@ def get_cookie_aikucun():
|
|
116
138
|
|
117
139
|
# 将cookies保存为json格式
|
118
140
|
cookies_list = _driver.get_cookies()
|
141
|
+
new_cookies_list = []
|
119
142
|
for cookie in cookies_list:
|
120
143
|
# 该字段有问题所以删除就可以
|
121
|
-
if '
|
122
|
-
|
144
|
+
if 'HWWAFSESTIME' in cookie:
|
145
|
+
continue
|
146
|
+
else:
|
147
|
+
new_cookies_list.append(cookie)
|
123
148
|
json_file = os.path.join(cookie_path, filename_aikucun)
|
124
149
|
with open(json_file, 'w', encoding='utf-8') as f:
|
125
|
-
json.dump(
|
150
|
+
json.dump(new_cookies_list, f, ensure_ascii=False, sort_keys=True, indent=4)
|
126
151
|
print(f'cookie已保存: {json_file}')
|
127
152
|
|
128
153
|
# _file = os.path.join(cookie_path, filename_aikucun)
|
@@ -147,9 +172,10 @@ class AikuCun:
|
|
147
172
|
self.sp_url = 'https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant?LS=true&shopId=1814114991487782914&from=menu&v=0.1936043279838604'
|
148
173
|
self.cookie_path = os.path.join(set_support.SetSupport(dirname='support').dirname, 'cookies')
|
149
174
|
|
150
|
-
def login(self, shop_name='aikucun'):
|
175
|
+
def login(self, shop_name='aikucun', headless=False):
|
151
176
|
option = webdriver.ChromeOptions()
|
152
|
-
|
177
|
+
if headless:
|
178
|
+
option.add_argument("--headless") # 设置无界面模式
|
153
179
|
# 调整chrome启动配置
|
154
180
|
option.add_argument("--disable-gpu")
|
155
181
|
option.add_argument("--no-sandbox")
|
@@ -224,13 +250,13 @@ class AikuCun:
|
|
224
250
|
time.sleep(3)
|
225
251
|
return _driver
|
226
252
|
|
227
|
-
def get_data(self, shop_name='aikucun', date_num=1):
|
253
|
+
def get_data(self, shop_name='aikucun', date_num=1, headless=True):
|
228
254
|
"""
|
229
255
|
date_num: 获取最近 N 天数据,0表示今天
|
230
256
|
所有数据都是逐日下载
|
231
257
|
"""
|
232
258
|
|
233
|
-
_driver = self.login(shop_name=shop_name)
|
259
|
+
_driver = self.login(shop_name=shop_name, headless=headless)
|
234
260
|
|
235
261
|
_driver.get(self.sp_url)
|
236
262
|
time.sleep(3)
|
@@ -239,7 +265,8 @@ class AikuCun:
|
|
239
265
|
today = datetime.date.today()
|
240
266
|
for date_s in range(date_num):
|
241
267
|
new_date = today - datetime.timedelta(days=date_s) # 会用作文件名
|
242
|
-
|
268
|
+
now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
269
|
+
print(f'{now} 正在下载爱库存文件 {date_s+1}/{date_num}: {new_date}')
|
243
270
|
str_date = str(new_date)[2:]
|
244
271
|
wait = WebDriverWait(_driver, timeout=15) #
|
245
272
|
elements = _driver.find_elements(
|
@@ -273,6 +300,15 @@ class AikuCun:
|
|
273
300
|
wait.until(EC.presence_of_element_located(
|
274
301
|
(By.XPATH,
|
275
302
|
'//button[@class="el-button el-button--primary el-button--small is-plain"]/span[contains(text(), "下载数据")]')))
|
303
|
+
|
304
|
+
elements = _driver.find_elements(
|
305
|
+
By.XPATH,
|
306
|
+
'//div[@class="ak-page-list__table-empty" and contains(text(), "暂无数据")]')
|
307
|
+
if elements:
|
308
|
+
print(f'cookies 可能已过期,无法下载')
|
309
|
+
_driver.quit()
|
310
|
+
return
|
311
|
+
|
276
312
|
elements = _driver.find_elements(
|
277
313
|
By.XPATH,
|
278
314
|
'//button[@class="el-button el-button--primary el-button--small is-plain"]/span[contains(text(), "下载数据")]')
|
@@ -283,10 +319,18 @@ class AikuCun:
|
|
283
319
|
|
284
320
|
def clean_data(self, date):
|
285
321
|
set_typ = {
|
322
|
+
'日期': 'date',
|
286
323
|
'店铺名称': 'varchar(100)',
|
287
324
|
'spu_id': 'varchar(100)',
|
288
325
|
'图片': 'varchar(255)',
|
326
|
+
'序号': 'smallint',
|
327
|
+
'商品名称': 'varchar(255)',
|
328
|
+
'商品款号': 'varchar(255)',
|
329
|
+
'一级类目名称': 'varchar(255)',
|
330
|
+
'二级类目名称': 'varchar(255)',
|
331
|
+
'三级类目名称': 'varchar(255)',
|
289
332
|
'数据更新时间': 'timestamp',
|
333
|
+
'更新时间': 'timestamp',
|
290
334
|
}
|
291
335
|
for root, dirs, files in os.walk(upload_path, topdown=False):
|
292
336
|
for name in files:
|
@@ -307,33 +351,60 @@ class AikuCun:
|
|
307
351
|
df.insert(loc=0, column='日期', value=date) # df中插入新列
|
308
352
|
df.insert(loc=1, column='店铺名称', value='爱库存平台') # df中插入新列
|
309
353
|
df.rename(columns={'spuId': 'spu_id'}, inplace=True)
|
310
|
-
df['数据更新时间'] = pd.to_datetime(df['数据更新时间'], format='%Y-%m-%d %H:%M:%S', errors='ignore')
|
354
|
+
# df['数据更新时间'] = pd.to_datetime(df['数据更新时间'], format='%Y-%m-%d %H:%M:%S', errors='ignore')
|
311
355
|
# df['数据更新时间'] = df['数据更新时间'].apply(lambda x: re.sub(' ', ' ', str(x)) if x else x)
|
312
356
|
# print(df['数据更新时间'])
|
313
357
|
# breakpoint()
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
358
|
+
new_dict = {
|
359
|
+
'日期': '',
|
360
|
+
'店铺名称': '',
|
361
|
+
'序号': '',
|
362
|
+
'商品名称': '',
|
363
|
+
'spu_id': '',
|
364
|
+
'商品款号': '',
|
365
|
+
'一级类目名称': '',
|
366
|
+
'二级类目名称': '',
|
367
|
+
'三级类目名称': '',
|
368
|
+
'访客量': '',
|
369
|
+
'浏览量': '',
|
370
|
+
'下单gmv': '',
|
371
|
+
'成交gmv': '',
|
372
|
+
'支付人数_成交': '',
|
373
|
+
}
|
374
|
+
for dict_data in df.to_dict(orient='records'):
|
375
|
+
new_dict.update(dict_data)
|
376
|
+
new_dict.update({'更新时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')})
|
377
|
+
m_engine.dict_to_mysql(
|
378
|
+
db_name='爱库存2',
|
379
|
+
table_name='商品spu榜单',
|
380
|
+
dict_data=new_dict,
|
381
|
+
icm_update=['日期', '店铺名称', 'spu_id', '商品款号'],
|
382
|
+
unique_main_key=None,
|
383
|
+
set_typ=set_typ,
|
384
|
+
)
|
385
|
+
|
386
|
+
# m_engine.df_to_mysql(
|
387
|
+
# df=df,
|
388
|
+
# db_name='爱库存2',
|
389
|
+
# table_name='商品spu榜单',
|
390
|
+
# icm_update=[], # 增量更新, 在聚合数据中使用,其他不要用
|
391
|
+
# move_insert=False, # 先删除,再插入
|
392
|
+
# df_sql=True, # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
|
393
|
+
# drop_duplicates=False, # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
|
394
|
+
# count=None,
|
395
|
+
# filename=None, # 用来追踪处理进度
|
396
|
+
# reset_id=False, # 是否重置自增列
|
397
|
+
# set_typ=set_typ,
|
398
|
+
# )
|
328
399
|
|
329
400
|
new_name = f'爱库存_商品榜单_spu_{date}_{date}.csv'
|
330
401
|
df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False)
|
331
402
|
os.remove(os.path.join(root, name))
|
332
403
|
|
333
404
|
|
334
|
-
def akucun():
|
405
|
+
def akucun(headless=True, date_num=10):
|
335
406
|
akc = AikuCun()
|
336
|
-
akc.get_data(shop_name='aikucun', date_num=
|
407
|
+
akc.get_data(shop_name='aikucun', date_num=date_num, headless=headless) # 获取最近 N 天数据,0表示今天
|
337
408
|
# akc.clean_data()
|
338
409
|
|
339
410
|
# # 新版 数据分类
|
@@ -404,9 +475,8 @@ class AikuCunNew:
|
|
404
475
|
|
405
476
|
|
406
477
|
if __name__ == '__main__':
|
407
|
-
|
408
|
-
|
409
|
-
akucun()
|
478
|
+
# get_cookie_aikucun() # 登录并获取 cookies
|
479
|
+
akucun(date_num=100, headless=True) # 下载数据
|
410
480
|
|
411
481
|
# a = AikuCunNew(shop_name='aikucun')
|
412
482
|
# a.akc()
|
@@ -1,11 +1,11 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
2
|
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/aggregation.py,sha256=
|
4
|
+
mdbq/aggregation/aggregation.py,sha256=kdWeVjvUoWOZhidez0FyMtutIrPwnjLCY7USaQVNxRk,76336
|
5
5
|
mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
|
6
6
|
mdbq/aggregation/mysql_types.py,sha256=YTGyrF9vcRgfkQbpT-e-JdJ7c7VF1dDHgyx9YZRES8w,10934
|
7
7
|
mdbq/aggregation/optimize_data.py,sha256=RXIv7cACCgYyehAxMjUYi_S7rVyjIwXKWMaM3nduGtA,3068
|
8
|
-
mdbq/aggregation/query_data.py,sha256=
|
8
|
+
mdbq/aggregation/query_data.py,sha256=GbmvkRYEv_xg90vHp2FszjFZuMqO3ZPSEp6lZrnOrIE,148227
|
9
9
|
mdbq/aggregation/query_data_bak.py,sha256=r1FU0C4zjXln7oVSrRkElh4Ehl-9mYhGcq57jLbViUA,104071
|
10
10
|
mdbq/aggregation/query_data_bak20241124.py,sha256=oY95ZK3qt3Wx9pdZKZ5cvDh45Yi5yGj1kl8G6riumHA,144513
|
11
11
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
@@ -28,7 +28,7 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
|
|
28
28
|
mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
|
29
29
|
mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
|
30
30
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
31
|
-
mdbq/mysql/mysql.py,sha256=
|
31
|
+
mdbq/mysql/mysql.py,sha256=z3RXzPiVQzJzPBoyLr1XL5QXAtXehjbkxWVoBCQBaqY,64373
|
32
32
|
mdbq/mysql/recheck_mysql.py,sha256=rgTpvDMWYTyEn7UQdlig-pdXDluTgiU8JG6lkMh8DV0,8665
|
33
33
|
mdbq/mysql/s_query.py,sha256=MbIprZ4yJDAZ9AahZPzl7hqS695Vs0P-AJNwAtA_EEc,9287
|
34
34
|
mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
|
@@ -45,8 +45,8 @@ mdbq/pbix/refresh_all_old.py,sha256=_pq3WSQ728GPtEG5pfsZI2uTJhU8D6ra-htIk1JXYzw,
|
|
45
45
|
mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
46
46
|
mdbq/req_post/req_tb.py,sha256=qg7pet73IgKGmCwxaeUyImJIoeK_pBQT9BBKD7fkBNg,36160
|
47
47
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
48
|
-
mdbq/spider/aikucun.py,sha256=
|
49
|
-
mdbq-3.2.
|
50
|
-
mdbq-3.2.
|
51
|
-
mdbq-3.2.
|
52
|
-
mdbq-3.2.
|
48
|
+
mdbq/spider/aikucun.py,sha256=nIKKZOZbemKqcrikcrMmtksLgJjjzeU0I99teBgU1jE,22439
|
49
|
+
mdbq-3.2.10.dist-info/METADATA,sha256=l7LN00jP2XEWyB9qTPGKZIbV0Aucaa57dyB50mgImJU,244
|
50
|
+
mdbq-3.2.10.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
51
|
+
mdbq-3.2.10.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
52
|
+
mdbq-3.2.10.dist-info/RECORD,,
|
File without changes
|
File without changes
|