mdbq 2.1.0__py3-none-any.whl → 2.1.2__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
- mdbq/aggregation/query_data.py +42 -0
- mdbq/mysql/mysql.py +39 -2
- mdbq/other/sku_picture.py +285 -20
- {mdbq-2.1.0.dist-info → mdbq-2.1.2.dist-info}/METADATA +1 -1
- {mdbq-2.1.0.dist-info → mdbq-2.1.2.dist-info}/RECORD +7 -7
- {mdbq-2.1.0.dist-info → mdbq-2.1.2.dist-info}/WHEEL +0 -0
- {mdbq-2.1.0.dist-info → mdbq-2.1.2.dist-info}/top_level.txt +0 -0
mdbq/aggregation/query_data.py
CHANGED
@@ -421,6 +421,27 @@ class MysqlDatasQuery:
             projection=projection,
         )
         return df
+    def spu_sales(self):
+        start_date, end_date = self.months_data(num=self.months)
+        projection = {
+            '日期': 1,
+            '商品id': 1,
+            '货号': 1,
+            '成交单量': 1,
+            '成交金额': 1,
+            '访客数': 1,
+            '成交客户数': 1,
+            '加购商品件数': 1,
+            '加购人数': 1,
+        }
+        df = self.download.data_to_df(
+            db_name='京东数据2',
+            table_name='spu_商品明细',
+            start_date=start_date,
+            end_date=end_date,
+            projection=projection,
+        )
+        return df
 
     @staticmethod
     def months_data(num=0, end_date=None):
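The new `spu_sales` follows the same shape as the existing `sku_sales`: build a projection dict selecting the columns to pull, then hand it to `self.download.data_to_df` together with a date window. A minimal sketch of that call pattern, with a stub standing in for the real MySQL-backed downloader (`StubDownloader` and the dates are illustrative assumptions, not part of the package):

```python
# Sketch only: the projection-based fetch pattern spu_sales relies on.
import pandas as pd

class StubDownloader:
    def data_to_df(self, db_name, table_name, start_date, end_date, projection):
        # Stand-in for s_query.QueryDatas: return an empty frame that
        # carries only the projected columns.
        return pd.DataFrame(columns=[col for col, keep in projection.items() if keep == 1])

projection = {'日期': 1, '商品id': 1, '货号': 1, '成交单量': 1, '成交金额': 1}
df = StubDownloader().data_to_df(
    db_name='京东数据2',
    table_name='spu_商品明细',
    start_date='2023-01-01',
    end_date='2023-12-31',
    projection=projection,
)
print(df.columns.tolist())  # ['日期', '商品id', '货号', '成交单量', '成交金额']
```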
@@ -1190,6 +1211,21 @@ class GroupBy:
                 }
             )
             return df
+        elif '京东_spu_商品明细' in table_name:
+            df = df[df['商品id'] != '合计']
+            df = df.groupby(['日期', '商品id', '货号', '访客数', '成交客户数', '加购商品件数', '加购人数'],
+                            as_index=False).agg(
+                **{
+                    '成交单量': ('成交单量', np.max),
+                    '成交金额': ('成交金额', np.max),
+                }
+            )
+            self.data_jdtg.update(
+                {
+                    table_name: df,
+                }
+            )
+            return df
         elif '京东_关键词报表' in table_name:
             df_lin = df[['计划id', '推广计划']]
             df_lin.drop_duplicates(subset=['计划id'], keep='last', inplace=True, ignore_index=True)
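The branch added above uses pandas named aggregation: `**`-unpacking a dict into `agg` maps each output column to a `(source column, function)` pair, and `np.max` collapses duplicate rows within each group. A self-contained demonstration with invented toy data:

```python
# Toy demonstration of the named-aggregation pattern in the new branch.
# The data below is invented for illustration only.
import numpy as np
import pandas as pd

df = pd.DataFrame({
    '日期': ['2023-11-01', '2023-11-01', '2023-11-01'],
    '商品id': ['100', '100', '200'],
    '货号': ['A1', 'A1', 'B2'],
    '成交单量': [3, 5, 2],
    '成交金额': [300.0, 500.0, 180.0],
})
out = df.groupby(['日期', '商品id', '货号'], as_index=False).agg(
    **{
        '成交单量': ('成交单量', np.max),  # output column -> (source column, function)
        '成交金额': ('成交金额', np.max),
    }
)
print(out)  # one row per (日期, 商品id, 货号), duplicates collapsed to the max
```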
@@ -1580,6 +1616,12 @@ def data_aggregation(service_databases=[{}], months=1):
             '唯一主键': ['日期', '商品id', '成交单量'],
             '数据主体': sdq.sku_sales(),
         },
+        {
+            '数据库名': '聚合数据',
+            '集合名': '京东_spu_商品明细',
+            '唯一主键': ['日期', '商品id', '成交单量'],
+            '数据主体': sdq.spu_sales(),
+        },
         {
             '数据库名': '聚合数据',
             '集合名': '天猫_人群报表',
mdbq/mysql/mysql.py
CHANGED
@@ -68,8 +68,8 @@ class MysqlUpload:
 
         return wrapper
 
-    @try_except
-    def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], service_database={'home_lx': 'mysql'}, move_insert=False, df_sql=False, drop_duplicates=False, filename=None, count=None, json_path=None):
+    # @try_except
+    def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], service_database={'home_lx': 'mysql'}, move_insert=False, df_sql=False, drop_duplicates=False, filename=None, count=None, json_path=None, reset_id=False):
         """
         将 df 写入数据库
         db_name: 数据库名称
@@ -167,6 +167,17 @@ class MysqlUpload:
                     index=False,
                     chunksize=1000
                 )
+                try:
+                    cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
+                    result = cursor.fetchone()
+                    if result:
+                        cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;")  # 删除 id 列
+                        cursor.execute(
+                            f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
+                        cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1")  # 设置自增从 1 开始
+                except Exception as e:
+                    print(f'{e}')
+                    connection.rollback()
                 connection.close()
                 return
 
@@ -205,6 +216,19 @@ class MysqlUpload:
                     index=False,
                     chunksize=1000
                 )
+                # 6. 重置自增列
+                if reset_id:
+                    try:
+                        cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
+                        result = cursor.fetchone()
+                        if result:
+                            cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;")  # 删除 id 列
+                            cursor.execute(
+                                f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
+                            cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1")  # 设置自增从 1 开始
+                    except Exception as e:
+                        print(f'{e}')
+                        connection.rollback()
                 connection.close()
                 return
 
@@ -293,6 +317,19 @@ class MysqlUpload:
                     # print(values)
                     print(f'mysql -> df_to_mysql 报错: {e}, {self.filename}')
                     # breakpoint()
+
+            # 6. 重置自增列
+            try:
+                cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
+                result = cursor.fetchone()
+                if result:
+                    cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;")  # 删除 id 列
+                    cursor.execute(
+                        f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
+                    cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1")  # 设置自增从 1 开始
+            except Exception as e:
+                print(f'{e}')
+                connection.rollback()
         connection.commit()  # 提交事务
         connection.close()
 
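The same reset-the-id block now appears three times in `df_to_mysql`: unconditionally after the `df_sql` bulk write, behind the new `reset_id` flag after the move-insert write, and after the row-by-row insert loop. In each case it drops any existing `id` column and recreates it as an `AUTO_INCREMENT` primary key, so rows end up renumbered 1..N after the table contents are rewritten. A hedged sketch of that step as a standalone helper, assuming a PEP 249 connection such as pymysql's (the helper name and the final commit placement are illustrative choices, not the package's API):

```python
# Sketch only: rebuild a table's auto-increment `id` column from scratch.
# Assumes a PEP 249 connection (e.g. pymysql); table_name must come from
# trusted code, since it is interpolated into the SQL directly (as in the diff).
def reset_auto_increment_id(connection, table_name: str) -> None:
    try:
        with connection.cursor() as cursor:
            cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
            if cursor.fetchone():  # only rebuild when an id column exists
                cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;")
                cursor.execute(
                    f"ALTER TABLE {table_name} "
                    f"ADD COLUMN id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
                cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1")
        connection.commit()
    except Exception as e:
        print(e)
        connection.rollback()
```

Note that DDL statements like `ALTER TABLE` commit implicitly in MySQL, so the rollback only covers failures that occur before the first `ALTER` takes effect.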
mdbq/other/sku_picture.py
CHANGED
@@ -11,6 +11,7 @@ import time
 import warnings
 import pandas as pd
 from lxml import etree
+from rich.pretty import pretty_repr
 from selenium import webdriver
 from selenium.webdriver.support.wait import WebDriverWait
 from selenium.webdriver.common.by import By
@@ -31,11 +32,13 @@ from openpyxl.utils import get_column_letter
 warnings.filterwarnings('ignore')
 
 if platform.system() == 'Windows':
-
-
+    D_PATH = os.path.normpath(f'C:\\Users\\{getpass.getuser()}\\Downloads')
+    Share_Path = os.path.normpath(r'\\192.168.1.198\时尚事业部\01.运营部\天猫报表')  # 共享文件根目录
 elif platform.system() == 'Darwin':
-
+    D_PATH = os.path.normpath(f'/Users/{getpass.getuser()}/Downloads')
+    Share_Path = os.path.normpath('/Volumes/时尚事业部/01.运营部/天猫报表')  # 共享文件根目录
 else:
+    D_PATH = 'Downloads'
     Share_Path = ''
 
 
@@ -85,9 +88,25 @@ class LoadAccount:
         option.add_experimental_option('prefs', prefs)
         option.add_experimental_option('excludeSwitches', ['enable-automation'])  # 实验性参数, 左上角小字
         if platform.system() == 'Windows':
-
+            # 设置 chrome 和 chromedriver 启动路径
+            chrome_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chrome_win64\\chrome.exe')
+            chromedriver_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chromedriver.exe')
+            # os.environ["webdriver.chrome.driver"] = chrome_path
+            option.binary_location = chrome_path  # windows 设置此参数有效
+            service = Service(chromedriver_path)
+            # service = Service(str(pathlib.Path(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe')))  # 旧路径
+        elif platform.system() == 'Darwin':
+            chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
+            chromedriver_path = '/usr/local/chrome/chromedriver'
+            os.environ["webdriver.chrome.driver"] = chrome_path
+            # option.binary_location = chrome_path  # Macos 设置此参数报错
+            service = Service(chromedriver_path)
         else:
-
+            chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
+            chromedriver_path = '/usr/local/chrome/chromedriver'
+            os.environ["webdriver.chrome.driver"] = chrome_path
+            # option.binary_location = chrome_path  # macos 设置此参数报错
+            service = Service(chromedriver_path)
         _driver = webdriver.Chrome(options=option, service=service, )  # 创建Chrome驱动程序实例
         _driver.maximize_window()  # 窗口最大化 方便后续加载数据
 
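The driver setup now pins both the browser binary and the driver executable per platform: on Windows, `option.binary_location` points at a local Chrome build and `Service` wraps the matching chromedriver, while on macOS the diff sets an environment variable instead, noting in a comment that `binary_location` errors with the `.app` bundle path there. A minimal Selenium 4 sketch of the same wiring (the paths are the ones hard-coded in the diff and will differ per machine):

```python
# Sketch of the per-platform Chrome/chromedriver wiring added above.
# Paths mirror the diff's hard-coded locations; adjust for your machine.
import getpass
import os
import platform

from selenium import webdriver
from selenium.webdriver.chrome.service import Service

option = webdriver.ChromeOptions()
if platform.system() == 'Windows':
    base = f'C:\\Users\\{getpass.getuser()}'
    option.binary_location = os.path.join(base, 'chrome\\chrome_win64\\chrome.exe')
    service = Service(os.path.join(base, 'chrome\\chromedriver.exe'))
else:
    # macOS/Linux branches in the diff share one install location and skip
    # binary_location, which reportedly fails for the .app bundle path.
    service = Service('/usr/local/chrome/chromedriver')

driver = webdriver.Chrome(options=option, service=service)
driver.quit()
```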
@@ -355,8 +374,9 @@ class SkuPicture:
         self.urls = []
         self.datas = []  # 从单品页面获取数据,存储这部分数据,作为中转
         self.df = pd.DataFrame()
+        self.xlsx_save_path = os.path.join(D_PATH, '商品id_编码_图片_临时文件.xlsx')
 
-    def each_page(self):
+    def each_page(self, as_local_file=True):
         wait = WebDriverWait(self.driver, timeout=15)
         num = len(self.urls)
         i = 0
@@ -432,13 +452,13 @@
                         'sku编码': sku_spbm,
                         '商家编码': data['商家编码'],
                         '推荐卖点': desc,
-                        '
+                        '获取与下载': '已获取',
                         '类目': leimu,
                         '更新时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                     }
                 )
             except Exception as e:
-
+                print('报错信息: ', e, '-'*10, data)
                 pass
             i += 1
             # if i > 3:
@@ -448,15 +468,16 @@
         results = []
         for data in self.datas:
             try:
-
-                results.append(
+                _df = pd.DataFrame.from_dict(data, orient='columns')
+                results.append(_df)
             except:
                 pass
 
-
-
-
-        self.df =
+        self.df = pd.concat(results)  # 更新 df
+        self.df = self.df[self.df['sku图片链接'] != '0']
+        if results and as_local_file:
+            self.df.to_excel(self.xlsx_save_path, index=False, header=True, engine='openpyxl',
+                             freeze_panes=(1, 0))
 
     def read_df(self):
         path = os.path.join(self.path, self.filename)
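`each_page` now builds one small frame per scraped item with `pd.DataFrame.from_dict(data, orient='columns')`, concatenates them all at the end, drops rows whose `sku图片链接` is the placeholder string `'0'`, and, when `as_local_file=True`, snapshots the result to the new `xlsx_save_path`. The collect-then-concat core in isolation, with invented toy data:

```python
# Toy demonstration of the collect-then-concat pattern in each_page.
# Each dict maps column names to equal-length lists; '0' marks no image.
import pandas as pd

datas = [
    {'商品id': ['100'], 'sku编码': ['s1'], 'sku图片链接': ['https://example.com/a.jpg']},
    {'商品id': ['200'], 'sku编码': ['s2'], 'sku图片链接': ['0']},
]
results = [pd.DataFrame.from_dict(d, orient='columns') for d in datas]
df = pd.concat(results)
df = df[df['sku图片链接'] != '0']  # keep only rows with a real link
print(df)
```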
@@ -486,6 +507,7 @@ class DownloadPicture():
         if not os.path.exists(self.save_path):
             os.mkdir(self.save_path)
         self.local_file = ''
+        self.finish_download = []
 
     def get_df_from_service(self):
         start_date, end_date = self.months_data(num=self.months)
@@ -535,16 +557,43 @@
             i += 1
             time.sleep(0.5)
 
-    def download_data_from_local(self):
+    def download_data_from_local(self, col_name='sku图片链接'):
         if not os.path.exists(self.save_path):
             os.mkdir(self.save_path)
         dict_data = self.df.to_dict('records')
         num = len(dict_data)
         i = 0
         for data in dict_data:
-            url = data[
-            self.filename = f'{data['商品id']}_{data['商家编码']}.jpg'
+            url = data[col_name]
+            self.filename = f'{data['商品id']}_{data['商家编码']}_{data['sku编码']}.jpg'
+            if os.path.isfile(os.path.join(self.save_path, self.filename)):
+                i += 1
+                continue
+            if 'https' not in url:
+                i += 1
+                continue
+
+            print(f'正在下载: {i}/{num}, {data['商品id']}')
+            self.headers.update({'User-Agent': ua_sj.get_ua()})
+            res = requests.get(url, headers=self.headers)  # 下载图片到内存
+            # 保存图片到本地文件夹
+            with open(os.path.join(self.save_path, self.filename), 'wb') as f:
+                f.write(res.content)
+            self.finish_download.append(data['sku编码'])
+            i += 1
+            time.sleep(0.5)
+
+    def download_from_df(self, col_name='商品图片'):
+        if not os.path.exists(self.save_path):
+            os.mkdir(self.save_path)
+        dict_data = self.df.to_dict('records')
+        num = len(dict_data)
+        i = 1
+        for data in dict_data:
+            url = data[col_name]
+            self.filename = f'{data['店铺名称']}_{data['商品id']}_{data['商家编码']}.jpg'
             if os.path.isfile(os.path.join(self.save_path, self.filename)):
+                self.finish_download.append(data['商品id'])
                 i += 1
                 continue
             if 'https' not in url:
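Both download methods iterate `self.df.to_dict('records')` with the same loop body: skip files already on disk, skip rows without an https link, fetch the bytes with `requests`, write them out, and append the row's key to `self.finish_download` so the caller can mark it done. A standalone sketch of that loop (the function name is hypothetical, and a fixed User-Agent stands in for `ua_sj.get_ua()`):

```python
# Sketch only: the shared download loop, lifted out of the class.
import os
import time

import requests

def download_images(records, save_path, col_name='sku图片链接'):
    finished = []  # keys of rows whose image landed on disk
    os.makedirs(save_path, exist_ok=True)
    headers = {'User-Agent': 'Mozilla/5.0'}  # stand-in for ua_sj.get_ua()
    for data in records:
        url = data[col_name]
        filename = f"{data['商品id']}_{data['商家编码']}_{data['sku编码']}.jpg"
        target = os.path.join(save_path, filename)
        if os.path.isfile(target) or 'https' not in url:
            continue  # already downloaded, or no usable link
        res = requests.get(url, headers=headers)
        with open(target, 'wb') as f:
            f.write(res.content)
        finished.append(data['sku编码'])
        time.sleep(0.5)  # pace requests, as the diff does
    return finished
```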
@@ -557,6 +606,7 @@ class DownloadPicture():
             # 保存图片到本地文件夹
             with open(os.path.join(self.save_path, self.filename), 'wb') as f:
                 f.write(res.content)
+            self.finish_download.append(data['商品id'])
             i += 1
             time.sleep(0.5)
 
@@ -761,12 +811,227 @@ def main3():
     p.insert_data()
 
 
-def
-    """
+def download_sku(service_name='company', database='mysql', db_name='属性设置2', table_name='商品素材下载记录', col_name='sku图片链接'):
+    """ 从数据库中获取商品id信息 """
+    # 实例化一个下载类
+    username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
+    download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
+    projection = {
+        '宝贝id': 1,
+        '商家编码': 1,
+    }
+    df = download.data_to_df(
+        db_name='生意经2',
+        table_name='宝贝指标',
+        start_date='2019-01-01',
+        end_date='2099-12-31',
+        projection=projection,
+    )
+    df.rename(columns={'宝贝id': '商品id'}, inplace=True)
+    df.drop_duplicates(subset='商品id', keep='last', inplace=True, ignore_index=True)
+    df = df.head(2)
+
+    projection = {
+        '商品id': 1,
+        '商家编码': 1,
+    }
+    df_new = download.data_to_df(
+        db_name='属性设置2',
+        table_name='商品素材下载记录',
+        start_date='2019-01-01',
+        end_date='2099-12-31',
+        projection=projection,
+    )
+    df_new.drop_duplicates(subset='商品id', keep='last', inplace=True, ignore_index=True)
+    # 使用merge获取交集
+    df = pd.merge(df, df_new, left_on=['商品id'], right_on=['商品id'], how='left')
+    df.rename(columns={'商家编码_x': '商家编码'}, inplace=True)
+    df.pop('商家编码_y')
+    urls = df.to_dict('records')
+
+    _driver = LoadAccount()  # 账号域不同, 要重新实例化
+    tb_driver2 = _driver.load_account(shop_name='万里马官方旗舰店')
+    if tb_driver2:
+        s = SkuPicture(driver=tb_driver2)
+        s.urls = urls
+        s.each_page(as_local_file=True)  # 根据 urls 获取每个商品数据并更新 df
+        tb_driver2.quit()
+
+    # 回传数据库
+    username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
+    m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
+    m.df_to_mysql(
+        df=s.df,
+        db_name=db_name,
+        table_name=table_name,
+        move_insert=True,  # 先删除,再插入
+        df_sql=False,
+        drop_duplicates=False,
+        icm_update=[],
+        service_database={service_name: database},
+    )  # 3. 回传数据库
+
+    # 从数据库中读取数据,并下载素材到本地
+
+    # 留空,必须留空
+    projection = {
+        # '商品id': 1,
+        # '商家编码': 1,
+        # 'sku编码': 1,
+        # col_name: 1,
+        # '获取与下载': 1,
+    }
+    df = download.data_to_df(
+        db_name=db_name,
+        table_name=table_name,
+        start_date='2019-01-01',
+        end_date='2099-12-31',
+        projection=projection,
+    )
+    df = df[df['获取与下载'] != '已下载']
+
+    # 实例化一个下载器类
+    d = DownloadPicture(service_name=service_name)
+    d.save_path = os.path.join(D_PATH, col_name)  # 下载图片到本地时的存储位置
+    d.filename = f'{db_name}_{table_name}.xlsx'
+    d.df = df
+    d.download_data_from_local(col_name=col_name)
+    df['获取与下载'] = df.apply(lambda x: '已下载' if x['sku编码'] in d.finish_download else x['获取与下载'], axis=1)
+
+    # 回传数据库
+    username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
+    m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
+    m.df_to_mysql(
+        df=df,
+        db_name=db_name,
+        table_name=table_name,
+        move_insert=True,  # 先删除,再插入
+        df_sql=False,
+        drop_duplicates=False,
+        icm_update=[],
+        service_database={service_name: database},
+    )  # 3. 回传数据库
+
+
+def download_spu(service_name='company', database='mysql', db_name='属性设置2', table_name='商品spu素材下载记录', col_name='商品图片'):
+    """
+
+    """
 
+    # 1. 从商品素材导出中获取数据
+    username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
+    download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
+    projection = {
+        '店铺名称': 1,
+        '商品id': 1,
+        '商品标题': 1,
+        '商品状态': 1,
+        '商品白底图': 1,
+        '方版场景图': 1,
+        '日期':1,
+    }
+    df = download.data_to_df(
+        db_name='属性设置2',
+        table_name='商品素材导出',
+        start_date='2019-01-01',
+        end_date='2099-12-31',
+        projection=projection,
+    )
+    df['商品id'] = df['商品id'].astype('int64')
+    df['日期'] = df['日期'].astype('datetime64[ns]')
+    df = df[(df['商品白底图'] != '0') | (df['方版场景图'] != '0')]
+    # 白底图优先
+    df['商品图片'] = df[['商品白底图', '方版场景图']].apply(
+        lambda x: x['商品白底图'] if x['商品白底图'] != '0' else x['方版场景图'], axis=1)
+    # # 方版场景图优先
+    # df['商品图片'] = df[['商品白底图', '方版场景图']].apply(
+    #     lambda x: x['方版场景图'] if x['方版场景图'] != '0' else x['商品白底图'], axis=1)
+    df.sort_values(by=['商品id', '日期'], ascending=[False, True], ignore_index=True, inplace=True)
+    df.drop_duplicates(subset=['商品id'], keep='last', inplace=True, ignore_index=True)
+    # df = df[['商品id', '商品图片', '日期']]
+    df['商品图片'] = df['商品图片'].apply(lambda x: x if 'http' in x else None)  # 检查是否是 http 链接
+    df.dropna(how='all', subset=['商品图片'], axis=0, inplace=True)  # 删除指定列含有空值的行
+    df['商品链接'] = df['商品id'].apply(
+        lambda x: f'https://detail.tmall.com/item.htm?id={str(x)}' if x and '.com' not in str(x) else x)
+    df.sort_values(by='商品id', ascending=False, ignore_index=True, inplace=True)  # ascending=False 降序排列
+    df['商品id'] = df['商品id'].astype(str)
+    # df = df.head(2)
+
+    # 2. 从商品id编码表 中获取数据
+    projection = {
+        '宝贝id': 1,
+        '商家编码': 1,
+    }
+    df_spbm = download.data_to_df(
+        db_name='聚合数据',
+        table_name='商品id编码表',
+        start_date='2019-01-01',
+        end_date='2099-12-31',
+        projection=projection,
+    )
+    df_spbm.drop_duplicates(subset=['宝贝id'], keep='last', inplace=True, ignore_index=True)
+    # 合并两个表
+    df = pd.merge(df, df_spbm, left_on=['商品id'], right_on=['宝贝id'], how='left')
+    df.pop('宝贝id')
+    df['获取与下载'] = '已获取'
+    df['时间'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+    # df.to_csv(os.path.join(D_PATH, f'{col_name}.csv'), index=False, header=True, encoding='utf-8_sig')
+    if '方版场景图' in df.columns.tolist():
+        df['方版场景图'] = df['方版场景图'].astype(str)
+
+    # 3. 更新数据库
+    username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
+    m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
+    m.df_to_mysql(
+        df=df,
+        db_name=db_name,
+        table_name=table_name,
+        move_insert=True,  # 先删除,再插入
+        df_sql=False,
+        drop_duplicates=False,
+        icm_update=[],
+        service_database={service_name: database},
+    )  # 3. 回传数据库
+
+    # 4. 从数据库中提取未下载的数据
+    projection = {}
+    df_before = download.data_to_df(
+        db_name=db_name,
+        table_name=table_name,
+        start_date='2019-01-01',
+        end_date='2099-12-31',
+        projection=projection,
+    )
+    df = df_before[df_before['获取与下载'] != '已下载']
+
+    if len(df) > 0:
+        # 5. 实例化一个下载器类,并下载数据
+        d = DownloadPicture(service_name=service_name)
+        d.save_path = os.path.join(D_PATH, '商品id_商家编码_图片')  # 下载图片到本地时的存储位置
+        d.filename = f'{db_name}_{table_name}.xlsx'
+        d.df = df
+        d.download_from_df(col_name=col_name)
+        df['获取与下载'] = df.apply(lambda x: '已下载' if x['商品id'] in d.finish_download else x['获取与下载'], axis=1)
+
+        # 6. 回传数据库
+        username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
+        m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
+        m.df_to_mysql(
+            df=df,
+            db_name=db_name,
+            table_name=table_name,
+            move_insert=True,  # 先删除,再插入
+            df_sql=False,
+            drop_duplicates=False,
+            icm_update=[],
+            service_database={service_name: database},
+            reset_id=True,
+        )  # 3. 回传数据库
 
 
 if __name__ == '__main__':
     # main(service_name='home_lx', database='mysql')
-    main2(service_name='home_lx', database='mysql')
+    # main2(service_name='home_lx', database='mysql')
     # main3()
+    # download_sku(service_name='company', database='mysql', db_name='属性设置2', table_name='商品素材下载记录')
+    download_spu(service_name='company', database='mysql', db_name='属性设置2', table_name='商品spu素材下载记录', col_name='商品图片')
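Taken together, `download_sku` and `download_spu` drive one status round-trip: pull the tracking table, keep rows where `获取与下载` is not yet `已下载`, download, flip the flag for the keys the downloader recorded, and push the frame back with `df_to_mysql` (`download_spu` additionally passes `reset_id=True` so the rewritten table gets a fresh 1..N id column). The flag-flipping step in isolation, with a toy frame:

```python
# Toy demonstration of the flag update both functions perform after downloading.
import pandas as pd

df = pd.DataFrame({'商品id': ['100', '200'], '获取与下载': ['已获取', '已获取']})
finish_download = ['100']  # keys the downloader completed

df = df[df['获取与下载'] != '已下载']  # only rows still pending
df['获取与下载'] = df.apply(
    lambda x: '已下载' if x['商品id'] in finish_download else x['获取与下载'], axis=1)
print(df)  # 100 -> 已下载, 200 stays 已获取
```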
{mdbq-2.1.0.dist-info → mdbq-2.1.2.dist-info}/RECORD
CHANGED
@@ -5,7 +5,7 @@ mdbq/aggregation/aggregation.py,sha256=us2FJjLCYlyttATHc0xYRo1ZvcC4E7lB_JI034jI6
 mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
 mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
 mdbq/aggregation/optimize_data.py,sha256=Wis40oL04M7E1pkvgNPjyVFAUe-zgjimjIVAikxYY8Y,4418
-mdbq/aggregation/query_data.py,sha256
+mdbq/aggregation/query_data.py,sha256=AsXH0LEnmuf1k35UvzALeJwmZwBWaenh8wfEv-u2s8U,82089
 mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
 mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
 mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
@@ -25,19 +25,19 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
 mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
 mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
 mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
-mdbq/mysql/mysql.py,sha256=
+mdbq/mysql/mysql.py,sha256=zSGiKCMexc_6OSNMHHCM6SkY1sCKxtRuDbY352Xk9gI,46603
 mdbq/mysql/s_query.py,sha256=fIQvQKPyV7rvSUuxVWXv9S5FmCnIM4GHKconE1Zn5BA,8378
 mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
 mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
 mdbq/other/porxy.py,sha256=UHfgEyXugogvXgsG68a7QouUCKaohTKKkI4RN-kYSdQ,4961
 mdbq/other/pov_city.py,sha256=AEOmCOzOwyjHi9LLZWPKi6DUuSC-_M163664I52u9qw,21050
-mdbq/other/sku_picture.py,sha256=
+mdbq/other/sku_picture.py,sha256=pGPQrAQluP1VRLA1UdSmdlG3JKpw8-zqy8b4r3cpzEE,47733
 mdbq/other/ua_sj.py,sha256=JuVYzc_5QZ9s_oQSrTHVKkQv4S_7-CWx4oIKOARn_9U,22178
 mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
 mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
 mdbq/pbix/refresh_all.py,sha256=0uAnBKCd5cx5FLTkawN1GV9yi87rfyMgYal5LABtumQ,7186
 mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
-mdbq-2.1.
-mdbq-2.1.
-mdbq-2.1.
-mdbq-2.1.
+mdbq-2.1.2.dist-info/METADATA,sha256=a6hTSyyVaqUNjVuLygswCreJKwgkx_cy8E7umhTVQjM,245
+mdbq-2.1.2.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+mdbq-2.1.2.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+mdbq-2.1.2.dist-info/RECORD,,
{mdbq-2.1.0.dist-info → mdbq-2.1.2.dist-info}/WHEEL
File without changes

{mdbq-2.1.0.dist-info → mdbq-2.1.2.dist-info}/top_level.txt
File without changes