mdbq 4.0.51__tar.gz → 4.0.53__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdbq-4.0.51 → mdbq-4.0.53}/PKG-INFO +1 -1
- mdbq-4.0.53/mdbq/__version__.py +1 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/aggregation/query_data.py +30 -29
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/aggregation/set_typ_dict.py +22 -1
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/mysql/uploader.py +4 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq.egg-info/PKG-INFO +1 -1
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq.egg-info/SOURCES.txt +1 -2
- mdbq-4.0.51/mdbq/__version__.py +0 -1
- mdbq-4.0.51/mdbq/spider/aikucun.py +0 -530
- {mdbq-4.0.51 → mdbq-4.0.53}/README.txt +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/__init__.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/aggregation/__init__.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/log/__init__.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/log/mylogger.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/myconf/__init__.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/myconf/myconf.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/mysql/__init__.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/mysql/deduplicator.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/mysql/mysql.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/mysql/s_query.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/mysql/unique_.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/other/__init__.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/other/download_sku_picture.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/other/error_handler.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/other/otk.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/other/pov_city.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/other/ua_sj.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/pbix/__init__.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/redis/__init__.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/redis/getredis.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq/spider/__init__.py +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/setup.cfg +0 -0
- {mdbq-4.0.51 → mdbq-4.0.53}/setup.py +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
VERSION = '4.0.53'
|
@@ -247,7 +247,7 @@ class MysqlDatasQuery:
|
|
247
247
|
df = df_shop_gmv.merge(df_real_sales, on=['日期', '店铺名称'], how='outer') # 平台数据合并销售数据
|
248
248
|
df = df.merge(df_tg_data, on=['日期', '店铺名称'], how='outer') # 合并推广数据
|
249
249
|
df.fillna(0, inplace=True)
|
250
|
-
set_typ = SET_TYP_DICT
|
250
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
251
251
|
return df, {
|
252
252
|
'db_name': db_name,
|
253
253
|
'table_name': table_name,
|
@@ -451,7 +451,7 @@ class MysqlDatasQuery:
|
|
451
451
|
}
|
452
452
|
)
|
453
453
|
df.insert(loc=1, column='推广渠道', value='万相台无界版')
|
454
|
-
set_typ = SET_TYP_DICT
|
454
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
455
455
|
# 制作其他聚合表
|
456
456
|
self.pf_datas.append(
|
457
457
|
{
|
@@ -599,7 +599,7 @@ class MysqlDatasQuery:
|
|
599
599
|
}
|
600
600
|
)
|
601
601
|
df.insert(loc=1, column='推广渠道', value='万相台无界版') # df中插入新列
|
602
|
-
set_typ = SET_TYP_DICT
|
602
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
603
603
|
return df, {
|
604
604
|
'db_name': db_name,
|
605
605
|
'table_name': table_name,
|
@@ -690,7 +690,7 @@ class MysqlDatasQuery:
|
|
690
690
|
}
|
691
691
|
)
|
692
692
|
df.insert(loc=1, column='推广渠道', value='万相台无界版') # df中插入新列
|
693
|
-
set_typ = SET_TYP_DICT
|
693
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
694
694
|
return df, {
|
695
695
|
'db_name': db_name,
|
696
696
|
'table_name': table_name,
|
@@ -781,7 +781,7 @@ class MysqlDatasQuery:
|
|
781
781
|
}
|
782
782
|
)
|
783
783
|
df.insert(loc=1, column='推广渠道', value='万相台无界版') # df中插入新列
|
784
|
-
set_typ = SET_TYP_DICT
|
784
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
785
785
|
return df, {
|
786
786
|
'db_name': db_name,
|
787
787
|
'table_name': table_name,
|
@@ -841,7 +841,7 @@ class MysqlDatasQuery:
|
|
841
841
|
else '300+' if x >= 300
|
842
842
|
else '300以下'
|
843
843
|
)
|
844
|
-
set_typ = SET_TYP_DICT
|
844
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
845
845
|
return df, {
|
846
846
|
'db_name': db_name,
|
847
847
|
'table_name': table_name,
|
@@ -1030,7 +1030,7 @@ class MysqlDatasQuery:
|
|
1030
1030
|
else x['人群分类'], axis=1
|
1031
1031
|
)
|
1032
1032
|
df['人群分类'] = df['人群分类'].apply(lambda x: str(x).upper() if x else x)
|
1033
|
-
set_typ = SET_TYP_DICT
|
1033
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
1034
1034
|
df.fillna(0, inplace=True)
|
1035
1035
|
return df, {
|
1036
1036
|
'db_name': db_name,
|
@@ -1146,7 +1146,7 @@ class MysqlDatasQuery:
|
|
1146
1146
|
)
|
1147
1147
|
else:
|
1148
1148
|
df['词分类'] = df['词名字_词包名字'].apply(lambda x: self.ret_keyword(keyword=str(x), as_file=False))
|
1149
|
-
set_typ = SET_TYP_DICT
|
1149
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
1150
1150
|
return df, {
|
1151
1151
|
'db_name': db_name,
|
1152
1152
|
'table_name': table_name,
|
@@ -1295,7 +1295,7 @@ class MysqlDatasQuery:
|
|
1295
1295
|
'数据主体': df[['日期', '店铺名称', '推广渠道', '营销场景', '花费', '展现量', '观看次数', '加购量', '成交笔数', '成交金额', '直接成交笔数', '直接成交金额']]
|
1296
1296
|
},
|
1297
1297
|
) # 制作其他聚合表
|
1298
|
-
set_typ = SET_TYP_DICT
|
1298
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
1299
1299
|
return df, {
|
1300
1300
|
'db_name': db_name,
|
1301
1301
|
'table_name': table_name,
|
@@ -1389,7 +1389,7 @@ class MysqlDatasQuery:
|
|
1389
1389
|
'数据主体': df[['日期', '店铺名称', '推广渠道', '营销场景', '花费', '展现量', '点击量', '加购量', '成交笔数', '成交金额']]
|
1390
1390
|
},
|
1391
1391
|
) # 制作其他聚合表
|
1392
|
-
set_typ = SET_TYP_DICT
|
1392
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
1393
1393
|
logger.info('更新', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
1394
1394
|
return df, {
|
1395
1395
|
'db_name': db_name,
|
@@ -1430,7 +1430,7 @@ class MysqlDatasQuery:
|
|
1430
1430
|
idx = df.groupby(['日期', '商品id'])['更新时间'].idxmax()
|
1431
1431
|
df = df.loc[idx]
|
1432
1432
|
df.rename(columns={'商品id': '宝贝id'}, inplace=True)
|
1433
|
-
set_typ = SET_TYP_DICT
|
1433
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
1434
1434
|
logger.info('更新', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
1435
1435
|
return df, {
|
1436
1436
|
'db_name': db_name,
|
@@ -1472,7 +1472,7 @@ class MysqlDatasQuery:
|
|
1472
1472
|
idx = df.groupby(['日期', 'sku_id'])['更新时间'].idxmax()
|
1473
1473
|
df = df.loc[idx]
|
1474
1474
|
df.rename(columns={'白底图': '商品图片'}, inplace=True)
|
1475
|
-
set_typ = SET_TYP_DICT
|
1475
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
1476
1476
|
return df, {
|
1477
1477
|
'db_name': db_name,
|
1478
1478
|
'table_name': table_name,
|
@@ -1523,7 +1523,7 @@ class MysqlDatasQuery:
|
|
1523
1523
|
return item['上市年份']
|
1524
1524
|
|
1525
1525
|
df['上市年份'] = df['商品id'].apply(lambda x: check_year(x))
|
1526
|
-
set_typ = SET_TYP_DICT
|
1526
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
1527
1527
|
return df, {
|
1528
1528
|
'db_name': db_name,
|
1529
1529
|
'table_name': table_name,
|
@@ -1603,7 +1603,7 @@ class MysqlDatasQuery:
|
|
1603
1603
|
df['上市季节'] = df['上市年月'].apply(lambda x: check_jijie(x))
|
1604
1604
|
p = df.pop('上市季节')
|
1605
1605
|
df.insert(loc=9, column='上市季节', value=p)
|
1606
|
-
set_typ = SET_TYP_DICT
|
1606
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
1607
1607
|
return df, {
|
1608
1608
|
'db_name': db_name,
|
1609
1609
|
'table_name': table_name,
|
@@ -1690,9 +1690,10 @@ class MysqlDatasQuery:
|
|
1690
1690
|
df = pd.merge(df, df_visitor1, how='left', left_on='一级来源', right_on='一级来源')
|
1691
1691
|
df = pd.merge(df, df_visitor2, how='left', left_on='二级来源', right_on='二级来源')
|
1692
1692
|
df = pd.merge(df, df_visitor3, how='left', left_on='三级来源', right_on='三级来源')
|
1693
|
+
|
1693
1694
|
for col in ['一级来源索引', '二级来源索引', '三级来源索引']:
|
1694
1695
|
df[col] = df[col].apply(lambda x: 1000 if str(x) == 'nan' else x)
|
1695
|
-
set_typ = SET_TYP_DICT
|
1696
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
1696
1697
|
return df, {
|
1697
1698
|
'db_name': db_name,
|
1698
1699
|
'table_name': table_name,
|
@@ -1721,7 +1722,7 @@ class MysqlDatasQuery:
|
|
1721
1722
|
df = pd.DataFrame(data=data_values)
|
1722
1723
|
df.sort_values(by=['款号', '日期'], ascending=[False, True], ignore_index=True, inplace=True)
|
1723
1724
|
df.drop_duplicates(subset=['款号'], keep='last', inplace=True, ignore_index=True)
|
1724
|
-
set_typ = SET_TYP_DICT
|
1725
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
1725
1726
|
return df, {
|
1726
1727
|
'db_name': db_name,
|
1727
1728
|
'table_name': table_name,
|
@@ -1813,7 +1814,7 @@ class MysqlDatasQuery:
|
|
1813
1814
|
cols = list(df.columns)
|
1814
1815
|
cols.insert(3, cols.pop(cols.index('spu_id')))
|
1815
1816
|
df = df[cols]
|
1816
|
-
set_typ = SET_TYP_DICT
|
1817
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
1817
1818
|
return df, {
|
1818
1819
|
'db_name': db_name,
|
1819
1820
|
'table_name': table_name,
|
@@ -1868,7 +1869,7 @@ class MysqlDatasQuery:
|
|
1868
1869
|
}
|
1869
1870
|
)
|
1870
1871
|
df = df[df['花费'] > 0]
|
1871
|
-
set_typ = SET_TYP_DICT
|
1872
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
1872
1873
|
return df, {
|
1873
1874
|
'db_name': db_name,
|
1874
1875
|
'table_name': table_name,
|
@@ -1944,7 +1945,7 @@ class MysqlDatasQuery:
|
|
1944
1945
|
df['k_是否品牌词'] = df['k_是否品牌词'].apply(lambda x: '品牌词' if x else '-')
|
1945
1946
|
df['s_是否品牌词'] = df['搜索词'].str.contains('万里马|wanlima', regex=True)
|
1946
1947
|
df['s_是否品牌词'] = df['s_是否品牌词'].apply(lambda x: '品牌词' if x else '-')
|
1947
|
-
set_typ = SET_TYP_DICT
|
1948
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
1948
1949
|
return df, {
|
1949
1950
|
'db_name': db_name,
|
1950
1951
|
'table_name': table_name,
|
@@ -1994,7 +1995,7 @@ class MysqlDatasQuery:
|
|
1994
1995
|
idx = df.groupby(['日期', '店铺名称', '商品id', '货号', '访客数', '成交客户数', '加购商品件数', '加购人数'])['更新时间'].idxmax()
|
1995
1996
|
df = df.loc[idx]
|
1996
1997
|
df = df[['日期', '店铺名称', '商品id', '货号', '访客数', '成交客户数', '加购商品件数', '加购人数', '成交单量', '成交金额']]
|
1997
|
-
set_typ = SET_TYP_DICT
|
1998
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
1998
1999
|
return df, {
|
1999
2000
|
'db_name': db_name,
|
2000
2001
|
'table_name': table_name,
|
@@ -2044,7 +2045,7 @@ class MysqlDatasQuery:
|
|
2044
2045
|
idx = df.groupby(['日期', '店铺名称', '商品id', '货号', '访客数', '成交客户数', '加购商品件数', '加购人数'])['更新时间'].idxmax()
|
2045
2046
|
df = df.loc[idx]
|
2046
2047
|
df = df[['日期', '店铺名称', '商品id', '货号', '访客数', '成交客户数', '加购商品件数', '加购人数', '成交单量', '成交金额']]
|
2047
|
-
set_typ = SET_TYP_DICT
|
2048
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
2048
2049
|
return df, {
|
2049
2050
|
'db_name': db_name,
|
2050
2051
|
'table_name': table_name,
|
@@ -2104,7 +2105,7 @@ class MysqlDatasQuery:
|
|
2104
2105
|
idx = df.groupby(['日期', '店铺名称', '词类型', '搜索词'])['更新时间'].idxmax()
|
2105
2106
|
df = df.loc[idx]
|
2106
2107
|
df = df[['日期', '店铺名称', '词类型', '搜索词', '访客数', '加购人数', '支付金额', '支付转化率', '支付买家数', '客单价', 'uv价值']]
|
2107
|
-
set_typ = SET_TYP_DICT
|
2108
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
2108
2109
|
return df, {
|
2109
2110
|
'db_name': db_name,
|
2110
2111
|
'table_name': table_name,
|
@@ -2133,7 +2134,7 @@ class MysqlDatasQuery:
|
|
2133
2134
|
projection={},
|
2134
2135
|
)
|
2135
2136
|
df.drop_duplicates(subset=['场次id'], keep='first', inplace=True, ignore_index=True)
|
2136
|
-
set_typ = SET_TYP_DICT
|
2137
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
2137
2138
|
ordered_columns = [
|
2138
2139
|
'日期',
|
2139
2140
|
'店铺名称',
|
@@ -2618,7 +2619,7 @@ class MysqlDatasQuery:
|
|
2618
2619
|
}
|
2619
2620
|
)
|
2620
2621
|
df.sort_values(['日期', '店铺名称', '花费'], ascending=[False, False, False], ignore_index=True, inplace=True)
|
2621
|
-
set_typ = SET_TYP_DICT
|
2622
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
2622
2623
|
return df, {
|
2623
2624
|
'db_name': db_name,
|
2624
2625
|
'table_name': table_name,
|
@@ -2694,7 +2695,7 @@ class MysqlDatasQuery:
|
|
2694
2695
|
df = df[new_columns]
|
2695
2696
|
df['更新时间'] = df.pop('更新时间')
|
2696
2697
|
df = df.astype({'日期': 'datetime64[ns]'}, errors='ignore')
|
2697
|
-
set_typ = SET_TYP_DICT
|
2698
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
2698
2699
|
return df, {
|
2699
2700
|
'db_name': db_name,
|
2700
2701
|
'table_name': table_name,
|
@@ -2723,7 +2724,7 @@ class MysqlDatasQuery:
|
|
2723
2724
|
projection=projection,
|
2724
2725
|
)
|
2725
2726
|
df.drop_duplicates(subset=['日期', '人群类型', '店铺名称', '人群规模', '广告投入金额'], keep='last', inplace=True, ignore_index=True)
|
2726
|
-
set_typ = SET_TYP_DICT
|
2727
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
2727
2728
|
return df, {
|
2728
2729
|
'db_name': db_name,
|
2729
2730
|
'table_name': table_name,
|
@@ -2762,7 +2763,7 @@ class MysqlDatasQuery:
|
|
2762
2763
|
df.rename(columns={'起始日期': '日期'}, inplace=True)
|
2763
2764
|
|
2764
2765
|
# df.drop_duplicates(subset=['日期', '店铺名称', '场景id', '父渠道id'], keep='last', inplace=True, ignore_index=True)
|
2765
|
-
set_typ = SET_TYP_DICT
|
2766
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
2766
2767
|
return df, {
|
2767
2768
|
'db_name': db_name,
|
2768
2769
|
'table_name': table_name,
|
@@ -2822,7 +2823,7 @@ class MysqlDatasQuery:
|
|
2822
2823
|
df['用户年龄'] = df['用户年龄'].apply(lambda x: '~'.join(re.findall(r'^(\d+).*-(\d+)岁$', str(x))[0]) if '岁' in str(x) else x)
|
2823
2824
|
df['消费能力等级'] = df['消费能力等级'].apply(lambda x: f'L{''.join(re.findall(r'(\d)', str(x)))}' if '购买力' in str(x) else x)
|
2824
2825
|
df.rename(columns={'消耗_元': '消耗'}, inplace=True)
|
2825
|
-
set_typ = SET_TYP_DICT
|
2826
|
+
set_typ = SET_TYP_DICT.get(f'{db_name}_{table_name}', {})
|
2826
2827
|
return df, {
|
2827
2828
|
'db_name': db_name,
|
2828
2829
|
'table_name': table_name,
|
@@ -3395,4 +3396,4 @@ if __name__ == '__main__':
|
|
3395
3396
|
)
|
3396
3397
|
sdq = MysqlDatasQuery(download_manager=download_manager)
|
3397
3398
|
sdq.months = 1
|
3398
|
-
sdq.
|
3399
|
+
sdq.dplyd(db_name='聚合数据', table_name='店铺流量来源构成')
|
@@ -38,7 +38,7 @@ SET_TYP_DICT = {
|
|
38
38
|
'直接成交笔数': 'int',
|
39
39
|
'直接成交金额': 'decimal(12,2)',
|
40
40
|
},
|
41
|
-
'聚合数据_
|
41
|
+
'聚合数据_奥莱店_主体报表': {
|
42
42
|
'日期': 'date',
|
43
43
|
'推广渠道': 'varchar(100)',
|
44
44
|
'店铺名称': 'varchar(255)',
|
@@ -582,6 +582,27 @@ SET_TYP_DICT = {
|
|
582
582
|
'实际消耗': 'decimal(10,2)',
|
583
583
|
'推广成交金额': 'decimal(12,2)',
|
584
584
|
},
|
585
|
+
'聚合数据_店铺流量来源构成': {
|
586
|
+
'日期': 'DATE',
|
587
|
+
'店铺名称': 'varchar(255)',
|
588
|
+
'类别': 'varchar(100)',
|
589
|
+
'来源构成': 'varchar(100)',
|
590
|
+
'一级来源': 'varchar(100)',
|
591
|
+
'二级来源': 'varchar(100)',
|
592
|
+
'三级来源': 'varchar(100)',
|
593
|
+
'访客数': 'int',
|
594
|
+
'支付金额': 'decimal(12,2)',
|
595
|
+
'支付买家数': 'smallint',
|
596
|
+
'支付转化率': 'decimal(10,4)',
|
597
|
+
'加购人数': 'smallint',
|
598
|
+
'加购件数': 'smallint',
|
599
|
+
'下单买家数': 'smallint',
|
600
|
+
'关注店铺人数': 'smallint',
|
601
|
+
'一级来源索引': 'smallint',
|
602
|
+
'二级来源索引': 'smallint',
|
603
|
+
'三级来源索引': 'smallint',
|
604
|
+
'更新时间': 'timestamp',
|
605
|
+
},
|
585
606
|
}
|
586
607
|
|
587
608
|
|
@@ -1071,6 +1071,10 @@ class MySQLUploader:
|
|
1071
1071
|
# 统一处理原始数据中列名的特殊字符
|
1072
1072
|
data = self.normalize_column_names(data)
|
1073
1073
|
|
1074
|
+
if not set_typ:
|
1075
|
+
logger.warning('set_typ为空, 将自动推断数据类型, 可能存在数据类型识别错误', {
|
1076
|
+
'func': sys._getframe().f_code.co_name,
|
1077
|
+
})
|
1074
1078
|
# set_typ的键清洗
|
1075
1079
|
if not set_typ:
|
1076
1080
|
set_typ = {}
|
mdbq-4.0.51/mdbq/__version__.py
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
VERSION = '4.0.51'
|
@@ -1,530 +0,0 @@
|
|
1
|
-
# -*- coding:utf-8 -*-
|
2
|
-
import datetime
|
3
|
-
import requests
|
4
|
-
import json
|
5
|
-
import os
|
6
|
-
import re
|
7
|
-
import time
|
8
|
-
import platform
|
9
|
-
import getpass
|
10
|
-
from selenium import webdriver
|
11
|
-
from selenium.webdriver.support.wait import WebDriverWait
|
12
|
-
from selenium.webdriver.common.by import By
|
13
|
-
from selenium.webdriver.support import expected_conditions as EC
|
14
|
-
from selenium.webdriver.chrome.service import Service
|
15
|
-
import pymysql
|
16
|
-
from mdbq.mysql import uploader
|
17
|
-
from mdbq.mysql import s_query
|
18
|
-
from mdbq.myconf import myconf
|
19
|
-
from mdbq.other import ua_sj
|
20
|
-
from mdbq.other import otk
|
21
|
-
from mdbq.log import mylogger
|
22
|
-
|
23
|
-
dir_path = os.path.expanduser("~")
|
24
|
-
parser = myconf.ConfigParser()
|
25
|
-
host, port, username, password = parser.get_section_values(
|
26
|
-
file_path=os.path.join(dir_path, 'spd.txt'),
|
27
|
-
section='mysql',
|
28
|
-
keys=['host', 'port', 'username', 'password'],
|
29
|
-
)
|
30
|
-
|
31
|
-
# 实例化一个数据查询类,用来获取 cookies 表数据
|
32
|
-
logger = mylogger.MyLogger(
|
33
|
-
logging_mode='file',
|
34
|
-
log_level='info',
|
35
|
-
log_format='json',
|
36
|
-
max_log_size=50,
|
37
|
-
backup_count=5,
|
38
|
-
enable_async=False, # 是否启用异步日志
|
39
|
-
sample_rate=1, # 采样DEBUG/INFO日志
|
40
|
-
sensitive_fields=[], # 敏感字段过滤
|
41
|
-
enable_metrics=False, # 是否启用性能指标
|
42
|
-
)
|
43
|
-
|
44
|
-
|
45
|
-
def keep_connect(_db_name, _config, max_try: int=10):
|
46
|
-
attempts = 1
|
47
|
-
while attempts <= max_try:
|
48
|
-
try:
|
49
|
-
connection = pymysql.connect(**_config) # 连接数据库
|
50
|
-
return connection
|
51
|
-
except Exception as e:
|
52
|
-
logger.error('连接失败', {'数据库': _db_name, '主机': host, '端口': port, '重试次数': attempts, '最大重试次数': max_try, '错误信息': e})
|
53
|
-
attempts += 1
|
54
|
-
time.sleep(30)
|
55
|
-
logger.error('连接失败', {'数据库': _db_name, '主机': host, '端口': port, '重试次数': attempts, '最大重试次数': max_try})
|
56
|
-
return None
|
57
|
-
|
58
|
-
|
59
|
-
class AikuCun:
|
60
|
-
def __init__(self, uld_manager, download_manager):
|
61
|
-
self.url = 'https://gray-merc.aikucun.com/index.html'
|
62
|
-
self.db_name = 'cookie文件'
|
63
|
-
self.table_name = 'main_aikucun'
|
64
|
-
self.shop_name = '万里马爱库存'
|
65
|
-
self.token = None
|
66
|
-
self.today = datetime.date.today()
|
67
|
-
self.start_date = (self.today - datetime.timedelta(days=7)).strftime('%Y-%m-%d')
|
68
|
-
self.end_date = (self.today - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
|
69
|
-
self.error_count = 0
|
70
|
-
self.uld = uld_manager
|
71
|
-
self.download = download_manager
|
72
|
-
|
73
|
-
def logining(self, shop_name='aikucun', headless=False):
|
74
|
-
option = webdriver.ChromeOptions()
|
75
|
-
if headless:
|
76
|
-
option.add_argument("--headless") # 设置无界面模式
|
77
|
-
# 调整chrome启动配置
|
78
|
-
option.add_argument("--disable-gpu")
|
79
|
-
option.add_argument("--no-sandbox")
|
80
|
-
option.add_argument("--disable-dev-shm-usage")
|
81
|
-
option.add_experimental_option("excludeSwitches", ["enable-automation"])
|
82
|
-
option.add_experimental_option('excludeSwitches', ['enable-logging']) # 禁止日志输出,减少控制台干扰
|
83
|
-
option.add_experimental_option("useAutomationExtension", False)
|
84
|
-
option.add_argument('--ignore-ssl-error') # 忽略ssl错误
|
85
|
-
prefs = {
|
86
|
-
'profile.default_content_settings.popups': 0, # 禁止弹出所有窗口
|
87
|
-
"browser.download.manager. showAlertOnComplete": False, # 下载完成后不显示下载完成提示框
|
88
|
-
"profile.default_content_setting_values.automatic_downloads": 1, # 允许自动下载多个文件
|
89
|
-
}
|
90
|
-
|
91
|
-
option.add_experimental_option('perfLoggingPrefs', {
|
92
|
-
'enableNetwork': True,
|
93
|
-
'enablePage': False,
|
94
|
-
})
|
95
|
-
option.set_capability("goog:loggingPrefs", {
|
96
|
-
'browser': 'ALL',
|
97
|
-
'performance': 'ALL',
|
98
|
-
})
|
99
|
-
option.set_capability("goog:perfLoggingPrefs", {
|
100
|
-
'enableNetwork': True,
|
101
|
-
'enablePage': False,
|
102
|
-
'enableTimeline': False
|
103
|
-
})
|
104
|
-
|
105
|
-
option.add_experimental_option('prefs', prefs)
|
106
|
-
option.add_experimental_option('excludeSwitches', ['enable-automation']) # 实验性参数, 左上角小字
|
107
|
-
|
108
|
-
# # 修改默认下载文件夹路径
|
109
|
-
# option.add_experimental_option("prefs", {"download.default_directory": f'{upload_path}'})
|
110
|
-
|
111
|
-
# # 通过excludeSwitches参数禁用默认的启动路径
|
112
|
-
# option.add_experimental_option('excludeSwitches', ['enable-automation'])
|
113
|
-
|
114
|
-
if platform.system() == 'Windows':
|
115
|
-
# 设置 chrome 和 chromedriver 启动路径
|
116
|
-
chrome_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chrome_win64\\chrome.exe')
|
117
|
-
chromedriver_path = os.path.join(f'C:\\Users\\{getpass.getuser()}', 'chrome\\chromedriver.exe')
|
118
|
-
# os.environ["webdriver.chrome.driver"] = chrome_path
|
119
|
-
option.binary_location = chrome_path # windows 设置此参数有效
|
120
|
-
service = Service(chromedriver_path)
|
121
|
-
# service = Service(str(pathlib.Path(f'C:\\Users\\{getpass.getuser()}\\chromedriver.exe'))) # 旧路径
|
122
|
-
elif platform.system() == 'Darwin':
|
123
|
-
chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
|
124
|
-
chromedriver_path = '/usr/local/chrome/chromedriver'
|
125
|
-
os.environ["webdriver.chrome.driver"] = chrome_path
|
126
|
-
# option.binary_location = chrome_path # Macos 设置此参数报错
|
127
|
-
service = Service(chromedriver_path)
|
128
|
-
elif platform.system().lower() == 'linux':
|
129
|
-
# ubuntu
|
130
|
-
chrome_path = '/usr/bin/google-chrome'
|
131
|
-
chromedriver_path = '/usr/local/bin/chromedriver'
|
132
|
-
# option.binary_location = chrome_path # macOS 设置此参数有效
|
133
|
-
service = Service(chromedriver_path)
|
134
|
-
else:
|
135
|
-
chrome_path = '/usr/local/chrome/Google Chrome for Testing.app'
|
136
|
-
chromedriver_path = '/usr/local/chrome/chromedriver'
|
137
|
-
os.environ["webdriver.chrome.driver"] = chrome_path
|
138
|
-
# option.binary_location = chrome_path # macos 设置此参数报错
|
139
|
-
service = Service(chromedriver_path)
|
140
|
-
_driver = webdriver.Chrome(options=option, service=service) # 创建Chrome驱动程序实例
|
141
|
-
_driver.maximize_window() # 窗口最大化 方便后续加载数据
|
142
|
-
|
143
|
-
# 登录
|
144
|
-
_driver.get(url='https://gray-merc.aikucun.com/index.html') # self.url 可能被修改,这里使用固定页面获取 sign
|
145
|
-
time.sleep(0.1)
|
146
|
-
_driver.maximize_window() # 窗口最大化 方便后续加载数据
|
147
|
-
wait = WebDriverWait(_driver, timeout=15)
|
148
|
-
input_box = wait.until(
|
149
|
-
EC.element_to_be_clickable(
|
150
|
-
(By.XPATH, '//input[@placeholder="请输入用户名"]'))) #
|
151
|
-
input_box.send_keys('广东万里马实业股份有限公司')
|
152
|
-
input_box = wait.until(
|
153
|
-
EC.element_to_be_clickable(
|
154
|
-
(By.XPATH, '//input[@placeholder="请输入密码"]'))) #
|
155
|
-
input_box.send_keys('wlm123$$$')
|
156
|
-
time.sleep(0.1)
|
157
|
-
elements = _driver.find_elements(
|
158
|
-
By.XPATH, '//button[@class="merchant_login_btn" and contains(text(), "登录")]')
|
159
|
-
_driver.execute_script("arguments[0].click();", elements[0])
|
160
|
-
for i in range(100):
|
161
|
-
try:
|
162
|
-
wait.until(
|
163
|
-
EC.element_to_be_clickable(
|
164
|
-
(By.XPATH, '//div[@class="user-info nav-user-slider"]')))
|
165
|
-
break
|
166
|
-
except:
|
167
|
-
time.sleep(5)
|
168
|
-
local_storage = _driver.execute_script("return window.localStorage;")
|
169
|
-
if 'token' in local_storage.keys():
|
170
|
-
self.token = {
|
171
|
-
'日期': datetime.datetime.today().strftime('%Y-%m-%d'),
|
172
|
-
'平台': '爱库存',
|
173
|
-
'店铺名称': self.shop_name,
|
174
|
-
'token': local_storage['token'],
|
175
|
-
'来源位置': 'localstorage',
|
176
|
-
'更新时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
|
177
|
-
}
|
178
|
-
time.sleep(5)
|
179
|
-
_driver.quit()
|
180
|
-
|
181
|
-
def save_token(self):
|
182
|
-
if not self.token:
|
183
|
-
logger.error('self.token 不能为空')
|
184
|
-
return
|
185
|
-
set_typ = {
|
186
|
-
'日期': 'DATE',
|
187
|
-
'平台': 'varchar(50)',
|
188
|
-
'店铺名称': 'varchar(50)',
|
189
|
-
'token': 'varchar(255)',
|
190
|
-
'来源位置': 'varchar(50)',
|
191
|
-
'更新时间': 'timestamp'
|
192
|
-
}
|
193
|
-
# 更新至数据库记录
|
194
|
-
self.uld.upload_data(
|
195
|
-
db_name=self.db_name,
|
196
|
-
table_name=self.table_name,
|
197
|
-
data=self.token,
|
198
|
-
set_typ=set_typ,
|
199
|
-
primary_keys=[],
|
200
|
-
check_duplicate=False,
|
201
|
-
update_on_duplicate=False,
|
202
|
-
duplicate_columns=[],
|
203
|
-
allow_null=False,
|
204
|
-
partition_by=None,
|
205
|
-
partition_date_column='日期',
|
206
|
-
auto_create=True,
|
207
|
-
indexes=[],
|
208
|
-
transaction_mode='row', # 事务模式
|
209
|
-
)
|
210
|
-
|
211
|
-
def get_data_from_bbx(self, start_date=None, end_date=None, item_type='spu', page_num=1, page_size=300):
|
212
|
-
"""
|
213
|
-
这里获取的数据等同于"查询"按钮的数据, 没有"营销后供货额/供货价" 这2个字段, 如果通过下载按钮的报表则有两个字段
|
214
|
-
"""
|
215
|
-
item_type = re.sub('爱库存_', '', item_type)
|
216
|
-
if start_date:
|
217
|
-
self.start_date = start_date
|
218
|
-
if end_date:
|
219
|
-
self.end_date = end_date
|
220
|
-
date_list = otk.dates_between(start_date=self.start_date, end_date=self.end_date)
|
221
|
-
|
222
|
-
df = self.download.data_to_df(
|
223
|
-
db_name=self.db_name,
|
224
|
-
table_name=self.table_name,
|
225
|
-
start_date='2025-03-07',
|
226
|
-
end_date='2039-12-31',
|
227
|
-
projection={
|
228
|
-
'日期': 1,
|
229
|
-
'平台': 1,
|
230
|
-
'店铺名称': 1,
|
231
|
-
'token': 1,
|
232
|
-
'更新时间': 1
|
233
|
-
},
|
234
|
-
)
|
235
|
-
if len(df) == 0:
|
236
|
-
self.logining()
|
237
|
-
self.save_token()
|
238
|
-
else:
|
239
|
-
# 仅保留最新日期的数据
|
240
|
-
idx = df.groupby(['平台', '店铺名称'])['更新时间'].idxmax()
|
241
|
-
df = df.loc[idx][['token']]
|
242
|
-
if len(df) == 0:
|
243
|
-
logger.error(f'从数据库获取的 token 不能为空')
|
244
|
-
return
|
245
|
-
self.token = df.iloc[0, 0]
|
246
|
-
|
247
|
-
self.url = f'https://treasurebox.aikucun.com/api/web/merchant/treasure/commodity/{item_type}/list'
|
248
|
-
headers = {
|
249
|
-
'headers': ua_sj.get_ua(),
|
250
|
-
'referer': 'https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant',
|
251
|
-
'content-type': 'application/json;charset=UTF-8',
|
252
|
-
'origin': 'https://treasurebox.aikucun.com',
|
253
|
-
'system': 'merchant',
|
254
|
-
'token': self.token, # 从浏览器本地存储空间获取
|
255
|
-
}
|
256
|
-
num = 1
|
257
|
-
results = []
|
258
|
-
for date in date_list:
|
259
|
-
if self.error_count > 5:
|
260
|
-
logger.logger('已退出请求 -> self.error_count > 5')
|
261
|
-
break
|
262
|
-
req_date = re.sub('-', '', date)
|
263
|
-
data = {
|
264
|
-
'beginDate': req_date,
|
265
|
-
'brandIds': [],
|
266
|
-
'cropId': '',
|
267
|
-
'cropName': '',
|
268
|
-
'ctgryOneIds': [],
|
269
|
-
'ctgryThreeIds': [],
|
270
|
-
'ctgryTwoIds': [],
|
271
|
-
'dimValue': '',
|
272
|
-
'endDate': req_date,
|
273
|
-
'merchantShopCode': '',
|
274
|
-
'orderByName': 'dealGmv',
|
275
|
-
'orderType': 'desc',
|
276
|
-
'pageNum': page_num,
|
277
|
-
'pageSize': page_size
|
278
|
-
}
|
279
|
-
|
280
|
-
res = requests.post(
|
281
|
-
url=self.url,
|
282
|
-
headers=headers,
|
283
|
-
# cookies=cookies,
|
284
|
-
data=json.dumps(data)
|
285
|
-
)
|
286
|
-
logger.info('获取数据', {'进度': num/len(date_list), '日期': date, '榜单类型': item_type})
|
287
|
-
if not res.json().get('success', None):
|
288
|
-
logger.error('没有获取到数据, 请求不成功, 如果连续请求失败 > 5, 则需重新获取cookie后继续')
|
289
|
-
num += 1
|
290
|
-
self.error_count += 1
|
291
|
-
time.sleep(1)
|
292
|
-
continue
|
293
|
-
if not res.json().get('data', {}).get('rows', None):
|
294
|
-
logger.error("返回的数据字典异常, ['data']['rows'] 不能为空")
|
295
|
-
num += 1
|
296
|
-
self.error_count += 1
|
297
|
-
time.sleep(1)
|
298
|
-
continue
|
299
|
-
results += [(date, res.json()['data']['rows'])]
|
300
|
-
num += 1
|
301
|
-
time.sleep(1)
|
302
|
-
if num % 32 == 0:
|
303
|
-
logger.info("避免频繁请求, 正在休眠...")
|
304
|
-
# time.sleep(60)
|
305
|
-
|
306
|
-
return results
|
307
|
-
|
308
|
-
def insert_datas(self, data_list, db_name, table_name):
|
309
|
-
"""数据清洗"""
|
310
|
-
if not data_list:
|
311
|
-
return
|
312
|
-
chanel_name = {
|
313
|
-
'availableNum': '可售库存数',
|
314
|
-
'availableSkuCnt': '在架sku数',
|
315
|
-
'brandName': '品牌名',
|
316
|
-
'ctgryOneName': '一级类目名称',
|
317
|
-
'ctgryThreeName': '三级类目名称',
|
318
|
-
'ctgryTwoName': '二级类目名称',
|
319
|
-
'dealBuyerCnt': '支付人数_成交',
|
320
|
-
'dealBuyerCntRate': '成交率_成交',
|
321
|
-
'dealGmv': '成交gmv',
|
322
|
-
'dealIdolCnt': '销售爱豆人数',
|
323
|
-
'dealProductCnt': '销售量_成交',
|
324
|
-
'dealProductCntRate': '售罄率',
|
325
|
-
'dealSkuCnt': '成交sku数',
|
326
|
-
'dealTwoCnt': '订单数_成交',
|
327
|
-
'downSkuCnt': '可售sku数',
|
328
|
-
'etlInsertTime': '数据更新时间',
|
329
|
-
'forwardConfirmCnt': '转发爱豆人数',
|
330
|
-
'forwardConfirmNum': '转发次数',
|
331
|
-
'merStyleNo': '商品款号', # spu 榜单
|
332
|
-
'styleNo': '商品货号', # sku 榜单
|
333
|
-
'orderBuyerCnt': '支付人数_交易',
|
334
|
-
'orderBuyerCntRate': '成交率_交易',
|
335
|
-
'orderGmv': '下单gmv',
|
336
|
-
'orderProductCnt': '销售量_交易',
|
337
|
-
'orderSkuCnt': '下单sku数',
|
338
|
-
'orderTwoCnt': '订单数_交易',
|
339
|
-
'pictureUrl': '图片',
|
340
|
-
'pvNum': '浏览量',
|
341
|
-
'rn': '序号',
|
342
|
-
'spuId': 'spuid',
|
343
|
-
'spuName': '商品名称',
|
344
|
-
'supplyAmount': '供货额',
|
345
|
-
'supplyPerAmount': '供货价',
|
346
|
-
'uvNum': '访客量',
|
347
|
-
'colorName': '颜色',
|
348
|
-
'sizeName': '尺码',
|
349
|
-
'barCode': '条码', # sku榜单 款号 + 颜色编码
|
350
|
-
}
|
351
|
-
# 移除未翻译的列名
|
352
|
-
res_col = [item for item in chanel_name.keys() if chanel_name[item] == '']
|
353
|
-
for item in res_col:
|
354
|
-
del chanel_name[item]
|
355
|
-
|
356
|
-
_results = []
|
357
|
-
for item_ in data_list:
|
358
|
-
end_date, d_list = item_
|
359
|
-
for main_data_dict in d_list:
|
360
|
-
dict_data_before = {}
|
361
|
-
# 添加数据
|
362
|
-
dict_data_before.update({k: v for k, v in main_data_dict.items()})
|
363
|
-
# 初始化 dict_data
|
364
|
-
dict_data = {
|
365
|
-
'日期': end_date,
|
366
|
-
'平台': '爱库存',
|
367
|
-
'店铺名称': self.shop_name
|
368
|
-
}
|
369
|
-
for k, v in dict_data_before.items():
|
370
|
-
# 翻译键名
|
371
|
-
[dict_data.update({name_v: v}) for name_k, name_v in chanel_name.items() if k == name_k]
|
372
|
-
# 没有翻译的键值也要保留
|
373
|
-
not_in_rename = [item for item in dict_data_before.keys() if item not in chanel_name.keys()]
|
374
|
-
[dict_data.update({item: dict_data_before[item]}) for item in not_in_rename]
|
375
|
-
dict_data.update(
|
376
|
-
{
|
377
|
-
'更新时间': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
378
|
-
}
|
379
|
-
)
|
380
|
-
new_dict_data = {}
|
381
|
-
for k, v in dict_data.items():
|
382
|
-
if v and str(v).lower() != 'none' and str(v) != 'null':
|
383
|
-
new_dict_data.update({k: v})
|
384
|
-
else:
|
385
|
-
new_dict_data.update({k: 0})
|
386
|
-
_results.append(new_dict_data)
|
387
|
-
set_typ = {
|
388
|
-
'可售库存数': 'INT',
|
389
|
-
'在架sku数': 'INT',
|
390
|
-
'品牌名': 'varchar(50)',
|
391
|
-
'一级类目名称': 'varchar(50)',
|
392
|
-
'三级类目名称': 'varchar(50)',
|
393
|
-
'二级类目名称': 'varchar(50)',
|
394
|
-
'支付人数_成交': 'INT',
|
395
|
-
'成交率_成交': 'decimal(10,4)',
|
396
|
-
'成交gmv': 'decimal(10,2)',
|
397
|
-
'销售爱豆人数': 'INT',
|
398
|
-
'销售量_成交': 'INT',
|
399
|
-
'售罄率': 'decimal(10,4)',
|
400
|
-
'成交sku数': 'INT',
|
401
|
-
'订单数_成交': 'INT',
|
402
|
-
'可售sku数': 'INT',
|
403
|
-
'数据更新时间': 'DATETIME',
|
404
|
-
'转发爱豆人数': 'INT',
|
405
|
-
'转发次数': 'INT',
|
406
|
-
'商品款号': 'varchar(50)',
|
407
|
-
'支付人数_交易': 'INT',
|
408
|
-
'成交率_交易': 'decimal(10,4)',
|
409
|
-
'下单gmv': 'decimal(10,2)',
|
410
|
-
'销售量_交易': 'INT',
|
411
|
-
'下单sku数': 'INT',
|
412
|
-
'订单数_交易': 'INT',
|
413
|
-
'图片': 'varchar(255)',
|
414
|
-
'浏览量': 'INT',
|
415
|
-
'序号': 'INT',
|
416
|
-
'spuid': 'varchar(50)',
|
417
|
-
'商品名称': 'varchar(50)',
|
418
|
-
'供货额': 'decimal(10,2)',
|
419
|
-
'供货价': 'decimal(10,2)',
|
420
|
-
'访客量': 'INT',
|
421
|
-
'颜色': 'varchar(50)',
|
422
|
-
'尺码': 'varchar(50)',
|
423
|
-
'货号': 'varchar(50)', # 款号 + 颜色编码
|
424
|
-
}
|
425
|
-
logger.info('更新数据库', {'店铺名称': self.shop_name, '库': db_name, '表': table_name})
|
426
|
-
if 'spu' in table_name:
|
427
|
-
drop_dup = ['日期', '平台', '店铺名称', '商品款号', '访客量']
|
428
|
-
else:
|
429
|
-
drop_dup = ['日期', '平台', '店铺名称', '条码']
|
430
|
-
self.uld.upload_data(
|
431
|
-
db_name=db_name,
|
432
|
-
table_name=table_name,
|
433
|
-
data=_results,
|
434
|
-
set_typ=set_typ, # 定义列和数据类型
|
435
|
-
primary_keys=[], # 创建唯一主键
|
436
|
-
check_duplicate=False, # 检查重复数据
|
437
|
-
update_on_duplicate=False, # 遇到重复时更新数据,默认 False 跳过
|
438
|
-
duplicate_columns=drop_dup, # 指定排重的组合键
|
439
|
-
allow_null=False, # 允许插入空值
|
440
|
-
partition_by=None, # 按年/月分表
|
441
|
-
partition_date_column='日期', # 用于分表的日期列名,默认为'日期'
|
442
|
-
auto_create=True, # 表不存在时自动创建, 默认参数不要更改
|
443
|
-
indexes=[], # 指定索引列
|
444
|
-
transaction_mode='row', # 事务模式
|
445
|
-
unique_keys=[drop_dup], # 唯一约束列表
|
446
|
-
)
|
447
|
-
|
448
|
-
def get_sign(self):
    """Probe the treasurebox sku-list endpoint with a signed URL.

    Builds ``self.url`` from a hard-coded ``sign``/timestamp pair (the live
    signing algorithm is not reproduced here — the commented-out line shows
    where a real millisecond timestamp would be generated) and POSTs a fixed
    one-day query payload.

    Side effects:
        Sets ``self.url`` to the signed endpoint URL.

    Returns:
        requests.Response: the raw HTTP response of the probe request.
        (The original implementation discarded it; returning it is
        backward-compatible since callers ignored the previous ``None``.)
    """
    sign = 'bbcf5b9cf3d3b8ba9c22550dcba8a3ce97be766f'
    current_timestamp_ms = '1741396070777'
    # current_timestamp_ms = int(round(time.time() * 1000))
    self.url = f'https://treasurebox.aikucun.com/api/web/merchant/treasure/commodity/sku/list?time={current_timestamp_ms}&sign={sign}'
    headers = {
        # NOTE(review): the key 'headers' looks like it should be
        # 'user-agent' — confirm against a captured browser request.
        'headers': ua_sj.get_ua(),
        'referer': 'https://treasurebox.aikucun.com/dashboard/commodity/ranking/merchant',
        'content-type': 'application/json;charset=UTF-8',
        'origin': 'https://treasurebox.aikucun.com',
        # 'system': 'merchant',
        # 'token': self.token,  # taken from the browser's local storage
    }
    data = {
        'beginDate': '20250307',
        'brandIds': [],
        'cropId': '',
        'cropName': '',
        'ctgryOneIds': [],
        'ctgryThreeIds': [],
        'ctgryTwoIds': [],
        'dimValue': '',
        'endDate': '20250307',
        'merchantShopCode': '',
        'orderByName': 'dealGmv',
        'orderType': 'desc',
        'pageNum': 1,
        'pageSize': 10
    }
    # timeout added so a stalled server cannot hang the caller forever;
    # the response is now returned instead of being silently dropped.
    res = requests.post(
        url=self.url,
        headers=headers,
        data=json.dumps(data),
        timeout=30,
    )
    return res
|
482
|
-
|
483
|
-
|
484
|
-
def main(start_date, end_date=None, item_type=None):
    """Fetch AiKuCun leaderboard data and upload it to MySQL.

    Args:
        start_date: first day to fetch, e.g. ``'2025-05-13'``.
        end_date: last day to fetch; ``None`` means "up to today"
            (interpreted downstream by ``get_data_from_bbx``).
        item_type: iterable of dataset names; only ``'爱库存_spu'`` and
            ``'爱库存_sku'`` are accepted, others are logged and skipped.
            Defaults to ``['爱库存_spu']``.

    Note: the original signature used a mutable default list, which is a
    well-known Python pitfall (the list is shared across calls); a ``None``
    sentinel with the same effective default is backward-compatible.
    """
    if item_type is None:
        item_type = ['爱库存_spu']
    db_config = {
        'username': username,
        'password': password,
        'host': host,
        'port': int(port),
        'pool_size': 3
    }
    with uploader.MySQLUploader(**db_config) as uld:
        with s_query.QueryDatas(**db_config) as download:
            ak = AikuCun(uld_manager=uld, download_manager=download)
            # ak.get_sign()
            for type_ in item_type:
                if type_ not in ['爱库存_spu', '爱库存_sku']:
                    logger.error(f'{item_type} 非法参数: {type_}')
                    continue
                # Up to two attempts: if the first fetch comes back empty the
                # token has likely expired, so re-login and retry once.
                for i in range(2):
                    data_list = ak.get_data_from_bbx(
                        start_date=start_date,
                        end_date=end_date,
                        item_type=type_,
                        page_num=1,
                        page_size=300
                    )
                    if not data_list:
                        ak.logining()
                        ak.save_token()
                        ak.error_count = 0  # reset the error counter
                    else:
                        break

                ak.insert_datas(
                    data_list=data_list,
                    db_name='爱库存2',
                    table_name=f'{type_}榜单'
                )
|
520
|
-
|
521
|
-
|
522
|
-
if __name__ == '__main__':
    # Manual run: pull both the spu and sku leaderboards starting from the
    # given date; end_date is omitted so it defaults to today.
    # end_date='2025-04-28' could be passed to cap the range.
    main(
        start_date='2025-05-13',
        item_type=['爱库存_spu', '爱库存_sku'],
    )
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|