mdbq-1.8.8-py3-none-any.whl → mdbq-1.8.9-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +4 -3
- mdbq/aggregation/query_data.py +1 -1
- mdbq/other/sku_picture.py +48 -5
- {mdbq-1.8.8.dist-info → mdbq-1.8.9.dist-info}/METADATA +1 -1
- {mdbq-1.8.8.dist-info → mdbq-1.8.9.dist-info}/RECORD +7 -7
- {mdbq-1.8.8.dist-info → mdbq-1.8.9.dist-info}/WHEEL +0 -0
- {mdbq-1.8.8.dist-info → mdbq-1.8.9.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -326,6 +326,7 @@ class DatabaseUpdate:
|
|
326
326
|
df['省份'] = pov
|
327
327
|
df['省+市'] = df[['省份', '城市']].apply(lambda x: f'{x["省份"]}-{x["城市"]}', axis=1)
|
328
328
|
df.replace('NAN', 0, inplace=True)
|
329
|
+
df['笔单价'] = df.apply(lambda x: 0 if x['销售量'] == 0 else 0 if x['销售量'] == '0' else x['笔单价'], axis=1)
|
329
330
|
elif name.endswith('csv') and 'order' in name:
|
330
331
|
# 生意经,订单数据,仅限月数据
|
331
332
|
pattern = re.findall(r'(.*)(\d{4})(\d{2})(\d{2})-(\d{4})(\d{2})(\d{2})', name)
|
@@ -1109,10 +1110,10 @@ if __name__ == '__main__':
|
|
1109
1110
|
# database='mysql'
|
1110
1111
|
# )
|
1111
1112
|
|
1112
|
-
db_name = '
|
1113
|
-
table_name = '
|
1113
|
+
db_name = '生意经2'
|
1114
|
+
table_name = '省份城市分析'
|
1114
1115
|
upload_dir(
|
1115
|
-
path='/Users/xigua/数据中心/原始文件2
|
1116
|
+
path='/Users/xigua/数据中心/原始文件2/生意经/地域分布',
|
1116
1117
|
db_name=db_name,
|
1117
1118
|
collection_name=table_name,
|
1118
1119
|
dbs={'mysql': True, 'mongodb': False},
|
mdbq/aggregation/query_data.py
CHANGED
@@ -1483,7 +1483,7 @@ def main():
|
|
1483
1483
|
|
1484
1484
|
|
1485
1485
|
if __name__ == '__main__':
|
1486
|
-
data_aggregation(service_databases=[{'company': 'mysql'}], months=
|
1486
|
+
data_aggregation(service_databases=[{'company': 'mysql'}], months=24) # 正常的聚合所有数据
|
1487
1487
|
# data_aggregation_one(service_databases=[{'company': 'mysql'}], months=1) # 单独聚合某一个数据库,具体库进函数编辑
|
1488
1488
|
# optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650) # 立即启动对聚合数据的清理工作
|
1489
1489
|
|
mdbq/other/sku_picture.py
CHANGED
@@ -483,6 +483,7 @@ class DownloadPicture():
|
|
483
483
|
self.filename = ''
|
484
484
|
if not os.path.exists(self.save_path):
|
485
485
|
os.mkdir(self.save_path)
|
486
|
+
self.local_file = ''
|
486
487
|
|
487
488
|
def get_df_from_service(self):
|
488
489
|
start_date, end_date = self.months_data(num=self.months)
|
@@ -501,7 +502,14 @@ class DownloadPicture():
|
|
501
502
|
projection=projection,
|
502
503
|
)
|
503
504
|
|
505
|
+
def get_df_from_local(self):
|
506
|
+
if not os.path.isfile(self.local_file):
|
507
|
+
return
|
508
|
+
self.df = pd.read_excel(self.local_file, header=0, engine='openpyxl')
|
509
|
+
|
504
510
|
def download_data(self):
|
511
|
+
if not os.path.exists(self.save_path):
|
512
|
+
os.mkdir(self.save_path)
|
505
513
|
dict_data = self.df.to_dict('records')
|
506
514
|
num = len(dict_data)
|
507
515
|
i = 0
|
@@ -525,6 +533,31 @@ class DownloadPicture():
|
|
525
533
|
i += 1
|
526
534
|
time.sleep(0.5)
|
527
535
|
|
536
|
+
def download_data_from_local(self):
|
537
|
+
if not os.path.exists(self.save_path):
|
538
|
+
os.mkdir(self.save_path)
|
539
|
+
dict_data = self.df.to_dict('records')
|
540
|
+
num = len(dict_data)
|
541
|
+
i = 0
|
542
|
+
for data in dict_data:
|
543
|
+
url = data['商品图片']
|
544
|
+
self.filename = f'{data['商品id']}_{data['商家编码']}.jpg'
|
545
|
+
if os.path.isfile(os.path.join(self.save_path, self.filename)):
|
546
|
+
i += 1
|
547
|
+
continue
|
548
|
+
if 'https' not in url:
|
549
|
+
i += 1
|
550
|
+
continue
|
551
|
+
|
552
|
+
print(f'正在下载: {i}/{num}, {data['商品id']}')
|
553
|
+
self.headers.update({'User-Agent': ua_sj.get_ua()})
|
554
|
+
res = requests.get(url, headers=self.headers) # 下载图片到内存
|
555
|
+
# 保存图片到本地文件夹
|
556
|
+
with open(os.path.join(self.save_path, self.filename), 'wb') as f:
|
557
|
+
f.write(res.content)
|
558
|
+
i += 1
|
559
|
+
time.sleep(0.5)
|
560
|
+
|
528
561
|
@staticmethod
|
529
562
|
def months_data(num=0, end_date=None):
|
530
563
|
""" 读取近 num 个月的数据, 0 表示读取当月的数据 """
|
@@ -708,20 +741,30 @@ def main(service_name, database):
|
|
708
741
|
def main2(service_name, database):
|
709
742
|
""" 从数据库读取数据,并下载图片到本地 """
|
710
743
|
d = DownloadPicture(service_name=service_name)
|
711
|
-
d.save_path = '/Users/xigua/Downloads/sku图片链接' # 下载图片到本地时的存储位置
|
712
|
-
d.get_df_from_service()
|
713
|
-
d.download_data()
|
744
|
+
# d.save_path = '/Users/xigua/Downloads/sku图片链接' # 下载图片到本地时的存储位置
|
745
|
+
# d.get_df_from_service() # 从数据库读取数据
|
746
|
+
# d.download_data()
|
747
|
+
|
748
|
+
d.save_path = '/Users/xigua/Downloads/商品id_商家编码_图片' # 下载图片到本地时的存储位置
|
749
|
+
d.local_file = '/Users/xigua/Downloads/商品id图片对照表.xlsx'
|
750
|
+
d.get_df_from_local()
|
751
|
+
d.download_data_from_local()
|
714
752
|
|
715
753
|
|
716
754
|
def main3():
|
755
|
+
""" """
|
717
756
|
p = InsertPicture()
|
718
757
|
p.filename = 'test.xlsx'
|
719
758
|
# p.header = 1
|
720
759
|
p.insert_data()
|
721
760
|
|
722
761
|
|
762
|
+
def main4():
|
763
|
+
""" 从 文件中读取图片链接并下载到本地 """
|
764
|
+
|
765
|
+
|
723
766
|
|
724
767
|
if __name__ == '__main__':
|
725
|
-
main(service_name='home_lx', database='mysql')
|
726
|
-
|
768
|
+
# main(service_name='home_lx', database='mysql')
|
769
|
+
main2(service_name='home_lx', database='mysql')
|
727
770
|
# main3()
|
@@ -1,11 +1,11 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
2
|
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/aggregation.py,sha256=
|
4
|
+
mdbq/aggregation/aggregation.py,sha256=bnprBEpIsm5i__0uwIzXNs3sf33j8gdOWSV5BAhP_qQ,64761
|
5
5
|
mdbq/aggregation/df_types.py,sha256=oQJS2IBU3_IO6GMgbssHuC2yCjNnbta0QPGrFOwNLnU,7591
|
6
6
|
mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
|
7
7
|
mdbq/aggregation/optimize_data.py,sha256=u2Kl_MFtZueXJ57ycy4H2OhXD431RctUYJYCl637uT0,4176
|
8
|
-
mdbq/aggregation/query_data.py,sha256=
|
8
|
+
mdbq/aggregation/query_data.py,sha256=u4f5Pdy_foD225FzNigOJ20FCiu0wedP2gVvlai-mUw,70447
|
9
9
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
10
10
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
11
11
|
mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
|
@@ -30,13 +30,13 @@ mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,
|
|
30
30
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
31
31
|
mdbq/other/porxy.py,sha256=UHfgEyXugogvXgsG68a7QouUCKaohTKKkI4RN-kYSdQ,4961
|
32
32
|
mdbq/other/pov_city.py,sha256=AEOmCOzOwyjHi9LLZWPKi6DUuSC-_M163664I52u9qw,21050
|
33
|
-
mdbq/other/sku_picture.py,sha256=
|
33
|
+
mdbq/other/sku_picture.py,sha256=1psnDTSPXSh4m3Oeru9rfyjDh7xMd-8Tc2sN0B0gf4I,35615
|
34
34
|
mdbq/other/ua_sj.py,sha256=JuVYzc_5QZ9s_oQSrTHVKkQv4S_7-CWx4oIKOARn_9U,22178
|
35
35
|
mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
|
36
36
|
mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
|
37
37
|
mdbq/pbix/refresh_all.py,sha256=0uAnBKCd5cx5FLTkawN1GV9yi87rfyMgYal5LABtumQ,7186
|
38
38
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
39
|
-
mdbq-1.8.
|
40
|
-
mdbq-1.8.
|
41
|
-
mdbq-1.8.
|
42
|
-
mdbq-1.8.
|
39
|
+
mdbq-1.8.9.dist-info/METADATA,sha256=W-KPHDcJh3W9bhHKVZGHltO1ozY-FCnDXBa2ePkU_wY,245
|
40
|
+
mdbq-1.8.9.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
41
|
+
mdbq-1.8.9.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
42
|
+
mdbq-1.8.9.dist-info/RECORD,,
|
File without changes
|
File without changes
|