mdbq 2.1.1__py3-none-any.whl → 2.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -421,6 +421,27 @@ class MysqlDatasQuery:
421
421
  projection=projection,
422
422
  )
423
423
  return df
424
def spu_sales(self):
    """Query JD SPU-level product detail rows for the configured month window.

    The date window is obtained from ``self.months_data(num=self.months)``.
    Rows are read from table ``spu_商品明细`` in database ``京东数据2`` through
    ``self.download.data_to_df`` using a fixed column projection.

    Returns:
        The object returned by ``self.download.data_to_df`` (presumably a
        pandas DataFrame -- confirm against ``data_to_df``).
    """
    # Columns requested from the table; each one is flagged with 1 in the
    # projection mapping, in this order.
    wanted_columns = (
        '日期',
        '商品id',
        '货号',
        '成交单量',
        '成交金额',
        '访客数',
        '成交客户数',
        '加购商品件数',
        '加购人数',
    )
    window_start, window_end = self.months_data(num=self.months)
    return self.download.data_to_df(
        db_name='京东数据2',
        table_name='spu_商品明细',
        start_date=window_start,
        end_date=window_end,
        projection=dict.fromkeys(wanted_columns, 1),
    )
424
445
 
425
446
  @staticmethod
426
447
  def months_data(num=0, end_date=None):
@@ -1190,6 +1211,21 @@ class GroupBy:
1190
1211
  }
1191
1212
  )
1192
1213
  return df
1214
+ elif '京东_spu_商品明细' in table_name:
1215
+ df = df[df['商品id'] != '合计']
1216
+ df = df.groupby(['日期', '商品id', '货号', '访客数', '成交客户数', '加购商品件数', '加购人数'],
1217
+ as_index=False).agg(
1218
+ **{
1219
+ '成交单量': ('成交单量', np.max),
1220
+ '成交金额': ('成交金额', np.max),
1221
+ }
1222
+ )
1223
+ self.data_jdtg.update(
1224
+ {
1225
+ table_name: df,
1226
+ }
1227
+ )
1228
+ return df
1193
1229
  elif '京东_关键词报表' in table_name:
1194
1230
  df_lin = df[['计划id', '推广计划']]
1195
1231
  df_lin.drop_duplicates(subset=['计划id'], keep='last', inplace=True, ignore_index=True)
@@ -1580,6 +1616,12 @@ def data_aggregation(service_databases=[{}], months=1):
1580
1616
  '唯一主键': ['日期', '商品id', '成交单量'],
1581
1617
  '数据主体': sdq.sku_sales(),
1582
1618
  },
1619
+ {
1620
+ '数据库名': '聚合数据',
1621
+ '集合名': '京东_spu_商品明细',
1622
+ '唯一主键': ['日期', '商品id', '成交单量'],
1623
+ '数据主体': sdq.spu_sales(),
1624
+ },
1583
1625
  {
1584
1626
  '数据库名': '聚合数据',
1585
1627
  '集合名': '天猫_人群报表',
mdbq/mysql/mysql.py CHANGED
@@ -68,8 +68,8 @@ class MysqlUpload:
68
68
 
69
69
  return wrapper
70
70
 
71
- @try_except
72
- def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], service_database={'home_lx': 'mysql'}, move_insert=False, df_sql=False, drop_duplicates=False, filename=None, count=None, json_path=None):
71
+ # @try_except
72
+ def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], service_database={'home_lx': 'mysql'}, move_insert=False, df_sql=False, drop_duplicates=False, filename=None, count=None, json_path=None, reset_id=False):
73
73
  """
74
74
  将 df 写入数据库
75
75
  db_name: 数据库名称
@@ -167,6 +167,17 @@ class MysqlUpload:
167
167
  index=False,
168
168
  chunksize=1000
169
169
  )
170
+ try:
171
+ cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
172
+ result = cursor.fetchone()
173
+ if result:
174
+ cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;") # 删除 id 列
175
+ cursor.execute(
176
+ f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
177
+ cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1") # 设置自增从 1 开始
178
+ except Exception as e:
179
+ print(f'{e}')
180
+ connection.rollback()
170
181
  connection.close()
171
182
  return
172
183
 
@@ -205,6 +216,19 @@ class MysqlUpload:
205
216
  index=False,
206
217
  chunksize=1000
207
218
  )
219
+ # 6. 重置自增列
220
+ if reset_id:
221
+ try:
222
+ cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
223
+ result = cursor.fetchone()
224
+ if result:
225
+ cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;") # 删除 id 列
226
+ cursor.execute(
227
+ f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
228
+ cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1") # 设置自增从 1 开始
229
+ except Exception as e:
230
+ print(f'{e}')
231
+ connection.rollback()
208
232
  connection.close()
209
233
  return
210
234
 
@@ -293,6 +317,19 @@ class MysqlUpload:
293
317
  # print(values)
294
318
  print(f'mysql -> df_to_mysql 报错: {e}, {self.filename}')
295
319
  # breakpoint()
320
+
321
+ # 6. 重置自增列
322
+ try:
323
+ cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
324
+ result = cursor.fetchone()
325
+ if result:
326
+ cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;") # 删除 id 列
327
+ cursor.execute(
328
+ f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
329
+ cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1") # 设置自增从 1 开始
330
+ except Exception as e:
331
+ print(f'{e}')
332
+ connection.rollback()
296
333
  connection.commit() # 提交事务
297
334
  connection.close()
298
335
 
mdbq/other/sku_picture.py CHANGED
@@ -11,6 +11,7 @@ import time
11
11
  import warnings
12
12
  import pandas as pd
13
13
  from lxml import etree
14
+ from rich.pretty import pretty_repr
14
15
  from selenium import webdriver
15
16
  from selenium.webdriver.support.wait import WebDriverWait
16
17
  from selenium.webdriver.common.by import By
@@ -582,6 +583,33 @@ class DownloadPicture():
582
583
  i += 1
583
584
  time.sleep(0.5)
584
585
 
586
def download_from_df(self, col_name='商品图片'):
    """Download one picture per row of ``self.df`` into ``self.save_path``.

    Every row must provide ``店铺名称``, ``商品id`` and ``商家编码`` (used to build
    the file name ``<店铺名称>_<商品id>_<商家编码>.jpg``) plus the URL column named
    by ``col_name``.

    * Rows whose target file already exists are recorded in
      ``self.finish_download`` without being downloaded again.
    * Rows whose URL cell is not a string containing ``https`` are skipped.
    * Rows whose download fails (network error or non-2xx status) are
      reported and left unrecorded, so a later run can retry them.

    Args:
        col_name: Name of the column in ``self.df`` that holds the image URL.
    """
    # makedirs instead of mkdir so a nested, not-yet-existing path works too.
    os.makedirs(self.save_path, exist_ok=True)
    records = self.df.to_dict('records')
    total = len(records)
    for i, data in enumerate(records, start=1):
        url = data[col_name]
        # Double-quoted f-strings: reusing the outer quote inside a
        # replacement field is a SyntaxError before Python 3.12.
        self.filename = f"{data['店铺名称']}_{data['商品id']}_{data['商家编码']}.jpg"
        target = os.path.join(self.save_path, self.filename)
        if os.path.isfile(target):
            self.finish_download.append(data['商品id'])
            continue
        # NULL / NaN cells are not strings; a bare `in` test would raise.
        if not isinstance(url, str) or 'https' not in url:
            continue

        print(f"正在下载: {i}/{total}, {data['商品id']}")
        self.headers.update({'User-Agent': ua_sj.get_ua()})
        try:
            # Fetch the picture into memory; the timeout keeps one dead host
            # from stalling the whole batch.
            res = requests.get(url, headers=self.headers, timeout=30)
            # Never store an HTTP error page as a .jpg.
            res.raise_for_status()
        except requests.RequestException as e:
            print(f"下载失败: {data['商品id']}, {e}")
            continue
        # Persist the picture in the local folder.
        with open(target, 'wb') as f:
            f.write(res.content)
        self.finish_download.append(data['商品id'])
        time.sleep(0.5)  # throttle between downloads
612
+
585
613
  @staticmethod
586
614
  def months_data(num=0, end_date=None):
587
615
  """ 读取近 num 个月的数据, 0 表示读取当月的数据 """
@@ -783,7 +811,7 @@ def main3():
783
811
  p.insert_data()
784
812
 
785
813
 
786
- def get_sp_id(service_name='company', database='mysql', db_name='属性设置2', table_name='商品素材下载记录', col_name='sku图片链接'):
814
+ def download_sku(service_name='company', database='mysql', db_name='属性设置2', table_name='商品素材下载记录', col_name='sku图片链接'):
787
815
  """ 从数据库中获取商品id信息 """
788
816
  # 实例化一个下载类
789
817
  username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
@@ -885,9 +913,125 @@ def get_sp_id(service_name='company', database='mysql', db_name='属性设置2',
885
913
  ) # 3. 回传数据库
886
914
 
887
915
 
916
def _select_spu_pictures(df):
    """Reduce raw material-export rows to one picture URL per 商品id.

    Expects the columns 商品id, 日期, 商品白底图 and 方版场景图.  The value '0' in
    a picture column is treated as "no picture".  Returns a new frame that
    keeps the newest row per 商品id, adds 商品图片 (white-background picture
    preferred, square scene picture as fallback) and 商品链接, and stores
    商品id as str.
    """
    df['商品id'] = df['商品id'].astype('int64')
    df['日期'] = df['日期'].astype('datetime64[ns]')
    # Work on a copy of the filtered rows so the column assignments below do
    # not write into a slice of the caller's frame.
    df = df[(df['商品白底图'] != '0') | (df['方版场景图'] != '0')].copy()
    # White-background picture first; vectorised so an empty frame works too.
    df['商品图片'] = df['商品白底图'].where(df['商品白底图'] != '0', df['方版场景图'])
    # Newest row per product: ascending date + keep='last'.
    df.sort_values(by=['商品id', '日期'], ascending=[False, True], ignore_index=True, inplace=True)
    df.drop_duplicates(subset=['商品id'], keep='last', inplace=True, ignore_index=True)
    # Keep only real http links; NULL cells are not strings and are dropped
    # here instead of raising on the `in` test.
    df['商品图片'] = df['商品图片'].apply(lambda x: x if isinstance(x, str) and 'http' in x else None)
    df.dropna(how='all', subset=['商品图片'], axis=0, inplace=True)
    df['商品链接'] = df['商品id'].apply(
        lambda x: f'https://detail.tmall.com/item.htm?id={str(x)}' if x and '.com' not in str(x) else x)
    df.sort_values(by='商品id', ascending=False, ignore_index=True, inplace=True)  # descending
    df['商品id'] = df['商品id'].astype(str)
    return df


def download_spu(service_name='company', database='mysql', db_name='属性设置2', table_name='商品spu素材下载记录', col_name='商品图片'):
    """Collect SPU picture links, record them in MySQL and download the pending ones.

    Steps:
        1. Read 属性设置2.商品素材导出 and pick one picture per 商品id.
        2. Attach 商家编码 from 聚合数据.商品id编码表 (joined on 宝贝id).
        3. Write the result to ``db_name``.``table_name`` with status '已获取'.
        4. Read that table back and keep rows whose status is not '已下载'.
        5. Download those pictures with ``DownloadPicture.download_from_df``.
        6. Write the pending rows back with their updated status.

    Args:
        service_name: Target service passed to ``get_myconf.select_config_values``.
        database: Database kind passed to ``get_myconf.select_config_values``.
        db_name: Database that holds the download-record table.
        table_name: Download-record table name.
        col_name: URL column handed to ``download_from_df``.
            NOTE(review): the column built here is always named 商品图片;
            any other value relies on the read-back table having that column.
    """
    # Credentials depend only on (service_name, database); look them up once.
    username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
    download = s_query.QueryDatas(username=username, password=password, host=host, port=port)

    # 1. Read the material export table.
    projection = {
        '店铺名称': 1,
        '商品id': 1,
        '商品标题': 1,
        '商品状态': 1,
        '商品白底图': 1,
        '方版场景图': 1,
        '日期': 1,
    }
    df = download.data_to_df(
        db_name='属性设置2',
        table_name='商品素材导出',
        start_date='2019-01-01',
        end_date='2099-12-31',
        projection=projection,
    )
    df = _select_spu_pictures(df)

    # 2. Read the product-id / merchant-code table and merge it in.
    projection = {
        '宝贝id': 1,
        '商家编码': 1,
    }
    df_spbm = download.data_to_df(
        db_name='聚合数据',
        table_name='商品id编码表',
        start_date='2019-01-01',
        end_date='2099-12-31',
        projection=projection,
    )
    df_spbm.drop_duplicates(subset=['宝贝id'], keep='last', inplace=True, ignore_index=True)
    df = pd.merge(df, df_spbm, left_on=['商品id'], right_on=['宝贝id'], how='left')
    df.pop('宝贝id')
    df['获取与下载'] = '已获取'
    df['时间'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    if '方版场景图' in df.columns:
        df['方版场景图'] = df['方版场景图'].astype(str)

    # 3. Store the collected links.
    m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
    m.df_to_mysql(
        df=df,
        db_name=db_name,
        table_name=table_name,
        move_insert=True,  # delete first, then insert
        df_sql=False,
        drop_duplicates=False,
        icm_update=[],
        service_database={service_name: database},
    )

    # 4. Read the table back and keep what has not been downloaded yet.
    df_before = download.data_to_df(
        db_name=db_name,
        table_name=table_name,
        start_date='2019-01-01',
        end_date='2099-12-31',
        projection={},
    )
    # .copy(): the status column is rewritten below.
    df = df_before[df_before['获取与下载'] != '已下载'].copy()

    # NOTE(review): assumes steps 5 and 6 were both guarded by the original
    # `if len(df) > 0:` -- confirm against the unmangled source.
    if df.empty:
        return

    # 5. Instantiate a downloader and fetch the pictures.
    d = DownloadPicture(service_name=service_name)
    d.save_path = os.path.join(D_PATH, '商品id_商家编码_图片')  # local folder for the pictures
    d.filename = f'{db_name}_{table_name}.xlsx'
    d.df = df
    d.download_from_df(col_name=col_name)
    # Set lookup instead of scanning the finished list once per row.
    finished = set(d.finish_download)
    df.loc[df['商品id'].isin(finished), '获取与下载'] = '已下载'

    # 6. Write the updated statuses back.
    m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
    m.df_to_mysql(
        df=df,
        db_name=db_name,
        table_name=table_name,
        move_insert=True,  # delete first, then insert
        df_sql=False,
        drop_duplicates=False,
        icm_update=[],
        service_database={service_name: database},
        reset_id=True,
    )
1030
+
888
1031
 
889
1032
  if __name__ == '__main__':
890
1033
  # main(service_name='home_lx', database='mysql')
891
1034
  # main2(service_name='home_lx', database='mysql')
892
1035
  # main3()
893
- get_sp_id(service_name='company', database='mysql', db_name='属性设置2', table_name='商品素材下载记录')
1036
+ # download_sku(service_name='company', database='mysql', db_name='属性设置2', table_name='商品素材下载记录')
1037
+ download_spu(service_name='company', database='mysql', db_name='属性设置2', table_name='商品spu素材下载记录', col_name='商品图片')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mdbq
3
- Version: 2.1.1
3
+ Version: 2.1.2
4
4
  Home-page: https://pypi.org/project/mdbsql
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -5,7 +5,7 @@ mdbq/aggregation/aggregation.py,sha256=us2FJjLCYlyttATHc0xYRo1ZvcC4E7lB_JI034jI6
5
5
  mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
6
6
  mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
7
7
  mdbq/aggregation/optimize_data.py,sha256=Wis40oL04M7E1pkvgNPjyVFAUe-zgjimjIVAikxYY8Y,4418
8
- mdbq/aggregation/query_data.py,sha256=-oW4QMZESaK2e_MCKli3iK46jZGGdaQHAr-LduZ_Wo0,80524
8
+ mdbq/aggregation/query_data.py,sha256=AsXH0LEnmuf1k35UvzALeJwmZwBWaenh8wfEv-u2s8U,82089
9
9
  mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
10
10
  mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
11
11
  mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
@@ -25,19 +25,19 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
25
25
  mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
26
26
  mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
27
27
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
28
- mdbq/mysql/mysql.py,sha256=LJJja2S5OWc-3lOHDmsVFJieFM3U69pbyWBEYQVn7P4,44541
28
+ mdbq/mysql/mysql.py,sha256=zSGiKCMexc_6OSNMHHCM6SkY1sCKxtRuDbY352Xk9gI,46603
29
29
  mdbq/mysql/s_query.py,sha256=fIQvQKPyV7rvSUuxVWXv9S5FmCnIM4GHKconE1Zn5BA,8378
30
30
  mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
31
31
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
32
32
  mdbq/other/porxy.py,sha256=UHfgEyXugogvXgsG68a7QouUCKaohTKKkI4RN-kYSdQ,4961
33
33
  mdbq/other/pov_city.py,sha256=AEOmCOzOwyjHi9LLZWPKi6DUuSC-_M163664I52u9qw,21050
34
- mdbq/other/sku_picture.py,sha256=lYzm2L4SlfWq09ddr_41JjP5UcpksZpnRQYOoXNXA-k,41279
34
+ mdbq/other/sku_picture.py,sha256=pGPQrAQluP1VRLA1UdSmdlG3JKpw8-zqy8b4r3cpzEE,47733
35
35
  mdbq/other/ua_sj.py,sha256=JuVYzc_5QZ9s_oQSrTHVKkQv4S_7-CWx4oIKOARn_9U,22178
36
36
  mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
37
37
  mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
38
38
  mdbq/pbix/refresh_all.py,sha256=0uAnBKCd5cx5FLTkawN1GV9yi87rfyMgYal5LABtumQ,7186
39
39
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
40
- mdbq-2.1.1.dist-info/METADATA,sha256=TGqdUDMFAQNf7fe5e3DPcL7MCfwm7UgIuAYvZoDGhdA,245
41
- mdbq-2.1.1.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
42
- mdbq-2.1.1.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
43
- mdbq-2.1.1.dist-info/RECORD,,
40
+ mdbq-2.1.2.dist-info/METADATA,sha256=a6hTSyyVaqUNjVuLygswCreJKwgkx_cy8E7umhTVQjM,245
41
+ mdbq-2.1.2.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
42
+ mdbq-2.1.2.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
43
+ mdbq-2.1.2.dist-info/RECORD,,
File without changes