mdbq 1.7.1.tar.gz → 1.7.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {mdbq-1.7.1 → mdbq-1.7.3}/PKG-INFO +1 -1
  2. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/aggregation/aggregation.py +21 -6
  3. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/aggregation/query_data.py +72 -7
  4. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/clean/data_clean.py +17 -0
  5. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq.egg-info/PKG-INFO +1 -1
  6. {mdbq-1.7.1 → mdbq-1.7.3}/setup.py +1 -1
  7. {mdbq-1.7.1 → mdbq-1.7.3}/README.txt +0 -0
  8. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/__init__.py +0 -0
  9. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/__version__.py +0 -0
  10. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/aggregation/__init__.py +0 -0
  11. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/aggregation/df_types.py +0 -0
  12. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/aggregation/mysql_types.py +0 -0
  13. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/aggregation/optimize_data.py +0 -0
  14. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/bdup/__init__.py +0 -0
  15. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/bdup/bdup.py +0 -0
  16. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/clean/__init__.py +0 -0
  17. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/company/__init__.py +0 -0
  18. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/company/copysh.py +0 -0
  19. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/config/__init__.py +0 -0
  20. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/config/get_myconf.py +0 -0
  21. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/config/products.py +0 -0
  22. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/config/set_support.py +0 -0
  23. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/config/update_conf.py +0 -0
  24. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/dataframe/__init__.py +0 -0
  25. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/dataframe/converter.py +0 -0
  26. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/log/__init__.py +0 -0
  27. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/log/mylogger.py +0 -0
  28. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/mongo/__init__.py +0 -0
  29. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/mongo/mongo.py +0 -0
  30. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/mysql/__init__.py +0 -0
  31. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/mysql/mysql.py +0 -0
  32. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/mysql/s_query.py +0 -0
  33. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/mysql/year_month_day.py +0 -0
  34. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/other/__init__.py +0 -0
  35. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/other/porxy.py +0 -0
  36. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/other/pov_city.py +0 -0
  37. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/other/ua_sj.py +0 -0
  38. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/pbix/__init__.py +0 -0
  39. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/pbix/pbix_refresh.py +0 -0
  40. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/pbix/refresh_all.py +0 -0
  41. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq/spider/__init__.py +0 -0
  42. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq.egg-info/SOURCES.txt +0 -0
  43. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq.egg-info/dependency_links.txt +0 -0
  44. {mdbq-1.7.1 → mdbq-1.7.3}/mdbq.egg-info/top_level.txt +0 -0
  45. {mdbq-1.7.1 → mdbq-1.7.3}/setup.cfg +0 -0
{mdbq-1.7.1 → mdbq-1.7.3}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mdbq
-Version: 1.7.1
+Version: 1.7.3
 Home-page: https://pypi.org/project/mdbsql
 Author: xigua,
 Author-email: 2587125111@qq.com
{mdbq-1.7.1 → mdbq-1.7.3}/mdbq/aggregation/aggregation.py
@@ -530,6 +530,17 @@ class DatabaseUpdate:
             df.drop("'当前时间'", axis=1, inplace=True)
             df.rename(columns={'全站ROI': '全站roi'}, inplace=True)
             df.insert(loc=1, column='产品线', value='全站营销')
+        elif name.endswith('.csv') and '关键词点击成交报表_pbix同步_勿删改' in name:
+            df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
+            for col in df.columns.tolist():
+                if '(' in col:
+                    new_col = re.sub('[()]', '_', col)
+                    new_col = new_col.strip('_')
+                    df.rename(columns={col: new_col}, inplace=True)
+            df['日期'] = df['日期'].apply(lambda x: f'{str(x)[:4]}-{str(x)[4:6]}-{str(x)[6:8]}')
+            df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
+            # min_clm = str(df['日期'].min()).split(' ')[0]
+            # max_clm = str(df['日期'].max()).split(' ')[0]
 
         # 商品素材,必须保持放在最后处理
         elif name.endswith('xlsx'):
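The branch added above (and mirrored later in data_clean.py) normalizes parenthesized column names and yyyymmdd-style dates before the JD report is loaded. A minimal, self-contained sketch of that cleanup logic, using invented column names and values rather than a real JD export:

import re
import pandas as pd

# Toy frame standing in for the JD keyword report; names and numbers are made up.
df = pd.DataFrame({
    '日期': [20240901, 20240902],
    '花费(元)': [10.5, 20.0],
})

# Replace full-width parentheses with underscores and strip the trailing one,
# mirroring the re.sub('[()]', '_', col) / strip('_') steps in the diff.
for col in df.columns.tolist():
    if '(' in col:
        new_col = re.sub('[()]', '_', col).strip('_')
        df.rename(columns={col: new_col}, inplace=True)

# Turn 20240901 into '2024-09-01', then into a real datetime column.
df['日期'] = df['日期'].apply(lambda x: f'{str(x)[:4]}-{str(x)[4:6]}-{str(x)[6:8]}')
df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d')

print(df.columns.tolist())  # ['日期', '花费_元']
print(df.dtypes['日期'])     # datetime64[ns]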
@@ -901,7 +912,7 @@ def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': Tr
 
     if dbs['mysql']:
         username, password, host, port = get_myconf.select_config_values(
-            target_service='home_lx',
+            target_service='company',
             database='mysql',
         )
         m = mysql.MysqlUpload(
@@ -960,7 +971,9 @@ def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': Tr
                 d.df_to_mongo(df=df, db_name=db_name, collection_name=collection_name)
             if dbs['mysql']:  # drop_duplicates: 值为 True 时检查重复数据再插入
                 m.df_to_mysql(df=df, db_name=db_name, table_name=collection_name,
-                              drop_duplicates=True, filename=name, count=f'{i}/{count}')
+                              df_sql=True,
+                              drop_duplicates=False,
+                              filename=name, count=f'{i}/{count}')
             # nas.df_to_mysql(df=df, db_name=db_name, table_name=collection_name, drop_duplicates=True,)
         except Exception as e:
             print(name, e)
@@ -1068,11 +1081,13 @@ if __name__ == '__main__':
     #     target_service='company',
     #     database='mysql'
     # )
-    db_name = '市场数据2'
-    table_name = '类目洞察_属性分析_商品发现'
+
+    db_name = '京东数据2'
+    table_name = '推广数据_关键词报表'
     upload_dir(
-        path='/Users/xigua/Downloads/类目洞察/属性分析/商品发现',
+        path='/Users/xigua/数据中心/原始文件2/京东报表/JD推广_关键词报表/2024-09',
         db_name=db_name,
         collection_name=table_name,
-        dbs={'mysql': False, 'mongodb': False},
+        dbs={'mysql': True, 'mongodb': False},
     )
+
{mdbq-1.7.1 → mdbq-1.7.3}/mdbq/aggregation/query_data.py
@@ -342,6 +342,39 @@ class MysqlDatasQuery:
             projection=projection,
         )
         return df
+    def jd_gjc(self):
+        start_date, end_date = self.months_data(num=self.months)
+        projection = {
+            '日期': 1,
+            '产品线': 1,
+            '计划类型': 1,
+            '计划id': 1,
+            '推广计划': 1,
+            '搜索词': 1,
+            '关键词': 1,
+            '关键词购买类型': 1,
+            '广告定向类型': 1,
+            '花费': 1,
+            '展现数': 1,
+            '点击数': 1,
+            '直接订单行': 1,
+            '直接订单金额': 1,
+            '总订单行': 1,
+            '总订单金额': 1,
+            '总加购数': 1,
+            '下单新客数_去重': 1,
+            '领券数': 1,
+            '商品关注数': 1,
+            '店铺关注数': 1
+        }
+        df = self.download.data_to_df(
+            db_name='京东数据2',
+            table_name='推广数据_关键词报表',
+            start_date=start_date,
+            end_date=end_date,
+            projection=projection,
+        )
+        return df
     def sku_sales(self):
         start_date, end_date = self.months_data(num=self.months)
         projection = {
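The projection dict in jd_gjc follows the same MongoDB-style include-flag convention ('列名': 1) as the other query methods. The data_to_df implementation is not part of this diff, so the following is only an illustrative assumption of how such a dict is commonly reduced to a SQL column list; projection_to_select is a hypothetical helper, not an mdbq function:

# Hypothetical helper (not part of mdbq): turn a {'列名': 1} projection dict
# into a SELECT statement. Table and column names below are examples only.
projection = {'日期': 1, '产品线': 1, '花费': 1}

def projection_to_select(table_name: str, projection: dict) -> str:
    cols = [col for col, keep in projection.items() if keep == 1]
    col_sql = ', '.join(f'`{c}`' for c in cols) if cols else '*'
    return f"SELECT {col_sql} FROM `{table_name}`"

print(projection_to_select('推广数据_关键词报表', projection))
# SELECT `日期`, `产品线`, `花费` FROM `推广数据_关键词报表`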
@@ -541,7 +574,7 @@ class GroupBy:
             )
             df.insert(loc=1, column='推广渠道', value='万相台无界版')  # df中插入新列
             return df
-        elif '关键词报表' in table_name:
+        elif '天猫_关键词报表' in table_name:
             df.rename(columns={
                 '场景名字': '营销场景',
                 '宝贝id': '商品id',
@@ -581,6 +614,8 @@ class GroupBy:
                 }
             )
             df.insert(loc=1, column='推广渠道', value='万相台无界版')  # df中插入新列
+            df['是否品牌词'] = df['词名字/词包名字'].str.contains('万里马|wanlima', regex=True)
+            df['是否品牌词'] = df['是否品牌词'].apply(lambda x: '品牌词' if x else '')
             return df
         elif '天猫_超级直播' in table_name:
             df.rename(columns={
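Both the Tmall keyword branch above and the JD branch further down derive a brand-word flag the same way: a regex match followed by a True → '品牌词' mapping. A small self-contained sketch on made-up keyword values (only the pattern '万里马|wanlima' comes from the diff):

import pandas as pd

# Toy keyword column; the values are invented.
df = pd.DataFrame({'词名字/词包名字': ['万里马皮带', 'wanlima钱包', '真皮女包']})

# str.contains returns a boolean Series, then True is mapped to '品牌词' and False to ''.
df['是否品牌词'] = df['词名字/词包名字'].str.contains('万里马|wanlima', regex=True)
df['是否品牌词'] = df['是否品牌词'].apply(lambda x: '品牌词' if x else '')

print(df['是否品牌词'].tolist())  # ['品牌词', '品牌词', '']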
@@ -832,6 +867,29 @@ class GroupBy:
                 }
             )
             return df
+        elif '京东_关键词报表' in table_name:
+            df_lin = df[['计划id', '推广计划']]
+            df_lin.drop_duplicates(subset=['计划id'], keep='last', inplace=True, ignore_index=True)
+            df = df.groupby(['日期', '产品线', '计划类型', '计划id', '搜索词', '关键词', '关键词购买类型', '广告定向类型', '展现数', '点击数', '花费'],
+                            as_index=False).agg(
+                **{
+                    '直接订单行': ('直接订单行', np.max),
+                    '直接订单金额': ('直接订单金额', np.max),
+                    '总订单行': ('总订单行', np.max),
+                    '总订单金额': ('总订单金额', np.max),
+                    '总加购数': ('总加购数', np.max),
+                    '下单新客数': ('下单新客数_去重', np.max),
+                    '领券数': ('领券数', np.max),
+                    '商品关注数': ('商品关注数', np.max),
+                    '店铺关注数': ('店铺关注数', np.max)
+                }
+            )
+            df = pd.merge(df, df_lin, how='left', left_on='计划id', right_on='计划id')
+            df['k_是否品牌词'] = df['关键词'].str.contains('万里马|wanlima', regex=True)
+            df['k_是否品牌词'] = df['k_是否品牌词'].apply(lambda x: '品牌词' if x else '')
+            df['s_是否品牌词'] = df['搜索词'].str.contains('万里马|wanlima', regex=True)
+            df['s_是否品牌词'] = df['s_是否品牌词'].apply(lambda x: '品牌词' if x else '')
+            return df
         else:
             print(f'<{table_name}>: Groupby 类尚未配置,数据为空')
             return pd.DataFrame({})
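The JD branch collapses duplicate keyword rows with a named aggregation (the **{...} dict unpacked into .agg) and then merges 推广计划 back in on 计划id. A self-contained sketch of the same pandas pattern with invented rows ('max' stands in for the np.max used in the diff):

import pandas as pd

# Toy JD keyword rows; two rows share the same key and differ only in 总订单金额.
df = pd.DataFrame({
    '日期': ['2024-09-01', '2024-09-01'],
    '计划id': [101, 101],
    '推广计划': ['秋季上新', '秋季上新'],
    '关键词': ['万里马皮带', '万里马皮带'],
    '花费': [5.0, 5.0],
    '总订单金额': [30.0, 50.0],
})

# Keep plan names aside, deduplicated by 计划id, and merge them back after the groupby.
df_lin = df[['计划id', '推广计划']].drop_duplicates(subset=['计划id'], keep='last', ignore_index=True)

# Named aggregation via **{...}: each output column takes the max of its source column.
grouped = df.groupby(['日期', '计划id', '关键词', '花费'], as_index=False).agg(
    **{'总订单金额': ('总订单金额', 'max')}
)
grouped = pd.merge(grouped, df_lin, how='left', on='计划id')

print(grouped[['计划id', '推广计划', '总订单金额']].to_dict('records'))
# [{'计划id': 101, '推广计划': '秋季上新', '总订单金额': 50.0}]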
@@ -1052,9 +1110,9 @@ def data_aggregation_one(service_databases=[{}], months=1):
     data_dict = [
         {
             '数据库名': '聚合数据',
-            '集合名': '天猫_超级直播',
-            '唯一主键': ['日期', '推广渠道', '营销场景', '花费'],
-            '数据主体': sdq.tg_cjzb(),
+            '集合名': '京东_关键词报表',
+            '唯一主键': ['日期', '产品线', '搜索词', '关键词', '展现数', '花费'],
+            '数据主体': sdq.jd_gjc(),
         },
     ]
     ######################################################
@@ -1069,8 +1127,9 @@ def data_aggregation_one(service_databases=[{}], months=1):
             df=df,
             db_name=db_name,
             table_name=table_name,
+            df_sql=True,
             drop_duplicates=False,
-            icm_update=unique_key_list,
+            # icm_update=unique_key_list,
             service_database=service_database,
         )  # 3. 回传数据库
 
@@ -1171,6 +1230,12 @@ def data_aggregation(service_databases=[{}], months=1):
             '唯一主键': ['日期', '推广渠道', '营销场景', '花费'],
             '数据主体': sdq.tg_cjzb(),
         },
+        {
+            '数据库名': '聚合数据',
+            '集合名': '京东_关键词报表',
+            '唯一主键': ['日期', '产品线', '搜索词', '关键词', '展现数', '花费'],
+            '数据主体': sdq.jd_gjc(),
+        },
     ]
     for items in data_dict:  # 遍历返回结果
         db_name, table_name, unique_key_list, df = items['数据库名'], items['集合名'], items['唯一主键'], items['数据主体']
@@ -1245,7 +1310,7 @@ def main():
 
 
 if __name__ == '__main__':
-    data_aggregation(service_databases=[{'company': 'mysql'}], months=24)  # 正常的聚合所有数据
-    # data_aggregation_one(service_databases=[{'company': 'mysql'}], months=1)  # 单独聚合某一个数据库,具体库进函数编辑
+    # data_aggregation(service_databases=[{'company': 'mysql'}], months=0)  # 正常的聚合所有数据
+    data_aggregation_one(service_databases=[{'company': 'mysql'}], months=1)  # 单独聚合某一个数据库,具体库进函数编辑
     # optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650)  # 立即启动对聚合数据的清理工作
 
{mdbq-1.7.1 → mdbq-1.7.3}/mdbq/clean/data_clean.py
@@ -683,6 +683,20 @@ class DataClean:
                m.df_to_mysql(df=df, db_name='天猫数据1', tabel_name='万相台_人群洞察')
 
        # ----------------------- 京东数据处理分界线 -----------------------
+        elif name.endswith('.csv') and '关键词点击成交报表_pbix同步_勿删改' in name:
+            df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
+            for col in df.columns.tolist():
+                if '(' in col:
+                    new_col = re.sub('[()]', '_', col)
+                    new_col = new_col.strip('_')
+                    df.rename(columns={col: new_col}, inplace=True)
+            df['日期'] = df['日期'].apply(lambda x: f'{str(x)[:4]}-{str(x)[4:6]}-{str(x)[6:8]}')
+            df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
+            min_clm = str(df['日期'].min()).split(' ')[0]
+            max_clm = str(df['日期'].max()).split(' ')[0]
+            new_name = f'京东推广关键词点击成交报表_{min_clm}_{max_clm}.csv'
+            self.save_to_csv(df, root, new_name)
+            os.remove(os.path.join(root, name))
        elif name.endswith('.csv') and '营销概况_全站营销' in name:
            df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=1, na_filter=False)
            df = df[(df['日期'] != '日期') & (df['日期'] != '汇总') & (df['日期'] != '0') & (df['花费'] != '0') & (df['花费'] != '0.00')]
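The clean-up branch above writes the report back out under a name that encodes its date range. A short sketch of how min_clm/max_clm produce that filename, with made-up dates (the save_to_csv call and the os.remove of the source file are omitted here):

import pandas as pd

# Invented date column standing in for the cleaned JD report.
df = pd.DataFrame({'日期': pd.to_datetime(['2024-09-01', '2024-09-15', '2024-09-30'])})

# min()/max() give Timestamps like '2024-09-01 00:00:00'; splitting on the space keeps the date part.
min_clm = str(df['日期'].min()).split(' ')[0]
max_clm = str(df['日期'].max()).split(' ')[0]

new_name = f'京东推广关键词点击成交报表_{min_clm}_{max_clm}.csv'
print(new_name)  # 京东推广关键词点击成交报表_2024-09-01_2024-09-30.csv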
@@ -1199,6 +1213,9 @@ class DataClean:
        elif name.endswith('.csv') and '营销概况_全站营销' in name:
            t_path = str(pathlib.Path(self.source_path, '京东报表/JD推广_全站营销报表'))
            bib(t_path, _as_month=True)
+        elif name.endswith('.csv') and '京东推广关键词点击成交报表' in name:
+            t_path = str(pathlib.Path(self.source_path, '京东报表/JD推广_关键词报表'))
+            bib(t_path, _as_month=True)
        # 京东分界线 ------- 结束标记
 
    def attribute(self, path=None, _str='商品素材导出', ):
{mdbq-1.7.1 → mdbq-1.7.3}/mdbq.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mdbq
-Version: 1.7.1
+Version: 1.7.3
 Home-page: https://pypi.org/project/mdbsql
 Author: xigua,
 Author-email: 2587125111@qq.com
{mdbq-1.7.1 → mdbq-1.7.3}/setup.py
@@ -3,7 +3,7 @@
 from setuptools import setup, find_packages
 
 setup(name='mdbq',
-      version='1.7.1',
+      version='1.7.3',
       author='xigua, ',
       author_email="2587125111@qq.com",
       url='https://pypi.org/project/mdbsql',