mdbq 1.9.1.tar.gz → 1.9.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {mdbq-1.9.1 → mdbq-1.9.2}/PKG-INFO +1 -1
  2. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/aggregation/aggregation.py +35 -9
  3. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/aggregation/query_data.py +55 -3
  4. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/clean/data_clean.py +28 -3
  5. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/mysql/mysql.py +3 -0
  6. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq.egg-info/PKG-INFO +1 -1
  7. {mdbq-1.9.1 → mdbq-1.9.2}/setup.py +1 -1
  8. {mdbq-1.9.1 → mdbq-1.9.2}/README.txt +0 -0
  9. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/__init__.py +0 -0
  10. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/__version__.py +0 -0
  11. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/aggregation/__init__.py +0 -0
  12. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/aggregation/df_types.py +0 -0
  13. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/aggregation/mysql_types.py +0 -0
  14. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/aggregation/optimize_data.py +0 -0
  15. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/bdup/__init__.py +0 -0
  16. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/bdup/bdup.py +0 -0
  17. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/clean/__init__.py +0 -0
  18. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/company/__init__.py +0 -0
  19. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/company/copysh.py +0 -0
  20. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/config/__init__.py +0 -0
  21. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/config/get_myconf.py +0 -0
  22. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/config/products.py +0 -0
  23. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/config/set_support.py +0 -0
  24. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/config/update_conf.py +0 -0
  25. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/dataframe/__init__.py +0 -0
  26. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/dataframe/converter.py +0 -0
  27. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/log/__init__.py +0 -0
  28. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/log/mylogger.py +0 -0
  29. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/mongo/__init__.py +0 -0
  30. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/mongo/mongo.py +0 -0
  31. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/mysql/__init__.py +0 -0
  32. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/mysql/s_query.py +0 -0
  33. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/mysql/year_month_day.py +0 -0
  34. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/other/__init__.py +0 -0
  35. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/other/porxy.py +0 -0
  36. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/other/pov_city.py +0 -0
  37. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/other/sku_picture.py +0 -0
  38. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/other/ua_sj.py +0 -0
  39. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/pbix/__init__.py +0 -0
  40. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/pbix/pbix_refresh.py +0 -0
  41. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/pbix/refresh_all.py +0 -0
  42. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq/spider/__init__.py +0 -0
  43. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq.egg-info/SOURCES.txt +0 -0
  44. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq.egg-info/dependency_links.txt +0 -0
  45. {mdbq-1.9.1 → mdbq-1.9.2}/mdbq.egg-info/top_level.txt +0 -0
  46. {mdbq-1.9.1 → mdbq-1.9.2}/setup.cfg +0 -0
{mdbq-1.9.1 → mdbq-1.9.2}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: mdbq
- Version: 1.9.1
+ Version: 1.9.2
  Home-page: https://pypi.org/project/mdbsql
  Author: xigua,
  Author-email: 2587125111@qq.com
{mdbq-1.9.1 → mdbq-1.9.2}/mdbq/aggregation/aggregation.py
@@ -61,7 +61,6 @@ class DatabaseUpdate:
  for name in files:
      if '~$' in name or '.DS' in name or '.localized' in name or '.ini' in name or '$RECYCLE.BIN' in name or 'Icon' in name:
          continue
-
      db_name = None # 初始化/重置变量,避免进入下一个循环
      collection_name = None
      for data in datas: # 根据标题对照表适配 db_name 和 collection_name
@@ -189,6 +188,22 @@ class DatabaseUpdate:
      collection_name='店铺来源_日数据_旧版'
  elif name.endswith('.csv') and '客户运营平台_客户列表' in name:
      df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
+ elif name.endswith('.xlsx') and '直播分场次效果' in name:
+     pattern = re.findall(r'(\d{4}-\d{2}-\d{2})_(\d{4}-\d{2}-\d{2})', name)
+     if pattern:
+         continue
+     df = pd.read_excel(os.path.join(root, name), header=0)
+     if len(df) == 0:
+         print(f'{name} 报表数据为空')
+         continue
+     df.replace(to_replace=['--'], value='', regex=False, inplace=True)
+     df.replace(to_replace=[','], value='', regex=True, inplace=True)
+     df['直播开播时间'] = pd.to_datetime(df['直播开播时间'], format='%Y-%m-%d %H:%M:%S', errors='ignore')
+     df.insert(loc=0, column='日期', value=df['直播开播时间'])
+     df['日期'] = df['日期'].apply(
+         lambda x: pd.to_datetime(str(x).split(' ')[0], format='%Y-%m-%d', errors='ignore') if x else x)
+     df.insert(loc=1, column='店铺', value='万里马官方旗舰店')
+
  elif name.endswith('.xls') and '生意参谋' in name and '无线店铺三级流量来源详情' in name:
      # 店铺来源,手淘搜索,关键词
      pattern = re.findall(r'(\d{4}-\d{2}-\d{2})_(\d{4}-\d{2}-\d{2})', name)
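As a side note on the hunk above: the new '直播分场次效果' branch normalises the Excel export before it is catalogued. Below is a minimal, self-contained sketch of those cleaning steps with an invented two-row sample (only the column names and the cleaning calls come from the diff; the data values and the final print are illustrative):

    import pandas as pd

    # Invented sample mimicking a 直播分场次效果 export.
    df = pd.DataFrame({
        '直播开播时间': ['2024-05-01 20:00:00', '2024-05-02 19:30:00'],
        '观看人数': ['1,024', '--'],
    })
    df.replace(to_replace=['--'], value='', regex=False, inplace=True)  # blank out '--' placeholders
    df.replace(to_replace=[','], value='', regex=True, inplace=True)    # strip thousands separators
    df['直播开播时间'] = pd.to_datetime(df['直播开播时间'], format='%Y-%m-%d %H:%M:%S', errors='ignore')
    df.insert(loc=0, column='日期', value=df['直播开播时间'])
    df['日期'] = df['日期'].apply(
        lambda x: pd.to_datetime(str(x).split(' ')[0], format='%Y-%m-%d', errors='ignore') if x else x)
    df.insert(loc=1, column='店铺', value='万里马官方旗舰店')
    print(df[['日期', '店铺', '直播开播时间', '观看人数']])  # 日期 carries only the date part of 直播开播时间

Newer pandas releases deprecate errors='ignore' in pd.to_datetime, so the calls above may emit a FutureWarning while still running.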
@@ -1115,12 +1130,23 @@ if __name__ == '__main__':
      # database='mysql'
      # )

-     db_name = '生意经2'
-     table_name = '省份城市分析'
-     upload_dir(
-         path='/Users/xigua/数据中心/原始文件2/生意经/地域分布',
-         db_name=db_name,
-         collection_name=table_name,
-         dbs={'mysql': True, 'mongodb': False},
-     )
+     # db_name = '生意经2'
+     # table_name = '省份城市分析'
+     # upload_dir(
+     #     path='/Users/xigua/数据中心/原始文件2/生意经/地域分布',
+     #     db_name=db_name,
+     #     collection_name=table_name,
+     #     dbs={'mysql': True, 'mongodb': False},
+     # )
+     #

+     # 新版 数据分类
+     dp = DatabaseUpdate(path='/Users/xigua/Downloads')
+     dp.new_unzip(is_move=True)
+     dp.cleaning(is_move=False) # 清洗数据, 存入 self.datas, 不需要立即移除文件,仍保留文件到原始文件中
+     # 将 self.datas 更新至数据库
+     dp.upload_df(service_databases=[
+         # {'home_lx': 'mongodb'},
+         {'company': 'mysql'},
+         # {'nas': 'mysql'},
+     ])
{mdbq-1.9.1 → mdbq-1.9.2}/mdbq/aggregation/query_data.py
@@ -454,6 +454,50 @@ class MysqlDatasQuery:
          )
          return df

+     def zb_ccfx(self):
+         start_date, end_date = self.months_data(num=self.months)
+         projection = {
+             '日期': 1,
+             '店铺': 1,
+             '场次信息': 1,
+             '场次id': 1,
+             '直播开播时间': 1,
+             '开播时长': 1,
+             '封面图点击率': 1,
+             '观看人数': 1,
+             '观看次数': 1,
+             '新增粉丝数': 1,
+             '流量券消耗': 1,
+             '观看总时长(秒)': 1,
+             '人均观看时长(秒)': 1,
+             '次均观看时长(秒)': 1,
+             '商品点击人数': 1,
+             '商品点击次数': 1,
+             '商品点击率': 1,
+             '加购人数': 1,
+             '加购件数': 1,
+             '加购次数': 1,
+             '成交金额(元)': 1,
+             '成交人数': 1,
+             '成交件数': 1,
+             '成交笔数': 1,
+             '成交转化率': 1,
+             '退款人数': 1,
+             '退款笔数': 1,
+             '退款件数': 1,
+             '退款金额(元)': 1,
+             '预售定金支付金额(元)': 1,
+             '预售预估总金额(元)': 1,
+         }
+         df = self.download.data_to_df(
+             db_name='生意参谋2',
+             table_name='直播场次分析',
+             start_date=start_date,
+             end_date=end_date,
+             projection=projection,
+         )
+         return df
+
  class GroupBy:
      """
      数据聚合和导出
@@ -1016,6 +1060,9 @@ class GroupBy:
              }
          )
          return df
+     elif '直播场次分析' in table_name:
+         df.drop_duplicates(subset=['日期', '直播开播时间', '观看人数'], keep='first', inplace=True, ignore_index=True)
+         return df
      else:
          print(f'<{table_name}>: Groupby 类尚未配置,数据为空')
          return pd.DataFrame({})
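The duplicate rule added for '直播场次分析' treats 日期, 直播开播时间 and 观看人数 together as the identity of a live session. A tiny hedged illustration with invented rows:

    import pandas as pd

    # Invented rows: the second one repeats the first on all three key columns.
    df = pd.DataFrame({
        '日期': ['2024-05-01', '2024-05-01', '2024-05-02'],
        '直播开播时间': ['2024-05-01 20:00:00', '2024-05-01 20:00:00', '2024-05-02 19:30:00'],
        '观看人数': [1024, 1024, 980],
    })
    df.drop_duplicates(subset=['日期', '直播开播时间', '观看人数'],
                       keep='first', inplace=True, ignore_index=True)
    print(len(df))  # 2 — the repeated session collapses to its first occurrence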
@@ -1056,7 +1103,6 @@ class GroupBy:
      df['毛利率'] = df.apply(lambda x: round((x['销售额'] - x['商品成本']) / x['销售额'], 4) if x['销售额'] > 0 else 0, axis=1)
      df['盈亏'] = df.apply(lambda x: x['商品毛利'] - x['花费'], axis=1)
      return df
-
  def performance_concat(self, bb_tg=True):
      tg, zb, pxb = self.data_tgyj['天猫汇总表调用'], self.data_tgyj['天猫_超级直播'], self.data_tgyj['天猫_品销宝账户报表']
      zb.rename(columns={
@@ -1385,6 +1431,12 @@ def data_aggregation(service_databases=[{}], months=1):
          '唯一主键': ['日期', '关键词', '访客数'],
          '数据主体': sdq.tm_search(),
      },
+     {
+         '数据库名': '聚合数据',
+         '集合名': '生意参谋_直播场次分析',
+         '唯一主键': ['日期', '直播开播时间'],
+         '数据主体': sdq.zb_ccfx(),
+     },
  ]
  for items in data_dict: # 遍历返回结果
      db_name, table_name, unique_key_list, df = items['数据库名'], items['集合名'], items['唯一主键'], items['数据主体']
@@ -1402,7 +1454,7 @@ def data_aggregation(service_databases=[{}], months=1):
          service_database=service_database,
      )
      g.sp_index_datas = pd.DataFrame() # 重置,不然下个循环会继续刷入数据库
-     # g.as_csv(df=df, filename=table_name + '.csv') # 导出 csv
+     # # g.as_csv(df=df, filename=table_name + '.csv') # 导出 csv
      if '日期' in df.columns.tolist():
          m.df_to_mysql(
              df=df,
@@ -1483,7 +1535,7 @@ def main():


  if __name__ == '__main__':
-     data_aggregation(service_databases=[{'company': 'mysql'}], months=24) # 正常的聚合所有数据
+     data_aggregation(service_databases=[{'company': 'mysql'}], months=1) # 正常的聚合所有数据
      # data_aggregation_one(service_databases=[{'company': 'mysql'}], months=1) # 单独聚合某一个数据库,具体库进函数编辑
      # optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650) # 立即启动对聚合数据的清理工作

{mdbq-1.9.1 → mdbq-1.9.2}/mdbq/clean/data_clean.py
@@ -443,7 +443,26 @@ class DataClean:
          if self.set_up_to_mysql:
              m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_自助取数_店铺流量_月数据')
          os.remove(os.path.join(root, name))
-
+     elif name.endswith('.xlsx') and '直播分场次效果' in name:
+         pattern = re.findall(r'(\d{4}-\d{2}-\d{2})_(\d{4}-\d{2}-\d{2})', name)
+         if pattern:
+             continue
+         df = pd.read_excel(os.path.join(root, name), header=0)
+         if len(df) == 0:
+             print(f'{name} 报表数据为空')
+             continue
+         df.replace(to_replace=['--'], value='0', regex=False, inplace=True)
+         df.replace(to_replace=[','], value='', regex=True, inplace=True)
+         df['直播开播时间'] = pd.to_datetime(df['直播开播时间'], format='%Y-%m-%d %H:%M:%S', errors='ignore')
+         df.insert(loc=0, column='日期', value=df['直播开播时间'])
+         df['日期'] = df['日期'].apply(lambda x: pd.to_datetime(str(x).split(' ')[0], format='%Y-%m-%d', errors='ignore') if x else x)
+         df.insert(loc=1, column='店铺', value='万里马官方旗舰店')
+         min_clm = str(df.min()['直播开播时间']).split(' ')[0]
+         max_clm = str(df.max()['直播开播时间']).split(' ')[0]
+         new_name = f'{os.path.splitext(name)[0]}_{min_clm}_{max_clm}.csv'
+         new_name = re.sub(r' ?(\(\d+\))', '',new_name)
+         self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
+         os.remove(os.path.join(root, name))
      elif name.endswith('.csv') and 'baobei' in name:
          # 生意经宝贝指标日数据
          # print(name)
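The tail of the '直播分场次效果' branch above renames the cleaned report after the date range it actually covers and strips duplicate-download suffixes such as ' (1)'. A small sketch of just that filename logic (the file name and dates are invented; min_clm/max_clm stand in for the min/max of 直播开播时间 computed from the DataFrame):

    import os
    import re

    name = '直播分场次效果 (1).xlsx'                 # hypothetical original export name
    min_clm, max_clm = '2024-05-01', '2024-05-31'   # stand-ins for the data's min/max dates
    new_name = f'{os.path.splitext(name)[0]}_{min_clm}_{max_clm}.csv'
    new_name = re.sub(r' ?(\(\d+\))', '', new_name)  # drop ' (1)'-style suffixes
    print(new_name)  # 直播分场次效果_2024-05-01_2024-05-31.csv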
@@ -1192,6 +1211,12 @@ class DataClean:
      elif name.endswith('.csv') and '客户运营平台_客户列表' in name:
          t_path = str(pathlib.Path(self.source_path, '生意参谋/客户运营平台'))
          bib(t_path, _as_month=True)
+     elif name.endswith('.csv') and '直播分场次效果' in name:
+         pattern = re.findall(r'(\d{4}-\d{2}-\d{2})_(\d{4}-\d{2}-\d{2})', name)
+         if not pattern:
+             continue
+         t_path = str(pathlib.Path(self.source_path, '生意参谋/直播场次分析'))
+         bib(t_path, _as_month=True)
      # 京东分界线 ------- 开始标记
      # 京东分界线
      elif name.endswith('.csv') and '全部渠道_商品明细' in name:
@@ -1435,11 +1460,11 @@ def main():
      c.set_up_to_mysql = False
      c.new_unzip(is_move=True) # 解压文件
      c.change_and_sort()
-     # c.move_all() # 移到文件到原始文件夹
+     c.move_all() # 移到文件到原始文件夹
      # c.attribute() # 商品素材重命名和分类


  if __name__ == '__main__':
-     # main()
+     main()
      username, password, host, port = get_myconf.select_config_values(target_service='aliyun', database='mongodb')
      print(username, password, host, port)
{mdbq-1.9.1 → mdbq-1.9.2}/mdbq/mysql/mysql.py
@@ -81,6 +81,9 @@ class MysqlUpload:
      else:
          print(f'{db_name}: {table_name} 传入的 df 不是有效的 dataframe 结构, {self.filename}')
          return
+     if not db_name or db_name == 'None':
+         print(f'{db_name} 不能为 None')
+         return

      cv = converter.DataFrameConverter()
      df = cv.convert_df_cols(df=df) # 清理 dataframe 非法值
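The added guard rejects not only a missing db_name but also the literal string 'None', which appears when a missing name has been stringified upstream. A minimal sketch of the same check as a standalone helper (the function name is hypothetical, not part of mdbq):

    def db_name_is_valid(db_name) -> bool:
        # Reject None/empty values and the stringified 'None'.
        if not db_name or db_name == 'None':
            print(f'{db_name} 不能为 None')
            return False
        return True

    print(db_name_is_valid(str(None)))  # False — str(None) == 'None'
    print(db_name_is_valid('聚合数据'))  # True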
{mdbq-1.9.1 → mdbq-1.9.2}/mdbq.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: mdbq
- Version: 1.9.1
+ Version: 1.9.2
  Home-page: https://pypi.org/project/mdbsql
  Author: xigua,
  Author-email: 2587125111@qq.com
{mdbq-1.9.1 → mdbq-1.9.2}/setup.py
@@ -3,7 +3,7 @@
  from setuptools import setup, find_packages

  setup(name='mdbq',
-     version='1.9.1',
+     version='1.9.2',
      author='xigua, ',
      author_email="2587125111@qq.com",
      url='https://pypi.org/project/mdbsql',