mdbq-1.7.6-py3-none-any.whl → mdbq-1.7.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/aggregation/aggregation.py CHANGED
@@ -1100,12 +1100,12 @@ if __name__ == '__main__':
  # database='mysql'
  # )
 
- # db_name = '推广数据2'
- # table_name = '超级直播'
- # upload_dir(
- #     path='/Users/xigua/数据中心/原始文件2/推广报表/超级直播',
- #     db_name=db_name,
- #     collection_name=table_name,
- #     dbs={'mysql': True, 'mongodb': False},
- # )
+ db_name = '推广数据2'
+ table_name = '营销场景报表'
+ upload_dir(
+     path='/Users/xigua/数据中心/原始文件2/推广报表/营销场景报表',
+     db_name=db_name,
+     collection_name=table_name,
+     dbs={'mysql': True, 'mongodb': False},
+ )
 
mdbq/aggregation/query_data.py CHANGED
@@ -1048,10 +1048,20 @@ class GroupBy:
  '直接成交金额': float,
  '自然流量曝光量': int,
  }, errors='raise')
+ # tg = tg.groupby(['日期', '推广渠道', '营销场景', '商品id', '花费', '展现量', '点击量'], as_index=False).agg(
+ #     **{'加购量': ('加购量', np.max),
+ #        '成交笔数': ('成交笔数', np.max),
+ #        '成交金额': ('成交金额', np.max),
+ #        '自然流量曝光量': ('自然流量曝光量', np.max),
+ #        '直接成交笔数': ('直接成交笔数', np.max),
+ #        '直接成交金额': ('直接成交金额', np.max)
+ #     }
+ # )
  df = pd.concat([tg, zb, pxb], axis=0, ignore_index=True)
  df.fillna(0, inplace=True)  # fill NaN values after concat
  df = df.astype(
      {
+         '商品id': str,
          '自然流量曝光量': int,
      }
  )
@@ -1249,13 +1259,13 @@ def data_aggregation(service_databases=[{}], months=1):
  {
      '数据库名': '聚合数据',
      '集合名': '天猫生意经_宝贝指标',
-     '唯一主键': ['日期', '宝贝id'],
+     '唯一主键': ['日期', '宝贝id'],  # don't add other fields (e.g. 销售额) to the key: their values change and are not unique
      '数据主体': sdq.syj(),
  },
  {
      '数据库名': '聚合数据',
      '集合名': '天猫_店铺来源_日数据',
-     '唯一主键': ['日期', '一级来源', '二级来源', '三级来源'],
+     '唯一主键': ['日期', '一级来源', '二级来源', '三级来源', '访客数'],
      '数据主体': sdq.dplyd(),
  },
  {
@@ -1327,7 +1337,7 @@ def data_aggregation(service_databases=[{}], months=1):
  {
      '数据库名': '聚合数据',
      '集合名': '天猫_品销宝账户报表',
-     '唯一主键': ['日期', '报表类型'],
+     '唯一主键': ['日期', '报表类型', '推广渠道', '营销场景', '花费'],
      '数据主体': sdq.pxb_zh(),
  },
  ]
@@ -1347,23 +1357,36 @@ def data_aggregation(service_databases=[{}], months=1):
  )
  g.sp_index_datas = pd.DataFrame()  # reset, otherwise the next loop keeps writing into the database
  # g.as_csv(df=df, filename=table_name + '.csv')  # export csv
- m.df_to_mysql(
-     df=df,
-     db_name=db_name,
-     table_name=table_name,
-     # df_sql=True,
-     drop_duplicates=False,
-     icm_update=unique_key_list,
-     service_database=service_database,
- )  # 3. write back to the database
+ if '日期' in df.columns.tolist():
+     m.df_to_mysql(
+         df=df,
+         db_name=db_name,
+         table_name=table_name,
+         move_insert=True,  # delete first, then insert
+         # df_sql=True,
+         # drop_duplicates=False,
+         # icm_update=unique_key_list,
+         service_database=service_database,
+     )  # 3. write back to the database
+ else:  # no date column, so dedupe by the unique key instead
+     m.df_to_mysql(
+         df=df,
+         db_name=db_name,
+         table_name=table_name,
+         # df_sql=True,
+         drop_duplicates=False,
+         icm_update=unique_key_list,
+         service_database=service_database,
+     )  # 3. write back to the database
  res = g.performance(bb_tg=True)  # profit/loss table; depends on other tables, computed separately
  m.df_to_mysql(
      df=res,
      db_name='聚合数据',
      table_name='_全店商品销售',
+     move_insert=True,  # delete first, then insert
      # df_sql=True,
-     drop_duplicates=False,
-     icm_update=['日期', '商品id'],  # set the unique key
+     # drop_duplicates=False,
+     # icm_update=['日期', '商品id'],  # set the unique key
      service_database=service_database,
  )
  res = g.performance(bb_tg=False)  # profit/loss table; depends on other tables, computed separately
@@ -1371,9 +1394,10 @@ def data_aggregation(service_databases=[{}], months=1):
      df=res,
      db_name='聚合数据',
      table_name='_推广商品销售',
+     move_insert=True,  # delete first, then insert
      # df_sql=True,
-     drop_duplicates=False,
-     icm_update=['日期', '商品id'],  # set the unique key
+     # drop_duplicates=False,
+     # icm_update=['日期', '商品id'],  # set the unique key
      service_database=service_database,
  )
 
@@ -1382,9 +1406,10 @@ def data_aggregation(service_databases=[{}], months=1):
      df=res,
      db_name='聚合数据',
      table_name='天猫_推广汇总',
+     move_insert=True,  # delete first, then insert
      # df_sql=True,
-     drop_duplicates=False,
-     icm_update=['日期', '商品id'],  # set the unique key
+     # drop_duplicates=False,
+     # icm_update=['日期', '推广渠道', '营销场景', '商品id', '花费', '展现量', '点击量'],  # set the unique key
      service_database=service_database,
  )
 
@@ -1394,9 +1419,10 @@ def data_aggregation(service_databases=[{}], months=1):
      df=res,
      db_name='聚合数据',
      table_name='_京东_推广商品销售',
+     move_insert=True,  # delete first, then insert
      # df_sql=True,
-     drop_duplicates=False,
-     icm_update=['日期', '跟单sku id', '货号', '花费'],  # set the unique key
+     # drop_duplicates=False,
+     # icm_update=['日期', '跟单sku id', '货号', '花费'],  # set the unique key
      service_database=service_database,
  )
 
@@ -1410,7 +1436,7 @@ def main():
 
 
  if __name__ == '__main__':
-     data_aggregation(service_databases=[{'home_lx': 'mysql'}], months=1)  # normal run: aggregate all data
+     data_aggregation(service_databases=[{'company': 'mysql'}], months=1)  # normal run: aggregate all data
      # data_aggregation_one(service_databases=[{'company': 'mysql'}], months=1)  # aggregate just one database; edit the function to choose which
      # optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650)  # immediately start cleanup of the aggregated data
 
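Note on the branch introduced above: tables that carry a 日期 (date) column are now written with move_insert=True, which replaces the whole date range wholesale, while tables without one keep the old keyed dedupe via icm_update. A minimal caller-side sketch of that decision, assuming the df_to_mysql signature shown in the mysql.py diff below; the helper name write_table is illustrative, and uploader stands for an mdbq.mysql.mysql.MysqlUpload instance:

    def write_table(uploader, df, db_name, table_name, unique_key_list, service_database):
        # uploader: an mdbq.mysql.mysql.MysqlUpload instance (construction not shown in this diff)
        if '日期' in df.columns.tolist():
            # date-keyed aggregate: delete the frame's date range, then append
            uploader.df_to_mysql(df=df, db_name=db_name, table_name=table_name,
                                 move_insert=True, service_database=service_database)
        else:
            # no date column: dedupe against the unique-key list instead
            uploader.df_to_mysql(df=df, db_name=db_name, table_name=table_name,
                                 drop_duplicates=False, icm_update=unique_key_list,
                                 service_database=service_database)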
mdbq/company/copysh.py CHANGED
@@ -20,6 +20,7 @@ from mdbq.config import get_myconf
  from mdbq.config import set_support
  from mdbq.config import products
  from mdbq.mysql import mysql
+ from mdbq.pbix import refresh_all
  warnings.filterwarnings('ignore')
 
 
@@ -250,6 +251,16 @@ class TbFiles:
  else:
      print(f'{src} 所需同步的文件不存在,请检查:pd_list参数')
 
+ excel_path = os.path.join(self.share_path, 'EXCEL报表')
+ files = os.listdir(excel_path)
+ r = refresh_all.RefreshAll()
+ for file in files:
+     if file.endswith('.xlsx'):
+         now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+         print(f'正在刷新 excel: {file}')
+         r.refresh_excel2(excel_file=os.path.join(excel_path, file))
+         time.sleep(10)
+
  self.before_max_time = self.check_change()  # reset the value to avoid re-syncing
 
  now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
mdbq/mysql/mysql.py CHANGED
@@ -57,11 +57,14 @@ class MysqlUpload:
  }
  self.filename = None
 
- def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], service_database={'home_lx': 'mysql'}, df_sql=False, drop_duplicates=False, filename=None, count=None, json_path=None):
+ def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], service_database={'home_lx': 'mysql'}, move_insert=False, df_sql=False, drop_duplicates=False, filename=None, count=None, json_path=None):
  """
  Write the df to the database
  db_name: database name
  table_name: collection/table name
+ move_insert: using the df's dates, first delete the matching rows from the database, then insert; df_sql, drop_duplicates and icm_update must all be set to False
+     in principle restricted to aggregated data; do not set it when inserting raw data
+
  df_sql: a temporary parameter; when True, uploads the whole table via df.to_sql without deduplication; for bulk-loading a newly created table
  drop_duplicates: when True, checks for duplicates before inserting, otherwise uploads directly; slow with large amounts of data
  icm_update: incremental update, used for aggregated data, not for raw files; when set, drop_duplicates must be changed to False
@@ -162,20 +165,34 @@ class MysqlUpload:
  elif cl:
      mysql_types.mysql_all_dtypes(service_database=service_database)  # refresh the dtypes info of every table in every database into the local json
 
- # # 4. remove data within the specified date range; only for aggregated data, do not set in other cases
- # if drop_duplicates and '日期' in df.columns.tolist():
- #     dates = df['日期'].values.tolist()
- #     start_date = pd.to_datetime(min(dates)).strftime('%Y-%m-%d')
- #     end_date = (pd.to_datetime(max(dates)) + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
- #     sql = f"DELETE FROM `{table_name}` WHERE {'日期'} BETWEEN '%s' AND '%s'" % (start_date, end_date)
- #     cursor.execute(sql)
- #     connection.commit()
-
- # 5. upsert the data
+ # 4. upsert the data
  now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
  for service_name, database in service_database.items():
      print(f'{now}正在更新 mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count}, {service_name}, {self.filename}')
 
+ # 5. remove data within the df's date range; in principle only for aggregated data, do not set when inserting raw data
+ if move_insert and '日期' in df.columns.tolist():
+     # delete the existing rows
+     dates = df['日期'].values.tolist()
+     start_date = pd.to_datetime(min(dates)).strftime('%Y-%m-%d')
+     end_date = (pd.to_datetime(max(dates)) + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
+     sql = f"DELETE FROM `{table_name}` WHERE {'日期'} BETWEEN '%s' AND '%s'" % (start_date, end_date)
+     cursor.execute(sql)
+     connection.commit()
+
+     # insert the new rows
+     engine = create_engine(
+         f"mysql+pymysql://{self.username}:{self.password}@{self.host}:{self.port}/{db_name}")  # create the database engine
+     df.to_sql(
+         name=table_name,
+         con=engine,
+         if_exists='append',
+         index=False,
+         chunksize=1000
+     )
+     connection.close()
+     return
+
  datas = df.to_dict(orient='records')
  for data in datas:
      # data is the incoming data to be processed, not data already in the database
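The move_insert branch added above boils down to a delete-then-append pattern: remove every row whose 日期 falls inside the incoming frame's date span, then bulk-append the frame with df.to_sql. A self-contained sketch of the same idea, assuming pymysql and sqlalchemy; the helper name and connection parameters are illustrative, and it uses a parameterized DELETE instead of the string formatting seen in the diff:

    import datetime
    import pandas as pd
    import pymysql
    from sqlalchemy import create_engine

    def move_insert_sketch(df, table_name, conn_kwargs, engine_url):
        # delete the rows covered by the frame's date range
        dates = pd.to_datetime(df['日期'])
        start_date = dates.min().strftime('%Y-%m-%d')
        end_date = (dates.max() + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
        connection = pymysql.connect(**conn_kwargs)
        try:
            with connection.cursor() as cursor:
                cursor.execute(
                    f"DELETE FROM `{table_name}` WHERE 日期 BETWEEN %s AND %s",
                    (start_date, end_date),
                )
            connection.commit()
        finally:
            connection.close()
        # append the frame in chunks, mirroring the to_sql call in the diff
        engine = create_engine(engine_url)  # e.g. 'mysql+pymysql://user:pass@host:3306/db'
        df.to_sql(name=table_name, con=engine, if_exists='append', index=False, chunksize=1000)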
mdbq/pbix/refresh_all.py CHANGED
@@ -17,7 +17,7 @@ class RefreshAll:
  def __init__(self):
      self.my_conf = os.path.join(set_support.SetSupport(dirname='support').dirname, '.my_conf')
      self.pbix_path = os.path.join(set_support.SetSupport(dirname='support').dirname, 'ref_list.txt')
-     self.excel_path = os.path.join(set_support.SetSupport(dirname='support').dirname, 'cp_list.txt')
+     self.excel_path = os.path.join(set_support.SetSupport(dirname='support').dirname, 'ref_list.txt')
      self.run_py_path = 'run_py'
      self.procname = 'PBIDesktop.exe'
 
@@ -84,6 +84,31 @@ class RefreshAll:
  except Exception as e:
      print(e)
 
+ def refresh_excel2(self, excel_file):
+     # refresh the excel workbook
+     if excel_file.endswith('.xlsx'):
+         try:
+             print(f'正在刷新 >>>{excel_file}')
+             xlapp = win32com.client.Dispatch('Excel.Application')  # create the Excel application object
+             xlapp.Visible = False  # whether the window is visible
+             xlapp.DisplayAlerts = False  # whether to show warning dialogs
+             wb = xlapp.Workbooks.Open(excel_file)
+             conjuncts = wb.Connections.Count  # count the workbook's external connections
+             if conjuncts == 0:
+                 wb.Close(SaveChanges=False)
+                 xlapp.Quit()
+             else:
+                 time.sleep(2)
+                 wb.RefreshAll()
+                 xlapp.CalculateUntilAsyncQueriesDone()
+                 time.sleep(2)
+                 wb.Save()
+                 wb.Close(SaveChanges=True)
+                 xlapp.Quit()
+                 print('文件刷新 >>>' + excel_file)
+         except Exception as e:
+             print(e)
+
  def pbi(self, path, _timeout=300):
      """
      originally an independent library module: pbix_refresh
@@ -146,7 +171,7 @@ class RefreshAll:
 
 
  if __name__ == '__main__':
-     # r = RefreshAll()
+     r = RefreshAll()
      # r.refresh_pbix()
-     # r.refresh_excel()
+     r.refresh_excel()
      pass
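The refresh_excel2 method added above drives Excel over COM: it opens the workbook invisibly, skips it when Connections.Count is zero, and otherwise runs RefreshAll plus CalculateUntilAsyncQueriesDone before saving. A minimal usage sketch, assuming pywin32 is installed and Excel is available on a Windows host; the folder path is illustrative:

    import os
    from mdbq.pbix import refresh_all

    r = refresh_all.RefreshAll()
    excel_dir = r'D:\reports'  # illustrative folder of .xlsx files
    for name in os.listdir(excel_dir):
        if name.endswith('.xlsx'):
            # each call opens Excel invisibly, refreshes external connections, saves, and quits
            r.refresh_excel2(excel_file=os.path.join(excel_dir, name))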
mdbq-1.7.6.dist-info/METADATA → mdbq-1.7.8.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: mdbq
- Version: 1.7.6
+ Version: 1.7.8
  Home-page: https://pypi.org/project/mdbsql
  Author: xigua,
  Author-email: 2587125111@qq.com
mdbq-1.7.6.dist-info/RECORD → mdbq-1.7.8.dist-info/RECORD
@@ -1,17 +1,17 @@
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
  mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
- mdbq/aggregation/aggregation.py,sha256=d7pYUku7Wbxl0tvKKNTG7mppOjGqg0LF62OpfW8fVBk,64120
+ mdbq/aggregation/aggregation.py,sha256=sgsetJHK4fOcXvqQCVgJoSIwZQLMznVG3I-MqHlW_fM,64116
  mdbq/aggregation/df_types.py,sha256=oQJS2IBU3_IO6GMgbssHuC2yCjNnbta0QPGrFOwNLnU,7591
  mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
  mdbq/aggregation/optimize_data.py,sha256=u2Kl_MFtZueXJ57ycy4H2OhXD431RctUYJYCl637uT0,4176
- mdbq/aggregation/query_data.py,sha256=WxLtzR6s6gIPe6e1hB3xycZirrN83IZ0s0PyI0t2Cls,66792
+ mdbq/aggregation/query_data.py,sha256=9UhWwFAhMieC5iOIna_sAhsk2WvSpzGd5aDvkRo_pVE,68486
  mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
  mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
  mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
  mdbq/clean/data_clean.py,sha256=T0WYOKFwNZTNk3temKOw1K2H54kxu9QBJjlTbkMtxNk,94217
  mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
- mdbq/company/copysh.py,sha256=WCZ92vCJAy6_ZFeOxWL-U9gArIpyga4xts-s1wKsspY,17268
+ mdbq/company/copysh.py,sha256=z1jql2UABdKGGPYF6VRhXcBwCYaCBFR91kZwthBlOdU,17754
  mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
  mdbq/config/get_myconf.py,sha256=-CFEW0dQh4OIwVgwK-cL0eVp1LN3PjJgN89d4P5TB9I,6011
  mdbq/config/products.py,sha256=vIK8DJ-F3XXwvNPK-4OJq2tZITNlL6Sub8QBdoOng8U,5676
@@ -24,7 +24,7 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
  mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
  mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
- mdbq/mysql/mysql.py,sha256=cIK_GI6Ggb9LsxsvVUv0AviD7kdyCumk_eQ9MSOwsms,43320
+ mdbq/mysql/mysql.py,sha256=UKnBmywqTzc0VJfZGlC-9KzV7I--9P7H-jspUp_IvtU,44071
  mdbq/mysql/s_query.py,sha256=fIQvQKPyV7rvSUuxVWXv9S5FmCnIM4GHKconE1Zn5BA,8378
  mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -33,9 +33,9 @@ mdbq/other/pov_city.py,sha256=AEOmCOzOwyjHi9LLZWPKi6DUuSC-_M163664I52u9qw,21050
  mdbq/other/ua_sj.py,sha256=JuVYzc_5QZ9s_oQSrTHVKkQv4S_7-CWx4oIKOARn_9U,22178
  mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
  mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
- mdbq/pbix/refresh_all.py,sha256=tgy762608HMaXWynbOURIf2UVMuSPybzrDXQnOOcnZU,6102
+ mdbq/pbix/refresh_all.py,sha256=sBZ61LKvm-raa9ROnC-AAvPYLU7dbudmuxy__5QCB2A,7176
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
- mdbq-1.7.6.dist-info/METADATA,sha256=QzujGJTSxIt0YV7h24bB6lTwt0Tw7QNStfyJncdGqxY,245
- mdbq-1.7.6.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
- mdbq-1.7.6.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
- mdbq-1.7.6.dist-info/RECORD,,
+ mdbq-1.7.8.dist-info/METADATA,sha256=GSjzapW9LjD8Jco2uKTvfb6Ex9lWJW1Eo33BpvXLUls,245
+ mdbq-1.7.8.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+ mdbq-1.7.8.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+ mdbq-1.7.8.dist-info/RECORD,,
mdbq-1.7.6.dist-info/WHEEL → mdbq-1.7.8.dist-info/WHEEL
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (70.1.0)
+ Generator: bdist_wheel (0.44.0)
  Root-Is-Purelib: true
  Tag: py3-none-any