mdbq 1.2.5__tar.gz → 1.2.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {mdbq-1.2.5 → mdbq-1.2.7}/PKG-INFO +1 -1
  2. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/aggregation/aggregation.py +28 -2
  3. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/aggregation/mysql_types.py +1 -1
  4. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/aggregation/query_data.py +49 -18
  5. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/clean/data_clean.py +19 -19
  6. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/dataframe/converter.py +2 -1
  7. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/mysql/mysql.py +77 -7
  8. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/mysql/s_query.py +3 -0
  9. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq.egg-info/PKG-INFO +1 -1
  10. {mdbq-1.2.5 → mdbq-1.2.7}/setup.py +1 -1
  11. {mdbq-1.2.5 → mdbq-1.2.7}/README.txt +0 -0
  12. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/__init__.py +0 -0
  13. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/__version__.py +0 -0
  14. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/aggregation/__init__.py +0 -0
  15. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/aggregation/df_types.py +0 -0
  16. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/aggregation/optimize_data.py +0 -0
  17. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/bdup/__init__.py +0 -0
  18. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/bdup/bdup.py +0 -0
  19. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/clean/__init__.py +0 -0
  20. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/company/__init__.py +0 -0
  21. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/company/copysh.py +0 -0
  22. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/config/__init__.py +0 -0
  23. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/config/get_myconf.py +0 -0
  24. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/config/products.py +0 -0
  25. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/config/set_support.py +0 -0
  26. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/config/update_conf.py +0 -0
  27. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/dataframe/__init__.py +0 -0
  28. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/log/__init__.py +0 -0
  29. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/log/mylogger.py +0 -0
  30. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/mongo/__init__.py +0 -0
  31. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/mongo/mongo.py +0 -0
  32. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/mysql/__init__.py +0 -0
  33. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/mysql/year_month_day.py +0 -0
  34. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/other/__init__.py +0 -0
  35. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/other/porxy.py +0 -0
  36. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/other/pov_city.py +0 -0
  37. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/other/ua_sj.py +0 -0
  38. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/pbix/__init__.py +0 -0
  39. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/pbix/pbix_refresh.py +0 -0
  40. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/pbix/refresh_all.py +0 -0
  41. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq/spider/__init__.py +0 -0
  42. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq.egg-info/SOURCES.txt +0 -0
  43. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq.egg-info/dependency_links.txt +0 -0
  44. {mdbq-1.2.5 → mdbq-1.2.7}/mdbq.egg-info/top_level.txt +0 -0
  45. {mdbq-1.2.5 → mdbq-1.2.7}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mdbq
3
- Version: 1.2.5
3
+ Version: 1.2.7
4
4
  Home-page: https://pypi.org/project/mdbsql
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -300,6 +300,7 @@ class DatabaseUpdate:
300
300
  df.insert(loc=0, column='日期', value=date)
301
301
  df['省份'] = pov
302
302
  df['省+市'] = df[['省份', '城市']].apply(lambda x: f'{x["省份"]}-{x["城市"]}', axis=1)
303
+ df.replace('NAN', 0, inplace=True)
303
304
  elif name.endswith('csv') and 'order' in name:
304
305
  # 生意经,订单数据,仅限月数据
305
306
  pattern = re.findall(r'(.*)(\d{4})(\d{2})(\d{2})-(\d{4})(\d{2})(\d{2})', name)
@@ -639,7 +640,13 @@ class DatabaseUpdate:
639
640
  collection_name=collection_name,
640
641
  is_file_dtype=True, # 默认本地文件优先: True
641
642
  )
642
- m.df_to_mysql(df=df, db_name=db_name, table_name=collection_name)
643
+ m.df_to_mysql(
644
+ df=df,
645
+ db_name=db_name,
646
+ table_name=collection_name,
647
+ df_sql=False, # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
648
+ drop_dup=True # 值为 True 时检查重复数据再插入,反之直接上传
649
+ )
643
650
  df_to_json.as_json_file() # 写入 json 文件, 包含数据的 dtypes 信息
644
651
 
645
652
  def new_unzip(self, path=None, is_move=None):
@@ -850,6 +857,8 @@ def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': Tr
850
857
  df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
851
858
  if len(df) == 0:
852
859
  continue
860
+ # if '新版' not in name:
861
+ # continue
853
862
  cv = converter.DataFrameConverter()
854
863
  df = cv.convert_df_cols(df=df) # 清理列名和 df 中的非法字符
855
864
  try:
@@ -973,5 +982,22 @@ if __name__ == '__main__':
973
982
  # target_service='home_lx',
974
983
  # database='mysql'
975
984
  # )
985
+ # db_name = '生意参谋2'
986
+ # table_name = '店铺来源_日数据_新版'
987
+ # upload_dir(
988
+ # path='/Users/xigua/数据中心/原始文件2/生意参谋/流量来源',
989
+ # db_name=db_name,
990
+ # collection_name=table_name,
991
+ # dbs={'mysql': True, 'mongodb': False},
992
+ # )
993
+
994
+ # test2()
976
995
 
977
- test2()
996
+ file = '/Users/xigua/Downloads/余额查询.csv'
997
+ df = pd.read_csv(file, encoding='utf-8_sig', header=0, na_filter=False)
998
+ username, password, host, port = get_myconf.select_config_values(target_service='company', database='mysql')
999
+ m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
1000
+ m.df_to_mysql(df=df, db_name='test', table_name='增量更新测试',
1001
+ drop_dup=False,
1002
+ icm_update=['日期', '推广费余额']
1003
+ )
@@ -206,7 +206,7 @@ def mysql_all_dtypes(db_name=None, table_name=None, path=None):
206
206
  time.sleep(0.5)
207
207
 
208
208
  d = DataTypes()
209
- d.json_file = os.path.join(path, 'mysql_types.json') # # json 保存位置
209
+ d.json_file = os.path.join(path, f'mysql_types.json') # # json 保存位置
210
210
  for result in results:
211
211
  for db_n, table_n in result.items():
212
212
  # print(db_n, table_n, db_name, table_name)
@@ -240,6 +240,7 @@ class GroupBy:
240
240
  '总成交金额': '成交金额'
241
241
  }, inplace=True)
242
242
  df = df.astype({
243
+ '商品id': str,
243
244
  '花费': float,
244
245
  '展现量': int,
245
246
  '点击量': int,
@@ -263,12 +264,13 @@ class GroupBy:
263
264
  )
264
265
  else:
265
266
  df = df.groupby(['日期', '营销场景', '商品id', '花费', '展现量', '点击量'], as_index=False).agg(
266
- **{'加购量': ('加购量', np.min),
267
- '成交笔数': ('成交笔数', np.min),
268
- '成交金额': ('成交金额', np.min),
269
- '自然流量曝光量': ('自然流量曝光量', np.min),
270
- '直接成交笔数': ('直接成交笔数', np.max),
271
- '直接成交金额': ('直接成交金额', np.max)
267
+ **{
268
+ '加购量': ('加购量', np.min),
269
+ '成交笔数': ('成交笔数', np.min),
270
+ '成交金额': ('成交金额', np.min),
271
+ '自然流量曝光量': ('自然流量曝光量', np.min),
272
+ '直接成交笔数': ('直接成交笔数', np.max),
273
+ '直接成交金额': ('直接成交金额', np.max)
272
274
  }
273
275
  )
274
276
  df.insert(loc=1, column='推广渠道', value='万相台无界版') # df中插入新列
@@ -276,10 +278,10 @@ class GroupBy:
276
278
  **{
277
279
  '花费': ('花费', np.sum),
278
280
  '成交笔数': ('成交笔数', np.max),
279
- '成交金额': ('成交金额', np.max),
280
- '自然流量曝光量': ('自然流量曝光量', np.max),
281
- '直接成交笔数': ('直接成交笔数', np.max),
282
- '直接成交金额': ('直接成交金额', np.max)
281
+ '成交金额': ('成交金额', np.max),
282
+ '自然流量曝光量': ('自然流量曝光量', np.max),
283
+ '直接成交笔数': ('直接成交笔数', np.max),
284
+ '直接成交金额': ('直接成交金额', np.max)
283
285
  }
284
286
  )
285
287
  self.data_tgyj.update(
@@ -290,6 +292,7 @@ class GroupBy:
290
292
  return df
291
293
  elif '宝贝指标' in table_name:
292
294
  """ 聚合时不可以加商家编码,编码有些是空白,有些是 0 """
295
+ df['宝贝id'] = df['宝贝id'].astype(str)
293
296
  df.fillna(0, inplace=True)
294
297
  # df = df[(df['销售额'] != 0) | (df['退款额'] != 0)] # 注释掉, 因为后续使用生意经作为基准合并推广表,需确保所有商品id 齐全
295
298
  df = df.groupby(['日期', '宝贝id', '行业类目'], as_index=False).agg(
@@ -320,6 +323,7 @@ class GroupBy:
320
323
  elif '店铺来源_日数据' in table_name:
321
324
  return df
322
325
  elif '商品id编码表' in table_name:
326
+ df['宝贝id'] = df['宝贝id'].astype(str)
323
327
  df.drop_duplicates(subset='宝贝id', keep='last', inplace=True, ignore_index=True)
324
328
  # df['行业类目'] = df['行业类目'].apply(lambda x: re.sub(' ', '', x))
325
329
  try:
@@ -359,6 +363,7 @@ class GroupBy:
359
363
  table_name: df[['商品id', '商品图片']],
360
364
  }
361
365
  )
366
+ df['商品id'] = df['商品id'].astype(str)
362
367
  return df
363
368
  elif '商品成本' in table_name:
364
369
  df.sort_values(by=['款号', '日期'], ascending=[False, True], ignore_index=True, inplace=True)
@@ -373,7 +378,7 @@ class GroupBy:
373
378
  print(f'<{table_name}>: Groupby 类尚未配置,数据为空')
374
379
  return pd.DataFrame({})
375
380
 
376
- @try_except
381
+ # @try_except
377
382
  def performance(self, bb_tg=True):
378
383
  # print(self.data_tgyj)
379
384
  tg, syj, idbm, pic, cost = (
@@ -390,13 +395,14 @@ class GroupBy:
390
395
  df = pd.merge(tg, df, how='left', left_on='商品id', right_on='宝贝id')
391
396
  df.drop(labels='宝贝id', axis=1, inplace=True)
392
397
  if bb_tg is True:
393
- # 生意经合并推广表,完整的数据表,包含全店所有推广、销售数据
398
+ # 生意经合并推广表,完整的数据表,包含全店所有推广、销售数据
394
399
  df = pd.merge(syj, df, how='left', left_on=['日期', '宝贝id'], right_on=['日期', '商品id'])
395
400
  else:
396
401
  # 推广表合并生意经 , 以推广数据为基准,销售数据不齐全
397
402
  df = pd.merge(df, syj, how='left', left_on=['日期', '商品id'], right_on=['日期', '宝贝id'])
398
403
  df.drop(labels='宝贝id', axis=1, inplace=True)
399
404
  df.drop_duplicates(subset=['日期', '商品id', '花费', '销售额'], keep='last', inplace=True, ignore_index=True)
405
+ df['成本价'] = df['成本价'].astype('float64')
400
406
  df['商品成本'] = df.apply(lambda x: (x['成本价'] + x['销售额']/x['销售量'] * 0.11 + 6) * x['销售量'] if x['销售量'] > 0 else 0, axis=1)
401
407
  df['商品毛利'] = df.apply(lambda x: x['销售额'] - x['商品成本'], axis=1)
402
408
  df['毛利率'] = df.apply(lambda x: round((x['销售额'] - x['商品成本']) / x['销售额'], 4) if x['销售额'] > 0 else 0, axis=1)
@@ -493,53 +499,78 @@ def data_aggregation(service_databases=[{}]):
493
499
  for service_database in service_databases:
494
500
  for service_name, database in service_database.items():
495
501
  sdq = MysqlDatasQuery(target_service=service_name) # 实例化数据处理类
496
- sdq.months = 1 # 设置数据周期, 1 表示近 2 个月
502
+ sdq.months = 0 # 设置数据周期, 1 表示近 2 个月
497
503
  g = GroupBy() # 实例化数据聚合类
498
504
  # 实例化数据库连接
499
505
  username, password, host, port = get_myconf.select_config_values(target_service=service_name, database=database)
500
506
  m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
501
507
 
508
+ # 从数据库中获取数据, 返回包含 df 数据的字典
502
509
  data_dict = [
503
510
  {
504
511
  '数据库名': '聚合数据',
505
512
  '集合名': '宝贝主体报表',
513
+ '唯一主键': ['日期', '推广渠道', '营销场景', '商品id'],
506
514
  '数据主体': sdq.tg_wxt(),
507
515
  },
508
516
  {
509
517
  '数据库名': '聚合数据',
510
518
  '集合名': '天猫生意经_宝贝指标',
519
+ '唯一主键': ['日期', '宝贝id'],
511
520
  '数据主体': sdq.syj(),
512
521
  },
513
522
  {
514
523
  '数据库名': '聚合数据',
515
524
  '集合名': '天猫_店铺来源_日数据',
525
+ '唯一主键': ['日期', '一级来源', '二级来源', '三级来源'],
516
526
  '数据主体': sdq.dplyd(),
517
527
  },
518
528
  {
519
529
  '数据库名': '聚合数据',
520
530
  '集合名': '商品id编码表',
531
+ '唯一主键': ['宝贝id'],
521
532
  '数据主体': sdq.idbm(),
522
533
  },
523
534
  {
524
535
  '数据库名': '聚合数据',
525
536
  '集合名': '商品id图片对照表',
537
+ '唯一主键': ['商品id'],
526
538
  '数据主体': sdq.sp_picture(),
527
539
  },
528
540
  {
529
541
  '数据库名': '聚合数据',
530
542
  '集合名': '商品成本',
543
+ '唯一主键': ['款号'],
531
544
  '数据主体': sdq.sp_cost(),
532
545
  },
533
546
  ]
534
- for items in data_dict:
535
- db_name, table_name, df = items['数据库名'], items['集合名'], items['数据主体']
547
+ for items in data_dict: # 遍历返回结果
548
+ db_name, table_name, unique_key_list, df = items['数据库名'], items['集合名'], items['唯一主键'], items['数据主体']
536
549
  df = g.groupby(df=df, table_name=table_name, is_maximize=True) # 2. 聚合数据
537
550
  # g.as_csv(df=df, filename=table_name + '.csv')
538
- m.df_to_mysql(df=df, db_name=db_name, table_name=table_name) # 3. 回传数据库
551
+ m.df_to_mysql(
552
+ df=df,
553
+ db_name=db_name,
554
+ table_name=table_name,
555
+ drop_dup=False,
556
+ icm_update=unique_key_list
557
+ ) # 3. 回传数据库
539
558
  res = g.performance(bb_tg=True) # 盈亏表,依赖其他表,单独做
540
- m.df_to_mysql(df=res, db_name='聚合数据', table_name='_全店商品销售')
559
+ m.df_to_mysql(
560
+ df=res,
561
+ db_name='聚合数据',
562
+ table_name='_全店商品销售',
563
+ drop_dup=False,
564
+ icm_update=['日期', '商品id'] # 设置唯一主键
565
+ )
541
566
  res = g.performance(bb_tg=False) # 盈亏表,依赖其他表,单独做
542
- m.df_to_mysql(df=res, db_name='聚合数据', table_name='_推广商品销售')
567
+ m.df_to_mysql(
568
+ df=res,
569
+ db_name='聚合数据',
570
+ table_name='_推广商品销售',
571
+ drop_dup=False,
572
+ icm_update=['日期', '商品id'] # 设置唯一主键
573
+ )
543
574
 
544
575
  # optimize_data.op_data(service_databases=service_databases, days=3650) # 立即启动对聚合数据的清理工作
545
576
 
@@ -1136,15 +1136,15 @@ class DataClean:
1136
1136
  if not path:
1137
1137
  path = self.path
1138
1138
 
1139
- if self.set_up_to_mogo:
1140
- username, password, host, port = get_myconf.select_config_values(target_service='home_lx',
1141
- database='mongodb')
1142
- d = mongo.UploadMongo(username=username, password=password, host=host, port=port,
1143
- drop_duplicates=False
1144
- )
1145
- if self.set_up_to_mysql:
1146
- username, password, host, port = get_myconf.select_config_values(target_service='home_lx', database='mysql')
1147
- m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
1139
+ # if self.set_up_to_mogo:
1140
+ # username, password, host, port = get_myconf.select_config_values(target_service='home_lx',
1141
+ # database='mongodb')
1142
+ # d = mongo.UploadMongo(username=username, password=password, host=host, port=port,
1143
+ # drop_duplicates=False
1144
+ # )
1145
+ # if self.set_up_to_mysql:
1146
+ # username, password, host, port = get_myconf.select_config_values(target_service='home_lx', database='mysql')
1147
+ # m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
1148
1148
  new_save_path = os.path.join(self.source_path, '属性设置', '商品素材')
1149
1149
  for root, dirs, files in os.walk(path, topdown=False):
1150
1150
  for name in files:
@@ -1181,17 +1181,17 @@ class DataClean:
1181
1181
  )
1182
1182
  # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
1183
1183
  self.save_to_csv(df, new_save_path, new_name, encoding='utf-8_sig')
1184
- try:
1185
- if self.set_up_to_mogo:
1186
- d.df_to_mongo(df=df, db_name=db_name, collection_name=collection_name)
1187
- if self.set_up_to_mysql:
1188
- m.df_to_mysql(df=df, db_name=db_name, tabel_name=collection_name)
1189
- except Exception as e:
1190
- print(e)
1184
+ # try:
1185
+ # if self.set_up_to_mogo:
1186
+ # d.df_to_mongo(df=df, db_name=db_name, collection_name=collection_name)
1187
+ # if self.set_up_to_mysql:
1188
+ # m.df_to_mysql(df=df, db_name=db_name, tabel_name=collection_name)
1189
+ # except Exception as e:
1190
+ # print(e)
1191
1191
  os.remove(os.path.join(root, name))
1192
- if self.set_up_to_mogo:
1193
- if d.client:
1194
- d.client.close() # 必须手动关闭数据库连接
1192
+ # if self.set_up_to_mogo:
1193
+ # if d.client:
1194
+ # d.client.close() # 必须手动关闭数据库连接
1195
1195
 
1196
1196
  # @try_except
1197
1197
  def new_unzip(self, path=None, is_move=None):
@@ -61,7 +61,8 @@ class DataFrameConverter(object):
61
61
  # 转换日期样式的列为日期类型
62
62
  value = df.loc[0, col]
63
63
  if value:
64
- res = re.match(r'\d{4}-\d{2}-\d{2}|\d{4}-\d{2}-\d{2} |\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', str(value))
64
+ res = re.match(r'\d{4}-\d{2}-\d{2}|\d{4}-\d{2}-\d{2} |\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}'
65
+ r'|\d{4}/\d{1}/\d{1}|\d{4}/\d{1}/\d{2}|\d{4}/\d{2}/\d{1}|\d{4}/\d{2}/\d{2}', str(value))
65
66
  if res:
66
67
  try:
67
68
  df[col] = df[col].apply(lambda x: pd.to_datetime(x))
@@ -10,6 +10,7 @@ import pymysql
10
10
  import numpy as np
11
11
  import pandas as pd
12
12
  import sqlalchemy.types
13
+ from macholib.mach_o import rpath_command
13
14
  from more_itertools.more import iequals
14
15
  from pandas.core.dtypes.common import INT64_DTYPE
15
16
  from sqlalchemy import create_engine
@@ -60,15 +61,17 @@ class MysqlUpload:
60
61
  }
61
62
  self.filename = None
62
63
 
63
- def df_to_mysql(self, df, table_name, db_name='远程数据源', df_sql=False, drop_dup=True, drop_duplicates=False, filename=None, count=None):
64
+ def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], icm_up=[], df_sql=False, drop_dup=True, drop_duplicates=False, filename=None, count=None):
64
65
  """
65
66
  将 df 写入数据库
66
67
  db_name: 数据库名称
67
68
  table_name: 集合/表名称
68
- df_sql: 使用 df.to_sql 函数上传整个表
69
- drop_duplicates:仅限于聚合数据使用,其他情况不要设置此参数
69
+ df_sql: 这是一个临时参数, 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
70
+ drop_duplicates:值为 True 时(仅限于聚合数据使用),其他情况不要设置此参数
70
71
  drop_dup: 值为 True 时检查重复数据再插入,反之直接上传
71
72
  filename: 传这个参数是方便定位产生错误的文件
73
+ icm_update: 增量更新, 在聚合数据中使用,原始文件不要使用,设置此参数时需将 drop_dup 改为 False
74
+ 使用增量更新: 必须确保 icm_update 传进来的列必须是数据表中唯一主键,值不会发生变化,不会重复,否则可能产生错乱覆盖情况
72
75
  """
73
76
  self.filename = filename
74
77
  if isinstance(df, pd.DataFrame):
@@ -153,7 +156,7 @@ class MysqlUpload:
153
156
 
154
157
  if df_sql:
155
158
  now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
156
- print(f'{now}正在更新 mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count},{self.filename}')
159
+ print(f'{now}正在更新 mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count}, {self.filename}')
157
160
  engine = create_engine(
158
161
  f"mysql+pymysql://{self.username}:{self.password}@{self.host}:{self.port}/{db_name}") # 创建数据库引擎
159
162
  df.to_sql(
@@ -168,6 +171,7 @@ class MysqlUpload:
168
171
 
169
172
  # print(cl, db_n, tb_n)
170
173
  # 返回这些结果的目的是等添加完列再写 json 文件才能读到 types 信息
174
+ # ⚠️ mysql_all_dtypes 函数默认只读取 home_lx 的数据库信息,不会读取其他系统
171
175
  if cl and db_n and tb_n:
172
176
  mysql_types.mysql_all_dtypes(db_name=db_name, table_name=table_name) # 更新一个表的 dtypes
173
177
  elif cl and db_n:
@@ -186,12 +190,15 @@ class MysqlUpload:
186
190
 
187
191
  # 5. 更新插入数据
188
192
  now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
189
- print(f'{now}正在更新 mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count},{self.filename}')
193
+ print(f'{now}正在更新 mysql ({self.host}:{self.port}) {db_name}/{table_name}, {count}, {self.filename}')
190
194
 
191
195
  datas = df.to_dict(orient='records')
192
196
  for data in datas:
197
+ # data 是传进来待处理的数据, 不是数据库数据
198
+ # print(data)
199
+ # data 示例: {'日期': Timestamp('2024-08-27 00:00:00'), '推广费余额': 33299, '品销宝余额': 2930.73, '短信剩余': 67471}
193
200
  try:
194
- cols = ', '.join(f"`{item}`" for item in data.keys()) # 列名转义
201
+ cols = ', '.join(f"`{item}`" for item in data.keys()) # 列名需要转义
195
202
  # data.update({item: f"{data[item]}" for item in data.keys()}) # 全部值转字符, 不是必须的
196
203
  values = ', '.join([f"'{item}'" for item in data.values()]) # 值要加单引号 ''
197
204
  condition = []
@@ -200,7 +207,7 @@ class MysqlUpload:
200
207
  condition = ' AND '.join(condition) # 构建查询条件
201
208
  # print(condition)
202
209
 
203
- if drop_dup:
210
+ if drop_dup: # 查重插入
204
211
  sql = f"SELECT {cols} FROM `{table_name}` WHERE {condition}"
205
212
  # sql = f"SELECT {cols} FROM `{table_name}` WHERE `创建时间` = '2014-09-19 14:32:33'"
206
213
  cursor.execute(sql)
@@ -210,6 +217,69 @@ class MysqlUpload:
210
217
  cursor.execute(sql)
211
218
  # else:
212
219
  # print(f'重复数据不插入: {condition[:50]}...')
220
+ elif icm_update: # 增量更新
221
+ """ 使用增量更新: 需确保 icm_update['主键'] 传进来的列必须是数据表中唯一主键,值不会发生变化且不会重复,否则可能产生覆盖情况 """
222
+ sql = 'SELECT COLUMN_NAME FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
223
+ cursor.execute(sql, (db_name, {table_name}))
224
+ columns = cursor.fetchall()
225
+ cols_exist = [col['COLUMN_NAME'] for col in columns] # 数据表的所有列, 返回 list
226
+ update_col = [item for item in cols_exist if item not in icm_update and item != 'id'] # 除了主键外的其他列
227
+
228
+ # unique_keys 示例: `日期`, `推广费余额`
229
+ unique_keys = ', '.join(f"`{item}`" for item in update_col) # 列名需要转义
230
+ condition = []
231
+ for up_col in icm_update:
232
+ condition += [f"`{up_col}` = '{data[up_col]}'"]
233
+ condition = ' AND '.join(condition) # condition值示例: `品销宝余额` = '2930.73' AND `短信剩余` = '67471'
234
+ sql = f"SELECT {unique_keys} FROM `{table_name}` WHERE {condition}"
235
+ # print(sql)
236
+ # sql = f"SELECT {unique_keys} FROM `{table_name}` WHERE `创建时间` = '2014-09-19 14:32:33'"
237
+ cursor.execute(sql)
238
+ results = cursor.fetchall() # results 是数据库取出的数据
239
+ if results: # 有数据返回,再进行增量更新检查
240
+ for result in results: # results 是数据库数据, data 是传进来的数据
241
+ not_change_col = []
242
+ change_values = []
243
+ for col in update_col:
244
+ # 因为 mysql 里面有 decimal 数据类型,要移除末尾的 0 再做比较(df 默认将 5.00 小数截断为 5.0)
245
+ if str(data[col]).rstrip('0') != str(result[col]).rstrip('0'): # 传进来的数据和数据库比较, 有变化
246
+ # print(f'{col} 列的值有变化,{str(data[col]) } != {str(result[col])}')
247
+ change_values += [f"`{col}` = '{str(data[col])}'"]
248
+ not_change_col += [item for item in update_col if item != col]
249
+
250
+ # change_values 是 df 传进来且和数据库对比后,发生了变化的数据,值示例: [`品销宝余额` = '9999.0', `短信剩余` = '888']
251
+ if change_values: # change_values 有数据返回,表示值需要更新
252
+ not_change_values = [f"`{col}` = '{str(data[col])}'" for col in not_change_col]
253
+ not_change_values = ' AND '.join(not_change_values) # 示例: `短信剩余` = '888' AND `test1` = '93'
254
+ # print(change_values, not_change_values)
255
+ condition += f' AND {not_change_values}' # 重新构建完整的查询条件,将未发生变化的列加进查询条件
256
+ change_values = ', '.join(f"{item}" for item in change_values) # 注意这里 item 外面没有反引号
257
+ sql = f"UPDATE {table_name} SET {change_values} WHERE {condition}"
258
+ # print(sql)
259
+ cursor.execute(sql)
260
+ else: # 没有数据返回,则直接插入数据
261
+ sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({values});"
262
+ cursor.execute(sql)
263
+ # elif icm_up:
264
+ # sql = 'SELECT COLUMN_NAME FROM information_schema.columns WHERE table_schema = %s AND table_name = %s'
265
+ # cursor.execute(sql, (db_name, {table_name}))
266
+ # columns = cursor.fetchall()
267
+ # cols_exist = [col['COLUMN_NAME'] for col in columns] # 数据表的所有列, 返回 list
268
+ # cols_exist = [item for item in cols_exist if item != 'id']
269
+ # update_col = [item for item in cols_exist if item not in icm_up] # 除了主键外的其他列
270
+ #
271
+ # unique_keys = ', '.join([f"`{item}`" for item in cols_exist])
272
+ # unique_keys_values = ', '.join([f"'{data[item]}'" for item in cols_exist])
273
+ #
274
+ # change_values = []
275
+ # for col in update_col:
276
+ # change_values += [f"`{col}` = '{str(data[col])}'"]
277
+ # change_values = ', '.join(f"{item}" for item in change_values) # 注意这里 item 外面没有反引号
278
+ # # print(change_values)
279
+ # sql = f"INSERT INTO `{table_name}` ({unique_keys}) VALUES ({unique_keys_values}) ON DUPLICATE KEY UPDATE {change_values};"
280
+ # print(sql)
281
+ # # cursor.execute(sql)
282
+
213
283
  else:
214
284
  sql = f"INSERT INTO `{table_name}` ({cols}) VALUES ({values});"
215
285
  cursor.execute(sql)
@@ -12,6 +12,7 @@ from sqlalchemy import create_engine
12
12
  import os
13
13
  import calendar
14
14
  from mdbq.config import get_myconf
15
+ from mdbq.dataframe import converter
15
16
 
16
17
  warnings.filterwarnings('ignore')
17
18
  """
@@ -84,6 +85,8 @@ class QueryDatas:
84
85
 
85
86
  if len(df) == 0:
86
87
  print(f'database: {db_name}, table: {table_name} 查询的数据为空')
88
+ cv = converter.DataFrameConverter()
89
+ df = cv.convert_df_cols(df)
87
90
  return df
88
91
 
89
92
  def columns_to_list(self, db_name, table_name, columns_name) -> list:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mdbq
3
- Version: 1.2.5
3
+ Version: 1.2.7
4
4
  Home-page: https://pypi.org/project/mdbsql
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -3,7 +3,7 @@
3
3
  from setuptools import setup, find_packages
4
4
 
5
5
  setup(name='mdbq',
6
- version='1.2.5',
6
+ version='1.2.7',
7
7
  author='xigua, ',
8
8
  author_email="2587125111@qq.com",
9
9
  url='https://pypi.org/project/mdbsql',
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes