mdbq 2.7.1__py3-none-any.whl → 2.7.3__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registries, and is provided for informational purposes only.
@@ -4,13 +4,14 @@ import pandas as pd
  from functools import wraps
  import chardet
  import zipfile
+ import socket
  from pyzipper import PyZipFile
  import os
  import platform
  import json
  from mdbq.mongo import mongo
  from mdbq.mysql import mysql
- from mdbq.config import get_myconf
+ from mdbq.config import myconfig
  from mdbq.aggregation import df_types
  from mdbq.config import products
  from mdbq.aggregation import optimize_data
@@ -41,18 +42,32 @@ else:
  Share_Path = os.path.join('/Volumes/时尚事业部/01.运营部/天猫报表')  # shared-drive root directory

  upload_path = os.path.join(D_PATH, '数据上传中心')  # this directory lives in the Downloads folder
- source_path = os.path.join(Data_Path, '原始文件2')  # downloaded-and-cleaned files are kept here as a database backup
+ # source_path = os.path.join(Data_Path, '原始文件2')  # downloaded-and-cleaned files are kept here as a database backup
  source_path3 = os.path.join(Data_Path, '原始文件3')  # downloaded-and-cleaned files are kept here as a database backup

+ username, password, host, port, service_database = None, None, None, None, None
+ if socket.gethostname().lower() in ['xigua_lx', 'xigua1', 'macbook pro']:
+     conf = myconfig.main()
+     conf_data = conf['Windows']['xigua_lx']['mysql']['local']
+     username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
+     service_database = {'xigua_lx': 'mysql'}
+ elif socket.gethostname().lower() in ['company', 'mac2.local']:
+     conf = myconfig.main()
+     conf_data = conf['Windows']['company']['mysql']['local']
+     username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
+     service_database = {'company': 'mysql'}
+ if not username:
+     print(f'unknown host: {socket.gethostname()}')
+


  class DataClean:
      """ data classification """

-     def __init__(self, path, source_path, service_databases):
+     def __init__(self, path, source_path):
          self.path = path  # data source: the Downloads folder
          self.source_path = source_path  # where the original files are archived
          self.datas = []
-         self.service_databases = service_databases

      @staticmethod
      def try_except(func):  # an exception-handling decorator defined inside the class
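The host-based credential lookup added above replaces the old `service_databases` plumbing: each process now resolves its own MySQL credentials at import time via `myconfig`. A minimal sketch of the pattern, assuming the config layout shown in the diff (`'Windows'` → host key → `'mysql'` → `'local'`); the host-to-key mapping below is illustrative, not part of the package:

```python
import socket
from mdbq.config import myconfig

# hostnames recognized in the diff, mapped to their config keys (assumed)
HOST_KEYS = {
    'xigua_lx': 'xigua_lx', 'xigua1': 'xigua_lx', 'macbook pro': 'xigua_lx',
    'company': 'company', 'mac2.local': 'company',
}

def load_local_mysql_conf():
    key = HOST_KEYS.get(socket.gethostname().lower())
    if key is None:
        return None  # unknown host, mirroring the bare print() fallback above
    conf = myconfig.main()['Windows'][key]['mysql']['local']
    return conf['username'], conf['password'], conf['host'], conf['port']
```

One consequence of this design: credentials are resolved as a module-import side effect, so an unrecognized hostname only prints a warning and leaves the globals as `None` until the first connection attempt fails.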
@@ -264,6 +279,7 @@ class DataClean:
              os.remove(os.path.join(root, name))
          elif name.endswith('.csv') and 'dmp人群报表_' in name:
              df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
+             df = df[df['日期'] != '']
              if len(df) == 0:
                  print(f'{name} report is empty')
                  os.remove(os.path.join(root, name))
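The new `df = df[df['日期'] != '']` line works because the file is read with `na_filter=False`: empty CSV cells come back as the empty string rather than NaN, so a plain string comparison drops blank-date rows and lets the `len(df) == 0` check catch effectively empty reports. A self-contained illustration:

```python
import io
import pandas as pd

csv = io.StringIO('日期,人群\n2024-01-01,A\n,B\n')  # second row has a blank date
df = pd.read_csv(csv, header=0, na_filter=False)    # blank cells load as ''
df = df[df['日期'] != '']                            # keeps only the 2024-01-01 row
print(len(df))  # 1
```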
@@ -917,7 +933,7 @@ class DataClean:

          if name.endswith('.xlsx') and '商品素材_' in name:
              shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)_', name)[0]
-             df = pd.read_excel(os.path.join(root, name), header=0, engine='xlrd')
+             df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
              if '日期' not in df.columns.tolist():
                  df.insert(loc=0, column='日期', value=datetime.datetime.today().strftime('%Y-%m-%d'))
              if '店铺名称' not in df.columns.tolist():
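The engine swap above is a compatibility fix: xlrd 2.0 removed support for `.xlsx` files (it now reads only legacy `.xls`), so `.xlsx` material reports need openpyxl. The filename below is a placeholder:

```python
import pandas as pd

# openpyxl handles .xlsx; xlrd >= 2.0 would raise on this file
df = pd.read_excel('商品素材_示例.xlsx', header=0, engine='openpyxl')
```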
@@ -1007,7 +1023,7 @@ class DataClean:
              t_path = os.path.join(self.source_path, '生意参谋', '商品属性')
              bib(t_path, _as_month=True)
          elif name.endswith('.csv') and '爱库存_商品榜单_' in name:
-             t_path = os.path.join(self.source_path, '爱库存', 'spu商品榜单')
+             t_path = os.path.join(self.source_path, '爱库存', '商品spu榜单')
              bib(t_path, _as_month=True)
          elif name.endswith('.csv') and '手淘搜索_本店引流词_' in name:
              t_path = os.path.join(self.source_path, '生意参谋', '手淘搜索_本店引流词')
@@ -1049,7 +1065,6 @@ class DataClean:
              t_path = os.path.join(self.source_path, '达摩盘', 'dmp人群报表')
              bib(t_path, _as_month=True)

-
      # @try_except
      def move_sjy(self, path=None, is_except=[]):
          if not path:
@@ -1127,22 +1142,22 @@ class DataClean:
              if 'py_xg' not in name:  # skip non-target files
                  continue

-             if name.endswith('.xlsx') and '京东商智_spu_商品明细' in name:
+             if name.endswith('.csv') and '京东商智_spu_商品明细' in name:
                  t_path = os.path.join(self.source_path, '京东报表', '京东商智_spu_商品明细')
                  bib(t_path, _as_month=True)
-             elif name.endswith('.xlsx') and '京东商智_sku_商品明细' in name:
+             elif name.endswith('.csv') and '京东商智_sku_商品明细' in name:
                  t_path = os.path.join(self.source_path, '京东报表', '京东商智_sku_商品明细')
                  bib(t_path, _as_month=True)
-             elif name.endswith('.xlsx') and '京东推广_搜索词' in name:
+             elif name.endswith('.csv') and '京东推广_搜索词' in name:
                  t_path = os.path.join(self.source_path, '京东报表', '搜索词报表')
                  bib(t_path, _as_month=True)
-             elif name.endswith('.xlsx') and '京东推广_点击成交' in name:
+             elif name.endswith('.csv') and '京东推广_点击成交' in name:
                  t_path = os.path.join(self.source_path, '京东报表', '推广报表')
                  bib(t_path, _as_month=True)
-             elif name.endswith('.xlsx') and '京东推广_关键词点击' in name:
+             elif name.endswith('.csv') and '京东推广_关键词点击' in name:
                  t_path = os.path.join(self.source_path, '京东报表', '关键词报表')
                  bib(t_path, _as_month=True)
-             elif name.endswith('.xlsx') and '京东商智_店铺来源_三级来源' in name:
+             elif name.endswith('.csv') and '京东商智_店铺来源_三级来源' in name:
                  t_path = os.path.join(self.source_path, '京东报表', '店铺来源_三级来源')
                  bib(t_path, _as_month=True)

@@ -1411,118 +1426,59 @@ class DataClean:
          if _new_root != _root or '__MACOSX' in _root:
              shutil.rmtree(_root)

-     def upload_df(self, service_databases=None, path=None):
+     def upload_df(self, path=None):
          """
          upload the cleaned DataFrames to the database; called by copysh.py
          """
-         if not service_databases:
-             service_databases = self.service_databases
          df_to_json = df_types.DataTypes()  # json file that records the dtypes of the data
-         for service_database in service_databases:
-             for service_name, database in service_database.items():
-                 # print(service_name, database)
-                 if database == 'mongodb':
-                     username, password, host, port = get_myconf.select_config_values(
-                         target_service=service_name,
-                         database=database,
-                     )
-                     d = mongo.UploadMongo(
-                         username=username,
-                         password=password,
-                         host=host,
-                         port=port,
-                         drop_duplicates=False,
-                     )
-                     for data in self.datas:
-                         db_name, collection_name, df = data['数据库名'], data['集合名称'], data['数据主体']
-                         df_to_json.get_df_types(
-                             df=df,
-                             db_name=db_name,
-                             collection_name=collection_name,
-                             is_file_dtype=True,  # local-file dtypes take precedence by default: True
-                         )
-                         d.df_to_mongo(df=df, db_name=db_name, collection_name=collection_name)
-                     if d.client:
-                         d.client.close()
-
-                 elif database == 'mysql':
-                     username, password, host, port = get_myconf.select_config_values(
-                         target_service=service_name,
-                         database=database,
-                     )
-                     m = mysql.MysqlUpload(
-                         username=username,
-                         password=password,
-                         host=host,
-                         port=port,
-                     )
-                     for data in self.datas:
-                         df, db_name, collection_name, rt_filename = data['数据主体'], data['数据库名'], data['集合名称'], data['文件名']
-                         df_to_json.get_df_types(
-                             df=df,
-                             db_name=db_name,
-                             collection_name=collection_name,
-                             is_file_dtype=True,  # local-file dtypes take precedence by default: True
-                         )
-                         m.df_to_mysql(
-                             df=df,
-                             db_name=db_name,
-                             table_name=collection_name,
-                             move_insert=True,  # delete first, then insert
-                             df_sql=False,  # True: upload the whole table via df.to_sql, no deduplication
-                             drop_duplicates=False,  # True: check for duplicates before inserting (slower)
-                             filename=rt_filename,  # used to track processing progress
-                             service_database=service_database,  # a dict
-                         )
-         df_to_json.as_json_file()  # write the json file that records the dtypes of the data
-
-
- def test():
-     # main_key = '单元报表'
-     path = f'/Users/xigua/数据中心/原始文件3/天猫推广报表/主体报表'
-     for root, dirs, files in os.walk(path, topdown=False):
-         for name in files:
-             if '~$' in name or '.DS' in name or '.localized' in name or '.jpg' in name or '.png' in name:
-                 continue
-             # if 'py_xg' in name:
-             #     continue
-             # if 'TM_旧表_字段' in root:
-             #     continue
-
-             if name.endswith('.csv'):
-                 print(name)
-                 df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
-                 # if '店铺名称' not in df.columns.tolist():
-                 #     df.insert(loc=1, column='店铺名称', value='万里马官方旗舰店')
-                 # df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
-                 # df.replace(to_replace=[','], value='', regex=True, inplace=True)
-                 # if '统计日期' in df.columns.tolist() and '日期' not in df.columns.tolist():
-                 #     df.rename(columns={'统计日期': '日期', '商品ID': '商品id'}, inplace=True)
-                 # shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)', name)[0]
-                 # df.insert(loc=1, column='店铺名称', value=shop_name)
-
-                 date_all = re.findall(r'_(\d{4}-\d{2}-\d{2})_', name)[0]
-                 date = re.findall(r'_(\d{4}-\d{2})-\d{2}', name)[0]
-                 new_path = f'/Users/xigua/数据中心/原始文件3/天猫_生意参谋/商品排行/{date}'
-                 # new_path = os.path.join(new_path, date)  # add a year-month subfolder
-                 if not os.path.exists(new_path):
-                     os.makedirs(new_path, exist_ok=True)
-                 # print(date_all)
-                 new_name = f'py_xg_商品排行_万里马官方旗舰店_{date_all}.csv'
-                 # print(os.path.join(new_path, new_name))
-                 # breakpoint()
-                 df.to_csv(os.path.join(new_path, new_name), encoding='utf-8_sig', index=False, header=True)
-                 # try:
-                 #     df.to_excel(os.path.join(new_path, new_name),
-                 #                 index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
-                 # except Exception as e:
-                 #     print(e)
-
-
- def date_table(service_databases=[{}]):
+
+         # d = mongo.UploadMongo(
+         #     username=username,
+         #     password=password,
+         #     host=host,
+         #     port=port,
+         #     drop_duplicates=False,
+         # )
+         # for data in self.datas:
+         #     db_name, collection_name, df = data['数据库名'], data['集合名称'], data['数据主体']
+         #     df_to_json.get_df_types(
+         #         df=df,
+         #         db_name=db_name,
+         #         collection_name=collection_name,
+         #         is_file_dtype=True,  # local-file dtypes take precedence by default: True
+         #     )
+         #     d.df_to_mongo(df=df, db_name=db_name, collection_name=collection_name)
+         # if d.client:
+         #     d.client.close()
+
+         m = mysql.MysqlUpload(
+             username=username,
+             password=password,
+             host=host,
+             port=port,
+         )
+         for data in self.datas:
+             df, db_name, collection_name, rt_filename = data['数据主体'], data['数据库名'], data['集合名称'], data['文件名']
+             df_to_json.get_df_types(
+                 df=df,
+                 db_name=db_name,
+                 collection_name=collection_name,
+                 is_file_dtype=True,  # local-file dtypes take precedence by default: True
+             )
+             m.df_to_mysql(
+                 df=df,
+                 db_name=db_name,
+                 table_name=collection_name,
+                 move_insert=True,  # delete first, then insert
+                 df_sql=False,  # True: upload the whole table via df.to_sql, no deduplication
+                 drop_duplicates=False,  # True: check for duplicates before inserting (slower)
+                 filename=rt_filename,  # used to track processing progress
+                 service_database=service_database,  # a dict
+             )
+         df_to_json.as_json_file()  # write the json file that records the dtypes of the data
+
+
+ def date_table():
      """
      build the date table used by pbix
      """
@@ -1549,47 +1505,32 @@ def date_table(service_databases=[{}]):
      df['月索引'] = mon
      df.sort_values('日期', ascending=False, ignore_index=True, inplace=True)

-     for service_database in service_databases:
-         for service_name, database in service_database.items():
-             username, password, host, port = get_myconf.select_config_values(
-                 target_service=service_name,
-                 database=database,
-             )
-             m = mysql.MysqlUpload(
-                 username=username,
-                 password=password,
-                 host=host,
-                 port=port,
-             )
-             m.df_to_mysql(
-                 df=df,
-                 db_name='聚合数据',
-                 table_name='日期表',
-                 move_insert=True,  # delete first, then insert
-                 df_sql=False,  # True: upload the whole table via df.to_sql, no deduplication
-                 drop_duplicates=False,  # True: check for duplicates before inserting (slower)
-                 filename=None,  # used to track processing progress
-                 service_database=service_database,  # used to track processing progress
-             )
+     m = mysql.MysqlUpload(
+         username=username,
+         password=password,
+         host=host,
+         port=port,
+     )
+     m.df_to_mysql(
+         df=df,
+         db_name='聚合数据',
+         table_name='日期表',
+         move_insert=True,  # delete first, then insert
+         df_sql=False,  # True: upload the whole table via df.to_sql, no deduplication
+         drop_duplicates=False,  # True: check for duplicates before inserting (slower)
+         filename=None,  # used to track processing progress
+         service_database=service_database,  # used to track processing progress
+     )


- def main(service_databases=None, is_mysql=False):
+ def main(is_mysql=False):
      """
      is_mysql: debug switch; controls whether the aggregation steps run afterwards
      """

-     if not service_databases:
-         service_databases = [
-             # {'home_lx': 'mongodb'},
-             {'home_lx': 'mysql'},
-             # {'company': 'mysql'},
-             # {'nas': 'mysql'},
-         ]
-
      cn = DataClean(
          path=upload_path,  # source directory: the Downloads folder
          source_path=source_path3,  # where the original files are archived
-         service_databases=service_databases
      )
      cn.new_unzip(is_move=True)  # unzip; is_move: whether to delete the original zip after extraction
      cn.sycm_tm(is_except=['except'])  # Tmall Sycm (生意参谋)
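For context, the date table that `date_table()` uploads can be built in a few lines. This is one plausible construction consistent with the `月索引` column and the sort shown in the hunk above; it is not the package's exact code, and the index formula is an assumption:

```python
import pandas as pd

dates = pd.date_range('2020-01-01', pd.Timestamp.today().normalize(), freq='D')
df = pd.DataFrame({'日期': dates})
df['年'] = df['日期'].dt.year
df['月'] = df['日期'].dt.month
# a running month index counted from the first month in the table (assumed formula)
df['月索引'] = (df['年'] - df['年'].min()) * 12 + df['月']
df.sort_values('日期', ascending=False, ignore_index=True, inplace=True)
```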
@@ -1600,7 +1541,7 @@ def main(service_databases=None, is_mysql=False):
      # cn.syj_reports_tb(is_except=['except'])  # Taobao 生意经; must not run at the same time as Tmall
      cn.jd_reports(is_except=['except'])  # clean the JD reports
      cn.sp_scene_clean(is_except=['except'])  # product material reports
-     cn.upload_df(service_databases=service_databases)  # upload to the database
+     cn.upload_df()  # upload to the database

      cn.move_sycm(is_except=['临时文件', ])  # Sycm: move files into the originals folder
      cn.move_dmp(is_except=['临时文件', ])  # DMP (达摩盘)
@@ -1613,64 +1554,35 @@ def main(service_databases=None, is_mysql=False):
          return

      # refresh the date table
-     date_table(service_databases=service_databases)
-     # refresh the product-year baseline table: 属性设置 2 - 货品年份基准
+     date_table()
+     # refresh the product-year baseline table: 属性设置 3 - 货品年份基准
      p = products.Products()
-     p.to_mysql(service_databases=service_databases)
+     p.to_mysql()

+     conf = myconfig.main()
+     data = conf['Windows']['xigua_lx']['mysql']['local']
+     db_list = conf['Windows']['xigua_lx']['mysql']['数据库集']
+     db_list = [item for item in db_list if item != '聚合数据']
      # clean every database except the aggregated one
      optimize_data.op_data(
-         db_name_lists=[
-             '京东数据3',
-             '属性设置3',
-             '推广数据2',
-             '生意参谋3',
-             '推广数据_淘宝店',
-             '爱库存2'
-             '生意参谋3',
-             '生意经2',
-             # '聚合数据',  # not cleaned here; aggregation has not run yet
-             '达摩盘3',
-         ],
-         days=100,
+         db_name_lists=db_list,
+         days=5,
          is_mongo=True,
          is_mysql=True,
-         service_databases=service_databases
      )

      # data aggregation
-     query_data.data_aggregation(service_databases=service_databases, months=3)
+     query_data.data_aggregation(months=3)
      time.sleep(60)

      # clean the aggregated data; mongodb holds no aggregated data, so only mysql needs cleaning
      optimize_data.op_data(
          db_name_lists=['聚合数据'],
          days=3650,
-         service_databases=service_databases,
          is_mongo=False,
          is_mysql=True,
      )


  if __name__ == '__main__':
-     main(
-         service_databases=[
-             # {'company': 'mysql'},
-             {'home_lx': 'mysql'},
-             # {'home_lx': 'mongodb'},
-             # {'nas': 'mysql'},
-         ],
-         is_mysql=False,  # clean the aggregated data
-     )
-     # date_table(service_databases=[{'company': 'mysql'}])
-     # c = DataClean(
-     #     path=upload_path,  # source directory: the Downloads folder
-     #     source_path=source_path3,  # where the original files are archived
-     #     service_databases=[{'home_lx': 'mysql'},]
-     # )
-     # c.sp_scene_clean(is_except=['except'])  # product material reports
-     # c.move_tg_tm(is_except=['临时文件', ])  # Tmall: move files into the originals folder
-
-
-     # test()
-
+     main(is_mysql=False)
mdbq/config/myconfig.py ADDED
@@ -0,0 +1,30 @@
+ # -*- coding: UTF-8 -*-
+ import os
+ import json
+ from mdbq.config import set_support
+
+
+ def main():
+     support_path = set_support.SetSupport(dirname='support').dirname
+     file = os.path.join(support_path, 'my_config.txt')
+     if not os.path.isfile(file):
+         print(f'config file missing, nothing to read: {file}')
+         return
+     with open(file, 'r', encoding='utf-8') as f:
+         config_datas = json.load(f)
+     return config_datas
+
+
+ def write_back(datas):
+     """ write the data back to the local config file """
+     support_path = set_support.SetSupport(dirname='support').dirname
+     file = os.path.join(support_path, 'my_config.txt')
+     with open(file, 'w+', encoding='utf-8') as f:
+         json.dump(datas, f, ensure_ascii=False, sort_keys=False, indent=4)
+
+
+ if __name__ == '__main__':
+     d = main()
+     print(d)
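This new module is what `myconfig.main()` refers to throughout the release: it loads a JSON document from `support/my_config.txt`. Judging by the lookups elsewhere in the diff (`conf['Windows']['xigua_lx']['mysql']['local']` plus a `数据库集` list), the file plausibly looks like the sketch below; the actual file may hold more hosts and keys, and all values shown are placeholders:

```python
example_config = {
    "Windows": {
        "xigua_lx": {
            "mysql": {
                "local": {
                    "username": "root",
                    "password": "******",
                    "host": "127.0.0.1",
                    "port": 3306,
                },
                "数据库集": ["京东数据3", "生意参谋3", "聚合数据"],
            }
        }
    }
}

# write_back() persists edits with ensure_ascii=False so Chinese keys survive:
# myconfig.write_back(example_config)
```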
mdbq/config/products.py CHANGED
@@ -3,15 +3,29 @@ import json
  import os
  import platform
  import getpass
+ import socket
  import pandas as pd
  from mdbq.mysql import mysql
- from mdbq.config import get_myconf
+ from mdbq.config import myconfig
  from numpy.ma.core import product

  """
  Tmall product-year baseline mapping.
  Used by the aggregation step; from this table a product's listing year-month can be determined.
  """
+ username, password, host, port, service_database = None, None, None, None, None
+ if socket.gethostname().lower() in ['xigua_lx', 'xigua1', 'macbook pro']:
+     conf = myconfig.main()
+     data = conf['Windows']['xigua_lx']['mysql']['local']
+     username, password, host, port = data['username'], data['password'], data['host'], data['port']
+     service_database = {'xigua_lx': 'mysql'}
+ elif socket.gethostname().lower() in ['company', 'mac2.local']:
+     conf = myconfig.main()
+     data = conf['Windows']['company']['mysql']['local']
+     username, password, host, port = data['username'], data['password'], data['host'], data['port']
+     service_database = {'company': 'mysql'}
+ if not username:
+     print(f'unknown host: {socket.gethostname()}')


  class Products:
@@ -107,31 +121,25 @@ class Products:
          ]
          self.datas += my_datas

-     def to_mysql(self, service_databases=[{'home_lx': 'mysql'}]):
+     def to_mysql(self):
          self.update_my_datas()
          df = pd.DataFrame(self.datas)
-         for service_database in service_databases:
-             for service_name, database in service_database.items():
-                 username, password, host, port = get_myconf.select_config_values(
-                     target_service=service_name,
-                     database=database,
-                 )
-                 m = mysql.MysqlUpload(
-                     username=username,
-                     password=password,
-                     host=host,
-                     port=port,
-                 )
-                 m.df_to_mysql(
-                     df=df,
-                     db_name='属性设置2',
-                     table_name='货品年份基准',
-                     move_insert=False,
-                     df_sql=False,  # True: upload the whole table via df.to_sql, no deduplication
-                     drop_duplicates=True,  # True: check for duplicates before inserting (slower)
-                     icm_update=[],
-                     service_database=service_database,  # used to track processing progress
-                 )
+         m = mysql.MysqlUpload(
+             username=username,
+             password=password,
+             host=host,
+             port=port,
+         )
+         m.df_to_mysql(
+             df=df,
+             db_name='属性设置3',
+             table_name='货品年份基准',
+             move_insert=False,
+             df_sql=False,  # True: upload the whole table via df.to_sql, no deduplication
+             drop_duplicates=True,  # True: check for duplicates before inserting (slower)
+             icm_update=[],
+             service_database=service_database,  # used to track processing progress
+         )

      def market_date(self, product_id: int):
          try:
@@ -153,13 +161,3 @@ if __name__ == '__main__':
      p = Products()
      year = p.market_date(product_id=product_id)
      print(f'{product_id}: {year}')
-
-     p.to_mysql(service_databases=[
-         {
-             'home_lx': 'mysql'
-         },
-         {
-             'company': 'mysql'
-         }
-     ]
-     )
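With credentials now resolved at import time, refreshing the baseline table is a no-argument call. A short usage sketch (it assumes the hostname matches one of the recognized hosts, and the product id is illustrative):

```python
from mdbq.config import products

p = products.Products()
p.to_mysql()  # writes 货品年份基准 into 属性设置3 using the module-level connection
print(p.market_date(product_id=123456789))  # placeholder id
```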
mdbq/mysql/mysql.py CHANGED
@@ -12,7 +12,7 @@ import pandas as pd
  from sqlalchemy import create_engine
  import os
  import calendar
- from mdbq.config import get_myconf
+ from mdbq.config import myconfig
  from mdbq.config import set_support
  from mdbq.dataframe import converter
  from mdbq.aggregation import mysql_types
@@ -49,7 +49,7 @@ class MysqlUpload:
          self.port = port
          self.config = {
              'host': self.host,
-             'port': self.port,
+             'port': int(self.port),
              'user': self.username,
              'password': self.password,
              'charset': charset,  # utf8mb4 can store four-byte UTF-8 characters
@@ -69,7 +69,7 @@ class MysqlUpload:
          return wrapper

      @try_except
-     def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], service_database={'home_lx': 'mysql'}, move_insert=False, df_sql=False, drop_duplicates=False, filename=None, count=None, json_path=None, reset_id=False):
+     def df_to_mysql(self, df, table_name, db_name='远程数据源', icm_update=[], service_database={'xigua_lx': 'mysql'}, move_insert=False, df_sql=False, drop_duplicates=False, filename=None, count=None, json_path=None, reset_id=False):
          """
          write the df into the database
          db_name: name of the database
@@ -185,13 +185,13 @@ class MysqlUpload:

          # print(cl, db_n, tb_n)
          # these values are returned so the json file is written only after the new columns exist; otherwise the dtypes cannot be read
-         # ⚠️ mysql_all_dtypes reads only the home_lx database info by default; it does not read other systems
+         # ⚠️ mysql_all_dtypes reads only the xigua_lx database info by default; it does not read other systems
          if cl and db_n and tb_n:
-             mysql_types.mysql_all_dtypes(service_database=service_database, db_name=db_name, table_name=table_name)  # refresh the dtypes of one table
+             mysql_types.mysql_all_dtypes(db_name=db_name, table_name=table_name)  # refresh the dtypes of one table
          elif cl and db_n:
-             mysql_types.mysql_all_dtypes(service_database=service_database, db_name=db_name)  # refresh the dtypes of one database
+             mysql_types.mysql_all_dtypes(db_name=db_name)  # refresh the dtypes of one database
          elif cl:
-             mysql_types.mysql_all_dtypes(service_database=service_database)  # refresh dtypes for every table in every database into the local json
+             mysql_types.mysql_all_dtypes()  # refresh dtypes for every table in every database into the local json

          # 4. upsert the data
          now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S ")
@@ -555,7 +555,7 @@ class OptimizeDatas:
          self.charset = charset
          self.config = {
              'host': self.host,
-             'port': self.port,
+             'port': int(self.port),
              'user': self.username,
              'password': self.password,
              'charset': self.charset,  # utf8mb4 can store four-byte UTF-8 characters
@@ -864,38 +864,8 @@ def year_month_day_bak(start_date, end_date):
      return results  # every year-month-day between start_date and end_date


- def download_datas_bak(table_name, save_path, start_date):
-     username, password, host, port = get_myconf.select_config_values(target_service='home_lx', database='mysql')
-     print(username, password, host, port)
-     m = MysqlUpload(username=username, password=password, host=host, port=port)
-     m.port = port
-     results = year_month_day(start_date=start_date, end_date='today')
-     # print(results)
-     for result in results:
-         start_date = result['起始日期']
-         end_date = result['结束日期']
-         # print(start_date, end_date)
-         df = m.read_mysql(db_name='天猫数据1', table_name=table_name, start_date=start_date, end_date=end_date)
-         if len(df) == 0:
-             continue
-         path = os.path.join(save_path, f'{table_name}_{str(start_date)}_{str(end_date)}.csv')
-         df['日期'] = df['日期'].apply(lambda x: re.sub(' .*', '', str(x)))
-         df.to_csv(path, index=False, encoding='utf-8_sig', header=True)
-
-
  if __name__ == '__main__':
-     username, password, host, port = get_myconf.select_config_values(target_service='home_lx', database='mysql')
-     # print(username, password, host, port)
-     path = '/Users/xigua/Downloads/人群洞察.csv'
-     df = pd.read_csv(path, encoding='utf-8_sig', header=0, na_filter=False)
-     # print(df)
-     m = MysqlUpload(username=username, password=password, host=host, port=port)
-     m.df_to_mysql(
-         df=df,
-         db_name='test',
-         table_name='测试数据',
-         drop_duplicates=True,
-         # service_name=service_name,
-         # service_databases=service_databases,
-     )
-
+     conf = myconfig.main()
+     data = conf['Windows']['xigua_lx']['mysql']['local']
+     username, password, host, port = data['username'], data['password'], data['host'], data['port']
+     print(username, password, host, port)
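A note on the two `int(self.port)` casts above: values loaded from `my_config.txt` arrive with whatever type the JSON stores, and a port saved as a string would otherwise flow straight into the connection config. A defensive sketch (the raw dict mimics a config entry):

```python
raw = {'username': 'root', 'password': '...', 'host': '127.0.0.1', 'port': '3306'}
config = {
    'host': raw['host'],
    'port': int(raw['port']),  # cast so the driver always sees an integer port
    'user': raw['username'],
    'password': raw['password'],
}
```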