mdbq 2.7.0__py3-none-any.whl → 2.7.2__py3-none-any.whl

This diff shows the published contents of two versions of the package as they appear in their public registry. It is provided for informational purposes only and reflects the changes between those released versions.
@@ -4,13 +4,14 @@ import pandas as pd
  from functools import wraps
  import chardet
  import zipfile
+ import socket
  from pyzipper import PyZipFile
  import os
  import platform
  import json
  from mdbq.mongo import mongo
  from mdbq.mysql import mysql
- from mdbq.config import get_myconf
+ from mdbq.config import myconfig
  from mdbq.aggregation import df_types
  from mdbq.config import products
  from mdbq.aggregation import optimize_data
@@ -41,18 +42,31 @@ else:
  Share_Path = os.path.join('/Volumes/时尚事业部/01.运营部/天猫报表')  # 共享文件根目录

  upload_path = os.path.join(D_PATH, '数据上传中心')  # 此目录位于下载文件夹
- source_path = os.path.join(Data_Path, '原始文件2')  # 此目录保存下载并清洗过的文件,作为数据库备份
+ # source_path = os.path.join(Data_Path, '原始文件2')  # 此目录保存下载并清洗过的文件,作为数据库备份
  source_path3 = os.path.join(Data_Path, '原始文件3')  # 此目录保存下载并清洗过的文件,作为数据库备份

+ if socket.gethostname().lower() in ['xigua_lx', 'xigua1', 'macbook pro']:
+     conf = myconfig.main()
+     conf_data = conf['Windows']['xigua_lx']['mysql']['local']
+     username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
+     service_database = {'xigua_lx': 'mysql'}
+ elif socket.gethostname().lower() in ['company', 'Mac2.local']:
+     conf = myconfig.main()
+     conf_data = conf['Windows']['company']['mysql']['local']
+     username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
+     service_database = {'company': 'mysql'}
+ if not username:
+     print(f'找不到主机:')
+
+

  class DataClean:
      """ 数据分类 """

-     def __init__(self, path, source_path, service_databases):
+     def __init__(self, path, source_path):
          self.path = path  # 数据源位置,下载文件夹
          self.source_path = source_path  # 原始文件保存目录
          self.datas = []
-         self.service_databases = service_databases

      @staticmethod
      def try_except(func):  # 在类内部定义一个异常处理方法
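In 2.7.2 this module resolves MySQL credentials once, at import time, from a hostname-keyed config returned by myconfig.main(), instead of passing a service_databases list into every call. A minimal sketch of the nested structure the lookup conf['Windows']['xigua_lx']['mysql']['local'] implies — the host names, values, and extra keys below are illustrative assumptions, not the package's real data:

    # hypothetical content of the config that myconfig.main() parses and returns
    conf = {
        'Windows': {
            'xigua_lx': {
                'mysql': {
                    'local': {'username': 'root', 'password': '***', 'host': '127.0.0.1', 'port': 3306},
                    '数据库集': ['京东数据2', '属性设置3', '聚合数据'],  # assumed example list
                }
            },
            'company': {
                'mysql': {
                    'local': {'username': 'root', 'password': '***', 'host': '192.168.1.10', 'port': 3306},
                }
            },
        }
    }
    conf_data = conf['Windows']['xigua_lx']['mysql']['local']
    username, password, host, port = (conf_data['username'], conf_data['password'],
                                      conf_data['host'], conf_data['port'])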
@@ -145,6 +159,7 @@ class DataClean:
                  df = pd.read_excel(os.path.join(root, name), header=4)
                  if len(df) == 0:
                      print(f'{name} 报表数据不能为空')
+                     os.remove(os.path.join(root, name))
                      continue
                  df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
                  df.replace(to_replace=[','], value='', regex=True, inplace=True)
@@ -159,6 +174,7 @@ class DataClean:
                  df = pd.read_excel(os.path.join(root, name), header=5, engine='xlrd')
                  if len(df) == 0:
                      print(f'{name} 报表数据不能为空')
+                     os.remove(os.path.join(root, name))
                      continue
                  df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
                  df.replace(to_replace=[','], value='', regex=True, inplace=True)
@@ -255,6 +271,7 @@ class DataClean:
                  df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
                  if len(df) == 0:
                      print(f'{name} 报表数据为空')
+                     os.remove(os.path.join(root, name))
                      continue
                  new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
                  self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
@@ -263,6 +280,7 @@ class DataClean:
                  df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
                  if len(df) == 0:
                      print(f'{name} 报表数据为空')
+                     os.remove(os.path.join(root, name))
                      continue
                  for col in df.columns.tolist():
                      if '(' in col or ')' in col:
@@ -563,6 +581,7 @@ class DataClean:
                  name_st = re.findall(r'([\u4e00-\u9fa5]+)\(分日', name)
                  if not name_st:
                      print(f'{name} 正则提取文件名失败')
+                     os.remove(os.path.join(root, name))
                      continue
                  encoding = self.get_encoding(file_path=os.path.join(root, name))
                  df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
@@ -802,8 +821,9 @@ class DataClean:
              if not is_continue:
                  continue

-             if name.endswith('.xlsx') and '京东推广_' in name:
-                 df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
+             if name.endswith('.csv') and '京东推广_' in name:
+                 # df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
+                 df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
                  new_name = f'py_xg_{name}'
                  os.rename(os.path.join(root, name), os.path.join(root, new_name))
              elif name.endswith('.xlsx') and '京东商智_sku_商品明细' in name:
@@ -813,9 +833,10 @@ class DataClean:
                  df.insert(loc=0, column='日期', value=pattern)
                  df.insert(loc=1, column='店铺名称', value='京东箱包旗舰店')
                  df.fillna(0, inplace=True)
-                 new_name = f'py_xg_{name}'
-                 df.to_excel(os.path.join(upload_path, new_name),
-                             index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
+                 new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
+                 df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False, header=True)
+                 # df.to_excel(os.path.join(upload_path, new_name),
+                 #             index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
                  os.remove(os.path.join(root, name))
              elif name.endswith('.xlsx') and '京东商智_spu_商品明细' in name:
                  df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
@@ -824,9 +845,10 @@ class DataClean:
                  df.insert(loc=0, column='日期', value=pattern)
                  df.insert(loc=1, column='店铺名称', value='京东箱包旗舰店')
                  df.fillna(0, inplace=True)
-                 new_name = f'py_xg_{name}'
-                 df.to_excel(os.path.join(upload_path, new_name),
-                             index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
+                 new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
+                 df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False, header=True)
+                 # df.to_excel(os.path.join(upload_path, new_name),
+                 #             index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
                  os.remove(os.path.join(root, name))
              elif name.endswith('.xlsx') and '京东商智_店铺来源_三级来源' in name:
                  df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
@@ -836,9 +858,10 @@ class DataClean:
                      if '环比' in col or '同比' in col:
                          df.drop(col, axis=1, inplace=True)
                  df.fillna(0, inplace=True)
-                 new_name = f'py_xg_{name}'
-                 df.to_excel(os.path.join(upload_path, new_name),
-                             index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
+                 new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
+                 df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False, header=True)
+                 # df.to_excel(os.path.join(upload_path, new_name),
+                 #             index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
                  os.remove(os.path.join(root, name))

              # 将数据传入 self.datas 等待更新进数据库
@@ -908,7 +931,7 @@ class DataClean:

              if name.endswith('.xlsx') and '商品素材_' in name:
                  shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)_', name)[0]
-                 df = pd.read_excel(os.path.join(root, name), header=0, engine='xlrd')
+                 df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
                  if '日期' not in df.columns.tolist():
                      df.insert(loc=0, column='日期', value=datetime.datetime.today().strftime('%Y-%m-%d'))
                  if '店铺名称' not in df.columns.tolist():
@@ -998,7 +1021,7 @@ class DataClean:
                  t_path = os.path.join(self.source_path, '生意参谋', '商品属性')
                  bib(t_path, _as_month=True)
              elif name.endswith('.csv') and '爱库存_商品榜单_' in name:
-                 t_path = os.path.join(self.source_path, '爱库存', 'spu商品榜单')
+                 t_path = os.path.join(self.source_path, '爱库存', '商品spu榜单')
                  bib(t_path, _as_month=True)
              elif name.endswith('.csv') and '手淘搜索_本店引流词_' in name:
                  t_path = os.path.join(self.source_path, '生意参谋', '手淘搜索_本店引流词')
@@ -1040,7 +1063,6 @@ class DataClean:
                  t_path = os.path.join(self.source_path, '达摩盘', 'dmp人群报表')
                  bib(t_path, _as_month=True)

-
      # @try_except
      def move_sjy(self, path=None, is_except=[]):
          if not path:
@@ -1119,10 +1141,10 @@ class DataClean:
                      continue

                  if name.endswith('.xlsx') and '京东商智_spu_商品明细' in name:
-                     t_path = os.path.join(self.source_path, '京东报表', 'spu_商品明细')
+                     t_path = os.path.join(self.source_path, '京东报表', '京东商智_spu_商品明细')
                      bib(t_path, _as_month=True)
                  elif name.endswith('.xlsx') and '京东商智_sku_商品明细' in name:
-                     t_path = os.path.join(self.source_path, '京东报表', 'sku_商品明细')
+                     t_path = os.path.join(self.source_path, '京东报表', '京东商智_sku_商品明细')
                      bib(t_path, _as_month=True)
                  elif name.endswith('.xlsx') and '京东推广_搜索词' in name:
                      t_path = os.path.join(self.source_path, '京东报表', '搜索词报表')
@@ -1402,118 +1424,59 @@ class DataClean:
              if _new_root != _root or '__MACOSX' in _root:
                  shutil.rmtree(_root)

-     def upload_df(self, service_databases=None, path=None):
+     def upload_df(self, path=None):
          """
          将清洗后的 df 上传数据库, copysh.py 调用
          """
-         if not service_databases:
-             service_databases = self.service_databases
          df_to_json = df_types.DataTypes()  # json 文件, 包含数据的 dtypes 信息
-         for service_database in service_databases:
-             for service_name, database in service_database.items():
-                 # print(service_name, database)
-                 if database == 'mongodb':
-                     username, password, host, port = get_myconf.select_config_values(
-                         target_service=service_name,
-                         database=database,
-                     )
-                     d = mongo.UploadMongo(
-                         username=username,
-                         password=password,
-                         host=host,
-                         port=port,
-                         drop_duplicates=False,
-                     )
-                     for data in self.datas:
-                         db_name, collection_name, df = data['数据库名'], data['集合名称'], data['数据主体']
-                         df_to_json.get_df_types(
-                             df=df,
-                             db_name=db_name,
-                             collection_name=collection_name,
-                             is_file_dtype=True,  # 默认本地文件优先: True
-                         )
-                         d.df_to_mongo(df=df, db_name=db_name, collection_name=collection_name)
-                     if d.client:
-                         d.client.close()
-
-                 elif database == 'mysql':
-                     username, password, host, port = get_myconf.select_config_values(
-                         target_service=service_name,
-                         database=database,
-                     )
-                     m = mysql.MysqlUpload(
-                         username=username,
-                         password=password,
-                         host=host,
-                         port=port,
-                     )
-                     for data in self.datas:
-                         df, db_name, collection_name, rt_filename = data['数据主体'], data['数据库名'], data['集合名称'], data['文件名']
-                         df_to_json.get_df_types(
-                             df=df,
-                             db_name=db_name,
-                             collection_name=collection_name,
-                             is_file_dtype=True,  # 默认本地文件优先: True
-                         )
-                         m.df_to_mysql(
-                             df=df,
-                             db_name=db_name,
-                             table_name=collection_name,
-                             move_insert=True,  # 先删除,再插入
-                             df_sql=False,  # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
-                             drop_duplicates=False,  # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
-                             filename=rt_filename,  # 用来追踪处理进度
-                             service_database=service_database,  # 字典
-                         )
-                     df_to_json.as_json_file()  # 写入 json 文件, 包含数据的 dtypes 信息
-
-
- def test():
-     # main_key = '单元报表'
-     path = f'/Users/xigua/数据中心/原始文件3/天猫推广报表/主体报表'
-     for root, dirs, files in os.walk(path, topdown=False):
-         for name in files:
-             if '~$' in name or '.DS' in name or '.localized' in name or '.jpg' in name or '.png' in name:
-                 continue
-             # if 'py_xg' in name:
-             #     continue
-             # if 'TM_旧表_字段' in root:
-             #     continue
-
-             if name.endswith('.csv'):
-                 print(name)
-                 df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
-                 # if '店铺名称' not in df.columns.tolist():
-                 #     df.insert(loc=1, column='店铺名称', value='万里马官方旗舰店')
-                 # df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
-                 # df.replace(to_replace=[','], value='', regex=True, inplace=True)
-                 # if '统计日期' in df.columns.tolist() and '日期' not in df.columns.tolist():
-                 #     df.rename(columns={'统计日期': '日期', '商品ID': '商品id'}, inplace=True)
-                 # shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)', name)[0]
-                 # df.insert(loc=1, column='店铺名称', value=shop_name)
-
-                 date_all = re.findall(r'_(\d{4}-\d{2}-\d{2})_', name)[0]
-
-                 date = re.findall(r'_(\d{4}-\d{2})-\d{2}', name)[0]
-
-                 new_path = f'/Users/xigua/数据中心/原始文件3/天猫_生意参谋/商品排行/{date}'
-                 # new_path = os.path.join(new_path, date)  # 添加 年月分类
-                 if not os.path.exists(new_path):
-                     os.makedirs(new_path, exist_ok=True)
-                 # print(date_all)
-
-                 new_name = f'py_xg_商品排行_万里马官方旗舰店_{date_all}.csv'
-                 # print(os.path.join(new_path, new_name))
-                 # breakpoint()
-                 df.to_csv(os.path.join(new_path, new_name), encoding='utf-8_sig', index=False, header=True)
-                 # try:
-                 #     df.to_excel(os.path.join(new_path, new_name),
-                 #                 index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
-                 # except Exception as e:
-                 #     print(e)
-
-
- def date_table(service_databases=[{}]):
+
+         # d = mongo.UploadMongo(
+         #     username=username,
+         #     password=password,
+         #     host=host,
+         #     port=port,
+         #     drop_duplicates=False,
+         # )
+         # for data in self.datas:
+         #     db_name, collection_name, df = data['数据库名'], data['集合名称'], data['数据主体']
+         #     df_to_json.get_df_types(
+         #         df=df,
+         #         db_name=db_name,
+         #         collection_name=collection_name,
+         #         is_file_dtype=True,  # 默认本地文件优先: True
+         #     )
+         #     d.df_to_mongo(df=df, db_name=db_name, collection_name=collection_name)
+         # if d.client:
+         #     d.client.close()
+
+         m = mysql.MysqlUpload(
+             username=username,
+             password=password,
+             host=host,
+             port=port,
+         )
+         for data in self.datas:
+             df, db_name, collection_name, rt_filename = data['数据主体'], data['数据库名'], data['集合名称'], data['文件名']
+             df_to_json.get_df_types(
+                 df=df,
+                 db_name=db_name,
+                 collection_name=collection_name,
+                 is_file_dtype=True,  # 默认本地文件优先: True
+             )
+             m.df_to_mysql(
+                 df=df,
+                 db_name=db_name,
+                 table_name=collection_name,
+                 move_insert=True,  # 先删除,再插入
+                 df_sql=False,  # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
+                 drop_duplicates=False,  # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
+                 filename=rt_filename,  # 用来追踪处理进度
+                 service_database=service_database,  # 字典
+             )
+         df_to_json.as_json_file()  # 写入 json 文件, 包含数据的 dtypes 信息
+
+
+ def date_table():
      """
      生成 pbix 使用的日期表
      """
@@ -1540,58 +1503,43 @@ def date_table(service_databases=[{}]):
      df['月索引'] = mon
      df.sort_values('日期', ascending=False, ignore_index=True, inplace=True)

-     for service_database in service_databases:
-         for service_name, database in service_database.items():
-             username, password, host, port = get_myconf.select_config_values(
-                 target_service=service_name,
-                 database=database,
-             )
-             m = mysql.MysqlUpload(
-                 username=username,
-                 password=password,
-                 host=host,
-                 port=port,
-             )
-             m.df_to_mysql(
-                 df=df,
-                 db_name='聚合数据',
-                 table_name='日期表',
-                 move_insert=True,  # 先删除,再插入
-                 df_sql=False,  # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
-                 drop_duplicates=False,  # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
-                 filename=None,  # 用来追踪处理进度
-                 service_database=service_database,  # 用来追踪处理进度
-             )
+     m = mysql.MysqlUpload(
+         username=username,
+         password=password,
+         host=host,
+         port=port,
+     )
+     m.df_to_mysql(
+         df=df,
+         db_name='聚合数据',
+         table_name='日期表',
+         move_insert=True,  # 先删除,再插入
+         df_sql=False,  # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
+         drop_duplicates=False,  # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
+         filename=None,  # 用来追踪处理进度
+         service_database=service_database,  # 用来追踪处理进度
+     )


- def main(service_databases=None, is_mysql=False):
+ def main(is_mysql=False):
      """
      is_mysql: 调试时加,False: 是否后续的聚合数据
      """

-     if not service_databases:
-         service_databases = [
-             # {'home_lx': 'mongodb'},
-             {'home_lx': 'mysql'},
-             # {'company': 'mysql'},
-             # {'nas': 'mysql'},
-         ]
-
      cn = DataClean(
          path=upload_path,  # 源文件目录,下载文件夹
          source_path=source_path3,  # 原始文件保存目录
-         service_databases=service_databases
      )
      cn.new_unzip(is_move=True)  # 解压文件, is_move 解压后是否删除原 zip 压缩文件
      cn.sycm_tm(is_except=['except'])  # 天猫生意参谋
      cn.dmp_tm(is_except=['except'])  # 达摩盘
      cn.tg_reports(is_except=['except'])  # 推广报表,天猫淘宝共同清洗
      cn.syj_reports_tm(is_except=['except'])  # 天猫生意经
-     """ 淘宝生意经,不可以和天猫同时运行 """
+     # # 淘宝生意经,不可以和天猫同时运行
      # cn.syj_reports_tb(is_except=['except'])  # 淘宝生意经,不可以和天猫同时运行
      cn.jd_reports(is_except=['except'])  # 清洗京东报表
      cn.sp_scene_clean(is_except=['except'])  # 商品素材
-     cn.upload_df(service_databases=service_databases)  # 上传数据库
+     cn.upload_df()  # 上传数据库

      cn.move_sycm(is_except=['临时文件', ])  # 生意参谋,移到文件到原始文件夹
      cn.move_dmp(is_except=['临时文件', ])  # 达摩盘
@@ -1604,64 +1552,41 @@ def main(service_databases=None, is_mysql=False):
          return

      # 更新日期表
-     date_table(service_databases=service_databases)
-     # 更新货品年份基准表, 属性设置 2 - 货品年份基准
+     date_table()
+     # 更新货品年份基准表, 属性设置 3 - 货品年份基准
      p = products.Products()
-     p.to_mysql(service_databases=service_databases)
+     p.to_mysql()

+     conf = myconfig.main()
+     data = conf['Windows']['xigua_lx']['mysql']['local']
+     db_list = conf['Windows']['xigua_lx']['mysql']['数据库集']
+     db_list = [item for item in db_list if item != '聚合数据']
      # 清理所有非聚合数据的库
      optimize_data.op_data(
-         db_name_lists=[
-             '京东数据3',
-             '属性设置3',
-             '推广数据2',
-             '生意参谋3',
-             '推广数据_淘宝店',
-             '爱库存2'
-             '生意参谋3',
-             '生意经2',
-             # '聚合数据',  # 不在这里清理聚合数据, 还未开始聚合呢
-             '达摩盘3',
-         ],
-         days=100,
+         db_name_lists=db_list,
+         days=5,
          is_mongo=True,
         is_mysql=True,
-         service_databases=service_databases
      )

      # 数据聚合
-     query_data.data_aggregation(service_databases=service_databases, months=3)
+     query_data.data_aggregation(months=3)
      time.sleep(60)

      # 清理聚合数据, mongodb 中没有聚合数据,所以只需要清理 mysql 即可
      optimize_data.op_data(
          db_name_lists=['聚合数据'],
          days=3650,
-         service_databases=service_databases,
          is_mongo=False,
          is_mysql=True,
      )


  if __name__ == '__main__':
-     main(
-         service_databases = [
-             {'company': 'mysql'},
-             # {'home_lx': 'mysql'},
-             # {'home_lx': 'mongodb'},
-             # {'nas': 'mysql'},
-         ],
-         is_mysql = False,  # 清理聚合数据
-     )
-     # date_table(service_databases=[{'company': 'mysql'}])
-     # c = DataClean(
-     #     path=upload_path,  # 源文件目录,下载文件夹
-     #     source_path=source_path3,  # 原始文件保存目录
-     #     service_databases=[{'home_lx': 'mysql'},]
-     # )
-     # c.sp_scene_clean(is_except=['except'])  # 商品素材
-     # c.move_tg_tm(is_except=['临时文件', ])  # 天猫,移到文件到原始文件夹
-
-
-     # test()
+     main(is_mysql=True)

+     # conf = myconfig.main()
+     # data = conf['Windows']['xigua_lx']['mysql']['local']
+     # db_list = conf['Windows']['xigua_lx']['mysql']['数据库集']
+     # db_list = [item for item in db_list if item != '聚合数据']
+     # print(db_list)
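For context, the refactored upload_df() consumes whatever the cleaning methods queued in self.datas and writes it to MySQL with the module-level credentials; each queued entry carries the target database, table, DataFrame, and source file name. A condensed sketch of that flow — the queued entry below is illustrative, not real report data:

    import pandas as pd

    cn = DataClean(path=upload_path, source_path=source_path3)   # no service_databases argument anymore
    cn.datas.append({                                            # illustrative queued entry
        '数据库名': '生意参谋3',
        '集合名称': '店铺流量_月数据',                             # hypothetical table name
        '数据主体': pd.DataFrame({'日期': ['2024-01-01'], '访客数': [1]}),
        '文件名': 'demo.csv',
    })
    cn.upload_df()   # records dtypes via df_types.DataTypes(), then uploads each entry through mysql.MysqlUpload.df_to_mysql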
mdbq/company/copysh.py CHANGED
@@ -321,7 +321,7 @@ def op_data(days: int =100):
      # 清理所有非聚合数据的库
      optimize_data.op_data(
          db_name_lists=[
-             '京东数据3',
+             '京东数据2',
              '属性设置3',
              '推广数据2',
              '推广数据_淘宝店',
@@ -367,7 +367,6 @@ def main():
          op_data(days=100)

          t.sleep_minutes = 5  # 同步前休眠时间
-         # 4. 同步共享文件
          t.tb_file()
          time.sleep(600)  # 检测间隔

@@ -0,0 +1,30 @@
+ # -*- coding: UTF-8 –*-
+ import os
+ import json
+ from mdbq.config import set_support
+
+
+
+ def main():
+     support_path = set_support.SetSupport(dirname='support').dirname
+     file = os.path.join(support_path, 'my_config.txt')
+     if not os.path.isfile(file):
+         print(f'缺少配置文件,无法读取配置文件: {file}')
+         return
+     with open(file, 'r', encoding='utf-8') as f:
+         config_datas = json.load(f)
+     return config_datas
+
+
+ def write_back(datas):
+     """ 将数据写回本地 """
+     support_path = set_support.SetSupport(dirname='support').dirname
+     file = os.path.join(support_path, 'my_config.txt')
+     with open(file, 'w+', encoding='utf-8') as f:
+         json.dump(datas, f, ensure_ascii=False, sort_keys=False, indent=4)
+
+
+
+ if __name__ == '__main__':
+     d = main()
+     print(d)
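The new config module added above reads support/my_config.txt as JSON and can write edits back to the same file. A usage sketch of that round trip — the key path shown is a hypothetical example, only the read/modify/write pattern comes from the source:

    conf = main()                           # parsed JSON dict, or None if the file is missing
    if conf is not None:
        conf['Windows']['company']['mysql']['local']['port'] = 3306   # hypothetical key path
        write_back(conf)                    # rewrites my_config.txt with ensure_ascii=False, indent=4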
mdbq/config/products.py CHANGED
@@ -3,15 +3,28 @@ import json
  import os
  import platform
  import getpass
+ import socket
  import pandas as pd
  from mdbq.mysql import mysql
- from mdbq.config import get_myconf
+ from mdbq.config import myconfig
  from numpy.ma.core import product

  """
  天猫货品年份基准对照
  用于聚合数据,通过此数据表进一步可确定商品上架年月
  """
+ if socket.gethostname().lower() in ['xigua_lx', 'xigua1', 'macbook pro']:
+     conf = myconfig.main()
+     data = conf['Windows']['xigua_lx']['mysql']['local']
+     username, password, host, port = data['username'], data['password'], data['host'], data['port']
+     service_database = {'xigua_lx': 'mysql'}
+ elif socket.gethostname().lower() in ['company', 'Mac2.local']:
+     conf = myconfig.main()
+     data = conf['Windows']['company']['mysql']['local']
+     username, password, host, port = data['username'], data['password'], data['host'], data['port']
+     service_database = {'company': 'mysql'}
+ if not username:
+     print(f'找不到主机:')


  class Products:
@@ -107,31 +120,25 @@ class Products:
          ]
          self.datas += my_datas

-     def to_mysql(self, service_databases=[{'home_lx': 'mysql'}]):
+     def to_mysql(self):
          self.update_my_datas()
          df = pd.DataFrame(self.datas)
-         for service_database in service_databases:
-             for service_name, database in service_database.items():
-                 username, password, host, port = get_myconf.select_config_values(
-                     target_service=service_name,
-                     database=database,
-                 )
-                 m = mysql.MysqlUpload(
-                     username=username,
-                     password=password,
-                     host=host,
-                     port=port,
-                 )
-                 m.df_to_mysql(
-                     df=df,
-                     db_name='属性设置2',
-                     table_name='货品年份基准',
-                     move_insert = False,
-                     df_sql=False,  # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
-                     drop_duplicates=True,  # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
-                     icm_update=[],
-                     service_database=service_database,  # 用来追踪处理进度
-                 )
+         m = mysql.MysqlUpload(
+             username=username,
+             password=password,
+             host=host,
+             port=port,
+         )
+         m.df_to_mysql(
+             df=df,
+             db_name='属性设置3',
+             table_name='货品年份基准',
+             move_insert = False,
+             df_sql=False,  # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
+             drop_duplicates=True,  # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
+             icm_update=[],
+             service_database=service_database,  # 用来追踪处理进度
+         )

      def market_date(self, product_id: int):
          try:
@@ -153,13 +160,3 @@ if __name__ == '__main__':
      p = Products()
      year = p.market_date(product_id=product_id)
      print(f'{product_id}: {year}')
-
-     p.to_mysql(service_databases=[
-         {
-             'home_lx': 'mysql'
-         },
-         {
-             'company': 'mysql'
-         }
-     ]
-     )
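After this refactor, Products.to_mysql() takes no arguments: it relies on the module-level username/password/host/port and service_database resolved from the hostname at import time, and it now writes into 属性设置3 instead of 属性设置2. A brief usage sketch — the product id is an arbitrary example value, not real data:

    from mdbq.config import products

    p = products.Products()
    p.to_mysql()                              # uploads the 货品年份基准 table into the 属性设置3 database
    print(p.market_date(product_id=696))      # look up the launch year for a hypothetical product id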