mdbq 2.7.0__py3-none-any.whl → 2.7.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

mdbq/aggregation/aggregation.py CHANGED
@@ -1,5 +1,7 @@
 # -*- coding:utf-8 -*-
 import warnings
+from unittest.mock import inplace
+
 import pandas as pd
 import numpy as np
 import chardet
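
Editor's note on this hunk: the new `from unittest.mock import inplace` looks like a stray IDE auto-import — `inplace` is a pandas keyword argument, not a name unittest.mock exports, and nothing in the diff uses it. If so, the import should fail at module load time and make the module unimportable. A quick standalone check (plain Python, no mdbq needed):

    # unittest.mock defines no attribute named "inplace", so this import
    # is expected to raise ImportError when the module is loaded.
    try:
        from unittest.mock import inplace  # noqa: F401
    except ImportError:
        print('unittest.mock exports no "inplace"; the import is spurious')
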
@@ -1118,7 +1120,7 @@ class DatabaseUpdate:
 def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': True}, json_path=None, target_service='company'):
     """ 上传一个文件夹到 mysql 或者 mongodb 数据库 """
     if not os.path.isdir(path):
-        print(f'{os.path.splitext(os.path.basename(__file__))[0]}.upload_dir: 路径不存在或错误: {path}')
+        print(f'{os.path.splitext(os.path.basename(__file__))[0]}.upload_dir: 函数只接受文件夹路径, 不是一个文件夹: {path}')
         return
 
     if dbs['mongodb']:
@@ -1288,29 +1290,40 @@ def file_dir(one_file=True, target_service='company'):
 
 
 def test():
-    path = '/Users/xigua/数据中心/原始文件2/京东报表/JD商品明细spu'
+    path = os.path.relpath(r'C:\Users\Administrator\Downloads\JD商品明细sku')
     for root, dirs, files in os.walk(path, topdown=False):
         for name in files:
             if name.endswith('.csv') and 'baidu' not in name and '~' not in name:
+                print(name)
+                # df = pd.read_excel(os.path.join(root, name), header=0)
                 df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
-                df['最近上架时间'].loc[0] = df['最近上架时间'].loc[1]
-                # print(df[['日期', '最近上架时间']])
+                cols = df.columns.tolist()
+                if '店铺名称' not in cols:
+                    df.insert(loc=1, column='店铺名称', value='京东箱包旗舰店')
+                if '曝光量' in cols:
+                    df.rename(columns={
+                        '曝光量': '搜索曝光量',
+                        '点击次数': '搜索点击次数',
+                        '点击率': '搜索点击率',
+                    }, inplace=True)
+                if '取消金额' in cols:
+                    df.rename(columns={
+                        '取消金额': '取消及售后退款金额',
+                        '取消商品件数': '取消及售后退款件数',
+                        '取消单量': '取消及售后退款单量',
+                    }, inplace=True)
+                if '取消及售后退款金额' not in cols:
+                    df['取消及售后退款金额'] = '0.0'
+                    df['取消及售后退款件数'] = 0
+                    df['取消及售后退款单量'] = 0
                 df.to_csv(os.path.join(root, name), encoding='utf-8_sig', index=False, header=True)
+                # new_name = f'{os.path.splitext(name)[0]}.xlsx'
+                # df.to_excel(os.path.join(root, name),
+                #             index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
             # break
         # break
 
 
-def test2():
-    dp = DatabaseUpdate(path='/Users/xigua/Downloads')
-    dp.new_unzip(is_move=True)
-    dp.cleaning(is_move=False, )  # 清洗数据, 存入 self.datas
-    dp.upload_df(service_databases=[
-        # {'home_lx': 'mongodb'},
-        {'company': 'mysql'},
-        # {'nas': 'mysql'}
-    ], path=None, service_name=None)
-
-
 if __name__ == '__main__':
     username, password, host, port = get_myconf.select_config_values(target_service='nas', database='mysql')
     print(username, password, host, port)
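
The rewritten test() patches legacy JD report CSVs in place: it backfills a 店铺名称 column, renames the bare 曝光量/点击次数/点击率 columns to their 搜索-prefixed names, maps the 取消* columns onto 取消及售后退款*, and zero-fills the latter when absent. One caveat: the zero-fill branch tests cols, which was captured before the renames, so a file whose 取消金额 column was just renamed to 取消及售后退款金额 would have that data overwritten with zeros. A condensed sketch of the same cleanup that consults the live df.columns instead (the helper name is illustrative; the original writes the string '0.0' for the amount column):

    import pandas as pd

    RENAMES = {
        '曝光量': '搜索曝光量',
        '点击次数': '搜索点击次数',
        '点击率': '搜索点击率',
        '取消金额': '取消及售后退款金额',
        '取消商品件数': '取消及售后退款件数',
        '取消单量': '取消及售后退款单量',
    }

    def normalize_jd_columns(df, shop='京东箱包旗舰店'):
        """Apply the same column fixes as the new test(), rename-aware."""
        if '店铺名称' not in df.columns:
            df.insert(loc=1, column='店铺名称', value=shop)
        df.rename(columns={k: v for k, v in RENAMES.items() if k in df.columns}, inplace=True)
        for col in ('取消及售后退款金额', '取消及售后退款件数', '取消及售后退款单量'):
            if col not in df.columns:  # checked after renaming, unlike the original
                df[col] = 0
        return df
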
@@ -1326,24 +1339,15 @@ if __name__ == '__main__':
     # )
 
     # 上传一个目录到指定数据库
-    db_name = '推广数据2'
-    table_name = '营销场景报表'
+    db_name = '京东数据3'
+    table_name = '京东商智_spu_商品明细'
     upload_dir(
-        path='/Users/xigua/数据中心/原始文件3/天猫推广报表/营销场景报表',
+        path=os.path.relpath(r'C:\同步空间\BaiduSyncdisk\原始文件3\京东报表\京东商智_spu_商品明细'),
         db_name=db_name,
         collection_name=table_name,
         dbs={'mysql': True, 'mongodb': False},
-        target_service='company',
+        target_service='home_lx',
     )
 
 
-    # # 新版 数据分类
-    # dp = DatabaseUpdate(path='/Users/xigua/Downloads')
-    # dp.new_unzip(is_move=True)
-    # dp.cleaning(is_move=False)  # 清洗数据, 存入 self.datas, 不需要立即移除文件,仍保留文件到原始文件中
-    # # 将 self.datas 更新至数据库
-    # # dp.upload_df(service_databases=[
-    # # # {'home_lx': 'mongodb'},
-    # # {'company': 'mysql'},
-    # # # {'nas': 'mysql'},
-    # # ])
+    # test()
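
Both test() and this __main__ block now wrap absolute Windows paths in os.path.relpath(). Keep in mind that relpath() only rewrites the string relative to os.getcwd() — the directory it resolves to depends on where the script is launched, and on Windows it raises ValueError when the path and the CWD are on different drives. A small illustration (output shown for a hypothetical CWD):

    import os

    # From a CWD of C:\Users\Administrator this prints a '..\..\同步空间\...'
    # style path; from a CWD on another drive it raises ValueError instead.
    p = os.path.relpath(r'C:\同步空间\BaiduSyncdisk\原始文件3\京东报表\京东商智_spu_商品明细')
    print(p)
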

mdbq/aggregation/query_data.py CHANGED
@@ -2316,7 +2316,7 @@ def main():
 if __name__ == '__main__':
     data_aggregation(
         service_databases=[{'company': 'mysql'}],
-        months=12,
-        is_juhe=True,  # 立即启动对聚合数据的清理工作
-        # less_dict=['营销场景报表'],  # 单独聚合某一个数据库
+        months=1,
+        is_juhe=False,  # 立即启动对聚合数据的清理工作
+        # less_dict=['生意参谋_直播场次分析'],  # 单独聚合某一个数据库
     )

mdbq/clean/clean_upload.py CHANGED
@@ -145,6 +145,7 @@ class DataClean:
                 df = pd.read_excel(os.path.join(root, name), header=4)
                 if len(df) == 0:
                     print(f'{name} 报表数据不能为空')
+                    os.remove(os.path.join(root, name))
                     continue
                 df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
                 df.replace(to_replace=[','], value='', regex=True, inplace=True)
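
This one-line addition — deleting the source file once its report parses to zero rows — recurs through DataClean: the same empty-report guard appears in the hunks at 159, 255, and 263 below, and the hunk at 563 applies the same cleanup when the filename regex fails. The shared pattern as a standalone sketch (the helper name is illustrative):

    import os
    from typing import Optional

    import pandas as pd

    def read_or_discard(path: str, **read_kwargs) -> Optional[pd.DataFrame]:
        """Read a report; delete the source file if it has no rows."""
        df = pd.read_csv(path, **read_kwargs)
        if len(df) == 0:
            print(f'{os.path.basename(path)} 报表数据为空')
            os.remove(path)  # new in 2.7.1: empty reports no longer accumulate on disk
            return None
        return df
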
@@ -159,6 +160,7 @@ class DataClean:
                 df = pd.read_excel(os.path.join(root, name), header=5, engine='xlrd')
                 if len(df) == 0:
                     print(f'{name} 报表数据不能为空')
+                    os.remove(os.path.join(root, name))
                     continue
                 df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
                 df.replace(to_replace=[','], value='', regex=True, inplace=True)
@@ -255,6 +257,7 @@ class DataClean:
                 df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
                 if len(df) == 0:
                     print(f'{name} 报表数据为空')
+                    os.remove(os.path.join(root, name))
                     continue
                 new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
                 self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
@@ -263,6 +266,7 @@ class DataClean:
                 df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
                 if len(df) == 0:
                     print(f'{name} 报表数据为空')
+                    os.remove(os.path.join(root, name))
                     continue
                 for col in df.columns.tolist():
                     if '(' in col or ')' in col:
@@ -563,6 +567,7 @@ class DataClean:
                 name_st = re.findall(r'([\u4e00-\u9fa5]+)\(分日', name)
                 if not name_st:
                     print(f'{name} 正则提取文件名失败')
+                    os.remove(os.path.join(root, name))
                     continue
                 encoding = self.get_encoding(file_path=os.path.join(root, name))
                 df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
@@ -802,8 +807,9 @@ class DataClean:
                 if not is_continue:
                     continue
 
-                if name.endswith('.xlsx') and '京东推广_' in name:
-                    df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
+                if name.endswith('.csv') and '京东推广_' in name:
+                    # df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
+                    df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
                     new_name = f'py_xg_{name}'
                     os.rename(os.path.join(root, name), os.path.join(root, new_name))
                 elif name.endswith('.xlsx') and '京东商智_sku_商品明细' in name:
@@ -813,9 +819,10 @@ class DataClean:
                     df.insert(loc=0, column='日期', value=pattern)
                     df.insert(loc=1, column='店铺名称', value='京东箱包旗舰店')
                     df.fillna(0, inplace=True)
-                    new_name = f'py_xg_{name}'
-                    df.to_excel(os.path.join(upload_path, new_name),
-                                index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
+                    new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
+                    df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False, header=True)
+                    # df.to_excel(os.path.join(upload_path, new_name),
+                    #             index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
                     os.remove(os.path.join(root, name))
                 elif name.endswith('.xlsx') and '京东商智_spu_商品明细' in name:
                     df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
@@ -824,9 +831,10 @@ class DataClean:
                     df.insert(loc=0, column='日期', value=pattern)
                     df.insert(loc=1, column='店铺名称', value='京东箱包旗舰店')
                     df.fillna(0, inplace=True)
-                    new_name = f'py_xg_{name}'
-                    df.to_excel(os.path.join(upload_path, new_name),
-                                index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
+                    new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
+                    df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False, header=True)
+                    # df.to_excel(os.path.join(upload_path, new_name),
+                    #             index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
                     os.remove(os.path.join(root, name))
                 elif name.endswith('.xlsx') and '京东商智_店铺来源_三级来源' in name:
                     df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
@@ -836,9 +844,10 @@ class DataClean:
                         if '环比' in col or '同比' in col:
                             df.drop(col, axis=1, inplace=True)
                     df.fillna(0, inplace=True)
-                    new_name = f'py_xg_{name}'
-                    df.to_excel(os.path.join(upload_path, new_name),
-                                index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
+                    new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
+                    df.to_csv(os.path.join(root, new_name), encoding='utf-8_sig', index=False, header=True)
+                    # df.to_excel(os.path.join(upload_path, new_name),
+                    #             index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
                     os.remove(os.path.join(root, name))
 
                 # 将数据传入 self.datas 等待更新进数据库
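
All three 京东商智 branches above make the same switch: df.to_excel(...) into upload_path becomes df.to_csv(..., encoding='utf-8_sig') into root, with the Excel write kept as a comment. Python's codec lookup normalizes 'utf-8_sig' to utf-8-sig, which prepends a byte-order mark so that Excel detects the encoding when opening the CSV. A minimal sketch (file name illustrative):

    import pandas as pd

    df = pd.DataFrame({'日期': ['2024-01-01'], '店铺名称': ['京东箱包旗舰店']})
    # utf-8-sig writes a BOM, so Excel opens the CSV with the right
    # encoding instead of showing mojibake for the Chinese headers.
    df.to_csv('py_xg_demo.csv', encoding='utf-8_sig', index=False, header=True)
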
@@ -1119,10 +1128,10 @@ class DataClean:
                     continue
 
                 if name.endswith('.xlsx') and '京东商智_spu_商品明细' in name:
-                    t_path = os.path.join(self.source_path, '京东报表', 'spu_商品明细')
+                    t_path = os.path.join(self.source_path, '京东报表', '京东商智_spu_商品明细')
                     bib(t_path, _as_month=True)
                 elif name.endswith('.xlsx') and '京东商智_sku_商品明细' in name:
-                    t_path = os.path.join(self.source_path, '京东报表', 'sku_商品明细')
+                    t_path = os.path.join(self.source_path, '京东报表', '京东商智_sku_商品明细')
                     bib(t_path, _as_month=True)
                 elif name.endswith('.xlsx') and '京东推广_搜索词' in name:
                     t_path = os.path.join(self.source_path, '京东报表', '搜索词报表')
@@ -1587,7 +1596,7 @@ def main(service_databases=None, is_mysql=False):
     cn.dmp_tm(is_except=['except'])  # 达摩盘
     cn.tg_reports(is_except=['except'])  # 推广报表,天猫淘宝共同清洗
     cn.syj_reports_tm(is_except=['except'])  # 天猫生意经
-    """ 淘宝生意经,不可以和天猫同时运行 """
+    # # 淘宝生意经,不可以和天猫同时运行
     # cn.syj_reports_tb(is_except=['except'])  # 淘宝生意经,不可以和天猫同时运行
     cn.jd_reports(is_except=['except'])  # 清洗京东报表
     cn.sp_scene_clean(is_except=['except'])  # 商品素材
@@ -1646,8 +1655,8 @@ def main(service_databases=None, is_mysql=False):
 if __name__ == '__main__':
     main(
         service_databases = [
-            {'company': 'mysql'},
-            # {'home_lx': 'mysql'},
+            # {'company': 'mysql'},
+            {'home_lx': 'mysql'},
             # {'home_lx': 'mongodb'},
            # {'nas': 'mysql'},
         ],
mdbq/company/copysh.py CHANGED
@@ -321,7 +321,7 @@ def op_data(days: int =100):
     # 清理所有非聚合数据的库
     optimize_data.op_data(
         db_name_lists=[
-            '京东数据3',
+            '京东数据2',
             '属性设置3',
             '推广数据2',
             '推广数据_淘宝店',
@@ -367,7 +367,6 @@ def main():
     op_data(days=100)
 
     t.sleep_minutes = 5  # 同步前休眠时间
-    # 4. 同步共享文件
     t.tb_file()
     time.sleep(600)  # 检测间隔
 

mdbq-2.7.0.dist-info/METADATA → mdbq-2.7.1.dist-info/METADATA CHANGED
@@ -1,7 +1,7 @@
 Metadata-Version: 2.1
 Name: mdbq
-Version: 2.7.0
-Home-page: https://pypi.org/project/mdbsql
+Version: 2.7.1
+Home-page: https://pypi.org/project/mdbq
 Author: xigua,
 Author-email: 2587125111@qq.com
 License: MIT

mdbq-2.7.0.dist-info/RECORD → mdbq-2.7.1.dist-info/RECORD CHANGED
@@ -1,18 +1,18 @@
 mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
 mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
 mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
-mdbq/aggregation/aggregation.py,sha256=v_5VM-InaDDvDNjAy_b8xsc38-vf78GkqoXjoe6MZ8U,76569
+mdbq/aggregation/aggregation.py,sha256=wR_rHSu3srNBZPKng-7c3L_FKAuj6cL7GVwTCOAleH4,77125
 mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
 mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
 mdbq/aggregation/optimize_data.py,sha256=gdScrgTAb6RbXHZy1LitX7lggMGn1GTLhkYSgztfwew,4903
-mdbq/aggregation/query_data.py,sha256=gqvKDgg3jrfCcI1VudrnQLJgKHUThZVTqS1zOQ5wgMk,102766
+mdbq/aggregation/query_data.py,sha256=m7Y2xSazPYKvy51yPK6n_Izsv5cjV83oHsiNc7N4fyA,102779
 mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
 mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
 mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
-mdbq/clean/clean_upload.py,sha256=X5WcWm7kkGZDMpk8p0vMq-SFIcrSL1DmVCYWbxYmLVI,86644
+mdbq/clean/clean_upload.py,sha256=_weFInJnBNZxqErIBHt_10SoMLLT5PIV_j_6n84Q_Y8,87490
 mdbq/clean/data_clean.py,sha256=ucfslhqXVZoH2QaXHSAWDky0GhIvH9f4GeNaHg4SrFE,104790
 mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
-mdbq/company/copysh.py,sha256=3ZYm_rTE8nXcbgZlyHsa0y-RAkZ8vcmwkxMy_Jj4F2k,17574
+mdbq/company/copysh.py,sha256=sisL5eo3D5HGGYvRw46xGqnqFaI3SxfBnoa-Y7zknus,17541
 mdbq/company/copysh_bak.py,sha256=NvlXCBZBcO2GIT5nLRYYqhOyHWM1-1RE7DHvgbj6jmQ,19723
 mdbq/company/home_sh.py,sha256=42CZ2tZIXHLl2mOl2gk2fZnjH2IHh1VJ1s3qHABjonY,18021
 mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -44,7 +44,7 @@ mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
 mdbq/req_post/req_tb.py,sha256=PexWSCPJNM6Tv0ol4lAWIhlOwsAr_frnjtcdSHCFiek,36179
 mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
 mdbq/spider/aikucun.py,sha256=4Y5zd64hZUFtll8AdpUc2napDas-La-A6XzAhb2mLv0,17157
-mdbq-2.7.0.dist-info/METADATA,sha256=i_0WznHsXfCR0sToIhC5S4mv3hv1qMUOFFdws6FocOg,245
-mdbq-2.7.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-mdbq-2.7.0.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
-mdbq-2.7.0.dist-info/RECORD,,
+mdbq-2.7.1.dist-info/METADATA,sha256=Dtp6f3EYkLh9ML8akrYeEZ0h6qzcdL1XkYfv2CHkHnM,243
+mdbq-2.7.1.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
+mdbq-2.7.1.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+mdbq-2.7.1.dist-info/RECORD,,
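
For reference, each RECORD row has the form path,sha256=<digest>,<size>, where the digest is the urlsafe-base64 SHA-256 of the file with the trailing '=' padding stripped (PEP 427); the hash and size churn above simply mirrors the source edits. A sketch of how one row is derived:

    import base64
    import hashlib
    import os

    def record_row(path: str) -> str:
        """Build a wheel RECORD line for one file."""
        with open(path, 'rb') as f:
            digest = hashlib.sha256(f.read()).digest()
        b64 = base64.urlsafe_b64encode(digest).rstrip(b'=').decode()
        return f'{path},sha256={b64},{os.path.getsize(path)}'
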

mdbq-2.7.0.dist-info/WHEEL → mdbq-2.7.1.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.44.0)
+Generator: setuptools (70.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
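
The Generator change reflects the build toolchain rather than the code: setuptools 70.1 started building wheels itself, absorbing the wheel project's bdist_wheel command, so newer releases stamp setuptools (70.1.0) where older ones stamped bdist_wheel (0.44.0).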