mdbq-2.0.3-py3-none-any.whl → mdbq-2.0.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -39,10 +39,11 @@ class DatabaseUpdate:
39
39
  self.datas: list = [] # 带更新进数据库的数据集合
40
40
  self.start_date = '2022-01-01' # 日期表的起始日期
41
41
 
42
- def cleaning(self, is_move=True):
42
+ def cleaning(self, is_move=True, is_except=[]):
43
43
  """
44
44
  数据清洗, 返回包含 数据库名, 集合名称, 和 df 主体
45
45
  修改 cleaning 时,要同步 support 下的 标题对照表.csv
46
+ is_except: 需要排除不做处理的文件或文件夹
46
47
  """
47
48
  if not os.path.exists(self.path):
48
49
  print(f'1.1.0 初始化时传入了不存在的目录: {self.path}')
@@ -62,6 +63,16 @@ class DatabaseUpdate:
62
63
  check_remove_file = False # 设置这个参数的目的: 避免误删其他文件, 不是本程序数据清洗覆盖的文件不做干预
63
64
  if '~$' in name or '.DS' in name or '.localized' in name or '.ini' in name or '$RECYCLE.BIN' in name or 'Icon' in name:
64
65
  continue
66
+ is_continue = False
67
+ if is_except:
68
+ for item in is_except:
69
+ if item in os.path.join(root, name):
70
+ # print(name)
71
+ is_continue = True
72
+ break
73
+ if is_continue: # 需要排除不做处理的文件或文件夹
74
+ continue
75
+
65
76
  db_name = None # 初始化/重置变量,避免进入下一个循环
66
77
  collection_name = None
67
78
  for data in datas: # 根据标题对照表适配 db_name 和 collection_name
@@ -755,7 +766,15 @@ class DatabaseUpdate:
755
766
  for name in files:
756
767
  if '~$' in name or '.DS' in name or '.localized' in name or '.jpg' in name or '.png' in name:
757
768
  continue
758
-
769
+ is_continue = False
770
+ if is_except:
771
+ for item in is_except:
772
+ if item in os.path.join(root, name):
773
+ # print(name)
774
+ is_continue = True
775
+ break
776
+ if is_continue: # 需要排除不做处理的文件或文件夹
777
+ continue
759
778
  db_name = None # 初始化/重置变量,避免进入下一个循环
760
779
  collection_name = None
761
780
  for data in datas: # 根据标题对照表适配 db_name 和 collection_name
@@ -1252,7 +1271,7 @@ def test2():
1252
1271
  if __name__ == '__main__':
1253
1272
  username, password, host, port = get_myconf.select_config_values(target_service='nas', database='mysql')
1254
1273
  print(username, password, host, port)
1255
- # file_dir(one_file=False, target_service='home_lx')
1274
+ file_dir(one_file=False, target_service='company')
1256
1275
  # one_file_to_mysql(
1257
1276
  # file='/Users/xigua/数据中心/原始文件2/京东报表/JD推广_全站营销报表/2024-08/万里马箱包推广1_营销概况_全站营销_2024-08-19_2024-09-02.csv',
1258
1277
  # db_name='京东数据2',
@@ -1272,13 +1291,13 @@ if __name__ == '__main__':
1272
1291
  # )
1273
1292
 
1274
1293
 
1275
- # 新版 数据分类
1276
- dp = DatabaseUpdate(path='/Users/xigua/Downloads')
1277
- dp.new_unzip(is_move=True)
1278
- dp.cleaning(is_move=False) # 清洗数据, 存入 self.datas, 不需要立即移除文件,仍保留文件到原始文件中
1279
- # 将 self.datas 更新至数据库
1280
- # dp.upload_df(service_databases=[
1281
- # # {'home_lx': 'mongodb'},
1282
- # {'company': 'mysql'},
1283
- # # {'nas': 'mysql'},
1284
- # ])
1294
+ # # 新版 数据分类
1295
+ # dp = DatabaseUpdate(path='/Users/xigua/Downloads')
1296
+ # dp.new_unzip(is_move=True)
1297
+ # dp.cleaning(is_move=False) # 清洗数据, 存入 self.datas, 不需要立即移除文件,仍保留文件到原始文件中
1298
+ # # 将 self.datas 更新至数据库
1299
+ # # dp.upload_df(service_databases=[
1300
+ # # # {'home_lx': 'mongodb'},
1301
+ # # {'company': 'mysql'},
1302
+ # # # {'nas': 'mysql'},
1303
+ # # ])
@@ -1535,7 +1535,7 @@ def main():
1535
1535
 
1536
1536
 
1537
1537
  if __name__ == '__main__':
1538
- data_aggregation(service_databases=[{'home_lx': 'mysql'}], months=24) # 正常的聚合所有数据
1538
+ data_aggregation(service_databases=[{'company': 'mysql'}], months=7) # 正常的聚合所有数据
1539
1539
  # data_aggregation_one(service_databases=[{'company': 'mysql'}], months=1) # 单独聚合某一个数据库,具体库进函数编辑
1540
1540
  # optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650) # 立即启动对聚合数据的清理工作
1541
1541
 
mdbq/clean/data_clean.py CHANGED
@@ -67,7 +67,7 @@ class DataClean:
67
67
  _df.to_csv(os.path.join(_save_paths, filenames), encoding=encoding, index=False, header=True)
68
68
 
69
69
  # @try_except
70
- def change_and_sort(self, path=None):
70
+ def change_and_sort(self, path=None, is_except=[]):
71
71
  """数据转换"""
72
72
  if not path:
73
73
  path = self.path
@@ -87,6 +87,16 @@ class DataClean:
87
87
  if '~$' in name or '.DS' in name or '.localized' in name or '.jpg' in name or '.png' in name:
88
88
  continue
89
89
 
90
+ is_continue = False
91
+ if is_except:
92
+ for item in is_except:
93
+ if item in os.path.join(root, name):
94
+ # print(name)
95
+ is_continue = True
96
+ break
97
+ if is_continue: # 需要排除不做处理的文件或文件夹
98
+ continue
99
+
90
100
  try:
91
101
  encoding = self.get_encoding(file_path=pathlib.Path(root, name))
92
102
  # ----------------- 推广报表 分割线 -----------------
@@ -272,11 +282,14 @@ class DataClean:
272
282
  df.insert(loc=0, column='数据周期', value=data_lis)
273
283
  df.insert(loc=0, column='日期', value=date01[0])
274
284
  # 2024-2-19 官方更新了推广渠道来源名称
285
+ # df['三级来源'] = df['三级来源'].apply(
286
+ # lambda x: '精准人群推广' if x == '精准人群推广(原引力魔方)'
287
+ # else '关键词推广' if x == '关键词推广(原直通车)'
288
+ # else '智能场景' if x == '智能场景(原万相台)'
289
+ # else x
290
+ # )
275
291
  df['三级来源'] = df['三级来源'].apply(
276
- lambda x: '精准人群推广' if x == '精准人群推广(原引力魔方)'
277
- else '关键词推广' if x == '关键词推广(原直通车)'
278
- else '智能场景' if x == '智能场景(原万相台)'
279
- else x
292
+ lambda x: re.sub('(.*)', '', str(x) if x else x)
280
293
  )
281
294
  # df = df[df['访客数'] != '0']
282
295
  df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
@@ -1130,7 +1143,7 @@ class DataClean:
1130
1143
  bib(t_path)
1131
1144
  elif '_新版' in name:
1132
1145
  t_path = str(pathlib.Path(self.source_path, '生意参谋/流量来源'))
1133
- bib(t_path)
1146
+ bib(t_path, _as_month=True)
1134
1147
  else:
1135
1148
  t_path = str(pathlib.Path(self.source_path, '生意参谋/流量来源_旧版'))
1136
1149
  bib(t_path, _as_month=True)
mdbq/config/products.py CHANGED
@@ -20,6 +20,9 @@ class Products:
20
20
 
21
21
  def update_my_datas(self):
22
22
  my_datas = [
23
+ {
24
+ '平台': '天猫', '商品id': '840499705810', '上市年份': '2024年10月'
25
+ },
23
26
  {
24
27
  '平台': '天猫', '商品id': '830789689032', '上市年份': '2024年9月'
25
28
  },
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mdbq
3
- Version: 2.0.3
3
+ Version: 2.0.5
4
4
  Home-page: https://pypi.org/project/mdbsql
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1,20 +1,20 @@
1
1
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
2
2
  mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
3
3
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
4
- mdbq/aggregation/aggregation.py,sha256=ysHgpGoO2aXme0l1G9fqKMzWnvtTeqPwy07n4CNpdDA,73922
4
+ mdbq/aggregation/aggregation.py,sha256=TiSMZHa9F_f6iMptzCVdukWhCzXzpcYIh3lN61P-i94,74825
5
5
  mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
6
6
  mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
7
7
  mdbq/aggregation/optimize_data.py,sha256=u2Kl_MFtZueXJ57ycy4H2OhXD431RctUYJYCl637uT0,4176
8
- mdbq/aggregation/query_data.py,sha256=ixvfjEai-zIN5fUiVg9nSFrzjiBfJv6QC9_mLTvFfg0,72380
8
+ mdbq/aggregation/query_data.py,sha256=qBNjGTxaQl6rg2-_jlJKGz_sop9UVgoNj5z75XGl_iQ,72379
9
9
  mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
10
10
  mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
11
11
  mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
12
- mdbq/clean/data_clean.py,sha256=kKPVvKq2WVUI9qLF7U-jW1IEJtBE9_a6rwJwpR9hfhI,102556
12
+ mdbq/clean/data_clean.py,sha256=hyhLsX5UEmj2ROVScQMRdR52vUuuLE5uSG5QJ60gtQU,103176
13
13
  mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
14
14
  mdbq/company/copysh.py,sha256=VUaaJPXPYPHWwnkdK77PWz_dAXZyEmYBA9Df1yROHAc,17764
15
15
  mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
16
16
  mdbq/config/get_myconf.py,sha256=cmNvsyoNa0RbZ9FOTjSd3jyyGwkxjUo0phvdHbGlrms,6010
17
- mdbq/config/products.py,sha256=vIK8DJ-F3XXwvNPK-4OJq2tZITNlL6Sub8QBdoOng8U,5676
17
+ mdbq/config/products.py,sha256=L1uhzdbqTprQg_rekKt0ucgpeIuMvi3H2v48_GZWPuY,5803
18
18
  mdbq/config/set_support.py,sha256=xkZCX6y9Bq1ppBpJAofld4B2YtchA7fl0eT3dx3CrSI,777
19
19
  mdbq/config/update_conf.py,sha256=taL3ZqKgiVWwUrDFuaYhim9a72Hm4BHRhhDscJTziR8,4535
20
20
  mdbq/dataframe/__init__.py,sha256=2HtCN8AdRj53teXDqzysC1h8aPL-mMFy561ESmhehGQ,22
@@ -36,7 +36,7 @@ mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
36
36
  mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
37
37
  mdbq/pbix/refresh_all.py,sha256=0uAnBKCd5cx5FLTkawN1GV9yi87rfyMgYal5LABtumQ,7186
38
38
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
39
- mdbq-2.0.3.dist-info/METADATA,sha256=keQRDwBmHcrEjRS5EuqnKz9tfVGRUzntuD3Cmum4dy8,245
40
- mdbq-2.0.3.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
41
- mdbq-2.0.3.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
42
- mdbq-2.0.3.dist-info/RECORD,,
39
+ mdbq-2.0.5.dist-info/METADATA,sha256=q3s1z7iCeWS4qXY4yzg05F7K_JUDYhIp1H5Zlo-uYV4,245
40
+ mdbq-2.0.5.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
41
+ mdbq-2.0.5.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
42
+ mdbq-2.0.5.dist-info/RECORD,,
File without changes