mdbq 2.4.5-py3-none-any.whl → 2.4.7-py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
mdbq/aggregation/aggregation.py CHANGED
@@ -260,6 +260,7 @@ class DatabaseUpdate:
 if len(df) == 0:
     print(f'{name} 报表数据为空')
     check_remove_file = True
+    os.remove(os.path.join(root, name))
     continue
 df.replace(to_replace=[','], value='', regex=True, inplace=True)
 df.insert(loc=0, column='日期', value=pattern[0][1])
@@ -398,7 +399,7 @@ class DatabaseUpdate:
 pattern = re.findall(r'(.*[\u4e00-\u9fa5])(\d{4})(\d{2})(\d{2})\.', name)
 if not pattern or '省份城市分析2' not in name:
     print(f'{name} 不支持或已转换的表格')
-    # os.remove(os.path.join(root, name))  # 直接删掉,避免被分到原始文件, encoding 不同会引发错误
+    os.remove(os.path.join(root, name))  # 直接删掉,避免被分到原始文件, encoding 不同会引发错误
     check_remove_file = True
     continue
 date = '-'.join(pattern[0][1:])
@@ -406,6 +407,7 @@ class DatabaseUpdate:
 if len(df) == 0:
     print(f'{name} 报表数据为空')
     check_remove_file = True
+    os.remove(os.path.join(root, name))
     continue
 df['省'] = df['省份'].apply(lambda x: x if ' ├─ ' not in x and ' └─ ' not in x else None)
 df['城市'] = df[['省份', '省']].apply(lambda x: '汇总' if x['省'] else x['省份'], axis=1)
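The three hunks above apply one fix: a report file that parses to an empty DataFrame, or whose format is unsupported, is now deleted on the spot with os.remove rather than only being flagged through check_remove_file, so it can no longer be mis-sorted into the raw-file folders where, per the restored comment, encoding differences caused errors. A minimal sketch of the pattern, with pd.read_csv standing in for the package's own per-report parsing:

    import os
    import pandas as pd

    def sweep_empty_reports(base_dir):
        # Walk a download folder and delete any report whose DataFrame is
        # empty, mirroring the cleanup added in 2.4.7 (a sketch, not mdbq API).
        for root, _, files in os.walk(base_dir):
            for name in files:
                if not name.endswith('.csv'):
                    continue
                df = pd.read_csv(os.path.join(root, name))
                if len(df) == 0:
                    print(f'{name}: report is empty, removing')
                    os.remove(os.path.join(root, name))
                    continue
                # ...normal per-report processing would follow here...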
@@ -1291,7 +1293,7 @@ def test2():
     dp.cleaning(is_move=False, )  # 清洗数据, 存入 self.datas
     dp.upload_df(service_databases=[
         # {'home_lx': 'mongodb'},
-        {'home_lx': 'mysql'},
+        {'company': 'mysql'},
         # {'nas': 'mysql'}
     ], path=None, service_name=None)
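For context, service_databases is a list of single-entry dicts mapping a host alias to a backend type; the hunk above simply repoints test2() from the 'home_lx' host to 'company'. A self-contained illustration of the shape, using only aliases visible in this diff:

    # Each entry maps one service/host alias to the database type it runs.
    service_databases = [
        # {'home_lx': 'mongodb'},  # disabled in test2()
        {'company': 'mysql'},      # 2.4.7: tests now target the company MySQL host
    ]
    for entry in service_databases:
        for service_name, db_type in entry.items():
            print(service_name, '->', db_type)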
mdbq/aggregation/query_data.py CHANGED
@@ -998,6 +998,29 @@ class GroupBy:
     # df = df.head(1000)
     # df.to_csv('/Users/xigua/Downloads/test.csv', index=False, header=True, encoding='utf-8_sig')
     # breakpoint()
+
+    # 下面是添加人群 AIPL 分类
+    dir_file = f'\\\\192.168.1.198\\时尚事业部\\01.运营部\\0-电商周报-每周五更新\\分类配置文件.xlsx'
+    dir_file2 = '/Volumes/时尚事业部/01.运营部/0-电商周报-每周五更新/分类配置文件.xlsx'
+    if not os.path.isfile(dir_file):
+        dir_file = dir_file2
+    if os.path.isfile(dir_file):
+        df_fl = pd.read_excel(dir_file, sheet_name='人群分类', header=0)
+        df_fl = df_fl[['人群名字', '人群分类']]
+        # 合并并获取分类信息
+        df = pd.merge(df, df_fl, left_on=['人群名字'], right_on=['人群名字'], how='left')
+        df['人群分类'].fillna('', inplace=True)
+    if '人群分类' in df.columns.tolist():
+        # 这行决定了,从文件中读取的分类信息优先级高于内部函数的分类规则
+        df['人群分类'] = df.apply(
+            lambda x: self.set_crowd(keyword=str(x['人群名字']), as_file=False) if x['人群分类'] == ''
+            else x['人群分类'], axis=1
+        )
+    else:
+        df['人群分类'] = df['人群名字'].apply(lambda x: self.set_crowd(keyword=str(x), as_file=False))
+    df['人群分类'] = df['人群分类'].apply(lambda x: str(x).upper() if x else x)
+    # df.to_csv('/Users/xigua/Downloads/test_人群分类.csv', index=False, header=True, encoding='utf-8_sig')
+    # breakpoint()
     return df
 
 elif '天猫_关键词报表' in table_name:
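The new block makes labels loaded from the shared Excel workbook (sheet 人群分类) take priority over the rule-based set_crowd fallback: a left merge brings in the file's labels, only blank rows fall through to set_crowd, and the result is uppercased. A condensed, runnable sketch of that priority logic, with classify_fallback as a stand-in for GroupBy.set_crowd:

    import pandas as pd

    def classify_fallback(name):
        # Stand-in for GroupBy.set_crowd: rule-based AIPL guess from the name.
        return 'A' if '_a_' in str(name).lower() else ''

    df = pd.DataFrame({'人群名字': ['x_a_new', 'y_retarget']})
    df_fl = pd.DataFrame({'人群名字': ['y_retarget'], '人群分类': ['i']})  # as if read from the Excel file

    df = pd.merge(df, df_fl, on='人群名字', how='left')
    df['人群分类'] = df['人群分类'].fillna('')
    # File labels win; the fallback only fills rows the file did not cover.
    df['人群分类'] = df.apply(
        lambda x: classify_fallback(x['人群名字']) if x['人群分类'] == '' else x['人群分类'],
        axis=1,
    )
    df['人群分类'] = df['人群分类'].apply(lambda x: str(x).upper() if x else x)
    print(df)  # y_retarget keeps 'I' from the file; x_a_new falls back to 'A'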
@@ -1620,6 +1643,41 @@ class GroupBy:
                 break
         return result
 
+    def set_crowd(self, keyword, as_file=False):
+        """ 推广人群报表,人群分类, """
+        result_a = re.findall('_a$|_a_|_ai|^a_', str(keyword), re.IGNORECASE)
+        result_i = re.findall('_i$|_i_|^i_', str(keyword), re.IGNORECASE)
+        result_p = re.findall('_p$|_p_|_pl|^p_||^pl_', str(keyword), re.IGNORECASE)
+        result_l = re.findall('_l$|_l_|^l_', str(keyword), re.IGNORECASE)
+
+        datas = [
+            {
+                '类别': 'A',
+                '值': result_a,
+            },
+            {
+                '类别': 'I',
+                '值': result_i,
+            },
+            {
+                '类别': 'P',
+                '值': result_p,
+            },
+            {
+                '类别': 'L',
+                '值': result_l,
+            }
+        ]
+
+        is_res = False
+        for data in datas:
+            if data['值']:
+                data['值'] = [item for item in data['值'] if item != '']
+                if data['值']:
+                    return data['类别']
+        if not is_res:
+            return ''
+
     # @try_except
     def performance(self, bb_tg=True):
         # print(self.data_tgyj)
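set_crowd buckets a crowd name into A/I/P/L by trying four regex alternations in order and returning the first class with a real match. Two details worth noting: the released P pattern contains a doubled '|' ('^p_||^pl_'), an empty alternative that makes findall yield empty-string matches for every keyword, which is why the loop filters out '' before deciding; and is_res is never set to True, so the final return '' always fires for unmatched names. A compact restatement of the same logic (a sketch with the empty alternative dropped, not the package's API):

    import re

    # Ordered (class, pattern) pairs; the first pattern with a non-empty
    # match decides the class, exactly as set_crowd does.
    AIPL_PATTERNS = [
        ('A', r'_a$|_a_|_ai|^a_'),
        ('I', r'_i$|_i_|^i_'),
        ('P', r'_p$|_p_|_pl|^p_|^pl_'),  # released code has '||' here
        ('L', r'_l$|_l_|^l_'),
    ]

    def classify_aipl(keyword):
        for label, pattern in AIPL_PATTERNS:
            hits = [m for m in re.findall(pattern, str(keyword), re.IGNORECASE) if m != '']
            if hits:
                return label
        return ''

    print(classify_aipl('brand_AI_retarget'))  # -> 'A' (matches _ai)
    print(classify_aipl('pl_newcustomer'))     # -> 'P' (matches ^pl_)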
@@ -1870,7 +1928,7 @@ def data_aggregation_one(service_databases=[{}], months=1):
         )  # 3. 回传数据库
 
 
-def data_aggregation(service_databases=[{}], months=1):
+def data_aggregation(service_databases=[{}], months=1, is_juhe=True):
     """
     1. 从数据库中读取数据
     2. 数据聚合清洗
@@ -2054,54 +2112,51 @@ def data_aggregation(service_databases=[{}], months=1):
             icm_update=unique_key_list,
             service_database=service_database,
         )  # 3. 回传数据库
-        res = g.performance(bb_tg=True)  # 盈亏表,依赖其他表,单独做
-        m.df_to_mysql(
-            df=res,
-            db_name='聚合数据',
-            table_name='_全店商品销售',
-            move_insert=True,  # 先删除,再插入
-            # df_sql=True,
-            # drop_duplicates=False,
-            # icm_update=['日期', '商品id'],  # 设置唯一主键
-            service_database=service_database,
-        )
-        res = g.performance(bb_tg=False)  # 盈亏表,依赖其他表,单独做
-        m.df_to_mysql(
-            df=res,
-            db_name='聚合数据',
-            table_name='_推广商品销售',
-            move_insert=True,  # 先删除,再插入
-            # df_sql=True,
-            # drop_duplicates=False,
-            # icm_update=['日期', '商品id'],  # 设置唯一主键
-            service_database=service_database,
-        )
-
-        res = g.performance_concat(bb_tg=False)  # 推广主体合并直播表,依赖其他表,单独做
-        m.df_to_mysql(
-            df=res,
-            db_name='聚合数据',
-            table_name='天猫_推广汇总',
-            move_insert=True,  # 先删除,再插入
-            # df_sql=True,
-            # drop_duplicates=False,
-            # icm_update=['日期', '推广渠道', '营销场景', '商品id', '花费', '展现量', '点击量'],  # 设置唯一主键
-            service_database=service_database,
-        )
-
-
-        res = g.performance_jd(jd_tg=False)  # 盈亏表,依赖其他表,单独做
-        m.df_to_mysql(
-            df=res,
-            db_name='聚合数据',
-            table_name='_京东_推广商品销售',
-            move_insert=True,  # 先删除,再插入
-            # df_sql=True,
-            # drop_duplicates=False,
-            # icm_update=['日期', '跟单sku id', '货号', '花费'],  # 设置唯一主键
-            service_database=service_database,
-        )
-
+        if is_juhe:
+            res = g.performance(bb_tg=True)  # 盈亏表,依赖其他表,单独做
+            m.df_to_mysql(
+                df=res,
+                db_name='聚合数据',
+                table_name='_全店商品销售',
+                move_insert=True,  # 先删除,再插入
+                # df_sql=True,
+                # drop_duplicates=False,
+                # icm_update=['日期', '商品id'],  # 设置唯一主键
+                service_database=service_database,
+            )
+            res = g.performance(bb_tg=False)  # 盈亏表,依赖其他表,单独做
+            m.df_to_mysql(
+                df=res,
+                db_name='聚合数据',
+                table_name='_推广商品销售',
+                move_insert=True,  # 先删除,再插入
+                # df_sql=True,
+                # drop_duplicates=False,
+                # icm_update=['日期', '商品id'],  # 设置唯一主键
+                service_database=service_database,
+            )
+            res = g.performance_concat(bb_tg=False)  # 推广主体合并直播表,依赖其他表,单独做
+            m.df_to_mysql(
+                df=res,
+                db_name='聚合数据',
+                table_name='天猫_推广汇总',
+                move_insert=True,  # 先删除,再插入
+                # df_sql=True,
+                # drop_duplicates=False,
+                # icm_update=['日期', '推广渠道', '营销场景', '商品id', '花费', '展现量', '点击量'],  # 设置唯一主键
+                service_database=service_database,
+            )
+            res = g.performance_jd(jd_tg=False)  # 盈亏表,依赖其他表,单独做
+            m.df_to_mysql(
+                df=res,
+                db_name='聚合数据',
+                table_name='_京东_推广商品销售',
+                move_insert=True,  # 先删除,再插入
+                # df_sql=True,
+                # drop_duplicates=False,
+                # icm_update=['日期', '跟单sku id', '货号', '花费'],  # 设置唯一主键
+                service_database=service_database,
+            )
 
     # 这里要注释掉,不然 copysh.py 可能有问题,这里主要修改配置文件,后续触发 home_lx 的 optimize_datas.py(有s)程序进行全局清理
     # optimize_data.op_data(service_databases=service_databases, days=3650)  # 立即启动对聚合数据的清理工作
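The four summary tables built here (_全店商品销售, _推广商品销售, 天猫_推广汇总, _京东_推广商品销售) depend on tables written earlier in the same run, so 2.4.7 gates the whole pass behind the new is_juhe flag instead of always executing it. A schematic of the resulting control flow, with a stub in place of the real GroupBy/MysqlUpload calls:

    def build_dependent_tables():
        # Stand-in for the g.performance / g.performance_concat /
        # g.performance_jd calls and their m.df_to_mysql uploads.
        print('building profit/loss and merged promotion tables')

    def data_aggregation(service_databases=[{}], months=1, is_juhe=True):
        print(f'aggregating {months} month(s) for {service_databases}')
        # ...per-table read / clean / upload happens unconditionally...
        if is_juhe:  # 2.4.7: the dependent pass can now be skipped
            build_dependent_tables()

    # Matches the new call in __main__ below:
    data_aggregation(service_databases=[{'company': 'mysql'}], months=0, is_juhe=False)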
@@ -2112,6 +2167,6 @@ def main():
 
 
 if __name__ == '__main__':
-    data_aggregation(service_databases=[{'company': 'mysql'}], months=0)  # 正常的聚合所有数据
+    data_aggregation(service_databases=[{'company': 'mysql'}], months=0, is_juhe=False)  # 正常的聚合所有数据
     # data_aggregation_one(service_databases=[{'company': 'mysql'}], months=1)  # 单独聚合某一个数据库,具体库进函数编辑
     # optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650)  # 立即启动对聚合数据的清理工作
mdbq/mysql/mysql.py CHANGED
@@ -167,7 +167,7 @@ class MysqlUpload:
             index=False,
             chunksize=1000
         )
-        print(f'重置自增')
+        # print(f'重置自增')
         # 6. 重置自增列
         try:
             cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
mdbq-2.4.5.dist-info/METADATA → mdbq-2.4.7.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mdbq
-Version: 2.4.5
+Version: 2.4.7
 Home-page: https://pypi.org/project/mdbsql
 Author: xigua,
 Author-email: 2587125111@qq.com
mdbq-2.4.5.dist-info/RECORD → mdbq-2.4.7.dist-info/RECORD CHANGED
@@ -1,11 +1,11 @@
 mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
 mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
 mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
-mdbq/aggregation/aggregation.py,sha256=kHKCBRMfpv0F1dq4n75LqOt44K4lRWQCt8MeM6n6hnM,76013
+mdbq/aggregation/aggregation.py,sha256=nPp5fOLktxejNEak3SyTnKLjwzK1l2xjbV45X-I4LFQ,76131
 mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
 mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
 mdbq/aggregation/optimize_data.py,sha256=Wis40oL04M7E1pkvgNPjyVFAUe-zgjimjIVAikxYY8Y,4418
-mdbq/aggregation/query_data.py,sha256=b-beZv7JHMUe7jK0wr-cNClMVYOop3meb1fAGSadfIQ,96001
+mdbq/aggregation/query_data.py,sha256=_RYPlUDg5Hmfm663gx7vGh0HHfgnik9BDqLPQCl9w5A,98932
 mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
 mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
 mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
@@ -25,7 +25,7 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
 mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
 mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
 mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
-mdbq/mysql/mysql.py,sha256=D_0KO7JGFId9brcC6UsHKPgmKx7JMJLd68fqCh8XsSw,46848
+mdbq/mysql/mysql.py,sha256=ZJ5hrwB-oJgvfcTI9Y2E_vwmCNgbxmiU0AJLeCfQFJ8,46850
 mdbq/mysql/s_query.py,sha256=37GGHzRpycfUjsYEoQgDpdEs9JwjW-LxFXnGwwP2b2Q,8403
 mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
 mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -41,7 +41,7 @@ mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
 mdbq/req_post/req_tb.py,sha256=PexWSCPJNM6Tv0ol4lAWIhlOwsAr_frnjtcdSHCFiek,36179
 mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
 mdbq/spider/aikucun.py,sha256=Olq7IJP9itM4wuNxZeHOG-Q3i8wWyB4hY8TUGGwCvQ0,14104
-mdbq-2.4.5.dist-info/METADATA,sha256=Viga-WzhqgOumbM49YE8NbTlKMQ-L-vdJkDSueMeI3E,245
-mdbq-2.4.5.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-mdbq-2.4.5.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
-mdbq-2.4.5.dist-info/RECORD,,
+mdbq-2.4.7.dist-info/METADATA,sha256=dBLtWhjOK1TZ87LrmoRgLAz7NHJYtQiC01dRimYwZ90,245
+mdbq-2.4.7.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+mdbq-2.4.7.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+mdbq-2.4.7.dist-info/RECORD,,
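The RECORD entries above follow the standard wheel format: path, sha256 of the file bytes encoded as urlsafe base64 with '=' padding stripped, and size in bytes. To recompute any entry locally from an unpacked wheel:

    import base64
    import hashlib

    def record_hash(path):
        # Reproduce a wheel RECORD digest for one file.
        with open(path, 'rb') as f:
            digest = hashlib.sha256(f.read()).digest()
        return 'sha256=' + base64.urlsafe_b64encode(digest).rstrip(b'=').decode()

    print(record_hash('mdbq/mysql/mysql.py'))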