mdbq 2.6.4__tar.gz → 2.6.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {mdbq-2.6.4 → mdbq-2.6.6}/PKG-INFO +1 -1
  2. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/aggregation/aggregation.py +58 -45
  3. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/aggregation/query_data.py +24 -24
  4. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/clean/clean_upload.py +46 -18
  5. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/dataframe/converter.py +4 -1
  6. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/mysql/mysql.py +13 -13
  7. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq.egg-info/PKG-INFO +1 -1
  8. {mdbq-2.6.4 → mdbq-2.6.6}/setup.py +1 -1
  9. {mdbq-2.6.4 → mdbq-2.6.6}/README.txt +0 -0
  10. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/__init__.py +0 -0
  11. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/__version__.py +0 -0
  12. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/aggregation/__init__.py +0 -0
  13. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/aggregation/df_types.py +0 -0
  14. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/aggregation/mysql_types.py +0 -0
  15. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/aggregation/optimize_data.py +0 -0
  16. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/bdup/__init__.py +0 -0
  17. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/bdup/bdup.py +0 -0
  18. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/clean/__init__.py +0 -0
  19. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/clean/data_clean.py +0 -0
  20. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/company/__init__.py +0 -0
  21. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/company/copysh.py +0 -0
  22. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/company/home_sh.py +0 -0
  23. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/config/__init__.py +0 -0
  24. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/config/get_myconf.py +0 -0
  25. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/config/products.py +0 -0
  26. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/config/set_support.py +0 -0
  27. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/config/update_conf.py +0 -0
  28. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/dataframe/__init__.py +0 -0
  29. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/log/__init__.py +0 -0
  30. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/log/mylogger.py +0 -0
  31. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/mongo/__init__.py +0 -0
  32. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/mongo/mongo.py +0 -0
  33. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/mysql/__init__.py +0 -0
  34. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/mysql/s_query.py +0 -0
  35. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/mysql/year_month_day.py +0 -0
  36. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/other/__init__.py +0 -0
  37. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/other/porxy.py +0 -0
  38. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/other/pov_city.py +0 -0
  39. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/other/sku_picture.py +0 -0
  40. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/other/ua_sj.py +0 -0
  41. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/pbix/__init__.py +0 -0
  42. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/pbix/pbix_refresh.py +0 -0
  43. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/pbix/refresh_all.py +0 -0
  44. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/pbix/refresh_all_old.py +0 -0
  45. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/req_post/__init__.py +0 -0
  46. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/req_post/req_tb.py +0 -0
  47. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/spider/__init__.py +0 -0
  48. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq/spider/aikucun.py +0 -0
  49. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq.egg-info/SOURCES.txt +0 -0
  50. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq.egg-info/dependency_links.txt +0 -0
  51. {mdbq-2.6.4 → mdbq-2.6.6}/mdbq.egg-info/top_level.txt +0 -0
  52. {mdbq-2.6.4 → mdbq-2.6.6}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mdbq
3
- Version: 2.6.4
3
+ Version: 2.6.6
4
4
  Home-page: https://pypi.org/project/mdbsql
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1174,36 +1174,40 @@ def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': Tr
1174
1174
  i += 1
1175
1175
  continue
1176
1176
  if name.endswith('.csv'):
1177
- try:
1178
- df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
1179
- if len(df) == 0:
1180
- continue
1181
- # if '新版' not in name:
1182
- # continue
1183
- cv = converter.DataFrameConverter()
1184
- df = cv.convert_df_cols(df=df) # 清理列名和 df 中的非法字符
1185
-
1186
- try:
1187
- df = df.astype(dtypes) # 按本地文件更新 df 的数据类型, 可能因为字段不同产生异常
1188
- except Exception as e:
1189
- print(name, e)
1190
- # 如果发生异常,这将 df 的数据和 json 中的数据取交集
1191
- old_dt = df.dtypes.apply(str).to_dict() # 将 dataframe 数据类型转为字典形式
1192
- intersection_keys = dtypes.keys() & old_dt.keys() # 获取两个字典键的交集
1193
- dtypes = {k: dtypes[k] for k in intersection_keys} # 使用交集的键创建新字典
1194
- df = df.astype(dtypes) # 再次更新 df 的数据类型
1177
+ df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
1178
+ if name.endswith('.xlsx'):
1179
+ df = pd.read_excel(os.path.join(root, name), sheet_name=0, header=0, engine='openpyxl')
1180
+ try:
1181
+ if len(df) == 0:
1182
+ continue
1183
+ # if '新版' not in name:
1184
+ # continue
1185
+ cv = converter.DataFrameConverter()
1186
+ df = cv.convert_df_cols(df=df) # 清理列名和 df 中的非法字符
1195
1187
 
1196
- if dbs['mongodb']:
1197
- d.df_to_mongo(df=df, db_name=db_name, collection_name=collection_name)
1198
- if dbs['mysql']: # drop_duplicates: 值为 True 时检查重复数据再插入
1199
- m.df_to_mysql(df=df, db_name=db_name, table_name=collection_name,
1200
- move_insert=False, # 先删除,再插入
1201
- df_sql = True,
1202
- drop_duplicates=False,
1203
- filename=name, count=f'{i}/{count}')
1204
- # nas.df_to_mysql(df=df, db_name=db_name, table_name=collection_name, drop_duplicates=True,)
1188
+ try:
1189
+ df = df.astype(dtypes) # 按本地文件更新 df 的数据类型, 可能因为字段不同产生异常
1205
1190
  except Exception as e:
1206
1191
  print(name, e)
1192
+ # 如果发生异常,这将 df 的数据和 json 中的数据取交集
1193
+ old_dt = df.dtypes.apply(str).to_dict() # 将 dataframe 数据类型转为字典形式
1194
+ intersection_keys = dtypes.keys() & old_dt.keys() # 获取两个字典键的交集
1195
+ dtypes = {k: dtypes[k] for k in intersection_keys} # 使用交集的键创建新字典
1196
+ df = df.astype(dtypes) # 再次更新 df 的数据类型
1197
+
1198
+ if dbs['mongodb']:
1199
+ d.df_to_mongo(df=df, db_name=db_name, collection_name=collection_name)
1200
+ if dbs['mysql']: # drop_duplicates: 值为 True 时检查重复数据再插入
1201
+ m.df_to_mysql(df=df, db_name=db_name, table_name=collection_name,
1202
+ move_insert=False, # 先删除,再插入
1203
+ df_sql = True,
1204
+ drop_duplicates=False,
1205
+ filename=name, count=f'{i}/{count}',
1206
+ service_database={target_service: 'mysql'}, # 这个参数是用来设置更新哪台服务器的 types 信息到本地 json 文件
1207
+ )
1208
+ # nas.df_to_mysql(df=df, db_name=db_name, table_name=collection_name, drop_duplicates=True,)
1209
+ except Exception as e:
1210
+ print(name, e)
1207
1211
  i += 1
1208
1212
  if dbs['mongodb']:
1209
1213
  if d.client:
@@ -1220,7 +1224,16 @@ def one_file_to_mysql(file, db_name, table_name, target_service, database):
1220
1224
  df = pd.read_csv(file, encoding='utf-8_sig', header=0, na_filter=False, float_precision='high')
1221
1225
  # df.replace(to_replace=[','], value='', regex=True, inplace=True) # 替换掉特殊字符
1222
1226
  m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
1223
- m.df_to_mysql(df=df, db_name=db_name, table_name=table_name, filename=filename, move_insert=False, df_sql=True, drop_duplicates=False,)
1227
+ m.df_to_mysql(
1228
+ df=df,
1229
+ db_name=db_name,
1230
+ table_name=table_name,
1231
+ filename=filename,
1232
+ move_insert=False,
1233
+ df_sql=True,
1234
+ drop_duplicates=False,
1235
+ service_database={target_service: database},
1236
+ )
1224
1237
 
1225
1238
 
1226
1239
  def file_dir(one_file=True, target_service='company'):
@@ -1303,26 +1316,26 @@ if __name__ == '__main__':
1303
1316
  print(username, password, host, port)
1304
1317
  # file_dir(one_file=False, target_service='company')
1305
1318
 
1306
- # 上传 1 个文件到数据库
1307
- one_file_to_mysql(
1308
- file='/Users/xigua/Downloads/万里马箱包推广1_营销概况_qwqw全站营销_2024-08-18_2024-09-01.csv',
1309
- db_name='京东数据3',
1310
- table_name='推广数据_全站营销',
1311
- target_service='company',
1312
- database='mysql'
1313
- )
1314
-
1315
- # # 上传一个目录到指定数据库
1316
- # db_name = '天猫_推广数据3'
1317
- # table_name = '主体报表'
1318
- # upload_dir(
1319
- # path='/Users/xigua/数据中心/原始文件3/天猫推广报表/主体报表',
1320
- # db_name=db_name,
1321
- # collection_name=table_name,
1322
- # dbs={'mysql': True, 'mongodb': False},
1319
+ # # 上传 1 个文件到数据库
1320
+ # one_file_to_mysql(
1321
+ # file='/Users/xigua/Downloads/万里马箱包推广1_营销概况_qwqw全站营销_2024-08-18_2024-09-01.csv',
1322
+ # db_name='京东数据3',
1323
+ # table_name='推广数据_全站营销',
1323
1324
  # target_service='company',
1325
+ # database='mysql'
1324
1326
  # )
1325
1327
 
1328
+ # 上传一个目录到指定数据库
1329
+ db_name = '京东数据3'
1330
+ table_name = '京东商智_spu_商品明细'
1331
+ upload_dir(
1332
+ path='/Users/xigua/数据中心/原始文件3/京东报表/spu_商品明细qwqw',
1333
+ db_name=db_name,
1334
+ collection_name=table_name,
1335
+ dbs={'mysql': True, 'mongodb': False},
1336
+ target_service='company',
1337
+ )
1338
+
1326
1339
 
1327
1340
  # # 新版 数据分类
1328
1341
  # dp = DatabaseUpdate(path='/Users/xigua/Downloads')
@@ -423,7 +423,6 @@ class MysqlDatasQuery:
423
423
  '总订单行': 1,
424
424
  '总订单金额': 1,
425
425
  '总加购数': 1,
426
- '下单新客数(去重)': 1,
427
426
  '领券数': 1,
428
427
  '商品关注数': 1,
429
428
  '店铺关注数': 1,
@@ -493,24 +492,26 @@ class MysqlDatasQuery:
493
492
  return pd.to_datetime(start_date), pd.to_datetime(end_date)
494
493
 
495
494
  @try_except
496
- def tm_search(self):
495
+ def se_search(self):
497
496
  start_date, end_date = self.months_data(num=self.months)
498
497
  projection = {
499
498
  '日期': 1,
500
- '关键词': 1,
499
+ '店铺名称': 1,
500
+ '搜索词': 1,
501
+ '词类行': 1,
501
502
  '访客数': 1,
503
+ '加购人数': 1,
504
+ '商品收藏人数': 1,
502
505
  '支付转化率': 1,
503
- '支付金额': 1,
504
- '下单金额': 1,
505
506
  '支付买家数': 1,
506
- '下单买家数': 1,
507
- '加购人数': 1,
507
+ '支付金额': 1,
508
508
  '新访客': 1,
509
- '店铺名称': 1,
509
+ '客单价': 1,
510
+ 'uv价值': 1,
510
511
  }
511
512
  df = self.download.data_to_df(
512
513
  db_name='生意参谋3',
513
- table_name='店铺来源_手淘搜索',
514
+ table_name='手淘搜索_本店引流词',
514
515
  start_date=start_date,
515
516
  end_date=end_date,
516
517
  projection=projection,
@@ -1462,7 +1463,6 @@ class GroupBy:
1462
1463
  '总订单行': ('总订单行', np.max),
1463
1464
  '总订单金额': ('总订单金额', np.max),
1464
1465
  '总加购数': ('总加购数', np.max),
1465
- '下单新客数': ('下单新客数(去重)', np.max),
1466
1466
  '领券数': ('领券数', np.max),
1467
1467
  '商品关注数': ('商品关注数', np.max),
1468
1468
  '店铺关注数': ('店铺关注数', np.max)
@@ -1476,17 +1476,16 @@ class GroupBy:
1476
1476
  return df
1477
1477
  elif '天猫店铺来源_手淘搜索' in table_name:
1478
1478
  df = df.groupby(
1479
- ['日期', '关键词', '店铺名称'],
1479
+ ['日期', '店铺名称', '搜索词'],
1480
1480
  as_index=False).agg(
1481
1481
  **{
1482
1482
  '访客数': ('访客数', np.max),
1483
- '支付转化率': ('支付转化率', np.max),
1483
+ '加购人数': ('加购人数', np.max),
1484
1484
  '支付金额': ('支付金额', np.max),
1485
- '下单金额': ('下单金额', np.max),
1485
+ '支付转化率': ('支付转化率', np.max),
1486
1486
  '支付买家数': ('支付买家数', np.max),
1487
- '下单买家数': ('下单买家数', np.max),
1488
- '加购人数': ('加购人数', np.max),
1489
- '新访客': ('新访客', np.max),
1487
+ '客单价': ('客单价', np.max),
1488
+ 'uv价值': ('uv价值', np.max)
1490
1489
  }
1491
1490
  )
1492
1491
  return df
@@ -2077,6 +2076,7 @@ def data_aggregation(service_databases=[{}], months=1, is_juhe=True):
2077
2076
  2. 数据聚合清洗
2078
2077
  3. 统一回传数据库: <聚合数据> (不再导出为文件)
2079
2078
  公司台式机调用
2079
+ months: 1+,写 0 表示当月数据,但在每月 1 号时可能会因为返回空数据出错
2080
2080
  """
2081
2081
  for service_database in service_databases:
2082
2082
  for service_name, database in service_database.items():
@@ -2183,14 +2183,14 @@ def data_aggregation(service_databases=[{}], months=1, is_juhe=True):
2183
2183
  '数据库名': '聚合数据',
2184
2184
  '集合名': '天猫店铺来源_手淘搜索', # 暂缺
2185
2185
  '唯一主键': ['日期', '关键词', '访客数'],
2186
- '数据主体': sdq.tm_search(),
2187
- },
2188
- {
2189
- '数据库名': '聚合数据',
2190
- '集合名': '生意参谋_直播场次分析', # 暂缺
2191
- '唯一主键': ['场次id'],
2192
- '数据主体': sdq.zb_ccfx(),
2186
+ '数据主体': sdq.se_search(),
2193
2187
  },
2188
+ # {
2189
+ # '数据库名': '聚合数据',
2190
+ # '集合名': '生意参谋_直播场次分析', # 暂缺
2191
+ # '唯一主键': ['场次id'],
2192
+ # '数据主体': sdq.zb_ccfx(),
2193
+ # },
2194
2194
  {
2195
2195
  '数据库名': '聚合数据',
2196
2196
  '集合名': '多店推广场景_按日聚合',
@@ -2304,6 +2304,6 @@ def main():
2304
2304
 
2305
2305
 
2306
2306
  if __name__ == '__main__':
2307
- data_aggregation(service_databases=[{'company': 'mysql'}], months=0, is_juhe=True) # 正常的聚合所有数据
2307
+ data_aggregation(service_databases=[{'company': 'mysql'}], months=1, is_juhe=True) # 正常的聚合所有数据
2308
2308
  # data_aggregation_one(service_databases=[{'company': 'mysql'}], months=1) # 单独聚合某一个数据库,具体库进函数编辑
2309
2309
  # optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650) # 立即启动对聚合数据的清理工作
@@ -103,6 +103,11 @@ class DataClean:
103
103
  '数据库名': '爱库存2',
104
104
  '集合名称': '商品spu榜单',
105
105
  },
106
+ {
107
+ '文件简称': '手淘搜索_本店引流词_', # 文件名中包含的字符
108
+ '数据库名': '生意参谋3',
109
+ '集合名称': '手淘搜索_本店引流词',
110
+ },
106
111
  ]
107
112
  for root, dirs, files in os.walk(path, topdown=False):
108
113
  for name in files:
@@ -134,7 +139,7 @@ class DataClean:
134
139
  if name.endswith('.xls') and '商品排行_' in name:
135
140
  df = pd.read_excel(os.path.join(root, name), header=4)
136
141
  if len(df) == 0:
137
- print(f'{name} 报表数据为空')
142
+ print(f'{name} 报表数据不能为空')
138
143
  continue
139
144
  df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
140
145
  df.replace(to_replace=[','], value='', regex=True, inplace=True)
@@ -145,6 +150,25 @@ class DataClean:
145
150
  new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
146
151
  self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
147
152
  os.remove(os.path.join(root, name))
153
+ elif name.endswith('.xls') and '手淘搜索_本店引流词_' in name:
154
+ df = pd.read_excel(os.path.join(root, name), header=5, engine='xlrd')
155
+ if len(df) == 0:
156
+ print(f'{name} 报表数据不能为空')
157
+ continue
158
+ df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
159
+ df.replace(to_replace=[','], value='', regex=True, inplace=True)
160
+ df.rename(columns={'统计日期': '日期'}, inplace=True)
161
+ shop_name = re.findall(r'本店.*_([\u4e00-\u9fffA-Za-z]+店)_', name)[0]
162
+ kw_type = re.findall('手淘搜索_本店引流词_([\u4e00-\u9fff]+)_', name)[0]
163
+ df.insert(loc=2, column='词类型', value=kw_type)
164
+ if '店铺名称' in df.columns.tolist():
165
+ df['店铺名称'] = shop_name
166
+ else:
167
+ df.insert(loc=1, column='店铺名称', value=shop_name)
168
+ new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
169
+ self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
170
+ os.remove(os.path.join(root, name))
171
+
148
172
  elif name.endswith('.csv') and '_来源构成_' in name:
149
173
  df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
150
174
  new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
@@ -358,7 +382,7 @@ class DataClean:
358
382
  sheets4 = ['账户', '推广计划', '推广单元', '创意', '品牌流量包', '定向人群'] # 品销宝
359
383
  file_name4 = os.path.splitext(name)[0] # 明星店铺报表
360
384
  for sheet4 in sheets4:
361
- df = pd.read_excel(os.path.join(root, name), sheet_name=sheet4, header=0, engine='openpyxl')
385
+ df = pd.read_excel(os.path.join(root, name), sheet_name=sheet4, header=0, engine='xlrd')
362
386
  if len(df) == 0:
363
387
  print(f'{name} 报表数据为空')
364
388
  os.remove(os.path.join(root, name))
@@ -741,11 +765,11 @@ class DataClean:
741
765
  continue
742
766
 
743
767
  if name.endswith('.xlsx') and '京东推广_' in name:
744
- df = pd.read_excel(os.path.join(root, name), header=0)
768
+ df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
745
769
  new_name = f'py_xg_{name}'
746
770
  os.rename(os.path.join(root, name), os.path.join(root, new_name))
747
771
  elif name.endswith('.xlsx') and '京东商智_sku_商品明细' in name:
748
- df = pd.read_excel(os.path.join(root, name), header=0)
772
+ df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
749
773
  df.replace(to_replace=['-'], value='', regex=False, inplace=True)
750
774
  pattern = re.findall(r'_(\d{4}-\d{2}-\d{2})', name)[0]
751
775
  df.insert(loc=0, column='日期', value=pattern)
@@ -756,7 +780,7 @@ class DataClean:
756
780
  index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
757
781
  os.remove(os.path.join(root, name))
758
782
  elif name.endswith('.xlsx') and '京东商智_spu_商品明细' in name:
759
- df = pd.read_excel(os.path.join(root, name), header=0)
783
+ df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
760
784
  df.replace(to_replace=['-'], value='', regex=False, inplace=True)
761
785
  pattern = re.findall(r'_(\d{4}-\d{2}-\d{2})', name)[0]
762
786
  df.insert(loc=0, column='日期', value=pattern)
@@ -767,7 +791,7 @@ class DataClean:
767
791
  index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
768
792
  os.remove(os.path.join(root, name))
769
793
  elif name.endswith('.xlsx') and '京东商智_店铺来源_三级来源' in name:
770
- df = pd.read_excel(os.path.join(root, name), header=0)
794
+ df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
771
795
  df.replace(to_replace=['-'], value='', regex=False, inplace=True)
772
796
  df.rename(columns={'时间': '日期'}, inplace=True)
773
797
  for col in df.columns.tolist():
@@ -846,7 +870,7 @@ class DataClean:
846
870
 
847
871
  if name.endswith('.xlsx') and '商品素材_' in name:
848
872
  shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)_', name)[0]
849
- df = pd.read_excel(os.path.join(root, name), header=0)
873
+ df = pd.read_excel(os.path.join(root, name), header=0, engine='xlrd')
850
874
  if '日期' not in df.columns.tolist():
851
875
  df.insert(loc=0, column='日期', value=datetime.datetime.today().strftime('%Y-%m-%d'))
852
876
  if '店铺名称' not in df.columns.tolist():
@@ -938,6 +962,9 @@ class DataClean:
938
962
  elif name.endswith('.csv') and '爱库存_商品榜单_' in name:
939
963
  t_path = os.path.join(self.source_path, '爱库存', 'spu商品榜单')
940
964
  bib(t_path, _as_month=True)
965
+ elif name.endswith('.csv') and '手淘搜索_本店引流词_' in name:
966
+ t_path = os.path.join(self.source_path, '生意参谋', '手淘搜索_本店引流词')
967
+ bib(t_path, _as_month=True)
941
968
 
942
969
  def move_dmp(self, path=None, is_except=[]):
943
970
  """ 达摩盘 """
@@ -1249,7 +1276,7 @@ class DataClean:
1249
1276
  new_path = os.path.join(root, zip_name_1) # 拼接解压后的文件路径
1250
1277
  if os.path.isfile(new_path) and '全部渠道_商品明细' in new_path: # 是否存在和包内同名的文件
1251
1278
  # 专门处理京东文件, 已过期可删
1252
- df = pd.read_excel(new_path)
1279
+ df = pd.read_excel(new_path, engine='xlrd')
1253
1280
  try:
1254
1281
  pattern1 = re.findall(r'\d{8}_(\d{4})(\d{2})(\d{2})_全部渠道_商品明细',
1255
1282
  name)
@@ -1517,15 +1544,15 @@ def main(service_databases=None, is_mysql=False):
1517
1544
 
1518
1545
 
1519
1546
  if __name__ == '__main__':
1520
- # main(
1521
- # service_databases = [
1522
- # {'company': 'mysql'},
1523
- # # {'home_lx': 'mysql'},
1524
- # # {'home_lx': 'mongodb'},
1525
- # # {'nas': 'mysql'},
1526
- # ],
1527
- # is_mysql = False, # 清理聚合数据
1528
- # )
1547
+ main(
1548
+ service_databases = [
1549
+ {'company': 'mysql'},
1550
+ # {'home_lx': 'mysql'},
1551
+ # {'home_lx': 'mongodb'},
1552
+ # {'nas': 'mysql'},
1553
+ ],
1554
+ is_mysql = False, # 清理聚合数据
1555
+ )
1529
1556
 
1530
1557
  # c = DataClean(
1531
1558
  # path=upload_path, # 源文件目录,下载文件夹
@@ -1536,4 +1563,5 @@ if __name__ == '__main__':
1536
1563
  # c.move_tg_tm(is_except=['临时文件', ]) # 天猫,移到文件到原始文件夹
1537
1564
 
1538
1565
 
1539
- test()
1566
+ # test()
1567
+
@@ -37,6 +37,7 @@ class DataFrameConverter(object):
37
37
  df.replace(to_replace=['="'], value='', regex=True, inplace=True) # ="和"不可以放在一起清洗, 因为有: id=86785565
38
38
  df.replace(to_replace=['"'], value='', regex=True, inplace=True)
39
39
  cols = df.columns.tolist()
40
+
40
41
  df.reset_index(inplace=True, drop=True) # 重置索引,避免下面的 df.loc[0, col] 会出错
41
42
 
42
43
  for col in cols:
@@ -81,7 +82,9 @@ class DataFrameConverter(object):
81
82
  df[col] = df[col].apply(lambda x: pd.to_datetime(x))
82
83
  except:
83
84
  pass
84
- new_col = col.lower()
85
+ new_col = re.sub(r'[()()-,,$%&~^、* ]', '_', col.lower())
86
+ new_col = re.sub(r'_{2,}', '_', new_col)
87
+ new_col = re.sub(r'_+$', '', new_col)
85
88
  df.rename(columns={col: new_col}, inplace=True)
86
89
  df.fillna(0, inplace=True)
87
90
  return df
@@ -82,7 +82,7 @@ class MysqlUpload:
82
82
  icm_update: 增量更新, 在聚合数据中使用,原始文件不要使用,设置此参数时需将 drop_duplicates 改为 False
83
83
  使用增量更新: 必须确保 icm_update 传进来的列必须是数据表中唯一主键,值不会发生变化,不会重复,否则可能产生错乱覆盖情况
84
84
  filename: 用来追踪处理进度,传这个参数是方便定位产生错误的文件
85
- service_databases: 这个参数是用来设置更新哪台服务器的 types 信息到本地 json 文件
85
+ service_database: 这个参数是用来设置更新哪台服务器的 types 信息到本地 json 文件
86
86
  json_path: 这个参数同样也是是用来设置更新 json 文件
87
87
  """
88
88
  self.filename = filename
@@ -168,18 +168,18 @@ class MysqlUpload:
168
168
  chunksize=1000
169
169
  )
170
170
  # print(f'重置自增')
171
- # 6. 重置自增列
172
- try:
173
- cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
174
- result = cursor.fetchone()
175
- if result:
176
- cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;") # 删除 id 列
177
- cursor.execute(
178
- f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
179
- cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1") # 设置自增从 1 开始
180
- except Exception as e:
181
- print(f'{e}')
182
- connection.rollback()
171
+ # # 6. 重置自增列
172
+ # try:
173
+ # cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
174
+ # result = cursor.fetchone()
175
+ # if result:
176
+ # cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;") # 删除 id 列
177
+ # cursor.execute(
178
+ # f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
179
+ # cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1") # 设置自增从 1 开始
180
+ # except Exception as e:
181
+ # print(f'{e}')
182
+ # connection.rollback()
183
183
  connection.close()
184
184
  return
185
185
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mdbq
3
- Version: 2.6.4
3
+ Version: 2.6.6
4
4
  Home-page: https://pypi.org/project/mdbsql
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -3,7 +3,7 @@
3
3
  from setuptools import setup, find_packages
4
4
 
5
5
  setup(name='mdbq',
6
- version='2.6.4',
6
+ version='2.6.6',
7
7
  author='xigua, ',
8
8
  author_email="2587125111@qq.com",
9
9
  url='https://pypi.org/project/mdbsql',
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes