mdbq-2.8.3-py3-none-any.whl → mdbq-2.8.5-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
mdbq/aggregation/aggregation.py CHANGED
@@ -1201,8 +1201,8 @@ def one_file_to_mysql(file, db_name, table_name):
 
 
 def test():
-    path = os.path.relpath(r'/Users/xigua/Downloads/直播间')
-    results = []
+    path = os.path.relpath(r'/Users/xigua/Downloads/未命名文件夹')
+
     for root, dirs, files in os.walk(path, topdown=False):
         for name in files:
             if name.endswith('.csv') and 'baidu' not in name and '~' not in name:
@@ -1212,43 +1212,35 @@ def test():
                 # print(name)
                 if len(df) == 0:
                     continue
-                results.append(df)
-                # df = df[df['日期'] != '']
+                # df.insert(loc=1, column='店铺名称', value='万里马官方旗舰店')
+                if '颜色编码' in df.columns.tolist():
+                    print(name)
+                    df.pop('颜色编码')
+                    df.to_csv(os.path.join(root, name), encoding='utf-8_sig', index=False, header=True)
+                # pattern = re.findall(r'\d{4}-\d{2}-\d{2}_\d{4}-\d{2}-\d{2}', name)[0]
+                # new_name = f'py_xg_店铺销售指标_万里马官方旗舰店_{pattern}.csv'
                 # df.to_csv(os.path.join(root, name), encoding='utf-8_sig', index=False, header=True)
-    df = pd.concat(results)
-
-    df.rename(columns={
-        '订单id': '计划ID',
-        '投资回报率': '投入产出比',
-        '直接投资回报率': '直接成交投入产出比',
-        '预售金额': '直接成交投入产出比',
-        '直接投资回报率': '总预售成交金额',
-        '预售订单量': '总预售成交笔数',
-        # '商品点击量': '展现量',
-        # '商品点击率': '观看率',
-    }, inplace=True)
-
-    df.to_csv(os.path.join('/Users/xigua/Downloads/', f'py_xg_tg_report_超级直播报表_人群_万里马官方旗舰店_.csv'), encoding='utf-8_sig', index=False, header=True)
+                # os.remove(os.path.join(root, name))
 
 
 if __name__ == '__main__':
-    username = 'root'
-    password = ''
-    host = ''
-    port = ''
-
-    # 上传 1 个文件到数据库
-    one_file_to_mysql(
-        file=r'/Users/xi',
-        db_name='推广数据2',
-        table_name='超级直播',
-    )
+    # username = 'root'
+    # password = ''
+    # host = ''
+    # port = ''
+    #
+    # # 上传 1 个文件到数据库
+    # one_file_to_mysql(
+    #     file=r'/Users/xi',
+    #     db_name='推广数据2',
+    #     table_name='超级直播',
+    # )
 
     # # 上传一个目录到指定数据库
-    # db_name = '推广数据2'
-    # table_name = '超级直播'
+    # db_name = '生意经3'
+    # table_name = '宝贝指标'
    # upload_dir(
-    #     path=os.path.relpath(r'/Users/xigua/数据中心/原始文件3/天猫推广报表/超级直播报表_人群/2024-04'),
+    #     path=os.path.relpath(r'/Users/xigua/数据中心/原始文件3/生意经/宝贝指标sdff'),
    #     db_name=db_name,
    #     collection_name=table_name,
    # )
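The rewritten test() above is now a one-off cleanup pass: it walks a download directory and strips a stray 颜色编码 column from any CSV that still carries it. A minimal standalone sketch of that pattern, assuming utf-8-sig CSVs and a placeholder path; the function name and defaults are illustrative, not part of mdbq:

    import os
    import pandas as pd

    def drop_column_from_csvs(path, column='颜色编码'):
        """Rewrite in place any CSV under `path` that still has `column`."""
        for root, dirs, files in os.walk(path, topdown=False):
            for name in files:
                if not name.endswith('.csv') or '~' in name:
                    continue
                file_path = os.path.join(root, name)
                df = pd.read_csv(file_path, encoding='utf-8_sig', header=0, na_filter=False)
                if len(df) == 0 or column not in df.columns:
                    continue  # empty report or already clean
                df.pop(column)  # drop the unwanted column, as test() does
                df.to_csv(file_path, encoding='utf-8_sig', index=False, header=True)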
mdbq/aggregation/mysql_types.py CHANGED
@@ -64,7 +64,7 @@ class DataTypes:
     def get_mysql_types(self, cl, dtypes, db_name, table_name, is_file_dtype=True):
         """ 更新 mysql 的 types 信息到 json 文件 """
         if cl in self.datas.keys():
-            if db_name in list(self.datas[cl].keys()):  # ['京东数据2', '推广数据2', '生意参谋2', '生意经2']
+            if db_name in list(self.datas[cl].keys()):  # ['京东数据2', '推广数据2', '生意参谋2', '生意经3']
                 if table_name in list(self.datas[cl][db_name].keys()):
                     if is_file_dtype:  # 旧数据优先
                         # # 用 dtypes 更新, 允许手动指定 json 文件里面的数据类型
@@ -182,7 +182,7 @@ def mysql_all_dtypes(db_name=None, table_name=None, path=None):
     sys_lists = ['information_schema', 'mysql', 'performance_schema', 'sakila', 'sys']
     db_name_lists = [item for item in db_name_lists if item not in sys_lists]
 
-    results = []  # 返回结果示例: [{'云电影': '电影更新'}, {'生意经2': 'e3_零售明细统计'}]
+    results = []  # 返回结果示例: [{'云电影': '电影更新'}, {'生意经3': 'e3_零售明细统计'}]
     for db_ in db_name_lists:
         config.update({'database': db_})  # 添加更新 config 字段
         connection = pymysql.connect(**config)  # 连接数据库
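For context, mysql_all_dtypes enumerates every non-system schema and opens a fresh connection per database by mutating a shared config dict, as the context lines above show. A self-contained sketch of that loop under assumed placeholder credentials (mdbq loads the real ones from its own config):

    import pymysql

    config = {'host': 'localhost', 'port': 3306, 'user': 'root',
              'password': '***', 'charset': 'utf8mb4'}  # placeholders

    connection = pymysql.connect(**config)
    with connection.cursor() as cursor:
        cursor.execute('SHOW DATABASES;')
        db_name_lists = [row[0] for row in cursor.fetchall()]
    connection.close()

    sys_lists = ['information_schema', 'mysql', 'performance_schema', 'sakila', 'sys']
    for db_ in [db for db in db_name_lists if db not in sys_lists]:
        config.update({'database': db_})        # retarget the next connection
        connection = pymysql.connect(**config)  # one connection per schema
        try:
            with connection.cursor() as cursor:
                cursor.execute('SHOW TABLES;')
                print(db_, [row[0] for row in cursor.fetchall()])
        finally:
            connection.close()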
mdbq/aggregation/query_data.py CHANGED
@@ -160,7 +160,7 @@ class MysqlDatasQuery:
             '退货量_发货后': 1,
         }
         df = self.download.data_to_df(
-            db_name='生意经2',
+            db_name='生意经3',
             table_name='宝贝指标',
             start_date=start_date,
             end_date=end_date,
@@ -283,7 +283,7 @@ class MysqlDatasQuery:
     def idbm(self):
         """ 用生意经日数据制作商品 id 和编码对照表 """
         data_values = self.download.columns_to_list(
-            db_name='生意经2',
+            db_name='生意经3',
             table_name='宝贝指标',
             columns_name=['宝贝id', '商家编码', '行业类目'],
         )
mdbq/clean/clean_upload.py CHANGED
@@ -487,22 +487,22 @@ class DataClean:
         report_names = [
             {
                 '文件简称': 'baobei',
-                '数据库名': '生意经2',
+                '数据库名': '生意经3',
                 '集合名称': '宝贝指标',
             },
             {
                 '文件简称': 'order',
-                '数据库名': '生意经2',
+                '数据库名': '生意经3',
                 '集合名称': '订单数据',
             },
             {
                 '文件简称': '省份城市分析',
-                '数据库名': '生意经2',
+                '数据库名': '生意经3',
                 '集合名称': '省份城市分析',
             },
             {
                 '文件简称': '店铺销售指标',
-                '数据库名': '生意经2',
+                '数据库名': '生意经3',
                 '集合名称': '店铺销售指标',
             },
         ]
@@ -538,25 +538,23 @@ class DataClean:
             if name.endswith('.csv') and 'baobei' in name:
                 encoding = self.get_encoding(file_path=os.path.join(root, name))
                 df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
-                pattern = re.findall(r'-(\d{4})(\d{2})(\d{2})\W', name)[0]
-                df['日期'] = '-'.join(pattern)
+                p = df.pop('日期')
+                df.insert(loc=0, column='日期', value=p)
+                df['日期'] = df['日期'].apply(lambda x: '-'.join(re.findall(r'(\d{4})(\d{2})(\d{2})', str(x))[0]) if int(x) > 0 else '')
                 df.replace(to_replace=['--'], value='', regex=False, inplace=True)
-                new_name = f'py_xg_天猫_baobeitrains_{'-'.join(pattern)}.csv'
+                new_name = f'py_xg_{name}'
                 self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
                 os.remove(os.path.join(root, name))
             elif name.endswith('.csv') and 'order' in name:
-                """ 这里不能使用表格原先的 gb2312, 会报错 """
-                # encoding = self.get_encoding(file_path=os.path.join(root, name))
-                df = pd.read_csv(os.path.join(root, name), encoding='gb18030', header=0, na_filter=False)
-                pattern = re.findall(r'(.*)(\d{4})(\d{2})(\d{2})-(\d{4})(\d{2})(\d{2})', name)[0]
-                date1 ='-'.join(pattern[1:4])
-                date2 = '-'.join(pattern[4:7])
-                df.insert(loc=0, column='日期', value=date1)
-                df.insert(loc=1, column='数据周期', value=f'{date1}_{date2}')
+                """ 如果是手动下载的表格,这里不能使用表格原先的 gb2312, 会报错 """
+                # df = pd.read_csv(os.path.join(root, name), encoding='gb18030', header=0, na_filter=False)
+                encoding = self.get_encoding(file_path=os.path.join(root, name))
+                df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
                 df.rename(columns={'宝贝标题': '商品标题', '宝贝链接': '商品链接'}, inplace=True)
-                df['颜色编码'] = df['商家编码'].apply(
-                    lambda x: ''.join(re.findall(r' .*(\d{4})$', str(x))) if x else x)
-                new_name = f'py_xg_天猫_order_{date1}_{date2}.csv'
+                df['日期'] = df['日期'].apply(lambda x: '-'.join(re.findall(r'(\d{4})(\d{2})(\d{2})', str(x))[0]) if int(x) > 0 else '')
+                df['商品id'] = df.apply(lambda x: re.sub(r'.*id=', '', x['商品链接']), axis=1)
+                df = df[df['订单号'] != '']
+                new_name = f'py_xg_{name}'
                 self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
                 os.remove(os.path.join(root, name))
             elif name.endswith('.csv') and '省份城市分析' in name:
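The 日期 (date) normalization that this release repeats across the baobei, order, and 店铺销售指标 branches is a single lambda. Unrolled into a named helper for readability (the name is hypothetical; like the original it assumes the raw values are numeric YYYYMMDD):

    import re

    def normalize_date(x):
        """Turn a YYYYMMDD int/str such as 20240408 into '2024-04-08';
        non-positive values become '', as in the lambda above."""
        if int(x) <= 0:
            return ''
        return '-'.join(re.findall(r'(\d{4})(\d{2})(\d{2})', str(x))[0])

    assert normalize_date(20240408) == '2024-04-08'
    assert normalize_date('20240408') == '2024-04-08'
    assert normalize_date(0) == ''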
@@ -585,27 +583,15 @@ class DataClean:
                 os.remove(os.path.join(root, name))
             elif name.endswith('.csv') and '店铺销售指标' in name:
                 # 生意经, 店铺指标,仅限月数据,实际日指标也可以
-                name_st = re.findall(r'([\u4e00-\u9fa5]+)\(分日', name)
-                if not name_st:
-                    print(f'{name} 正则提取文件名失败')
-                    os.remove(os.path.join(root, name))
-                    continue
                 encoding = self.get_encoding(file_path=os.path.join(root, name))
                 df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
                 if len(df) == 0:
                     print(f'{name} 报表数据为空')
                     os.remove(os.path.join(root, name))
                     continue
-                df['日期'] = df['日期'].astype(str).apply(
-                    lambda x: '-'.join(re.findall(r'(\d{4})(\d{2})(\d{2})', x)[0]) if x else x)
-                df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')  # 转换日期列
-                # min_clm = str(df.min()['日期']).split(' ')[0]
-                # max_clm = str(df.max()['日期']).split(' ')[0]
-                min_clm = str(df['日期'].min()).split(' ')[0]
-                max_clm = str(df['日期'].max()).split(' ')[0]
-                new_name = f'py_xg_天猫_{name_st[0]}-{min_clm}_{max_clm}.csv'  # 保存时将(分日)去掉
+                df['日期'] = df['日期'].apply(lambda x: '-'.join(re.findall(r'(\d{4})(\d{2})(\d{2})', str(x))[0]) if int(x) > 0 else '')
                 df.replace(to_replace=['--'], value='', regex=False, inplace=True)
-                df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
+                new_name = f'py_xg_{name}'
                 self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
                 os.remove(os.path.join(root, name))
 
@@ -832,6 +818,8 @@ class DataClean:
                 # df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
                 df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
                 new_name = f'py_xg_{name}'
+                if os.path.isfile(os.path.join(root, new_name)):
+                    os.remove(os.path.join(root, new_name))
                 os.rename(os.path.join(root, name), os.path.join(root, new_name))
             elif name.endswith('.xlsx') and '京东商智_sku_商品明细' in name:
                 df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
@@ -950,6 +938,8 @@ class DataClean:
             elif name.endswith('.csv') and ('商品类目属性' in name or '商品主图视频' in name or '商品sku属性' in name):
                 df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
                 new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
+                if os.path.isfile(os.path.join(root, new_name)):
+                    os.remove(os.path.join(root, new_name))
                 os.rename(os.path.join(root, name), os.path.join(root, new_name))
 
         # 将数据传入 self.datas 等待更新进数据库
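Both hunks above add the same guard before os.rename: delete the target first if it already exists. That makes reruns idempotent, since on Windows renaming onto an existing file raises FileExistsError (POSIX overwrites silently). A minimal sketch of the guard; os.replace(src, dst) would collapse both steps cross-platform, though the diff keeps the explicit remove:

    import os

    def rename_overwrite(src, dst):
        """Make os.rename behave like an overwrite on every platform."""
        if os.path.isfile(dst):
            os.remove(dst)  # clear a leftover target from a previous run
        os.rename(src, dst)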
@@ -1096,29 +1086,17 @@ class DataClean:
             if 'py_xg' not in name:  # 排除非目标文件
                 continue
 
-            if '天猫' in name and name.endswith('.csv') and 'baobei' in name:
-                t_path = os.path.join(self.source_path, '天猫_生意经', '宝贝指标')
-                bib(t_path, _as_month=True)
-            elif '天猫' in name and name.endswith('.csv') and '省份城市分析' in name:
-                t_path = os.path.join(self.source_path, '天猫_生意经', '省份城市分析')
-                bib(t_path, _as_month=True)
-            elif '天猫' in name and name.endswith('.csv') and '店铺销售指标' in name:
-                t_path = os.path.join(self.source_path, '天猫_生意经', '店铺销售指标')
-                bib(t_path, _as_month=False)
-            elif '天猫' in name and name.endswith('.csv') and 'order' in name:
-                t_path = os.path.join(self.source_path, '天猫_生意经', '订单数据')
-                bib(t_path, _as_month=False)
-            elif '淘宝' in name or '企业店' in name and name.endswith('.csv') and 'baobei' in name:
-                t_path = os.path.join(self.source_path, '淘宝_生意经', '宝贝指标')
+            if name.endswith('.csv') and 'baobei' in name:
+                t_path = os.path.join(self.source_path, '生意经', '宝贝指标')
                 bib(t_path, _as_month=True)
-            elif '淘宝' in name or '企业店' in name and name.endswith('.csv') and '省份城市分析' in name:
-                t_path = os.path.join(self.source_path, '淘宝_生意经', '省份城市分析')
+            elif name.endswith('.csv') and '省份城市分析' in name:
+                t_path = os.path.join(self.source_path, '生意经', '省份城市分析')
                 bib(t_path, _as_month=True)
-            elif '淘宝' in name or '企业店' in name and name.endswith('.csv') and '店铺销售指标' in name:
-                t_path = os.path.join(self.source_path, '淘宝_生意经', '店铺销售指标')
+            elif name.endswith('.csv') and '店铺销售指标' in name:
+                t_path = os.path.join(self.source_path, '生意经', '店铺销售指标')
                 bib(t_path, _as_month=False)
-            elif '淘宝' in name or '企业店' in name and name.endswith('.csv') and 'order' in name:
-                t_path = os.path.join(self.source_path, '淘宝_生意经', '订单数据')
+            elif name.endswith('.csv') and 'order' in name:
+                t_path = os.path.join(self.source_path, '生意经', '订单数据')
                 bib(t_path, _as_month=False)
 
     # @try_except
@@ -1585,7 +1563,7 @@ def main(is_mysql=False, is_company=False):
     # 清理聚合数据, mongodb 中没有聚合数据,所以只需要清理 mysql 即可
     optimize_data.op_data(
         db_name_lists=['聚合数据'],
-        days=3650,
+        days=100,
         is_mongo=False,
         is_mysql=True,
     )
mdbq/company/copysh.py CHANGED
@@ -324,7 +324,7 @@ def op_data(days: int =100):
             '推广数据_淘宝店',
             '爱库存2',
             '生意参谋3',
-            '生意经2',
+            '生意经3',
             # '聚合数据',
             '达摩盘3',
         ],
@@ -407,6 +407,8 @@ def main():
     )
     # print(conf)
     myconfig.write_back(datas=conf)  # 写回文件生效
+    now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S ')
+    print(f'{now}数据完成!')
 
     t.sleep_minutes = 5  # 同步前休眠时间
     if socket.gethostname() == 'company' or socket.gethostname() == 'Mac2.local':
mdbq-2.8.3.dist-info/METADATA → mdbq-2.8.5.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mdbq
-Version: 2.8.3
+Version: 2.8.5
 Home-page: https://pypi.org/project/mdbq
 Author: xigua,
 Author-email: 2587125111@qq.com
mdbq-2.8.3.dist-info/RECORD → mdbq-2.8.5.dist-info/RECORD CHANGED
@@ -1,18 +1,18 @@
 mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
 mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
 mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
-mdbq/aggregation/aggregation.py,sha256=FsKrOTCgwUgIuTKNf6rJSrV1g58al9qZ-yLTokWU_YY,72134
+mdbq/aggregation/aggregation.py,sha256=fLsYQO7LTUtVFaVOxXry4OZviiIpZMcTLplE-HXW9XY,71964
 mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
-mdbq/aggregation/mysql_types.py,sha256=2nMEeGGJXdxC3kp0xz2DJ3q6-1rfxi3GkRgDnsKyuNI,10934
+mdbq/aggregation/mysql_types.py,sha256=YTGyrF9vcRgfkQbpT-e-JdJ7c7VF1dDHgyx9YZRES8w,10934
 mdbq/aggregation/optimize_data.py,sha256=79uwiM2WqNNFxGpE2wKz742PRq-ZGgFjdOV0vgptHdY,3513
-mdbq/aggregation/query_data.py,sha256=sLjCO5yDhGdVouA8x42sX5rugT2w9itYKanpk3iywCc,100065
+mdbq/aggregation/query_data.py,sha256=kSIXx4xhthCSaWKdQP4Za9AW-Iu98UUXg9SSWZ8ixO4,100065
 mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
 mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
 mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
-mdbq/clean/clean_upload.py,sha256=rNiv_7wcOgYfks1wK2sLHWNyi7jProCZgHZSGjC7IlQ,83318
+mdbq/clean/clean_upload.py,sha256=LRXcvsLqON5NJ5rj7RbeV0750N2Jrjtmr4J7sNPNPoM,81544
 mdbq/clean/data_clean.py,sha256=ucfslhqXVZoH2QaXHSAWDky0GhIvH9f4GeNaHg4SrFE,104790
 mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
-mdbq/company/copysh.py,sha256=tWSD7ZnPxSFrguKzI837glg90xoaxK4JL0IavtnMyGM,19943
+mdbq/company/copysh.py,sha256=gWaNgRe_rBHBBphch2AHkGAljZRdfqYBn_FC3m_l11A,20061
 mdbq/company/copysh_bak.py,sha256=NvlXCBZBcO2GIT5nLRYYqhOyHWM1-1RE7DHvgbj6jmQ,19723
 mdbq/company/home_sh.py,sha256=42CZ2tZIXHLl2mOl2gk2fZnjH2IHh1VJ1s3qHABjonY,18021
 mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -45,7 +45,7 @@ mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
 mdbq/req_post/req_tb.py,sha256=PexWSCPJNM6Tv0ol4lAWIhlOwsAr_frnjtcdSHCFiek,36179
 mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
 mdbq/spider/aikucun.py,sha256=jHrdGWBJQaSywx7V-U4YuM6vWkwC5SR5tTOOdB3YU_c,17306
-mdbq-2.8.3.dist-info/METADATA,sha256=URhLHhurWFz89hBRcw66jx2ebsU23fIS7peIyMR_Iv0,243
-mdbq-2.8.3.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-mdbq-2.8.3.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
-mdbq-2.8.3.dist-info/RECORD,,
+mdbq-2.8.5.dist-info/METADATA,sha256=YeGV1kaOhz9_4VCvMcBraZ0hMCk_tpdL3lJZP-2kda0,243
+mdbq-2.8.5.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+mdbq-2.8.5.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+mdbq-2.8.5.dist-info/RECORD,,