mdbq 1.9.0__py3-none-any.whl → 1.9.2__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
mdbq/clean/data_clean.py CHANGED
@@ -86,943 +86,967 @@ class DataClean:
86
86
  for name in files:
87
87
  if '~$' in name or '.DS' in name or '.localized' in name or '.jpg' in name or '.png' in name:
88
88
  continue
89
- encoding = self.get_encoding(file_path=pathlib.Path(root, name))
90
- # ----------------- 推广报表 分割线 -----------------
91
- tg_names = ['营销场景报表', '计划报表', '单元报表', '关键词报表', '人群报表', '主体报表',
92
- '其他主体报表',
93
- '创意报表', '地域报表', '权益报表']
94
- for tg_name in tg_names:
95
- if tg_name in name and '汇总' not in name and name.endswith('.csv'): # 人群报表排除达摩盘报表: 人群报表汇总
96
- pattern = re.findall(r'(.*_)\d{8}_\d{6}', name)
97
- if not pattern: # 说明已经转换过
89
+
90
+ try:
91
+ encoding = self.get_encoding(file_path=pathlib.Path(root, name))
92
+ # ----------------- 推广报表 分割线 -----------------
93
+ tg_names = ['营销场景报表', '计划报表', '单元报表', '关键词报表', '人群报表', '主体报表',
94
+ '其他主体报表',
95
+ '创意报表', '地域报表', '权益报表']
96
+ for tg_name in tg_names:
97
+ if tg_name in name and '汇总' not in name and name.endswith('.csv'): # 人群报表排除达摩盘报表: 人群报表汇总
98
+ pattern = re.findall(r'(.*_)\d{8}_\d{6}', name)
99
+ if not pattern: # 说明已经转换过
100
+ continue
101
+ shop_name = re.findall(r'\d{8}_\d{6}_(.*)\W', name)
102
+ if shop_name:
103
+ shop_name = shop_name[0]
104
+ else:
105
+ shop_name = ''
106
+ df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
107
+ if '地域' not in name: # 除了地域报表, 检查数据的字段是否包含“场景名字”,如果没有,说明没有选“pbix” 数据模块下载
108
+ ck = df.columns.tolist()
109
+ if '场景名字' not in ck:
110
+ print(f'{name} 报表字段缺失, 请选择Pbix数据模板下载')
111
+ continue
112
+ if len(df) == 0:
113
+ print(f'{name} 报表是空的, 请重新下载, 此报表已移除')
114
+ os.remove(os.path.join(root, name))
115
+ continue
116
+
117
+ df.replace(to_replace=['\\N'], value=0, regex=False, inplace=True) # 替换掉特殊字符
118
+ df.fillna(0, inplace=True)
119
+ col_ids = ['场景ID', '计划ID', '单元ID', '主体ID', '宝贝ID', '词ID/词包ID', '创意ID']
120
+ sb = df.columns.tolist()
121
+ if '日期' not in sb:
122
+ print(f'{name} 注意:该报表不包含分日数据,数据不会保存,请重新下载!')
123
+ continue
124
+ if '省' in sb:
125
+ if '市' not in sb:
126
+ print(
127
+ f'{name} 注意:请下载市级地域报表,而不是省报表,数据不会保存,请重新下载!')
128
+ continue
129
+ for col_id in col_ids:
130
+ if col_id in sb:
131
+ df[col_id] = df[col_id].apply(
132
+ lambda x: f'="{x}"' if x and '=' not in str(x) else x
133
+ )
134
+ date_min = f'_{df["日期"].values.min()}_'
135
+ date_max = f'{df["日期"].values.max()}.csv'
136
+ if '万里马' in name:
137
+ tm_s_name = pattern[0] + shop_name + date_min + date_max
138
+ new_root_p = pathlib.Path(self.source_path, '推广报表', tg_name) # 文件夹,未包括文件名
139
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
140
+ if '省' in df.columns.tolist() and '场景名字' in df.columns.tolist() and '完整' in name:
141
+ new_root_p = pathlib.Path(self.source_path, '推广报表', f'完整_{tg_name}')
142
+ tm_s_name = f'完整_{tm_s_name}'
143
+ self.save_to_csv(df, new_root_p, tm_s_name)
144
+ if self.set_up_to_mogo:
145
+ d.df_to_mongo(df=df, db_name='天猫数据1', collection_name=f'天猫_推广_{tg_name}')
146
+ if self.set_up_to_mysql:
147
+ m.df_to_mysql(df=df, db_name='天猫数据1', tabel_name=f'天猫_推广_{tg_name}')
148
+ os.remove(os.path.join(root, name))
149
+ else:
150
+ print(f'{name} 文件名不含"万里马", 不属于爬虫下载,您可以手动进行分类,但不会上传数据库')
151
+
152
+ if name.endswith('.csv') and '超级直播' in name:
153
+ # 超级直播
154
+ df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
155
+ if len(df) == 0:
156
+ print(f'{name} 报表数据为空')
157
+ os.remove(os.path.join(root, name))
98
158
  continue
159
+ pattern = re.findall(r'(.*_)\d{8}_\d{6}', name)
99
160
  shop_name = re.findall(r'\d{8}_\d{6}_(.*)\W', name)
100
161
  if shop_name:
101
162
  shop_name = shop_name[0]
102
163
  else:
103
164
  shop_name = ''
104
- df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
105
- if '地域' not in name: # 除了地域报表, 检查数据的字段是否包含“场景名字”,如果没有,说明没有选“pbix” 数据模块下载
106
- ck = df.columns.tolist()
107
- if '场景名字' not in ck:
108
- print(f'{name} 报表字段缺失, 请选择Pbix数据模板下载')
109
- continue
110
- if len(df) == 0:
111
- print(f'{name} 报表是空的, 请重新下载, 此报表已移除')
112
- os.remove(os.path.join(root, name))
113
- continue
114
-
165
+ cols = ['场景ID', '计划ID']
166
+ for col in cols:
167
+ df[col] = df[col].apply(lambda x: f'="{x}"' if x and '=' not in str(x) else x)
115
168
  df.replace(to_replace=['\\N'], value=0, regex=False, inplace=True) # 替换掉特殊字符
116
- df.fillna(0, inplace=True)
117
- col_ids = ['场景ID', '计划ID', '单元ID', '主体ID', '宝贝ID', '词ID/词包ID', '创意ID']
118
- sb = df.columns.tolist()
119
- if '日期' not in sb:
120
- print(f'{name} 注意:该报表不包含分日数据,数据不会保存,请重新下载!')
121
- continue
122
- if '省' in sb:
123
- if '市' not in sb:
124
- print(
125
- f'{name} 注意:请下载市级地域报表,而不是省报表,数据不会保存,请重新下载!')
126
- continue
127
- for col_id in col_ids:
128
- if col_id in sb:
129
- df[col_id] = df[col_id].apply(
130
- lambda x: f'="{x}"' if x and '=' not in str(x) else x
131
- )
132
- date_min = f'_{df["日期"].values.min()}_'
169
+ root_new = pathlib.Path(self.source_path, '推广报表', '超级直播')
170
+ date_min = f'_{df["日期"].values.min()}_' # 仅适用于日期列未转换之前, 还是整数,转换后不能用这个函数
133
171
  date_max = f'{df["日期"].values.max()}.csv'
134
- if '万里马' in name:
135
- tm_s_name = pattern[0] + shop_name + date_min + date_max
136
- new_root_p = pathlib.Path(self.source_path, '推广报表', tg_name) # 文件夹,未包括文件名
137
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
138
- if '省' in df.columns.tolist() and '场景名字' in df.columns.tolist() and '完整' in name:
139
- new_root_p = pathlib.Path(self.source_path, '推广报表', f'完整_{tg_name}')
140
- tm_s_name = f'完整_{tm_s_name}'
141
- self.save_to_csv(df, new_root_p, tm_s_name)
142
- if self.set_up_to_mogo:
143
- d.df_to_mongo(df=df, db_name='天猫数据1', collection_name=f'天猫_推广_{tg_name}')
144
- if self.set_up_to_mysql:
145
- m.df_to_mysql(df=df, db_name='天猫数据1', tabel_name=f'天猫_推广_{tg_name}')
146
- os.remove(os.path.join(root, name))
147
- else:
148
- print(f'{name} 文件名不含"万里马", 不属于爬虫下载,您可以手动进行分类,但不会上传数据库')
149
-
150
- if name.endswith('.csv') and '超级直播' in name:
151
- # 超级直播
152
- df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
153
- if len(df) == 0:
154
- print(f'{name} 报表数据为空')
155
- os.remove(os.path.join(root, name))
156
- continue
157
- pattern = re.findall(r'(.*_)\d{8}_\d{6}', name)
158
- shop_name = re.findall(r'\d{8}_\d{6}_(.*)\W', name)
159
- if shop_name:
160
- shop_name = shop_name[0]
161
- else:
162
- shop_name = ''
163
- cols = ['场景ID', '计划ID']
164
- for col in cols:
165
- df[col] = df[col].apply(lambda x: f'="{x}"' if x and '=' not in str(x) else x)
166
- df.replace(to_replace=['\\N'], value=0, regex=False, inplace=True) # 替换掉特殊字符
167
- root_new = pathlib.Path(self.source_path, '推广报表', '超级直播')
168
- date_min = f'_{df["日期"].values.min()}_' # 仅适用于日期列未转换之前, 还是整数,转换后不能用这个函数
169
- date_max = f'{df["日期"].values.max()}.csv'
170
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
171
- new_name = pattern[0] + shop_name + date_min + date_max
172
- self.save_to_csv(df, root_new, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
173
- if self.set_up_to_mogo:
174
- d.df_to_mongo(df=df, db_name='天猫数据1', collection_name='天猫_推广_超级直播')
175
- if self.set_up_to_mysql:
176
- m.df_to_mysql(df=df, db_name='天猫数据1', tabel_name='天猫_推广_超级直播')
177
- os.remove(os.path.join(root, name))
178
- elif name.endswith('.xls') and '短直联投' in name:
179
- # 短直联投
180
- df = pd.read_excel(os.path.join(root, name), sheet_name=None, header=0)
181
- df = pd.concat(df)
182
- if len(df) == 0:
183
- print(f'{name} 报表数据为空')
172
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
173
+ new_name = pattern[0] + shop_name + date_min + date_max
174
+ self.save_to_csv(df, root_new, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
175
+ if self.set_up_to_mogo:
176
+ d.df_to_mongo(df=df, db_name='天猫数据1', collection_name='天猫_推广_超级直播')
177
+ if self.set_up_to_mysql:
178
+ m.df_to_mysql(df=df, db_name='天猫数据1', tabel_name='天猫_推广_超级直播')
184
179
  os.remove(os.path.join(root, name))
185
- continue
186
- new_name2 = os.path.splitext(name)[0] + '.csv'
187
- df['订单Id'] = df['订单Id'].apply(
188
- lambda x: "{0}{1}{2}".format('="', x, '"') if x and '=' not in str(x) else x
189
- )
190
- root_new = pathlib.Path(self.source_path, '推广报表/短直联投')
191
- self.save_to_csv(df, root_new, new_name2) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
192
- if self.set_up_to_mogo:
193
- d.df_to_mongo(df=df, db_name='天猫数据1', collection_name='天猫_推广_短直联投')
194
- if self.set_up_to_mysql:
195
- m.df_to_mysql(df=df, db_name='天猫数据1', tabel_name='天猫_推广_短直联投')
196
- os.remove(os.path.join(root, name))
197
- elif name.endswith('.xls') and '视频加速推广' in name:
198
- # 超级短视频
199
- df = pd.read_excel(os.path.join(root, name), sheet_name=None, header=0)
200
- df = pd.concat(df)
201
- if len(df) == 0:
202
- print(f'{name} 报表数据为空')
180
+ elif name.endswith('.xls') and '短直联投' in name:
181
+ # 短直联投
182
+ df = pd.read_excel(os.path.join(root, name), sheet_name=None, header=0)
183
+ df = pd.concat(df)
184
+ if len(df) == 0:
185
+ print(f'{name} 报表数据为空')
186
+ os.remove(os.path.join(root, name))
187
+ continue
188
+ new_name2 = os.path.splitext(name)[0] + '.csv'
189
+ df['订单Id'] = df['订单Id'].apply(
190
+ lambda x: "{0}{1}{2}".format('="', x, '"') if x and '=' not in str(x) else x
191
+ )
192
+ root_new = pathlib.Path(self.source_path, '推广报表/短直联投')
193
+ self.save_to_csv(df, root_new, new_name2) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
194
+ if self.set_up_to_mogo:
195
+ d.df_to_mongo(df=df, db_name='天猫数据1', collection_name='天猫_推广_短直联投')
196
+ if self.set_up_to_mysql:
197
+ m.df_to_mysql(df=df, db_name='天猫数据1', tabel_name='天猫_推广_短直联投')
203
198
  os.remove(os.path.join(root, name))
204
- continue
205
- new_name2 = os.path.splitext(name)[0] + '.csv'
206
- df['计划ID'] = df['计划ID'].apply(
207
- lambda x: "{0}{1}{2}".format('="', x, '"') if x and '=' not in str(x) else x
208
- )
209
- df['视频id'] = df['视频id'].apply(
210
- lambda x: "{0}{1}{2}".format('="', x, '"') if x and '=' not in str(x) else x
211
- )
212
- root_new = pathlib.Path(self.source_path, '推广报表/超级短视频')
213
- self.save_to_csv(df, root_new, new_name2) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
214
- if self.set_up_to_mogo:
215
- d.df_to_mongo(df=df, db_name='天猫数据1', collection_name='天猫_推广_超级短视频')
216
- if self.set_up_to_mysql:
217
- m.df_to_mysql(df=df, db_name='天猫数据1', tabel_name='天猫_推广_超级短视频')
218
- os.remove(os.path.join(root, name))
219
- if '人群报表汇总' in name:
220
- df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=1, na_filter=False)
221
- if len(df) == 0:
222
- print(f'{name} 报表数据为空')
199
+ elif name.endswith('.xls') and '视频加速推广' in name:
200
+ # 超级短视频
201
+ df = pd.read_excel(os.path.join(root, name), sheet_name=None, header=0)
202
+ df = pd.concat(df)
203
+ if len(df) == 0:
204
+ print(f'{name} 报表数据为空')
205
+ os.remove(os.path.join(root, name))
206
+ continue
207
+ new_name2 = os.path.splitext(name)[0] + '.csv'
208
+ df['计划ID'] = df['计划ID'].apply(
209
+ lambda x: "{0}{1}{2}".format('="', x, '"') if x and '=' not in str(x) else x
210
+ )
211
+ df['视频id'] = df['视频id'].apply(
212
+ lambda x: "{0}{1}{2}".format('="', x, '"') if x and '=' not in str(x) else x
213
+ )
214
+ root_new = pathlib.Path(self.source_path, '推广报表/超级短视频')
215
+ self.save_to_csv(df, root_new, new_name2) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
216
+ if self.set_up_to_mogo:
217
+ d.df_to_mongo(df=df, db_name='天猫数据1', collection_name='天猫_推广_超级短视频')
218
+ if self.set_up_to_mysql:
219
+ m.df_to_mysql(df=df, db_name='天猫数据1', tabel_name='天猫_推广_超级短视频')
223
220
  os.remove(os.path.join(root, name))
224
- continue
225
- min_clm = df.min()['日期']
226
- max_clm = df.max()['日期']
227
- new_name = '{}{}{}'.format(min_clm, '_', max_clm)
228
- df['点击率'] = df['点击率'].apply(lambda x: format(x, '.2%') if x > 0 else '') # 格式化成百分比
229
- df['UV点击率'] = df['UV点击率'].apply(lambda x: format(x, '.2%') if x > 0 else '')
230
- df['收藏加购率'] = df['收藏加购率'].apply(lambda x: format(x, '.2%') if x > 0 else '')
231
- df['UV收藏加购率'] = df['UV收藏加购率'].apply(lambda x: format(x, '.2%') if x > 0 else '')
232
- df['点击转化率'] = df['点击转化率'].apply(lambda x: format(x, '.2%') if x > 0 else '')
233
- df['UV点击转化率'] = df['UV点击转化率'].apply(lambda x: format(x, '.2%') if x > 0 else '')
234
- df.replace(to_replace=[0], value='', regex=False, inplace=True)
235
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
236
- df.to_csv(os.path.join(self.path, 'DMP报表_' + new_name + '.csv'), encoding='utf-8_sig',
237
- index=False, header=True)
238
- if self.set_up_to_mogo:
239
- d.df_to_mongo(df=df, db_name='天猫数据1', collection_name='天猫_达摩盘_DMP报表',)
240
- if self.set_up_to_mysql:
241
- m.df_to_mysql(df=df, db_name='天猫数据1', tabel_name='天猫_达摩盘_DMP报表')
242
- os.remove(os.path.join(root, name))
243
- # ----------------- 推广报表 分割线 -----------------
244
- # ----------------- 推广报表 分割线 -----------------
245
-
246
- date01 = re.findall(r'(\d{4}-\d{2}-\d{2})_\d{4}-\d{2}-\d{2}', str(name))
247
- date02 = re.findall(r'\d{4}-\d{2}-\d{2}_(\d{4}-\d{2}-\d{2})', str(name))
248
- if name.endswith('.xls') and '生意参谋' in name and '无线店铺流量来源' in name:
249
- # 无线店铺流量来源
250
- new_name = os.path.splitext(name)[0] + '.csv'
251
- df = pd.read_excel(os.path.join(root, name), header=5)
252
- if len(df) == 0:
253
- print(f'{name} 报表数据为空')
221
+ if '人群报表汇总' in name:
222
+ df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=1, na_filter=False)
223
+ if len(df) == 0:
224
+ print(f'{name} 报表数据为空')
225
+ os.remove(os.path.join(root, name))
226
+ continue
227
+ min_clm = df.min()['日期']
228
+ max_clm = df.max()['日期']
229
+ new_name = '{}{}{}'.format(min_clm, '_', max_clm)
230
+ df['点击率'] = df['点击率'].apply(lambda x: format(x, '.2%') if x > 0 else '') # 格式化成百分比
231
+ df['UV点击率'] = df['UV点击率'].apply(lambda x: format(x, '.2%') if x > 0 else '')
232
+ df['收藏加购率'] = df['收藏加购率'].apply(lambda x: format(x, '.2%') if x > 0 else '')
233
+ df['UV收藏加购率'] = df['UV收藏加购率'].apply(lambda x: format(x, '.2%') if x > 0 else '')
234
+ df['点击转化率'] = df['点击转化率'].apply(lambda x: format(x, '.2%') if x > 0 else '')
235
+ df['UV点击转化率'] = df['UV点击转化率'].apply(lambda x: format(x, '.2%') if x > 0 else '')
236
+ df.replace(to_replace=[0], value='', regex=False, inplace=True)
237
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
238
+ df.to_csv(os.path.join(self.path, 'DMP报表_' + new_name + '.csv'), encoding='utf-8_sig',
239
+ index=False, header=True)
240
+ if self.set_up_to_mogo:
241
+ d.df_to_mongo(df=df, db_name='天猫数据1', collection_name='天猫_达摩盘_DMP报表',)
242
+ if self.set_up_to_mysql:
243
+ m.df_to_mysql(df=df, db_name='天猫数据1', tabel_name='天猫_达摩盘_DMP报表')
254
244
  os.remove(os.path.join(root, name))
255
- continue
256
- df.replace(to_replace=['-'], value='', regex=False, inplace=True)
257
- if date01[0] != date02[0]:
258
- data_lis = date01[0] + '_' + date02[0]
259
- df.insert(loc=0, column='数据周期', value=data_lis)
260
- df.insert(loc=0, column='日期', value=date01[0])
261
- # 2024-2-19 官方更新了推广渠道来源名称
262
- df['三级来源'] = df['三级来源'].apply(
263
- lambda x: '精准人群推广' if x == '精准人群推广(原引力魔方)'
264
- else '关键词推广' if x == '关键词推广(原直通车)'
265
- else '智能场景' if x == '智能场景(原万相台)'
266
- else x
267
- )
268
- # df = df[df['访客数'] != '0']
269
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
270
- for col in df.columns.tolist():
271
- df[col] = df[col].apply(lambda x: 0 if not x else 0 if x == '' else x)
272
- if '经营优势' in df['一级来源'].tolist(): # 新版流量
273
- new_name = re.sub(r'\s?\(.*\)', '', new_name) # 删除小括号
274
- new_name = os.path.splitext(new_name)[0] + '_新版.csv'
245
+ # ----------------- 推广报表 分割线 -----------------
246
+ # ----------------- 推广报表 分割线 -----------------
275
247
 
276
- self.save_to_csv(df, root, new_name) # 因为 mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
277
- if '经营优势' in df['一级来源'].tolist(): # 新版流量
278
- if '数据周期' in df.columns.tolist():
279
- if self.set_up_to_mogo:
280
- d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_店铺来源_月数据')
281
- if self.set_up_to_mysql:
282
- m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_店铺来源_月数据')
283
- else:
284
- if self.set_up_to_mogo:
285
- d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_店铺来源_日数据')
286
- if self.set_up_to_mysql:
287
- m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_店铺来源_日数据')
288
- else: # 旧版流量
289
- if '数据周期' in df.columns.tolist():
290
- if self.set_up_to_mogo:
291
- d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_店铺来源_月数据_旧版')
292
- if self.set_up_to_mysql:
293
- m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_店铺来源_月数据_旧版')
294
- else:
295
- if self.set_up_to_mogo:
296
- d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_店铺来源_日数据_旧版')
297
- if self.set_up_to_mysql:
298
- m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_店铺来源_日数据_旧版')
299
- os.remove(os.path.join(root, name))
300
-
301
- elif name.endswith('.xls') and '生意参谋' in name and '无线店铺三级流量来源详情' in name:
302
- # 店铺来源,手淘搜索,关键词
303
- pattern = re.findall(r'(\d{4}-\d{2}-\d{2})_(\d{4}-\d{2}-\d{2})', name)
304
- df = pd.read_excel(os.path.join(root, name), header=5)
305
- if len(df) == 0:
306
- print(f'{name} 报表数据为空')
307
- continue
308
- df.replace(to_replace=[','], value='', regex=True, inplace=True)
309
- df.insert(loc=0, column='日期', value=pattern[0][1])
310
- df.rename(columns={
311
- '来源名称': '关键词',
312
- '收藏商品-支付买家数': '收藏商品_支付买家数',
313
- '加购商品-支付买家数': '加购商品_支付买家数',
314
- }, inplace=True)
315
- if pattern[0][0] != pattern[0][1]:
316
- data_lis = pattern[0][0] + '_' + pattern[0][1]
317
- df.insert(loc=1, column='数据周期', value=data_lis)
318
- new_name = os.path.splitext(name)[0] + '.csv'
319
- self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
320
- os.remove(os.path.join(root, name))
248
+ date01 = re.findall(r'(\d{4}-\d{2}-\d{2})_\d{4}-\d{2}-\d{2}', str(name))
249
+ date02 = re.findall(r'\d{4}-\d{2}-\d{2}_(\d{4}-\d{2}-\d{2})', str(name))
250
+ if name.endswith('.xls') and '生意参谋' in name and '无线店铺流量来源' in name:
251
+ # 无线店铺流量来源
252
+ new_name = os.path.splitext(name)[0] + '.csv'
253
+ df = pd.read_excel(os.path.join(root, name), header=5)
254
+ if len(df) == 0:
255
+ print(f'{name} 报表数据为空')
256
+ os.remove(os.path.join(root, name))
257
+ continue
258
+ df.replace(to_replace=['-'], value='', regex=False, inplace=True)
259
+ if date01[0] != date02[0]:
260
+ data_lis = date01[0] + '_' + date02[0]
261
+ df.insert(loc=0, column='数据周期', value=data_lis)
262
+ df.insert(loc=0, column='日期', value=date01[0])
263
+ # 2024-2-19 官方更新了推广渠道来源名称
264
+ df['三级来源'] = df['三级来源'].apply(
265
+ lambda x: '精准人群推广' if x == '精准人群推广(原引力魔方)'
266
+ else '关键词推广' if x == '关键词推广(原直通车)'
267
+ else '智能场景' if x == '智能场景(原万相台)'
268
+ else x
269
+ )
270
+ # df = df[df['访客数'] != '0']
271
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
272
+ for col in df.columns.tolist():
273
+ df[col] = df[col].apply(lambda x: 0 if not x else 0 if x == '' else x)
274
+ if '经营优势' in df['一级来源'].tolist(): # 新版流量
275
+ new_name = re.sub(r'\s?\(.*\)', '', new_name) # 删除小括号
276
+ new_name = os.path.splitext(new_name)[0] + '_新版.csv'
321
277
 
322
- elif name.endswith('.xls') and '生意参谋' in name and '商品_全部' in name:
323
- # 店铺商品排行
324
- new_name = os.path.splitext(name)[0] + '.csv'
325
- df = pd.read_excel(os.path.join(root, name), header=4)
326
- if len(df) == 0:
327
- print(f'{name} 报表数据为空')
278
+ self.save_to_csv(df, root, new_name) # 因为 mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
279
+ if '经营优势' in df['一级来源'].tolist(): # 新版流量
280
+ if '数据周期' in df.columns.tolist():
281
+ if self.set_up_to_mogo:
282
+ d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_店铺来源_月数据')
283
+ if self.set_up_to_mysql:
284
+ m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_店铺来源_月数据')
285
+ else:
286
+ if self.set_up_to_mogo:
287
+ d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_店铺来源_日数据')
288
+ if self.set_up_to_mysql:
289
+ m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_店铺来源_日数据')
290
+ else: # 旧版流量
291
+ if '数据周期' in df.columns.tolist():
292
+ if self.set_up_to_mogo:
293
+ d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_店铺来源_月数据_旧版')
294
+ if self.set_up_to_mysql:
295
+ m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_店铺来源_月数据_旧版')
296
+ else:
297
+ if self.set_up_to_mogo:
298
+ d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_店铺来源_日数据_旧版')
299
+ if self.set_up_to_mysql:
300
+ m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_店铺来源_日数据_旧版')
328
301
  os.remove(os.path.join(root, name))
329
- continue
330
- df.replace(to_replace=['-'], value='', regex=False, inplace=True)
331
- df['商品ID'] = df['商品ID'].apply(
332
- lambda x: "{0}{1}{2}".format('="', x, '"') if x and '=' not in str(x) else x
333
- )
334
- df['货号'] = df['货号'].apply(
335
- lambda x: "{0}{1}{2}".format('="', x, '"') if x and '=' not in str(x) else x
336
- )
337
- df.rename(columns={'统计日期': '日期', '商品ID': '商品id'}, inplace=True)
338
- if date01[0] != date02[0]:
339
- data_lis = date01[0] + '_' + date02[0]
340
- df.insert(loc=1, column='数据周期', value=data_lis)
341
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
342
- self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
343
- if self.set_up_to_mogo:
344
- d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_商品排行')
345
- if self.set_up_to_mysql:
346
- m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_商品排行')
347
- os.remove(os.path.join(root, name))
348
302
 
349
- elif name.endswith('.xls') and '参谋店铺整体日报' in name:
350
- # 自助取数,店铺日报
351
- new_name = os.path.splitext(name)[0] + '.csv'
352
- df = pd.read_excel(os.path.join(root, name), header=7)
353
- if len(df) == 0:
354
- print(f'{name} 报表数据为空')
303
+ elif name.endswith('.xls') and '生意参谋' in name and '无线店铺三级流量来源详情' in name:
304
+ # 店铺来源,手淘搜索,关键词
305
+ pattern = re.findall(r'(\d{4}-\d{2}-\d{2})_(\d{4}-\d{2}-\d{2})', name)
306
+ df = pd.read_excel(os.path.join(root, name), header=5)
307
+ if len(df) == 0:
308
+ print(f'{name} 报表数据为空')
309
+ continue
310
+ df.replace(to_replace=[','], value='', regex=True, inplace=True)
311
+ df.insert(loc=0, column='日期', value=pattern[0][1])
312
+ df.rename(columns={
313
+ '来源名称': '关键词',
314
+ '收藏商品-支付买家数': '收藏商品_支付买家数',
315
+ '加购商品-支付买家数': '加购商品_支付买家数',
316
+ }, inplace=True)
317
+ if pattern[0][0] != pattern[0][1]:
318
+ data_lis = pattern[0][0] + '_' + pattern[0][1]
319
+ df.insert(loc=1, column='数据周期', value=data_lis)
320
+ new_name = os.path.splitext(name)[0] + '.csv'
321
+ self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
355
322
  os.remove(os.path.join(root, name))
356
- continue
357
- df.rename(columns={'统计日期': '日期'}, inplace=True)
358
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
359
- self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
360
- if self.set_up_to_mogo:
361
- d.df_to_mongo(df=df,db_name='生意参谋2', collection_name='生意参谋_自助取数_整体日报')
362
- if self.set_up_to_mysql:
363
- m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_自助取数_整体日报')
364
- os.remove(os.path.join(root, name))
365
323
 
366
- elif name.endswith('.xls') and '参谋每日流量_自助取数_新版' in name:
367
- # 自助取数,每日流量
368
- new_name = os.path.splitext(name)[0] + '.csv'
369
- df = pd.read_excel(os.path.join(root, name), header=7)
370
- if len(df) == 0:
371
- print(f'{name} 报表数据为空')
324
+ elif name.endswith('.xls') and '生意参谋' in name and '商品_全部' in name:
325
+ # 店铺商品排行
326
+ new_name = os.path.splitext(name)[0] + '.csv'
327
+ df = pd.read_excel(os.path.join(root, name), header=4)
328
+ if len(df) == 0:
329
+ print(f'{name} 报表数据为空')
330
+ os.remove(os.path.join(root, name))
331
+ continue
332
+ df.replace(to_replace=['-'], value='', regex=False, inplace=True)
333
+ df['商品ID'] = df['商品ID'].apply(
334
+ lambda x: "{0}{1}{2}".format('="', x, '"') if x and '=' not in str(x) else x
335
+ )
336
+ df['货号'] = df['货号'].apply(
337
+ lambda x: "{0}{1}{2}".format('="', x, '"') if x and '=' not in str(x) else x
338
+ )
339
+ df.rename(columns={'统计日期': '日期', '商品ID': '商品id'}, inplace=True)
340
+ if date01[0] != date02[0]:
341
+ data_lis = date01[0] + '_' + date02[0]
342
+ df.insert(loc=1, column='数据周期', value=data_lis)
343
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
344
+ self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
345
+ if self.set_up_to_mogo:
346
+ d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_商品排行')
347
+ if self.set_up_to_mysql:
348
+ m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_商品排行')
372
349
  os.remove(os.path.join(root, name))
373
- continue
374
- df.rename(columns={'统计日期': '日期'}, inplace=True)
375
- # 2024-2-19 官方更新了推广渠道来源名称,自助取数没有更新,这里强制更改
376
- df['三级来源'] = df['三级来源'].apply(
377
- lambda x: '精准人群推广' if x == '引力魔方'
378
- else '关键词推广' if x == '直通车'
379
- else '智能场景' if x == '万相台'
380
- else '精准人群推广' if x == '精准人群推广(原引力魔方)'
381
- else '关键词推广' if x == '关键词推广(原直通车)'
382
- else '智能场景' if x == '智能场景(原万相台)'
383
- else x
384
- )
385
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
386
- self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
387
- if self.set_up_to_mogo:
388
- d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_自助取数_每日流量')
389
- if self.set_up_to_mysql:
390
- m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_自助取数_每日流量')
391
- os.remove(os.path.join(root, name))
392
350
 
393
- elif name.endswith('.xls') and '商品sku' in name:
394
- # 自助取数,商品sku
395
- new_name = os.path.splitext(name)[0] + '.csv'
396
- df = pd.read_excel(os.path.join(root, name), header=7)
397
- if len(df) == 0:
398
- print(f'{name} 报表数据为空')
351
+ elif name.endswith('.xls') and '参谋店铺整体日报' in name:
352
+ # 自助取数,店铺日报
353
+ new_name = os.path.splitext(name)[0] + '.csv'
354
+ df = pd.read_excel(os.path.join(root, name), header=7)
355
+ if len(df) == 0:
356
+ print(f'{name} 报表数据为空')
357
+ os.remove(os.path.join(root, name))
358
+ continue
359
+ df.rename(columns={'统计日期': '日期'}, inplace=True)
360
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
361
+ self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
362
+ if self.set_up_to_mogo:
363
+ d.df_to_mongo(df=df,db_name='生意参谋2', collection_name='生意参谋_自助取数_整体日报')
364
+ if self.set_up_to_mysql:
365
+ m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_自助取数_整体日报')
399
366
  os.remove(os.path.join(root, name))
400
- continue
401
- df.rename(columns={
402
- '统计日期': '日期',
403
- '商品ID': '商品id',
404
- 'SKU ID': 'sku id',
405
- '商品SKU': '商品sku',
406
- }, inplace=True)
407
- for _i in ['商品id', 'sku id']:
408
- df[_i] = df[_i].astype(str).apply(lambda x: f'="{x}"')
409
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
410
- self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
411
- if self.set_up_to_mogo:
412
- d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_自助取数_商品sku')
413
- if self.set_up_to_mysql:
414
- m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_自助取数_商品sku')
415
- os.remove(os.path.join(root, name))
416
367
 
417
- elif name.endswith('.xls') and '参谋店铺流量来源(月)' in name:
418
- # 自助取数,月店铺流量来源
419
- new_name = os.path.splitext(name)[0] + '.csv'
420
- df = pd.read_excel(os.path.join(root, name), header=7)
421
- if len(df) == 0:
422
- print(f'{name} 报表数据为空')
368
+ elif name.endswith('.xls') and '参谋每日流量_自助取数_新版' in name:
369
+ # 自助取数,每日流量
370
+ new_name = os.path.splitext(name)[0] + '.csv'
371
+ df = pd.read_excel(os.path.join(root, name), header=7)
372
+ if len(df) == 0:
373
+ print(f'{name} 报表数据为空')
374
+ os.remove(os.path.join(root, name))
375
+ continue
376
+ df.rename(columns={'统计日期': '日期'}, inplace=True)
377
+ # 2024-2-19 官方更新了推广渠道来源名称,自助取数没有更新,这里强制更改
378
+ df['三级来源'] = df['三级来源'].apply(
379
+ lambda x: '精准人群推广' if x == '引力魔方'
380
+ else '关键词推广' if x == '直通车'
381
+ else '智能场景' if x == '万相台'
382
+ else '精准人群推广' if x == '精准人群推广(原引力魔方)'
383
+ else '关键词推广' if x == '关键词推广(原直通车)'
384
+ else '智能场景' if x == '智能场景(原万相台)'
385
+ else x
386
+ )
387
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
388
+ self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
389
+ if self.set_up_to_mogo:
390
+ d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_自助取数_每日流量')
391
+ if self.set_up_to_mysql:
392
+ m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_自助取数_每日流量')
423
393
  os.remove(os.path.join(root, name))
424
- continue
425
- df.rename(columns={'统计日期': '数据周期'}, inplace=True)
426
- # 2024-2-19 官方更新了推广渠道来源名称,自助取数没有更新,这里强制更改
427
- df['三级来源'] = df['三级来源'].apply(
428
- lambda x: '精准人群推广' if x == '引力魔方'
429
- else '关键词推广' if x == '直通车'
430
- else '智能场景' if x == '万相台'
431
- else '精准人群推广' if x == '精准人群推广(原引力魔方)'
432
- else '关键词推广' if x == '关键词推广(原直通车)'
433
- else '智能场景' if x == '智能场景(原万相台)'
434
- else x
435
- )
436
- df['日期'] = df['数据周期'].apply(lambda x: re.findall('(.*) ~', x)[0])
437
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
438
- self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
439
- if self.set_up_to_mogo:
440
- d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_自助取数_店铺流量_月数据')
441
- if self.set_up_to_mysql:
442
- m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_自助取数_店铺流量_月数据')
443
- os.remove(os.path.join(root, name))
444
394
 
445
- elif name.endswith('.csv') and 'baobei' in name:
446
- # 生意经宝贝指标日数据
447
- # print(name)
448
- date = re.findall(r's-(\d{4})(\d{2})(\d{2})\.', str(name))
449
- if not date: # 阻止月数据及已转换的表格
450
- print(f'{name} 不支持或是已转换的表格')
451
- os.remove(os.path.join(root, name)) # 直接删掉,避免被分到原始文件, encoding 不同会引发错误
452
- continue
453
- df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
454
- if len(df) == 0:
455
- print(f'{name} 报表数据为空')
395
+ elif name.endswith('.xls') and '商品sku' in name:
396
+ # 自助取数,商品sku
397
+ new_name = os.path.splitext(name)[0] + '.csv'
398
+ df = pd.read_excel(os.path.join(root, name), header=7)
399
+ if len(df) == 0:
400
+ print(f'{name} 报表数据为空')
401
+ os.remove(os.path.join(root, name))
402
+ continue
403
+ df.rename(columns={
404
+ '统计日期': '日期',
405
+ '商品ID': '商品id',
406
+ 'SKU ID': 'sku id',
407
+ '商品SKU': '商品sku',
408
+ }, inplace=True)
409
+ for _i in ['商品id', 'sku id']:
410
+ df[_i] = df[_i].astype(str).apply(lambda x: f'="{x}"')
411
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
412
+ self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
413
+ if self.set_up_to_mogo:
414
+ d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_自助取数_商品sku')
415
+ if self.set_up_to_mysql:
416
+ m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_自助取数_商品sku')
456
417
  os.remove(os.path.join(root, name))
457
- continue
458
- if '日期' in df.columns.tolist():
459
- df.pop('日期')
460
- new_date = '-'.join(date[0])
461
- df.insert(loc=0, column='日期', value=new_date)
462
- df.replace(to_replace=['--'], value='', regex=False, inplace=True)
463
- df['宝贝ID'] = df['宝贝ID'].apply(
464
- lambda x: f'="{x}"' if x and '=' not in str(x) else x
465
- )
466
- df['商家编码'] = df['商家编码'].apply(
467
- lambda x: f'="{x}"' if x and '=' not in str(x) else x
468
- )
469
- name_st = re.findall(r'(.*)\d{4}\d{2}\d{2}\.', str(name)) # baobeitrans-
470
- new_name = f'{name_st[0]}{new_date}.csv'
471
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
472
- self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
473
- if self.set_up_to_mogo:
474
- d.df_to_mongo(df=df, db_name='生意经1', collection_name='生意经_宝贝指标')
475
- if self.set_up_to_mysql:
476
- m.df_to_mysql(df=df, db_name='生意经1', tabel_name='生意经_宝贝指标')
477
- os.remove(os.path.join(root, name))
478
418
 
479
- elif name.endswith('.csv') and '店铺销售指标' in name:
480
- # 生意经, 店铺指标,仅限月数据,实际日指标也可以
481
- name_st = re.findall(r'(.*)\(分日', name)
482
- if not name_st:
483
- print(f'{name} 已转换的表格')
484
- continue
485
- df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
486
- if len(df) == 0:
487
- print(f'{name} 报表数据为空')
419
+ elif name.endswith('.xls') and '参谋店铺流量来源(月)' in name:
420
+ # 自助取数,月店铺流量来源
421
+ new_name = os.path.splitext(name)[0] + '.csv'
422
+ df = pd.read_excel(os.path.join(root, name), header=7)
423
+ if len(df) == 0:
424
+ print(f'{name} 报表数据为空')
425
+ os.remove(os.path.join(root, name))
426
+ continue
427
+ df.rename(columns={'统计日期': '数据周期'}, inplace=True)
428
+ # 2024-2-19 官方更新了推广渠道来源名称,自助取数没有更新,这里强制更改
429
+ df['三级来源'] = df['三级来源'].apply(
430
+ lambda x: '精准人群推广' if x == '引力魔方'
431
+ else '关键词推广' if x == '直通车'
432
+ else '智能场景' if x == '万相台'
433
+ else '精准人群推广' if x == '精准人群推广(原引力魔方)'
434
+ else '关键词推广' if x == '关键词推广(原直通车)'
435
+ else '智能场景' if x == '智能场景(原万相台)'
436
+ else x
437
+ )
438
+ df['日期'] = df['数据周期'].apply(lambda x: re.findall('(.*) ~', x)[0])
439
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
440
+ self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
441
+ if self.set_up_to_mogo:
442
+ d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_自助取数_店铺流量_月数据')
443
+ if self.set_up_to_mysql:
444
+ m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_自助取数_店铺流量_月数据')
488
445
  os.remove(os.path.join(root, name))
489
- continue
490
- df['日期'] = df['日期'].astype(str).apply(
491
- lambda x: '-'.join(re.findall(r'(\d{4})(\d{2})(\d{2})', x)[0]) if x else x)
492
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore') # 转换日期列
493
- # min_clm = str(df.min()['日期']).split(' ')[0]
494
- # max_clm = str(df.max()['日期']).split(' ')[0]
495
- min_clm = str(df['日期'].min()).split(' ')[0]
496
- max_clm = str(df['日期'].max()).split(' ')[0]
497
- new_name = f'{name_st[0]}-{min_clm}_{max_clm}.csv' # 保存时将(分日)去掉
498
- df.replace(to_replace=['--'], value='', regex=False, inplace=True)
499
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
500
- self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
501
- if self.set_up_to_mogo:
502
- d.df_to_mongo(df=df, db_name='生意经1', collection_name='生意经_店铺指标')
503
- if self.set_up_to_mysql:
504
- m.df_to_mysql(df=df, db_name='生意经1', tabel_name='生意经_店铺指标')
505
- os.remove(os.path.join(root, name))
506
-
507
- elif name.endswith('csv') and '省份' in name:
508
- # 生意经,地域分布, 仅限日数据
509
- pattern = re.findall(r'(.*[\u4e00-\u9fa5])(\d{4})(\d{2})(\d{2})\.', name)
510
- if not pattern or '省份城市分析2' not in name:
511
- print(f'{name} 不支持或已转换的表格')
512
- os.remove(os.path.join(root, name)) # 直接删掉,避免被分到原始文件, encoding 不同会引发错误
513
- continue
514
- date = pattern[0][1:]
515
- date = '-'.join(date)
516
- new_name = f'{pattern[0][0]}-{date}.csv'
517
- df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
518
- if len(df) == 0:
519
- print(f'{name} 报表数据为空')
446
+ elif name.endswith('.xlsx') and '直播分场次效果' in name:
447
+ pattern = re.findall(r'(\d{4}-\d{2}-\d{2})_(\d{4}-\d{2}-\d{2})', name)
448
+ if pattern:
449
+ continue
450
+ df = pd.read_excel(os.path.join(root, name), header=0)
451
+ if len(df) == 0:
452
+ print(f'{name} 报表数据为空')
453
+ continue
454
+ df.replace(to_replace=['--'], value='0', regex=False, inplace=True)
455
+ df.replace(to_replace=[','], value='', regex=True, inplace=True)
456
+ df['直播开播时间'] = pd.to_datetime(df['直播开播时间'], format='%Y-%m-%d %H:%M:%S', errors='ignore')
457
+ df.insert(loc=0, column='日期', value=df['直播开播时间'])
458
+ df['日期'] = df['日期'].apply(lambda x: pd.to_datetime(str(x).split(' ')[0], format='%Y-%m-%d', errors='ignore') if x else x)
459
+ df.insert(loc=1, column='店铺', value='万里马官方旗舰店')
460
+ min_clm = str(df.min()['直播开播时间']).split(' ')[0]
461
+ max_clm = str(df.max()['直播开播时间']).split(' ')[0]
462
+ new_name = f'{os.path.splitext(name)[0]}_{min_clm}_{max_clm}.csv'
463
+ new_name = re.sub(r' ?(\(\d+\))', '',new_name)
464
+ self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
520
465
  os.remove(os.path.join(root, name))
521
- continue
522
- df['省'] = df['省份'].apply(lambda x: x if ' ├─ ' not in x and ' └─ ' not in x else None)
523
- df['城市'] = df[['省份', '省']].apply(lambda x: '汇总' if x['省'] else x['省份'], axis=1)
524
- df['省'].fillna(method='ffill', inplace=True)
525
- df['城市'].replace(to_replace=[' ├─ | └─ '], value='', regex=True, inplace=True)
526
- pov = df.pop('')
527
- city = df.pop('城市')
528
- df['省+市'] = df['省份']
529
- df['省份'] = pov
530
- df.insert(loc=1, column='城市', value=city)
531
- df.insert(loc=0, column='日期', value=date)
532
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
533
- self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
534
- if self.set_up_to_mogo:
535
- d.df_to_mongo(df=df, db_name='生意经1', collection_name='生意经_地域分布_省份城市分析')
536
- if self.set_up_to_mysql:
537
- m.df_to_mysql(df=df, db_name='生意经1', tabel_name='生意经_地域分布_省份城市分析')
538
- os.remove(os.path.join(root, name)) # 移除已转换的原文件
539
-
540
- elif name.endswith('csv') and 'order' in name:
541
- # 生意经,订单数据,仅限月数据
542
- pattern = re.findall(r'(.*)(\d{4})(\d{2})(\d{2})-(\d{4})(\d{2})(\d{2})', name)
543
- if not pattern:
544
- print(f'{name} 不支持或已转换的表格')
545
- os.remove(os.path.join(root, name)) # 直接删掉,避免被分到原始文件, encoding 不同会引发错误
546
- continue
547
- date1 = pattern[0][1:4]
548
- date1 = '-'.join(date1)
549
- date2 = pattern[0][4:]
550
- date2 = '-'.join(date2)
551
- date = f'{date1}_{date2}'
552
- new_name = f'{pattern[0][0]}{date}.csv'
553
- df = pd.read_csv(os.path.join(root, name), encoding='gb18030', header=0, na_filter=False)
554
- if len(df) == 0:
555
- print(f'{name} 报表数据为空')
466
+ elif name.endswith('.csv') and 'baobei' in name:
467
+ # 生意经宝贝指标日数据
468
+ # print(name)
469
+ date = re.findall(r's-(\d{4})(\d{2})(\d{2})\.', str(name))
470
+ if not date: # 阻止月数据及已转换的表格
471
+ print(f'{name} 不支持或是已转换的表格')
472
+ os.remove(os.path.join(root, name)) # 直接删掉,避免被分到原始文件, encoding 不同会引发错误
473
+ continue
474
+ df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
475
+ if len(df) == 0:
476
+ print(f'{name} 报表数据为空')
477
+ os.remove(os.path.join(root, name))
478
+ continue
479
+ if '日期' in df.columns.tolist():
480
+ df.pop('日期')
481
+ new_date = '-'.join(date[0])
482
+ df.insert(loc=0, column='日期', value=new_date)
483
+ df.replace(to_replace=['--'], value='', regex=False, inplace=True)
484
+ df['宝贝ID'] = df['宝贝ID'].apply(
485
+ lambda x: f'="{x}"' if x and '=' not in str(x) else x
486
+ )
487
+ df['商家编码'] = df['商家编码'].apply(
488
+ lambda x: f'="{x}"' if x and '=' not in str(x) else x
489
+ )
490
+ name_st = re.findall(r'(.*)\d{4}\d{2}\d{2}\.', str(name)) # baobeitrans-
491
+ new_name = f'{name_st[0]}{new_date}.csv'
492
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
493
+ self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
494
+ if self.set_up_to_mogo:
495
+ d.df_to_mongo(df=df, db_name='生意经1', collection_name='生意经_宝贝指标')
496
+ if self.set_up_to_mysql:
497
+ m.df_to_mysql(df=df, db_name='生意经1', tabel_name='生意经_宝贝指标')
556
498
  os.remove(os.path.join(root, name))
557
- continue
558
- df.insert(loc=0, column='日期', value=date1)
559
- df.insert(loc=1, column='数据周期', value=date)
560
- df['商品id'] = df['宝贝链接'].apply(
561
- lambda x: f'=\"{"".join(re.findall("id=(.*)", str(x))[0])}\"' if x else x)
562
- df.rename(columns={'宝贝标题': '商品标题', '宝贝链接': '商品链接'}, inplace=True)
563
- df['颜色编码'] = df['商家编码'].apply(
564
- lambda x: ''.join(re.findall(r' .*(\d{4})$', str(x))) if x else x)
565
- df['商家编码'] = df['商家编码'].apply(lambda x: f'="{x}"' if x else x)
566
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
567
- self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
568
- if self.set_up_to_mogo:
569
- d.df_to_mongo(df=df, db_name='生意经1', collection_name='生意经_订单数据')
570
- if self.set_up_to_mysql:
571
- m.df_to_mysql(df=df, db_name='生意经1', tabel_name='生意经_订单数据')
572
- os.remove(os.path.join(root, name)) # 移除已转换的原文件
573
499
 
574
- elif name.endswith('.xlsx') and '直播间成交订单明细' in name:
575
- # 直播间成交订单明细
576
- df = pd.read_excel(os.path.join(root, name), header=0)
577
- if len(df) == 0:
578
- print(f'{name} 报表数据为空')
500
+ elif name.endswith('.csv') and '店铺销售指标' in name:
501
+ # 生意经, 店铺指标,仅限月数据,实际日指标也可以
502
+ name_st = re.findall(r'(.*)\(分日', name)
503
+ if not name_st:
504
+ print(f'{name} 已转换的表格')
505
+ continue
506
+ df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
507
+ if len(df) == 0:
508
+ print(f'{name} 报表数据为空')
509
+ os.remove(os.path.join(root, name))
510
+ continue
511
+ df['日期'] = df['日期'].astype(str).apply(
512
+ lambda x: '-'.join(re.findall(r'(\d{4})(\d{2})(\d{2})', x)[0]) if x else x)
513
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore') # 转换日期列
514
+ # min_clm = str(df.min()['日期']).split(' ')[0]
515
+ # max_clm = str(df.max()['日期']).split(' ')[0]
516
+ min_clm = str(df['日期'].min()).split(' ')[0]
517
+ max_clm = str(df['日期'].max()).split(' ')[0]
518
+ new_name = f'{name_st[0]}-{min_clm}_{max_clm}.csv' # 保存时将(分日)去掉
519
+ df.replace(to_replace=['--'], value='', regex=False, inplace=True)
520
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
521
+ self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
522
+ if self.set_up_to_mogo:
523
+ d.df_to_mongo(df=df, db_name='生意经1', collection_name='生意经_店铺指标')
524
+ if self.set_up_to_mysql:
525
+ m.df_to_mysql(df=df, db_name='生意经1', tabel_name='生意经_店铺指标')
579
526
  os.remove(os.path.join(root, name))
580
- continue
581
- df.rename(columns={'场次ID': '场次id', '商品ID': '商品id'}, inplace=True)
582
- df.replace(to_replace=['-'], value='', regex=False, inplace=True)
583
- cols = ['开播时间', '下单时间', '支付时间', '确认收货时间']
584
- for col in cols:
585
- df[col] = pd.to_datetime(df[col]) # 转换日期列
586
- for col2 in ['支付金额', '确认收货金额']:
587
- df[col2] = pd.to_numeric(df[col2], errors='ignore')
588
- df['日期'] = df['支付时间'].apply(lambda x: x.strftime('%Y-%m-%d'))
589
- date_min = df['日期'].values.min() + '_'
590
- date_max = df['日期'].values.max()
591
- new_name = '直播间成交订单明细_' + date_min + date_max + '.csv'
592
- for col3 in ['场次id', '商品id', '父订单', '子订单']:
593
- df[col3] = df[col3].apply(
594
- lambda x: "{0}{1}{2}".format('="', x, '"') if x and '=' not in str(x) else x
595
- )
596
- col4 = ['日期', '直播标题', '开播时间', '场次id', '支付时间', '支付金额', '商品id', '商品标题',
597
- '商品一级类目', '父订单', '子订单', '下单时间', '确认收货时间', '确认收货金额']
598
- df_lin = df[col4]
599
- # 调整列顺序
600
- df = pd.merge(df_lin, df, how='outer', on=col4)
601
- self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
602
- if self.set_up_to_mogo:
603
- d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_直播间成交订单明细')
604
- if self.set_up_to_mysql:
605
- m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_直播间成交订单明细')
606
- os.remove(os.path.join(root, name))
607
527
 
608
- elif name.endswith('.xlsx') and '直播间大盘数据' in name:
609
- # 直播间大盘数据
610
- df = pd.read_excel(os.path.join(root, name), header=0)
611
- if len(df) == 0:
612
- print(f'{name} 报表数据为空')
528
+ elif name.endswith('csv') and '省份' in name:
529
+ # 生意经,地域分布, 仅限日数据
530
+ pattern = re.findall(r'(.*[\u4e00-\u9fa5])(\d{4})(\d{2})(\d{2})\.', name)
531
+ if not pattern or '省份城市分析2' not in name:
532
+ print(f'{name} 不支持或已转换的表格')
533
+ os.remove(os.path.join(root, name)) # 直接删掉,避免被分到原始文件, encoding 不同会引发错误
534
+ continue
535
+ date = pattern[0][1:]
536
+ date = '-'.join(date)
537
+ new_name = f'{pattern[0][0]}-{date}.csv'
538
+ df = pd.read_csv(os.path.join(root, name), encoding=encoding, header=0, na_filter=False)
539
+ if len(df) == 0:
540
+ print(f'{name} 报表数据为空')
541
+ os.remove(os.path.join(root, name))
542
+ continue
543
+ df['省'] = df['省份'].apply(lambda x: x if ' ├─ ' not in x and ' └─ ' not in x else None)
544
+ df['城市'] = df[['省份', '省']].apply(lambda x: '汇总' if x['省'] else x['省份'], axis=1)
545
+ df['省'].fillna(method='ffill', inplace=True)
546
+ df['城市'].replace(to_replace=[' ├─ | └─ '], value='', regex=True, inplace=True)
547
+ pov = df.pop('省')
548
+ city = df.pop('城市')
549
+ df['省+市'] = df['省份']
550
+ df['省份'] = pov
551
+ df.insert(loc=1, column='城市', value=city)
552
+ df.insert(loc=0, column='日期', value=date)
553
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
554
+ self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
555
+ if self.set_up_to_mogo:
556
+ d.df_to_mongo(df=df, db_name='生意经1', collection_name='生意经_地域分布_省份城市分析')
557
+ if self.set_up_to_mysql:
558
+ m.df_to_mysql(df=df, db_name='生意经1', tabel_name='生意经_地域分布_省份城市分析')
559
+ os.remove(os.path.join(root, name)) # 移除已转换的原文件
560
+
561
+ elif name.endswith('csv') and 'order' in name:
562
+ # 生意经,订单数据,仅限月数据
563
+ pattern = re.findall(r'(.*)(\d{4})(\d{2})(\d{2})-(\d{4})(\d{2})(\d{2})', name)
564
+ if not pattern:
565
+ print(f'{name} 不支持或已转换的表格')
566
+ os.remove(os.path.join(root, name)) # 直接删掉,避免被分到原始文件, encoding 不同会引发错误
567
+ continue
568
+ date1 = pattern[0][1:4]
569
+ date1 = '-'.join(date1)
570
+ date2 = pattern[0][4:]
571
+ date2 = '-'.join(date2)
572
+ date = f'{date1}_{date2}'
573
+ new_name = f'{pattern[0][0]}{date}.csv'
574
+ df = pd.read_csv(os.path.join(root, name), encoding='gb18030', header=0, na_filter=False)
575
+ if len(df) == 0:
576
+ print(f'{name} 报表数据为空')
577
+ os.remove(os.path.join(root, name))
578
+ continue
579
+ df.insert(loc=0, column='日期', value=date1)
580
+ df.insert(loc=1, column='数据周期', value=date)
581
+ df['商品id'] = df['宝贝链接'].apply(
582
+ lambda x: f'=\"{"".join(re.findall("id=(.*)", str(x))[0])}\"' if x else x)
583
+ df.rename(columns={'宝贝标题': '商品标题', '宝贝链接': '商品链接'}, inplace=True)
584
+ df['颜色编码'] = df['商家编码'].apply(
585
+ lambda x: ''.join(re.findall(r' .*(\d{4})$', str(x))) if x else x)
586
+ df['商家编码'] = df['商家编码'].apply(lambda x: f'="{x}"' if x else x)
587
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
588
+ self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
589
+ if self.set_up_to_mogo:
590
+ d.df_to_mongo(df=df, db_name='生意经1', collection_name='生意经_订单数据')
591
+ if self.set_up_to_mysql:
592
+ m.df_to_mysql(df=df, db_name='生意经1', tabel_name='生意经_订单数据')
593
+ os.remove(os.path.join(root, name)) # 移除已转换的原文件
594
+
595
+ elif name.endswith('.xlsx') and '直播间成交订单明细' in name:
596
+ # 直播间成交订单明细
597
+ df = pd.read_excel(os.path.join(root, name), header=0)
598
+ if len(df) == 0:
599
+ print(f'{name} 报表数据为空')
600
+ os.remove(os.path.join(root, name))
601
+ continue
602
+ df.rename(columns={'场次ID': '场次id', '商品ID': '商品id'}, inplace=True)
603
+ df.replace(to_replace=['-'], value='', regex=False, inplace=True)
604
+ cols = ['开播时间', '下单时间', '支付时间', '确认收货时间']
605
+ for col in cols:
606
+ df[col] = pd.to_datetime(df[col]) # 转换日期列
607
+ for col2 in ['支付金额', '确认收货金额']:
608
+ df[col2] = pd.to_numeric(df[col2], errors='ignore')
609
+ df['日期'] = df['支付时间'].apply(lambda x: x.strftime('%Y-%m-%d'))
610
+ date_min = df['日期'].values.min() + '_'
611
+ date_max = df['日期'].values.max()
612
+ new_name = '直播间成交订单明细_' + date_min + date_max + '.csv'
613
+ for col3 in ['场次id', '商品id', '父订单', '子订单']:
614
+ df[col3] = df[col3].apply(
615
+ lambda x: "{0}{1}{2}".format('="', x, '"') if x and '=' not in str(x) else x
616
+ )
617
+ col4 = ['日期', '直播标题', '开播时间', '场次id', '支付时间', '支付金额', '商品id', '商品标题',
618
+ '商品一级类目', '父订单', '子订单', '下单时间', '确认收货时间', '确认收货金额']
619
+ df_lin = df[col4]
620
+ # 调整列顺序
621
+ df = pd.merge(df_lin, df, how='outer', on=col4)
622
+ self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
623
+ if self.set_up_to_mogo:
624
+ d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_直播间成交订单明细')
625
+ if self.set_up_to_mysql:
626
+ m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_直播间成交订单明细')
613
627
  os.remove(os.path.join(root, name))
614
- continue
615
- df.replace(to_replace=['-'], value='', regex=False, inplace=True)
616
- df.rename(columns={'统计日期': '日期'}, inplace=True)
617
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
618
- df['日期'] = df['日期'].apply(lambda x: x.strftime('%Y-%m-%d'))
619
- date_min = df['日期'].values.min() + '_'
620
- date_max = df['日期'].values.max()
621
- new_name = '直播间大盘数据_' + date_min + date_max + '.csv'
622
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
623
- self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
624
- if self.set_up_to_mogo:
625
- d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_直播间大盘数据')
626
- if self.set_up_to_mysql:
627
- m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_直播间大盘数据')
628
- os.remove(os.path.join(root, name))
629
628
 
630
- elif name.endswith('.xls') and '直播业绩-成交拆解' in name:
631
- # 直播业绩-成交拆解
632
- df = pd.read_excel(os.path.join(root, name), header=5)
633
- if len(df) == 0:
634
- print(f'{name} 报表数据为空')
629
+ elif name.endswith('.xlsx') and '直播间大盘数据' in name:
630
+ # 直播间大盘数据
631
+ df = pd.read_excel(os.path.join(root, name), header=0)
632
+ if len(df) == 0:
633
+ print(f'{name} 报表数据为空')
634
+ os.remove(os.path.join(root, name))
635
+ continue
636
+ df.replace(to_replace=['-'], value='', regex=False, inplace=True)
637
+ df.rename(columns={'统计日期': '日期'}, inplace=True)
638
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
639
+ df['日期'] = df['日期'].apply(lambda x: x.strftime('%Y-%m-%d'))
640
+ date_min = df['日期'].values.min() + '_'
641
+ date_max = df['日期'].values.max()
642
+ new_name = '直播间大盘数据_' + date_min + date_max + '.csv'
643
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
644
+ self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
645
+ if self.set_up_to_mogo:
646
+ d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_直播间大盘数据')
647
+ if self.set_up_to_mysql:
648
+ m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_直播间大盘数据')
635
649
  os.remove(os.path.join(root, name))
636
- continue
637
- df.replace(to_replace=['-'], value='', regex=False, inplace=True)
638
- df.replace(to_replace=[','], value='', regex=True, inplace=True)
639
- df.rename(columns={'统计日期': '日期'}, inplace=True)
640
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
641
- df['日期'] = df['日期'].apply(lambda x: x.strftime('%Y-%m-%d'))
642
- date_min = df['日期'].values.min() + '_'
643
- date_max = df['日期'].values.max()
644
- new_name = '直播业绩_成交拆解_' + date_min + date_max + '.csv'
645
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
646
- self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
647
- if self.set_up_to_mogo:
648
- d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_直播业绩')
649
- if self.set_up_to_mysql:
650
- m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_直播业绩')
651
- os.remove(os.path.join(root, name))
652
650
 
653
- elif name.endswith('.xlsx') and '明星店铺' in name:
654
- # 品销宝
655
- pattern = re.findall(r'_(\d{4}-\d{2}-\d{2})_', name)
656
- if pattern:
657
- continue
658
- sheets4 = ['账户', '推广计划', '推广单元', '创意', '品牌流量包', '定向人群'] # 品销宝
659
- file_name4 = os.path.splitext(name)[0] # 明星店铺报表
660
- for sheet4 in sheets4:
661
- df = pd.read_excel(os.path.join(root, name), sheet_name=sheet4, header=0, engine='openpyxl')
662
- # print(sheet4)
651
+ elif name.endswith('.xls') and '直播业绩-成交拆解' in name:
652
+ # 直播业绩-成交拆解
653
+ df = pd.read_excel(os.path.join(root, name), header=5)
663
654
  if len(df) == 0:
664
655
  print(f'{name} 报表数据为空')
665
656
  os.remove(os.path.join(root, name))
666
657
  continue
667
- if len(df) < 1:
668
- print(f'{name} 跳过')
658
+ df.replace(to_replace=['-'], value='', regex=False, inplace=True)
659
+ df.replace(to_replace=[','], value='', regex=True, inplace=True)
660
+ df.rename(columns={'统计日期': '日期'}, inplace=True)
661
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
662
+ df['日期'] = df['日期'].apply(lambda x: x.strftime('%Y-%m-%d'))
663
+ date_min = df['日期'].values.min() + '_'
664
+ date_max = df['日期'].values.max()
665
+ new_name = '直播业绩_成交拆解_' + date_min + date_max + '.csv'
666
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
667
+ self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
668
+ if self.set_up_to_mogo:
669
+ d.df_to_mongo(df=df, db_name='生意参谋2', collection_name='生意参谋_直播业绩')
670
+ if self.set_up_to_mysql:
671
+ m.df_to_mysql(df=df, db_name='生意参谋2', tabel_name='生意参谋_直播业绩')
672
+ os.remove(os.path.join(root, name))
673
+
674
+ elif name.endswith('.xlsx') and '明星店铺' in name:
675
+ # 品销宝
676
+ pattern = re.findall(r'_(\d{4}-\d{2}-\d{2})_', name)
677
+ if pattern:
669
678
  continue
670
- else:
671
- df.insert(loc=1, column='报表类型', value=sheet4)
672
- df.fillna(0, inplace=True)
673
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore') # 转换日期列
674
- min_clm = str(df['日期'].min()).split(' ')[0]
675
- max_clm = str(df['日期'].max()).split(' ')[0]
676
- new_file_name4 = f'{sheet4}_{file_name4}_{min_clm}_{max_clm}.csv'
677
- # 以sheet名进一步创建子文件夹
678
- root_new = str(pathlib.Path(self.source_path, '推广报表/品销宝', sheet4))
679
- self.save_to_csv(df, root_new, new_file_name4) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
680
- if self.set_up_to_mogo:
681
- d.df_to_mongo(df=df, db_name='天猫数据1', collection_name='天猫_推广_品销宝')
682
- if self.set_up_to_mysql:
683
- m.df_to_mysql(df=df, db_name='天猫数据1', tabel_name='天猫_推广_品销宝')
684
- os.remove(os.path.join(root, name))
679
+ sheets4 = ['账户', '推广计划', '推广单元', '创意', '品牌流量包', '定向人群'] # 品销宝
680
+ file_name4 = os.path.splitext(name)[0] # 明星店铺报表
681
+ for sheet4 in sheets4:
682
+ df = pd.read_excel(os.path.join(root, name), sheet_name=sheet4, header=0, engine='openpyxl')
683
+ # print(sheet4)
684
+ if len(df) == 0:
685
+ print(f'{name} 报表数据为空')
686
+ os.remove(os.path.join(root, name))
687
+ continue
688
+ if len(df) < 1:
689
+ print(f'{name} 跳过')
690
+ continue
691
+ else:
692
+ df.insert(loc=1, column='报表类型', value=sheet4)
693
+ df.fillna(0, inplace=True)
694
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore') # 转换日期列
695
+ min_clm = str(df['日期'].min()).split(' ')[0]
696
+ max_clm = str(df['日期'].max()).split(' ')[0]
697
+ new_file_name4 = f'{sheet4}_{file_name4}_{min_clm}_{max_clm}.csv'
698
+ # 以sheet名进一步创建子文件夹
699
+ root_new = str(pathlib.Path(self.source_path, '推广报表/品销宝', sheet4))
700
+ self.save_to_csv(df, root_new, new_file_name4) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
701
+ if self.set_up_to_mogo:
702
+ d.df_to_mongo(df=df, db_name='天猫数据1', collection_name='天猫_推广_品销宝')
703
+ if self.set_up_to_mysql:
704
+ m.df_to_mysql(df=df, db_name='天猫数据1', tabel_name='天猫_推广_品销宝')
705
+ os.remove(os.path.join(root, name))
685
706
 
686
- elif name.endswith('.csv') and '淘宝店铺数据' in name:
687
- df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
688
- if self.set_up_to_mogo:
689
- d.df_to_mongo(df=df, db_name='市场数据1', collection_name='淘宝店铺数据')
690
- if self.set_up_to_mysql:
691
- m.df_to_mysql(df=df, db_name='市场数据1', tabel_name='淘宝店铺数据')
707
+ elif name.endswith('.csv') and '淘宝店铺数据' in name:
708
+ df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
709
+ if self.set_up_to_mogo:
710
+ d.df_to_mongo(df=df, db_name='市场数据1', collection_name='淘宝店铺数据')
711
+ if self.set_up_to_mysql:
712
+ m.df_to_mysql(df=df, db_name='市场数据1', tabel_name='淘宝店铺数据')
692
713
 
693
- elif name.endswith('.csv') and '人群洞察' in name:
694
- df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
695
- df.replace(to_replace=['--'], value='', regex=False, inplace=True)
696
- df = df[df['人群规模'] != '']
697
- if len(df) == 0:
714
+ elif name.endswith('.csv') and '人群洞察' in name:
715
+ df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
716
+ df.replace(to_replace=['--'], value='', regex=False, inplace=True)
717
+ df = df[df['人群规模'] != '']
718
+ if len(df) == 0:
719
+ os.remove(os.path.join(root, name))
720
+ print(f'{name}: 数据为空, 已移除: {os.path.join(root, name)}')
721
+ continue
722
+ if self.set_up_to_mogo:
723
+ d.df_to_mongo(df=df, db_name='天猫数据1', collection_name='万相台_人群洞察')
724
+ if self.set_up_to_mysql:
725
+ m.df_to_mysql(df=df, db_name='天猫数据1', tabel_name='万相台_人群洞察')
726
+
727
+ # ----------------------- 京东数据处理分界线 -----------------------
728
+ elif name.endswith('.csv') and '关键词点击成交报表_pbix同步_勿删改' in name:
729
+ df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
730
+ for col in df.columns.tolist():
731
+ if '(' in col:
732
+ new_col = re.sub('[()]', '_', col)
733
+ new_col = new_col.strip('_')
734
+ df.rename(columns={col: new_col}, inplace=True)
735
+ df['日期'] = df['日期'].apply(lambda x: f'{str(x)[:4]}-{str(x)[4:6]}-{str(x)[6:8]}')
736
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
737
+ min_clm = str(df['日期'].min()).split(' ')[0]
738
+ max_clm = str(df['日期'].max()).split(' ')[0]
739
+ new_name = f'京东推广关键词点击成交报表_{min_clm}_{max_clm}.csv'
740
+ self.save_to_csv(df, root, new_name)
698
741
  os.remove(os.path.join(root, name))
699
- print(f'{name}: 数据为空, 已移除: {os.path.join(root, name)}')
700
- continue
701
- if self.set_up_to_mogo:
702
- d.df_to_mongo(df=df, db_name='天猫数据1', collection_name='万相台_人群洞察')
703
- if self.set_up_to_mysql:
704
- m.df_to_mysql(df=df, db_name='天猫数据1', tabel_name='万相台_人群洞察')
742
+ elif name.endswith('.csv') and '营销概况_全站营销' in name:
743
+ df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=1, na_filter=False)
744
+ df = df[(df['日期'] != '日期') & (df['日期'] != '汇总') & (df['日期'] != '0') & (df['花费'] != '0') & (df['花费'] != '0.00')]
745
+ df['日期'] = df['日期'].apply(lambda x: f'{str(x)[:4]}-{str(x)[4:6]}-{str(x)[6:8]}')
746
+ df.drop("'当前时间'", axis=1, inplace=True)
747
+ df.rename(columns={'全站ROI': '全站roi'}, inplace=True)
748
+ df.insert(loc=1, column='产品线', value='全站营销')
749
+ new_name = re.sub('至', '_', name)
750
+ self.save_to_csv(df, root, new_name)
751
+ os.remove(os.path.join(root, name))
752
+ elif name.endswith('.xlsx') and '店铺来源_流量来源' in name:
753
+ # 京东店铺来源
754
+ if '按天' not in name:
755
+ print(f'{name} 京东流量请按天下载')
756
+ continue
757
+ new_name = name.split(r'__20')[0]
758
+ date01 = re.findall(r'(\d{4})(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})', str(name))
759
+ new_date01 = f'{date01[0][0]}-{date01[0][1]}-{date01[0][2]}'
760
+ new_date02 = f'{date01[0][3]}-{date01[0][4]}-{date01[0][5]}'
761
+ new_date03 = f'{new_date01}_{new_date02}'
762
+ df = pd.read_excel(os.path.join(root, name), header=0)
763
+ if len(df) == 0:
764
+ print(f'{name} 报表数据为空')
765
+ os.remove(os.path.join(root, name))
766
+ continue
767
+ df.replace(to_replace=['-'], value='', regex=False, inplace=True)
768
+ df.insert(loc=0, column='日期', value=new_date01)
769
+ if new_date01 != new_date02:
770
+ df.insert(loc=1, column='数据周期', value=new_date03)
771
+ cols = df.columns.tolist()
772
+ if '三级来源' in cols:
773
+ source = '三级来源'
774
+ elif '二级来源' in cols:
775
+ source = '二级来源'
776
+ else:
777
+ source = '一级来源'
705
778
 
706
- # ----------------------- 京东数据处理分界线 -----------------------
707
- elif name.endswith('.csv') and '关键词点击成交报表_pbix同步_勿删改' in name:
708
- df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
709
- for col in df.columns.tolist():
710
- if '' in col:
711
- new_col = re.sub('[()]', '_', col)
712
- new_col = new_col.strip('_')
713
- df.rename(columns={col: new_col}, inplace=True)
714
- df['日期'] = df['日期'].apply(lambda x: f'{str(x)[:4]}-{str(x)[4:6]}-{str(x)[6:8]}')
715
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
716
- min_clm = str(df['日期'].min()).split(' ')[0]
717
- max_clm = str(df['日期'].max()).split(' ')[0]
718
- new_name = f'京东推广关键词点击成交报表_{min_clm}_{max_clm}.csv'
719
- self.save_to_csv(df, root, new_name)
720
- os.remove(os.path.join(root, name))
721
- elif name.endswith('.csv') and '营销概况_全站营销' in name:
722
- df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=1, na_filter=False)
723
- df = df[(df['日期'] != '日期') & (df['日期'] != '汇总') & (df['日期'] != '0') & (df['花费'] != '0') & (df['花费'] != '0.00')]
724
- df['日期'] = df['日期'].apply(lambda x: f'{str(x)[:4]}-{str(x)[4:6]}-{str(x)[6:8]}')
725
- df.drop("'当前时间'", axis=1, inplace=True)
726
- df.rename(columns={'全站ROI': '全站roi'}, inplace=True)
727
- df.insert(loc=1, column='产品线', value='全站营销')
728
- new_name = re.sub('至', '_', name)
729
- self.save_to_csv(df, root, new_name)
730
- os.remove(os.path.join(root, name))
731
- elif name.endswith('.xlsx') and '店铺来源_流量来源' in name:
732
- # 京东店铺来源
733
- if '按天' not in name:
734
- print(f'{name} 京东流量请按天下载')
735
- continue
736
- new_name = name.split(r'__20')[0]
737
- date01 = re.findall(r'(\d{4})(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})', str(name))
738
- new_date01 = f'{date01[0][0]}-{date01[0][1]}-{date01[0][2]}'
739
- new_date02 = f'{date01[0][3]}-{date01[0][4]}-{date01[0][5]}'
740
- new_date03 = f'{new_date01}_{new_date02}'
741
- df = pd.read_excel(os.path.join(root, name), header=0)
742
- if len(df) == 0:
743
- print(f'{name} 报表数据为空')
779
+ new_name = f'{new_name}_{source}_{new_date03}.csv'
780
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
781
+ self.save_to_csv(df, root, new_name) # csv 文件仍然保留这些列
782
+ for col_2024 in cols: # 京东这个表有字段加了去年日期,删除这些同比数据字段,不然列数量爆炸
783
+ if '20' in col_2024 and '流量来源' in name:
784
+ df.drop(col_2024, axis=1, inplace=True)
785
+ if self.set_up_to_mogo:
786
+ d.df_to_mongo(df=df,db_name='京东数据1', collection_name='京东_流量来源_日数据')
787
+ if self.set_up_to_mysql:
788
+ m.df_to_mysql(df=df, db_name='京东数据1', tabel_name='京东_流量来源_日数据')
744
789
  os.remove(os.path.join(root, name))
745
- continue
746
- df.replace(to_replace=['-'], value='', regex=False, inplace=True)
747
- df.insert(loc=0, column='日期', value=new_date01)
748
- if new_date01 != new_date02:
749
- df.insert(loc=1, column='数据周期', value=new_date03)
750
- cols = df.columns.tolist()
751
- if '三级来源' in cols:
752
- source = '三级来源'
753
- elif '二级来源' in cols:
754
- source = '二级来源'
755
- else:
756
- source = '一级来源'
757
790
 
758
- new_name = f'{new_name}_{source}_{new_date03}.csv'
759
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
760
- self.save_to_csv(df, root, new_name) # csv 文件仍然保留这些列
761
- for col_2024 in cols: # 京东这个表有字段加了去年日期,删除这些同比数据字段,不然列数量爆炸
762
- if '20' in col_2024 and '流量来源' in name:
763
- df.drop(col_2024, axis=1, inplace=True)
764
- if self.set_up_to_mogo:
765
- d.df_to_mongo(df=df,db_name='京东数据1', collection_name='京东_流量来源_日数据')
766
- if self.set_up_to_mysql:
767
- m.df_to_mysql(df=df, db_name='京东数据1', tabel_name='京东_流量来源_日数据')
768
- os.remove(os.path.join(root, name))
791
+ elif name.endswith('.xlsx') and '全部渠道_商品明细' in name:
792
+ # 京东商品明细 文件转换
793
+ date1 = re.findall(r'_(\d{4})(\d{2})(\d{2})_全部', str(name))
794
+ if not date1[0]:
795
+ print(f'{name}: 仅支持日数据')
796
+ continue
797
+ if date1:
798
+ date1 = f'{date1[0][0]}-{date1[0][1]}-{date1[0][2]}'
799
+ df = pd.read_excel(os.path.join(root, name), header=0)
800
+ if len(df) == 0:
801
+ print(f'{name} 报表数据为空')
802
+ os.remove(os.path.join(root, name))
803
+ continue
804
+ if '10035975359247' in df['商品ID'].values or '10056642622343' in df['商品ID'].values:
805
+ new_name = f'sku_{date1}_全部渠道_商品明细.csv'
806
+ elif '10021440233518' in df['商品ID'].values or '10022867813485' in df['商品ID'].values:
807
+ new_name = f'spu_{date1}_全部渠道_商品明细.csv'
808
+ else:
809
+ new_name = f'未分类_{date1}_全部渠道_商品明细.csv'
810
+ df.replace(to_replace=['-'], value='', regex=False, inplace=True)
811
+ df.rename(columns={'商品ID': '商品id'}, inplace=True)
812
+ df['商品id'] = df['商品id'].apply(lambda x: f'="{x}"' if x else x)
813
+ df['货号'] = df['货号'].apply(lambda x: f'="{x}"' if x else x)
814
+ df.insert(loc=0, column='日期', value=date1)
769
815
 
770
- elif name.endswith('.xlsx') and '全部渠道_商品明细' in name:
771
- # 京东商品明细 文件转换
772
- date1 = re.findall(r'_(\d{4})(\d{2})(\d{2})_全部', str(name))
773
- if not date1[0]:
774
- print(f'{name}: 仅支持日数据')
775
- continue
776
- if date1:
777
- date1 = f'{date1[0][0]}-{date1[0][1]}-{date1[0][2]}'
778
- df = pd.read_excel(os.path.join(root, name), header=0)
779
- if len(df) == 0:
780
- print(f'{name} 报表数据为空')
816
+ self.save_to_csv(df, root, new_name)
817
+ if self.set_up_to_mogo:
818
+ if 'sku' in new_name:
819
+ d.df_to_mongo(df=df,db_name='京东数据1', collection_name='京东_sku_商品明细')
820
+ elif 'spu' in new_name:
821
+ d.df_to_mongo(df=df,db_name='京东数据1', collection_name='京东_spu_商品明细')
822
+ if self.set_up_to_mysql:
823
+ if 'sku' in new_name:
824
+ m.df_to_mysql(df=df, db_name='京东数据1', tabel_name='京东_sku_商品明细')
825
+ elif 'spu' in new_name:
826
+ m.df_to_mysql(df=df, db_name='京东数据1', tabel_name='京东_spu_商品明细')
781
827
  os.remove(os.path.join(root, name))
782
- continue
783
- if '10035975359247' in df['商品ID'].values or '10056642622343' in df['商品ID'].values:
784
- new_name = f'sku_{date1}_全部渠道_商品明细.csv'
785
- elif '10021440233518' in df['商品ID'].values or '10022867813485' in df['商品ID'].values:
786
- new_name = f'spu_{date1}_全部渠道_商品明细.csv'
787
- else:
788
- new_name = f'未分类_{date1}_全部渠道_商品明细.csv'
789
- df.replace(to_replace=['-'], value='', regex=False, inplace=True)
790
- df.rename(columns={'商品ID': '商品id'}, inplace=True)
791
- df['商品id'] = df['商品id'].apply(lambda x: f'="{x}"' if x else x)
792
- df['货号'] = df['货号'].apply(lambda x: f'="{x}"' if x else x)
793
- df.insert(loc=0, column='日期', value=date1)
828
+ elif name.endswith('.xlsx') and '搜索分析-排名定位-商品词下排名' in name:
829
+ # 京东商品词下排名
830
+ new_name = os.path.splitext(name)[0] + '.csv'
831
+ # print(name)
832
+ df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
833
+ if len(df) == 0:
834
+ print(f'{name} 报表数据为空')
835
+ os.remove(os.path.join(root, name))
836
+ continue
837
+ df.rename(columns={'商品的ID': 'skuid'}, inplace=True)
838
+ df['skuid'] = df['skuid'].apply(lambda x: f'="{x}"' if x and '=' not in str(x) else x)
839
+ self.save_to_csv(df, root, new_name)
840
+ if self.set_up_to_mogo:
841
+ d.df_to_mongo(df=df,db_name='京东数据1', collection_name='京东_商品词下排名')
842
+ if self.set_up_to_mysql:
843
+ m.df_to_mysql(df=df, db_name='京东数据1', tabel_name='京东_商品词下排名')
844
+ os.remove(os.path.join(root, name)) # 移除已转换的原文件
794
845
 
795
- self.save_to_csv(df, root, new_name)
796
- if self.set_up_to_mogo:
797
- if 'sku' in new_name:
798
- d.df_to_mongo(df=df,db_name='京东数据1', collection_name='京东_sku_商品明细')
799
- elif 'spu' in new_name:
800
- d.df_to_mongo(df=df,db_name='京东数据1', collection_name='京东_spu_商品明细')
801
- if self.set_up_to_mysql:
802
- if 'sku' in new_name:
803
- m.df_to_mysql(df=df, db_name='京东数据1', tabel_name='京东_sku_商品明细')
804
- elif 'spu' in new_name:
805
- m.df_to_mysql(df=df, db_name='京东数据1', tabel_name='京东_spu_商品明细')
806
- os.remove(os.path.join(root, name))
807
- elif name.endswith('.xlsx') and '搜索分析-排名定位-商品词下排名' in name:
808
- # 京东商品词下排名
809
- new_name = os.path.splitext(name)[0] + '.csv'
810
- # print(name)
811
- df = pd.read_excel(os.path.join(root, name), header=0, engine='openpyxl')
812
- if len(df) == 0:
813
- print(f'{name} 报表数据为空')
814
- os.remove(os.path.join(root, name))
815
- continue
816
- df.rename(columns={'商品的ID': 'skuid'}, inplace=True)
817
- df['skuid'] = df['skuid'].apply(lambda x: f'="{x}"' if x and '=' not in str(x) else x)
818
- self.save_to_csv(df, root, new_name)
819
- if self.set_up_to_mogo:
820
- d.df_to_mongo(df=df,db_name='京东数据1', collection_name='京东_商品词下排名')
821
- if self.set_up_to_mysql:
822
- m.df_to_mysql(df=df, db_name='京东数据1', tabel_name='京东_商品词下排名')
823
- os.remove(os.path.join(root, name)) # 移除已转换的原文件
846
+ elif name.endswith('.xlsx') and '搜索分析-排名定位-商品排名' in name:
847
+ # 京东商品排名
848
+ new_name = os.path.splitext(name)[0] + '.csv'
849
+ date_in = re.findall(r'(\d{4}-\d{2}-\d{2})-搜索', str(name))[0]
850
+ df = pd.read_excel(os.path.join(root, name), header=0)
851
+ if len(df) == 0:
852
+ print(f'{name} 报表数据为空')
853
+ os.remove(os.path.join(root, name))
854
+ continue
855
+ df.insert(0, '日期', date_in) # 插入新列
856
+ df.rename(columns={'SKU': 'skuid'}, inplace=True)
857
+ df['skuid'] = df['skuid'].apply(lambda x: f'="{x}"' if x and '=' not in str(x) else x)
858
+ self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
859
+ if self.set_up_to_mogo:
860
+ d.df_to_mongo(df=df,db_name='京东数据1', collection_name='京东_商品排名')
861
+ if self.set_up_to_mysql:
862
+ m.df_to_mysql(df=df, db_name='京东数据1', tabel_name='京东_商品排名')
863
+ os.remove(os.path.join(root, name)) # 移除已转换的原文件
824
864
 
825
- elif name.endswith('.xlsx') and '搜索分析-排名定位-商品排名' in name:
826
- # 京东商品排名
827
- new_name = os.path.splitext(name)[0] + '.csv'
828
- date_in = re.findall(r'(\d{4}-\d{2}-\d{2})-搜索', str(name))[0]
829
- df = pd.read_excel(os.path.join(root, name), header=0)
830
- if len(df) == 0:
831
- print(f'{name} 报表数据为空')
865
+ elif name.endswith('.xls') and '竞店概况_竞店详情' in name:
866
+ # 京东,竞争-竞店概况-竞店详情-全部渠道
867
+ date01 = re.findall(r'全部渠道_(\d{4})(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})', str(name))
868
+ start_date = f'{date01[0][0]}-{date01[0][1]}-{date01[0][2]}'
869
+ end_date = f'{date01[0][3]}-{date01[0][4]}-{date01[0][5]}'
870
+ df = pd.read_excel(os.path.join(root, name), header=0)
871
+ if len(df) == 0:
872
+ print(f'{name} 报表数据为空')
873
+ os.remove(os.path.join(root, name))
874
+ continue
875
+ df.replace(to_replace=[','], value='', regex=True, inplace=True)
876
+ df.insert(loc=0, column='日期', value=start_date)
877
+ new_name = f'{os.path.splitext(name)[0]}'
878
+ new_name = re.sub(r'\d{8}_\d{8}', f'{start_date}_{end_date}', new_name)
879
+ self.save_to_csv(df, root, new_name)
880
+ if self.set_up_to_mogo:
881
+ d.df_to_mongo(df=df,db_name='京东数据1', collection_name='京东_竞店监控_日数据')
882
+ if self.set_up_to_mysql:
883
+ m.df_to_mysql(df=df, db_name='京东数据1', tabel_name='京东_竞店监控_日数据')
832
884
  os.remove(os.path.join(root, name))
833
- continue
834
- df.insert(0, '日期', date_in) # 插入新列
835
- df.rename(columns={'SKU': 'skuid'}, inplace=True)
836
- df['skuid'] = df['skuid'].apply(lambda x: f'="{x}"' if x and '=' not in str(x) else x)
837
- self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
838
- if self.set_up_to_mogo:
839
- d.df_to_mongo(df=df,db_name='京东数据1', collection_name='京东_商品排名')
840
- if self.set_up_to_mysql:
841
- m.df_to_mysql(df=df, db_name='京东数据1', tabel_name='京东_商品排名')
842
- os.remove(os.path.join(root, name)) # 移除已转换的原文件
843
885
 
844
- elif name.endswith('.xls') and '竞店概况_竞店详情' in name:
845
- # 京东,竞争-竞店概况-竞店详情-全部渠道
846
- date01 = re.findall(r'全部渠道_(\d{4})(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})', str(name))
847
- start_date = f'{date01[0][0]}-{date01[0][1]}-{date01[0][2]}'
848
- end_date = f'{date01[0][3]}-{date01[0][4]}-{date01[0][5]}'
849
- df = pd.read_excel(os.path.join(root, name), header=0)
850
- if len(df) == 0:
851
- print(f'{name} 报表数据为空')
886
+ elif name.endswith('.xls') and '店铺' in name:
887
+ # 京东 自助报表 店铺日报
888
+ df = pd.read_excel(os.path.join(root, name), header=0)
889
+ if len(df) == 0:
890
+ print(f'{name} 报表数据为空')
891
+ os.remove(os.path.join(root, name))
892
+ continue
893
+ df['日期'] = df['日期'].apply(
894
+ lambda x: '-'.join(re.findall(r'(\d{4})(\d{2})(\d{2})', str(x))[0])
895
+ )
896
+ date_min = df['日期'].values.min()
897
+ date_max = df['日期'].values.max()
898
+ # df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
899
+ new_name = f'JD店铺日报_' + re.findall(r"(.*)\d{8}_\d{8}", name)[0] + f'_{date_min}_{date_max}.csv'
900
+ self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
901
+ if self.set_up_to_mogo:
902
+ d.df_to_mongo(df=df, db_name='京东数据1', collection_name='京东_自助取数_店铺日报')
903
+ if self.set_up_to_mysql:
904
+ m.df_to_mysql(df=df, db_name='京东数据1', tabel_name='京东_自助取数_店铺日报')
852
905
  os.remove(os.path.join(root, name))
853
- continue
854
- df.replace(to_replace=[','], value='', regex=True, inplace=True)
855
- df.insert(loc=0, column='日期', value=start_date)
856
- new_name = f'{os.path.splitext(name)[0]}'
857
- new_name = re.sub(r'\d{8}_\d{8}', f'{start_date}_{end_date}', new_name)
858
- self.save_to_csv(df, root, new_name)
859
- if self.set_up_to_mogo:
860
- d.df_to_mongo(df=df,db_name='京东数据1', collection_name='京东_竞店监控_日数据')
861
- if self.set_up_to_mysql:
862
- m.df_to_mysql(df=df, db_name='京东数据1', tabel_name='京东_竞店监控_日数据')
863
- os.remove(os.path.join(root, name))
864
906
 
865
- elif name.endswith('.xls') and '店铺' in name:
866
- # 京东 自助报表 店铺日报
867
- df = pd.read_excel(os.path.join(root, name), header=0)
868
- if len(df) == 0:
869
- print(f'{name} 报表数据为空')
907
+ elif name.endswith('.xls') and '商家榜单_女包_整体' in name:
908
+ # 京东 行业 商家榜单
909
+ date2 = re.findall(r'_\d{8}-\d+', name)
910
+ if date2:
911
+ print(f'{name}: 请下载日数据,不支持其他周期')
912
+ os.remove(os.path.join(root, name)) # 直接删掉,避免被分到原始文件, encoding 不同会引发错误
913
+ continue
914
+ date1 = re.findall(r'_(\d{4})(\d{2})(\d{2})', name)
915
+ date1 = f'{date1[0][0]}-{date1[0][1]}-{date1[0][2]}'
916
+ df = pd.read_excel(os.path.join(root, name), header=0)
917
+ if len(df) == 0:
918
+ print(f'{name} 报表数据为空')
919
+ os.remove(os.path.join(root, name))
920
+ continue
921
+ df['日期'] = df['日期'].astype(str).apply(lambda x: f'{x[:4]}-{x[4:6]}-{x[6:8]}')
922
+ df.insert(loc=0, column='类型', value='商家榜单')
923
+ new_name = f'{os.path.splitext(name)[0]}_{date1}.csv'
924
+ self.save_to_csv(df, root, new_name)
925
+ if self.set_up_to_mogo:
926
+ d.df_to_mongo(df=df,db_name='京东数据1', collection_name='京东_商家榜单')
927
+ if self.set_up_to_mysql:
928
+ m.df_to_mysql(df=df, db_name='京东数据1', tabel_name='京东_商家榜单')
870
929
  os.remove(os.path.join(root, name))
871
- continue
872
- df['日期'] = df['日期'].apply(
873
- lambda x: '-'.join(re.findall(r'(\d{4})(\d{2})(\d{2})', str(x))[0])
874
- )
875
- date_min = df['日期'].values.min()
876
- date_max = df['日期'].values.max()
877
- # df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
878
- new_name = f'JD店铺日报_' + re.findall(r"(.*)\d{8}_\d{8}", name)[0] + f'_{date_min}_{date_max}.csv'
879
- self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
880
- if self.set_up_to_mogo:
881
- d.df_to_mongo(df=df, db_name='京东数据1', collection_name='京东_自助取数_店铺日报')
882
- if self.set_up_to_mysql:
883
- m.df_to_mysql(df=df, db_name='京东数据1', tabel_name='京东_自助取数_店铺日报')
884
- os.remove(os.path.join(root, name))
885
930
 
886
- elif name.endswith('.xls') and '商家榜单_女包_整体' in name:
887
- # 京东 行业 商家榜单
888
- date2 = re.findall(r'_\d{8}-\d+', name)
889
- if date2:
890
- print(f'{name}: 请下载日数据,不支持其他周期')
891
- os.remove(os.path.join(root, name)) # 直接删掉,避免被分到原始文件, encoding 不同会引发错误
892
- continue
893
- date1 = re.findall(r'_(\d{4})(\d{2})(\d{2})', name)
894
- date1 = f'{date1[0][0]}-{date1[0][1]}-{date1[0][2]}'
895
- df = pd.read_excel(os.path.join(root, name), header=0)
896
- if len(df) == 0:
897
- print(f'{name} 报表数据为空')
931
+ elif name.endswith('.xlsx') and '批量SKU导出-批量任务' in name:
932
+ # 京东 sku 导出
933
+ df = pd.read_excel(os.path.join(root, name), header=0)
934
+ if len(df) == 0:
935
+ print(f'{name} 报表数据为空')
936
+ os.remove(os.path.join(root, name))
937
+ continue
938
+ d_time = datetime.datetime.today().strftime('%Y-%m-%d')
939
+ df.insert(loc=0, column='日期', value=d_time)
940
+ for col in ['SKUID', '商品编码', '商家SKU', '货号']:
941
+ df[col] = df[col].apply(lambda x: f'="{x}"' if x else x)
942
+ df['商品链接'] = df['商品链接'].apply(lambda x: f'https://{x}' if x else x)
943
+ new_name = f'京东商品信息_{os.path.splitext(name)[0]}_{d_time}.csv'
944
+ self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
945
+ if self.set_up_to_mogo:
946
+ d.df_to_mongo(df=df, db_name='属性设置1', collection_name='京东商品信息')
947
+ if self.set_up_to_mysql:
948
+ m.df_to_mysql(df=df, db_name='属性设置1', tabel_name='京东商品信息')
898
949
  os.remove(os.path.join(root, name))
899
- continue
900
- df['日期'] = df['日期'].astype(str).apply(lambda x: f'{x[:4]}-{x[4:6]}-{x[6:8]}')
901
- df.insert(loc=0, column='类型', value='商家榜单')
902
- new_name = f'{os.path.splitext(name)[0]}_{date1}.csv'
903
- self.save_to_csv(df, root, new_name)
904
- if self.set_up_to_mogo:
905
- d.df_to_mongo(df=df,db_name='京东数据1', collection_name='京东_商家榜单')
906
- if self.set_up_to_mysql:
907
- m.df_to_mysql(df=df, db_name='京东数据1', tabel_name='京东_商家榜单')
908
- os.remove(os.path.join(root, name))
909
950
 
910
- elif name.endswith('.xlsx') and '批量SKU导出-批量任务' in name:
911
- # 京东 sku 导出
912
- df = pd.read_excel(os.path.join(root, name), header=0)
913
- if len(df) == 0:
914
- print(f'{name} 报表数据为空')
915
- os.remove(os.path.join(root, name))
916
- continue
917
- d_time = datetime.datetime.today().strftime('%Y-%m-%d')
918
- df.insert(loc=0, column='日期', value=d_time)
919
- for col in ['SKUID', '商品编码', '商家SKU', '货号']:
920
- df[col] = df[col].apply(lambda x: f'="{x}"' if x else x)
921
- df['商品链接'] = df['商品链接'].apply(lambda x: f'https://{x}' if x else x)
922
- new_name = f'京东商品信息_{os.path.splitext(name)[0]}_{d_time}.csv'
923
- self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
924
- if self.set_up_to_mogo:
925
- d.df_to_mongo(df=df, db_name='属性设置1', collection_name='京东商品信息')
926
- if self.set_up_to_mysql:
927
- m.df_to_mysql(df=df, db_name='属性设置1', tabel_name='京东商品信息')
928
- os.remove(os.path.join(root, name))
951
+ elif name.endswith('.xlsx') and '批量SPU导出-批量任务' in name:
952
+ # 京东 spu 导出
953
+ df = pd.read_excel(os.path.join(root, name), header=0)
954
+ if len(df) == 0:
955
+ print(f'{name} 报表数据为空')
956
+ os.remove(os.path.join(root, name))
957
+ continue
958
+ d_time = datetime.datetime.today().strftime('%Y-%m-%d')
959
+ df.insert(loc=0, column='日期', value=d_time)
960
+ for col in ['商品编码', '货号']:
961
+ df[col] = df[col].apply(lambda x: f'="{x}"' if x else x)
962
+ new_name = f'京东商品信息_{os.path.splitext(name)[0]}_{d_time}.csv'
929
963
 
930
- elif name.endswith('.xlsx') and '批量SPU导出-批量任务' in name:
931
- # 京东 spu 导出
932
- df = pd.read_excel(os.path.join(root, name), header=0)
933
- if len(df) == 0:
934
- print(f'{name} 报表数据为空')
964
+ self.save_to_csv(df, root, new_name)
935
965
  os.remove(os.path.join(root, name))
936
- continue
937
- d_time = datetime.datetime.today().strftime('%Y-%m-%d')
938
- df.insert(loc=0, column='日期', value=d_time)
939
- for col in ['商品编码', '货号']:
940
- df[col] = df[col].apply(lambda x: f'="{x}"' if x else x)
941
- new_name = f'京东商品信息_{os.path.splitext(name)[0]}_{d_time}.csv'
942
966
 
943
- self.save_to_csv(df, root, new_name)
944
- os.remove(os.path.join(root, name))
945
-
946
- elif name.endswith('.csv') and '万里马箱包推广1_完整点击成交' in name:
947
- # 京东推广数据
948
- df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
949
- if len(df) == 0:
950
- print(f'{name} 报表数据为空')
967
+ elif name.endswith('.csv') and '万里马箱包推广1_完整点击成交' in name:
968
+ # 京东推广数据
969
+ df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
970
+ if len(df) == 0:
971
+ print(f'{name} 报表数据为空')
972
+ os.remove(os.path.join(root, name))
973
+ continue
974
+ pic_list = df['日期'].tolist()
975
+ pic = []
976
+ for i in pic_list:
977
+ pics = re.findall(pattern=r'(\d{4})(\d{2})(\d{2})', string=str(i))
978
+ if pics:
979
+ pics = '-'.join(pics[0])
980
+ pic.append(pics)
981
+ else:
982
+ pic.append(i)
983
+ df['日期'] = pd.Series(pic)
984
+ date_min = df['日期'].values.min() + '_'
985
+ date_max = df['日期'].values.max()
986
+ new_name2 = '京东点击成交报表_' + date_min + date_max + '.csv'
987
+ for col in ['计划ID', '触发SKU ID', '跟单SKU ID', 'SPU ID']:
988
+ df[col] = df[col].astype(str).apply(lambda x: f'="{x}"' if x and '=' not in x else x)
989
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
990
+ self.save_to_csv(df, root, new_name2) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
991
+ if self.set_up_to_mogo:
992
+ d.df_to_mongo(df=df, db_name='京东数据1', collection_name='京东_推广_京准通')
993
+ if self.set_up_to_mysql:
994
+ m.df_to_mysql(df=df, db_name='京东数据1', tabel_name='京东_推广_京准通')
951
995
  os.remove(os.path.join(root, name))
952
- continue
953
- pic_list = df['日期'].tolist()
954
- pic = []
955
- for i in pic_list:
956
- pics = re.findall(pattern=r'(\d{4})(\d{2})(\d{2})', string=str(i))
957
- if pics:
958
- pics = '-'.join(pics[0])
959
- pic.append(pics)
960
- else:
961
- pic.append(i)
962
- df['日期'] = pd.Series(pic)
963
- date_min = df['日期'].values.min() + '_'
964
- date_max = df['日期'].values.max()
965
- new_name2 = '京东点击成交报表_' + date_min + date_max + '.csv'
966
- for col in ['计划ID', '触发SKU ID', '跟单SKU ID', 'SPU ID']:
967
- df[col] = df[col].astype(str).apply(lambda x: f'="{x}"' if x and '=' not in x else x)
968
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
969
- self.save_to_csv(df, root, new_name2) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
970
- if self.set_up_to_mogo:
971
- d.df_to_mongo(df=df, db_name='京东数据1', collection_name='京东_推广_京准通')
972
- if self.set_up_to_mysql:
973
- m.df_to_mysql(df=df, db_name='京东数据1', tabel_name='京东_推广_京准通')
974
- os.remove(os.path.join(root, name))
975
- elif name.endswith('.csv') and '万里马箱包推广1_京东推广搜索词_pbix同步不要' in name:
976
- df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
977
- if len(df) == 0:
978
- print(f'{name} 报表数据为空')
996
+ elif name.endswith('.csv') and '万里马箱包推广1_京东推广搜索词_pbix同步不要' in name:
997
+ df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
998
+ if len(df) == 0:
999
+ print(f'{name} 报表数据为空')
1000
+ os.remove(os.path.join(root, name))
1001
+ continue
1002
+ pic_list = df['日期'].tolist()
1003
+ pic = []
1004
+ for i in pic_list:
1005
+ pics = re.findall(pattern=r'(\d{4})(\d{2})(\d{2})', string=str(i))
1006
+ if pics:
1007
+ pics = '-'.join(pics[0])
1008
+ pic.append(pics)
1009
+ else:
1010
+ pic.append(i)
1011
+ df['日期'] = pd.Series(pic)
1012
+ date_min = df['日期'].values.min() + '_'
1013
+ date_max = df['日期'].values.max()
1014
+ new_name2 = '京东推广搜索词_' + date_min + date_max + '.csv'
1015
+ df.replace(to_replace=[0], value='', regex=False, inplace=True)
1016
+ df['是否品牌词'] = df['搜索词'].str.contains('万里马|wanlima', regex=True)
1017
+ df['是否品牌词'] = df['是否品牌词'].apply(lambda x: '品牌词' if x else '')
1018
+ self.save_to_csv(df, root, new_name2) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
1019
+ if self.set_up_to_mogo:
1020
+ d.df_to_mongo(df=df, db_name='京东数据1', collection_name='京东_推广_搜索词报表')
1021
+ if self.set_up_to_mysql:
1022
+ m.df_to_mysql(df=df, db_name='京东数据1', tabel_name='京东_推广_搜索词报表')
979
1023
  os.remove(os.path.join(root, name))
980
- continue
981
- pic_list = df['日期'].tolist()
982
- pic = []
983
- for i in pic_list:
984
- pics = re.findall(pattern=r'(\d{4})(\d{2})(\d{2})', string=str(i))
985
- if pics:
986
- pics = '-'.join(pics[0])
987
- pic.append(pics)
988
- else:
989
- pic.append(i)
990
- df['日期'] = pd.Series(pic)
991
- date_min = df['日期'].values.min() + '_'
992
- date_max = df['日期'].values.max()
993
- new_name2 = '京东推广搜索词_' + date_min + date_max + '.csv'
994
- df.replace(to_replace=[0], value='', regex=False, inplace=True)
995
- df['是否品牌词'] = df['搜索词'].str.contains('万里马|wanlima', regex=True)
996
- df['是否品牌词'] = df['是否品牌词'].apply(lambda x: '品牌词' if x else '')
997
- self.save_to_csv(df, root, new_name2) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
998
- if self.set_up_to_mogo:
999
- d.df_to_mongo(df=df, db_name='京东数据1', collection_name='京东_推广_搜索词报表')
1000
- if self.set_up_to_mysql:
1001
- m.df_to_mysql(df=df, db_name='京东数据1', tabel_name='京东_推广_搜索词报表')
1002
- os.remove(os.path.join(root, name))
1003
1024
 
1004
- elif name.endswith('.xlsx') and '零售明细统计' in name:
1005
- #
1006
- df = pd.read_excel(os.path.join(root, name), header=0)
1007
- if len(df) == 0:
1008
- print(f'{name} 报表数据为空')
1025
+ elif name.endswith('.xlsx') and '零售明细统计' in name:
1026
+ #
1027
+ df = pd.read_excel(os.path.join(root, name), header=0)
1028
+ if len(df) == 0:
1029
+ print(f'{name} 报表数据为空')
1030
+ os.remove(os.path.join(root, name))
1031
+ continue
1032
+ df['摘要'] = df['摘要'].apply(lambda x: re.sub('\'', '', str(x)) if x else x)
1033
+ for col in ['原单号', '商品代码', '摘要']:
1034
+ df[col] = df[col].apply(lambda x: f'="{re.sub(".0", "", str(x))}"' if x else x)
1035
+ df = df[df['缩略图'] != '合计']
1036
+ df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
1037
+ date_min = f'_{re.sub("T.*", "", str(df["日期"].values.min()))}_'
1038
+ date_max = f'{re.sub("T.*", "", str(df["日期"].values.max()))}.csv'
1039
+ new_name = re.findall(r'(.*)_\d{4}-\d{2}-\d{2}', name)[0]
1040
+ new_name = f'{new_name}{date_min}{date_max}'
1041
+ self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
1042
+ if self.set_up_to_mogo:
1043
+ d.df_to_mongo(df=df, db_name='生意经1', collection_name='E3_零售明细统计')
1044
+ if self.set_up_to_mysql:
1045
+ m.df_to_mysql(df=df, db_name='生意经1', tabel_name='E3_零售明细统计')
1009
1046
  os.remove(os.path.join(root, name))
1010
- continue
1011
- df['摘要'] = df['摘要'].apply(lambda x: re.sub('\'', '', str(x)) if x else x)
1012
- for col in ['原单号', '商品代码', '摘要']:
1013
- df[col] = df[col].apply(lambda x: f'="{re.sub(".0", "", str(x))}"' if x else x)
1014
- df = df[df['缩略图'] != '合计']
1015
- df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
1016
- date_min = f'_{re.sub("T.*", "", str(df["日期"].values.min()))}_'
1017
- date_max = f'{re.sub("T.*", "", str(df["日期"].values.max()))}.csv'
1018
- new_name = re.findall(r'(.*)_\d{4}-\d{2}-\d{2}', name)[0]
1019
- new_name = f'{new_name}{date_min}{date_max}'
1020
- self.save_to_csv(df, root, new_name) # mysql 可能改变 df 列名,所以在上传 mysql 前保存 csv
1021
- if self.set_up_to_mogo:
1022
- d.df_to_mongo(df=df, db_name='生意经1', collection_name='E3_零售明细统计')
1023
- if self.set_up_to_mysql:
1024
- m.df_to_mysql(df=df, db_name='生意经1', tabel_name='E3_零售明细统计')
1025
- os.remove(os.path.join(root, name))
1047
+ except Exception as e:
1048
+ now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S ')
1049
+ print(f'{now}{name}: 报错: {e}')
1026
1050
  if self.set_up_to_mogo:
1027
1051
  if d.client:
1028
1052
  d.client.close() # 必须手动关闭数据库连接
@@ -1187,6 +1211,12 @@ class DataClean:
1187
1211
  elif name.endswith('.csv') and '客户运营平台_客户列表' in name:
1188
1212
  t_path = str(pathlib.Path(self.source_path, '生意参谋/客户运营平台'))
1189
1213
  bib(t_path, _as_month=True)
1214
+ elif name.endswith('.csv') and '直播分场次效果' in name:
1215
+ pattern = re.findall(r'(\d{4}-\d{2}-\d{2})_(\d{4}-\d{2}-\d{2})', name)
1216
+ if not pattern:
1217
+ continue
1218
+ t_path = str(pathlib.Path(self.source_path, '生意参谋/直播场次分析'))
1219
+ bib(t_path, _as_month=True)
1190
1220
  # 京东分界线 ------- 开始标记
1191
1221
  # 京东分界线
1192
1222
  elif name.endswith('.csv') and '全部渠道_商品明细' in name:
@@ -1430,11 +1460,11 @@ def main():
1430
1460
  c.set_up_to_mysql = False
1431
1461
  c.new_unzip(is_move=True) # 解压文件
1432
1462
  c.change_and_sort()
1433
- # c.move_all() # 移到文件到原始文件夹
1463
+ c.move_all() # 移到文件到原始文件夹
1434
1464
  # c.attribute() # 商品素材重命名和分类
1435
1465
 
1436
1466
 
1437
1467
  if __name__ == '__main__':
1438
- # main()
1468
+ main()
1439
1469
  username, password, host, port = get_myconf.select_config_values(target_service='aliyun', database='mongodb')
1440
1470
  print(username, password, host, port)