mdbq 2.6.0__tar.gz → 2.6.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdbq-2.6.0 → mdbq-2.6.2}/PKG-INFO +1 -1
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/aggregation/aggregation.py +11 -10
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/aggregation/query_data.py +45 -3
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/clean/clean_upload.py +55 -33
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/dataframe/converter.py +2 -2
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq.egg-info/PKG-INFO +1 -1
- {mdbq-2.6.0 → mdbq-2.6.2}/setup.py +1 -1
- {mdbq-2.6.0 → mdbq-2.6.2}/README.txt +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/__init__.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/__version__.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/aggregation/__init__.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/aggregation/df_types.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/aggregation/mysql_types.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/aggregation/optimize_data.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/bdup/__init__.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/bdup/bdup.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/clean/__init__.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/clean/data_clean.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/company/__init__.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/company/copysh.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/company/home_sh.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/config/__init__.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/config/get_myconf.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/config/products.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/config/set_support.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/config/update_conf.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/dataframe/__init__.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/log/__init__.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/log/mylogger.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/mongo/__init__.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/mongo/mongo.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/mysql/__init__.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/mysql/mysql.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/mysql/s_query.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/mysql/year_month_day.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/other/__init__.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/other/porxy.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/other/pov_city.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/other/sku_picture.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/other/ua_sj.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/pbix/__init__.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/pbix/refresh_all_old.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/req_post/__init__.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/req_post/req_tb.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/spider/__init__.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq/spider/aikucun.py +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq.egg-info/SOURCES.txt +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-2.6.0 → mdbq-2.6.2}/setup.cfg +0 -0
@@ -1301,7 +1301,7 @@ def test2():
|
|
1301
1301
|
if __name__ == '__main__':
|
1302
1302
|
username, password, host, port = get_myconf.select_config_values(target_service='nas', database='mysql')
|
1303
1303
|
print(username, password, host, port)
|
1304
|
-
file_dir(one_file=False, target_service='company')
|
1304
|
+
# file_dir(one_file=False, target_service='company')
|
1305
1305
|
# one_file_to_mysql(
|
1306
1306
|
# file='/Users/xigua/Downloads/爱库存_商品榜单_spu_2024-10-17_2024-10-17.csv',
|
1307
1307
|
# db_name='爱库存2',
|
@@ -1310,15 +1310,16 @@ if __name__ == '__main__':
|
|
1310
1310
|
# database='mysql'
|
1311
1311
|
# )
|
1312
1312
|
|
1313
|
-
#
|
1314
|
-
|
1315
|
-
|
1316
|
-
|
1317
|
-
|
1318
|
-
|
1319
|
-
|
1320
|
-
|
1321
|
-
|
1313
|
+
# 上传一个目录到指定数据库
|
1314
|
+
db_name = '天猫_推广数据3'
|
1315
|
+
table_name = '主体报表'
|
1316
|
+
upload_dir(
|
1317
|
+
path='/Users/xigua/数据中心/原始文件3/天猫推广报表/主体报表',
|
1318
|
+
db_name=db_name,
|
1319
|
+
collection_name=table_name,
|
1320
|
+
dbs={'mysql': True, 'mongodb': False},
|
1321
|
+
target_service='company',
|
1322
|
+
)
|
1322
1323
|
|
1323
1324
|
|
1324
1325
|
# # 新版 数据分类
|
@@ -93,6 +93,18 @@ class MysqlDatasQuery:
|
|
93
93
|
username, password, host, port = get_myconf.select_config_values(target_service=target_service, database='mysql')
|
94
94
|
self.download = s_query.QueryDatas(username=username, password=password, host=host, port=port)
|
95
95
|
|
96
|
+
@staticmethod
|
97
|
+
def try_except(func): # 在类内部定义一个异常处理方法
|
98
|
+
@wraps(func)
|
99
|
+
def wrapper(*args, **kwargs):
|
100
|
+
try:
|
101
|
+
return func(*args, **kwargs)
|
102
|
+
except Exception as e:
|
103
|
+
print(f'{func.__name__}, {e}') # 将异常信息返回
|
104
|
+
|
105
|
+
return wrapper
|
106
|
+
|
107
|
+
@try_except
|
96
108
|
def tg_wxt(self):
|
97
109
|
start_date, end_date = self.months_data(num=self.months)
|
98
110
|
projection = {
|
@@ -118,6 +130,7 @@ class MysqlDatasQuery:
|
|
118
130
|
)
|
119
131
|
return df
|
120
132
|
|
133
|
+
@try_except
|
121
134
|
def syj(self):
|
122
135
|
start_date, end_date = self.months_data(num=self.months)
|
123
136
|
projection = {
|
@@ -142,6 +155,7 @@ class MysqlDatasQuery:
|
|
142
155
|
)
|
143
156
|
return df
|
144
157
|
|
158
|
+
@try_except
|
145
159
|
def tg_rqbb(self):
|
146
160
|
start_date, end_date = self.months_data(num=self.months)
|
147
161
|
projection = {
|
@@ -167,6 +181,7 @@ class MysqlDatasQuery:
|
|
167
181
|
)
|
168
182
|
return df
|
169
183
|
|
184
|
+
@try_except
|
170
185
|
def tg_gjc(self):
|
171
186
|
start_date, end_date = self.months_data(num=self.months)
|
172
187
|
projection = {
|
@@ -193,6 +208,7 @@ class MysqlDatasQuery:
|
|
193
208
|
)
|
194
209
|
return df
|
195
210
|
|
211
|
+
@try_except
|
196
212
|
def tg_cjzb(self):
|
197
213
|
start_date, end_date = self.months_data(num=self.months)
|
198
214
|
projection = {
|
@@ -220,6 +236,7 @@ class MysqlDatasQuery:
|
|
220
236
|
)
|
221
237
|
return df
|
222
238
|
|
239
|
+
@try_except
|
223
240
|
def pxb_zh(self):
|
224
241
|
start_date, end_date = self.months_data(num=self.months)
|
225
242
|
projection = {
|
@@ -245,6 +262,7 @@ class MysqlDatasQuery:
|
|
245
262
|
)
|
246
263
|
return df
|
247
264
|
|
265
|
+
@try_except
|
248
266
|
def idbm(self):
|
249
267
|
""" 用生意经日数据制作商品 id 和编码对照表 """
|
250
268
|
data_values = self.download.columns_to_list(
|
@@ -255,6 +273,7 @@ class MysqlDatasQuery:
|
|
255
273
|
df = pd.DataFrame(data=data_values)
|
256
274
|
return df
|
257
275
|
|
276
|
+
@try_except
|
258
277
|
def sp_picture(self):
|
259
278
|
""" 用生意经日数据制作商品 id 和编码对照表 """
|
260
279
|
data_values = self.download.columns_to_list(
|
@@ -265,6 +284,7 @@ class MysqlDatasQuery:
|
|
265
284
|
df = pd.DataFrame(data=data_values)
|
266
285
|
return df
|
267
286
|
|
287
|
+
@try_except
|
268
288
|
def dplyd(self):
|
269
289
|
""" 新旧版取的字段是一样的 """
|
270
290
|
start_date, end_date = self.months_data(num=self.months)
|
@@ -288,6 +308,7 @@ class MysqlDatasQuery:
|
|
288
308
|
)
|
289
309
|
return df
|
290
310
|
|
311
|
+
@try_except
|
291
312
|
def dplyd_old(self):
|
292
313
|
start_date, end_date = self.months_data(num=self.months)
|
293
314
|
projection = {
|
@@ -310,6 +331,7 @@ class MysqlDatasQuery:
|
|
310
331
|
)
|
311
332
|
return df
|
312
333
|
|
334
|
+
@try_except
|
313
335
|
def sp_cost(self):
|
314
336
|
""" 电商定价 """
|
315
337
|
data_values = self.download.columns_to_list(
|
@@ -320,6 +342,7 @@ class MysqlDatasQuery:
|
|
320
342
|
df = pd.DataFrame(data=data_values)
|
321
343
|
return df
|
322
344
|
|
345
|
+
@try_except
|
323
346
|
def jdjzt(self):
|
324
347
|
start_date, end_date = self.months_data(num=self.months)
|
325
348
|
projection = {
|
@@ -346,6 +369,8 @@ class MysqlDatasQuery:
|
|
346
369
|
projection=projection,
|
347
370
|
)
|
348
371
|
return df
|
372
|
+
|
373
|
+
@try_except
|
349
374
|
def jdqzyx(self):
|
350
375
|
start_date, end_date = self.months_data(num=self.months)
|
351
376
|
projection = {
|
@@ -368,6 +393,8 @@ class MysqlDatasQuery:
|
|
368
393
|
projection=projection,
|
369
394
|
)
|
370
395
|
return df
|
396
|
+
|
397
|
+
@try_except
|
371
398
|
def jd_gjc(self):
|
372
399
|
start_date, end_date = self.months_data(num=self.months)
|
373
400
|
projection = {
|
@@ -401,6 +428,8 @@ class MysqlDatasQuery:
|
|
401
428
|
projection=projection,
|
402
429
|
)
|
403
430
|
return df
|
431
|
+
|
432
|
+
@try_except
|
404
433
|
def sku_sales(self):
|
405
434
|
start_date, end_date = self.months_data(num=self.months)
|
406
435
|
projection = {
|
@@ -422,6 +451,8 @@ class MysqlDatasQuery:
|
|
422
451
|
projection=projection,
|
423
452
|
)
|
424
453
|
return df
|
454
|
+
|
455
|
+
@try_except
|
425
456
|
def spu_sales(self):
|
426
457
|
start_date, end_date = self.months_data(num=self.months)
|
427
458
|
projection = {
|
@@ -453,6 +484,7 @@ class MysqlDatasQuery:
|
|
453
484
|
start_date = f'{start_date.year}-{start_date.month}-01' # 替换为 n 月以前的第一天
|
454
485
|
return pd.to_datetime(start_date), pd.to_datetime(end_date)
|
455
486
|
|
487
|
+
@try_except
|
456
488
|
def tm_search(self):
|
457
489
|
start_date, end_date = self.months_data(num=self.months)
|
458
490
|
projection = {
|
@@ -476,6 +508,7 @@ class MysqlDatasQuery:
|
|
476
508
|
)
|
477
509
|
return df
|
478
510
|
|
511
|
+
@try_except
|
479
512
|
def zb_ccfx(self):
|
480
513
|
start_date, end_date = self.months_data(num=self.months)
|
481
514
|
projection = {
|
@@ -520,6 +553,7 @@ class MysqlDatasQuery:
|
|
520
553
|
)
|
521
554
|
return df
|
522
555
|
|
556
|
+
@try_except
|
523
557
|
def tg_by_day(self):
|
524
558
|
"""
|
525
559
|
汇总各个店铺的推广数据,按日汇总
|
@@ -689,6 +723,7 @@ class MysqlDatasQuery:
|
|
689
723
|
df = pd.concat(_datas, axis=0, ignore_index=True)
|
690
724
|
return df
|
691
725
|
|
726
|
+
@try_except
|
692
727
|
def aikucun_bd_spu(self):
|
693
728
|
start_date, end_date = self.months_data(num=self.months)
|
694
729
|
projection = {
|
@@ -736,6 +771,7 @@ class MysqlDatasQuery:
|
|
736
771
|
)
|
737
772
|
return df
|
738
773
|
|
774
|
+
@try_except
|
739
775
|
def dmp_crowd(self):
|
740
776
|
start_date, end_date = self.months_data(num=self.months)
|
741
777
|
projection = {
|
@@ -748,7 +784,7 @@ class MysqlDatasQuery:
|
|
748
784
|
}
|
749
785
|
# projection = {}
|
750
786
|
df_crowd = self.download.data_to_df(
|
751
|
-
db_name='达摩盘
|
787
|
+
db_name='达摩盘3',
|
752
788
|
table_name='我的人群属性',
|
753
789
|
start_date=start_date,
|
754
790
|
end_date=end_date,
|
@@ -1497,6 +1533,7 @@ class GroupBy:
|
|
1497
1533
|
print(f'<{table_name}>: Groupby 类尚未配置,数据为空')
|
1498
1534
|
return pd.DataFrame({})
|
1499
1535
|
|
1536
|
+
@try_except
|
1500
1537
|
def ret_keyword(self, keyword, as_file=False):
|
1501
1538
|
""" 推广关键词报表,关键词分类, """
|
1502
1539
|
datas = [
|
@@ -1664,6 +1701,7 @@ class GroupBy:
|
|
1664
1701
|
break
|
1665
1702
|
return result
|
1666
1703
|
|
1704
|
+
@try_except
|
1667
1705
|
def set_crowd(self, keyword, as_file=False):
|
1668
1706
|
""" 推广人群报表,人群分类, """
|
1669
1707
|
result_a = re.findall('_a$|_a_|_ai|^a_', str(keyword), re.IGNORECASE)
|
@@ -1699,6 +1737,7 @@ class GroupBy:
|
|
1699
1737
|
if not is_res:
|
1700
1738
|
return ''
|
1701
1739
|
|
1740
|
+
@try_except
|
1702
1741
|
def set_crowd2(self, keyword, as_file=False):
|
1703
1742
|
""" 推广人群报表,人群分类, """
|
1704
1743
|
datas = [
|
@@ -1796,7 +1835,7 @@ class GroupBy:
|
|
1796
1835
|
break
|
1797
1836
|
return result
|
1798
1837
|
|
1799
|
-
|
1838
|
+
@try_except
|
1800
1839
|
def performance(self, bb_tg=True):
|
1801
1840
|
# print(self.data_tgyj)
|
1802
1841
|
tg, syj, idbm, pic, cost = (
|
@@ -1832,6 +1871,8 @@ class GroupBy:
|
|
1832
1871
|
df['毛利率'] = df.apply(lambda x: round((x['销售额'] - x['商品成本']) / x['销售额'], 4) if x['销售额'] > 0 else 0, axis=1)
|
1833
1872
|
df['盈亏'] = df.apply(lambda x: x['商品毛利'] - x['花费'], axis=1)
|
1834
1873
|
return df
|
1874
|
+
|
1875
|
+
@try_except
|
1835
1876
|
def performance_concat(self, bb_tg=True):
|
1836
1877
|
tg, zb, pxb = self.data_tgyj['天猫汇总表调用'], self.data_tgyj['天猫_超级直播'], self.data_tgyj['天猫_品销宝账户报表']
|
1837
1878
|
zb.rename(columns={
|
@@ -1880,6 +1921,7 @@ class GroupBy:
|
|
1880
1921
|
)
|
1881
1922
|
return df
|
1882
1923
|
|
1924
|
+
@try_except
|
1883
1925
|
def performance_jd(self, jd_tg=True):
|
1884
1926
|
jdtg, sku_sales = self.data_jdtg['京东_京准通'], self.data_jdtg['京东_sku_商品明细']
|
1885
1927
|
jdtg = jdtg.groupby(['日期', '跟单sku id'],
|
@@ -2285,6 +2327,6 @@ def main():
|
|
2285
2327
|
|
2286
2328
|
|
2287
2329
|
if __name__ == '__main__':
|
2288
|
-
data_aggregation(service_databases=[{'company': 'mysql'}], months=
|
2330
|
+
data_aggregation(service_databases=[{'company': 'mysql'}], months=0, is_juhe=False) # 正常的聚合所有数据
|
2289
2331
|
# data_aggregation_one(service_databases=[{'company': 'mysql'}], months=1) # 单独聚合某一个数据库,具体库进函数编辑
|
2290
2332
|
# optimize_data.op_data(service_databases=[{'company': 'mysql'}], days=3650) # 立即启动对聚合数据的清理工作
|
@@ -89,15 +89,30 @@ class DataClean:
|
|
89
89
|
path = self.path
|
90
90
|
report_names = [
|
91
91
|
{
|
92
|
-
'文件简称': '商品排行', # 文件名中包含的字符
|
93
|
-
'数据库名': '
|
92
|
+
'文件简称': '商品排行_', # 文件名中包含的字符
|
93
|
+
'数据库名': '生意参谋3',
|
94
94
|
'集合名称': '商品排行',
|
95
95
|
},
|
96
96
|
{
|
97
|
-
'文件简称': '店铺来源_来源构成_
|
98
|
-
'数据库名': '
|
97
|
+
'文件简称': '店铺来源_来源构成_', # 文件名中包含的字符
|
98
|
+
'数据库名': '生意参谋3',
|
99
99
|
'集合名称': '店铺流量来源构成',
|
100
100
|
},
|
101
|
+
{
|
102
|
+
'文件简称': '商品类目属性_', # 文件名中包含的字符
|
103
|
+
'数据库名': '生意参谋3',
|
104
|
+
'集合名称': '商品类目属性',
|
105
|
+
},
|
106
|
+
{
|
107
|
+
'文件简称': '商品主图视频_', # 文件名中包含的字符
|
108
|
+
'数据库名': '生意参谋3',
|
109
|
+
'集合名称': '商品主图视频',
|
110
|
+
},
|
111
|
+
{
|
112
|
+
'文件简称': '商品sku属性_', # 文件名中包含的字符
|
113
|
+
'数据库名': '生意参谋3',
|
114
|
+
'集合名称': '商品sku',
|
115
|
+
},
|
101
116
|
]
|
102
117
|
for root, dirs, files in os.walk(path, topdown=False):
|
103
118
|
for name in files:
|
@@ -126,7 +141,7 @@ class DataClean:
|
|
126
141
|
is_continue = True
|
127
142
|
if not is_continue:
|
128
143
|
continue
|
129
|
-
if name.endswith('.xls') and '商品排行_
|
144
|
+
if name.endswith('.xls') and '商品排行_' in name:
|
130
145
|
df = pd.read_excel(os.path.join(root, name), header=4)
|
131
146
|
if len(df) == 0:
|
132
147
|
print(f'{name} 报表数据为空')
|
@@ -135,15 +150,20 @@ class DataClean:
|
|
135
150
|
df.replace(to_replace=[','], value='', regex=True, inplace=True)
|
136
151
|
df.rename(columns={'统计日期': '日期', '商品ID': '商品id'}, inplace=True)
|
137
152
|
shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)', name)[0]
|
138
|
-
|
153
|
+
if '店铺名称' not in df.columns.tolist():
|
154
|
+
df.insert(loc=1, column='店铺名称', value=shop_name)
|
139
155
|
new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
|
140
156
|
self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
|
141
157
|
os.remove(os.path.join(root, name))
|
142
|
-
elif name.endswith('.csv') and '_来源构成_
|
158
|
+
elif name.endswith('.csv') and '_来源构成_' in name:
|
143
159
|
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
|
144
160
|
new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
|
145
161
|
self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
|
146
162
|
os.remove(os.path.join(root, name))
|
163
|
+
elif name.endswith('.csv') and ('商品类目属性' in name or '商品主图视频' in name or '商品sku属性' in name):
|
164
|
+
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
|
165
|
+
new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
|
166
|
+
os.rename(os.path.join(root, name), os.path.join(root, new_name))
|
147
167
|
|
148
168
|
# 将数据传入 self.datas 等待更新进数据库
|
149
169
|
if not db_name or not collection_name:
|
@@ -804,16 +824,13 @@ class DataClean:
|
|
804
824
|
if name.endswith('.xlsx') and '商品素材_' in name:
|
805
825
|
shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)_', name)[0]
|
806
826
|
df = pd.read_excel(os.path.join(root, name), header=0)
|
807
|
-
|
827
|
+
if '店铺名称' not in df.columns.tolist():
|
828
|
+
df.insert(loc=1, column='店铺名称', value=shop_name)
|
808
829
|
new_name = f'py_xg_{name}'
|
809
830
|
df.to_excel(os.path.join(upload_path, new_name),
|
810
831
|
index=False, header=True, engine='openpyxl', freeze_panes=(1, 0))
|
811
|
-
|
812
|
-
|
813
|
-
collection_name = '商品素材_天猫'
|
814
|
-
elif '官方企业店' in name:
|
815
|
-
db_name = '属性设置3'
|
816
|
-
collection_name = '商品素材_淘宝'
|
832
|
+
db_name = '属性设置3'
|
833
|
+
collection_name = '商品素材'
|
817
834
|
os.remove(os.path.join(root, name))
|
818
835
|
|
819
836
|
# 将数据传入 self.datas 等待更新进数据库
|
@@ -881,11 +898,15 @@ class DataClean:
|
|
881
898
|
if 'py_xg' not in name: # 排除非目标文件
|
882
899
|
continue
|
883
900
|
|
884
|
-
if name.endswith('.csv') and '商品排行_
|
885
|
-
t_path = os.path.join(self.source_path, '
|
901
|
+
if name.endswith('.csv') and '商品排行_' in name:
|
902
|
+
t_path = os.path.join(self.source_path, '生意参谋', '商品排行')
|
903
|
+
bib(t_path, _as_month=True)
|
904
|
+
elif name.endswith('.csv') and '店铺来源_来源构成_' in name:
|
905
|
+
t_path = os.path.join(self.source_path, '生意参谋', '店铺流量来源')
|
886
906
|
bib(t_path, _as_month=True)
|
887
|
-
elif name.endswith('.csv') and
|
888
|
-
|
907
|
+
elif name.endswith('.csv') and (
|
908
|
+
'商品类目属性' in name or '商品主图视频' in name or '商品sku属性' in name):
|
909
|
+
t_path = os.path.join(self.source_path, '生意参谋', '商品属性')
|
889
910
|
bib(t_path, _as_month=True)
|
890
911
|
|
891
912
|
def move_dmp(self, path=None, is_except=[]):
|
@@ -1097,10 +1118,10 @@ class DataClean:
|
|
1097
1118
|
t_path = os.path.join(self.source_path, '天猫推广报表', '品销宝', '定向人群报表')
|
1098
1119
|
bib(t_path, _as_month=True)
|
1099
1120
|
elif name.endswith('xlsx') and '商品素材_万里马官方旗舰店' in name:
|
1100
|
-
t_path = os.path.join(self.source_path, '商品素材'
|
1121
|
+
t_path = os.path.join(self.source_path, '商品素材')
|
1101
1122
|
bib(t_path, _as_month=True)
|
1102
1123
|
elif name.endswith('xlsx') and '商品素材_万里马官方企业店' in name:
|
1103
|
-
t_path = os.path.join(self.source_path, '商品素材'
|
1124
|
+
t_path = os.path.join(self.source_path, '商品素材')
|
1104
1125
|
bib(t_path, _as_month=True)
|
1105
1126
|
|
1106
1127
|
# @try_except
|
@@ -1366,6 +1387,7 @@ def main(service_databases=None, is_mysql=False):
|
|
1366
1387
|
cn.dmp_tm(is_except=['except']) # 达摩盘
|
1367
1388
|
cn.tg_reports(is_except=['except']) # 推广报表,天猫淘宝共同清洗
|
1368
1389
|
cn.syj_reports_tm(is_except=['except']) # 天猫生意经
|
1390
|
+
# # 淘宝生意经,不可以和天猫同时运行
|
1369
1391
|
# cn.syj_reports_tb(is_except=['except']) # 淘宝生意经,不可以和天猫同时运行
|
1370
1392
|
cn.jd_reports(is_except=['except']) # 清洗京东报表
|
1371
1393
|
cn.sp_scene_clean(is_except=['except']) # 商品素材
|
@@ -1399,7 +1421,7 @@ def main(service_databases=None, is_mysql=False):
|
|
1399
1421
|
'天猫_推广数据3',
|
1400
1422
|
'淘宝_推广数据3',
|
1401
1423
|
# '市场数据3',
|
1402
|
-
'
|
1424
|
+
'生意参谋3',
|
1403
1425
|
'天猫_生意经3',
|
1404
1426
|
# '淘宝_生意经3',
|
1405
1427
|
],
|
@@ -1425,25 +1447,25 @@ def main(service_databases=None, is_mysql=False):
|
|
1425
1447
|
|
1426
1448
|
def test():
|
1427
1449
|
# main_key = '单元报表'
|
1428
|
-
path = f'/Users/xigua/数据中心/原始文件
|
1450
|
+
path = f'/Users/xigua/数据中心/原始文件3/天猫推广报表/主体报表'
|
1429
1451
|
for root, dirs, files in os.walk(path, topdown=False):
|
1430
1452
|
for name in files:
|
1431
1453
|
if '~$' in name or '.DS' in name or '.localized' in name or '.jpg' in name or '.png' in name:
|
1432
1454
|
continue
|
1433
|
-
if 'py_xg' in name:
|
1434
|
-
|
1435
|
-
if 'TM_旧表_字段' in root:
|
1436
|
-
|
1455
|
+
# if 'py_xg' in name:
|
1456
|
+
# continue
|
1457
|
+
# if 'TM_旧表_字段' in root:
|
1458
|
+
# continue
|
1437
1459
|
|
1438
1460
|
if name.endswith('.csv'):
|
1439
1461
|
print(name)
|
1440
1462
|
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
|
1441
|
-
if '店铺名称' not in df.columns.tolist():
|
1442
|
-
|
1443
|
-
df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
|
1444
|
-
df.replace(to_replace=[','], value='', regex=True, inplace=True)
|
1445
|
-
if '统计日期' in df.columns.tolist() and '日期' not in df.columns.tolist():
|
1446
|
-
|
1463
|
+
# if '店铺名称' not in df.columns.tolist():
|
1464
|
+
# df.insert(loc=1, column='店铺名称', value='万里马官方旗舰店')
|
1465
|
+
# df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
|
1466
|
+
# df.replace(to_replace=[','], value='', regex=True, inplace=True)
|
1467
|
+
# if '统计日期' in df.columns.tolist() and '日期' not in df.columns.tolist():
|
1468
|
+
# df.rename(columns={'统计日期': '日期', '商品ID': '商品id'}, inplace=True)
|
1447
1469
|
# shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)', name)[0]
|
1448
1470
|
# df.insert(loc=1, column='店铺名称', value=shop_name)
|
1449
1471
|
|
@@ -1478,7 +1500,7 @@ if __name__ == '__main__':
|
|
1478
1500
|
# {'home_lx': 'mongodb'},
|
1479
1501
|
# {'nas': 'mysql'},
|
1480
1502
|
],
|
1481
|
-
is_mysql =
|
1503
|
+
is_mysql = False,
|
1482
1504
|
)
|
1483
1505
|
|
1484
1506
|
# c = DataClean(
|
@@ -47,7 +47,7 @@ class DataFrameConverter(object):
|
|
47
47
|
try:
|
48
48
|
# 百分比在某些数据库中不兼容, 转换百分比为小数, # 转百分比的列不能含有中文或特殊字符
|
49
49
|
df[col] = df[col].apply(
|
50
|
-
lambda x: float(float((str(x).rstrip("%"))) / 100) if re.findall(r'^\d+\.?\d
|
50
|
+
lambda x: float(float((str(x).rstrip("%"))) / 100) if re.findall(r'^\d+\.?\d*%$', str(x)) else x)
|
51
51
|
except Exception as e:
|
52
52
|
print(f'留意错误信息: 位于列 -> {col} -> {e}')
|
53
53
|
|
@@ -94,5 +94,5 @@ if __name__ == '__main__':
|
|
94
94
|
# print(df['a'].dtype)
|
95
95
|
# print(df)
|
96
96
|
pattern = '1540%'
|
97
|
-
pattern = re.findall(r'^\d+\.?\d
|
97
|
+
pattern = re.findall(r'^\d+\.?\d*%$', pattern)
|
98
98
|
print(pattern)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|