mdbq 2.6.6__py3-none-any.whl → 2.6.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +3 -3
- mdbq/aggregation/query_data.py +68 -55
- mdbq/clean/clean_upload.py +101 -1
- mdbq/company/copysh.py +15 -57
- mdbq/company/copysh_bak.py +417 -0
- mdbq/dataframe/converter.py +1 -1
- mdbq/mysql/mysql.py +1 -1
- {mdbq-2.6.6.dist-info → mdbq-2.6.8.dist-info}/METADATA +1 -1
- {mdbq-2.6.6.dist-info → mdbq-2.6.8.dist-info}/RECORD +11 -10
- {mdbq-2.6.6.dist-info → mdbq-2.6.8.dist-info}/WHEEL +0 -0
- {mdbq-2.6.6.dist-info → mdbq-2.6.8.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -1326,10 +1326,10 @@ if __name__ == '__main__':
     # )

     # 上传一个目录到指定数据库
-    db_name = '
-    table_name = '
+    db_name = '生意经2'
+    table_name = '省份城市分析'
     upload_dir(
-        path='/Users/xigua/数据中心/原始文件3
+        path='/Users/xigua/数据中心/原始文件3/天猫_生意经/省份城市分析',
         db_name=db_name,
         collection_name=table_name,
         dbs={'mysql': True, 'mongodb': False},
mdbq/aggregation/query_data.py
CHANGED
@@ -144,8 +144,8 @@ class MysqlDatasQuery:
             '订单数': 1,
             '退货量': 1,
             '退款额': 1,
-            '
-            '
+            '退款额_发货后': 1,
+            '退货量_发货后': 1,
         }
         df = self.download.data_to_df(
             db_name='生意经2',
@@ -498,7 +498,7 @@ class MysqlDatasQuery:
             '日期': 1,
             '店铺名称': 1,
             '搜索词': 1,
-            '
+            '词类型': 1,
             '访客数': 1,
             '加购人数': 1,
             '商品收藏人数': 1,
@@ -522,42 +522,42 @@ class MysqlDatasQuery:
     def zb_ccfx(self):
         start_date, end_date = self.months_data(num=self.months)
         projection = {
-            '日期': 1,
-            '店铺': 1,
-            '场次信息': 1,
-            '场次id': 1,
-            '直播开播时间': 1,
-            '开播时长': 1,
-            '封面图点击率': 1,
-            '观看人数': 1,
-            '观看次数': 1,
-            '新增粉丝数': 1,
-            '流量券消耗': 1,
-            '观看总时长(秒)': 1,
-            '人均观看时长(秒)': 1,
-            '次均观看时长(秒)': 1,
-            '商品点击人数': 1,
-            '商品点击次数': 1,
-            '商品点击率': 1,
-            '加购人数': 1,
-            '加购件数': 1,
-            '加购次数': 1,
-            '成交金额(元)': 1,
-            '成交人数': 1,
-            '成交件数': 1,
-            '成交笔数': 1,
-            '成交转化率': 1,
-            '退款人数': 1,
-            '退款笔数': 1,
-            '退款件数': 1,
-            '
-            '
-            '
-            '店铺名称': 1,
+            # '日期': 1,
+            # '店铺': 1,
+            # '场次信息': 1,
+            # '场次id': 1,
+            # '直播开播时间': 1,
+            # '开播时长': 1,
+            # '封面图点击率': 1,
+            # '观看人数': 1,
+            # '观看次数': 1,
+            # '新增粉丝数': 1,
+            # '流量券消耗': 1,
+            # '观看总时长(秒)': 1,
+            # '人均观看时长(秒)': 1,
+            # '次均观看时长(秒)': 1,
+            # '商品点击人数': 1,
+            # '商品点击次数': 1,
+            # '商品点击率': 1,
+            # '加购人数': 1,
+            # '加购件数': 1,
+            # '加购次数': 1,
+            # '成交金额(元)': 1,
+            # '成交人数': 1,
+            # '成交件数': 1,
+            # '成交笔数': 1,
+            # '成交转化率': 1,
+            # '退款人数': 1,
+            # '退款笔数': 1,
+            # '退款件数': 1,
+            # '退款金额': 1,
+            # '预售定金支付金额': 1,
+            # '预售预估总金额': 1,
+            # '店铺名称': 1,
         }
         df = self.download.data_to_df(
             db_name='生意参谋3',
-            table_name='
+            table_name='直播分场次效果',
             start_date=start_date,
             end_date=end_date,
             projection=projection,
@@ -808,14 +808,14 @@ class MysqlDatasQuery:

         projection = {}
         df_dmp = self.download.data_to_df(
-            db_name='
-            table_name='
+            db_name='达摩盘3',
+            table_name='dmp人群报表',
             start_date=start_date,
             end_date=end_date,
             projection=projection,
         )
         df_dmp.sort_values('日期', ascending=True, ignore_index=True, inplace=True)
-        df_dmp.drop_duplicates(subset=['日期', '人群id', '消耗
+        df_dmp.drop_duplicates(subset=['日期', '人群id', '消耗'], keep='last', inplace=True, ignore_index=True)
         df = pd.merge(df_dmp, df_crowd, left_on=['人群id'], right_on=['人群id'], how='left')
         # 清除一些不必要的字符
         df['用户年龄'] = df['用户年龄'].apply(lambda x: '~'.join(re.findall(r'^(\d+).*-(\d+)岁$', str(x))[0]) if '岁' in str(x) else x)
@@ -973,7 +973,9 @@ class GroupBy:
             ], keep='last', inplace=True, ignore_index=True)
             return df
         elif '天猫_人群报表' in table_name and '达摩盘' not in table_name:
-            """
+            """
+            天猫推广人群报表独立生成消费力、年龄层、分类等特征,不依赖于达摩盘数据表
+            """
             df.rename(columns={
                 '场景名字': '营销场景',
                 '主体id': '商品id',
@@ -1292,8 +1294,8 @@ class GroupBy:
                     '订单数': ('订单数', np.min),
                     '退货量': ('退货量', np.max),
                     '退款额': ('退款额', np.max),
-                    '
-                    '
+                    '退款额_发货后': ('退款额_发货后', np.max),
+                    '退货量_发货后': ('退货量_发货后', np.max),
                 }
             )
             df['件均价'] = df.apply(lambda x: x['销售额'] / x['销售量'] if x['销售量'] > 0 else 0, axis=1).round(
@@ -1307,7 +1309,7 @@ class GroupBy:
             )
             self.data_tgyj.update(
                 {
-                    table_name: df[['日期', '宝贝id', '销售额', '销售量', '
+                    table_name: df[['日期', '宝贝id', '销售额', '销售量', '退款额_发货后', '退货量_发货后']],
                 }
             )
             return df
@@ -1476,7 +1478,7 @@ class GroupBy:
             return df
         elif '天猫店铺来源_手淘搜索' in table_name:
             df = df.groupby(
-                ['日期', '店铺名称', '搜索词'],
+                ['日期', '店铺名称', '词类型', '搜索词'],
                 as_index=False).agg(
                 **{
                     '访客数': ('访客数', np.max),
@@ -1489,7 +1491,7 @@ class GroupBy:
                 }
             )
             return df
-        elif '直播场次分析' in table_name:
+        elif '生意参谋_直播场次分析' in table_name:
             df.drop_duplicates(subset=['场次id'], keep='first', inplace=True, ignore_index=True)
             return df
         elif '多店推广场景_按日聚合' in table_name:
@@ -2070,14 +2072,19 @@ def data_aggregation_one(service_databases=[{}], months=1):
     )  # 3. 回传数据库


-def data_aggregation(service_databases=[{}], months=1, is_juhe=True):
+def data_aggregation(service_databases=[{}], months=1, is_juhe=True, less_dict=[]):
     """
     1. 从数据库中读取数据
     2. 数据聚合清洗
     3. 统一回传数据库: <聚合数据> (不再导出为文件)
     公司台式机调用
     months: 1+,写 0 表示当月数据,但在每月 1 号时可能会因为返回空数据出错
+    is_juhe: 聚合数据
+    less_dict::只聚合某个特定的库
     """
+    if months == 0:
+        print(f'months 不建议为 0 ')
+        return
     for service_database in service_databases:
         for service_name, database in service_database.items():
             sdq = MysqlDatasQuery(target_service=service_name)  # 实例化数据处理类
@@ -2185,12 +2192,12 @@ def data_aggregation(service_databases=[{}], months=1, is_juhe=True):
                     '唯一主键': ['日期', '关键词', '访客数'],
                     '数据主体': sdq.se_search(),
                 },
-
-
-
-
-
-
+                {
+                    '数据库名': '聚合数据',
+                    '集合名': '生意参谋_直播场次分析',  # 暂缺
+                    '唯一主键': ['场次id'],
+                    '数据主体': sdq.zb_ccfx(),
+                },
                 {
                     '数据库名': '聚合数据',
                     '集合名': '多店推广场景_按日聚合',
@@ -2210,6 +2217,9 @@ def data_aggregation(service_databases=[{}], months=1, is_juhe=True):
                     '数据主体': sdq.dmp_crowd(),
                 },
             ]
+
+            if less_dict:
+                data_dict = [item for item in data_dict if item['集合名'] in less_dict]
             for items in data_dict:  # 遍历返回结果
                 db_name, table_name, unique_key_list, df = items['数据库名'], items['集合名'], items['唯一主键'], items['数据主体']
                 df = g.groupby(df=df, table_name=table_name, is_maximize=True)  # 2. 聚合数据
@@ -2304,6 +2314,9 @@ def main():


 if __name__ == '__main__':
-    data_aggregation(
-
-
+    data_aggregation(
+        service_databases=[{'company': 'mysql'}],
+        months=1,
+        is_juhe=False,  # 立即启动对聚合数据的清理工作
+        # less_dict=['生意参谋_直播场次分析'],  # 单独聚合某一个数据库
+    )
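The new less_dict parameter narrows an aggregation run to specific target tables: after data_dict is built, entries whose 集合名 is not listed are dropped, and months=0 is now rejected up front. A minimal standalone sketch of that filter, using a simplified, hypothetical data_dict:

import pprint

# Hypothetical, trimmed-down data_dict; the real one holds DataFrames under '数据主体'.
data_dict = [
    {'数据库名': '聚合数据', '集合名': '天猫_主体报表', '数据主体': None},
    {'数据库名': '聚合数据', '集合名': '生意参谋_直播场次分析', '数据主体': None},
]
less_dict = ['生意参谋_直播场次分析']   # only aggregate this one table
if less_dict:
    data_dict = [item for item in data_dict if item['集合名'] in less_dict]
pprint.pprint([item['集合名'] for item in data_dict])  # ['生意参谋_直播场次分析']

With the default empty list the filter is skipped entirely, so existing callers keep the full data_dict.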
mdbq/clean/clean_upload.py
CHANGED
@@ -108,6 +108,11 @@ class DataClean:
                 '数据库名': '生意参谋3',
                 '集合名称': '手淘搜索_本店引流词',
             },
+            {
+                '文件简称': '直播分场次效果_',  # 文件名中包含的字符
+                '数据库名': '生意参谋3',
+                '集合名称': '直播分场次效果',
+            },
         ]
         for root, dirs, files in os.walk(path, topdown=False):
             for name in files:
@@ -181,6 +186,14 @@ class DataClean:
                     new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
                     self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
                     os.remove(os.path.join(root, name))
+                elif name.endswith('.csv') and '直播分场次效果' in name:
+                    df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
+                    shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)_', name)[0]
+                    if '店铺名称' not in df.columns.tolist():
+                        df.insert(loc=1, column='店铺名称', value=shop_name)
+                    new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
+                    self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
+                    os.remove(os.path.join(root, name))

                 # 将数据传入 self.datas 等待更新进数据库
                 if not db_name or not collection_name:
@@ -205,6 +218,11 @@ class DataClean:
                 '数据库名': '达摩盘3',
                 '集合名称': '我的人群属性',
             },
+            {
+                '文件简称': 'dmp人群报表_',  # 文件名中包含的字符
+                '数据库名': '达摩盘3',
+                '集合名称': 'dmp人群报表',
+            },
         ]
         for root, dirs, files in os.walk(path, topdown=False):
             for name in files:
@@ -241,6 +259,21 @@ class DataClean:
                     new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
                     self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
                     os.remove(os.path.join(root, name))
+                elif name.endswith('.csv') and 'dmp人群报表_' in name:
+                    df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
+                    if len(df) == 0:
+                        print(f'{name} 报表数据为空')
+                        continue
+                    for col in df.columns.tolist():
+                        if '(' in col or ')' in col:
+                            new_col = re.sub(r'\(.*\)', '', col)
+                            df.rename(columns={col: new_col}, inplace=True)
+                    shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)', name)[0]
+                    if '店铺名称' not in df.columns.tolist():
+                        df.insert(loc=1, column='店铺名称', value=shop_name)
+                    new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
+                    self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
+                    os.remove(os.path.join(root, name))

                 # 将数据传入 self.datas 等待更新进数据库
                 if not db_name or not collection_name:
@@ -324,6 +357,11 @@ class DataClean:
                 '文件简称': 'tg_report_品销宝_明星店铺',
                 '数据库名': '推广数据2',
                 '集合名称': '品销宝',
+            },
+            {
+                '文件简称': 'tg_report_超级短视频_主体',
+                '数据库名': '推广数据2',
+                '集合名称': '超级短视频_主体',
             }
         ]
         for root, dirs, files in os.walk(path, topdown=False):
@@ -965,6 +1003,9 @@ class DataClean:
             elif name.endswith('.csv') and '手淘搜索_本店引流词_' in name:
                 t_path = os.path.join(self.source_path, '生意参谋', '手淘搜索_本店引流词')
                 bib(t_path, _as_month=True)
+            elif name.endswith('.csv') and '直播分场次效果_' in name:
+                t_path = os.path.join(self.source_path, '生意参谋', '直播分场次效果')
+                bib(t_path, _as_month=True)

     def move_dmp(self, path=None, is_except=[]):
         """ 达摩盘 """
@@ -995,6 +1036,9 @@ class DataClean:
             if name.endswith('.csv') and '人群属性_万里马官方旗舰店' in name:
                 t_path = os.path.join(self.source_path, '达摩盘', '我的人群属性')
                 bib(t_path, _as_month=True)
+            elif name.endswith('.csv') and 'dmp人群报表_' in name:
+                t_path = os.path.join(self.source_path, '达摩盘', 'dmp人群报表')
+                bib(t_path, _as_month=True)


     # @try_except
@@ -1154,6 +1198,9 @@ class DataClean:
             elif name.endswith('.csv') and 'tg_report_超级直播报表_人群_万里马官方旗舰店' in name:
                 t_path = os.path.join(self.source_path, '天猫推广报表', '超级直播报表_人群')
                 bib(t_path, _as_month=True)
+            elif name.endswith('.csv') and '超级短视频_主体' in name:
+                t_path = os.path.join(self.source_path, '天猫推广报表', '超级短视频_主体')
+                bib(t_path, _as_month=True)

             elif name.endswith('.csv') and 'tg_report_品销宝_明星店铺_万里马官方旗舰店' in name:
                 if '账户' in name:
@@ -1466,6 +1513,57 @@ def test():
     # print(e)


+def date_table(service_databases=[{}]):
+    """
+    生成 pbix 使用的日期表
+    """
+    start_date = '2022-01-01'  # 日期表的起始日期
+    yesterday = time.strftime('%Y-%m-%d', time.localtime(time.time() - 86400))
+    dic = pd.date_range(start=start_date, end=yesterday)
+    df = pd.DataFrame(dic, columns=['日期'])
+    df.sort_values('日期', ascending=True, ignore_index=True, inplace=True)
+    df.reset_index(inplace=True)
+    # inplace 添加索引到 df
+    p = df.pop('index')
+    df['月2'] = df['日期']
+    df['月2'] = df['月2'].dt.month
+    df['日期'] = df['日期'].dt.date  # 日期格式保留年月日,去掉时分秒
+    df['年'] = df['日期'].apply(lambda x: str(x).split('-')[0] + '年')
+    df['月'] = df['月2'].apply(lambda x: str(x) + '月')
+    # df.drop('月2', axis=1, inplace=True)
+    mon = df.pop('月2')
+    df['日'] = df['日期'].apply(lambda x: str(x).split('-')[2])
+    df['年月'] = df.apply(lambda x: x['年'] + x['月'], axis=1)
+    df['月日'] = df.apply(lambda x: x['月'] + x['日'] + '日', axis=1)
+    df['第n周'] = df['日期'].apply(lambda x: x.strftime('第%W周'))
+    df['索引'] = p
+    df['月索引'] = mon
+    df.sort_values('日期', ascending=False, ignore_index=True, inplace=True)
+
+    for service_database in service_databases:
+        for service_name, database in service_database.items():
+            username, password, host, port = get_myconf.select_config_values(
+                target_service=service_name,
+                database=database,
+            )
+            m = mysql.MysqlUpload(
+                username=username,
+                password=password,
+                host=host,
+                port=port,
+            )
+            m.df_to_mysql(
+                df=df,
+                db_name='聚合数据',
+                table_name='日期表',
+                move_insert=True,  # 先删除,再插入
+                df_sql=False,  # 值为 True 时使用 df.to_sql 函数上传整个表, 不会排重
+                drop_duplicates=False,  # 值为 True 时检查重复数据再插入,反之直接上传,会比较慢
+                filename=None,  # 用来追踪处理进度
+                service_database=service_database,  # 用来追踪处理进度
+            )
+
+
 def main(service_databases=None, is_mysql=False):
     """
     is_mysql: 调试时加,False: 是否后续的聚合数据
@@ -1505,6 +1603,8 @@ def main(service_databases=None, is_mysql=False):
     if not is_mysql:
         return

+    # 更新日期表
+    date_table(service_databases=service_databases)
     # 更新货品年份基准表, 属性设置 2 - 货品年份基准
     p = products.Products()
     p.to_mysql(service_databases=service_databases)
@@ -1553,7 +1653,7 @@ if __name__ == '__main__':
         ],
         is_mysql = False,  # 清理聚合数据
     )
-
+    # date_table(service_databases=[{'company': 'mysql'}])
     # c = DataClean(
     #     path=upload_path,  # 源文件目录,下载文件夹
     #     source_path=source_path3,  # 原始文件保存目录
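Both new branches (直播分场次效果 and dmp人群报表) derive the 店铺名称 column from the file name, and the dmp branch also strips parenthetical suffixes from column headers before upload. A standalone sketch of those two steps, using a hypothetical file name and column name:

import re

name = 'dmp人群报表_万里马官方旗舰店_2024-01-01.csv'   # hypothetical file name, for illustration only
shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)', name)[0]
print(shop_name)                                       # 万里马官方旗舰店

col = '展现量(次)'                                     # hypothetical column header
new_col = re.sub(r'\(.*\)', '', col)                   # same substitution as the dmp branch
print(new_col)                                         # 展现量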
mdbq/company/copysh.py
CHANGED
@@ -22,6 +22,7 @@ from mdbq.config import products
 from mdbq.mysql import mysql
 from mdbq.pbix import refresh_all
 from mdbq.other import sku_picture
+from mdbq.clean import clean_upload
 warnings.filterwarnings('ignore')


@@ -317,34 +318,18 @@ def op_data(days: int =100):

     # 清理数据库, 除了 聚合数据
     if socket.gethostname() == 'company':  # 公司台式机自身运行
-        # # Mysql
-        # username, password, host, port = get_myconf.select_config_values(
-        #     target_service='company',
-        #     database='mysql',
-        # )
-        # s = mysql.OptimizeDatas(username=username, password=password, host=host, port=port)
-        # s.db_name_lists = [
-        #     '京东数据2',
-        #     '推广数据2',
-        #     '市场数据2',
-        #     '生意参谋2',
-        #     '生意经2',
-        #     '属性设置2',
-        #     # '聚合数据',  # 不在这里清理聚合数据, 还未开始聚合呢
-        # ]
-        # s.days = days
-        # s.optimize_list()
-
         # 清理所有非聚合数据的库
         optimize_data.op_data(
             db_name_lists=[
                 '京东数据2',
+                '属性设置3',
                 '推广数据2',
-                '
-                '
+                '推广数据_淘宝店',
+                '爱库存2',
+                '生意参谋3',
                 '生意经2',
-                '
-
+                # '聚合数据',
+                '达摩盘3',
             ],
             days=days,
         )
@@ -363,39 +348,23 @@ def main():
     while True:
         res, d_path = u.check_date()  # 文件中的 ch_record 值,决定是否执行更新
         if res:
-            upload_path = f'
+            upload_path = f'windows2/{str(datetime.date.today().strftime("%Y-%m"))}/{str(datetime.date.today())}'
             b = bdup.BaiDu()
+            # 1. 从百度云下载文件
             b.download_dir(local_path=d_path, remote_path=upload_path)

-
-
-
-
-            dp.date_table(service_databases=[{'company': 'mysql'}])  # 因为日期表不受 days 参数控制,因此单独更新日期表
-            dp.other_table(service_databases=[{'company': 'mysql'}])  # 上传 support 文件夹下的 主推商品.csv
-            # 更新货品年份基准表, 属性设置 2 - 货品年份基准
-            p = products.Products()
-            p.to_mysql(service_databases=[
-                # {'home_lx': 'mysql'},
-                {'company': 'mysql'}
-            ]
+            # 2. 对文件进行清洗和上传数据库
+            clean_upload.main(
+                service_databases = [{'company': 'mysql'}],
+                is_mysql = False,  # 清理聚合数据
             )

-            if datetime.datetime.now().day in [1, 3, 7, 9, 12, 16, 19, 22, 25, 27]:
-                sku_picture.download_spu(
-                    service_name='company',
-                    database='mysql',
-                    db_name='属性设置2',
-                    table_name='商品spu素材下载记录',
-                    col_name='商品图片',
-                    save_path=os.path.join(f'\\\\192.168.1.198\\时尚事业部\\01.运营部\\天猫报表\\其他文件', '商品id_商家编码_图片'),
-                )
-
             # 此操作用于修改 .copysh_conf 文件,将 ch_record 改为 false (更新完成)
             w = update_conf.UpdateConf()
             w.update_config(filename='.copysh_conf', option='ch_record', new_value='False')
             time.sleep(60)
-
+            # 3. 数据清理和聚合
+            op_data(days=100)

         t.sleep_minutes = 5  # 同步前休眠时间
         t.tb_file()
@@ -404,14 +373,3 @@ def main():


 if __name__ == '__main__':
     main()
-    # # 聚合数据,并清理聚合数据
-    # query_data.data_aggregation(service_databases=[{'company': 'mysql'}], months=1)
-
-    # sku_picture.download_spu(
-    #     service_name='company',
-    #     database='mysql',
-    #     db_name='属性设置2',
-    #     table_name='商品spu素材下载记录',
-    #     col_name='商品图片',
-    #     save_path=os.path.join(f'\\\\192.168.1.198\\时尚事业部\\01.运营部\\天猫报表\\其他文件', '商品id_商家编码_图片'),
-    # )
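The remote directory is now read from windows2/<year-month>/<date> on the Baidu drive. A quick sketch of what that f-string evaluates to, pinned to a fixed date purely for illustration:

import datetime

# Fixed date instead of datetime.date.today(), so the output is deterministic.
today = datetime.date(2024, 1, 15)
upload_path = f'windows2/{today.strftime("%Y-%m")}/{today}'
print(upload_path)  # windows2/2024-01/2024-01-15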
mdbq/company/copysh_bak.py
ADDED
@@ -0,0 +1,417 @@
+# -*- coding: UTF-8 –*-
+import os
+import platform
+import warnings
+import getpass
+import sys
+import configparser
+import datetime
+import shutil
+import time
+import re
+import socket
+from dateutil.utils import today
+from mdbq.bdup import bdup
+from mdbq.aggregation import aggregation
+from mdbq.aggregation import query_data
+from mdbq.aggregation import optimize_data
+from mdbq.config import update_conf
+from mdbq.config import get_myconf
+from mdbq.config import set_support
+from mdbq.config import products
+from mdbq.mysql import mysql
+from mdbq.pbix import refresh_all
+from mdbq.other import sku_picture
+warnings.filterwarnings('ignore')
+
+
+class TbFiles:
+    """
+    用于在公司台式机中 定时同步pandas数据源文件到共享
+    """
+    def __init__(self):
+
+        support_path = set_support.SetSupport(dirname='support').dirname
+
+        self.my_conf = os.path.join(support_path, '.copysh_conf')
+        self.path1 = os.path.join(support_path, 'tb_list.txt')
+        self.path2 = os.path.join(support_path, 'cp_list.txt')
+        self.d_path = None
+        self.data_path = None
+        self.share_path = None
+        self.before_max_time = []
+        self.sleep_minutes = 30
+        self.tomorrow = datetime.date.today()
+
+    def check_change(self):
+        """ 检查 source_path 的所有文件修改日期, 函数返回最新修改日期 """
+        source_path = os.path.join(self.data_path, 'pandas数据源')
+        if not os.path.exists(source_path):
+            return
+        results = []
+        for root, dirs, files in os.walk(source_path, topdown=False):
+            for name in files:
+                if '~$' in name or 'baiduyun' in name or name.startswith('.') or 'Icon' in name or 'xunlei' in name:
+                    continue  # 排除这些文件的变动
+                # stat_info = os.path.getmtime(os.path.join(root, name))
+                _c = os.stat(os.path.join(root, name)).st_mtime  # 读取文件的元信息 >>>文件修改时间
+                c_time = datetime.datetime.fromtimestamp(_c)  # 格式化修改时间
+                results.append(c_time)
+        return max(results).strftime('%Y%m%d%H%M%S')
+
+    def check_conf(self):
+        if not os.path.isfile(self.my_conf):
+            self.set_conf()  # 添加配置文件
+            print('因缺少配置文件, 已自动初始化')
+        config = configparser.ConfigParser()  # 初始化configparser类
+        try:
+            config.read(self.my_conf, 'UTF-8')
+            self.d_path = config.get('database', 'd_path')
+            self.data_path = config.get('database', 'data_path')
+            self.share_path = config.get('database', 'share_path')
+            if self.d_path is None or self.data_path is None or self.share_path is None:
+                self.set_conf()
+                print('配置文件部分值不完整, 已自动初始化')
+            if not os.path.exists(self.d_path) or not os.path.exists(self.data_path) or not os.path.exists(self.share_path):
+                self.set_conf()
+                print('配置文件异常(可能跨系统), 已自动初始化')
+        except Exception as e:
+            print(e)
+            print('配置文件部分值缺失, 已自动初始化')
+            self.set_conf()
+        sys.path.append(self.share_path)
+
+    def set_conf(self):
+        if platform.system() == 'Windows':
+            self.d_path = os.path.join('C:\\Users', getpass.getuser(), 'Downloads')
+            self.data_path = os.path.join('C:\\同步空间', 'BaiduSyncdisk')
+            self.share_path = os.path.join('\\\\192.168.1.198', '时尚事业部\\01.运营部\\天猫报表')  # 共享文件根目录
+        elif platform.system() == 'Darwin':
+            self.d_path = os.path.join('/Users', getpass.getuser(), 'Downloads')
+            self.data_path = os.path.join('/Users', getpass.getuser(), '数据中心')
+            self.share_path = os.path.join('/Volumes/时尚事业部/01.运营部/天猫报表')  # 共享文件根目录
+        else:
+            self.d_path = 'Downloads'
+            self.data_path = os.path.join(getpass.getuser(), '数据中心')
+            self.share_path = os.path.join('/Volumes/时尚事业部/01.运营部/天猫报表')  # 共享文件根目录
+
+        if not os.path.exists(self.share_path):
+            self.share_path = re.sub('时尚事业部', '时尚事业部-1', self.share_path)
+
+        with open(self.my_conf, 'w+', encoding='utf-8') as f:
+            f.write('[database]\n')
+            f.write(f'# 配置文件\n')
+            f.write('# 下载目录\n')
+            f.write(f'd_path = {self.d_path}\n\n')
+            f.write('# 数据中心目录\n')
+            f.write(f'data_path = {self.data_path}\n\n')
+            f.write('# 共享目录\n')
+            f.write(f'share_path = {self.share_path}\n\n')
+            f.write('# 公司台式机中,用于触发下载百度云文件,更新至本机数据库\n')
+            f.write(f'ch_record = False\n\n')
+        print('目录初始化!')
+
+    def tb_file(self):
+
+        self.check_conf()  # 检查配置文件
+
+        now_max_time = self.check_change()
+        if now_max_time in self.before_max_time:
+            return  # 不更新
+        else:
+            self.before_max_time = []  # 重置变量,以免越来越占内存
+            self.before_max_time.append(now_max_time)
+
+        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S ')
+        res = self.check_upload_mysql()
+        if not res:
+            print(f'检测到源文件修改, 但今日已经同步过, 不再同步')
+            return
+        print(f'{now}pandas数据源文件修改, 触发同步 ({self.sleep_minutes}分钟后开始)')
+
+        if not os.path.exists(self.data_path):
+            print(f'{self.data_path}: 本地目录不存在或配置文件异常, 无法同步此目录')
+            return None
+        if not os.path.exists(self.share_path):
+            print(f'{self.share_path}: 本机未连接共享或配置文件异常, 无法同步')
+            return None
+
+        time.sleep(self.sleep_minutes*60)  # 开始同步前休眠时间
+        recent_time = 48  # 同步近N小时内更新过的文件,单位:小时
+        tb_list = []
+        pd_list = []
+        try:
+            with open(self.path1, 'r', encoding='utf-8') as f:
+                content = f.readlines()
+                content = [item.strip() for item in content if not item.strip().startswith('#')]
+                tb_list = [item for item in content if item]
+
+            with open(self.path2, 'r', encoding='utf-8') as f:
+                content = f.readlines()
+                content = [item.strip() for item in content if not item.strip().startswith('#')]
+                pd_list = [item for item in content if item]
+        except Exception as e:
+            print(e)
+
+        source_path = os.path.join(self.data_path, 'pandas数据源')  # \BaiduSyncdisk\pandas数据源
+        target_path = os.path.join(self.share_path, 'pandas数据源')  # \01.运营部\天猫报表\pandas数据源
+
+        if not os.path.exists(target_path):  # 检查共享主目录,创建目录
+            os.makedirs(target_path, exist_ok=True)
+
+        # 删除共享的副本
+        file_list = os.listdir(self.share_path)
+        for file_1 in file_list:
+            if '副本_' in file_1 or 'con' in file_1:  # or '.DS' in file_1
+                try:
+                    os.remove(os.path.join(self.share_path, file_1))
+                    print(f'移除: {os.path.join(self.share_path, file_1)}')
+                except Exception as e:
+                    print(e)
+                    print(f'移除失败:{os.path.join(self.share_path, file_1)}')
+        file_list2 = os.listdir(target_path)  # 删除乱七八糟的临时文件
+        for file_1 in file_list2:
+            if '.DS' in file_1 or 'con' in file_1:
+                try:
+                    os.remove(os.path.join(target_path, file_1))
+                    print(f'移除: {os.path.join(target_path, file_1)}')
+                except Exception as e:
+                    print(e)
+
+        # 删除 run_py的 副本
+        del_p = os.path.join(self.data_path, '自动0备份', 'py', '数据更新', 'run_py')
+        for file_1 in os.listdir(del_p):
+            if '副本_' in file_1:
+                try:
+                    os.remove(os.path.join(del_p, file_1))
+                    print(f'移除: {os.path.join(del_p, file_1)}')
+                except Exception as e:
+                    print(e)
+                    print(f'移除失败:{os.path.join(del_p, file_1)}')
+
+        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+        print(f'{now} 正在同步文件...')
+        # 复制 run_py的文件到共享
+        for file_1 in tb_list:
+            s = os.path.join(del_p, file_1)
+            t = os.path.join(self.share_path, file_1)
+            try:
+                shutil.copy2(s, t)
+                now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S ')
+                print(f'{now}复制: {s}')
+            except Exception as e:
+                print(e)
+                s1 = os.path.join(del_p, f'副本_{file_1}')
+                t1 = os.path.join(self.share_path, f'副本_{file_1}')
+                shutil.copy2(s, s1)  # 创建副本
+                shutil.copy2(s1, t1)  # 复制副本到共享
+                now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S ')
+                print(f'{now}已创建副本 -->> {s1}')
+
+        # 同步 pandas 文件到共享
+        now_time = time.time()
+        for filenames in pd_list:
+            src = os.path.join(source_path, filenames)  # 原位置,可能是文件或文件夹
+            dst = os.path.join(target_path, filenames)  # 目标位置,可能是文件或文件夹
+            if os.path.isdir(src):  # 如果是文件夹
+                for root, dirs, files in os.walk(src, topdown=False):
+                    for name in files:
+                        if '~$' in name or 'DS_Store' in name:
+                            continue
+                        if name.endswith('csv') or name.endswith('xlsx') or name.endswith('pbix') or name.endswith(
+                                'xls'):
+                            new_src = os.path.join(root, name)
+                            # share_path = dst + '\\' + new_src.split(src)[1]  # 拼接目标路径
+                            share_path = os.path.join(f'{dst}{new_src.split(src)[1]}')  # 拼接目标路径
+                            ls_paths = os.path.dirname(os.path.abspath(share_path))  # 获取上级目录,用来创建
+                            if not os.path.exists(ls_paths):  # 目录不存在则创建
+                                os.makedirs(ls_paths, exist_ok=True)
+                            c_stat = os.stat(new_src).st_mtime  # 读取文件的元信息 >>>文件修改时间
+                            if now_time - c_stat < recent_time * 3600:  # 仅同步近期更新的文件
+                                # res_name = os.path.basename(new_src)
+                                try:
+                                    shutil.copy2(new_src, share_path)
+                                    now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S ')
+                                    print(f'{now}复制文件: {new_src}')
+                                except Exception as e:
+                                    print(e)
+            elif os.path.isfile(src) and 'DS_Store' not in src:  # 如果是文件
+                if src.endswith('csv') or src.endswith('xlsx') or src.endswith('pbix') or src.endswith('xls'):
+                    c_stat = os.stat(src).st_mtime  # 读取文件的元信息 >>>文件修改时间
+                    if now_time - c_stat < recent_time * 3600:
+                        ls_paths = os.path.dirname(os.path.abspath(src))  # 获取上级目录,用来创建
+                        if not os.path.exists(ls_paths):  # 目录不存在则创建
+                            os.makedirs(ls_paths, exist_ok=True)
+                        # new_name = os.path.basename(src)
+                        try:
+                            shutil.copy2(src, dst)
+                            now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S ')
+                            print(f'{now}复制文件: {src}')
+                        except Exception as e:
+                            print(e)
+            else:
+                print(f'{src} 所需同步的文件不存在,请检查:pd_list参数')
+
+        # 刷新共享位置的指定文件/文件夹
+        excel_path = os.path.join(self.share_path, 'EXCEL报表')
+        files = os.listdir(excel_path)
+        files = [f'{excel_path}\\{item}' for item in files if item.endswith('.xlsx') or item.endswith('.xls')]
+        r = refresh_all.RefreshAll()
+        for file in files:
+            if '~' in file or 'DS_Store' in file or 'baidu' in file or 'xunlei' in file:
+                continue
+            if file.endswith('.xlsx') or file.endswith('.xls'):
+                r.refresh_excel(file=file)
+            time.sleep(5)
+
+        # 临时加的
+        # excel_file = f'\\\\192.168.1.198\\时尚事业部\\01.运营部\\0-电商周报-每周五更新\\0-WLM_运营周报-1012输出.xlsx'
+        dir_files = f'\\\\192.168.1.198\\时尚事业部\\01.运营部\\0-电商周报-每周五更新'
+        files = os.listdir(dir_files)
+        for file in files:
+            if file.endswith('.xlsx') and file.startswith('0-WLM_运营周报') and '~' not in file and 'baidu' not in file:
+                excel_file = os.path.join(dir_files, file)
+                r.refresh_excel(file=excel_file)
+
+        self.before_max_time = self.check_change()  # 重置值, 避免重复同步
+
+        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+        print(f'{now} 同步完成!')
+
+    def check_upload_mysql(self):
+        # 每天只更新一次
+        today = datetime.date.today()
+        if today == self.tomorrow:
+            self.tomorrow = today + datetime.timedelta(days=1)
+            return True
+        else:
+            return False
+
+
+class UpdateMysql:
+    def __init__(self):
+        support_path = set_support.SetSupport(dirname='support').dirname
+        self.my_conf = os.path.join(support_path, '.copysh_conf')
+        self.ch_record = False
+        self.d_path = None
+
+    def check_date(self):
+        """ 检查公司台式机 .copysh_conf 文件中的 ch_record 值,决定是否执行更新"""
+        config = configparser.ConfigParser()  # 初始化configparser类
+        try:
+            config.read(self.my_conf, 'UTF-8')
+            self.ch_record = config.get('database', 'ch_record').lower()
+            self.d_path = config.get('database', 'd_path')
+        except Exception as e:
+            print(e)
+        if self.ch_record == 'false':
+            return False, self.d_path
+        elif self.ch_record == 'true':
+            return True, self.d_path
+        else:
+            print(f'配置可能有误: {self.ch_record}, self.ch_record 值应为: true 或 false')
+            return False, self.d_path
+
+
+def op_data(days: int =100):
+
+    # 清理数据库, 除了 聚合数据
+    if socket.gethostname() == 'company':  # 公司台式机自身运行
+        # # Mysql
+        # username, password, host, port = get_myconf.select_config_values(
+        #     target_service='company',
+        #     database='mysql',
+        # )
+        # s = mysql.OptimizeDatas(username=username, password=password, host=host, port=port)
+        # s.db_name_lists = [
+        #     '京东数据2',
+        #     '推广数据2',
+        #     '市场数据2',
+        #     '生意参谋2',
+        #     '生意经2',
+        #     '属性设置2',
+        #     # '聚合数据',  # 不在这里清理聚合数据, 还未开始聚合呢
+        # ]
+        # s.days = days
+        # s.optimize_list()
+
+        # 清理所有非聚合数据的库
+        optimize_data.op_data(
+            db_name_lists=[
+                '京东数据2',
+                '推广数据2',
+                '市场数据2',
+                '生意参谋2',
+                '生意经2',
+                '属性设置2',
+                # '聚合数据',  # 不在这里清理聚合数据, 还未开始聚合呢
+            ],
+            days=days,
+        )
+
+    # 数据聚合
+    query_data.data_aggregation(service_databases=[{'company': 'mysql'}], months=3,)
+    time.sleep(60)
+
+    # 清理聚合数据
+    optimize_data.op_data(db_name_lists=['聚合数据'], days=3650, )
+
+
+def main():
+    t = TbFiles()
+    u = UpdateMysql()
+    while True:
+        res, d_path = u.check_date()  # 文件中的 ch_record 值,决定是否执行更新
+        if res:
+            upload_path = f'windows/{str(datetime.date.today().strftime("%Y-%m"))}/{str(datetime.date.today())}'
+            b = bdup.BaiDu()
+            b.download_dir(local_path=d_path, remote_path=upload_path)
+
+            dp = aggregation.DatabaseUpdate(path=d_path)
+            dp.new_unzip(is_move=True)
+            dp.cleaning(is_move=True, is_except=[])  # 公司台式机需要移除自身下载的文件
+            dp.upload_df(service_databases=[{'company': 'mysql'}])
+            dp.date_table(service_databases=[{'company': 'mysql'}])  # 因为日期表不受 days 参数控制,因此单独更新日期表
+            dp.other_table(service_databases=[{'company': 'mysql'}])  # 上传 support 文件夹下的 主推商品.csv
+            # 更新货品年份基准表, 属性设置 2 - 货品年份基准
+            p = products.Products()
+            p.to_mysql(service_databases=[
+                # {'home_lx': 'mysql'},
+                {'company': 'mysql'}
+            ]
+            )
+
+            if datetime.datetime.now().day in [1, 3, 7, 9, 12, 16, 19, 22, 25, 27]:
+                sku_picture.download_spu(
+                    service_name='company',
+                    database='mysql',
+                    db_name='属性设置2',
+                    table_name='商品spu素材下载记录',
+                    col_name='商品图片',
+                    save_path=os.path.join(f'\\\\192.168.1.198\\时尚事业部\\01.运营部\\天猫报表\\其他文件', '商品id_商家编码_图片'),
+                )
+
+            # 此操作用于修改 .copysh_conf 文件,将 ch_record 改为 false (更新完成)
+            w = update_conf.UpdateConf()
+            w.update_config(filename='.copysh_conf', option='ch_record', new_value='False')
+            time.sleep(60)
+            op_data(days=100)  # 数据清理和聚合
+
+        t.sleep_minutes = 5  # 同步前休眠时间
+        t.tb_file()
+        time.sleep(600)  # 检测间隔
+
+
+if __name__ == '__main__':
+    main()
+    # # 聚合数据,并清理聚合数据
+    # query_data.data_aggregation(service_databases=[{'company': 'mysql'}], months=1)
+
+    # sku_picture.download_spu(
+    #     service_name='company',
+    #     database='mysql',
+    #     db_name='属性设置2',
+    #     table_name='商品spu素材下载记录',
+    #     col_name='商品图片',
+    #     save_path=os.path.join(f'\\\\192.168.1.198\\时尚事业部\\01.运营部\\天猫报表\\其他文件', '商品id_商家编码_图片'),
+    # )
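copysh_bak.py preserves the previous copysh.py flow, including the tb_file rule that only files touched within the last recent_time hours are copied to the share. A minimal sketch of that mtime check, with a hypothetical file created on the spot so it runs:

import os
import time

recent_time = 48                        # hours, as in copysh_bak.py
path = 'example.csv'                    # hypothetical file, for illustration only
open(path, 'a').close()                 # ensure it exists for the demo
mtime = os.stat(path).st_mtime          # file modification time (epoch seconds)
if time.time() - mtime < recent_time * 3600:
    print('modified within 48 h -> included in the sync pass')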
mdbq/dataframe/converter.py
CHANGED
@@ -82,7 +82,7 @@ class DataFrameConverter(object):
                 df[col] = df[col].apply(lambda x: pd.to_datetime(x))
             except:
                 pass
-            new_col = re.sub(r'[()()
+            new_col = re.sub(r'[()()-,,$&~^、* ]', '_', col.lower())
             new_col = re.sub(r'_{2,}', '_', new_col)
             new_col = re.sub(r'_+$', '', new_col)
             df.rename(columns={col: new_col}, inplace=True)
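The sanitisation chain replaces bracket-like and other punctuation characters with underscores, collapses runs of underscores, and strips any trailing underscore. A small sketch of the three steps on a hypothetical column name:

import re

col = '成交金额(元)'   # hypothetical column header, for illustration only
new_col = re.sub(r'[()()-,,$&~^、* ]', '_', col.lower())
new_col = re.sub(r'_{2,}', '_', new_col)
new_col = re.sub(r'_+$', '', new_col)
print(new_col)          # 成交金额_元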
mdbq/mysql/mysql.py
CHANGED
@@ -142,7 +142,7 @@ class MysqlUpload:
                 if col_not_exist:  # 数据表中不存在的列
                     for col in col_not_exist:
                         # 创建列,需转义
-                        sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]};"
+                        sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
                         cursor.execute(sql)
                         print(f"添加列: {col}({dtypes[col]})")  # 添加列并指定数据类型

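With this change, columns created on the fly are declared NOT NULL. A sketch of the SQL the f-string produces, using hypothetical table, column, and dtype values:

# Hypothetical values; dtypes normally comes from the upload's inferred column types.
table_name = '直播分场次效果'
col = '观看人数'
dtypes = {'观看人数': 'INT'}
sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
print(sql)  # ALTER TABLE `直播分场次效果` ADD COLUMN `观看人数` INT NOT NULL;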
{mdbq-2.6.6.dist-info → mdbq-2.6.8.dist-info}/RECORD
RENAMED
@@ -1,18 +1,19 @@
 mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
 mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
 mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
-mdbq/aggregation/aggregation.py,sha256=
+mdbq/aggregation/aggregation.py,sha256=fnXBRxATlaCohx_dzAIewVlPI0d8L-2QY6wth9ENCwA,76594
 mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
 mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
 mdbq/aggregation/optimize_data.py,sha256=gdScrgTAb6RbXHZy1LitX7lggMGn1GTLhkYSgztfwew,4903
-mdbq/aggregation/query_data.py,sha256=
+mdbq/aggregation/query_data.py,sha256=m7Y2xSazPYKvy51yPK6n_Izsv5cjV83oHsiNc7N4fyA,102779
 mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
 mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
 mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
-mdbq/clean/clean_upload.py,sha256=
+mdbq/clean/clean_upload.py,sha256=4DNoSQBUYyn6OsdAP4WJoqWneReeHlvmctXyS5dQvIU,86640
 mdbq/clean/data_clean.py,sha256=ucfslhqXVZoH2QaXHSAWDky0GhIvH9f4GeNaHg4SrFE,104790
 mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
-mdbq/company/copysh.py,sha256=
+mdbq/company/copysh.py,sha256=sisL5eo3D5HGGYvRw46xGqnqFaI3SxfBnoa-Y7zknus,17541
+mdbq/company/copysh_bak.py,sha256=NvlXCBZBcO2GIT5nLRYYqhOyHWM1-1RE7DHvgbj6jmQ,19723
 mdbq/company/home_sh.py,sha256=42CZ2tZIXHLl2mOl2gk2fZnjH2IHh1VJ1s3qHABjonY,18021
 mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
 mdbq/config/get_myconf.py,sha256=cmNvsyoNa0RbZ9FOTjSd3jyyGwkxjUo0phvdHbGlrms,6010
@@ -20,13 +21,13 @@ mdbq/config/products.py,sha256=hN9UMkM6j76HYMulTYdtr3mOhh9QdpvvrLH14a_mbFY,5980
 mdbq/config/set_support.py,sha256=xkZCX6y9Bq1ppBpJAofld4B2YtchA7fl0eT3dx3CrSI,777
 mdbq/config/update_conf.py,sha256=taL3ZqKgiVWwUrDFuaYhim9a72Hm4BHRhhDscJTziR8,4535
 mdbq/dataframe/__init__.py,sha256=2HtCN8AdRj53teXDqzysC1h8aPL-mMFy561ESmhehGQ,22
-mdbq/dataframe/converter.py,sha256=
+mdbq/dataframe/converter.py,sha256=X5Aubm9Z4_bhslcu1-XZzT8X6UpoAW5BFs30RfgfRmE,4460
 mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
 mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
 mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
 mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
 mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
-mdbq/mysql/mysql.py,sha256=
+mdbq/mysql/mysql.py,sha256=9IIyKYU81SXglz6GqVTz0-kCE2dhFuwpQAhUym-yjuc,47135
 mdbq/mysql/s_query.py,sha256=37GGHzRpycfUjsYEoQgDpdEs9JwjW-LxFXnGwwP2b2Q,8403
 mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
 mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -42,7 +43,7 @@ mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
 mdbq/req_post/req_tb.py,sha256=PexWSCPJNM6Tv0ol4lAWIhlOwsAr_frnjtcdSHCFiek,36179
 mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
 mdbq/spider/aikucun.py,sha256=4Y5zd64hZUFtll8AdpUc2napDas-La-A6XzAhb2mLv0,17157
-mdbq-2.6.
-mdbq-2.6.
-mdbq-2.6.
-mdbq-2.6.
+mdbq-2.6.8.dist-info/METADATA,sha256=0f19adUwFUrRTcAT5TvVboqz7L0X2CKv0x5acWdpIYw,245
+mdbq-2.6.8.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+mdbq-2.6.8.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+mdbq-2.6.8.dist-info/RECORD,,

{mdbq-2.6.6.dist-info → mdbq-2.6.8.dist-info}/WHEEL
File without changes

{mdbq-2.6.6.dist-info → mdbq-2.6.8.dist-info}/top_level.txt
File without changes