mdbq 2.6.7__py3-none-any.whl → 2.6.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +3 -3
- mdbq/aggregation/query_data.py +66 -53
- mdbq/clean/clean_upload.py +57 -10
- mdbq/company/copysh.py +7 -36
- mdbq/dataframe/converter.py +1 -1
- mdbq/mysql/mysql.py +1 -1
- {mdbq-2.6.7.dist-info → mdbq-2.6.8.dist-info}/METADATA +1 -1
- {mdbq-2.6.7.dist-info → mdbq-2.6.8.dist-info}/RECORD +10 -10
- {mdbq-2.6.7.dist-info → mdbq-2.6.8.dist-info}/WHEEL +0 -0
- {mdbq-2.6.7.dist-info → mdbq-2.6.8.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -1326,10 +1326,10 @@ if __name__ == '__main__':
|
|
1326
1326
|
# )
|
1327
1327
|
|
1328
1328
|
# 上传一个目录到指定数据库
|
1329
|
-
db_name = '
|
1330
|
-
table_name = '
|
1329
|
+
db_name = '生意经2'
|
1330
|
+
table_name = '省份城市分析'
|
1331
1331
|
upload_dir(
|
1332
|
-
path='/Users/xigua/数据中心/原始文件3
|
1332
|
+
path='/Users/xigua/数据中心/原始文件3/天猫_生意经/省份城市分析',
|
1333
1333
|
db_name=db_name,
|
1334
1334
|
collection_name=table_name,
|
1335
1335
|
dbs={'mysql': True, 'mongodb': False},
|
mdbq/aggregation/query_data.py
CHANGED
@@ -144,8 +144,8 @@ class MysqlDatasQuery:
|
|
144
144
|
'订单数': 1,
|
145
145
|
'退货量': 1,
|
146
146
|
'退款额': 1,
|
147
|
-
'
|
148
|
-
'
|
147
|
+
'退款额_发货后': 1,
|
148
|
+
'退货量_发货后': 1,
|
149
149
|
}
|
150
150
|
df = self.download.data_to_df(
|
151
151
|
db_name='生意经2',
|
@@ -522,42 +522,42 @@ class MysqlDatasQuery:
|
|
522
522
|
def zb_ccfx(self):
|
523
523
|
start_date, end_date = self.months_data(num=self.months)
|
524
524
|
projection = {
|
525
|
-
'日期': 1,
|
526
|
-
'店铺': 1,
|
527
|
-
'场次信息': 1,
|
528
|
-
'场次id': 1,
|
529
|
-
'直播开播时间': 1,
|
530
|
-
'开播时长': 1,
|
531
|
-
'封面图点击率': 1,
|
532
|
-
'观看人数': 1,
|
533
|
-
'观看次数': 1,
|
534
|
-
'新增粉丝数': 1,
|
535
|
-
'流量券消耗': 1,
|
536
|
-
'观看总时长(秒)': 1,
|
537
|
-
'人均观看时长(秒)': 1,
|
538
|
-
'次均观看时长(秒)': 1,
|
539
|
-
'商品点击人数': 1,
|
540
|
-
'商品点击次数': 1,
|
541
|
-
'商品点击率': 1,
|
542
|
-
'加购人数': 1,
|
543
|
-
'加购件数': 1,
|
544
|
-
'加购次数': 1,
|
545
|
-
'成交金额(元)': 1,
|
546
|
-
'成交人数': 1,
|
547
|
-
'成交件数': 1,
|
548
|
-
'成交笔数': 1,
|
549
|
-
'成交转化率': 1,
|
550
|
-
'退款人数': 1,
|
551
|
-
'退款笔数': 1,
|
552
|
-
'退款件数': 1,
|
553
|
-
'
|
554
|
-
'
|
555
|
-
'
|
556
|
-
'店铺名称': 1,
|
525
|
+
# '日期': 1,
|
526
|
+
# '店铺': 1,
|
527
|
+
# '场次信息': 1,
|
528
|
+
# '场次id': 1,
|
529
|
+
# '直播开播时间': 1,
|
530
|
+
# '开播时长': 1,
|
531
|
+
# '封面图点击率': 1,
|
532
|
+
# '观看人数': 1,
|
533
|
+
# '观看次数': 1,
|
534
|
+
# '新增粉丝数': 1,
|
535
|
+
# '流量券消耗': 1,
|
536
|
+
# '观看总时长(秒)': 1,
|
537
|
+
# '人均观看时长(秒)': 1,
|
538
|
+
# '次均观看时长(秒)': 1,
|
539
|
+
# '商品点击人数': 1,
|
540
|
+
# '商品点击次数': 1,
|
541
|
+
# '商品点击率': 1,
|
542
|
+
# '加购人数': 1,
|
543
|
+
# '加购件数': 1,
|
544
|
+
# '加购次数': 1,
|
545
|
+
# '成交金额(元)': 1,
|
546
|
+
# '成交人数': 1,
|
547
|
+
# '成交件数': 1,
|
548
|
+
# '成交笔数': 1,
|
549
|
+
# '成交转化率': 1,
|
550
|
+
# '退款人数': 1,
|
551
|
+
# '退款笔数': 1,
|
552
|
+
# '退款件数': 1,
|
553
|
+
# '退款金额': 1,
|
554
|
+
# '预售定金支付金额': 1,
|
555
|
+
# '预售预估总金额': 1,
|
556
|
+
# '店铺名称': 1,
|
557
557
|
}
|
558
558
|
df = self.download.data_to_df(
|
559
559
|
db_name='生意参谋3',
|
560
|
-
table_name='
|
560
|
+
table_name='直播分场次效果',
|
561
561
|
start_date=start_date,
|
562
562
|
end_date=end_date,
|
563
563
|
projection=projection,
|
@@ -808,14 +808,14 @@ class MysqlDatasQuery:
|
|
808
808
|
|
809
809
|
projection = {}
|
810
810
|
df_dmp = self.download.data_to_df(
|
811
|
-
db_name='
|
812
|
-
table_name='
|
811
|
+
db_name='达摩盘3',
|
812
|
+
table_name='dmp人群报表',
|
813
813
|
start_date=start_date,
|
814
814
|
end_date=end_date,
|
815
815
|
projection=projection,
|
816
816
|
)
|
817
817
|
df_dmp.sort_values('日期', ascending=True, ignore_index=True, inplace=True)
|
818
|
-
df_dmp.drop_duplicates(subset=['日期', '人群id', '消耗
|
818
|
+
df_dmp.drop_duplicates(subset=['日期', '人群id', '消耗'], keep='last', inplace=True, ignore_index=True)
|
819
819
|
df = pd.merge(df_dmp, df_crowd, left_on=['人群id'], right_on=['人群id'], how='left')
|
820
820
|
# 清除一些不必要的字符
|
821
821
|
df['用户年龄'] = df['用户年龄'].apply(lambda x: '~'.join(re.findall(r'^(\d+).*-(\d+)岁$', str(x))[0]) if '岁' in str(x) else x)
|
@@ -973,7 +973,9 @@ class GroupBy:
|
|
973
973
|
], keep='last', inplace=True, ignore_index=True)
|
974
974
|
return df
|
975
975
|
elif '天猫_人群报表' in table_name and '达摩盘' not in table_name:
|
976
|
-
"""
|
976
|
+
"""
|
977
|
+
天猫推广人群报表独立生成消费力、年龄层、分类等特征,不依赖于达摩盘数据表
|
978
|
+
"""
|
977
979
|
df.rename(columns={
|
978
980
|
'场景名字': '营销场景',
|
979
981
|
'主体id': '商品id',
|
@@ -1292,8 +1294,8 @@ class GroupBy:
|
|
1292
1294
|
'订单数': ('订单数', np.min),
|
1293
1295
|
'退货量': ('退货量', np.max),
|
1294
1296
|
'退款额': ('退款额', np.max),
|
1295
|
-
'
|
1296
|
-
'
|
1297
|
+
'退款额_发货后': ('退款额_发货后', np.max),
|
1298
|
+
'退货量_发货后': ('退货量_发货后', np.max),
|
1297
1299
|
}
|
1298
1300
|
)
|
1299
1301
|
df['件均价'] = df.apply(lambda x: x['销售额'] / x['销售量'] if x['销售量'] > 0 else 0, axis=1).round(
|
@@ -1307,7 +1309,7 @@ class GroupBy:
|
|
1307
1309
|
)
|
1308
1310
|
self.data_tgyj.update(
|
1309
1311
|
{
|
1310
|
-
table_name: df[['日期', '宝贝id', '销售额', '销售量', '
|
1312
|
+
table_name: df[['日期', '宝贝id', '销售额', '销售量', '退款额_发货后', '退货量_发货后']],
|
1311
1313
|
}
|
1312
1314
|
)
|
1313
1315
|
return df
|
@@ -1489,7 +1491,7 @@ class GroupBy:
|
|
1489
1491
|
}
|
1490
1492
|
)
|
1491
1493
|
return df
|
1492
|
-
elif '直播场次分析' in table_name:
|
1494
|
+
elif '生意参谋_直播场次分析' in table_name:
|
1493
1495
|
df.drop_duplicates(subset=['场次id'], keep='first', inplace=True, ignore_index=True)
|
1494
1496
|
return df
|
1495
1497
|
elif '多店推广场景_按日聚合' in table_name:
|
@@ -2070,14 +2072,19 @@ def data_aggregation_one(service_databases=[{}], months=1):
|
|
2070
2072
|
) # 3. 回传数据库
|
2071
2073
|
|
2072
2074
|
|
2073
|
-
def data_aggregation(service_databases=[{}], months=1, is_juhe=True):
|
2075
|
+
def data_aggregation(service_databases=[{}], months=1, is_juhe=True, less_dict=[]):
|
2074
2076
|
"""
|
2075
2077
|
1. 从数据库中读取数据
|
2076
2078
|
2. 数据聚合清洗
|
2077
2079
|
3. 统一回传数据库: <聚合数据> (不再导出为文件)
|
2078
2080
|
公司台式机调用
|
2079
2081
|
months: 1+,写 0 表示当月数据,但在每月 1 号时可能会因为返回空数据出错
|
2082
|
+
is_juhe: 聚合数据
|
2083
|
+
less_dict::只聚合某个特定的库
|
2080
2084
|
"""
|
2085
|
+
if months == 0:
|
2086
|
+
print(f'months 不建议为 0 ')
|
2087
|
+
return
|
2081
2088
|
for service_database in service_databases:
|
2082
2089
|
for service_name, database in service_database.items():
|
2083
2090
|
sdq = MysqlDatasQuery(target_service=service_name) # 实例化数据处理类
|
@@ -2185,12 +2192,12 @@ def data_aggregation(service_databases=[{}], months=1, is_juhe=True):
|
|
2185
2192
|
'唯一主键': ['日期', '关键词', '访客数'],
|
2186
2193
|
'数据主体': sdq.se_search(),
|
2187
2194
|
},
|
2188
|
-
|
2189
|
-
|
2190
|
-
|
2191
|
-
|
2192
|
-
|
2193
|
-
|
2195
|
+
{
|
2196
|
+
'数据库名': '聚合数据',
|
2197
|
+
'集合名': '生意参谋_直播场次分析', # 暂缺
|
2198
|
+
'唯一主键': ['场次id'],
|
2199
|
+
'数据主体': sdq.zb_ccfx(),
|
2200
|
+
},
|
2194
2201
|
{
|
2195
2202
|
'数据库名': '聚合数据',
|
2196
2203
|
'集合名': '多店推广场景_按日聚合',
|
@@ -2210,6 +2217,9 @@ def data_aggregation(service_databases=[{}], months=1, is_juhe=True):
|
|
2210
2217
|
'数据主体': sdq.dmp_crowd(),
|
2211
2218
|
},
|
2212
2219
|
]
|
2220
|
+
|
2221
|
+
if less_dict:
|
2222
|
+
data_dict = [item for item in data_dict if item['集合名'] in less_dict]
|
2213
2223
|
for items in data_dict: # 遍历返回结果
|
2214
2224
|
db_name, table_name, unique_key_list, df = items['数据库名'], items['集合名'], items['唯一主键'], items['数据主体']
|
2215
2225
|
df = g.groupby(df=df, table_name=table_name, is_maximize=True) # 2. 聚合数据
|
@@ -2304,6 +2314,9 @@ def main():
|
|
2304
2314
|
|
2305
2315
|
|
2306
2316
|
if __name__ == '__main__':
|
2307
|
-
data_aggregation(
|
2308
|
-
|
2309
|
-
|
2317
|
+
data_aggregation(
|
2318
|
+
service_databases=[{'company': 'mysql'}],
|
2319
|
+
months=1,
|
2320
|
+
is_juhe=False, # 立即启动对聚合数据的清理工作
|
2321
|
+
# less_dict=['生意参谋_直播场次分析'], # 单独聚合某一个数据库
|
2322
|
+
)
|
mdbq/clean/clean_upload.py
CHANGED
@@ -108,6 +108,11 @@ class DataClean:
|
|
108
108
|
'数据库名': '生意参谋3',
|
109
109
|
'集合名称': '手淘搜索_本店引流词',
|
110
110
|
},
|
111
|
+
{
|
112
|
+
'文件简称': '直播分场次效果_', # 文件名中包含的字符
|
113
|
+
'数据库名': '生意参谋3',
|
114
|
+
'集合名称': '直播分场次效果',
|
115
|
+
},
|
111
116
|
]
|
112
117
|
for root, dirs, files in os.walk(path, topdown=False):
|
113
118
|
for name in files:
|
@@ -181,6 +186,14 @@ class DataClean:
|
|
181
186
|
new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
|
182
187
|
self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
|
183
188
|
os.remove(os.path.join(root, name))
|
189
|
+
elif name.endswith('.csv') and '直播分场次效果' in name:
|
190
|
+
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
|
191
|
+
shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)_', name)[0]
|
192
|
+
if '店铺名称' not in df.columns.tolist():
|
193
|
+
df.insert(loc=1, column='店铺名称', value=shop_name)
|
194
|
+
new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
|
195
|
+
self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
|
196
|
+
os.remove(os.path.join(root, name))
|
184
197
|
|
185
198
|
# 将数据传入 self.datas 等待更新进数据库
|
186
199
|
if not db_name or not collection_name:
|
@@ -205,6 +218,11 @@ class DataClean:
|
|
205
218
|
'数据库名': '达摩盘3',
|
206
219
|
'集合名称': '我的人群属性',
|
207
220
|
},
|
221
|
+
{
|
222
|
+
'文件简称': 'dmp人群报表_', # 文件名中包含的字符
|
223
|
+
'数据库名': '达摩盘3',
|
224
|
+
'集合名称': 'dmp人群报表',
|
225
|
+
},
|
208
226
|
]
|
209
227
|
for root, dirs, files in os.walk(path, topdown=False):
|
210
228
|
for name in files:
|
@@ -241,6 +259,21 @@ class DataClean:
|
|
241
259
|
new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
|
242
260
|
self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
|
243
261
|
os.remove(os.path.join(root, name))
|
262
|
+
elif name.endswith('.csv') and 'dmp人群报表_' in name:
|
263
|
+
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
|
264
|
+
if len(df) == 0:
|
265
|
+
print(f'{name} 报表数据为空')
|
266
|
+
continue
|
267
|
+
for col in df.columns.tolist():
|
268
|
+
if '(' in col or ')' in col:
|
269
|
+
new_col = re.sub(r'\(.*\)', '', col)
|
270
|
+
df.rename(columns={col: new_col}, inplace=True)
|
271
|
+
shop_name = re.findall(r'_([\u4e00-\u9fffA-Za-z]+店)', name)[0]
|
272
|
+
if '店铺名称' not in df.columns.tolist():
|
273
|
+
df.insert(loc=1, column='店铺名称', value=shop_name)
|
274
|
+
new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
|
275
|
+
self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
|
276
|
+
os.remove(os.path.join(root, name))
|
244
277
|
|
245
278
|
# 将数据传入 self.datas 等待更新进数据库
|
246
279
|
if not db_name or not collection_name:
|
@@ -324,6 +357,11 @@ class DataClean:
|
|
324
357
|
'文件简称': 'tg_report_品销宝_明星店铺',
|
325
358
|
'数据库名': '推广数据2',
|
326
359
|
'集合名称': '品销宝',
|
360
|
+
},
|
361
|
+
{
|
362
|
+
'文件简称': 'tg_report_超级短视频_主体',
|
363
|
+
'数据库名': '推广数据2',
|
364
|
+
'集合名称': '超级短视频_主体',
|
327
365
|
}
|
328
366
|
]
|
329
367
|
for root, dirs, files in os.walk(path, topdown=False):
|
@@ -965,6 +1003,9 @@ class DataClean:
|
|
965
1003
|
elif name.endswith('.csv') and '手淘搜索_本店引流词_' in name:
|
966
1004
|
t_path = os.path.join(self.source_path, '生意参谋', '手淘搜索_本店引流词')
|
967
1005
|
bib(t_path, _as_month=True)
|
1006
|
+
elif name.endswith('.csv') and '直播分场次效果_' in name:
|
1007
|
+
t_path = os.path.join(self.source_path, '生意参谋', '直播分场次效果')
|
1008
|
+
bib(t_path, _as_month=True)
|
968
1009
|
|
969
1010
|
def move_dmp(self, path=None, is_except=[]):
|
970
1011
|
""" 达摩盘 """
|
@@ -995,6 +1036,9 @@ class DataClean:
|
|
995
1036
|
if name.endswith('.csv') and '人群属性_万里马官方旗舰店' in name:
|
996
1037
|
t_path = os.path.join(self.source_path, '达摩盘', '我的人群属性')
|
997
1038
|
bib(t_path, _as_month=True)
|
1039
|
+
elif name.endswith('.csv') and 'dmp人群报表_' in name:
|
1040
|
+
t_path = os.path.join(self.source_path, '达摩盘', 'dmp人群报表')
|
1041
|
+
bib(t_path, _as_month=True)
|
998
1042
|
|
999
1043
|
|
1000
1044
|
# @try_except
|
@@ -1154,6 +1198,9 @@ class DataClean:
|
|
1154
1198
|
elif name.endswith('.csv') and 'tg_report_超级直播报表_人群_万里马官方旗舰店' in name:
|
1155
1199
|
t_path = os.path.join(self.source_path, '天猫推广报表', '超级直播报表_人群')
|
1156
1200
|
bib(t_path, _as_month=True)
|
1201
|
+
elif name.endswith('.csv') and '超级短视频_主体' in name:
|
1202
|
+
t_path = os.path.join(self.source_path, '天猫推广报表', '超级短视频_主体')
|
1203
|
+
bib(t_path, _as_month=True)
|
1157
1204
|
|
1158
1205
|
elif name.endswith('.csv') and 'tg_report_品销宝_明星店铺_万里马官方旗舰店' in name:
|
1159
1206
|
if '账户' in name:
|
@@ -1597,16 +1644,16 @@ def main(service_databases=None, is_mysql=False):
|
|
1597
1644
|
|
1598
1645
|
|
1599
1646
|
if __name__ == '__main__':
|
1600
|
-
|
1601
|
-
|
1602
|
-
|
1603
|
-
|
1604
|
-
|
1605
|
-
|
1606
|
-
|
1607
|
-
|
1608
|
-
|
1609
|
-
date_table(service_databases=[{'company': 'mysql'}])
|
1647
|
+
main(
|
1648
|
+
service_databases = [
|
1649
|
+
{'company': 'mysql'},
|
1650
|
+
# {'home_lx': 'mysql'},
|
1651
|
+
# {'home_lx': 'mongodb'},
|
1652
|
+
# {'nas': 'mysql'},
|
1653
|
+
],
|
1654
|
+
is_mysql = False, # 清理聚合数据
|
1655
|
+
)
|
1656
|
+
# date_table(service_databases=[{'company': 'mysql'}])
|
1610
1657
|
# c = DataClean(
|
1611
1658
|
# path=upload_path, # 源文件目录,下载文件夹
|
1612
1659
|
# source_path=source_path3, # 原始文件保存目录
|
mdbq/company/copysh.py
CHANGED
@@ -318,36 +318,17 @@ def op_data(days: int =100):
|
|
318
318
|
|
319
319
|
# 清理数据库, 除了 聚合数据
|
320
320
|
if socket.gethostname() == 'company': # 公司台式机自身运行
|
321
|
-
# # Mysql
|
322
|
-
# username, password, host, port = get_myconf.select_config_values(
|
323
|
-
# target_service='company',
|
324
|
-
# database='mysql',
|
325
|
-
# )
|
326
|
-
# s = mysql.OptimizeDatas(username=username, password=password, host=host, port=port)
|
327
|
-
# s.db_name_lists = [
|
328
|
-
# '京东数据2',
|
329
|
-
# '推广数据2',
|
330
|
-
# '市场数据2',
|
331
|
-
# '生意参谋2',
|
332
|
-
# '生意经2',
|
333
|
-
# '属性设置2',
|
334
|
-
# # '聚合数据', # 不在这里清理聚合数据, 还未开始聚合呢
|
335
|
-
# ]
|
336
|
-
# s.days = days
|
337
|
-
# s.optimize_list()
|
338
|
-
|
339
321
|
# 清理所有非聚合数据的库
|
340
322
|
optimize_data.op_data(
|
341
323
|
db_name_lists=[
|
342
|
-
'京东数据
|
324
|
+
'京东数据2',
|
343
325
|
'属性设置3',
|
344
326
|
'推广数据2',
|
345
|
-
'生意参谋3',
|
346
327
|
'推广数据_淘宝店',
|
347
|
-
'爱库存2'
|
328
|
+
'爱库存2',
|
348
329
|
'生意参谋3',
|
349
330
|
'生意经2',
|
350
|
-
# '聚合数据',
|
331
|
+
# '聚合数据',
|
351
332
|
'达摩盘3',
|
352
333
|
],
|
353
334
|
days=days,
|
@@ -369,10 +350,10 @@ def main():
|
|
369
350
|
if res:
|
370
351
|
upload_path = f'windows2/{str(datetime.date.today().strftime("%Y-%m"))}/{str(datetime.date.today())}'
|
371
352
|
b = bdup.BaiDu()
|
372
|
-
# 从百度云下载文件
|
353
|
+
# 1. 从百度云下载文件
|
373
354
|
b.download_dir(local_path=d_path, remote_path=upload_path)
|
374
355
|
|
375
|
-
# 对文件进行清洗和上传数据库
|
356
|
+
# 2. 对文件进行清洗和上传数据库
|
376
357
|
clean_upload.main(
|
377
358
|
service_databases = [{'company': 'mysql'}],
|
378
359
|
is_mysql = False, # 清理聚合数据
|
@@ -382,7 +363,8 @@ def main():
|
|
382
363
|
w = update_conf.UpdateConf()
|
383
364
|
w.update_config(filename='.copysh_conf', option='ch_record', new_value='False')
|
384
365
|
time.sleep(60)
|
385
|
-
|
366
|
+
# 3. 数据清理和聚合
|
367
|
+
op_data(days=100)
|
386
368
|
|
387
369
|
t.sleep_minutes = 5 # 同步前休眠时间
|
388
370
|
t.tb_file()
|
@@ -391,14 +373,3 @@ def main():
|
|
391
373
|
|
392
374
|
if __name__ == '__main__':
|
393
375
|
main()
|
394
|
-
# # 聚合数据,并清理聚合数据
|
395
|
-
# query_data.data_aggregation(service_databases=[{'company': 'mysql'}], months=1)
|
396
|
-
|
397
|
-
# sku_picture.download_spu(
|
398
|
-
# service_name='company',
|
399
|
-
# database='mysql',
|
400
|
-
# db_name='属性设置2',
|
401
|
-
# table_name='商品spu素材下载记录',
|
402
|
-
# col_name='商品图片',
|
403
|
-
# save_path=os.path.join(f'\\\\192.168.1.198\\时尚事业部\\01.运营部\\天猫报表\\其他文件', '商品id_商家编码_图片'),
|
404
|
-
# )
|
mdbq/dataframe/converter.py
CHANGED
@@ -82,7 +82,7 @@ class DataFrameConverter(object):
|
|
82
82
|
df[col] = df[col].apply(lambda x: pd.to_datetime(x))
|
83
83
|
except:
|
84
84
|
pass
|
85
|
-
new_col = re.sub(r'[()()
|
85
|
+
new_col = re.sub(r'[()()-,,$&~^、* ]', '_', col.lower())
|
86
86
|
new_col = re.sub(r'_{2,}', '_', new_col)
|
87
87
|
new_col = re.sub(r'_+$', '', new_col)
|
88
88
|
df.rename(columns={col: new_col}, inplace=True)
|
mdbq/mysql/mysql.py
CHANGED
@@ -142,7 +142,7 @@ class MysqlUpload:
|
|
142
142
|
if col_not_exist: # 数据表中不存在的列
|
143
143
|
for col in col_not_exist:
|
144
144
|
# 创建列,需转义
|
145
|
-
sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]};"
|
145
|
+
sql = f"ALTER TABLE `{table_name}` ADD COLUMN `{col}` {dtypes[col]} NOT NULL;"
|
146
146
|
cursor.execute(sql)
|
147
147
|
print(f"添加列: {col}({dtypes[col]})") # 添加列并指定数据类型
|
148
148
|
|
@@ -1,18 +1,18 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
2
|
mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/aggregation.py,sha256=
|
4
|
+
mdbq/aggregation/aggregation.py,sha256=fnXBRxATlaCohx_dzAIewVlPI0d8L-2QY6wth9ENCwA,76594
|
5
5
|
mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
|
6
6
|
mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
|
7
7
|
mdbq/aggregation/optimize_data.py,sha256=gdScrgTAb6RbXHZy1LitX7lggMGn1GTLhkYSgztfwew,4903
|
8
|
-
mdbq/aggregation/query_data.py,sha256=
|
8
|
+
mdbq/aggregation/query_data.py,sha256=m7Y2xSazPYKvy51yPK6n_Izsv5cjV83oHsiNc7N4fyA,102779
|
9
9
|
mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
|
10
10
|
mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
|
11
11
|
mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
|
12
|
-
mdbq/clean/clean_upload.py,sha256=
|
12
|
+
mdbq/clean/clean_upload.py,sha256=4DNoSQBUYyn6OsdAP4WJoqWneReeHlvmctXyS5dQvIU,86640
|
13
13
|
mdbq/clean/data_clean.py,sha256=ucfslhqXVZoH2QaXHSAWDky0GhIvH9f4GeNaHg4SrFE,104790
|
14
14
|
mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
|
15
|
-
mdbq/company/copysh.py,sha256=
|
15
|
+
mdbq/company/copysh.py,sha256=sisL5eo3D5HGGYvRw46xGqnqFaI3SxfBnoa-Y7zknus,17541
|
16
16
|
mdbq/company/copysh_bak.py,sha256=NvlXCBZBcO2GIT5nLRYYqhOyHWM1-1RE7DHvgbj6jmQ,19723
|
17
17
|
mdbq/company/home_sh.py,sha256=42CZ2tZIXHLl2mOl2gk2fZnjH2IHh1VJ1s3qHABjonY,18021
|
18
18
|
mdbq/config/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
@@ -21,13 +21,13 @@ mdbq/config/products.py,sha256=hN9UMkM6j76HYMulTYdtr3mOhh9QdpvvrLH14a_mbFY,5980
|
|
21
21
|
mdbq/config/set_support.py,sha256=xkZCX6y9Bq1ppBpJAofld4B2YtchA7fl0eT3dx3CrSI,777
|
22
22
|
mdbq/config/update_conf.py,sha256=taL3ZqKgiVWwUrDFuaYhim9a72Hm4BHRhhDscJTziR8,4535
|
23
23
|
mdbq/dataframe/__init__.py,sha256=2HtCN8AdRj53teXDqzysC1h8aPL-mMFy561ESmhehGQ,22
|
24
|
-
mdbq/dataframe/converter.py,sha256=
|
24
|
+
mdbq/dataframe/converter.py,sha256=X5Aubm9Z4_bhslcu1-XZzT8X6UpoAW5BFs30RfgfRmE,4460
|
25
25
|
mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
|
26
26
|
mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
|
27
27
|
mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
|
28
28
|
mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
|
29
29
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
30
|
-
mdbq/mysql/mysql.py,sha256=
|
30
|
+
mdbq/mysql/mysql.py,sha256=9IIyKYU81SXglz6GqVTz0-kCE2dhFuwpQAhUym-yjuc,47135
|
31
31
|
mdbq/mysql/s_query.py,sha256=37GGHzRpycfUjsYEoQgDpdEs9JwjW-LxFXnGwwP2b2Q,8403
|
32
32
|
mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
|
33
33
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
@@ -43,7 +43,7 @@ mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
|
43
43
|
mdbq/req_post/req_tb.py,sha256=PexWSCPJNM6Tv0ol4lAWIhlOwsAr_frnjtcdSHCFiek,36179
|
44
44
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
45
45
|
mdbq/spider/aikucun.py,sha256=4Y5zd64hZUFtll8AdpUc2napDas-La-A6XzAhb2mLv0,17157
|
46
|
-
mdbq-2.6.
|
47
|
-
mdbq-2.6.
|
48
|
-
mdbq-2.6.
|
49
|
-
mdbq-2.6.
|
46
|
+
mdbq-2.6.8.dist-info/METADATA,sha256=0f19adUwFUrRTcAT5TvVboqz7L0X2CKv0x5acWdpIYw,245
|
47
|
+
mdbq-2.6.8.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
48
|
+
mdbq-2.6.8.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
49
|
+
mdbq-2.6.8.dist-info/RECORD,,
|
File without changes
|
File without changes
|