mdbq 2.6.3__py3-none-any.whl → 2.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +58 -45
- mdbq/aggregation/query_data.py +15 -21
- mdbq/clean/clean_upload.py +43 -1
- mdbq/mysql/mysql.py +13 -13
- mdbq/spider/aikucun.py +22 -20
- {mdbq-2.6.3.dist-info → mdbq-2.6.5.dist-info}/METADATA +1 -1
- {mdbq-2.6.3.dist-info → mdbq-2.6.5.dist-info}/RECORD +9 -9
- {mdbq-2.6.3.dist-info → mdbq-2.6.5.dist-info}/WHEEL +0 -0
- {mdbq-2.6.3.dist-info → mdbq-2.6.5.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -1174,36 +1174,40 @@ def upload_dir(path, db_name, collection_name, dbs={'mysql': True, 'mongodb': Tr
                 i += 1
                 continue
             if name.endswith('.csv'):
-
-
-
-
-
-
-
-
-
-
-                    df = df.astype(dtypes) # 按本地文件更新 df 的数据类型, 可能因为字段不同产生异常
-                except Exception as e:
-                    print(name, e)
-                    # 如果发生异常,这将 df 的数据和 json 中的数据取交集
-                    old_dt = df.dtypes.apply(str).to_dict() # 将 dataframe 数据类型转为字典形式
-                    intersection_keys = dtypes.keys() & old_dt.keys() # 获取两个字典键的交集
-                    dtypes = {k: dtypes[k] for k in intersection_keys} # 使用交集的键创建新字典
-                    df = df.astype(dtypes) # 再次更新 df 的数据类型
+                df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
+            if name.endswith('.xlsx'):
+                df = pd.read_excel(os.path.join(root, name), sheet_name=0, header=0, engine='openpyxl')
+            try:
+                if len(df) == 0:
+                    continue
+                # if '新版' not in name:
+                #     continue
+                cv = converter.DataFrameConverter()
+                df = cv.convert_df_cols(df=df) # 清理列名和 df 中的非法字符

-
-
-            if dbs['mysql']: # drop_duplicates: 值为 True 时检查重复数据再插入
-                m.df_to_mysql(df=df, db_name=db_name, table_name=collection_name,
-                              move_insert=False, # 先删除,再插入
-                              df_sql = True,
-                              drop_duplicates=False,
-                              filename=name, count=f'{i}/{count}')
-            # nas.df_to_mysql(df=df, db_name=db_name, table_name=collection_name, drop_duplicates=True,)
+                try:
+                    df = df.astype(dtypes) # 按本地文件更新 df 的数据类型, 可能因为字段不同产生异常
                 except Exception as e:
                     print(name, e)
+                    # 如果发生异常,这将 df 的数据和 json 中的数据取交集
+                    old_dt = df.dtypes.apply(str).to_dict() # 将 dataframe 数据类型转为字典形式
+                    intersection_keys = dtypes.keys() & old_dt.keys() # 获取两个字典键的交集
+                    dtypes = {k: dtypes[k] for k in intersection_keys} # 使用交集的键创建新字典
+                    df = df.astype(dtypes) # 再次更新 df 的数据类型
+
+                if dbs['mongodb']:
+                    d.df_to_mongo(df=df, db_name=db_name, collection_name=collection_name)
+                if dbs['mysql']: # drop_duplicates: 值为 True 时检查重复数据再插入
+                    m.df_to_mysql(df=df, db_name=db_name, table_name=collection_name,
+                                  move_insert=False, # 先删除,再插入
+                                  df_sql = True,
+                                  drop_duplicates=False,
+                                  filename=name, count=f'{i}/{count}',
+                                  service_database={target_service: 'mysql'}, # 这个参数是用来设置更新哪台服务器的 types 信息到本地 json 文件
+                                  )
+                # nas.df_to_mysql(df=df, db_name=db_name, table_name=collection_name, drop_duplicates=True,)
+            except Exception as e:
+                print(name, e)
             i += 1
         if dbs['mongodb']:
             if d.client:
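The reworked upload_dir now wraps the dtype coercion in its own try/except: it first applies the locally cached dtypes wholesale and, on failure, retries with only the keys shared between the cached mapping and the columns actually present. A minimal standalone sketch of that fallback pattern follows; the helper name, column names, and sample values are invented for illustration and are not part of the package.

```python
import pandas as pd

def coerce_with_fallback(df: pd.DataFrame, dtypes: dict) -> pd.DataFrame:
    """Apply a cached dtype mapping; on mismatch, keep only the columns both sides share."""
    try:
        return df.astype(dtypes)  # apply the cached types wholesale
    except Exception as e:        # the package catches the broad Exception here as well
        print('dtype mismatch:', e)
        old_dt = df.dtypes.apply(str).to_dict()            # current dtypes of the frame, as strings
        shared = dtypes.keys() & old_dt.keys()              # intersection of the two key sets
        return df.astype({k: dtypes[k] for k in shared})    # retry with shared columns only

# Example: the cached mapping knows a column ('销量') that this file does not contain.
df = pd.DataFrame({'日期': ['2024-09-01'], '花费': ['12.5']})
cached = {'日期': 'object', '花费': 'float64', '销量': 'int64'}
print(coerce_with_fallback(df, cached).dtypes)
```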
@@ -1220,7 +1224,16 @@ def one_file_to_mysql(file, db_name, table_name, target_service, database):
     df = pd.read_csv(file, encoding='utf-8_sig', header=0, na_filter=False, float_precision='high')
     # df.replace(to_replace=[','], value='', regex=True, inplace=True) # 替换掉特殊字符
     m = mysql.MysqlUpload(username=username, password=password, host=host, port=port)
-    m.df_to_mysql(
+    m.df_to_mysql(
+        df=df,
+        db_name=db_name,
+        table_name=table_name,
+        filename=filename,
+        move_insert=False,
+        df_sql=True,
+        drop_duplicates=False,
+        service_database={target_service: database},
+    )


 def file_dir(one_file=True, target_service='company'):
@@ -1303,26 +1316,26 @@ if __name__ == '__main__':
     print(username, password, host, port)
     # file_dir(one_file=False, target_service='company')

-    # 上传 1 个文件到数据库
-    one_file_to_mysql(
-
-
-
-        target_service='company',
-        database='mysql'
-    )
-
-    # # 上传一个目录到指定数据库
-    # db_name = '天猫_推广数据3'
-    # table_name = '主体报表'
-    # upload_dir(
-    #     path='/Users/xigua/数据中心/原始文件3/天猫推广报表/主体报表',
-    #     db_name=db_name,
-    #     collection_name=table_name,
-    #     dbs={'mysql': True, 'mongodb': False},
+    # # 上传 1 个文件到数据库
+    # one_file_to_mysql(
+    #     file='/Users/xigua/Downloads/万里马箱包推广1_营销概况_qwqw全站营销_2024-08-18_2024-09-01.csv',
+    #     db_name='京东数据3',
+    #     table_name='推广数据_全站营销',
     #     target_service='company',
+    #     database='mysql'
     # )

+    # 上传一个目录到指定数据库
+    db_name = '京东数据3'
+    table_name = '京东商智_spu_商品明细'
+    upload_dir(
+        path='/Users/xigua/数据中心/原始文件3/京东报表/spu_商品明细',
+        db_name=db_name,
+        collection_name=table_name,
+        dbs={'mysql': True, 'mongodb': False},
+        target_service='company',
+    )
+

     # # 新版 数据分类
     # dp = DatabaseUpdate(path='/Users/xigua/Downloads')
mdbq/aggregation/query_data.py
CHANGED
@@ -1205,7 +1205,7 @@ class GroupBy:
             df.insert(loc=1, column='推广渠道', value='万相台无界版') # df中插入新列
             # df.insert(loc=2, column='营销场景', value='超级直播') # df中插入新列
             # df = df.loc[df['日期'].between(start_day, today)]
-            df_new = df.groupby(['日期', '推广渠道', '营销场景'], as_index=False).agg(
+            df_new = df.groupby(['日期', '店铺名称', '推广渠道', '营销场景'], as_index=False).agg(
                 **{
                     '花费': ('花费', np.sum),
                     '展现量': ('展现量', np.sum),
@@ -1264,7 +1264,7 @@ class GroupBy:
             )
             df.insert(loc=1, column='推广渠道', value='品销宝') # df中插入新列
             df.insert(loc=2, column='营销场景', value='品销宝') # df中插入新列
-            df_new = df.groupby(['日期', '推广渠道', '营销场景'], as_index=False).agg(
+            df_new = df.groupby(['日期', '店铺名称', '推广渠道', '营销场景'], as_index=False).agg(
                 **{
                     '花费': ('花费', np.sum),
                     '展现量': ('展现量', np.sum),
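Both GroupBy hunks make the same change: '店铺名称' joins the groupby keys, so spend metrics are aggregated per shop instead of being pooled across shops that share a date and channel. A small self-contained illustration of the named-aggregation pattern used here; the frame and numbers are invented.

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({
    '日期': ['2024-09-01', '2024-09-01', '2024-09-01'],
    '店铺名称': ['A店', 'A店', 'B店'],
    '推广渠道': ['万相台无界版'] * 3,
    '营销场景': ['关键词推广'] * 3,
    '花费': [10.0, 5.0, 7.0],
    '展现量': [100, 50, 70],
})

# With '店铺名称' in the group keys, A店 and B店 stay on separate rows for the same date.
df_new = df.groupby(['日期', '店铺名称', '推广渠道', '营销场景'], as_index=False).agg(
    **{
        '花费': ('花费', np.sum),
        '展现量': ('展现量', np.sum),
    }
)
print(df_new)
```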
@@ -2107,12 +2107,6 @@ def data_aggregation(service_databases=[{}], months=1, is_juhe=True):
             '唯一主键': ['日期', '一级来源', '二级来源', '三级来源', '访客数'],
             '数据主体': sdq.dplyd(),
         },
-        # {
-        #     '数据库名': '聚合数据',
-        #     '集合名': '天猫_店铺来源_日数据_旧版',
-        #     '唯一主键': ['日期', '一级来源', '二级来源', '三级来源'],
-        #     '数据主体': sdq.dplyd_old(),
-        # },
         {
             '数据库名': '聚合数据',
             '集合名': '商品id编码表',
@@ -2185,18 +2179,18 @@ def data_aggregation(service_databases=[{}], months=1, is_juhe=True):
             '唯一主键': ['日期', '报表类型', '推广渠道', '营销场景', '花费'],
             '数据主体': sdq.pxb_zh(),
         },
-
-
-
-
-
-
-
-
-
-
-
-
+        {
+            '数据库名': '聚合数据',
+            '集合名': '天猫店铺来源_手淘搜索', # 暂缺
+            '唯一主键': ['日期', '关键词', '访客数'],
+            '数据主体': sdq.tm_search(),
+        },
+        {
+            '数据库名': '聚合数据',
+            '集合名': '生意参谋_直播场次分析', # 暂缺
+            '唯一主键': ['场次id'],
+            '数据主体': sdq.zb_ccfx(),
+        },
         {
             '数据库名': '聚合数据',
             '集合名': '多店推广场景_按日聚合',
@@ -2232,7 +2226,7 @@ def data_aggregation(service_databases=[{}], months=1, is_juhe=True):
                 service_database=service_database,
             )
             g.sp_index_datas = pd.DataFrame() # 重置,不然下个循环会继续刷入数据库
-            #
+            # g.as_csv(df=df, filename=table_name + '.csv') # 导出 csv
         if '日期' in df.columns.tolist():
             m.df_to_mysql(
                 df=df,
mdbq/clean/clean_upload.py
CHANGED
@@ -98,6 +98,16 @@ class DataClean:
                 '数据库名': '生意参谋3',
                 '集合名称': '店铺流量来源构成',
             },
+            {
+                '文件简称': '爱库存_商品榜单_', # 文件名中包含的字符
+                '数据库名': '爱库存2',
+                '集合名称': '商品spu榜单',
+            },
+            {
+                '文件简称': '手淘搜索_本店引流词_', # 文件名中包含的字符
+                '数据库名': '生意参谋3',
+                '集合名称': '手淘搜索_本店引流词',
+            },
         ]
         for root, dirs, files in os.walk(path, topdown=False):
             for name in files:
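The two new entries extend the filename-to-target mapping that the cleaning pass walks: a file whose name contains the '文件简称' substring is routed to the listed 数据库名/集合名称 pair. A hedged sketch of that lookup idea is shown below; the constant, helper name, matching rule, and sample filename are illustrative only and are not the class's real implementation.

```python
# Hypothetical standalone lookup mirroring the mapping style added in this diff.
FILE_ROUTES = [
    {'文件简称': '爱库存_商品榜单_', '数据库名': '爱库存2', '集合名称': '商品spu榜单'},
    {'文件简称': '手淘搜索_本店引流词_', '数据库名': '生意参谋3', '集合名称': '手淘搜索_本店引流词'},
]

def route_for(filename: str):
    """Return (db_name, collection_name) for the first rule whose 文件简称 appears in the filename."""
    for rule in FILE_ROUTES:
        if rule['文件简称'] in filename:
            return rule['数据库名'], rule['集合名称']
    return None  # not a recognized report type

print(route_for('手淘搜索_本店引流词_全部_万里马官方旗舰店_2024-08-01.xls'))
```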
@@ -129,7 +139,7 @@ class DataClean:
                 if name.endswith('.xls') and '商品排行_' in name:
                     df = pd.read_excel(os.path.join(root, name), header=4)
                     if len(df) == 0:
-                        print(f'{name}
+                        print(f'{name} 报表数据不能为空')
                         continue
                     df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
                     df.replace(to_replace=[','], value='', regex=True, inplace=True)
@@ -140,11 +150,37 @@ class DataClean:
                     new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
                     self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
                     os.remove(os.path.join(root, name))
+                elif name.endswith('.xls') and '手淘搜索_本店引流词_' in name:
+                    df = pd.read_excel(os.path.join(root, name), header=5)
+                    if len(df) == 0:
+                        print(f'{name} 报表数据不能为空')
+                        continue
+                    df.replace(to_replace=['-'], value=0, regex=False, inplace=True)
+                    df.replace(to_replace=[','], value='', regex=True, inplace=True)
+                    df.rename(columns={'统计日期': '日期'}, inplace=True)
+                    shop_name = re.findall(r'本店.*_([\u4e00-\u9fffA-Za-z]+店)_', name)[0]
+                    kw_type = re.findall('手淘搜索_本店引流词_([\u4e00-\u9fff]+)_', name)[0]
+                    df.insert(loc=2, column='词类型', value=kw_type)
+                    if '店铺名称' in df.columns.tolist():
+                        df['店铺名称'] = shop_name
+                    else:
+                        df.insert(loc=1, column='店铺名称', value=shop_name)
+                    new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
+                    self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
+                    os.remove(os.path.join(root, name))
+
                 elif name.endswith('.csv') and '_来源构成_' in name:
                     df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
                     new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
                     self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
                     os.remove(os.path.join(root, name))
+                elif name.endswith('.csv') and '爱库存_商品榜单_' in name:
+                    df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
+                    if '店铺名称' not in df.columns.tolist():
+                        df.insert(loc=1, column='店铺名称', value='爱库存平台') # df中插入新列
+                    new_name = f'py_xg_{os.path.splitext(name)[0]}.csv'
+                    self.save_to_csv(df, root, new_name, encoding='utf-8_sig')
+                    os.remove(os.path.join(root, name))

                 # 将数据传入 self.datas 等待更新进数据库
                 if not db_name or not collection_name:
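The new 手淘搜索 branch derives two fields from the filename itself: the shop name (a run of CJK/Latin characters ending in 店 between underscores, somewhere after 本店) and the keyword type (the segment immediately after 手淘搜索_本店引流词_). A quick standalone check of those two patterns, using a made-up filename that is assumed to follow the expected underscore layout:

```python
import re

# Invented example name; real reports are assumed to use this underscore layout.
name = '手淘搜索_本店引流词_全部_万里马官方旗舰店_2024-08-01_2024-08-31.xls'

shop_name = re.findall(r'本店.*_([\u4e00-\u9fffA-Za-z]+店)_', name)[0]
kw_type = re.findall('手淘搜索_本店引流词_([\u4e00-\u9fff]+)_', name)[0]

print(shop_name)  # 万里马官方旗舰店
print(kw_type)    # 全部
```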
@@ -923,6 +959,12 @@ class DataClean:
                         '商品类目属性' in name or '商品主图视频' in name or '商品sku属性' in name):
                     t_path = os.path.join(self.source_path, '生意参谋', '商品属性')
                     bib(t_path, _as_month=True)
+                elif name.endswith('.csv') and '爱库存_商品榜单_' in name:
+                    t_path = os.path.join(self.source_path, '爱库存', 'spu商品榜单')
+                    bib(t_path, _as_month=True)
+                elif name.endswith('.csv') and '手淘搜索_本店引流词_' in name:
+                    t_path = os.path.join(self.source_path, '生意参谋', '手淘搜索_本店引流词')
+                    bib(t_path, _as_month=True)

     def move_dmp(self, path=None, is_except=[]):
         """ 达摩盘 """
mdbq/mysql/mysql.py
CHANGED
@@ -82,7 +82,7 @@ class MysqlUpload:
         icm_update: 增量更新, 在聚合数据中使用,原始文件不要使用,设置此参数时需将 drop_duplicates 改为 False
         使用增量更新: 必须确保 icm_update 传进来的列必须是数据表中唯一主键,值不会发生变化,不会重复,否则可能产生错乱覆盖情况
         filename: 用来追踪处理进度,传这个参数是方便定位产生错误的文件
-
+        service_database: 这个参数是用来设置更新哪台服务器的 types 信息到本地 json 文件
         json_path: 这个参数同样也是是用来设置更新 json 文件
         """
         self.filename = filename
@@ -168,18 +168,18 @@
                 chunksize=1000
             )
             # print(f'重置自增')
-            # 6. 重置自增列
-            try:
-
-
-
-
-
-
-
-            except Exception as e:
-
-
+            # # 6. 重置自增列
+            # try:
+            #     cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE 'id'")
+            #     result = cursor.fetchone()
+            #     if result:
+            #         cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN id;") # 删除 id 列
+            #     cursor.execute(
+            #         f"ALTER TABLE {table_name} ADD column id INT AUTO_INCREMENT PRIMARY KEY FIRST;")
+            #     cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1") # 设置自增从 1 开始
+            # except Exception as e:
+            #     print(f'{e}')
+            #     connection.rollback()
             connection.close()
             return

mdbq/spider/aikucun.py
CHANGED
@@ -216,6 +216,7 @@ class AikuCun:
         today = datetime.date.today()
         for date_s in range(date_num):
             new_date = today - datetime.timedelta(days=date_s) # 会用作文件名
+            print(f'正在下载爱库存文件 {date_s}/{date_num}: {new_date}')
             str_date = str(new_date)[2:]
             wait = WebDriverWait(_driver, timeout=15) #
             elements = _driver.find_elements(
@@ -254,7 +255,7 @@
                 '//button[@class="el-button el-button--primary el-button--small is-plain"]/span[contains(text(), "下载数据")]')
             _driver.execute_script("arguments[0].click();", elements[0]) # 点击
             time.sleep(5)
-            self.clean_data(date=new_date)
+            self.clean_data(date=new_date) # 每下载一个文件,需要立即清洗数据
         _driver.quit()

     def clean_data(self, date):
@@ -275,6 +276,7 @@
                     os.remove(os.path.join(root, name))
                     continue
                 df.insert(loc=0, column='日期', value=date) # df中插入新列
+                df.insert(loc=1, column='店铺名称', value='爱库存平台') # df中插入新列
                 df.rename(columns={'spuId': 'spu_id'}, inplace=True)
                 df['数据更新时间'] = pd.to_datetime(df['数据更新时间'], format='%Y-%m-%d %H:%M:%S', errors='ignore')
                 # df['数据更新时间'] = df['数据更新时间'].apply(lambda x: re.sub(' ', ' ', str(x)) if x else x)
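With this change every downloaded 爱库存 sheet gets both a 日期 column (the download date) and a constant 店铺名称 of 爱库存平台 before the spuId rename and timestamp parsing. A compact sketch of that per-row transform on an in-memory frame; the function name and sample rows are fabricated, and the real code reads the downloaded files instead.

```python
import datetime
import pandas as pd

def tag_akc_frame(df: pd.DataFrame, date: datetime.date) -> pd.DataFrame:
    """Mimic the clean_data steps shown in the diff on an in-memory frame."""
    df = df.copy()
    df.insert(loc=0, column='日期', value=date)             # download date used as the business date
    df.insert(loc=1, column='店铺名称', value='爱库存平台')   # constant shop label added in 2.6.5
    df.rename(columns={'spuId': 'spu_id'}, inplace=True)
    # The package passes errors='ignore'; 'coerce' is used here to stay warning-free on newer pandas.
    df['数据更新时间'] = pd.to_datetime(df['数据更新时间'], format='%Y-%m-%d %H:%M:%S', errors='coerce')
    return df

sample = pd.DataFrame({'spuId': ['S001'], '数据更新时间': ['2024-09-01 12:00:00']})
print(tag_akc_frame(sample, datetime.date.today()))
```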
@@ -290,24 +292,24 @@ def akucun():
     akc.get_data(shop_name='aikucun', date_num=10) # 获取最近 N 天数据,0表示今天
     # akc.clean_data()

-    # 新版 数据分类
-    dp = aggregation.DatabaseUpdate(path=upload_path)
-    dp.new_unzip(is_move=True)
-    dp.cleaning(is_move=False, is_except=['临时文件']) # 清洗数据, 存入 self.datas, 不需要立即移除文件,仍保留文件到原始文件中
-    # 将 self.datas 更新至数据库
-    dp.upload_df(service_databases=[
-
-
-
-
-    ])
-    # 数据分类
-    c = data_clean.DataClean(path=upload_path, source_path=Source_Path)
-    c.set_up_to_mogo = False # 不再使用 data_clean 更新数据库,改为 aggregation.py
-    c.set_up_to_mysql = False # 不再使用 data_clean 更新数据库,改为 aggregation.py
-    c.new_unzip(is_move=True, ) # 解压文件
-    c.change_and_sort(is_except=['临时文件'])
-    c.move_all(is_except=['临时文件']) # 移到文件到原始文件夹
+    # # 新版 数据分类
+    # dp = aggregation.DatabaseUpdate(path=upload_path)
+    # dp.new_unzip(is_move=True)
+    # dp.cleaning(is_move=False, is_except=['临时文件']) # 清洗数据, 存入 self.datas, 不需要立即移除文件,仍保留文件到原始文件中
+    # # 将 self.datas 更新至数据库
+    # dp.upload_df(service_databases=[
+    #     # {'home_lx': 'mongodb'},
+    #     # {'home_lx': 'mysql'},
+    #     {'company': 'mysql'},
+    #     # {'nas': 'mysql'},
+    # ])
+    # # 数据分类
+    # c = data_clean.DataClean(path=upload_path, source_path=Source_Path)
+    # c.set_up_to_mogo = False # 不再使用 data_clean 更新数据库,改为 aggregation.py
+    # c.set_up_to_mysql = False # 不再使用 data_clean 更新数据库,改为 aggregation.py
+    # c.new_unzip(is_move=True, ) # 解压文件
+    # c.change_and_sort(is_except=['临时文件'])
+    # c.move_all(is_except=['临时文件']) # 移到文件到原始文件夹


 class AikuCunNew:
@@ -359,7 +361,7 @@ class AikuCunNew:

 if __name__ == '__main__':
     pass
-
+    get_cookie_aikucun()
     akucun()

     # a = AikuCunNew(shop_name='aikucun')
{mdbq-2.6.3.dist-info → mdbq-2.6.5.dist-info}/RECORD
CHANGED
@@ -1,15 +1,15 @@
 mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
 mdbq/__version__.py,sha256=y9Mp_8x0BCZSHsdLT_q5tX9wZwd5QgqrSIENLrb6vXA,62
 mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
-mdbq/aggregation/aggregation.py,sha256
+mdbq/aggregation/aggregation.py,sha256=5WnLHNResPSMNNFYqt2trvw3PQM3XCHQD-XMZkfMYBM,76602
 mdbq/aggregation/df_types.py,sha256=U9i3q2eRPTDY8qAPTw7irzu-Tlg4CIySW9uYro81wdk,8125
 mdbq/aggregation/mysql_types.py,sha256=DQYROALDiwjJzjhaJfIIdnsrNs11i5BORlj_v6bp67Y,11062
 mdbq/aggregation/optimize_data.py,sha256=gdScrgTAb6RbXHZy1LitX7lggMGn1GTLhkYSgztfwew,4903
-mdbq/aggregation/query_data.py,sha256=
+mdbq/aggregation/query_data.py,sha256=0NGYmfl1klQryriHu4V6_Twi9WPERHbl56X3kUqmZaY,102619
 mdbq/bdup/__init__.py,sha256=AkhsGk81SkG1c8FqDH5tRq-8MZmFobVbN60DTyukYTY,28
 mdbq/bdup/bdup.py,sha256=LAV0TgnQpc-LB-YuJthxb0U42_VkPidzQzAagan46lU,4234
 mdbq/clean/__init__.py,sha256=A1d6x3L27j4NtLgiFV5TANwEkLuaDfPHDQNrPBbNWtU,41
-mdbq/clean/clean_upload.py,sha256=
+mdbq/clean/clean_upload.py,sha256=I9aJL-674ISOi5ZAbeGViRRKlcMW2bXQ2TGGBOQvzh4,81148
 mdbq/clean/data_clean.py,sha256=ucfslhqXVZoH2QaXHSAWDky0GhIvH9f4GeNaHg4SrFE,104790
 mdbq/company/__init__.py,sha256=qz8F_GsP_pMB5PblgJAUAMjasuZbOEp3qQOCB39E8f0,21
 mdbq/company/copysh.py,sha256=NvlXCBZBcO2GIT5nLRYYqhOyHWM1-1RE7DHvgbj6jmQ,19723
@@ -26,7 +26,7 @@ mdbq/log/mylogger.py,sha256=oaT7Bp-Hb9jZt52seP3ISUuxVcI19s4UiqTeouScBO0,3258
 mdbq/mongo/__init__.py,sha256=SILt7xMtQIQl_m-ik9WLtJSXIVf424iYgCfE_tnQFbw,13
 mdbq/mongo/mongo.py,sha256=v9qvrp6p1ZRWuPpbSilqveiE0FEcZF7U5xUPI0RN4xs,31880
 mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
-mdbq/mysql/mysql.py,sha256=
+mdbq/mysql/mysql.py,sha256=ejiRPW2RPhnvU9xI8XTFin08bkMLoyeoxTYzFUelKWM,47126
 mdbq/mysql/s_query.py,sha256=37GGHzRpycfUjsYEoQgDpdEs9JwjW-LxFXnGwwP2b2Q,8403
 mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
 mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
@@ -41,8 +41,8 @@ mdbq/pbix/refresh_all_old.py,sha256=_pq3WSQ728GPtEG5pfsZI2uTJhU8D6ra-htIk1JXYzw,
 mdbq/req_post/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
 mdbq/req_post/req_tb.py,sha256=PexWSCPJNM6Tv0ol4lAWIhlOwsAr_frnjtcdSHCFiek,36179
 mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
-mdbq/spider/aikucun.py,sha256=
-mdbq-2.6.
-mdbq-2.6.
-mdbq-2.6.
-mdbq-2.6.
+mdbq/spider/aikucun.py,sha256=4Y5zd64hZUFtll8AdpUc2napDas-La-A6XzAhb2mLv0,17157
+mdbq-2.6.5.dist-info/METADATA,sha256=mUyIb-qC1-GsTA2eIp_1_-oUUkJa8rXo9eLDw9PmUko,245
+mdbq-2.6.5.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+mdbq-2.6.5.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+mdbq-2.6.5.dist-info/RECORD,,
{mdbq-2.6.3.dist-info → mdbq-2.6.5.dist-info}/WHEEL
File without changes
{mdbq-2.6.3.dist-info → mdbq-2.6.5.dist-info}/top_level.txt
File without changes