mdbq 2.9.6__py3-none-any.whl → 2.9.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/aggregation/aggregation.py +34 -46
- mdbq/aggregation/query_data.py +1287 -1176
- mdbq/aggregation/query_data_bak.py +2423 -0
- mdbq/dataframe/converter.py +3 -3
- mdbq/mysql/mysql.py +60 -54
- mdbq/mysql/s_query.py +1 -0
- mdbq/spider/aikucun.py +41 -0
- {mdbq-2.9.6.dist-info → mdbq-2.9.8.dist-info}/METADATA +1 -1
- {mdbq-2.9.6.dist-info → mdbq-2.9.8.dist-info}/RECORD +11 -10
- {mdbq-2.9.6.dist-info → mdbq-2.9.8.dist-info}/WHEEL +1 -1
- {mdbq-2.9.6.dist-info → mdbq-2.9.8.dist-info}/top_level.txt +0 -0
mdbq/aggregation/aggregation.py
CHANGED
@@ -1150,51 +1150,39 @@ def upload_dir(path, db_name, collection_name, json_path=None):
|
|
1150
1150
|
df = pd.read_csv(os.path.join(root, name), encoding='utf-8_sig', header=0, na_filter=False)
|
1151
1151
|
if name.endswith('.xlsx'):
|
1152
1152
|
df = pd.read_excel(os.path.join(root, name), sheet_name=0, header=0, engine='openpyxl')
|
1153
|
-
try:
|
1154
|
-
|
1155
|
-
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1159
|
-
|
1160
|
-
|
1161
|
-
try:
|
1162
|
-
df = df.astype(dtypes) # 按本地文件更新 df 的数据类型, 可能因为字段不同产生异常
|
1163
|
-
except Exception as e:
|
1164
|
-
print(name, e)
|
1165
|
-
# 如果发生异常,这将 df 的数据和 json 中的数据取交集
|
1166
|
-
old_dt = df.dtypes.apply(str).to_dict() # 将 dataframe 数据类型转为字典形式
|
1167
|
-
intersection_keys = dtypes.keys() & old_dt.keys() # 获取两个字典键的交集
|
1168
|
-
dtypes = {k: dtypes[k] for k in intersection_keys} # 使用交集的键创建新字典
|
1169
|
-
df = df.astype(dtypes) # 再次更新 df 的数据类型
|
1170
|
-
df.fillna(0, inplace=True)
|
1171
|
-
|
1172
|
-
# for col in df.columns.tolist():
|
1173
|
-
# df[col] = df[col].apply(lambda x: 0 if str(x) == '' else x)
|
1174
|
-
# print(f'{i}/{count}')
|
1175
|
-
# sql_engine = create_engine(
|
1176
|
-
# f"mysql+pymysql://{username}:{password}@{host}:{port}/{db_name}") # 创建数据库引擎
|
1177
|
-
# df.to_sql(
|
1178
|
-
# name=collection_name,
|
1179
|
-
# con=sql_engine,
|
1180
|
-
# if_exists='append',
|
1181
|
-
# index=False,
|
1182
|
-
# chunksize=1000
|
1183
|
-
# )
|
1153
|
+
# try:
|
1154
|
+
if len(df) == 0:
|
1155
|
+
continue
|
1156
|
+
# if '新版' not in name:
|
1157
|
+
# continue
|
1158
|
+
# cv = converter.DataFrameConverter()
|
1159
|
+
# df = cv.convert_df_cols(df=df) # 清理列名和 df 中的非法字符
|
1184
1160
|
|
1161
|
+
# try:
|
1162
|
+
# df = df.astype(dtypes) # 按本地文件更新 df 的数据类型, 可能因为字段不同产生异常
|
1163
|
+
# except Exception as e:
|
1164
|
+
# print(name, e)
|
1165
|
+
# # 如果发生异常,这将 df 的数据和 json 中的数据取交集
|
1166
|
+
# old_dt = df.dtypes.apply(str).to_dict() # 将 dataframe 数据类型转为字典形式
|
1167
|
+
# intersection_keys = dtypes.keys() & old_dt.keys() # 获取两个字典键的交集
|
1168
|
+
# dtypes = {k: dtypes[k] for k in intersection_keys} # 使用交集的键创建新字典
|
1169
|
+
# df = df.astype(dtypes) # 再次更新 df 的数据类型
|
1170
|
+
df.fillna(0, inplace=True)
|
1171
|
+
for col in df.columns.tolist():
|
1172
|
+
df[col] = df[col].apply(lambda x: 0 if str(x) == '' else x)
|
1185
1173
|
|
1174
|
+
# if '更新时间' not in df.columns.tolist():
|
1175
|
+
# df['更新时间'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
1176
|
+
#
|
1177
|
+
m.df_to_mysql(df=df, db_name=db_name, table_name=collection_name,
|
1178
|
+
move_insert=False, # 先删除,再插入
|
1179
|
+
df_sql = True,
|
1180
|
+
drop_duplicates=False,
|
1181
|
+
count=f'{i}/{count}',
|
1182
|
+
filename=name,
|
1183
|
+
)
|
1184
|
+
# nas.df_to_mysql(df=df, db_name=db_name, table_name=collection_name, drop_duplicates=True,)
|
1186
1185
|
|
1187
|
-
#
|
1188
|
-
m.df_to_mysql(df=df, db_name=db_name, table_name=collection_name,
|
1189
|
-
move_insert=False, # 先删除,再插入
|
1190
|
-
df_sql = True,
|
1191
|
-
drop_duplicates=False,
|
1192
|
-
count=f'{i}/{count}',
|
1193
|
-
filename=name,
|
1194
|
-
)
|
1195
|
-
# nas.df_to_mysql(df=df, db_name=db_name, table_name=collection_name, drop_duplicates=True,)
|
1196
|
-
except Exception as e:
|
1197
|
-
print(name, e)
|
1198
1186
|
i += 1
|
1199
1187
|
|
1200
1188
|
|
@@ -1282,13 +1270,13 @@ if __name__ == '__main__':
|
|
1282
1270
|
# )
|
1283
1271
|
|
1284
1272
|
# test()
|
1285
|
-
col =
|
1273
|
+
col = 1
|
1286
1274
|
if col:
|
1287
1275
|
# 上传一个目录到指定数据库
|
1288
|
-
db_name = '
|
1289
|
-
table_name = '
|
1276
|
+
db_name = '京东数据3'
|
1277
|
+
table_name = '推广数据_全站营销'
|
1290
1278
|
upload_dir(
|
1291
|
-
path=r'/Users/xigua/数据中心/原始文件3
|
1279
|
+
path=r'/Users/xigua/数据中心/原始文件3/京东报表/京准通_全站营销',
|
1292
1280
|
db_name=db_name,
|
1293
1281
|
collection_name=table_name,
|
1294
1282
|
)
|