mdbq 3.9.17__py3-none-any.whl → 3.9.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/mysql/uploader.py +35 -45
- {mdbq-3.9.17.dist-info → mdbq-3.9.18.dist-info}/METADATA +1 -1
- {mdbq-3.9.17.dist-info → mdbq-3.9.18.dist-info}/RECORD +6 -6
- {mdbq-3.9.17.dist-info → mdbq-3.9.18.dist-info}/WHEEL +0 -0
- {mdbq-3.9.17.dist-info → mdbq-3.9.18.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '3.9.17'
|
1
|
+
VERSION = '3.9.18'
|
mdbq/mysql/uploader.py
CHANGED
@@ -1106,6 +1106,11 @@ class MySQLUploader:
|
|
1106
1106
|
safe_columns = [self._validate_identifier(col) for col in all_columns]
|
1107
1107
|
placeholders = ','.join(['%s'] * len(safe_columns))
|
1108
1108
|
|
1109
|
+
# 初始化统计变量
|
1110
|
+
total_inserted = 0
|
1111
|
+
total_skipped = 0
|
1112
|
+
total_failed = 0
|
1113
|
+
|
1109
1114
|
# 构建基础SQL语句
|
1110
1115
|
if check_duplicate:
|
1111
1116
|
if not duplicate_columns:
|
@@ -1116,10 +1121,7 @@ class MySQLUploader:
|
|
1116
1121
|
conditions = []
|
1117
1122
|
for col in duplicate_columns:
|
1118
1123
|
col_type = set_typ.get(col, '').lower()
|
1119
|
-
|
1120
|
-
# 处理DECIMAL类型,使用ROUND确保精度一致
|
1121
1124
|
if col_type.startswith('decimal'):
|
1122
|
-
# 提取小数位数,如DECIMAL(10,2)提取2
|
1123
1125
|
scale_match = re.search(r'decimal\(\d+,(\d+)\)', col_type)
|
1124
1126
|
scale = int(scale_match.group(1)) if scale_match else 2
|
1125
1127
|
conditions.append(f"ROUND(`{self._validate_identifier(col)}`, {scale}) = ROUND(%s, {scale})")
|
@@ -1137,10 +1139,6 @@ class MySQLUploader:
|
|
1137
1139
|
VALUES ({placeholders})
|
1138
1140
|
ON DUPLICATE KEY UPDATE {update_clause}
|
1139
1141
|
"""
|
1140
|
-
|
1141
|
-
# 注意:在update_on_duplicate模式下,row_values只需要插入数据,不需要排重列值
|
1142
|
-
def prepare_values(row):
|
1143
|
-
return [row.get(col) for col in all_columns]
|
1144
1142
|
else:
|
1145
1143
|
sql = f"""INSERT INTO `{db_name}`.`{table_name}`
|
1146
1144
|
(`{'`,`'.join(safe_columns)}`)
|
@@ -1151,10 +1149,6 @@ class MySQLUploader:
|
|
1151
1149
|
WHERE {where_clause}
|
1152
1150
|
)
|
1153
1151
|
"""
|
1154
|
-
|
1155
|
-
# 在check_duplicate模式下,row_values需要插入数据+排重列值
|
1156
|
-
def prepare_values(row):
|
1157
|
-
return [row.get(col) for col in all_columns] + [row.get(col) for col in duplicate_columns]
|
1158
1152
|
else:
|
1159
1153
|
sql = f"""
|
1160
1154
|
INSERT INTO `{db_name}`.`{table_name}`
|
@@ -1162,35 +1156,38 @@ class MySQLUploader:
|
|
1162
1156
|
VALUES ({placeholders})
|
1163
1157
|
"""
|
1164
1158
|
|
1165
|
-
# 普通模式下,row_values只需要插入数据
|
1166
|
-
def prepare_values(row):
|
1167
|
-
return [row.get(col) for col in all_columns]
|
1168
|
-
|
1169
|
-
total_inserted = 0
|
1170
|
-
total_skipped = 0
|
1171
|
-
total_failed = 0 # 失败计数器
|
1172
|
-
|
1173
1159
|
# 分批插入数据
|
1174
1160
|
with self._get_connection() as conn:
|
1175
1161
|
with conn.cursor() as cursor:
|
1176
1162
|
for i in range(0, len(data), batch_size):
|
1177
|
-
batch_start = time.time()
|
1178
1163
|
batch = data[i:i + batch_size]
|
1179
|
-
|
1164
|
+
batch_inserted = 0
|
1165
|
+
batch_skipped = 0
|
1166
|
+
batch_failed = 0
|
1180
1167
|
|
1181
1168
|
for row in batch:
|
1182
1169
|
try:
|
1183
1170
|
# 准备参数
|
1184
|
-
row_values = prepare_values(row)
|
1171
|
+
row_values = [row.get(col) for col in all_columns]
|
1172
|
+
if check_duplicate and not update_on_duplicate:
|
1173
|
+
row_values += [row.get(col) for col in duplicate_columns]
|
1174
|
+
|
1185
1175
|
cursor.execute(sql, row_values)
|
1186
|
-
successful_rows += 1
|
1187
|
-
conn.commit() # 每次成功插入后提交
|
1188
1176
|
|
1189
|
-
|
1190
|
-
|
1191
|
-
|
1177
|
+
if check_duplicate:
|
1178
|
+
# 检查是否实际插入了行
|
1179
|
+
if cursor.rowcount > 0:
|
1180
|
+
batch_inserted += 1
|
1181
|
+
else:
|
1182
|
+
batch_skipped += 1
|
1183
|
+
else:
|
1184
|
+
batch_inserted += 1
|
1192
1185
|
|
1193
|
-
|
1186
|
+
conn.commit()
|
1187
|
+
|
1188
|
+
except Exception as e:
|
1189
|
+
conn.rollback()
|
1190
|
+
batch_failed += 1
|
1194
1191
|
logger.error(sys._getframe().f_code.co_name, {
|
1195
1192
|
'库': db_name,
|
1196
1193
|
'表': table_name,
|
@@ -1201,29 +1198,22 @@ class MySQLUploader:
|
|
1201
1198
|
'是否排重': check_duplicate,
|
1202
1199
|
'排重列': duplicate_columns
|
1203
1200
|
})
|
1204
|
-
continue # 跳过当前行,继续处理下一行
|
1205
|
-
|
1206
|
-
# 更新统计信息
|
1207
|
-
if check_duplicate:
|
1208
|
-
cursor.execute("SELECT ROW_COUNT()")
|
1209
|
-
affected_rows = cursor.rowcount
|
1210
|
-
total_inserted += affected_rows
|
1211
|
-
total_skipped += len(batch) - affected_rows - (len(batch) - successful_rows)
|
1212
|
-
else:
|
1213
|
-
total_inserted += successful_rows
|
1214
1201
|
|
1215
|
-
|
1202
|
+
# 更新总统计
|
1203
|
+
total_inserted += batch_inserted
|
1204
|
+
total_skipped += batch_skipped
|
1205
|
+
total_failed += batch_failed
|
1206
|
+
|
1216
1207
|
logger.debug(sys._getframe().f_code.co_name, {
|
1217
1208
|
'库': db_name,
|
1218
1209
|
'表': table_name,
|
1219
1210
|
'批次': batch_id,
|
1220
1211
|
'批次处理完成': i // batch_size + 1,
|
1221
|
-
'
|
1222
|
-
'
|
1223
|
-
'
|
1224
|
-
'
|
1225
|
-
'
|
1226
|
-
'rows_per_second': successful_rows / batch_elapsed if batch_elapsed > 0 else 0
|
1212
|
+
'总批次': (len(data) + batch_size - 1) // batch_size,
|
1213
|
+
'数据量': len(batch),
|
1214
|
+
'插入': batch_inserted,
|
1215
|
+
'跳过': batch_skipped,
|
1216
|
+
'失败': batch_failed
|
1227
1217
|
})
|
1228
1218
|
|
1229
1219
|
logger.info('插入完成', {
|
@@ -1,5 +1,5 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
|
-
mdbq/__version__.py,sha256=
|
2
|
+
mdbq/__version__.py,sha256=ZrDWol2b_rsMyMYX5Mye1GkN4Kl5Fbba7vG9AQ17vqo,18
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
4
|
mdbq/aggregation/optimize.py,sha256=2oalzD9weZhDclUC22OLxYa8Zj7KnmsGUoUau_Jlyc4,19796
|
5
5
|
mdbq/aggregation/query_data.py,sha256=5_OzjGR5Sq00q-EgAYmSE5V9i4Solw9y4hkldl4mvt8,179808
|
@@ -12,7 +12,7 @@ mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
|
12
12
|
mdbq/mysql/deduplicator.py,sha256=brhX3eyE8-kn3nAYweKfBbAkXiNcyw_pL4CTyPqmPBg,21983
|
13
13
|
mdbq/mysql/mysql.py,sha256=Fzaqbjg5g3HdNl50jInIrdurdzcgR2CCzdKLVImD1-Q,55339
|
14
14
|
mdbq/mysql/s_query.py,sha256=X055aLRAgxVvueXx4NbfNjp6MyBI02_XBb1pTKw09L0,8660
|
15
|
-
mdbq/mysql/uploader.py,sha256=
|
15
|
+
mdbq/mysql/uploader.py,sha256=OTUKnAxOLpd_SpXsSiJDTWQFuyU2hh5WweY_n3pMijc,52756
|
16
16
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
17
17
|
mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
|
18
18
|
mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
|
@@ -25,7 +25,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
|
25
25
|
mdbq/redis/getredis.py,sha256=Uk8-cOWT0JU1qRyIVqdbYokSLvkDIAfcokmYj1ebw8k,24104
|
26
26
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
27
27
|
mdbq/spider/aikucun.py,sha256=OhyEv1VyAKTOHjLDM37iNDQeRg5OnrNoKODoG2VxHes,19806
|
28
|
-
mdbq-3.9.
|
29
|
-
mdbq-3.9.17.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
30
|
-
mdbq-3.9.17.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
31
|
-
mdbq-3.9.17.dist-info/RECORD,,
|
28
|
+
mdbq-3.9.18.dist-info/METADATA,sha256=kVj3t7BkNCh6hcmJ4qR5xcSD34FC6vb_ce-sxuZXCpY,364
|
29
|
+
mdbq-3.9.18.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
30
|
+
mdbq-3.9.18.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
31
|
+
mdbq-3.9.18.dist-info/RECORD,,
|
File without changes
|
File without changes
|