mdbq 3.10.3__py3-none-any.whl → 3.10.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/__version__.py CHANGED
@@ -1 +1 @@
1
- VERSION = '3.10.3'
1
+ VERSION = '3.10.4'
mdbq/mysql/uploader.py CHANGED
@@ -106,6 +106,8 @@ class MySQLUploader:
106
106
  :param connect_timeout: 连接超时(秒),默认为10
107
107
  :param read_timeout: 读取超时(秒),默认为30
108
108
  :param write_timeout: 写入超时(秒),默认为30
109
+ :param base_excute_col: # 排重插入数据时始终排除该列
110
+ :param case_sensitive: # 是否保持大小写敏感,默认为False(转为小写)
109
111
  :param ssl: SSL配置字典,默认为None
110
112
  """
111
113
  self.username = username
@@ -120,6 +122,8 @@ class MySQLUploader:
120
122
  self.connect_timeout = connect_timeout
121
123
  self.read_timeout = read_timeout
122
124
  self.write_timeout = write_timeout
125
+ self.base_excute_col = ['id', '更新时间'] # 排重插入数据时始终排除该列
126
+ self.case_sensitive = False # 是否保持大小写敏感,默认为False(转为小写)
123
127
  self.ssl = ssl
124
128
  self._prepared_statements = StatementCache(maxsize=100)
125
129
  self._max_cached_statements = 100 # 用于控制 StatementCache 类中缓存的 SQL 语句数量,最多缓存 100 条 SQL 语句
@@ -377,6 +381,10 @@ class MySQLUploader:
377
381
  })
378
382
  raise ValueError(f"无效的标识符: `{identifier}`")
379
383
 
384
+ # 统一转为小写(除非明确要求大小写敏感)
385
+ if not self.case_sensitive:
386
+ identifier = identifier.lower()
387
+
380
388
  # 移除非法字符,只保留字母、数字、下划线和美元符号
381
389
  cleaned = re.sub(r'[^\w\u4e00-\u9fff$]', '_', identifier)
382
390
 
@@ -673,7 +681,7 @@ class MySQLUploader:
673
681
  with self._get_connection() as conn:
674
682
  with conn.cursor() as cursor:
675
683
  cursor.execute(sql, (db_name, table_name))
676
- set_typ = {row['COLUMN_NAME']: row['DATA_TYPE'] for row in cursor.fetchall()}
684
+ set_typ = {row['COLUMN_NAME'].lower(): row['DATA_TYPE'] for row in cursor.fetchall()}
677
685
  logger.debug(f"`{db_name}`.`{table_name}`: 获取表的列信息: `{set_typ}`")
678
686
  return set_typ
679
687
  except Exception as e:
@@ -1154,6 +1162,67 @@ class MySQLUploader:
1154
1162
  return 'batch'
1155
1163
  return mode.lower()
1156
1164
 
1165
def _build_simple_insert_sql(self, db_name, table_name, columns, update_on_duplicate):
    """
    Build a plain parameterized ``INSERT ... VALUES`` statement with no duplicate check.

    :param db_name: database name, interpolated between backticks as given
    :param table_name: table name, interpolated between backticks as given
    :param columns: column names to insert; each is passed through
        ``self._validate_identifier`` before being placed in the statement
    :param update_on_duplicate: accepted so the signature mirrors the duplicate-check
        builder, but ignored here -- this builder never emits ``ON DUPLICATE KEY UPDATE``
    :return: SQL text containing one ``%s`` placeholder per column, in ``columns`` order
    """
    safe_columns = [self._validate_identifier(col) for col in columns]
    column_list = '`,`'.join(safe_columns)
    placeholders = ','.join(['%s'] * len(safe_columns))

    return f"""
        INSERT INTO `{db_name}`.`{table_name}`
        (`{column_list}`)
        VALUES ({placeholders})
    """
1182
+
1183
def _build_duplicate_check_sql(self, db_name, table_name, all_columns,
                               duplicate_columns, update_on_duplicate, set_typ):
    """
    Build the parameterized INSERT statement used when duplicate checking is enabled.

    Two statement shapes are produced:

    * ``update_on_duplicate`` truthy: ``INSERT ... VALUES ... ON DUPLICATE KEY UPDATE``,
      assigning every inserted column from ``VALUES(col)``.
    * otherwise: ``INSERT ... SELECT ... FROM DUAL WHERE NOT EXISTS (...)``, where the
      sub-query compares the de-duplication columns against extra ``%s`` placeholders.

    :param db_name: database name, interpolated between backticks as given
    :param table_name: table name, interpolated between backticks as given
    :param all_columns: columns to insert; each is passed through ``self._validate_identifier``
    :param duplicate_columns: columns to de-duplicate on; ``None`` or an empty list means
        "use ``all_columns``". Names listed in ``self.base_excute_col`` are always dropped.
    :param update_on_duplicate: selects between the two statement shapes described above
    :param set_typ: mapping of column name to column type; a type starting with
        ``decimal`` makes the comparison use ``ROUND`` on both sides
    :return: SQL text with ``%s`` placeholders
    :raises ValueError: presumably, when ``self._validate_identifier`` rejects a name
        (its definition is outside this block -- confirm)
    """

    def _without_excluded(cols):
        # Drop the columns that must never take part in de-duplication.
        return [col for col in cols if col.lower() not in self.base_excute_col]

    safe_columns = [self._validate_identifier(col) for col in all_columns]
    column_list = '`,`'.join(safe_columns)
    placeholders = ','.join(['%s'] * len(safe_columns))

    if update_on_duplicate:
        # Use the validated names in the update clause as well, so it always refers to
        # exactly the identifiers that appear in the column list.
        update_clause = ", ".join(f"`{col}` = VALUES(`{col}`)" for col in safe_columns)
        return f"""
            INSERT INTO `{db_name}`.`{table_name}`
            (`{column_list}`)
            VALUES ({placeholders})
            ON DUPLICATE KEY UPDATE {update_clause}
        """

    # Resolve the de-duplication columns. `duplicate_columns` may be None; the fallback to
    # all columns also has the always-excluded columns removed. The unfiltered list is the
    # last resort so the WHERE clause is never empty.
    # NOTE(review): the caller must bind one extra value per de-duplication column, in this
    # same order -- verify against `_process_single_row`.
    dup_cols = (
        _without_excluded(duplicate_columns or [])
        or _without_excluded(all_columns)
        or list(all_columns)
    )

    conditions = []
    for col in dup_cols:
        safe_col = self._validate_identifier(col)
        # Type keys may have been lowercased when the column info was loaded, so retry
        # the lookup with the lowercased name before giving up.
        col_type = (set_typ.get(col) or set_typ.get(col.lower()) or '').lower()
        if col_type.startswith('decimal'):
            # Compare decimals at the column's scale on both sides.
            scale = self._get_decimal_scale(col_type)
            conditions.append(f"ROUND(`{safe_col}`, {scale}) = ROUND(%s, {scale})")
        else:
            conditions.append(f"`{safe_col}` = %s")
    where_clause = ' AND '.join(conditions)

    return f"""
        INSERT INTO `{db_name}`.`{table_name}`
        (`{column_list}`)
        SELECT {placeholders}
        FROM DUAL
        WHERE NOT EXISTS (
            SELECT 1 FROM `{db_name}`.`{table_name}`
            WHERE {where_clause}
        )
    """
1225
+
1157
1226
  def _prepare_insert_sql(
1158
1227
  self,
1159
1228
  db_name: str,
@@ -1163,55 +1232,29 @@ class MySQLUploader:
1163
1232
  duplicate_columns: Optional[List[str]],
1164
1233
  update_on_duplicate: bool
1165
1234
  ) -> str:
1166
- """准备插入SQL语句"""
1167
- # 获取所有列名(排除 `id`、`更新时间` 列)
1168
- all_columns = [col for col in set_typ.keys() if col.lower() not in ['id', '更新时间']]
1169
- safe_columns = [self._validate_identifier(col) for col in all_columns]
1170
- placeholders = ','.join(['%s'] * len(safe_columns))
1235
+ """
1236
+ 准备插入SQL语句
1171
1237
 
1172
- if check_duplicate:
1173
- if not duplicate_columns:
1174
- duplicate_columns = all_columns
1175
- else:
1176
- duplicate_columns = [col for col in duplicate_columns if col.lower() not in ['id', '更新时间']]
1177
-
1178
- conditions = []
1179
- for col in duplicate_columns:
1180
- col_type = set_typ.get(col, '').lower()
1181
- if col_type.startswith('decimal'):
1182
- scale_match = re.search(r'decimal\(\d+,(\d+)\)', col_type)
1183
- scale = int(scale_match.group(1)) if scale_match else 2
1184
- conditions.append(f"ROUND(`{self._validate_identifier(col)}`, {scale}) = ROUND(%s, {scale})")
1185
- else:
1186
- conditions.append(f"`{self._validate_identifier(col)}` = %s")
1187
-
1188
- where_clause = " AND ".join(conditions)
1189
-
1190
- if update_on_duplicate:
1191
- # 更新模式 - 使用ON DUPLICATE KEY UPDATE语法
1192
- update_clause = ", ".join([f"`{col}` = VALUES(`{col}`)" for col in all_columns])
1193
- return f"""
1194
- INSERT INTO `{db_name}`.`{table_name}`
1195
- (`{'`,`'.join(safe_columns)}`)
1196
- VALUES ({placeholders})
1197
- ON DUPLICATE KEY UPDATE {update_clause}
1198
- """
1199
- else:
1200
- return f"""INSERT INTO `{db_name}`.`{table_name}`
1201
- (`{'`,`'.join(safe_columns)}`)
1202
- SELECT {placeholders}
1203
- FROM DUAL
1204
- WHERE NOT EXISTS (
1205
- SELECT 1 FROM `{db_name}`.`{table_name}`
1206
- WHERE {where_clause}
1207
- )
1208
- """
1209
- else:
1210
- return f"""
1211
- INSERT INTO `{db_name}`.`{table_name}`
1212
- (`{'`,`'.join(safe_columns)}`)
1213
- VALUES ({placeholders})
1214
- """
1238
+ 1. check_duplicate=False 时,忽略 duplicate_columns 和 update_on_duplicate 参数,直接插入全部data。
1239
+ 2. check_duplicate=False 且 update_on_duplicate=True 时,由于 check_duplicate=False,直接插入全部data。
1240
+ 3. 当 check_duplicate=True 且 duplicate_columns=[] 且 update_on_duplicate=True 时,获取数据库所有列(但排除`id`和`更新时间`列),按这些列(不含`id`和`更新时间`)排重插入,遇到重复数据时更新旧数据。
1241
+ 4. 当 check_duplicate=True 且 duplicate_columns=[] 且 update_on_duplicate=False 时,获取数据库所有列(但排除`id`和`更新时间`列),按这些列(不含`id`和`更新时间`)排重插入,不考虑是否更新旧数据。
1242
+ 5. 当 check_duplicate=True 且 duplicate_columns 指定了排重列且 update_on_duplicate=True 时,按 duplicate_columns 指定的列(但排除`id`和`更新时间`)排重插入,遇到重复数据时更新旧数据。
1243
+ 6. 当 check_duplicate=True 且 duplicate_columns 指定了排重列且 update_on_duplicate=False 时,按 duplicate_columns 指定的列(但排除`id`和`更新时间`)排重插入,不考虑是否更新旧数据。
1244
+
1245
+ """
1246
+ # 获取所有列名(此处仅排除id列;`更新时间`列仍保留在插入列中)
1247
+ all_columns = [col for col in set_typ.keys()
1248
+ if col.lower() != 'id']
1249
+
1250
+ # 情况1-2:不检查重复
1251
+ if not check_duplicate:
1252
+ return self._build_simple_insert_sql(db_name, table_name, all_columns,
1253
+ update_on_duplicate)
1254
+
1255
+ # 情况3-6:检查重复
1256
+ return self._build_duplicate_check_sql(db_name, table_name, all_columns,
1257
+ duplicate_columns, update_on_duplicate, set_typ)
1215
1258
 
1216
1259
  def _execute_batch_insert(
1217
1260
  self,
@@ -1228,7 +1271,8 @@ class MySQLUploader:
1228
1271
  ) -> Tuple[int, int, int]:
1229
1272
  """执行批量插入操作"""
1230
1273
  # 获取所有列名(排除id列)
1231
- all_columns = [col for col in set_typ.keys() if col.lower() != 'id']
1274
+ all_columns = [col for col in set_typ.keys()
1275
+ if col.lower() != 'id']
1232
1276
 
1233
1277
  total_inserted = 0
1234
1278
  total_skipped = 0
@@ -1277,6 +1321,7 @@ class MySQLUploader:
1277
1321
  try:
1278
1322
  for row_idx, row in enumerate(batch, 1):
1279
1323
  result = self._process_single_row(
1324
+ db_name, table_name,
1280
1325
  cursor, row, all_columns, sql,
1281
1326
  check_duplicate, duplicate_columns
1282
1327
  )
@@ -1308,6 +1353,7 @@ class MySQLUploader:
1308
1353
  for row_idx, row in enumerate(batch, 1):
1309
1354
  try:
1310
1355
  result = self._process_single_row(
1356
+ db_name, table_name,
1311
1357
  cursor, row, all_columns, sql,
1312
1358
  check_duplicate, duplicate_columns
1313
1359
  )
@@ -1360,6 +1406,8 @@ class MySQLUploader:
1360
1406
 
1361
1407
  def _process_single_row(
1362
1408
  self,
1409
+ db_name,
1410
+ table_name,
1363
1411
  cursor,
1364
1412
  row: Dict,
1365
1413
  all_columns: List[str],
@@ -1374,6 +1422,8 @@ class MySQLUploader:
1374
1422
  if check_duplicate:
1375
1423
  row_values += [row.get(col) for col in duplicate_columns]
1376
1424
 
1425
+ # logger.info(sql)
1426
+ # logger.info(row_values)
1377
1427
  cursor.execute(sql, row_values)
1378
1428
 
1379
1429
  if check_duplicate:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 3.10.3
3
+ Version: 3.10.4
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1,5 +1,5 @@
1
1
  mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
2
- mdbq/__version__.py,sha256=S1pYeTgXo5MtZqzwck9ASp8x1pB1PZ33oC1NI7fY9dI,18
2
+ mdbq/__version__.py,sha256=L-43kDdR8o3iwkH5IR35xUFgTPugEww0j_gk9jPlkCU,18
3
3
  mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
4
4
  mdbq/aggregation/optimize.py,sha256=2oalzD9weZhDclUC22OLxYa8Zj7KnmsGUoUau_Jlyc4,19796
5
5
  mdbq/aggregation/query_data.py,sha256=5_OzjGR5Sq00q-EgAYmSE5V9i4Solw9y4hkldl4mvt8,179808
@@ -12,7 +12,7 @@ mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
12
12
  mdbq/mysql/deduplicator.py,sha256=brhX3eyE8-kn3nAYweKfBbAkXiNcyw_pL4CTyPqmPBg,21983
13
13
  mdbq/mysql/mysql.py,sha256=Fzaqbjg5g3HdNl50jInIrdurdzcgR2CCzdKLVImD1-Q,55339
14
14
  mdbq/mysql/s_query.py,sha256=X055aLRAgxVvueXx4NbfNjp6MyBI02_XBb1pTKw09L0,8660
15
- mdbq/mysql/uploader.py,sha256=9wgFxsiFSAngdX2pWj57jElaspwqfPtadC-xQqvweUc,59066
15
+ mdbq/mysql/uploader.py,sha256=ElT1-Jq5nR6qg8re0rfs26YGNPHK6zsNGc3ni7TnWFA,61954
16
16
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
17
17
  mdbq/other/download_sku_picture.py,sha256=YU8DxKMXbdeE1OOKEA848WVp62jYHw5O4tXTjUdq9H0,44832
18
18
  mdbq/other/otk.py,sha256=iclBIFbQbhlqzUbcMMoePXBpcP1eZ06ZtjnhcA_EbmE,7241
@@ -25,7 +25,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
25
25
  mdbq/redis/getredis.py,sha256=Uk8-cOWT0JU1qRyIVqdbYokSLvkDIAfcokmYj1ebw8k,24104
26
26
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
27
27
  mdbq/spider/aikucun.py,sha256=YyPWa_nOH1zs8wgTDcgzn5w8szGKWPyWzmWMVIPkFnU,21638
28
- mdbq-3.10.3.dist-info/METADATA,sha256=TM8JAb8gTTte7N0agKbaDWZ14bmkl66dgCxIbLTqCbc,364
29
- mdbq-3.10.3.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
- mdbq-3.10.3.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
- mdbq-3.10.3.dist-info/RECORD,,
28
+ mdbq-3.10.4.dist-info/METADATA,sha256=z-9kwc0z6aVg7ugS9FPf2TZd1vfyNBYz0qpvsfW3b_w,364
29
+ mdbq-3.10.4.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
30
+ mdbq-3.10.4.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
31
+ mdbq-3.10.4.dist-info/RECORD,,
File without changes