mdbq 4.1.10__tar.gz → 4.1.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mdbq might be problematic.

Files changed (46)
  1. {mdbq-4.1.10 → mdbq-4.1.11}/PKG-INFO +2 -2
  2. mdbq-4.1.11/mdbq/__version__.py +1 -0
  3. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/mysql/uploader.py +172 -26
  4. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq.egg-info/PKG-INFO +2 -2
  5. mdbq-4.1.10/mdbq/__version__.py +0 -1
  6. {mdbq-4.1.10 → mdbq-4.1.11}/README.txt +0 -0
  7. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/__init__.py +0 -0
  8. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/auth/__init__.py +0 -0
  9. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/auth/auth_backend.py +0 -0
  10. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/auth/crypto.py +0 -0
  11. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/auth/rate_limiter.py +0 -0
  12. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/js/__init__.py +0 -0
  13. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/js/jc.py +0 -0
  14. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/log/__init__.py +0 -0
  15. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/log/mylogger.py +0 -0
  16. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/myconf/__init__.py +0 -0
  17. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/myconf/myconf.py +0 -0
  18. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/mysql/__init__.py +0 -0
  19. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/mysql/deduplicator.py +0 -0
  20. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/mysql/mysql.py +0 -0
  21. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/mysql/s_query.py +0 -0
  22. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/mysql/unique_.py +0 -0
  23. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/other/__init__.py +0 -0
  24. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/other/download_sku_picture.py +0 -0
  25. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/other/error_handler.py +0 -0
  26. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/other/otk.py +0 -0
  27. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/other/pov_city.py +0 -0
  28. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/other/ua_sj.py +0 -0
  29. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/pbix/__init__.py +0 -0
  30. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/pbix/pbix_refresh.py +0 -0
  31. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/pbix/refresh_all.py +0 -0
  32. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/redis/__init__.py +0 -0
  33. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/redis/getredis.py +0 -0
  34. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/redis/redis_cache.py +0 -0
  35. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/route/__init__.py +0 -0
  36. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/route/analytics.py +0 -0
  37. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/route/monitor.py +0 -0
  38. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/route/routes.py +0 -0
  39. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/selenium/__init__.py +0 -0
  40. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/selenium/get_driver.py +0 -0
  41. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq/spider/__init__.py +0 -0
  42. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq.egg-info/SOURCES.txt +0 -0
  43. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq.egg-info/dependency_links.txt +0 -0
  44. {mdbq-4.1.10 → mdbq-4.1.11}/mdbq.egg-info/top_level.txt +0 -0
  45. {mdbq-4.1.10 → mdbq-4.1.11}/setup.cfg +0 -0
  46. {mdbq-4.1.10 → mdbq-4.1.11}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: mdbq
3
- Version: 4.1.10
3
+ Version: 4.1.11
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -0,0 +1 @@
1
+ VERSION = '4.1.11'
@@ -111,6 +111,7 @@ class MySQLUploader:
111
111
  :param read_timeout: 读取超时(秒),默认为30
112
112
  :param write_timeout: 写入超时(秒),默认为30
113
113
  :param ssl: SSL配置字典,默认为None
114
+ :param auto_creat_missing_cols: 自动添加缺失列,默认为False,建议手动维护表结构
114
115
  """
115
116
  self.username = username
116
117
  self.password = password
@@ -134,6 +135,7 @@ class MySQLUploader:
134
135
  self._table_metadata_cache = {}
135
136
  self.metadata_cache_ttl = 300 # 5分钟缓存时间
136
137
  self.pool = self._create_connection_pool() # 创建连接池
138
+ self.auto_creat_missing_cols = False # 自动添加缺失列,正常不要自动添加,建议手动维护表结构
137
139
 
138
140
  def _create_connection_pool(self) -> PooledDB:
139
141
  """
@@ -695,15 +697,16 @@ class MySQLUploader:
695
697
  break
696
698
 
697
699
  if not allow_null:
698
- logger.warning("该列不允许为空值", {
699
- "": db_name,
700
- "": table_name,
701
- "allow_null": allow_null,
702
- "": col_name,
703
- "": original_value,
704
- "兜底值": fallback
705
- })
706
- return fallback # 直接返回兜底值,而不是抛出异常
700
+ # 注释掉,这里可能会产生大量日志
701
+ # logger.debug("该列不允许为空值", {
702
+ # "": db_name,
703
+ # "": table_name,
704
+ # "allow_null": allow_null,
705
+ # "": col_name,
706
+ # "": original_value,
707
+ # "兜底值": fallback
708
+ # })
709
+ return fallback # 直接返回兜底值
707
710
 
708
711
  return None # 允许空值时返回None
709
712
 
@@ -799,7 +802,13 @@ class MySQLUploader:
799
802
 
800
803
  # 统一处理空值
801
804
  if is_empty_value:
802
- return self._get_fallback_value(column_type_lower, allow_null, db_name, table_name, col_name, value)
805
+ fallback_value = self._get_fallback_value(column_type_lower, allow_null, db_name, table_name, col_name, value)
806
+ # 如果返回了兜底值(非None),直接返回,不再进行后续验证
807
+ # 因为兜底值已经是根据列类型设计的合适值
808
+ if fallback_value is not None:
809
+ return fallback_value
810
+ # 如果返回None(允许空值的情况),继续后续处理
811
+ return None
803
812
 
804
813
  # JSON类型验证和转换
805
814
  if 'json' in column_type_lower:
@@ -1072,14 +1081,41 @@ class MySQLUploader:
1072
1081
  '列': self._shorten_for_log(table_columns),
1073
1082
  })
1074
1083
  raise ValueError(f"获取列失败 `{db_name}`.`{table_name}`")
1075
- for col in set_typ:
1076
- if col not in table_columns:
1077
- logger.error('列不存在', {
1084
+ # 检查并自动添加缺失的列
1085
+ missing_columns = [col for col in set_typ if col not in table_columns]
1086
+ if missing_columns:
1087
+ if not self.auto_creat_missing_cols:
1088
+ logger.error('列不存在且不支持自动添加,请手动维护表结构,并补齐缺失列', {
1078
1089
  '库': db_name,
1079
1090
  '表': table_name,
1080
- '': col,
1091
+ '缺失列数': len(missing_columns),
1092
+ '缺失列': missing_columns,
1081
1093
  })
1082
- raise ValueError(f"列不存在: `{col}` -> `{db_name}`.`{table_name}`")
1094
+ raise ValueError(f"列不存在: `{missing_columns}` -> `{db_name}`.`{table_name}`")
1095
+ else:
1096
+ # 表有缺失列时报错,建议不允许自动添加,手动检查数据一致性,以免产生不必要的表错误
1097
+ # 自动添加缺失的列
1098
+ for col in missing_columns:
1099
+ try:
1100
+ self._add_column_to_table(db_name, table_name, col, set_typ[col], allow_null)
1101
+ logger.info('自动添加缺失列', {
1102
+ '库': db_name,
1103
+ '表': table_name,
1104
+ '列': col,
1105
+ '类型': set_typ[col]
1106
+ })
1107
+ except Exception as e:
1108
+ logger.error('添加列失败', {
1109
+ '库': db_name,
1110
+ '表': table_name,
1111
+ '列': col,
1112
+ '类型': set_typ[col],
1113
+ '错误': str(e)
1114
+ })
1115
+ raise ValueError(f"添加列失败: `{col}` -> `{db_name}`.`{table_name}`: {str(e)}")
1116
+
1117
+ # 重新获取表列信息
1118
+ table_columns = self._get_table_columns(db_name, table_name)
1083
1119
  if date_column and date_column in table_columns:
1084
1120
  try:
1085
1121
  self._ensure_index(db_name, table_name, date_column)
@@ -1300,21 +1336,67 @@ class MySQLUploader:
1300
1336
  prepared_row[col_name] = self._validate_value(None, filtered_set_typ[col_name], allow_null, db_name, table_name, col_name)
1301
1337
  except ValueError as e:
1302
1338
  if not allow_null:
1303
- error_msg = f"行号:{row_idx} -> 缺失列: `{col_name}`, 且不允许空值"
1304
- logger.error(error_msg, {'row': self._shorten_for_log(row)})
1305
- raise ValueError(error_msg)
1306
- prepared_row[col_name] = None
1339
+ # 如果不允许空值但验证失败,尝试使用兜底值
1340
+ try:
1341
+ fallback_value = self._get_fallback_value(filtered_set_typ[col_name].lower(), allow_null, db_name, table_name, col_name, None)
1342
+ if fallback_value is not None:
1343
+ prepared_row[col_name] = fallback_value
1344
+ logger.warning(f"行号:{row_idx} -> 缺失列: `{col_name}`, 使用兜底值: {fallback_value}", {'row': self._shorten_for_log(row)})
1345
+ else:
1346
+ error_msg = f"行号:{row_idx} -> 缺失列: `{col_name}`, 且不允许空值"
1347
+ logger.error(error_msg, {'row': self._shorten_for_log(row)})
1348
+ raise ValueError(error_msg)
1349
+ except Exception:
1350
+ error_msg = f"行号:{row_idx} -> 缺失列: `{col_name}`, 且不允许空值"
1351
+ logger.error(error_msg, {'row': self._shorten_for_log(row)})
1352
+ raise ValueError(error_msg)
1353
+ else:
1354
+ prepared_row[col_name] = None
1307
1355
  else:
1308
1356
  try:
1309
1357
  prepared_row[col_name] = self._validate_value(row[col_name], filtered_set_typ[col_name], allow_null, db_name, table_name, col_name)
1310
1358
  except ValueError as e:
1311
- logger.error('数据验证失败', {
1312
- '列': col_name,
1313
- '行': row_idx,
1314
- '报错': str(e),
1315
- 'row': self._shorten_for_log(row),
1316
- })
1317
- raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
1359
+ # 如果数据验证失败,检查是否为空值且不允许空值,尝试使用兜底值
1360
+ original_value = row[col_name]
1361
+ is_empty_original = (original_value is None or
1362
+ original_value == '' or
1363
+ (not isinstance(original_value, (list, dict)) and
1364
+ pd.isna(original_value) if hasattr(pd, 'isna') else False))
1365
+
1366
+ if is_empty_original and not allow_null:
1367
+ try:
1368
+ fallback_value = self._get_fallback_value(filtered_set_typ[col_name].lower(), allow_null, db_name, table_name, col_name, original_value)
1369
+ if fallback_value is not None:
1370
+ prepared_row[col_name] = fallback_value
1371
+ logger.warning(f"行:{row_idx}, 列:`{col_name}` -> 原值验证失败,使用兜底值: {fallback_value}", {
1372
+ '原值': original_value,
1373
+ '兜底值': fallback_value,
1374
+ 'row': self._shorten_for_log(row)
1375
+ })
1376
+ else:
1377
+ logger.error('数据验证失败', {
1378
+ '列': col_name,
1379
+ '行': row_idx,
1380
+ '报错': str(e),
1381
+ 'row': self._shorten_for_log(row),
1382
+ })
1383
+ raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
1384
+ except Exception:
1385
+ logger.error('数据验证失败', {
1386
+ '列': col_name,
1387
+ '行': row_idx,
1388
+ '报错': str(e),
1389
+ 'row': self._shorten_for_log(row),
1390
+ })
1391
+ raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
1392
+ else:
1393
+ logger.error('数据验证失败', {
1394
+ '列': col_name,
1395
+ '行': row_idx,
1396
+ '报错': str(e),
1397
+ 'row': self._shorten_for_log(row),
1398
+ })
1399
+ raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
1318
1400
  prepared_data.append(prepared_row)
1319
1401
  return prepared_data, filtered_set_typ
1320
1402
 
@@ -2080,6 +2162,70 @@ class MySQLUploader:
2080
2162
  except Exception as e:
2081
2163
  logger.error('创建索引失败', {'库': db_name, '表': table_name, '列': column, '错误': str(e)})
2082
2164
  raise
2165
+
2166
+ @_execute_with_retry
2167
+ def _add_column_to_table(self, db_name: str, table_name: str, column: str, column_type: str, allow_null: bool = False):
2168
+ """
2169
+ 添加列到指定表。
2170
+
2171
+ :param db_name: 数据库名
2172
+ :param table_name: 表名
2173
+ :param column: 需要添加的列名
2174
+ :param column_type: 列的数据类型
2175
+ :param allow_null: 是否允许空值,默认为False
2176
+ """
2177
+ db_name = self._validate_identifier(db_name, is_database=True)
2178
+ table_name = self._validate_identifier(table_name)
2179
+ column = self._validate_identifier(column)
2180
+
2181
+ # 构建ALTER TABLE语句
2182
+ null_constraint = "NULL" if allow_null else "NOT NULL"
2183
+
2184
+ # 为新添加的列设置默认值
2185
+ default_value = ""
2186
+ if not allow_null:
2187
+ column_type_lower = column_type.lower()
2188
+ if any(t in column_type_lower for t in ['int', 'bigint', 'tinyint', 'smallint', 'mediumint']):
2189
+ default_value = " DEFAULT 0"
2190
+ elif any(t in column_type_lower for t in ['decimal', 'float', 'double']):
2191
+ default_value = " DEFAULT 0.0"
2192
+ elif any(t in column_type_lower for t in ['varchar', 'text', 'char', 'mediumtext', 'longtext']):
2193
+ default_value = " DEFAULT 'none'"
2194
+ elif 'date' in column_type_lower:
2195
+ if 'datetime' in column_type_lower or 'timestamp' in column_type_lower:
2196
+ default_value = " DEFAULT '1970-01-01 00:00:00'"
2197
+ else:
2198
+ default_value = " DEFAULT '1970-01-01'"
2199
+ elif 'json' in column_type_lower:
2200
+ default_value = " DEFAULT '{}'"
2201
+
2202
+ sql = f'ALTER TABLE `{db_name}`.`{table_name}` ADD COLUMN `{column}` {column_type} {null_constraint}{default_value}'
2203
+
2204
+ conn = None
2205
+ try:
2206
+ with self._get_connection() as conn:
2207
+ with conn.cursor() as cursor:
2208
+ cursor.execute(sql)
2209
+ conn.commit()
2210
+ logger.debug('已为表添加列', {
2211
+ '库': db_name,
2212
+ '表': table_name,
2213
+ '列': column,
2214
+ '类型': column_type,
2215
+ '允许空值': allow_null
2216
+ })
2217
+ except Exception as e:
2218
+ logger.error('添加列失败', {
2219
+ '库': db_name,
2220
+ '表': table_name,
2221
+ '列': column,
2222
+ '类型': column_type,
2223
+ '错误': str(e),
2224
+ 'SQL': sql
2225
+ })
2226
+ if conn is not None:
2227
+ conn.rollback()
2228
+ raise
2083
2229
 
2084
2230
  def __enter__(self):
2085
2231
  return self
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: mdbq
3
- Version: 4.1.10
3
+ Version: 4.1.11
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1 +0,0 @@
1
- VERSION = '4.1.10'
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes