mdbq 4.1.9__tar.gz → 4.1.11__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.

Potentially problematic release.


This version of mdbq might be problematic. Click here for more details.

Files changed (46)
  1. {mdbq-4.1.9 → mdbq-4.1.11}/PKG-INFO +2 -2
  2. mdbq-4.1.11/mdbq/__version__.py +1 -0
  3. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/mysql/uploader.py +235 -70
  4. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq.egg-info/PKG-INFO +2 -2
  5. mdbq-4.1.9/mdbq/__version__.py +0 -1
  6. {mdbq-4.1.9 → mdbq-4.1.11}/README.txt +0 -0
  7. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/__init__.py +0 -0
  8. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/auth/__init__.py +0 -0
  9. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/auth/auth_backend.py +0 -0
  10. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/auth/crypto.py +0 -0
  11. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/auth/rate_limiter.py +0 -0
  12. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/js/__init__.py +0 -0
  13. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/js/jc.py +0 -0
  14. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/log/__init__.py +0 -0
  15. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/log/mylogger.py +0 -0
  16. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/myconf/__init__.py +0 -0
  17. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/myconf/myconf.py +0 -0
  18. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/mysql/__init__.py +0 -0
  19. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/mysql/deduplicator.py +0 -0
  20. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/mysql/mysql.py +0 -0
  21. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/mysql/s_query.py +0 -0
  22. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/mysql/unique_.py +0 -0
  23. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/other/__init__.py +0 -0
  24. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/other/download_sku_picture.py +0 -0
  25. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/other/error_handler.py +0 -0
  26. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/other/otk.py +0 -0
  27. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/other/pov_city.py +0 -0
  28. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/other/ua_sj.py +0 -0
  29. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/pbix/__init__.py +0 -0
  30. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/pbix/pbix_refresh.py +0 -0
  31. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/pbix/refresh_all.py +0 -0
  32. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/redis/__init__.py +0 -0
  33. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/redis/getredis.py +0 -0
  34. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/redis/redis_cache.py +0 -0
  35. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/route/__init__.py +0 -0
  36. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/route/analytics.py +0 -0
  37. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/route/monitor.py +0 -0
  38. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/route/routes.py +0 -0
  39. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/selenium/__init__.py +0 -0
  40. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/selenium/get_driver.py +0 -0
  41. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq/spider/__init__.py +0 -0
  42. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq.egg-info/SOURCES.txt +0 -0
  43. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq.egg-info/dependency_links.txt +0 -0
  44. {mdbq-4.1.9 → mdbq-4.1.11}/mdbq.egg-info/top_level.txt +0 -0
  45. {mdbq-4.1.9 → mdbq-4.1.11}/setup.cfg +0 -0
  46. {mdbq-4.1.9 → mdbq-4.1.11}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: mdbq
3
- Version: 4.1.9
3
+ Version: 4.1.11
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -0,0 +1 @@
1
+ VERSION = '4.1.11'
@@ -111,6 +111,7 @@ class MySQLUploader:
111
111
  :param read_timeout: 读取超时(秒),默认为30
112
112
  :param write_timeout: 写入超时(秒),默认为30
113
113
  :param ssl: SSL配置字典,默认为None
114
+ :param auto_creat_missing_cols: 自动添加缺失列,默认为False,建议手动维护表结构
114
115
  """
115
116
  self.username = username
116
117
  self.password = password
@@ -134,6 +135,7 @@ class MySQLUploader:
134
135
  self._table_metadata_cache = {}
135
136
  self.metadata_cache_ttl = 300 # 5分钟缓存时间
136
137
  self.pool = self._create_connection_pool() # 创建连接池
138
+ self.auto_creat_missing_cols = False # 自动添加缺失列,正常不要自动添加,建议手动维护表结构
137
139
 
138
140
  def _create_connection_pool(self) -> PooledDB:
139
141
  """
@@ -656,6 +658,58 @@ class MySQLUploader:
656
658
  logger.error('无效的日期格式', {'值': value})
657
659
  raise ValueError(f"无效的日期格式: `{value}`")
658
660
 
661
+ def _get_fallback_value(self, column_type_lower: str, allow_null: bool, db_name: str = None, table_name: str = None, col_name: str = None, original_value: Any = None) -> Any:
662
+ """
663
+ 获取空值的兜底填充值
664
+ """
665
+ # 兜底填充值映射
666
+ fallback_map = {
667
+ 'int': 0,
668
+ 'bigint': 0,
669
+ 'tinyint': 0,
670
+ 'smallint': 0,
671
+ 'mediumint': 0,
672
+ 'decimal': 0.0,
673
+ 'float': 0.0,
674
+ 'double': 0.0,
675
+ 'date': '1970-01-01',
676
+ 'datetime': '1970-01-01 00:00:00',
677
+ 'timestamp': '1970-01-01 00:00:00',
678
+ 'json': '{}',
679
+ 'varchar': 'none',
680
+ 'text': 'none',
681
+ 'char': 'none',
682
+ 'mediumtext': 'none',
683
+ 'longtext': 'none',
684
+ 'enum': None, # enum类型需要特殊处理,使用第一个可选值
685
+ 'set': '', # set类型默认为空字符串
686
+ }
687
+
688
+ fallback = 'none'
689
+ for typ, val in fallback_map.items():
690
+ if typ in column_type_lower:
691
+ if typ == 'enum' and val is None:
692
+ # 对于enum类型,使用第一个可选值作为默认值
693
+ enum_values = re.findall(r"['\"]([^'\"]*)['\"]", column_type_lower)
694
+ fallback = enum_values[0] if enum_values else 'none'
695
+ else:
696
+ fallback = val
697
+ break
698
+
699
+ if not allow_null:
700
+ # 注释掉,这里可能会产生大量日志
701
+ # logger.debug("该列不允许为空值", {
702
+ # "库": db_name,
703
+ # "表": table_name,
704
+ # "allow_null": allow_null,
705
+ # "列": col_name,
706
+ # "值": original_value,
707
+ # "兜底值": fallback
708
+ # })
709
+ return fallback # 直接返回兜底值
710
+
711
+ return None # 允许空值时返回None
712
+
659
713
  def _convert_to_int(self, value):
660
714
  """
661
715
  尝试将value转换为int
@@ -729,7 +783,34 @@ class MySQLUploader:
729
783
  """
730
784
  column_type_lower = column_type.lower() if column_type else ''
731
785
 
732
- # JSON类型验证和转换(优先处理,避免pd.isna的问题)
786
+ # 统一的空值检查(None、空字符串、NaN)
787
+ is_empty_value = False
788
+ if value is None:
789
+ is_empty_value = True
790
+ elif value == '':
791
+ # 空字符串对于字符串类型是有效值
792
+ if any(t in column_type_lower for t in ['varchar', 'text', 'char', 'mediumtext', 'longtext']):
793
+ return ""
794
+ is_empty_value = True
795
+ else:
796
+ # 检查NaN值(避免对列表和字典使用pd.isna)
797
+ if not isinstance(value, (list, dict)):
798
+ try:
799
+ is_empty_value = pd.isna(value) or (isinstance(value, (float, Decimal)) and math.isinf(value))
800
+ except (ValueError, TypeError):
801
+ is_empty_value = False
802
+
803
+ # 统一处理空值
804
+ if is_empty_value:
805
+ fallback_value = self._get_fallback_value(column_type_lower, allow_null, db_name, table_name, col_name, value)
806
+ # 如果返回了兜底值(非None),直接返回,不再进行后续验证
807
+ # 因为兜底值已经是根据列类型设计的合适值
808
+ if fallback_value is not None:
809
+ return fallback_value
810
+ # 如果返回None(允许空值的情况),继续后续处理
811
+ return None
812
+
813
+ # JSON类型验证和转换
733
814
  if 'json' in column_type_lower:
734
815
  if isinstance(value, (dict, list)):
735
816
  try:
@@ -748,59 +829,6 @@ class MySQLUploader:
748
829
  else:
749
830
  # 其他类型转换为字符串
750
831
  return str(value)
751
-
752
- # 统一判断None/NaN(排除列表和字典类型)
753
- if value == '':
754
- if any(t in column_type_lower for t in ['varchar', 'text', 'char', 'mediumtext', 'longtext']):
755
- return ""
756
-
757
- # 安全地检查NaN值,避免对列表和字典使用pd.isna
758
- is_nan = False
759
- if isinstance(value, (list, dict)):
760
- is_nan = False # 列表和字典不是NaN
761
- else:
762
- try:
763
- is_nan = pd.isna(value) or (isinstance(value, (float, Decimal)) and math.isinf(value))
764
- except (ValueError, TypeError):
765
- is_nan = False
766
-
767
- if value == '' or is_nan:
768
- # 兜底填充值映射
769
- fallback_map = {
770
- 'int': 0,
771
- 'bigint': 0,
772
- 'tinyint': 0,
773
- 'smallint': 0,
774
- 'mediumint': 0,
775
- 'decimal': 0.0,
776
- 'float': 0.0,
777
- 'double': 0.0,
778
- 'date': '1970-01-01',
779
- 'datetime': '1970-01-01 00:00:00',
780
- 'timestamp': '1970-01-01 00:00:00',
781
- 'json': '{}',
782
- 'varchar': 'none',
783
- 'text': 'none',
784
- 'char': 'none',
785
- 'mediumtext': 'none',
786
- 'longtext': 'none',
787
- 'enum': None, # enum类型需要特殊处理,使用第一个可选值
788
- 'set': '', # set类型默认为空字符串
789
- }
790
- fallback = 'none'
791
- for typ, val in fallback_map.items():
792
- if typ in column_type_lower:
793
- if typ == 'enum' and val is None:
794
- # 对于enum类型,使用第一个可选值作为默认值
795
- enum_values = re.findall(r"['\"]([^'\"]*)['\"]", column_type)
796
- fallback = enum_values[0] if enum_values else 'none'
797
- else:
798
- fallback = val
799
- break
800
- if not allow_null:
801
- logger.warning("该列不允许为空值", {"库": db_name, "表": table_name, "allow_null": allow_null, "列": col_name, "值": value, "兜底值": fallback})
802
- raise ValueError("该列不允许为空值")
803
- return fallback
804
832
 
805
833
  original_value = value
806
834
 
@@ -1053,14 +1081,41 @@ class MySQLUploader:
1053
1081
  '列': self._shorten_for_log(table_columns),
1054
1082
  })
1055
1083
  raise ValueError(f"获取列失败 `{db_name}`.`{table_name}`")
1056
- for col in set_typ:
1057
- if col not in table_columns:
1058
- logger.error('列不存在', {
1084
+ # 检查并自动添加缺失的列
1085
+ missing_columns = [col for col in set_typ if col not in table_columns]
1086
+ if missing_columns:
1087
+ if not self.auto_creat_missing_cols:
1088
+ logger.error('列不存在且不支持自动添加,请手动维护表结构,并补齐缺失列', {
1059
1089
  '库': db_name,
1060
1090
  '表': table_name,
1061
- '': col,
1091
+ '缺失列数': len(missing_columns),
1092
+ '缺失列': missing_columns,
1062
1093
  })
1063
- raise ValueError(f"列不存在: `{col}` -> `{db_name}`.`{table_name}`")
1094
+ raise ValueError(f"列不存在: `{missing_columns}` -> `{db_name}`.`{table_name}`")
1095
+ else:
1096
+ # 表有缺失列时报错,建议不允许自动添加,手动检查数据一致性,以免产生不必要的表错误
1097
+ # 自动添加缺失的列
1098
+ for col in missing_columns:
1099
+ try:
1100
+ self._add_column_to_table(db_name, table_name, col, set_typ[col], allow_null)
1101
+ logger.info('自动添加缺失列', {
1102
+ '库': db_name,
1103
+ '表': table_name,
1104
+ '列': col,
1105
+ '类型': set_typ[col]
1106
+ })
1107
+ except Exception as e:
1108
+ logger.error('添加列失败', {
1109
+ '库': db_name,
1110
+ '表': table_name,
1111
+ '列': col,
1112
+ '类型': set_typ[col],
1113
+ '错误': str(e)
1114
+ })
1115
+ raise ValueError(f"添加列失败: `{col}` -> `{db_name}`.`{table_name}`: {str(e)}")
1116
+
1117
+ # 重新获取表列信息
1118
+ table_columns = self._get_table_columns(db_name, table_name)
1064
1119
  if date_column and date_column in table_columns:
1065
1120
  try:
1066
1121
  self._ensure_index(db_name, table_name, date_column)
@@ -1281,21 +1336,67 @@ class MySQLUploader:
1281
1336
  prepared_row[col_name] = self._validate_value(None, filtered_set_typ[col_name], allow_null, db_name, table_name, col_name)
1282
1337
  except ValueError as e:
1283
1338
  if not allow_null:
1284
- error_msg = f"行号:{row_idx} -> 缺失列: `{col_name}`, 且不允许空值"
1285
- logger.error(error_msg, {'row': self._shorten_for_log(row)})
1286
- raise ValueError(error_msg)
1287
- prepared_row[col_name] = None
1339
+ # 如果不允许空值但验证失败,尝试使用兜底值
1340
+ try:
1341
+ fallback_value = self._get_fallback_value(filtered_set_typ[col_name].lower(), allow_null, db_name, table_name, col_name, None)
1342
+ if fallback_value is not None:
1343
+ prepared_row[col_name] = fallback_value
1344
+ logger.warning(f"行号:{row_idx} -> 缺失列: `{col_name}`, 使用兜底值: {fallback_value}", {'row': self._shorten_for_log(row)})
1345
+ else:
1346
+ error_msg = f"行号:{row_idx} -> 缺失列: `{col_name}`, 且不允许空值"
1347
+ logger.error(error_msg, {'row': self._shorten_for_log(row)})
1348
+ raise ValueError(error_msg)
1349
+ except Exception:
1350
+ error_msg = f"行号:{row_idx} -> 缺失列: `{col_name}`, 且不允许空值"
1351
+ logger.error(error_msg, {'row': self._shorten_for_log(row)})
1352
+ raise ValueError(error_msg)
1353
+ else:
1354
+ prepared_row[col_name] = None
1288
1355
  else:
1289
1356
  try:
1290
1357
  prepared_row[col_name] = self._validate_value(row[col_name], filtered_set_typ[col_name], allow_null, db_name, table_name, col_name)
1291
1358
  except ValueError as e:
1292
- logger.error('数据验证失败', {
1293
- '列': col_name,
1294
- '行': row_idx,
1295
- '报错': str(e),
1296
- 'row': self._shorten_for_log(row),
1297
- })
1298
- raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
1359
+ # 如果数据验证失败,检查是否为空值且不允许空值,尝试使用兜底值
1360
+ original_value = row[col_name]
1361
+ is_empty_original = (original_value is None or
1362
+ original_value == '' or
1363
+ (not isinstance(original_value, (list, dict)) and
1364
+ pd.isna(original_value) if hasattr(pd, 'isna') else False))
1365
+
1366
+ if is_empty_original and not allow_null:
1367
+ try:
1368
+ fallback_value = self._get_fallback_value(filtered_set_typ[col_name].lower(), allow_null, db_name, table_name, col_name, original_value)
1369
+ if fallback_value is not None:
1370
+ prepared_row[col_name] = fallback_value
1371
+ logger.warning(f"行:{row_idx}, 列:`{col_name}` -> 原值验证失败,使用兜底值: {fallback_value}", {
1372
+ '原值': original_value,
1373
+ '兜底值': fallback_value,
1374
+ 'row': self._shorten_for_log(row)
1375
+ })
1376
+ else:
1377
+ logger.error('数据验证失败', {
1378
+ '列': col_name,
1379
+ '行': row_idx,
1380
+ '报错': str(e),
1381
+ 'row': self._shorten_for_log(row),
1382
+ })
1383
+ raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
1384
+ except Exception:
1385
+ logger.error('数据验证失败', {
1386
+ '列': col_name,
1387
+ '行': row_idx,
1388
+ '报错': str(e),
1389
+ 'row': self._shorten_for_log(row),
1390
+ })
1391
+ raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
1392
+ else:
1393
+ logger.error('数据验证失败', {
1394
+ '列': col_name,
1395
+ '行': row_idx,
1396
+ '报错': str(e),
1397
+ 'row': self._shorten_for_log(row),
1398
+ })
1399
+ raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
1299
1400
  prepared_data.append(prepared_row)
1300
1401
  return prepared_data, filtered_set_typ
1301
1402
 
@@ -2061,6 +2162,70 @@ class MySQLUploader:
2061
2162
  except Exception as e:
2062
2163
  logger.error('创建索引失败', {'库': db_name, '表': table_name, '列': column, '错误': str(e)})
2063
2164
  raise
2165
+
2166
+ @_execute_with_retry
2167
+ def _add_column_to_table(self, db_name: str, table_name: str, column: str, column_type: str, allow_null: bool = False):
2168
+ """
2169
+ 添加列到指定表。
2170
+
2171
+ :param db_name: 数据库名
2172
+ :param table_name: 表名
2173
+ :param column: 需要添加的列名
2174
+ :param column_type: 列的数据类型
2175
+ :param allow_null: 是否允许空值,默认为False
2176
+ """
2177
+ db_name = self._validate_identifier(db_name, is_database=True)
2178
+ table_name = self._validate_identifier(table_name)
2179
+ column = self._validate_identifier(column)
2180
+
2181
+ # 构建ALTER TABLE语句
2182
+ null_constraint = "NULL" if allow_null else "NOT NULL"
2183
+
2184
+ # 为新添加的列设置默认值
2185
+ default_value = ""
2186
+ if not allow_null:
2187
+ column_type_lower = column_type.lower()
2188
+ if any(t in column_type_lower for t in ['int', 'bigint', 'tinyint', 'smallint', 'mediumint']):
2189
+ default_value = " DEFAULT 0"
2190
+ elif any(t in column_type_lower for t in ['decimal', 'float', 'double']):
2191
+ default_value = " DEFAULT 0.0"
2192
+ elif any(t in column_type_lower for t in ['varchar', 'text', 'char', 'mediumtext', 'longtext']):
2193
+ default_value = " DEFAULT 'none'"
2194
+ elif 'date' in column_type_lower:
2195
+ if 'datetime' in column_type_lower or 'timestamp' in column_type_lower:
2196
+ default_value = " DEFAULT '1970-01-01 00:00:00'"
2197
+ else:
2198
+ default_value = " DEFAULT '1970-01-01'"
2199
+ elif 'json' in column_type_lower:
2200
+ default_value = " DEFAULT '{}'"
2201
+
2202
+ sql = f'ALTER TABLE `{db_name}`.`{table_name}` ADD COLUMN `{column}` {column_type} {null_constraint}{default_value}'
2203
+
2204
+ conn = None
2205
+ try:
2206
+ with self._get_connection() as conn:
2207
+ with conn.cursor() as cursor:
2208
+ cursor.execute(sql)
2209
+ conn.commit()
2210
+ logger.debug('已为表添加列', {
2211
+ '库': db_name,
2212
+ '表': table_name,
2213
+ '列': column,
2214
+ '类型': column_type,
2215
+ '允许空值': allow_null
2216
+ })
2217
+ except Exception as e:
2218
+ logger.error('添加列失败', {
2219
+ '库': db_name,
2220
+ '表': table_name,
2221
+ '列': column,
2222
+ '类型': column_type,
2223
+ '错误': str(e),
2224
+ 'SQL': sql
2225
+ })
2226
+ if conn is not None:
2227
+ conn.rollback()
2228
+ raise
2064
2229
 
2065
2230
  def __enter__(self):
2066
2231
  return self
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: mdbq
3
- Version: 4.1.9
3
+ Version: 4.1.11
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1 +0,0 @@
1
- VERSION = '4.1.9'
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes