mdbq 4.1.12__tar.gz → 4.1.14__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mdbq might be problematic. Click here for more details.

Files changed (46)
  1. {mdbq-4.1.12 → mdbq-4.1.14}/PKG-INFO +1 -1
  2. mdbq-4.1.14/mdbq/__version__.py +1 -0
  3. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/mysql/deduplicator.py +7 -7
  4. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/mysql/uploader.py +87 -32
  5. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq.egg-info/PKG-INFO +1 -1
  6. mdbq-4.1.12/mdbq/__version__.py +0 -1
  7. {mdbq-4.1.12 → mdbq-4.1.14}/README.txt +0 -0
  8. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/__init__.py +0 -0
  9. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/auth/__init__.py +0 -0
  10. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/auth/auth_backend.py +0 -0
  11. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/auth/crypto.py +0 -0
  12. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/auth/rate_limiter.py +0 -0
  13. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/js/__init__.py +0 -0
  14. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/js/jc.py +0 -0
  15. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/log/__init__.py +0 -0
  16. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/log/mylogger.py +0 -0
  17. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/myconf/__init__.py +0 -0
  18. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/myconf/myconf.py +0 -0
  19. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/mysql/__init__.py +0 -0
  20. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/mysql/mysql.py +0 -0
  21. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/mysql/s_query.py +0 -0
  22. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/mysql/unique_.py +0 -0
  23. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/other/__init__.py +0 -0
  24. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/other/download_sku_picture.py +0 -0
  25. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/other/error_handler.py +0 -0
  26. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/other/otk.py +0 -0
  27. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/other/pov_city.py +0 -0
  28. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/other/ua_sj.py +0 -0
  29. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/pbix/__init__.py +0 -0
  30. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/pbix/pbix_refresh.py +0 -0
  31. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/pbix/refresh_all.py +0 -0
  32. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/redis/__init__.py +0 -0
  33. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/redis/getredis.py +0 -0
  34. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/redis/redis_cache.py +0 -0
  35. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/route/__init__.py +0 -0
  36. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/route/analytics.py +0 -0
  37. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/route/monitor.py +0 -0
  38. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/route/routes.py +0 -0
  39. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/selenium/__init__.py +0 -0
  40. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/selenium/get_driver.py +0 -0
  41. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq/spider/__init__.py +0 -0
  42. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq.egg-info/SOURCES.txt +0 -0
  43. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq.egg-info/dependency_links.txt +0 -0
  44. {mdbq-4.1.12 → mdbq-4.1.14}/mdbq.egg-info/top_level.txt +0 -0
  45. {mdbq-4.1.12 → mdbq-4.1.14}/setup.cfg +0 -0
  46. {mdbq-4.1.12 → mdbq-4.1.14}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdbq
3
- Version: 4.1.12
3
+ Version: 4.1.14
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -0,0 +1 @@
1
+ VERSION = '4.1.14'
@@ -1364,12 +1364,12 @@ def main():
1364
1364
  skip_system_dbs=True,
1365
1365
  max_retries=3,
1366
1366
  retry_waiting_time=5,
1367
- pool_size=20,
1368
- mincached=5,
1369
- maxcached=10,
1367
+ pool_size=10,
1368
+ mincached=2,
1369
+ maxcached=5,
1370
1370
  # recent_month=1,
1371
1371
  # date_range=['2025-06-09', '2025-06-10'],
1372
- exclude_columns=['更新时间'],
1372
+ exclude_columns=['创建时间', '更新时间'],
1373
1373
  exclude_databases=['cookie文件', '日志', '视频数据', '云电影'],
1374
1374
  # exclude_tables={
1375
1375
  # '推广数据2': [
@@ -1391,9 +1391,9 @@ def main():
1391
1391
 
1392
1392
  # # 指定表去重(使用特定列)
1393
1393
  deduplicator.deduplicate_table(
1394
- '生意参谋3',
1395
- '新品追踪_旧接口_2025',
1396
- columns=['商品id', '累计商品浏览量', '累计商品访客数'],
1394
+ '推广数据_奥莱店',
1395
+ '主体报表_2025',
1396
+ columns=['日期', '店铺名称', '场景id', '计划id', '主体id'],
1397
1397
  dry_run=False,
1398
1398
  reorder_id=True,
1399
1399
  )
@@ -435,15 +435,36 @@ class MySQLUploader:
435
435
  if not primary_keys:
436
436
  column_defs.append("`id` INT NOT NULL AUTO_INCREMENT")
437
437
 
438
- # 添加其他列
438
+ # 添加其他列,确保时间戳字段按正确顺序添加
439
+ timestamp_cols = ['创建时间', '更新时间']
440
+ regular_cols = []
441
+ timestamp_defs = []
442
+
443
+ # 先处理非时间戳字段
439
444
  for col_name, col_type in set_typ.items():
440
445
  if col_name == 'id':
441
446
  continue
447
+ if col_name in timestamp_cols:
448
+ continue # 时间戳字段稍后按顺序处理
449
+
442
450
  safe_col_name = self._normalize_col(col_name)
443
451
  col_def = f"`{safe_col_name}` {col_type}"
444
- if not allow_null and not col_type.lower().startswith('json'):
452
+ if not allow_null and not col_type.lower().startswith('json') and not col_type.lower().startswith('timestamp'):
445
453
  col_def += " NOT NULL"
446
- column_defs.append(col_def)
454
+ regular_cols.append(col_def)
455
+
456
+ # 按固定顺序添加时间戳字段
457
+ for timestamp_col in timestamp_cols:
458
+ if timestamp_col in set_typ:
459
+ safe_col_name = self._normalize_col(timestamp_col)
460
+ col_type = set_typ[timestamp_col]
461
+ col_def = f"`{safe_col_name}` {col_type}"
462
+ # TIMESTAMP字段不需要额外的NOT NULL,因为已经包含在类型定义中
463
+ timestamp_defs.append(col_def)
464
+
465
+ # 合并所有列定义:常规字段 + 时间戳字段
466
+ column_defs.extend(regular_cols)
467
+ column_defs.extend(timestamp_defs)
447
468
 
448
469
  # 主键处理逻辑调整
449
470
  def _index_col_sql(col):
@@ -672,9 +693,9 @@ class MySQLUploader:
672
693
  'decimal': 0.0,
673
694
  'float': 0.0,
674
695
  'double': 0.0,
675
- 'date': '1970-01-01',
676
- 'datetime': '1970-01-01 00:00:00',
677
- 'timestamp': '1970-01-01 00:00:00',
696
+ 'date': '2000-01-01',
697
+ 'datetime': '2000-01-01 00:00:00',
698
+ 'timestamp': '2000-01-01 00:00:00',
678
699
  'json': '{}',
679
700
  'varchar': 'none',
680
701
  'text': 'none',
@@ -783,6 +804,12 @@ class MySQLUploader:
783
804
  """
784
805
  column_type_lower = column_type.lower() if column_type else ''
785
806
 
807
+ # 对于包含CURRENT_TIMESTAMP的TIMESTAMP字段,跳过验证,让MySQL自动处理
808
+ if ('timestamp' in column_type_lower and 'current_timestamp' in column_type_lower and
809
+ col_name in ['创建时间', '更新时间']):
810
+ # 这些字段由MySQL自动处理,不需要传入值
811
+ return None
812
+
786
813
  # 统一的空值检查(None、空字符串、NaN)
787
814
  is_empty_value = False
788
815
  if value is None:
@@ -1335,6 +1362,15 @@ class MySQLUploader:
1335
1362
  # 跳过id列,不允许外部传入id
1336
1363
  if (self.case_sensitive and col_name == 'id') or (not self.case_sensitive and col_name.lower() == 'id'):
1337
1364
  continue
1365
+ # 对于自动时间戳字段,跳过处理,让MySQL自动处理
1366
+ col_type_lower = filtered_set_typ[col_name].lower()
1367
+ is_auto_timestamp = ('timestamp' in col_type_lower and 'current_timestamp' in col_type_lower and
1368
+ col_name in ['创建时间', '更新时间'])
1369
+
1370
+ if is_auto_timestamp:
1371
+ # 自动时间戳字段完全跳过,不在INSERT语句中包含
1372
+ continue
1373
+
1338
1374
  if col_name not in row:
1339
1375
  # 对于缺失的列,使用None作为默认值,在_validate_value中会根据allow_null和列类型进行进一步处理
1340
1376
  try:
@@ -1358,6 +1394,7 @@ class MySQLUploader:
1358
1394
  else:
1359
1395
  prepared_row[col_name] = None
1360
1396
  else:
1397
+ # 处理用户传入的值
1361
1398
  try:
1362
1399
  prepared_row[col_name] = self._validate_value(row[col_name], filtered_set_typ[col_name], allow_null, db_name, table_name, col_name)
1363
1400
  except ValueError as e:
@@ -1402,7 +1439,7 @@ class MySQLUploader:
1402
1439
  'row': self._shorten_for_log(row),
1403
1440
  })
1404
1441
  raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
1405
- prepared_data.append(prepared_row)
1442
+ prepared_data.append(prepared_row)
1406
1443
  return prepared_data, filtered_set_typ
1407
1444
 
1408
1445
  def upload_data(
@@ -1851,8 +1888,17 @@ class MySQLUploader:
1851
1888
  cached = self._prepared_statements.get(cache_key)
1852
1889
  if cached:
1853
1890
  return cached
1854
- # 获取所有列名(排除id
1855
- all_columns = [col for col in set_typ.keys() if col.lower() != 'id']
1891
+ # 获取所有列名(排除id和自动时间戳字段)
1892
+ all_columns = []
1893
+ for col in set_typ.keys():
1894
+ if col.lower() == 'id':
1895
+ continue
1896
+ # 检查是否是自动时间戳字段
1897
+ col_type_lower = set_typ[col].lower()
1898
+ is_auto_timestamp = ('timestamp' in col_type_lower and 'current_timestamp' in col_type_lower and
1899
+ col in ['创建时间', '更新时间'])
1900
+ if not is_auto_timestamp:
1901
+ all_columns.append(col)
1856
1902
  if not check_duplicate:
1857
1903
  sql = self._build_simple_insert_sql(db_name, table_name, all_columns,
1858
1904
  update_on_duplicate)
@@ -1908,8 +1954,19 @@ class MySQLUploader:
1908
1954
  return str(value)
1909
1955
  return value
1910
1956
 
1957
+
1911
1958
  batch_size = get_optimal_batch_size(len(data))
1912
- all_columns = [col for col in set_typ.keys() if col.lower() != 'id']
1959
+ # 排除id列和自动时间戳列
1960
+ all_columns = []
1961
+ for col in set_typ.keys():
1962
+ if col.lower() == 'id':
1963
+ continue
1964
+ # 检查是否是自动时间戳字段
1965
+ col_type_lower = set_typ[col].lower()
1966
+ is_auto_timestamp = ('timestamp' in col_type_lower and 'current_timestamp' in col_type_lower and
1967
+ col in ['创建时间', '更新时间'])
1968
+ if not is_auto_timestamp:
1969
+ all_columns.append(col)
1913
1970
  total_inserted = 0
1914
1971
  total_skipped = 0
1915
1972
  total_failed = 0
@@ -1918,6 +1975,7 @@ class MySQLUploader:
1918
1975
  if transaction_mode == 'batch':
1919
1976
  for i in range(0, len(data), batch_size):
1920
1977
  batch = data[i:i + batch_size]
1978
+ # 使用批量插入逻辑
1921
1979
  values_list = []
1922
1980
  for row in batch:
1923
1981
  values = [ensure_basic_type(row.get(col)) for col in all_columns]
@@ -2209,14 +2267,22 @@ class MySQLUploader:
2209
2267
  default_value = " DEFAULT 0.0"
2210
2268
  elif any(t in column_type_lower for t in ['varchar', 'text', 'char', 'mediumtext', 'longtext']):
2211
2269
  default_value = " DEFAULT 'none'"
2270
+ elif 'timestamp' in column_type_lower:
2271
+ # TIMESTAMP类型已经包含DEFAULT定义,不需要额外添加
2272
+ default_value = ""
2212
2273
  elif 'date' in column_type_lower:
2213
- if 'datetime' in column_type_lower or 'timestamp' in column_type_lower:
2214
- default_value = " DEFAULT '1970-01-01 00:00:00'"
2274
+ if 'datetime' in column_type_lower:
2275
+ default_value = " DEFAULT '2000-01-01 00:00:00'"
2215
2276
  else:
2216
- default_value = " DEFAULT '1970-01-01'"
2277
+ default_value = " DEFAULT '2000-01-01'"
2217
2278
  elif 'json' in column_type_lower:
2218
2279
  default_value = " DEFAULT '{}'"
2219
2280
 
2281
+ # 对于TIMESTAMP类型,不添加额外的NULL约束,因为已经包含在类型定义中
2282
+ if 'timestamp' in column_type.lower() and ('default' in column_type.lower() or 'current_timestamp' in column_type.lower()):
2283
+ null_constraint = "" # TIMESTAMP类型已经包含完整定义
2284
+ default_value = ""
2285
+
2220
2286
  sql = f'ALTER TABLE `{db_name}`.`{table_name}` ADD COLUMN `{column}` {column_type} {null_constraint}{default_value}'
2221
2287
 
2222
2288
  conn = None
@@ -2615,39 +2681,31 @@ class MySQLUploader:
2615
2681
  # 定义时间戳列名
2616
2682
  created_col = '创建时间'
2617
2683
  updated_col = '更新时间'
2618
- current_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
2619
2684
 
2620
2685
  # 复制set_typ以避免修改原始对象
2621
2686
  updated_set_typ = set_typ.copy()
2622
2687
 
2623
- # 添加时间戳列到set_typ
2624
- updated_set_typ[created_col] = 'DATETIME'
2625
- updated_set_typ[updated_col] = 'DATETIME'
2688
+ # 使用MySQL的CURRENT_TIMESTAMP功能,按固定顺序添加时间戳列
2689
+ # 创建时间:插入时自动设置,更新时不变
2690
+ updated_set_typ[created_col] = 'TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP'
2691
+ # 更新时间:插入和更新时都自动设置为当前时间
2692
+ updated_set_typ[updated_col] = 'TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP'
2626
2693
 
2627
2694
  # 处理DataFrame格式的数据
2628
2695
  if hasattr(data, 'shape') and hasattr(data, 'columns'):
2629
2696
  import pandas as pd
2630
2697
  df = data.copy()
2631
2698
 
2632
- # 移除原始数据中可能存在的时间戳列
2699
+ # 移除原始数据中可能存在的时间戳列,让MySQL自动处理
2633
2700
  columns_to_remove = []
2634
2701
  for col in df.columns:
2635
2702
  if col in [created_col, updated_col]:
2636
2703
  columns_to_remove.append(col)
2637
- logger.warning('移除原始数据中的时间戳列', {
2638
- '库': db_name,
2639
- '表': table_name,
2640
- '列': col,
2641
- '原因': '与自动时间戳功能冲突'
2642
- })
2643
2704
 
2644
2705
  if columns_to_remove:
2645
2706
  df = df.drop(columns=columns_to_remove)
2646
2707
 
2647
- # 添加时间戳列
2648
- df[created_col] = current_time
2649
- df[updated_col] = current_time
2650
-
2708
+ # 不再手动添加时间戳列,让MySQL的CURRENT_TIMESTAMP自动处理
2651
2709
  return df, updated_set_typ
2652
2710
 
2653
2711
  # 处理字典或字典列表格式的数据
@@ -2670,10 +2728,7 @@ class MySQLUploader:
2670
2728
  if key not in [created_col, updated_col]:
2671
2729
  new_row[key] = value
2672
2730
 
2673
- # 添加时间戳
2674
- new_row[created_col] = current_time
2675
- new_row[updated_col] = current_time
2676
-
2731
+ # 不再手动添加时间戳,让MySQL的CURRENT_TIMESTAMP自动处理
2677
2732
  processed_data.append(new_row)
2678
2733
 
2679
2734
  # 如果原始数据是单个字典,返回单个字典
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdbq
3
- Version: 4.1.12
3
+ Version: 4.1.14
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1 +0,0 @@
1
- VERSION = '4.1.12'
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes