mdbq 4.1.12__py3-none-any.whl → 4.1.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mdbq might be problematic. Click here for more details.
- mdbq/__version__.py +1 -1
- mdbq/mysql/deduplicator.py +7 -7
- mdbq/mysql/uploader.py +87 -32
- {mdbq-4.1.12.dist-info → mdbq-4.1.14.dist-info}/METADATA +1 -1
- {mdbq-4.1.12.dist-info → mdbq-4.1.14.dist-info}/RECORD +7 -7
- {mdbq-4.1.12.dist-info → mdbq-4.1.14.dist-info}/WHEEL +0 -0
- {mdbq-4.1.12.dist-info → mdbq-4.1.14.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
VERSION = '4.1.12'
|
|
1
|
+
VERSION = '4.1.14'
|
mdbq/mysql/deduplicator.py
CHANGED
|
@@ -1364,12 +1364,12 @@ def main():
|
|
|
1364
1364
|
skip_system_dbs=True,
|
|
1365
1365
|
max_retries=3,
|
|
1366
1366
|
retry_waiting_time=5,
|
|
1367
|
-
pool_size=
|
|
1368
|
-
mincached=
|
|
1369
|
-
maxcached=
|
|
1367
|
+
pool_size=10,
|
|
1368
|
+
mincached=2,
|
|
1369
|
+
maxcached=5,
|
|
1370
1370
|
# recent_month=1,
|
|
1371
1371
|
# date_range=['2025-06-09', '2025-06-10'],
|
|
1372
|
-
exclude_columns=['更新时间'],
|
|
1372
|
+
exclude_columns=['创建时间', '更新时间'],
|
|
1373
1373
|
exclude_databases=['cookie文件', '日志', '视频数据', '云电影'],
|
|
1374
1374
|
# exclude_tables={
|
|
1375
1375
|
# '推广数据2': [
|
|
@@ -1391,9 +1391,9 @@ def main():
|
|
|
1391
1391
|
|
|
1392
1392
|
# # 指定表去重(使用特定列)
|
|
1393
1393
|
deduplicator.deduplicate_table(
|
|
1394
|
-
'
|
|
1395
|
-
'
|
|
1396
|
-
columns=['
|
|
1394
|
+
'推广数据_奥莱店',
|
|
1395
|
+
'主体报表_2025',
|
|
1396
|
+
columns=['日期', '店铺名称', '场景id', '计划id', '主体id'],
|
|
1397
1397
|
dry_run=False,
|
|
1398
1398
|
reorder_id=True,
|
|
1399
1399
|
)
|
mdbq/mysql/uploader.py
CHANGED
|
@@ -435,15 +435,36 @@ class MySQLUploader:
|
|
|
435
435
|
if not primary_keys:
|
|
436
436
|
column_defs.append("`id` INT NOT NULL AUTO_INCREMENT")
|
|
437
437
|
|
|
438
|
-
#
|
|
438
|
+
# 添加其他列,确保时间戳字段按正确顺序添加
|
|
439
|
+
timestamp_cols = ['创建时间', '更新时间']
|
|
440
|
+
regular_cols = []
|
|
441
|
+
timestamp_defs = []
|
|
442
|
+
|
|
443
|
+
# 先处理非时间戳字段
|
|
439
444
|
for col_name, col_type in set_typ.items():
|
|
440
445
|
if col_name == 'id':
|
|
441
446
|
continue
|
|
447
|
+
if col_name in timestamp_cols:
|
|
448
|
+
continue # 时间戳字段稍后按顺序处理
|
|
449
|
+
|
|
442
450
|
safe_col_name = self._normalize_col(col_name)
|
|
443
451
|
col_def = f"`{safe_col_name}` {col_type}"
|
|
444
|
-
if not allow_null and not col_type.lower().startswith('json'):
|
|
452
|
+
if not allow_null and not col_type.lower().startswith('json') and not col_type.lower().startswith('timestamp'):
|
|
445
453
|
col_def += " NOT NULL"
|
|
446
|
-
|
|
454
|
+
regular_cols.append(col_def)
|
|
455
|
+
|
|
456
|
+
# 按固定顺序添加时间戳字段
|
|
457
|
+
for timestamp_col in timestamp_cols:
|
|
458
|
+
if timestamp_col in set_typ:
|
|
459
|
+
safe_col_name = self._normalize_col(timestamp_col)
|
|
460
|
+
col_type = set_typ[timestamp_col]
|
|
461
|
+
col_def = f"`{safe_col_name}` {col_type}"
|
|
462
|
+
# TIMESTAMP字段不需要额外的NOT NULL,因为已经包含在类型定义中
|
|
463
|
+
timestamp_defs.append(col_def)
|
|
464
|
+
|
|
465
|
+
# 合并所有列定义:常规字段 + 时间戳字段
|
|
466
|
+
column_defs.extend(regular_cols)
|
|
467
|
+
column_defs.extend(timestamp_defs)
|
|
447
468
|
|
|
448
469
|
# 主键处理逻辑调整
|
|
449
470
|
def _index_col_sql(col):
|
|
@@ -672,9 +693,9 @@ class MySQLUploader:
|
|
|
672
693
|
'decimal': 0.0,
|
|
673
694
|
'float': 0.0,
|
|
674
695
|
'double': 0.0,
|
|
675
|
-
'date': '
|
|
676
|
-
'datetime': '
|
|
677
|
-
'timestamp': '
|
|
696
|
+
'date': '2000-01-01',
|
|
697
|
+
'datetime': '2000-01-01 00:00:00',
|
|
698
|
+
'timestamp': '2000-01-01 00:00:00',
|
|
678
699
|
'json': '{}',
|
|
679
700
|
'varchar': 'none',
|
|
680
701
|
'text': 'none',
|
|
@@ -783,6 +804,12 @@ class MySQLUploader:
|
|
|
783
804
|
"""
|
|
784
805
|
column_type_lower = column_type.lower() if column_type else ''
|
|
785
806
|
|
|
807
|
+
# 对于包含CURRENT_TIMESTAMP的TIMESTAMP字段,跳过验证,让MySQL自动处理
|
|
808
|
+
if ('timestamp' in column_type_lower and 'current_timestamp' in column_type_lower and
|
|
809
|
+
col_name in ['创建时间', '更新时间']):
|
|
810
|
+
# 这些字段由MySQL自动处理,不需要传入值
|
|
811
|
+
return None
|
|
812
|
+
|
|
786
813
|
# 统一的空值检查(None、空字符串、NaN)
|
|
787
814
|
is_empty_value = False
|
|
788
815
|
if value is None:
|
|
@@ -1335,6 +1362,15 @@ class MySQLUploader:
|
|
|
1335
1362
|
# 跳过id列,不允许外部传入id
|
|
1336
1363
|
if (self.case_sensitive and col_name == 'id') or (not self.case_sensitive and col_name.lower() == 'id'):
|
|
1337
1364
|
continue
|
|
1365
|
+
# 对于自动时间戳字段,跳过处理,让MySQL自动处理
|
|
1366
|
+
col_type_lower = filtered_set_typ[col_name].lower()
|
|
1367
|
+
is_auto_timestamp = ('timestamp' in col_type_lower and 'current_timestamp' in col_type_lower and
|
|
1368
|
+
col_name in ['创建时间', '更新时间'])
|
|
1369
|
+
|
|
1370
|
+
if is_auto_timestamp:
|
|
1371
|
+
# 自动时间戳字段完全跳过,不在INSERT语句中包含
|
|
1372
|
+
continue
|
|
1373
|
+
|
|
1338
1374
|
if col_name not in row:
|
|
1339
1375
|
# 对于缺失的列,使用None作为默认值,在_validate_value中会根据allow_null和列类型进行进一步处理
|
|
1340
1376
|
try:
|
|
@@ -1358,6 +1394,7 @@ class MySQLUploader:
|
|
|
1358
1394
|
else:
|
|
1359
1395
|
prepared_row[col_name] = None
|
|
1360
1396
|
else:
|
|
1397
|
+
# 处理用户传入的值
|
|
1361
1398
|
try:
|
|
1362
1399
|
prepared_row[col_name] = self._validate_value(row[col_name], filtered_set_typ[col_name], allow_null, db_name, table_name, col_name)
|
|
1363
1400
|
except ValueError as e:
|
|
@@ -1402,7 +1439,7 @@ class MySQLUploader:
|
|
|
1402
1439
|
'row': self._shorten_for_log(row),
|
|
1403
1440
|
})
|
|
1404
1441
|
raise ValueError(f"行:{row_idx}, 列:`{col_name}`-> 报错: {str(e)}")
|
|
1405
|
-
|
|
1442
|
+
prepared_data.append(prepared_row)
|
|
1406
1443
|
return prepared_data, filtered_set_typ
|
|
1407
1444
|
|
|
1408
1445
|
def upload_data(
|
|
@@ -1851,8 +1888,17 @@ class MySQLUploader:
|
|
|
1851
1888
|
cached = self._prepared_statements.get(cache_key)
|
|
1852
1889
|
if cached:
|
|
1853
1890
|
return cached
|
|
1854
|
-
# 获取所有列名(排除id
|
|
1855
|
-
all_columns = [
|
|
1891
|
+
# 获取所有列名(排除id和自动时间戳字段)
|
|
1892
|
+
all_columns = []
|
|
1893
|
+
for col in set_typ.keys():
|
|
1894
|
+
if col.lower() == 'id':
|
|
1895
|
+
continue
|
|
1896
|
+
# 检查是否是自动时间戳字段
|
|
1897
|
+
col_type_lower = set_typ[col].lower()
|
|
1898
|
+
is_auto_timestamp = ('timestamp' in col_type_lower and 'current_timestamp' in col_type_lower and
|
|
1899
|
+
col in ['创建时间', '更新时间'])
|
|
1900
|
+
if not is_auto_timestamp:
|
|
1901
|
+
all_columns.append(col)
|
|
1856
1902
|
if not check_duplicate:
|
|
1857
1903
|
sql = self._build_simple_insert_sql(db_name, table_name, all_columns,
|
|
1858
1904
|
update_on_duplicate)
|
|
@@ -1908,8 +1954,19 @@ class MySQLUploader:
|
|
|
1908
1954
|
return str(value)
|
|
1909
1955
|
return value
|
|
1910
1956
|
|
|
1957
|
+
|
|
1911
1958
|
batch_size = get_optimal_batch_size(len(data))
|
|
1912
|
-
|
|
1959
|
+
# 排除id列和自动时间戳列
|
|
1960
|
+
all_columns = []
|
|
1961
|
+
for col in set_typ.keys():
|
|
1962
|
+
if col.lower() == 'id':
|
|
1963
|
+
continue
|
|
1964
|
+
# 检查是否是自动时间戳字段
|
|
1965
|
+
col_type_lower = set_typ[col].lower()
|
|
1966
|
+
is_auto_timestamp = ('timestamp' in col_type_lower and 'current_timestamp' in col_type_lower and
|
|
1967
|
+
col in ['创建时间', '更新时间'])
|
|
1968
|
+
if not is_auto_timestamp:
|
|
1969
|
+
all_columns.append(col)
|
|
1913
1970
|
total_inserted = 0
|
|
1914
1971
|
total_skipped = 0
|
|
1915
1972
|
total_failed = 0
|
|
@@ -1918,6 +1975,7 @@ class MySQLUploader:
|
|
|
1918
1975
|
if transaction_mode == 'batch':
|
|
1919
1976
|
for i in range(0, len(data), batch_size):
|
|
1920
1977
|
batch = data[i:i + batch_size]
|
|
1978
|
+
# 使用批量插入逻辑
|
|
1921
1979
|
values_list = []
|
|
1922
1980
|
for row in batch:
|
|
1923
1981
|
values = [ensure_basic_type(row.get(col)) for col in all_columns]
|
|
@@ -2209,14 +2267,22 @@ class MySQLUploader:
|
|
|
2209
2267
|
default_value = " DEFAULT 0.0"
|
|
2210
2268
|
elif any(t in column_type_lower for t in ['varchar', 'text', 'char', 'mediumtext', 'longtext']):
|
|
2211
2269
|
default_value = " DEFAULT 'none'"
|
|
2270
|
+
elif 'timestamp' in column_type_lower:
|
|
2271
|
+
# TIMESTAMP类型已经包含DEFAULT定义,不需要额外添加
|
|
2272
|
+
default_value = ""
|
|
2212
2273
|
elif 'date' in column_type_lower:
|
|
2213
|
-
if 'datetime' in column_type_lower
|
|
2214
|
-
default_value = " DEFAULT '
|
|
2274
|
+
if 'datetime' in column_type_lower:
|
|
2275
|
+
default_value = " DEFAULT '2000-01-01 00:00:00'"
|
|
2215
2276
|
else:
|
|
2216
|
-
default_value = " DEFAULT '
|
|
2277
|
+
default_value = " DEFAULT '2000-01-01'"
|
|
2217
2278
|
elif 'json' in column_type_lower:
|
|
2218
2279
|
default_value = " DEFAULT '{}'"
|
|
2219
2280
|
|
|
2281
|
+
# 对于TIMESTAMP类型,不添加额外的NULL约束,因为已经包含在类型定义中
|
|
2282
|
+
if 'timestamp' in column_type.lower() and ('default' in column_type.lower() or 'current_timestamp' in column_type.lower()):
|
|
2283
|
+
null_constraint = "" # TIMESTAMP类型已经包含完整定义
|
|
2284
|
+
default_value = ""
|
|
2285
|
+
|
|
2220
2286
|
sql = f'ALTER TABLE `{db_name}`.`{table_name}` ADD COLUMN `{column}` {column_type} {null_constraint}{default_value}'
|
|
2221
2287
|
|
|
2222
2288
|
conn = None
|
|
@@ -2615,39 +2681,31 @@ class MySQLUploader:
|
|
|
2615
2681
|
# 定义时间戳列名
|
|
2616
2682
|
created_col = '创建时间'
|
|
2617
2683
|
updated_col = '更新时间'
|
|
2618
|
-
current_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
|
2619
2684
|
|
|
2620
2685
|
# 复制set_typ以避免修改原始对象
|
|
2621
2686
|
updated_set_typ = set_typ.copy()
|
|
2622
2687
|
|
|
2623
|
-
#
|
|
2624
|
-
|
|
2625
|
-
updated_set_typ[
|
|
2688
|
+
# 使用MySQL的CURRENT_TIMESTAMP功能,按固定顺序添加时间戳列
|
|
2689
|
+
# 创建时间:插入时自动设置,更新时不变
|
|
2690
|
+
updated_set_typ[created_col] = 'TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP'
|
|
2691
|
+
# 更新时间:插入和更新时都自动设置为当前时间
|
|
2692
|
+
updated_set_typ[updated_col] = 'TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP'
|
|
2626
2693
|
|
|
2627
2694
|
# 处理DataFrame格式的数据
|
|
2628
2695
|
if hasattr(data, 'shape') and hasattr(data, 'columns'):
|
|
2629
2696
|
import pandas as pd
|
|
2630
2697
|
df = data.copy()
|
|
2631
2698
|
|
|
2632
|
-
#
|
|
2699
|
+
# 移除原始数据中可能存在的时间戳列,让MySQL自动处理
|
|
2633
2700
|
columns_to_remove = []
|
|
2634
2701
|
for col in df.columns:
|
|
2635
2702
|
if col in [created_col, updated_col]:
|
|
2636
2703
|
columns_to_remove.append(col)
|
|
2637
|
-
logger.warning('移除原始数据中的时间戳列', {
|
|
2638
|
-
'库': db_name,
|
|
2639
|
-
'表': table_name,
|
|
2640
|
-
'列': col,
|
|
2641
|
-
'原因': '与自动时间戳功能冲突'
|
|
2642
|
-
})
|
|
2643
2704
|
|
|
2644
2705
|
if columns_to_remove:
|
|
2645
2706
|
df = df.drop(columns=columns_to_remove)
|
|
2646
2707
|
|
|
2647
|
-
#
|
|
2648
|
-
df[created_col] = current_time
|
|
2649
|
-
df[updated_col] = current_time
|
|
2650
|
-
|
|
2708
|
+
# 不再手动添加时间戳列,让MySQL的CURRENT_TIMESTAMP自动处理
|
|
2651
2709
|
return df, updated_set_typ
|
|
2652
2710
|
|
|
2653
2711
|
# 处理字典或字典列表格式的数据
|
|
@@ -2670,10 +2728,7 @@ class MySQLUploader:
|
|
|
2670
2728
|
if key not in [created_col, updated_col]:
|
|
2671
2729
|
new_row[key] = value
|
|
2672
2730
|
|
|
2673
|
-
#
|
|
2674
|
-
new_row[created_col] = current_time
|
|
2675
|
-
new_row[updated_col] = current_time
|
|
2676
|
-
|
|
2731
|
+
# 不再手动添加时间戳,让MySQL的CURRENT_TIMESTAMP自动处理
|
|
2677
2732
|
processed_data.append(new_row)
|
|
2678
2733
|
|
|
2679
2734
|
# 如果原始数据是单个字典,返回单个字典
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
|
2
|
-
mdbq/__version__.py,sha256=
|
|
2
|
+
mdbq/__version__.py,sha256=zj2uAJqL04jV1wMjxAyT-2T4sjHxXiGCOfNFXJfgjz0,18
|
|
3
3
|
mdbq/auth/__init__.py,sha256=pnPMAt63sh1B6kEvmutUuro46zVf2v2YDAG7q-jV_To,24
|
|
4
4
|
mdbq/auth/auth_backend.py,sha256=iLN7AqiSq7fQgFtNtge_TIlVOR1hrCSZXH6oId6uGX4,116924
|
|
5
5
|
mdbq/auth/crypto.py,sha256=fcZRFCnrKVVdWDUx_zds51ynFYwS9DBvJOrRQVldrfM,15931
|
|
@@ -11,11 +11,11 @@ mdbq/log/mylogger.py,sha256=DyBftCMNLe1pTTXsa830pUtDISJxpJHFIradYtE3lFA,26418
|
|
|
11
11
|
mdbq/myconf/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
|
12
12
|
mdbq/myconf/myconf.py,sha256=x_9mS6wOfKVjCVElbruxj2yjzitbyKiTkf59quG-5Zg,32529
|
|
13
13
|
mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
|
|
14
|
-
mdbq/mysql/deduplicator.py,sha256=
|
|
14
|
+
mdbq/mysql/deduplicator.py,sha256=tzLIm9K9S0lGLlVTI0dDQVYpWX796XCuyufmw1lU26Y,73098
|
|
15
15
|
mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
|
|
16
16
|
mdbq/mysql/s_query.py,sha256=N2xHJf2CiUXjXIVBemdst-wamIP3908EGAJOFG13fCU,50475
|
|
17
17
|
mdbq/mysql/unique_.py,sha256=MaztT-WIyEQUs-OOYY4pFulgHVcXR1BfCy3QUz0XM_U,21127
|
|
18
|
-
mdbq/mysql/uploader.py,sha256=
|
|
18
|
+
mdbq/mysql/uploader.py,sha256=5zpvcQpa1BAtFKkVF-EQYXSieeF2hHoZQq6lmEaEnjY,133474
|
|
19
19
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
|
20
20
|
mdbq/other/download_sku_picture.py,sha256=X66sVdvVgzoNzmgVJyPtd7bjEvctEKtLPblEPF65EWc,46940
|
|
21
21
|
mdbq/other/error_handler.py,sha256=4p5haAXSY-P78stp4Xwo_MwAngWYqyKj5ogWIuYXMeY,12631
|
|
@@ -35,7 +35,7 @@ mdbq/route/routes.py,sha256=QVGfTvDgu0CpcKCvk1ra74H8uojgqTLUav1fnVAqLEA,29433
|
|
|
35
35
|
mdbq/selenium/__init__.py,sha256=AKzeEceqZyvqn2dEDoJSzDQnbuENkJSHAlbHAD0u0ZI,10
|
|
36
36
|
mdbq/selenium/get_driver.py,sha256=1NTlVUE6QsyjTrVVVqTO2LOnYf578ccFWlWnvIXGtic,20903
|
|
37
37
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
|
38
|
-
mdbq-4.1.
|
|
39
|
-
mdbq-4.1.
|
|
40
|
-
mdbq-4.1.
|
|
41
|
-
mdbq-4.1.
|
|
38
|
+
mdbq-4.1.14.dist-info/METADATA,sha256=KTtSRhxxSKyWNWlEvHI7YIhD6XDneGbuEw3Qa1ku5EE,364
|
|
39
|
+
mdbq-4.1.14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
40
|
+
mdbq-4.1.14.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
|
41
|
+
mdbq-4.1.14.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|