mdbq 4.0.30__py3-none-any.whl → 4.0.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/aggregation/query_data.py +12 -34
- mdbq/myconf/myconf.py +28 -0
- mdbq/mysql/deduplicator.py +5 -16
- mdbq/mysql/s_query.py +3 -3
- mdbq/mysql/unique_.py +2 -2
- mdbq/mysql/uploader.py +8 -0
- mdbq/other/download_sku_picture.py +55 -33
- {mdbq-4.0.30.dist-info → mdbq-4.0.32.dist-info}/METADATA +1 -1
- {mdbq-4.0.30.dist-info → mdbq-4.0.32.dist-info}/RECORD +12 -13
- mdbq/myconf/myconf_bak.py +0 -816
- {mdbq-4.0.30.dist-info → mdbq-4.0.32.dist-info}/WHEEL +0 -0
- {mdbq-4.0.30.dist-info → mdbq-4.0.32.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '4.0.30'
|
1
|
+
VERSION = '4.0.32'
|
mdbq/aggregation/query_data.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
# -*- coding: UTF-8 –*-
|
2
2
|
import re
|
3
|
-
# from mdbq.mysql import mysql
|
4
3
|
from mdbq.mysql import uploader
|
5
4
|
from mdbq.mysql import s_query
|
6
5
|
from mdbq.myconf import myconf
|
@@ -114,11 +113,13 @@ def upload_data_decorator(**upload_kwargs):
|
|
114
113
|
set_type = get_param_value('set_type', ['set_type', 'set_typ'])
|
115
114
|
db_name = get_param_value('db_name')
|
116
115
|
table_name = get_param_value('table_name')
|
116
|
+
logger.info('更新', {'库': db_name, '表': table_name, 'func': func.__name__, 'args': args, 'kwargs': kwargs})
|
117
117
|
|
118
118
|
# 执行原始函数
|
119
119
|
result = func(*args, **kwargs)
|
120
120
|
|
121
121
|
if result is None:
|
122
|
+
logger.info('函数返回None,直接返回原结果,不执行上传', {'库': db_name, '表': table_name, 'func': func.__name__})
|
122
123
|
return None
|
123
124
|
|
124
125
|
# 处理 DataFrame 结果
|
@@ -143,18 +144,19 @@ def upload_data_decorator(**upload_kwargs):
|
|
143
144
|
}
|
144
145
|
|
145
146
|
uld.upload_data(data=result, **merged_kwargs)
|
147
|
+
logger.info('上传完成', {'库': db_name, '表': table_name, 'func': func.__name__})
|
146
148
|
return True
|
147
149
|
|
148
150
|
# 处理元组结果
|
149
151
|
elif isinstance(result, tuple):
|
150
152
|
if len(result) < 2:
|
151
|
-
logger.warning('函数返回的元组长度小于2,直接返回原结果,不执行上传', {'
|
153
|
+
logger.warning('函数返回的元组长度小于2,直接返回原结果,不执行上传', {'库': db_name, '表': table_name, 'func': func.__name__})
|
152
154
|
return result
|
153
155
|
|
154
156
|
df, extra_kwargs = result[0], result[1]
|
155
157
|
|
156
158
|
if not isinstance(df, (pd.DataFrame, list, dict)):
|
157
|
-
logger.warning('函数返回的元组第一个元素不是DataFrame/list/dict,直接返回原结果,不执行上传', {'
|
159
|
+
logger.warning('函数返回的元组第一个元素不是DataFrame/list/dict,直接返回原结果,不执行上传', {'库': db_name, '表': table_name, 'func': func.__name__})
|
158
160
|
return result
|
159
161
|
|
160
162
|
if set_type is not None:
|
@@ -179,13 +181,13 @@ def upload_data_decorator(**upload_kwargs):
|
|
179
181
|
}
|
180
182
|
|
181
183
|
uld.upload_data(data=df, **merged_kwargs)
|
182
|
-
|
184
|
+
logger.info('上传完成', {'库': db_name, '表': table_name, 'func': func.__name__})
|
183
185
|
return result if len(result) > 2 else True
|
184
|
-
|
186
|
+
logger.info('上传完成', {'库': db_name, '表': table_name, 'func': func.__name__})
|
185
187
|
return result
|
186
188
|
|
187
189
|
except Exception as e:
|
188
|
-
logger.error('数据上传失败', {'
|
190
|
+
logger.error('数据上传失败', {'库': db_name, '表': table_name, 'func': func.__name__, '错误': str(e)})
|
189
191
|
return False
|
190
192
|
|
191
193
|
return wrapper
|
@@ -311,7 +313,7 @@ class MysqlDatasQuery:
|
|
311
313
|
'成交笔数', '成交金额', '直接成交笔数', '直接成交金额', '自然流量曝光量']]
|
312
314
|
}
|
313
315
|
)
|
314
|
-
logger.info('
|
316
|
+
logger.info('更新', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
315
317
|
|
316
318
|
uld.upload_data(
|
317
319
|
db_name=db_name,
|
@@ -362,7 +364,7 @@ class MysqlDatasQuery:
|
|
362
364
|
'花费': 'decimal(12,2)',
|
363
365
|
'更新时间': 'timestamp',
|
364
366
|
}
|
365
|
-
logger.info('
|
367
|
+
logger.info('更新', {'主机': f'{host}:{port}', '库': '属性设置3', '表': '商品索引表_主推排序调用'})
|
366
368
|
uld.upload_data(
|
367
369
|
db_name='属性设置3',
|
368
370
|
table_name='商品索引表_主推排序调用',
|
@@ -471,7 +473,6 @@ class MysqlDatasQuery:
|
|
471
473
|
'直接成交笔数': 'int',
|
472
474
|
'直接成交金额': 'decimal(12,2)',
|
473
475
|
}
|
474
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
475
476
|
return df, {
|
476
477
|
'db_name': db_name,
|
477
478
|
'table_name': table_name,
|
@@ -578,7 +579,6 @@ class MysqlDatasQuery:
|
|
578
579
|
'直接成交笔数': 'int',
|
579
580
|
'直接成交金额': 'decimal(12,2)',
|
580
581
|
}
|
581
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
582
582
|
return df, {
|
583
583
|
'db_name': db_name,
|
584
584
|
'table_name': table_name,
|
@@ -685,7 +685,6 @@ class MysqlDatasQuery:
|
|
685
685
|
'直接成交笔数': 'int',
|
686
686
|
'直接成交金额': 'decimal(12,2)',
|
687
687
|
}
|
688
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
689
688
|
return df, {
|
690
689
|
'db_name': db_name,
|
691
690
|
'table_name': table_name,
|
@@ -761,7 +760,6 @@ class MysqlDatasQuery:
|
|
761
760
|
'件均价': 'mediumint',
|
762
761
|
'价格带': 'varchar(100)',
|
763
762
|
}
|
764
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
765
763
|
return df, {
|
766
764
|
'db_name': db_name,
|
767
765
|
'table_name': table_name,
|
@@ -969,7 +967,6 @@ class MysqlDatasQuery:
|
|
969
967
|
'用户年龄': 'varchar(100)',
|
970
968
|
'人群分类': 'varchar(100)',
|
971
969
|
}
|
972
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
973
970
|
return df, {
|
974
971
|
'db_name': db_name,
|
975
972
|
'table_name': table_name,
|
@@ -1103,7 +1100,6 @@ class MysqlDatasQuery:
|
|
1103
1100
|
'是否品牌词': 'varchar(100)',
|
1104
1101
|
'词分类': 'varchar(100)',
|
1105
1102
|
}
|
1106
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
1107
1103
|
return df, {
|
1108
1104
|
'db_name': db_name,
|
1109
1105
|
'table_name': table_name,
|
@@ -1270,7 +1266,6 @@ class MysqlDatasQuery:
|
|
1270
1266
|
'直接成交笔数': 'int',
|
1271
1267
|
'直接成交金额': 'decimal(12,2)',
|
1272
1268
|
}
|
1273
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
1274
1269
|
return df, {
|
1275
1270
|
'db_name': db_name,
|
1276
1271
|
'table_name': table_name,
|
@@ -1379,7 +1374,7 @@ class MysqlDatasQuery:
|
|
1379
1374
|
'品牌搜索量': 'int',
|
1380
1375
|
'品牌搜索人数': 'int',
|
1381
1376
|
}
|
1382
|
-
logger.info('
|
1377
|
+
logger.info('更新', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
1383
1378
|
return df, {
|
1384
1379
|
'db_name': db_name,
|
1385
1380
|
'table_name': table_name,
|
@@ -1428,7 +1423,7 @@ class MysqlDatasQuery:
|
|
1428
1423
|
'三级类目': 'varchar(100)',
|
1429
1424
|
'更新时间': 'timestamp'
|
1430
1425
|
}
|
1431
|
-
logger.info('
|
1426
|
+
logger.info('更新', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
1432
1427
|
return df, {
|
1433
1428
|
'db_name': db_name,
|
1434
1429
|
'table_name': table_name,
|
@@ -1479,7 +1474,6 @@ class MysqlDatasQuery:
|
|
1479
1474
|
'sku地址': 'varchar(255)',
|
1480
1475
|
'更新时间': 'timestamp'
|
1481
1476
|
}
|
1482
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
1483
1477
|
return df, {
|
1484
1478
|
'db_name': db_name,
|
1485
1479
|
'table_name': table_name,
|
@@ -1529,7 +1523,6 @@ class MysqlDatasQuery:
|
|
1529
1523
|
df['上市年份'] = df['商品id'].apply(lambda x: check_year(x))
|
1530
1524
|
p = df.pop('上市年份')
|
1531
1525
|
df.insert(loc=5, column='上市年份', value=p)
|
1532
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
1533
1526
|
set_typ = {
|
1534
1527
|
'日期': 'date',
|
1535
1528
|
'店铺id': 'bigint',
|
@@ -1666,7 +1659,6 @@ class MysqlDatasQuery:
|
|
1666
1659
|
'更新时间': 'timestamp',
|
1667
1660
|
'上市年份': 'varchar(100)',
|
1668
1661
|
}
|
1669
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
1670
1662
|
return df, {
|
1671
1663
|
'db_name': db_name,
|
1672
1664
|
'table_name': table_name,
|
@@ -1772,7 +1764,6 @@ class MysqlDatasQuery:
|
|
1772
1764
|
'二级来源索引': 'smallint',
|
1773
1765
|
'三级来源索引': 'smallint',
|
1774
1766
|
}
|
1775
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
1776
1767
|
return df, {
|
1777
1768
|
'db_name': db_name,
|
1778
1769
|
'table_name': table_name,
|
@@ -1810,7 +1801,6 @@ class MysqlDatasQuery:
|
|
1810
1801
|
'天猫页面价': 'decimal(10,2)',
|
1811
1802
|
'天猫中促价': 'decimal(10,2)',
|
1812
1803
|
}
|
1813
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
1814
1804
|
return df, {
|
1815
1805
|
'db_name': db_name,
|
1816
1806
|
'table_name': table_name,
|
@@ -1915,7 +1905,6 @@ class MysqlDatasQuery:
|
|
1915
1905
|
'直接加购数': 'int',
|
1916
1906
|
'总加购数': 'int',
|
1917
1907
|
}
|
1918
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
1919
1908
|
return df, {
|
1920
1909
|
'db_name': db_name,
|
1921
1910
|
'table_name': table_name,
|
@@ -1983,7 +1972,6 @@ class MysqlDatasQuery:
|
|
1983
1972
|
'核心位置展现量': 'int',
|
1984
1973
|
'核心位置点击量': 'int',
|
1985
1974
|
}
|
1986
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
1987
1975
|
return df, {
|
1988
1976
|
'db_name': db_name,
|
1989
1977
|
'table_name': table_name,
|
@@ -2083,7 +2071,6 @@ class MysqlDatasQuery:
|
|
2083
2071
|
'k_是否品牌词': 'varchar(100)',
|
2084
2072
|
's_是否品牌词': 'varchar(100)',
|
2085
2073
|
}
|
2086
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
2087
2074
|
return df, {
|
2088
2075
|
'db_name': db_name,
|
2089
2076
|
'table_name': table_name,
|
@@ -2146,7 +2133,6 @@ class MysqlDatasQuery:
|
|
2146
2133
|
'成交金额': 'decimal(10,2)',
|
2147
2134
|
'sku_id': 'varchar(100)',
|
2148
2135
|
}
|
2149
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
2150
2136
|
return df, {
|
2151
2137
|
'db_name': db_name,
|
2152
2138
|
'table_name': table_name,
|
@@ -2209,7 +2195,6 @@ class MysqlDatasQuery:
|
|
2209
2195
|
'成交金额': 'decimal(10,2)',
|
2210
2196
|
'spu_id': 'varchar(100)',
|
2211
2197
|
}
|
2212
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
2213
2198
|
return df, {
|
2214
2199
|
'db_name': db_name,
|
2215
2200
|
'table_name': table_name,
|
@@ -2283,7 +2268,6 @@ class MysqlDatasQuery:
|
|
2283
2268
|
'客单价': 'decimal(10,2)',
|
2284
2269
|
'uv价值': 'decimal(10,2)',
|
2285
2270
|
}
|
2286
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
2287
2271
|
return df, {
|
2288
2272
|
'db_name': db_name,
|
2289
2273
|
'table_name': table_name,
|
@@ -2386,7 +2370,6 @@ class MysqlDatasQuery:
|
|
2386
2370
|
'封面图': 'text',
|
2387
2371
|
'更新时间': 'timestamp',
|
2388
2372
|
}
|
2389
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
2390
2373
|
ordered_columns = [
|
2391
2374
|
'日期',
|
2392
2375
|
'店铺名称',
|
@@ -2882,7 +2865,6 @@ class MysqlDatasQuery:
|
|
2882
2865
|
'成交笔数': 'int',
|
2883
2866
|
'成交金额': 'decimal(12,2)',
|
2884
2867
|
}
|
2885
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
2886
2868
|
return df, {
|
2887
2869
|
'db_name': db_name,
|
2888
2870
|
'table_name': table_name,
|
@@ -2972,7 +2954,6 @@ class MysqlDatasQuery:
|
|
2972
2954
|
'数据更新时间': 'timestamp',
|
2973
2955
|
'更新时间': 'timestamp',
|
2974
2956
|
}
|
2975
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
2976
2957
|
return df, {
|
2977
2958
|
'db_name': db_name,
|
2978
2959
|
'table_name': table_name,
|
@@ -3021,7 +3002,6 @@ class MysqlDatasQuery:
|
|
3021
3002
|
'长周期成交价值': 'decimal(13, 2)',
|
3022
3003
|
'达摩盘id': 'int',
|
3023
3004
|
}
|
3024
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
3025
3005
|
return df, {
|
3026
3006
|
'db_name': db_name,
|
3027
3007
|
'table_name': table_name,
|
@@ -3128,7 +3108,6 @@ class MysqlDatasQuery:
|
|
3128
3108
|
'消费能力等级': 'varchar(100)',
|
3129
3109
|
'用户性别': 'varchar(100)',
|
3130
3110
|
}
|
3131
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
3132
3111
|
return df, {
|
3133
3112
|
'db_name': db_name,
|
3134
3113
|
'table_name': table_name,
|
@@ -3517,7 +3496,6 @@ class MysqlDatasQuery:
|
|
3517
3496
|
'自然流量曝光量': 'int',
|
3518
3497
|
}
|
3519
3498
|
df['日期'] = pd.to_datetime(df['日期'], format='%Y-%m-%d', errors='ignore')
|
3520
|
-
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
3521
3499
|
return df, {
|
3522
3500
|
'db_name': db_name,
|
3523
3501
|
'table_name': table_name,
|
mdbq/myconf/myconf.py
CHANGED
@@ -151,9 +151,12 @@ class ConfigParser:
|
|
151
151
|
def open(self, file_path: Union[str, Path]) -> 'ConfigParser':
|
152
152
|
"""打开配置文件"""
|
153
153
|
file_path = Path(file_path)
|
154
|
+
logger.debug(f'尝试打开配置文件: {file_path}')
|
154
155
|
if not file_path.exists() and not self.options.auto_create:
|
156
|
+
logger.error(f'配置文件不存在: {file_path}')
|
155
157
|
raise ConfigException.file_not_found(file_path)
|
156
158
|
self._current_file = file_path
|
159
|
+
logger.debug(f'配置文件已打开: {file_path}')
|
157
160
|
return self
|
158
161
|
|
159
162
|
def _ensure_file_open(self) -> None:
|
@@ -208,6 +211,7 @@ class ConfigParser:
|
|
208
211
|
|
209
212
|
def _update_cache(self, file_path: str, config: Dict[str, Any]) -> None:
|
210
213
|
"""更新配置缓存"""
|
214
|
+
logger.debug(f'更新配置缓存: {file_path}')
|
211
215
|
self._config_cache[file_path] = config
|
212
216
|
self._cache_timestamps[file_path] = time.time()
|
213
217
|
|
@@ -232,11 +236,13 @@ class ConfigParser:
|
|
232
236
|
def _clear_cache(self, file_path: Optional[str] = None) -> None:
|
233
237
|
"""清除配置缓存"""
|
234
238
|
if file_path:
|
239
|
+
logger.debug(f'清除指定文件的缓存: {file_path}')
|
235
240
|
self._config_cache.pop(file_path, None)
|
236
241
|
self._cache_timestamps.pop(file_path, None)
|
237
242
|
self._comments_cache.pop(file_path, None)
|
238
243
|
self._section_map.pop(file_path, None)
|
239
244
|
else:
|
245
|
+
logger.debug('清除所有配置缓存')
|
240
246
|
self._config_cache.clear()
|
241
247
|
self._cache_timestamps.clear()
|
242
248
|
self._comments_cache.clear()
|
@@ -317,10 +323,12 @@ class ConfigParser:
|
|
317
323
|
default: Any = None, value_type: Optional[Type[T]] = None,
|
318
324
|
file_path: Optional[Union[str, Path]] = None) -> T:
|
319
325
|
"""获取指定配置项的值"""
|
326
|
+
logger.debug(f'获取配置项: section={section}, key={key}, file_path={file_path}')
|
320
327
|
if file_path is None:
|
321
328
|
self._ensure_file_open()
|
322
329
|
file_path = self._current_file
|
323
330
|
if not self._validate_key(key):
|
331
|
+
logger.error(f'无效的键名: {key}')
|
324
332
|
raise ConfigException.invalid_key_error(key, file_path, section)
|
325
333
|
config = self.read(file_path)
|
326
334
|
section = section or self.options.default_section
|
@@ -328,11 +336,15 @@ class ConfigParser:
|
|
328
336
|
original_section = self._get_original_section(str(file_path), normalized_section)
|
329
337
|
if original_section is None:
|
330
338
|
if default is not None:
|
339
|
+
logger.warning(f'节不存在,返回默认值: section={section}, key={key}, default={default}')
|
331
340
|
return default
|
341
|
+
logger.error(f'配置节不存在: {section}')
|
332
342
|
raise ConfigException.section_not_found(file_path, section)
|
333
343
|
if key not in config[original_section]:
|
334
344
|
if default is not None:
|
345
|
+
logger.warning(f'键不存在,返回默认值: section={section}, key={key}, default={default}')
|
335
346
|
return default
|
347
|
+
logger.error(f'配置键不存在: {key}')
|
336
348
|
raise ConfigException.key_not_found(file_path, original_section, key)
|
337
349
|
value = config[original_section][key]
|
338
350
|
if value_type is not None:
|
@@ -344,6 +356,7 @@ class ConfigParser:
|
|
344
356
|
value_types: Optional[Dict[str, Type]] = None,
|
345
357
|
file_path: Optional[Union[str, Path]] = None) -> Dict[str, Any]:
|
346
358
|
"""批量获取多个配置项的值"""
|
359
|
+
logger.debug(f'批量获取配置项: section={section}, keys={keys}, file_path={file_path}')
|
347
360
|
if file_path is None:
|
348
361
|
self._ensure_file_open()
|
349
362
|
file_path = self._current_file
|
@@ -375,6 +388,7 @@ class ConfigParser:
|
|
375
388
|
value_types: Optional[Dict[str, Type]] = None,
|
376
389
|
file_path: Optional[Union[str, Path]] = None) -> Tuple[Any, ...]:
|
377
390
|
"""获取指定节点下多个键的值元组"""
|
391
|
+
logger.debug(f'获取节下多个键的值: section={section}, keys={keys}, file_path={file_path}')
|
378
392
|
if file_path is None:
|
379
393
|
self._ensure_file_open()
|
380
394
|
file_path = self._current_file
|
@@ -405,6 +419,7 @@ class ConfigParser:
|
|
405
419
|
value_type: Optional[Type] = None,
|
406
420
|
file_path: Optional[Union[str, Path]] = None) -> None:
|
407
421
|
"""设置指定配置项的值"""
|
422
|
+
logger.debug(f'准备写入配置项: section={section}, key={key}, value={value}, file_path={file_path}')
|
408
423
|
if file_path is None:
|
409
424
|
self._ensure_file_open()
|
410
425
|
file_path = self._current_file
|
@@ -412,6 +427,7 @@ class ConfigParser:
|
|
412
427
|
file_path = Path(file_path)
|
413
428
|
|
414
429
|
if not self._validate_key(key):
|
430
|
+
logger.error(f'无效的键名: {key}')
|
415
431
|
raise ConfigException.invalid_key_error(key, file_path, section)
|
416
432
|
|
417
433
|
section = section or self.options.default_section
|
@@ -436,6 +452,7 @@ class ConfigParser:
|
|
436
452
|
else:
|
437
453
|
value = value_type(value)
|
438
454
|
except (ValueError, TypeError) as e:
|
455
|
+
logger.error(f'类型转换失败: value={value}, type={value_type}, error={e}')
|
439
456
|
raise ConfigException.conversion_error(value, value_type, file_path, section=section, key=key)
|
440
457
|
|
441
458
|
if isinstance(value, bool):
|
@@ -492,17 +509,21 @@ class ConfigParser:
|
|
492
509
|
file.write(f'{key}={value}\n')
|
493
510
|
|
494
511
|
self._clear_cache(str(file_path))
|
512
|
+
logger.info(f'配置项写入成功: section={section}, key={key}, value={value}, file_path={file_path}')
|
495
513
|
|
496
514
|
except Exception as e:
|
515
|
+
logger.error(f'写入配置项失败: section={section}, key={key}, value={value}, file_path={file_path}, error={e}')
|
497
516
|
raise ConfigException.write_error(file_path, e)
|
498
517
|
|
499
518
|
def set_values(self, section: Optional[str] = None, values: Dict[str, Any] = None,
|
500
519
|
value_types: Optional[Dict[str, Type]] = None,
|
501
520
|
file_path: Optional[Union[str, Path]] = None) -> None:
|
502
521
|
"""批量设置多个配置项的值"""
|
522
|
+
logger.debug(f'批量写入配置项: section={section}, keys={list(values.keys())}, file_path={file_path}')
|
503
523
|
for key, value in values.items():
|
504
524
|
value_type = value_types.get(key) if value_types else None
|
505
525
|
self.set_value(section, key, value, value_type, file_path)
|
526
|
+
logger.info(f'批量写入配置项完成: section={section}, file_path={file_path}')
|
506
527
|
|
507
528
|
def validate_config(self, section: Optional[str] = None, schema: Dict[str, Type] = None,
|
508
529
|
file_path: Optional[Union[str, Path]] = None) -> bool:
|
@@ -539,12 +560,15 @@ class ConfigParser:
|
|
539
560
|
else:
|
540
561
|
file_path = Path(file_path)
|
541
562
|
|
563
|
+
logger.debug(f'开始读取配置文件: {file_path}')
|
542
564
|
cached_config = self._get_cached_config(str(file_path))
|
543
565
|
if cached_config is not None:
|
566
|
+
logger.debug(f'命中配置缓存: {file_path}')
|
544
567
|
return cached_config
|
545
568
|
|
546
569
|
if not file_path.exists():
|
547
570
|
if not self.options.auto_create:
|
571
|
+
logger.error(f'配置文件不存在: {file_path}')
|
548
572
|
raise ConfigException.file_not_found(file_path)
|
549
573
|
logger.info(f'配置文件不存在,将创建: {file_path}')
|
550
574
|
file_path.parent.mkdir(parents=True, exist_ok=True)
|
@@ -568,6 +592,7 @@ class ConfigParser:
|
|
568
592
|
if stripped_line.startswith('[') and stripped_line.endswith(']'):
|
569
593
|
current_section = stripped_line[1:-1]
|
570
594
|
if not self._validate_key(current_section):
|
595
|
+
logger.error(f'无效的节名: {current_section}')
|
571
596
|
raise ConfigException.invalid_section_error(current_section, file_path)
|
572
597
|
self._update_section_map(str(file_path), current_section)
|
573
598
|
if current_section not in config:
|
@@ -581,6 +606,7 @@ class ConfigParser:
|
|
581
606
|
if key_value:
|
582
607
|
key, value = key_value
|
583
608
|
if not self._validate_key(key):
|
609
|
+
logger.error(f'无效的键名: {key}')
|
584
610
|
raise ConfigException.invalid_key_error(key, file_path, current_section)
|
585
611
|
value, comment = self._extract_comment(value)
|
586
612
|
|
@@ -595,9 +621,11 @@ class ConfigParser:
|
|
595
621
|
self._comments_cache.setdefault(str(file_path), {}).setdefault(current_section, []).append(comment)
|
596
622
|
|
597
623
|
self._update_cache(str(file_path), config)
|
624
|
+
logger.debug(f'配置文件读取成功: {file_path}')
|
598
625
|
return config
|
599
626
|
|
600
627
|
except Exception as e:
|
628
|
+
logger.error(f'读取配置文件失败: {file_path}, error={e}')
|
601
629
|
raise ConfigException.read_error(file_path, e)
|
602
630
|
|
603
631
|
|
mdbq/mysql/deduplicator.py
CHANGED
@@ -64,8 +64,8 @@ class MySQLDeduplicator:
|
|
64
64
|
max_retries: int = 3,
|
65
65
|
retry_waiting_time: int = 5,
|
66
66
|
pool_size: int = 20,
|
67
|
-
mincached: int =
|
68
|
-
maxcached: int =
|
67
|
+
mincached: int = 0,
|
68
|
+
maxcached: int = 0,
|
69
69
|
primary_key: str = 'id',
|
70
70
|
date_range: Optional[List[str]] = None,
|
71
71
|
recent_month: Optional[int] = None,
|
@@ -85,8 +85,9 @@ class MySQLDeduplicator:
|
|
85
85
|
:param exclude_databases: 排除的数据库名列表
|
86
86
|
:param exclude_tables: 排除的表名字典 {数据库名: [表名, ...]}
|
87
87
|
:param duplicate_keep_mode: 'keep_one'(默认,重复组保留一条),'remove_all'(全部删除重复组)
|
88
|
-
:param mincached:
|
89
|
-
:param maxcached:
|
88
|
+
:param mincached: 空闲连接数量
|
89
|
+
:param maxcached: 最大空闲连接数, 0表示不设上限, 由连接池自动管理
|
90
|
+
|
90
91
|
"""
|
91
92
|
# 连接池状态标志
|
92
93
|
self._closed = False
|
@@ -978,18 +979,6 @@ class MySQLDeduplicator:
|
|
978
979
|
r[1] for db in all_results.values()
|
979
980
|
for r in db.values()
|
980
981
|
)
|
981
|
-
logger.debug('全局完成', {
|
982
|
-
"总重复组": total_dup,
|
983
|
-
"总删除行": total_del,
|
984
|
-
"参数": {
|
985
|
-
"模拟运行": dry_run,
|
986
|
-
"并行处理": parallel,
|
987
|
-
'排除列': self.exclude_columns,
|
988
|
-
'重排id': reorder_id,
|
989
|
-
'use_python_dedup': use_python_dedup
|
990
|
-
},
|
991
|
-
"详细结果": dict(all_results)
|
992
|
-
})
|
993
982
|
# 只显示有删除的详细结果
|
994
983
|
if total_del > 0:
|
995
984
|
filtered_results = {
|
mdbq/mysql/s_query.py
CHANGED
@@ -35,7 +35,7 @@ class QueryDatas:
|
|
35
35
|
"""
|
36
36
|
|
37
37
|
def __init__(self, username: str, password: str, host: str, port: int, charset: str = 'utf8mb4',
|
38
|
-
pool_size: int = 20, mincached: int =
|
38
|
+
pool_size: int = 20, mincached: int = 0, maxcached: int = 0,
|
39
39
|
connect_timeout: int = 10, read_timeout: int = 30, write_timeout: int = 30,
|
40
40
|
max_retries: int = 3, retry_waiting_time: int = 5, collation: str = 'utf8mb4_0900_ai_ci') -> None:
|
41
41
|
"""
|
@@ -48,8 +48,8 @@ class QueryDatas:
|
|
48
48
|
port: 数据库端口
|
49
49
|
charset: 字符集,默认utf8mb4
|
50
50
|
pool_size: 最大活动连接数,默认20
|
51
|
-
mincached:
|
52
|
-
maxcached:
|
51
|
+
mincached: 空闲连接数量
|
52
|
+
maxcached: 最大空闲连接数, 0表示不设上限, 由连接池自动管理
|
53
53
|
connect_timeout: 连接超时时间,默认10秒
|
54
54
|
read_timeout: 读取超时时间,默认30秒
|
55
55
|
write_timeout: 写入超时时间,默认30秒
|
mdbq/mysql/unique_.py
CHANGED
mdbq/mysql/uploader.py
CHANGED
@@ -87,6 +87,8 @@ class MySQLUploader:
|
|
87
87
|
max_retries: int = 10,
|
88
88
|
retry_waiting_time: int = 10,
|
89
89
|
pool_size: int = 5,
|
90
|
+
mincached: int = 0,
|
91
|
+
maxcached: int = 0,
|
90
92
|
connect_timeout: int = 10,
|
91
93
|
read_timeout: int = 30,
|
92
94
|
write_timeout: int = 30,
|
@@ -104,6 +106,8 @@ class MySQLUploader:
|
|
104
106
|
:param max_retries: 最大重试次数,默认为10
|
105
107
|
:param retry_waiting_time: 重试间隔(秒),默认为10
|
106
108
|
:param pool_size: 连接池大小,默认为5
|
109
|
+
:param mincached: 空闲连接数量
|
110
|
+
:param maxcached: 最大空闲连接数, 0表示不设上限, 由连接池自动管理
|
107
111
|
:param connect_timeout: 连接超时(秒),默认为10
|
108
112
|
:param read_timeout: 读取超时(秒),默认为30
|
109
113
|
:param write_timeout: 写入超时(秒),默认为30
|
@@ -118,6 +122,8 @@ class MySQLUploader:
|
|
118
122
|
self.max_retries = max(max_retries, 1)
|
119
123
|
self.retry_waiting_time = max(retry_waiting_time, 1)
|
120
124
|
self.pool_size = max(pool_size, 1)
|
125
|
+
self.mincached = mincached
|
126
|
+
self.maxcached = maxcached
|
121
127
|
self.connect_timeout = connect_timeout
|
122
128
|
self.read_timeout = read_timeout
|
123
129
|
self.write_timeout = write_timeout
|
@@ -149,6 +155,8 @@ class MySQLUploader:
|
|
149
155
|
'charset': self.charset,
|
150
156
|
'cursorclass': pymysql.cursors.DictCursor,
|
151
157
|
'maxconnections': self.pool_size,
|
158
|
+
'mincached': self.mincached,
|
159
|
+
'maxcached': self.maxcached,
|
152
160
|
'ping': 7,
|
153
161
|
'connect_timeout': self.connect_timeout,
|
154
162
|
'read_timeout': self.read_timeout,
|