mdbq 4.0.7__tar.gz → 4.0.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdbq-4.0.7 → mdbq-4.0.9}/PKG-INFO +1 -1
- mdbq-4.0.9/mdbq/__version__.py +1 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/aggregation/query_data.py +83 -62
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/log/mylogger.py +10 -47
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/mysql/s_query.py +1 -50
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/mysql/unique_.py +8 -8
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/mysql/uploader.py +58 -9
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/redis/getredis.py +0 -1
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq.egg-info/PKG-INFO +1 -1
- mdbq-4.0.7/mdbq/__version__.py +0 -1
- {mdbq-4.0.7 → mdbq-4.0.9}/README.txt +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/__init__.py +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/aggregation/__init__.py +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/config/__init__.py +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/config/config.py +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/log/__init__.py +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/log/spider_logging.py +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/mysql/__init__.py +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/mysql/deduplicator.py +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/mysql/mysql.py +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/other/__init__.py +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/other/download_sku_picture.py +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/other/otk.py +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/other/pov_city.py +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/other/ua_sj.py +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/pbix/__init__.py +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/redis/__init__.py +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/spider/__init__.py +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq/spider/aikucun.py +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq.egg-info/SOURCES.txt +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/setup.cfg +0 -0
- {mdbq-4.0.7 → mdbq-4.0.9}/setup.py +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
VERSION = '4.0.9'
|
@@ -119,9 +119,16 @@ def upload_data_decorator(**upload_kwargs):
|
|
119
119
|
return None
|
120
120
|
|
121
121
|
# 处理 DataFrame 结果
|
122
|
-
if isinstance(result, pd.DataFrame):
|
122
|
+
if isinstance(result, (pd.DataFrame, list, dict)):
|
123
123
|
if set_type is not None:
|
124
|
-
|
124
|
+
if isinstance(result, pd.DataFrame):
|
125
|
+
result = reorder_columns(result, set_type)
|
126
|
+
elif isinstance(result, list):
|
127
|
+
# 如果是list,转换为DataFrame以调整列顺序
|
128
|
+
result = reorder_columns(pd.DataFrame(result), set_type)
|
129
|
+
elif isinstance(result, dict):
|
130
|
+
# 如果是dict,转换为DataFrame以调整列顺序
|
131
|
+
result = reorder_columns(pd.DataFrame([result]), set_type)
|
125
132
|
|
126
133
|
# 合并参数
|
127
134
|
merged_kwargs = {
|
@@ -143,12 +150,19 @@ def upload_data_decorator(**upload_kwargs):
|
|
143
150
|
|
144
151
|
df, extra_kwargs = result[0], result[1]
|
145
152
|
|
146
|
-
if not isinstance(df, pd.DataFrame):
|
147
|
-
logger.warning('函数返回的元组第一个元素不是DataFrame,直接返回原结果,不执行上传', {'函数': func.__name__, '库': db_name, '表': table_name})
|
153
|
+
if not isinstance(df, (pd.DataFrame, list, dict)):
|
154
|
+
logger.warning('函数返回的元组第一个元素不是DataFrame/list/dict,直接返回原结果,不执行上传', {'函数': func.__name__, '库': db_name, '表': table_name})
|
148
155
|
return result
|
149
156
|
|
150
157
|
if set_type is not None:
|
151
|
-
|
158
|
+
if isinstance(df, pd.DataFrame):
|
159
|
+
df = reorder_columns(df, set_type)
|
160
|
+
elif isinstance(df, list):
|
161
|
+
# 如果是list,转换为DataFrame以调整列顺序
|
162
|
+
df = reorder_columns(pd.DataFrame(df), set_type)
|
163
|
+
elif isinstance(df, dict):
|
164
|
+
# 如果是dict,转换为DataFrame以调整列顺序
|
165
|
+
df = reorder_columns(pd.DataFrame([df]), set_type)
|
152
166
|
result = (df, extra_kwargs) + result[2:]
|
153
167
|
|
154
168
|
# 合并参数
|
@@ -2370,61 +2384,57 @@ class MysqlDatasQuery:
|
|
2370
2384
|
'更新时间': 'timestamp',
|
2371
2385
|
}
|
2372
2386
|
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
2373
|
-
|
2374
|
-
'日期'
|
2375
|
-
'店铺名称'
|
2376
|
-
'场次信息'
|
2377
|
-
'场次id'
|
2378
|
-
'直播开播时间'
|
2379
|
-
'开播时长'
|
2380
|
-
'封面图点击率'
|
2381
|
-
'观看人数'
|
2382
|
-
'观看次数'
|
2383
|
-
'新增粉丝数'
|
2384
|
-
'流量券消耗'
|
2385
|
-
'观看总时长'
|
2386
|
-
'人均观看时长'
|
2387
|
-
'次均观看时长'
|
2388
|
-
'商品点击人数'
|
2389
|
-
'商品点击次数'
|
2390
|
-
'商品点击率'
|
2391
|
-
'加购人数'
|
2392
|
-
'加购件数'
|
2393
|
-
'加购次数'
|
2394
|
-
'成交金额'
|
2395
|
-
'成交人数'
|
2396
|
-
'成交件数'
|
2397
|
-
'成交笔数'
|
2398
|
-
'成交转化率'
|
2399
|
-
'退款人数'
|
2400
|
-
'退款笔数'
|
2401
|
-
'退款件数'
|
2402
|
-
'退款金额'
|
2403
|
-
'预售定金支付金额'
|
2404
|
-
'预售预估总金额'
|
2405
|
-
|
2406
|
-
|
2407
|
-
for
|
2408
|
-
|
2409
|
-
|
2410
|
-
|
2411
|
-
|
2412
|
-
|
2413
|
-
|
2414
|
-
|
2415
|
-
|
2416
|
-
|
2417
|
-
|
2418
|
-
|
2419
|
-
|
2420
|
-
|
2421
|
-
|
2422
|
-
|
2423
|
-
|
2424
|
-
'unique_keys': [['场次id']], # 唯一约束列表
|
2425
|
-
}
|
2426
|
-
else:
|
2427
|
-
return None, None
|
2387
|
+
ordered_columns = [
|
2388
|
+
'日期',
|
2389
|
+
'店铺名称',
|
2390
|
+
'场次信息',
|
2391
|
+
'场次id',
|
2392
|
+
'直播开播时间',
|
2393
|
+
'开播时长',
|
2394
|
+
'封面图点击率',
|
2395
|
+
'观看人数',
|
2396
|
+
'观看次数',
|
2397
|
+
'新增粉丝数',
|
2398
|
+
'流量券消耗',
|
2399
|
+
'观看总时长',
|
2400
|
+
'人均观看时长',
|
2401
|
+
'次均观看时长',
|
2402
|
+
'商品点击人数',
|
2403
|
+
'商品点击次数',
|
2404
|
+
'商品点击率',
|
2405
|
+
'加购人数',
|
2406
|
+
'加购件数',
|
2407
|
+
'加购次数',
|
2408
|
+
'成交金额',
|
2409
|
+
'成交人数',
|
2410
|
+
'成交件数',
|
2411
|
+
'成交笔数',
|
2412
|
+
'成交转化率',
|
2413
|
+
'退款人数',
|
2414
|
+
'退款笔数',
|
2415
|
+
'退款件数',
|
2416
|
+
'退款金额',
|
2417
|
+
'预售定金支付金额',
|
2418
|
+
'预售预估总金额',
|
2419
|
+
]
|
2420
|
+
# 使用reindex重排列顺序,未定义的列会自动放在最后
|
2421
|
+
df = df.reindex(columns=[col for col in ordered_columns if col in df.columns] +
|
2422
|
+
[col for col in df.columns if col not in ordered_columns])
|
2423
|
+
return df, {
|
2424
|
+
'db_name': db_name,
|
2425
|
+
'table_name': table_name,
|
2426
|
+
'set_typ': set_typ,
|
2427
|
+
'primary_keys': [], # 创建唯一主键
|
2428
|
+
'check_duplicate': False, # 检查重复数据
|
2429
|
+
'duplicate_columns': [], # 指定排重的组合键
|
2430
|
+
'update_on_duplicate': True, # 更新旧数据
|
2431
|
+
'allow_null': False, # 允许插入空值
|
2432
|
+
'partition_by': None, # 分表方式
|
2433
|
+
'partition_date_column': '日期', # 用于分表的日期列名,默认为'日期'
|
2434
|
+
'indexes': [], # 普通索引列
|
2435
|
+
'transaction_mode': 'batch', # 事务模式
|
2436
|
+
'unique_keys': [['场次id']], # 唯一约束列表
|
2437
|
+
}
|
2428
2438
|
|
2429
2439
|
# @try_except
|
2430
2440
|
@upload_data_decorator()
|
@@ -3129,7 +3139,7 @@ class MysqlDatasQuery:
|
|
3129
3139
|
'partition_date_column': '日期', # 用于分表的日期列名,默认为'日期'
|
3130
3140
|
'indexes': [], # 普通索引列
|
3131
3141
|
'transaction_mode': 'batch', # 事务模式
|
3132
|
-
'unique_keys': [['日期', '店铺名称', '人群id', '营销渠道', '计划基础信息']], # 唯一约束列表
|
3142
|
+
'unique_keys': [['日期', '店铺名称', '人群id', '营销渠道', '计划基础信息', '推广单元信息']], # 唯一约束列表
|
3133
3143
|
}
|
3134
3144
|
|
3135
3145
|
@try_except
|
@@ -3683,5 +3693,16 @@ def main(months=3):
|
|
3683
3693
|
|
3684
3694
|
|
3685
3695
|
if __name__ == '__main__':
|
3686
|
-
|
3696
|
+
main(months=3)
|
3687
3697
|
pass
|
3698
|
+
|
3699
|
+
# download_manager = s_query.QueryDatas(
|
3700
|
+
# username=username,
|
3701
|
+
# password=password,
|
3702
|
+
# host=host,
|
3703
|
+
# port=port,
|
3704
|
+
# maxconnections=10,
|
3705
|
+
# )
|
3706
|
+
# sdq = MysqlDatasQuery(download_manager=download_manager)
|
3707
|
+
# sdq.months = 3
|
3708
|
+
# sdq.zb_ccfx(db_name='聚合数据', table_name='生意参谋_直播场次分析')
|
@@ -11,18 +11,13 @@ from typing import Optional, Dict, Any, List, Callable
|
|
11
11
|
import atexit
|
12
12
|
import traceback
|
13
13
|
import inspect
|
14
|
+
import psutil
|
14
15
|
|
15
|
-
try:
|
16
|
-
import psutil
|
17
|
-
HAS_PSUTIL = True
|
18
|
-
except ImportError:
|
19
|
-
HAS_PSUTIL = False
|
20
16
|
|
21
17
|
def get_caller_filename(default='mylogger'):
|
22
18
|
stack = inspect.stack()
|
23
19
|
for frame_info in stack:
|
24
20
|
filename = frame_info.filename
|
25
|
-
# 跳过本日志库自身
|
26
21
|
if not filename.endswith('mylogger.py'):
|
27
22
|
return os.path.splitext(os.path.basename(filename))[0]
|
28
23
|
return default
|
@@ -102,7 +97,6 @@ class MyLogger:
|
|
102
97
|
self.name = name
|
103
98
|
self.logging_mode = logging_mode.lower()
|
104
99
|
self.log_level = log_level.upper()
|
105
|
-
# log_file 自动为 name+'.log'
|
106
100
|
if log_file is None:
|
107
101
|
self.log_file = os.path.join(log_path, f"{self.name}.log")
|
108
102
|
else:
|
@@ -117,7 +111,7 @@ class MyLogger:
|
|
117
111
|
self.buffer_size = buffer_size
|
118
112
|
self.sample_rate = max(0.0, min(1.0, sample_rate))
|
119
113
|
self.filters = filters or []
|
120
|
-
self.enable_metrics = enable_metrics
|
114
|
+
self.enable_metrics = enable_metrics
|
121
115
|
self.metrics_interval = metrics_interval
|
122
116
|
self.message_limited = message_limited
|
123
117
|
self.flush_interval = flush_interval
|
@@ -167,17 +161,14 @@ class MyLogger:
|
|
167
161
|
self.old_context = {}
|
168
162
|
|
169
163
|
def __enter__(self):
|
170
|
-
# 保存旧上下文并设置新上下文
|
171
164
|
self.old_context = getattr(self.logger._context, 'data', {}).copy()
|
172
165
|
self.logger._context.data.update(self.context_vars)
|
173
166
|
return self.logger
|
174
167
|
|
175
168
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
176
|
-
# 恢复旧上下文
|
177
169
|
self.logger._context.data = self.old_context
|
178
170
|
if exc_type is not None:
|
179
|
-
self.logger.error(f"上下文内异常2: {exc_val}",
|
180
|
-
extra={'类型': str(exc_type)})
|
171
|
+
self.logger.error(f"上下文内异常2: {exc_val}", extra={'类型': str(exc_type)})
|
181
172
|
return False
|
182
173
|
|
183
174
|
def _init_logging(self):
|
@@ -185,15 +176,10 @@ class MyLogger:
|
|
185
176
|
valid_levels = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
|
186
177
|
if self.log_level not in valid_levels:
|
187
178
|
self.log_level = 'INFO'
|
188
|
-
|
189
179
|
self.logger.setLevel(self.log_level)
|
190
|
-
|
191
|
-
# 防止重复添加handler
|
192
180
|
if self.logger.handlers:
|
193
181
|
for handler in self.logger.handlers[:]:
|
194
182
|
self.logger.removeHandler(handler)
|
195
|
-
|
196
|
-
# 定义日志格式
|
197
183
|
if self.log_format.lower() == 'simple':
|
198
184
|
class SimpleFormatter(logging.Formatter):
|
199
185
|
def format(self, record):
|
@@ -255,14 +241,12 @@ class MyLogger:
|
|
255
241
|
)
|
256
242
|
file_handler.setFormatter(formatter)
|
257
243
|
self._handlers.append(file_handler)
|
258
|
-
|
259
244
|
if not self.enable_async:
|
260
245
|
for handler in self._handlers:
|
261
246
|
self.logger.addHandler(handler)
|
262
247
|
|
263
248
|
def _setup_async_logging(self):
|
264
|
-
|
265
|
-
self._log_queue = queue.Queue(maxsize=self.buffer_size) # 无限长度,绝不阻塞
|
249
|
+
self._log_queue = queue.Queue(maxsize=self.buffer_size)
|
266
250
|
queue_handler = logging.handlers.QueueHandler(self._log_queue)
|
267
251
|
self.logger.addHandler(queue_handler)
|
268
252
|
self._queue_listener = logging.handlers.QueueListener(
|
@@ -274,7 +258,6 @@ class MyLogger:
|
|
274
258
|
"""获取系统资源使用指标"""
|
275
259
|
if not self.enable_metrics:
|
276
260
|
return {}
|
277
|
-
|
278
261
|
try:
|
279
262
|
return {
|
280
263
|
'内存': {
|
@@ -301,8 +284,7 @@ class MyLogger:
|
|
301
284
|
}
|
302
285
|
}
|
303
286
|
except Exception as e:
|
304
|
-
self.logger.warning(f"无法采集系统性能指标: {e}",
|
305
|
-
extra={'extra_data': {'metrics_error': str(e)}})
|
287
|
+
self.logger.warning(f"无法采集系统性能指标: {e}", extra={'extra_data': {'metrics_error': str(e)}})
|
306
288
|
return {}
|
307
289
|
|
308
290
|
def _apply_filters(self, level: str, message: str, extra: Dict) -> bool:
|
@@ -310,11 +292,11 @@ class MyLogger:
|
|
310
292
|
for filter_func in self.filters:
|
311
293
|
try:
|
312
294
|
if not filter_func(level, message, extra):
|
313
|
-
return False
|
295
|
+
return False
|
314
296
|
except Exception as e:
|
315
297
|
self.logger.warning(f"过滤失败: {e}",
|
316
298
|
extra={'extra_data': {'filter_error': str(e)}})
|
317
|
-
return True
|
299
|
+
return True
|
318
300
|
|
319
301
|
def log_error_handler(retry_times=0, fallback_level='error'):
|
320
302
|
"""
|
@@ -334,11 +316,10 @@ class MyLogger:
|
|
334
316
|
except Exception as e:
|
335
317
|
last_exception = e
|
336
318
|
if attempt < retry_times:
|
337
|
-
time.sleep(0.1 * (attempt + 1))
|
319
|
+
time.sleep(0.1 * (attempt + 1))
|
338
320
|
continue
|
339
321
|
|
340
322
|
try:
|
341
|
-
# 降级处理
|
342
323
|
logging.basicConfig()
|
343
324
|
fallback_logger = logging.getLogger(f"{getattr(self, 'name', 'mylogger')}_fallback")
|
344
325
|
fallback_msg = f"[降级处理] {message}"[:1000]
|
@@ -359,23 +340,15 @@ class MyLogger:
|
|
359
340
|
"""同步日志记录(兼容异步,直接走logger)"""
|
360
341
|
if not hasattr(self.logger, level.lower()):
|
361
342
|
return
|
362
|
-
|
363
|
-
# message 仅接收字符串类型
|
364
343
|
if not isinstance(message, str):
|
365
344
|
message = str(message)
|
366
|
-
|
367
|
-
# 简化日志内容,避免过长
|
368
345
|
if len(message) > self.message_limited:
|
369
346
|
message = message[:self.message_limited] + '...'
|
370
|
-
|
371
|
-
# 定期收集系统指标
|
372
347
|
if self.enable_metrics:
|
373
348
|
now = time.time()
|
374
349
|
if now - self._last_metrics_time > self.metrics_interval:
|
375
350
|
self._metrics_cache = self._get_system_metrics()
|
376
351
|
self._last_metrics_time = now
|
377
|
-
|
378
|
-
# 准备日志额外数据
|
379
352
|
log_extra = {}
|
380
353
|
if self.enable_metrics:
|
381
354
|
log_extra['性能指标'] = self._metrics_cache
|
@@ -470,11 +443,8 @@ class MyLogger:
|
|
470
443
|
"""记录异常信息"""
|
471
444
|
if not extra:
|
472
445
|
extra = {}
|
473
|
-
|
474
|
-
# 使用inspect获取调用栈
|
475
446
|
frame = inspect.currentframe()
|
476
447
|
try:
|
477
|
-
# 向上追溯2层(1层是exception方法本身,2层是实际调用位置)
|
478
448
|
caller_frame = frame.f_back.f_back
|
479
449
|
extra.update({
|
480
450
|
'module': caller_frame.f_globals.get('__name__', ''),
|
@@ -486,9 +456,7 @@ class MyLogger:
|
|
486
456
|
'堆栈': self._format_traceback(exc_info)
|
487
457
|
})
|
488
458
|
finally:
|
489
|
-
del frame
|
490
|
-
|
491
|
-
# 直接使用logger的error方法记录,保留原始调用栈
|
459
|
+
del frame
|
492
460
|
self.log('error', message, extra)
|
493
461
|
|
494
462
|
def _format_traceback(self, exc_info):
|
@@ -556,7 +524,6 @@ class MyLogger:
|
|
556
524
|
"""关闭日志记录器,确保所有日志被刷新"""
|
557
525
|
if self.enable_async and self._queue_listener:
|
558
526
|
self._queue_listener.stop()
|
559
|
-
# 关闭所有handler
|
560
527
|
for handler in self.logger.handlers:
|
561
528
|
try:
|
562
529
|
handler.close()
|
@@ -569,7 +536,6 @@ class MyLogger:
|
|
569
536
|
pass
|
570
537
|
|
571
538
|
def main():
|
572
|
-
# 创建日志记录器
|
573
539
|
logger = MyLogger(
|
574
540
|
name='my_app',
|
575
541
|
logging_mode='both',
|
@@ -579,14 +545,11 @@ def main():
|
|
579
545
|
max_log_size=50,
|
580
546
|
backup_count=5,
|
581
547
|
enable_async=False, # 是否启用异步日志
|
582
|
-
sample_rate=1, # 采样
|
548
|
+
sample_rate=1, # 采样DEBUG/INFO日志
|
583
549
|
sensitive_fields=[], # 敏感字段列表
|
584
550
|
enable_metrics=False, # 是否启用性能指标
|
585
551
|
)
|
586
|
-
|
587
552
|
logger.info('123')
|
588
|
-
|
589
|
-
# 确保所有日志被刷新
|
590
553
|
logger.shutdown()
|
591
554
|
|
592
555
|
|
@@ -68,15 +68,12 @@ class QueryDatas:
|
|
68
68
|
self.connect_timeout = connect_timeout
|
69
69
|
self.read_timeout = read_timeout
|
70
70
|
self.write_timeout = write_timeout
|
71
|
-
|
72
|
-
# 连接池状态监控
|
73
71
|
self._pool_stats = {
|
74
72
|
'last_health_check': None,
|
75
73
|
'health_check_interval': 300, # 5分钟检查一次
|
76
74
|
'consecutive_failures': 0, # 连续失败次数
|
77
75
|
'max_consecutive_failures': 3 # 最大连续失败次数
|
78
76
|
}
|
79
|
-
|
80
77
|
self.base_config = {
|
81
78
|
'host': self.host,
|
82
79
|
'port': int(self.port),
|
@@ -90,8 +87,6 @@ class QueryDatas:
|
|
90
87
|
'write_timeout': write_timeout,
|
91
88
|
'autocommit': True
|
92
89
|
}
|
93
|
-
|
94
|
-
# 创建连接池
|
95
90
|
self.pool = self._create_connection_pool(maxconnections, mincached, maxcached)
|
96
91
|
|
97
92
|
def _create_connection_pool(self, maxconnections: int, mincached: int, maxcached: int) -> PooledDB:
|
@@ -111,10 +106,7 @@ class QueryDatas:
|
|
111
106
|
"""
|
112
107
|
if hasattr(self, 'pool') and self.pool is not None and self._check_pool_health():
|
113
108
|
return self.pool
|
114
|
-
|
115
109
|
self.pool = None
|
116
|
-
|
117
|
-
# 连接参数 - 这些参数会传递给底层的连接创建函数
|
118
110
|
connection_params = {
|
119
111
|
'host': self.host,
|
120
112
|
'port': int(self.port),
|
@@ -128,8 +120,6 @@ class QueryDatas:
|
|
128
120
|
'write_timeout': self.write_timeout,
|
129
121
|
'autocommit': True
|
130
122
|
}
|
131
|
-
|
132
|
-
# 连接池参数
|
133
123
|
pool_params = {
|
134
124
|
'creator': pymysql,
|
135
125
|
'maxconnections': maxconnections,
|
@@ -140,9 +130,7 @@ class QueryDatas:
|
|
140
130
|
'setsession': [],
|
141
131
|
'ping': 7
|
142
132
|
}
|
143
|
-
|
144
133
|
try:
|
145
|
-
# 创建连接池,将连接参数作为kwargs传递
|
146
134
|
pool = PooledDB(**pool_params, **connection_params)
|
147
135
|
logger.debug('连接池创建成功', {
|
148
136
|
'连接池大小': maxconnections,
|
@@ -170,17 +158,11 @@ class QueryDatas:
|
|
170
158
|
"""
|
171
159
|
if not self.pool:
|
172
160
|
return False
|
173
|
-
|
174
161
|
current_time = time.time()
|
175
|
-
# 检查是否需要执行健康检查
|
176
162
|
if (self._pool_stats['last_health_check'] is None or
|
177
163
|
current_time - self._pool_stats['last_health_check'] > self._pool_stats['health_check_interval']):
|
178
|
-
|
179
164
|
try:
|
180
|
-
# 更新健康检查时间
|
181
165
|
self._pool_stats['last_health_check'] = current_time
|
182
|
-
|
183
|
-
# 检查连接是否可用
|
184
166
|
with self.pool.connection() as conn:
|
185
167
|
with conn.cursor() as cursor:
|
186
168
|
cursor.execute('SELECT 1')
|
@@ -192,12 +174,9 @@ class QueryDatas:
|
|
192
174
|
'连续失败次数': self._pool_stats['consecutive_failures']
|
193
175
|
})
|
194
176
|
return False
|
195
|
-
|
196
|
-
# 重置连续失败计数
|
197
177
|
self._pool_stats['consecutive_failures'] = 0
|
198
178
|
logger.debug('连接池健康检查通过')
|
199
179
|
return True
|
200
|
-
|
201
180
|
except Exception as e:
|
202
181
|
self._pool_stats['consecutive_failures'] += 1
|
203
182
|
if self._pool_stats['consecutive_failures'] >= self._pool_stats['max_consecutive_failures']:
|
@@ -207,7 +186,6 @@ class QueryDatas:
|
|
207
186
|
'连续失败次数': self._pool_stats['consecutive_failures']
|
208
187
|
})
|
209
188
|
return False
|
210
|
-
|
211
189
|
return True
|
212
190
|
|
213
191
|
@staticmethod
|
@@ -292,28 +270,23 @@ class QueryDatas:
|
|
292
270
|
if self._pool_stats['consecutive_failures'] >= self._pool_stats['max_consecutive_failures']:
|
293
271
|
if not self._check_pool_health():
|
294
272
|
logger.warning('连接池不健康,尝试重新创建')
|
295
|
-
# 使用默认值重新创建连接池
|
296
273
|
self.pool = self._create_connection_pool(10, 2, 5)
|
297
|
-
# 重置连续失败计数
|
298
274
|
self._pool_stats['consecutive_failures'] = 0
|
299
275
|
|
300
276
|
conn = self.pool.connection()
|
301
277
|
if db_name:
|
302
|
-
# 使用原生pymysql连接来选择数据库
|
303
278
|
with conn.cursor() as cursor:
|
304
279
|
cursor.execute(f"USE `{db_name}`")
|
305
280
|
return conn
|
306
281
|
except pymysql.OperationalError as e:
|
307
282
|
error_code = e.args[0] if e.args else None
|
308
|
-
if error_code in (2003, 2006, 2013):
|
283
|
+
if error_code in (2003, 2006, 2013):
|
309
284
|
logger.error('数据库连接错误', {
|
310
285
|
'错误代码': error_code,
|
311
286
|
'错误信息': str(e),
|
312
287
|
'数据库': db_name
|
313
288
|
})
|
314
|
-
# 使用默认值重新创建连接池
|
315
289
|
self.pool = self._create_connection_pool(10, 2, 5)
|
316
|
-
# 重置连续失败计数
|
317
290
|
self._pool_stats['consecutive_failures'] = 0
|
318
291
|
raise ConnectionError(f'数据库连接错误: {str(e)}')
|
319
292
|
else:
|
@@ -389,18 +362,14 @@ class QueryDatas:
|
|
389
362
|
"""
|
390
363
|
if not date_str:
|
391
364
|
return default_date
|
392
|
-
|
393
|
-
# 记录尝试的日期格式
|
394
365
|
attempted_formats = []
|
395
366
|
try:
|
396
|
-
# 尝试多种日期格式
|
397
367
|
for fmt in ['%Y-%m-%d', '%Y/%m/%d', '%Y%m%d', '%Y.%m.%d']:
|
398
368
|
try:
|
399
369
|
attempted_formats.append(fmt)
|
400
370
|
return pd.to_datetime(date_str, format=fmt).strftime('%Y-%m-%d')
|
401
371
|
except ValueError:
|
402
372
|
continue
|
403
|
-
|
404
373
|
# 如果所有格式都失败,使用pandas的自动解析
|
405
374
|
attempted_formats.append('auto')
|
406
375
|
return pd.to_datetime(date_str).strftime('%Y-%m-%d')
|
@@ -429,27 +398,18 @@ class QueryDatas:
|
|
429
398
|
处理后的日期范围元组 (start_date, end_date),如果处理失败返回 (None, None)
|
430
399
|
"""
|
431
400
|
try:
|
432
|
-
# 如果两个日期都未提供,返回None表示不进行日期过滤
|
433
401
|
if start_date is None and end_date is None:
|
434
402
|
return None, None
|
435
|
-
|
436
|
-
# 如果只提供了开始日期,结束日期设为今天
|
437
403
|
if start_date is not None and end_date is None:
|
438
404
|
end_date = datetime.datetime.today().strftime('%Y-%m-%d')
|
439
405
|
logger.debug('未提供结束日期,使用当前日期', {'库': db_name, '表': table_name, '结束日期': end_date})
|
440
|
-
|
441
|
-
# 如果只提供了结束日期,开始日期设为1970年
|
442
406
|
if start_date is None and end_date is not None:
|
443
407
|
start_date = '1970-01-01'
|
444
408
|
logger.debug('未提供开始日期,使用默认日期', {'库': db_name, '表': table_name, '开始日期': start_date})
|
445
|
-
|
446
|
-
# 格式化日期
|
447
409
|
original_start = start_date
|
448
410
|
original_end = end_date
|
449
411
|
start_date = self.validate_and_format_date(start_date, '1970-01-01')
|
450
412
|
end_date = self.validate_and_format_date(end_date, datetime.datetime.today().strftime('%Y-%m-%d'))
|
451
|
-
|
452
|
-
# 如果日期格式被修改,记录日志
|
453
413
|
if original_start != start_date:
|
454
414
|
logger.debug('开始日期格式已调整', {
|
455
415
|
'库': db_name,
|
@@ -543,12 +503,8 @@ class QueryDatas:
|
|
543
503
|
if not cols_exist:
|
544
504
|
logger.warning('表没有可用列')
|
545
505
|
return []
|
546
|
-
|
547
|
-
# 如果 projection 为 None、空字典或空列表,返回所有列
|
548
506
|
if projection is None or projection == {} or projection == []:
|
549
507
|
return list(cols_exist)
|
550
|
-
|
551
|
-
# 验证列名是否包含特殊字符
|
552
508
|
invalid_chars = set('`\'"\\')
|
553
509
|
selected_columns = []
|
554
510
|
for col in projection:
|
@@ -557,11 +513,9 @@ class QueryDatas:
|
|
557
513
|
continue
|
558
514
|
if col in cols_exist and projection[col]:
|
559
515
|
selected_columns.append(col)
|
560
|
-
|
561
516
|
if not selected_columns:
|
562
517
|
logger.info('参数不匹配,返回所有列', {'参数': projection})
|
563
518
|
return list(cols_exist)
|
564
|
-
|
565
519
|
return selected_columns
|
566
520
|
|
567
521
|
def _build_query_sql(self, db_name: str, table_name: str, selected_columns: List[str],
|
@@ -960,10 +914,7 @@ def main():
|
|
960
914
|
username, password, host, port = my_cont['username'], my_cont['password'], my_cont['host'], int(my_cont['port'])
|
961
915
|
host = 'localhost'
|
962
916
|
|
963
|
-
# 创建QueryDatas实例
|
964
917
|
qd = QueryDatas(username=username, password=password, host=host, port=port)
|
965
|
-
|
966
|
-
# 执行查询
|
967
918
|
df = qd.data_to_df('聚合数据', '店铺流量来源构成', limit=10)
|
968
919
|
print(df)
|
969
920
|
|
@@ -321,14 +321,14 @@ def main():
|
|
321
321
|
# "sku榜单": [['日期', '平台', '店铺名称', '条码']],
|
322
322
|
# "spu榜单": [['日期', '平台', '店铺名称', '商品款号', '访客量']],
|
323
323
|
# },
|
324
|
-
"生意参谋3": {
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
},
|
324
|
+
# "生意参谋3": {
|
325
|
+
# "crm成交客户": [['客户id']],
|
326
|
+
# "商品排行": [['日期', '店铺名称', '商品id']],
|
327
|
+
# "流量来源构成": [['日期', '店铺名称', '来源构成', '类别', '一级来源', '二级来源', '三级来源']],
|
328
|
+
# "手淘搜索": [['日期', '店铺名称', '搜索词', '词类型', '访客数']],
|
329
|
+
# "新品追踪": [['日期', '店铺名称', '商品id']],
|
330
|
+
# "直播分场次效果": [['场次id']],
|
331
|
+
# },
|
332
332
|
# "生意经3": {
|
333
333
|
# "sku销量_按名称": [['日期', '店铺名称', '宝贝id', 'sku名称', '销售额']],
|
334
334
|
# "sku销量_按商家编码": [['日期', '店铺名称', '宝贝id', 'sku编码', '销售额']],
|
@@ -404,7 +404,15 @@ class MySQLUploader:
|
|
404
404
|
raise ValueError('set_typ 未指定')
|
405
405
|
# set_typ的键清洗
|
406
406
|
set_typ = {self._normalize_col(k): v for k, v in set_typ.items()}
|
407
|
-
|
407
|
+
|
408
|
+
# 处理id列和主键
|
409
|
+
column_defs = []
|
410
|
+
|
411
|
+
# 添加id列(仅在没有指定主键时)
|
412
|
+
if not primary_keys:
|
413
|
+
column_defs.append("`id` INT NOT NULL AUTO_INCREMENT")
|
414
|
+
|
415
|
+
# 添加其他列
|
408
416
|
for col_name, col_type in set_typ.items():
|
409
417
|
if col_name == 'id':
|
410
418
|
continue
|
@@ -413,18 +421,23 @@ class MySQLUploader:
|
|
413
421
|
if not allow_null and not col_type.lower().startswith('json'):
|
414
422
|
col_def += " NOT NULL"
|
415
423
|
column_defs.append(col_def)
|
424
|
+
|
416
425
|
# 主键处理逻辑调整
|
417
426
|
def _index_col_sql(col):
|
418
427
|
col_type = set_typ.get(col, '').lower()
|
419
428
|
if 'varchar' in col_type or 'text' in col_type:
|
420
429
|
return f"`{self._normalize_col(col)}`(100)"
|
421
430
|
return f"`{self._normalize_col(col)}`"
|
431
|
+
|
432
|
+
# 处理主键
|
422
433
|
if primary_keys and len(primary_keys) > 0:
|
434
|
+
# 如果指定了主键,直接使用指定的主键
|
423
435
|
safe_primary_keys = [_index_col_sql(pk) for pk in primary_keys]
|
424
436
|
primary_key_sql = f"PRIMARY KEY ({','.join(safe_primary_keys)})"
|
425
437
|
else:
|
426
|
-
|
438
|
+
# 如果没有指定主键,使用id作为主键
|
427
439
|
primary_key_sql = f"PRIMARY KEY (`id`)"
|
440
|
+
|
428
441
|
# 索引统一在CREATE TABLE中定义
|
429
442
|
index_defs = []
|
430
443
|
if date_column and date_column in set_typ:
|
@@ -435,15 +448,28 @@ class MySQLUploader:
|
|
435
448
|
if idx_col in set_typ:
|
436
449
|
safe_idx_col = _index_col_sql(idx_col)
|
437
450
|
index_defs.append(f"INDEX `idx_{self._normalize_col(idx_col)}` ({safe_idx_col})")
|
451
|
+
|
438
452
|
# UNIQUE KEY定义
|
439
453
|
unique_defs = []
|
440
454
|
if unique_keys:
|
441
455
|
for unique_cols in unique_keys:
|
442
456
|
if not unique_cols:
|
443
457
|
continue
|
458
|
+
# 检查唯一约束是否与主键冲突
|
459
|
+
if primary_keys:
|
460
|
+
# 如果唯一约束的列是主键的一部分,则跳过
|
461
|
+
if set(unique_cols).issubset(set(primary_keys)):
|
462
|
+
logger.warning('跳过与主键冲突的唯一约束', {
|
463
|
+
'库': db_name,
|
464
|
+
'表': table_name,
|
465
|
+
'唯一约束': unique_cols,
|
466
|
+
'主键': primary_keys
|
467
|
+
})
|
468
|
+
continue
|
444
469
|
safe_unique_cols = [_index_col_sql(col) for col in unique_cols]
|
445
470
|
unique_name = f"uniq_{'_'.join([self._normalize_col(c) for c in unique_cols])}"
|
446
471
|
unique_defs.append(f"UNIQUE KEY `{unique_name}` ({','.join(safe_unique_cols)})")
|
472
|
+
|
447
473
|
index_defs = list(set(index_defs))
|
448
474
|
all_defs = column_defs + [primary_key_sql] + index_defs + unique_defs
|
449
475
|
sql = f"""
|
@@ -1437,11 +1463,22 @@ class MySQLUploader:
|
|
1437
1463
|
try:
|
1438
1464
|
cursor.executemany(sql, values_list)
|
1439
1465
|
conn.commit()
|
1440
|
-
|
1441
|
-
|
1442
|
-
|
1466
|
+
# 在batch模式下,affected_rows表示实际影响的行数
|
1467
|
+
# 如果update_on_duplicate为True,则affected_rows包含更新的行数
|
1468
|
+
# 如果update_on_duplicate为False,则affected_rows只包含插入的行数
|
1469
|
+
affected = cursor.rowcount if cursor.rowcount is not None else 0
|
1470
|
+
if update_on_duplicate:
|
1471
|
+
# 当启用更新时,affected_rows包含插入和更新的行数
|
1472
|
+
# 我们需要区分插入和更新的行数
|
1473
|
+
# 由于无法准确区分,我们假设所有行都是插入的
|
1474
|
+
total_inserted += len(batch)
|
1475
|
+
else:
|
1476
|
+
# 当不启用更新时,affected_rows只包含插入的行数
|
1477
|
+
total_inserted += affected
|
1478
|
+
total_skipped += len(batch) - affected
|
1443
1479
|
except pymysql.err.IntegrityError as e:
|
1444
1480
|
conn.rollback()
|
1481
|
+
# 在唯一约束冲突时,所有行都被跳过
|
1445
1482
|
total_skipped += len(batch)
|
1446
1483
|
logger.debug('批量插入唯一约束冲突,全部跳过', {'库': db_name, '表': table_name, '错误': str(e)})
|
1447
1484
|
except Exception as e:
|
@@ -1460,10 +1497,16 @@ class MySQLUploader:
|
|
1460
1497
|
values += [row.get(col) for col in dup_cols]
|
1461
1498
|
cursor.execute(sql, values)
|
1462
1499
|
affected = cursor.rowcount if cursor.rowcount is not None else 0
|
1463
|
-
if
|
1500
|
+
if update_on_duplicate:
|
1501
|
+
# 当启用更新时,affected_rows包含插入和更新的行数
|
1502
|
+
# 假设所有行都是插入的,因为无法区分插入和更新
|
1464
1503
|
total_inserted += 1
|
1465
1504
|
else:
|
1466
|
-
|
1505
|
+
# 当不启用更新时,affected_rows只包含插入的行数
|
1506
|
+
if affected > 0:
|
1507
|
+
total_inserted += 1
|
1508
|
+
else:
|
1509
|
+
total_skipped += 1
|
1467
1510
|
except pymysql.err.IntegrityError as e:
|
1468
1511
|
conn.rollback()
|
1469
1512
|
total_skipped += 1
|
@@ -1482,10 +1525,16 @@ class MySQLUploader:
|
|
1482
1525
|
values += [row.get(col) for col in dup_cols]
|
1483
1526
|
cursor.execute(sql, values)
|
1484
1527
|
affected = cursor.rowcount if cursor.rowcount is not None else 0
|
1485
|
-
if
|
1528
|
+
if update_on_duplicate:
|
1529
|
+
# 当启用更新时,affected_rows包含插入和更新的行数
|
1530
|
+
# 假设所有行都是插入的,因为无法区分插入和更新
|
1486
1531
|
total_inserted += 1
|
1487
1532
|
else:
|
1488
|
-
|
1533
|
+
# 当不启用更新时,affected_rows只包含插入的行数
|
1534
|
+
if affected > 0:
|
1535
|
+
total_inserted += 1
|
1536
|
+
else:
|
1537
|
+
total_skipped += 1
|
1489
1538
|
conn.commit()
|
1490
1539
|
except pymysql.err.IntegrityError as e:
|
1491
1540
|
conn.rollback()
|
mdbq-4.0.7/mdbq/__version__.py
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
VERSION = '4.0.7'
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|