mdbq 4.0.8.tar.gz → 4.0.9.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mdbq-4.0.8 → mdbq-4.0.9}/PKG-INFO +1 -1
- mdbq-4.0.9/mdbq/__version__.py +1 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/aggregation/query_data.py +81 -60
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/mysql/uploader.py +58 -9
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq.egg-info/PKG-INFO +1 -1
- mdbq-4.0.8/mdbq/__version__.py +0 -1
- {mdbq-4.0.8 → mdbq-4.0.9}/README.txt +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/__init__.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/aggregation/__init__.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/config/__init__.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/config/config.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/log/__init__.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/log/mylogger.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/log/spider_logging.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/mysql/__init__.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/mysql/deduplicator.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/mysql/mysql.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/mysql/s_query.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/mysql/unique_.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/other/__init__.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/other/download_sku_picture.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/other/otk.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/other/pov_city.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/other/ua_sj.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/pbix/__init__.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/pbix/pbix_refresh.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/pbix/refresh_all.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/redis/__init__.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/redis/getredis.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/spider/__init__.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/spider/aikucun.py +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq.egg-info/SOURCES.txt +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq.egg-info/dependency_links.txt +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/mdbq.egg-info/top_level.txt +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/setup.cfg +0 -0
- {mdbq-4.0.8 → mdbq-4.0.9}/setup.py +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
VERSION = '4.0.9'
|
@@ -119,9 +119,16 @@ def upload_data_decorator(**upload_kwargs):
|
|
119
119
|
return None
|
120
120
|
|
121
121
|
# 处理 DataFrame 结果
|
122
|
-
if isinstance(result, pd.DataFrame):
|
122
|
+
if isinstance(result, (pd.DataFrame, list, dict)):
|
123
123
|
if set_type is not None:
|
124
|
-
|
124
|
+
if isinstance(result, pd.DataFrame):
|
125
|
+
result = reorder_columns(result, set_type)
|
126
|
+
elif isinstance(result, list):
|
127
|
+
# 如果是list,转换为DataFrame以调整列顺序
|
128
|
+
result = reorder_columns(pd.DataFrame(result), set_type)
|
129
|
+
elif isinstance(result, dict):
|
130
|
+
# 如果是dict,转换为DataFrame以调整列顺序
|
131
|
+
result = reorder_columns(pd.DataFrame([result]), set_type)
|
125
132
|
|
126
133
|
# 合并参数
|
127
134
|
merged_kwargs = {
|
@@ -143,12 +150,19 @@ def upload_data_decorator(**upload_kwargs):
|
|
143
150
|
|
144
151
|
df, extra_kwargs = result[0], result[1]
|
145
152
|
|
146
|
-
if not isinstance(df, pd.DataFrame):
|
147
|
-
logger.warning('函数返回的元组第一个元素不是DataFrame,直接返回原结果,不执行上传', {'函数': func.__name__, '库': db_name, '表': table_name})
|
153
|
+
if not isinstance(df, (pd.DataFrame, list, dict)):
|
154
|
+
logger.warning('函数返回的元组第一个元素不是DataFrame/list/dict,直接返回原结果,不执行上传', {'函数': func.__name__, '库': db_name, '表': table_name})
|
148
155
|
return result
|
149
156
|
|
150
157
|
if set_type is not None:
|
151
|
-
|
158
|
+
if isinstance(df, pd.DataFrame):
|
159
|
+
df = reorder_columns(df, set_type)
|
160
|
+
elif isinstance(df, list):
|
161
|
+
# 如果是list,转换为DataFrame以调整列顺序
|
162
|
+
df = reorder_columns(pd.DataFrame(df), set_type)
|
163
|
+
elif isinstance(df, dict):
|
164
|
+
# 如果是dict,转换为DataFrame以调整列顺序
|
165
|
+
df = reorder_columns(pd.DataFrame([df]), set_type)
|
152
166
|
result = (df, extra_kwargs) + result[2:]
|
153
167
|
|
154
168
|
# 合并参数
|
@@ -2370,61 +2384,57 @@ class MysqlDatasQuery:
|
|
2370
2384
|
'更新时间': 'timestamp',
|
2371
2385
|
}
|
2372
2386
|
logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
|
2373
|
-
|
2374
|
-
'日期'
|
2375
|
-
'店铺名称'
|
2376
|
-
'场次信息'
|
2377
|
-
'场次id'
|
2378
|
-
'直播开播时间'
|
2379
|
-
'开播时长'
|
2380
|
-
'封面图点击率'
|
2381
|
-
'观看人数'
|
2382
|
-
'观看次数'
|
2383
|
-
'新增粉丝数'
|
2384
|
-
'流量券消耗'
|
2385
|
-
'观看总时长'
|
2386
|
-
'人均观看时长'
|
2387
|
-
'次均观看时长'
|
2388
|
-
'商品点击人数'
|
2389
|
-
'商品点击次数'
|
2390
|
-
'商品点击率'
|
2391
|
-
'加购人数'
|
2392
|
-
'加购件数'
|
2393
|
-
'加购次数'
|
2394
|
-
'成交金额'
|
2395
|
-
'成交人数'
|
2396
|
-
'成交件数'
|
2397
|
-
'成交笔数'
|
2398
|
-
'成交转化率'
|
2399
|
-
'退款人数'
|
2400
|
-
'退款笔数'
|
2401
|
-
'退款件数'
|
2402
|
-
'退款金额'
|
2403
|
-
'预售定金支付金额'
|
2404
|
-
'预售预估总金额'
|
2405
|
-
|
2406
|
-
|
2407
|
-
for
|
2408
|
-
|
2409
|
-
|
2410
|
-
|
2411
|
-
|
2412
|
-
|
2413
|
-
|
2414
|
-
|
2415
|
-
|
2416
|
-
|
2417
|
-
|
2418
|
-
|
2419
|
-
|
2420
|
-
|
2421
|
-
|
2422
|
-
|
2423
|
-
|
2424
|
-
'unique_keys': [['场次id']], # 唯一约束列表
|
2425
|
-
}
|
2426
|
-
else:
|
2427
|
-
return None, None
|
2387
|
+
ordered_columns = [
|
2388
|
+
'日期',
|
2389
|
+
'店铺名称',
|
2390
|
+
'场次信息',
|
2391
|
+
'场次id',
|
2392
|
+
'直播开播时间',
|
2393
|
+
'开播时长',
|
2394
|
+
'封面图点击率',
|
2395
|
+
'观看人数',
|
2396
|
+
'观看次数',
|
2397
|
+
'新增粉丝数',
|
2398
|
+
'流量券消耗',
|
2399
|
+
'观看总时长',
|
2400
|
+
'人均观看时长',
|
2401
|
+
'次均观看时长',
|
2402
|
+
'商品点击人数',
|
2403
|
+
'商品点击次数',
|
2404
|
+
'商品点击率',
|
2405
|
+
'加购人数',
|
2406
|
+
'加购件数',
|
2407
|
+
'加购次数',
|
2408
|
+
'成交金额',
|
2409
|
+
'成交人数',
|
2410
|
+
'成交件数',
|
2411
|
+
'成交笔数',
|
2412
|
+
'成交转化率',
|
2413
|
+
'退款人数',
|
2414
|
+
'退款笔数',
|
2415
|
+
'退款件数',
|
2416
|
+
'退款金额',
|
2417
|
+
'预售定金支付金额',
|
2418
|
+
'预售预估总金额',
|
2419
|
+
]
|
2420
|
+
# 使用reindex重排列顺序,未定义的列会自动放在最后
|
2421
|
+
df = df.reindex(columns=[col for col in ordered_columns if col in df.columns] +
|
2422
|
+
[col for col in df.columns if col not in ordered_columns])
|
2423
|
+
return df, {
|
2424
|
+
'db_name': db_name,
|
2425
|
+
'table_name': table_name,
|
2426
|
+
'set_typ': set_typ,
|
2427
|
+
'primary_keys': [], # 创建唯一主键
|
2428
|
+
'check_duplicate': False, # 检查重复数据
|
2429
|
+
'duplicate_columns': [], # 指定排重的组合键
|
2430
|
+
'update_on_duplicate': True, # 更新旧数据
|
2431
|
+
'allow_null': False, # 允许插入空值
|
2432
|
+
'partition_by': None, # 分表方式
|
2433
|
+
'partition_date_column': '日期', # 用于分表的日期列名,默认为'日期'
|
2434
|
+
'indexes': [], # 普通索引列
|
2435
|
+
'transaction_mode': 'batch', # 事务模式
|
2436
|
+
'unique_keys': [['场次id']], # 唯一约束列表
|
2437
|
+
}
|
2428
2438
|
|
2429
2439
|
# @try_except
|
2430
2440
|
@upload_data_decorator()
|
@@ -3685,3 +3695,14 @@ def main(months=3):
|
|
3685
3695
|
if __name__ == '__main__':
|
3686
3696
|
main(months=3)
|
3687
3697
|
pass
|
3698
|
+
|
3699
|
+
# download_manager = s_query.QueryDatas(
|
3700
|
+
# username=username,
|
3701
|
+
# password=password,
|
3702
|
+
# host=host,
|
3703
|
+
# port=port,
|
3704
|
+
# maxconnections=10,
|
3705
|
+
# )
|
3706
|
+
# sdq = MysqlDatasQuery(download_manager=download_manager)
|
3707
|
+
# sdq.months = 3
|
3708
|
+
# sdq.zb_ccfx(db_name='聚合数据', table_name='生意参谋_直播场次分析')
|
@@ -404,7 +404,15 @@ class MySQLUploader:
|
|
404
404
|
raise ValueError('set_typ 未指定')
|
405
405
|
# set_typ的键清洗
|
406
406
|
set_typ = {self._normalize_col(k): v for k, v in set_typ.items()}
|
407
|
-
|
407
|
+
|
408
|
+
# 处理id列和主键
|
409
|
+
column_defs = []
|
410
|
+
|
411
|
+
# 添加id列(仅在没有指定主键时)
|
412
|
+
if not primary_keys:
|
413
|
+
column_defs.append("`id` INT NOT NULL AUTO_INCREMENT")
|
414
|
+
|
415
|
+
# 添加其他列
|
408
416
|
for col_name, col_type in set_typ.items():
|
409
417
|
if col_name == 'id':
|
410
418
|
continue
|
@@ -413,18 +421,23 @@ class MySQLUploader:
|
|
413
421
|
if not allow_null and not col_type.lower().startswith('json'):
|
414
422
|
col_def += " NOT NULL"
|
415
423
|
column_defs.append(col_def)
|
424
|
+
|
416
425
|
# 主键处理逻辑调整
|
417
426
|
def _index_col_sql(col):
|
418
427
|
col_type = set_typ.get(col, '').lower()
|
419
428
|
if 'varchar' in col_type or 'text' in col_type:
|
420
429
|
return f"`{self._normalize_col(col)}`(100)"
|
421
430
|
return f"`{self._normalize_col(col)}`"
|
431
|
+
|
432
|
+
# 处理主键
|
422
433
|
if primary_keys and len(primary_keys) > 0:
|
434
|
+
# 如果指定了主键,直接使用指定的主键
|
423
435
|
safe_primary_keys = [_index_col_sql(pk) for pk in primary_keys]
|
424
436
|
primary_key_sql = f"PRIMARY KEY ({','.join(safe_primary_keys)})"
|
425
437
|
else:
|
426
|
-
|
438
|
+
# 如果没有指定主键,使用id作为主键
|
427
439
|
primary_key_sql = f"PRIMARY KEY (`id`)"
|
440
|
+
|
428
441
|
# 索引统一在CREATE TABLE中定义
|
429
442
|
index_defs = []
|
430
443
|
if date_column and date_column in set_typ:
|
@@ -435,15 +448,28 @@ class MySQLUploader:
|
|
435
448
|
if idx_col in set_typ:
|
436
449
|
safe_idx_col = _index_col_sql(idx_col)
|
437
450
|
index_defs.append(f"INDEX `idx_{self._normalize_col(idx_col)}` ({safe_idx_col})")
|
451
|
+
|
438
452
|
# UNIQUE KEY定义
|
439
453
|
unique_defs = []
|
440
454
|
if unique_keys:
|
441
455
|
for unique_cols in unique_keys:
|
442
456
|
if not unique_cols:
|
443
457
|
continue
|
458
|
+
# 检查唯一约束是否与主键冲突
|
459
|
+
if primary_keys:
|
460
|
+
# 如果唯一约束的列是主键的一部分,则跳过
|
461
|
+
if set(unique_cols).issubset(set(primary_keys)):
|
462
|
+
logger.warning('跳过与主键冲突的唯一约束', {
|
463
|
+
'库': db_name,
|
464
|
+
'表': table_name,
|
465
|
+
'唯一约束': unique_cols,
|
466
|
+
'主键': primary_keys
|
467
|
+
})
|
468
|
+
continue
|
444
469
|
safe_unique_cols = [_index_col_sql(col) for col in unique_cols]
|
445
470
|
unique_name = f"uniq_{'_'.join([self._normalize_col(c) for c in unique_cols])}"
|
446
471
|
unique_defs.append(f"UNIQUE KEY `{unique_name}` ({','.join(safe_unique_cols)})")
|
472
|
+
|
447
473
|
index_defs = list(set(index_defs))
|
448
474
|
all_defs = column_defs + [primary_key_sql] + index_defs + unique_defs
|
449
475
|
sql = f"""
|
@@ -1437,11 +1463,22 @@ class MySQLUploader:
|
|
1437
1463
|
try:
|
1438
1464
|
cursor.executemany(sql, values_list)
|
1439
1465
|
conn.commit()
|
1440
|
-
|
1441
|
-
|
1442
|
-
|
1466
|
+
# 在batch模式下,affected_rows表示实际影响的行数
|
1467
|
+
# 如果update_on_duplicate为True,则affected_rows包含更新的行数
|
1468
|
+
# 如果update_on_duplicate为False,则affected_rows只包含插入的行数
|
1469
|
+
affected = cursor.rowcount if cursor.rowcount is not None else 0
|
1470
|
+
if update_on_duplicate:
|
1471
|
+
# 当启用更新时,affected_rows包含插入和更新的行数
|
1472
|
+
# 我们需要区分插入和更新的行数
|
1473
|
+
# 由于无法准确区分,我们假设所有行都是插入的
|
1474
|
+
total_inserted += len(batch)
|
1475
|
+
else:
|
1476
|
+
# 当不启用更新时,affected_rows只包含插入的行数
|
1477
|
+
total_inserted += affected
|
1478
|
+
total_skipped += len(batch) - affected
|
1443
1479
|
except pymysql.err.IntegrityError as e:
|
1444
1480
|
conn.rollback()
|
1481
|
+
# 在唯一约束冲突时,所有行都被跳过
|
1445
1482
|
total_skipped += len(batch)
|
1446
1483
|
logger.debug('批量插入唯一约束冲突,全部跳过', {'库': db_name, '表': table_name, '错误': str(e)})
|
1447
1484
|
except Exception as e:
|
@@ -1460,10 +1497,16 @@ class MySQLUploader:
|
|
1460
1497
|
values += [row.get(col) for col in dup_cols]
|
1461
1498
|
cursor.execute(sql, values)
|
1462
1499
|
affected = cursor.rowcount if cursor.rowcount is not None else 0
|
1463
|
-
if
|
1500
|
+
if update_on_duplicate:
|
1501
|
+
# 当启用更新时,affected_rows包含插入和更新的行数
|
1502
|
+
# 假设所有行都是插入的,因为无法区分插入和更新
|
1464
1503
|
total_inserted += 1
|
1465
1504
|
else:
|
1466
|
-
|
1505
|
+
# 当不启用更新时,affected_rows只包含插入的行数
|
1506
|
+
if affected > 0:
|
1507
|
+
total_inserted += 1
|
1508
|
+
else:
|
1509
|
+
total_skipped += 1
|
1467
1510
|
except pymysql.err.IntegrityError as e:
|
1468
1511
|
conn.rollback()
|
1469
1512
|
total_skipped += 1
|
@@ -1482,10 +1525,16 @@ class MySQLUploader:
|
|
1482
1525
|
values += [row.get(col) for col in dup_cols]
|
1483
1526
|
cursor.execute(sql, values)
|
1484
1527
|
affected = cursor.rowcount if cursor.rowcount is not None else 0
|
1485
|
-
if
|
1528
|
+
if update_on_duplicate:
|
1529
|
+
# 当启用更新时,affected_rows包含插入和更新的行数
|
1530
|
+
# 假设所有行都是插入的,因为无法区分插入和更新
|
1486
1531
|
total_inserted += 1
|
1487
1532
|
else:
|
1488
|
-
|
1533
|
+
# 当不启用更新时,affected_rows只包含插入的行数
|
1534
|
+
if affected > 0:
|
1535
|
+
total_inserted += 1
|
1536
|
+
else:
|
1537
|
+
total_skipped += 1
|
1489
1538
|
conn.commit()
|
1490
1539
|
except pymysql.err.IntegrityError as e:
|
1491
1540
|
conn.rollback()
|
mdbq-4.0.8/mdbq/__version__.py
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
VERSION = '4.0.8'
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|