mdbq 4.0.45__py3-none-any.whl → 4.0.47__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdbq/__version__.py +1 -1
- mdbq/aggregation/query_data.py +2 -2
- mdbq/mysql/uploader.py +60 -9
- {mdbq-4.0.45.dist-info → mdbq-4.0.47.dist-info}/METADATA +1 -1
- {mdbq-4.0.45.dist-info → mdbq-4.0.47.dist-info}/RECORD +7 -7
- {mdbq-4.0.45.dist-info → mdbq-4.0.47.dist-info}/WHEEL +0 -0
- {mdbq-4.0.45.dist-info → mdbq-4.0.47.dist-info}/top_level.txt +0 -0
mdbq/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION = '4.0.45'
|
1
|
+
VERSION = '4.0.47'
|
mdbq/aggregation/query_data.py
CHANGED
@@ -24,7 +24,7 @@ host, port, username, password = parser.get_section_values(
|
|
24
24
|
section='mysql',
|
25
25
|
keys=['host', 'port', 'username', 'password'],
|
26
26
|
)
|
27
|
-
host = 'localhost'
|
27
|
+
# host = 'localhost'
|
28
28
|
uld = uploader.MySQLUploader(username=username, password=password, host=host, port=int(port), pool_size=10)
|
29
29
|
|
30
30
|
logger = mylogger.MyLogger(
|
@@ -2270,7 +2270,7 @@ class MysqlDatasQuery:
|
|
2270
2270
|
'日期': 'date',
|
2271
2271
|
'店铺名称': 'varchar(100)',
|
2272
2272
|
'词类型': 'varchar(100)',
|
2273
|
-
'搜索词': 'varchar(
|
2273
|
+
'搜索词': 'varchar(255)',
|
2274
2274
|
'访客数': 'int',
|
2275
2275
|
'加购人数': 'int',
|
2276
2276
|
'支付金额': 'decimal(10,2)',
|
mdbq/mysql/uploader.py
CHANGED
@@ -14,6 +14,7 @@ from dbutils.pooled_db import PooledDB
|
|
14
14
|
import sys
|
15
15
|
from decimal import Decimal, InvalidOperation
|
16
16
|
import math
|
17
|
+
import json
|
17
18
|
|
18
19
|
warnings.filterwarnings('ignore')
|
19
20
|
logger = mylogger.MyLogger(
|
@@ -625,11 +626,43 @@ class MySQLUploader:
|
|
625
626
|
根据列类型验证并转换数据值
|
626
627
|
"""
|
627
628
|
column_type_lower = column_type.lower() if column_type else ''
|
628
|
-
|
629
|
+
|
630
|
+
# JSON类型验证和转换(优先处理,避免pd.isna的问题)
|
631
|
+
if 'json' in column_type_lower:
|
632
|
+
if isinstance(value, (dict, list)):
|
633
|
+
try:
|
634
|
+
return json.dumps(value, ensure_ascii=False)
|
635
|
+
except (TypeError, ValueError) as e:
|
636
|
+
logger.error(f"JSON序列化失败: {e}", {"库": db_name, "表": table_name, "列": col_name, "值": value})
|
637
|
+
raise ValueError(f"JSON序列化失败: {e}")
|
638
|
+
elif isinstance(value, str):
|
639
|
+
# 验证字符串是否为有效的JSON
|
640
|
+
try:
|
641
|
+
json.loads(value)
|
642
|
+
return value
|
643
|
+
except (TypeError, ValueError) as e:
|
644
|
+
logger.error(f"无效的JSON字符串: {e}", {"库": db_name, "表": table_name, "列": col_name, "值": value})
|
645
|
+
raise ValueError(f"无效的JSON字符串: {e}")
|
646
|
+
else:
|
647
|
+
# 其他类型转换为字符串
|
648
|
+
return str(value)
|
649
|
+
|
650
|
+
# 统一判断None/NaN(排除列表和字典类型)
|
629
651
|
if value == '':
|
630
652
|
if any(t in column_type_lower for t in ['varchar', 'text', 'char', 'mediumtext', 'longtext']):
|
631
653
|
return ""
|
632
|
-
|
654
|
+
|
655
|
+
# 安全地检查NaN值,避免对列表和字典使用pd.isna
|
656
|
+
is_nan = False
|
657
|
+
if isinstance(value, (list, dict)):
|
658
|
+
is_nan = False # 列表和字典不是NaN
|
659
|
+
else:
|
660
|
+
try:
|
661
|
+
is_nan = pd.isna(value) or (isinstance(value, (float, Decimal)) and math.isinf(value))
|
662
|
+
except (ValueError, TypeError):
|
663
|
+
is_nan = False
|
664
|
+
|
665
|
+
if value == '' or is_nan:
|
633
666
|
# 兜底填充值映射
|
634
667
|
fallback_map = {
|
635
668
|
'int': 0,
|
@@ -709,7 +742,15 @@ class MySQLUploader:
|
|
709
742
|
except (AttributeError, IndexError):
|
710
743
|
pass
|
711
744
|
return str_value
|
712
|
-
|
745
|
+
|
746
|
+
# 兜底处理:确保所有返回值都是基本数据类型
|
747
|
+
if isinstance(value, (dict, list)):
|
748
|
+
try:
|
749
|
+
return json.dumps(value, ensure_ascii=False)
|
750
|
+
except (TypeError, ValueError):
|
751
|
+
return str(value)
|
752
|
+
else:
|
753
|
+
return str(value)
|
713
754
|
|
714
755
|
@_execute_with_retry
|
715
756
|
def _get_table_columns(self, db_name: str, table_name: str) -> Dict[str, str]:
|
@@ -1517,6 +1558,16 @@ class MySQLUploader:
|
|
1517
1558
|
return 1000
|
1518
1559
|
else:
|
1519
1560
|
return 2000
|
1561
|
+
|
1562
|
+
def ensure_basic_type(value):
|
1563
|
+
"""确保值是基本数据类型,如果是字典或列表则转换为字符串"""
|
1564
|
+
if isinstance(value, (dict, list)):
|
1565
|
+
try:
|
1566
|
+
return json.dumps(value, ensure_ascii=False)
|
1567
|
+
except (TypeError, ValueError):
|
1568
|
+
return str(value)
|
1569
|
+
return value
|
1570
|
+
|
1520
1571
|
batch_size = get_optimal_batch_size(len(data))
|
1521
1572
|
all_columns = [col for col in set_typ.keys() if col.lower() != 'id']
|
1522
1573
|
total_inserted = 0
|
@@ -1529,10 +1580,10 @@ class MySQLUploader:
|
|
1529
1580
|
batch = data[i:i + batch_size]
|
1530
1581
|
values_list = []
|
1531
1582
|
for row in batch:
|
1532
|
-
values = [row.get(col) for col in all_columns]
|
1583
|
+
values = [ensure_basic_type(row.get(col)) for col in all_columns]
|
1533
1584
|
if check_duplicate and not update_on_duplicate:
|
1534
1585
|
dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
|
1535
|
-
values += [row.get(col) for col in dup_cols]
|
1586
|
+
values += [ensure_basic_type(row.get(col)) for col in dup_cols]
|
1536
1587
|
values_list.append(values)
|
1537
1588
|
try:
|
1538
1589
|
cursor.executemany(sql, values_list)
|
@@ -1565,10 +1616,10 @@ class MySQLUploader:
|
|
1565
1616
|
batch = data[i:i + hybrid_n]
|
1566
1617
|
for row in batch:
|
1567
1618
|
try:
|
1568
|
-
values = [row.get(col) for col in all_columns]
|
1619
|
+
values = [ensure_basic_type(row.get(col)) for col in all_columns]
|
1569
1620
|
if check_duplicate and not update_on_duplicate:
|
1570
1621
|
dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
|
1571
|
-
values += [row.get(col) for col in dup_cols]
|
1622
|
+
values += [ensure_basic_type(row.get(col)) for col in dup_cols]
|
1572
1623
|
cursor.execute(sql, values)
|
1573
1624
|
affected = cursor.rowcount if cursor.rowcount is not None else 0
|
1574
1625
|
if update_on_duplicate:
|
@@ -1593,10 +1644,10 @@ class MySQLUploader:
|
|
1593
1644
|
else: # row模式
|
1594
1645
|
for row in data:
|
1595
1646
|
try:
|
1596
|
-
values = [row.get(col) for col in all_columns]
|
1647
|
+
values = [ensure_basic_type(row.get(col)) for col in all_columns]
|
1597
1648
|
if check_duplicate and not update_on_duplicate:
|
1598
1649
|
dup_cols = duplicate_columns if duplicate_columns else [col for col in all_columns if col.lower() not in self.base_excute_col]
|
1599
|
-
values += [row.get(col) for col in dup_cols]
|
1650
|
+
values += [ensure_basic_type(row.get(col)) for col in dup_cols]
|
1600
1651
|
cursor.execute(sql, values)
|
1601
1652
|
affected = cursor.rowcount if cursor.rowcount is not None else 0
|
1602
1653
|
if update_on_duplicate:
|
@@ -1,7 +1,7 @@
|
|
1
1
|
mdbq/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
|
2
|
-
mdbq/__version__.py,sha256=
|
2
|
+
mdbq/__version__.py,sha256=VOS7S7alTl8woUwMDIW96XJ-1hJ6jsjObiRC2Q4yLis,18
|
3
3
|
mdbq/aggregation/__init__.py,sha256=EeDqX2Aml6SPx8363J-v1lz0EcZtgwIBYyCJV6CcEDU,40
|
4
|
-
mdbq/aggregation/query_data.py,sha256=
|
4
|
+
mdbq/aggregation/query_data.py,sha256=gQUcdrK0QCA0nEBkRA9zBieRWqSnkvpV5Eb-hXKw9Y8,170094
|
5
5
|
mdbq/log/__init__.py,sha256=Mpbrav0s0ifLL7lVDAuePEi1hJKiSHhxcv1byBKDl5E,15
|
6
6
|
mdbq/log/mylogger.py,sha256=kPe3wsQNaB1slfX-Z7VMqzZoMoqPfc7ylYXZDBeFzzI,24945
|
7
7
|
mdbq/myconf/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
@@ -11,7 +11,7 @@ mdbq/mysql/deduplicator.py,sha256=AB3gL7ZwhcmzGHSu4UY4M6YZVPFZ2wlAN3BCcwAhegQ,73
|
|
11
11
|
mdbq/mysql/mysql.py,sha256=pDg771xBugCMSTWeskIFTi3pFLgaqgyG3smzf-86Wn8,56772
|
12
12
|
mdbq/mysql/s_query.py,sha256=1wJ3HVjHEF6FA-bVeeesRlsf73CZSvVTEQ51CF1OsE4,46786
|
13
13
|
mdbq/mysql/unique_.py,sha256=MaztT-WIyEQUs-OOYY4pFulgHVcXR1BfCy3QUz0XM_U,21127
|
14
|
-
mdbq/mysql/uploader.py,sha256=
|
14
|
+
mdbq/mysql/uploader.py,sha256=Y5gCXuhZR-Oo89xaU4wRlcrzDtarABEyJLt43GvDhcI,88718
|
15
15
|
mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
|
16
16
|
mdbq/other/download_sku_picture.py,sha256=X66sVdvVgzoNzmgVJyPtd7bjEvctEKtLPblEPF65EWc,46940
|
17
17
|
mdbq/other/error_handler.py,sha256=4p5haAXSY-P78stp4Xwo_MwAngWYqyKj5ogWIuYXMeY,12631
|
@@ -25,7 +25,7 @@ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
|
|
25
25
|
mdbq/redis/getredis.py,sha256=vpBuNc22uj9Vr-_Dh25_wpwWM1e-072EAAIBdB_IpL0,23494
|
26
26
|
mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
|
27
27
|
mdbq/spider/aikucun.py,sha256=XptHjGzbout9IYzWAOQUpMMV5qEgLTU8pL1ZGt8oNEA,21868
|
28
|
-
mdbq-4.0.
|
29
|
-
mdbq-4.0.45.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
30
|
-
mdbq-4.0.45.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
31
|
-
mdbq-4.0.45.dist-info/RECORD,,
|
28
|
+
mdbq-4.0.47.dist-info/METADATA,sha256=QhfkX2DquhukBOdBvFoJMZfRoZ6Hcja6VWQxaZGcPz0,364
|
29
|
+
mdbq-4.0.47.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
30
|
+
mdbq-4.0.47.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
|
31
|
+
mdbq-4.0.47.dist-info/RECORD,,
|
File without changes
|
File without changes
|