mdbq 4.0.4__tar.gz → 4.0.6__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {mdbq-4.0.4 → mdbq-4.0.6}/PKG-INFO +1 -1
  2. mdbq-4.0.6/mdbq/__version__.py +1 -0
  3. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/aggregation/query_data.py +54 -87
  4. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/mysql/unique_.py +8 -8
  5. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/mysql/uploader.py +18 -7
  6. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq.egg-info/PKG-INFO +1 -1
  7. mdbq-4.0.4/mdbq/__version__.py +0 -1
  8. {mdbq-4.0.4 → mdbq-4.0.6}/README.txt +0 -0
  9. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/__init__.py +0 -0
  10. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/aggregation/__init__.py +0 -0
  11. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/config/__init__.py +0 -0
  12. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/config/config.py +0 -0
  13. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/log/__init__.py +0 -0
  14. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/log/mylogger.py +0 -0
  15. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/log/spider_logging.py +0 -0
  16. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/mysql/__init__.py +0 -0
  17. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/mysql/deduplicator.py +0 -0
  18. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/mysql/mysql.py +0 -0
  19. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/mysql/s_query.py +0 -0
  20. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/other/__init__.py +0 -0
  21. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/other/download_sku_picture.py +0 -0
  22. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/other/otk.py +0 -0
  23. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/other/pov_city.py +0 -0
  24. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/other/ua_sj.py +0 -0
  25. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/pbix/__init__.py +0 -0
  26. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/pbix/pbix_refresh.py +0 -0
  27. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/pbix/refresh_all.py +0 -0
  28. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/redis/__init__.py +0 -0
  29. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/redis/getredis.py +0 -0
  30. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/spider/__init__.py +0 -0
  31. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq/spider/aikucun.py +0 -0
  32. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq.egg-info/SOURCES.txt +0 -0
  33. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq.egg-info/dependency_links.txt +0 -0
  34. {mdbq-4.0.4 → mdbq-4.0.6}/mdbq.egg-info/top_level.txt +0 -0
  35. {mdbq-4.0.4 → mdbq-4.0.6}/setup.cfg +0 -0
  36. {mdbq-4.0.4 → mdbq-4.0.6}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 4.0.4
3
+ Version: 4.0.6
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -0,0 +1 @@
1
+ VERSION = '4.0.6'
@@ -87,108 +87,90 @@ def upload_data_decorator(**upload_kwargs):
87
87
  def decorator(func):
88
88
  @wraps(func)
89
89
  def wrapper(*args, **kwargs):
90
+ db_name = None
91
+ table_name = None
90
92
  try:
91
- # 获取 set_type 或 set_typ 参数
92
- set_type = None
93
- # 先从kwargs查找
94
- for key in ['set_type', 'set_typ']:
95
- if key in kwargs:
96
- set_type = kwargs[key]
97
- break
98
- # 如果没在kwargs找到,尝试从args按参数名顺序查找
99
- if set_type is None:
100
- sig = inspect.signature(func)
101
- params = list(sig.parameters)
102
- for key in ['set_type', 'set_typ']:
103
- if key in params:
104
- idx = params.index(key)
105
- if len(args) > idx:
106
- set_type = args[idx]
107
- break
93
+ # 获取函数签名和参数
94
+ sig = inspect.signature(func)
95
+ bound_args = sig.bind(*args, **kwargs)
96
+ args_dict = bound_args.arguments
97
+
98
+ # 获取所需参数
99
+ def get_param_value(param_name, alternatives=None):
100
+ if alternatives is None:
101
+ alternatives = [param_name]
102
+ # kwargs 或 args_dict 中获取参数值
103
+ for key in alternatives:
104
+ if key in kwargs:
105
+ return kwargs[key]
106
+ if key in args_dict:
107
+ return args_dict[key]
108
+ return None
109
+
110
+ # 获取参数值
111
+ set_type = get_param_value('set_type', ['set_type', 'set_typ'])
112
+ db_name = get_param_value('db_name')
113
+ table_name = get_param_value('table_name')
108
114
 
109
115
  # 执行原始函数
110
116
  result = func(*args, **kwargs)
111
117
 
112
- # 如果返回 None,直接返回
113
118
  if result is None:
114
119
  return None
115
-
116
- # 如果返回的是 DataFrame
120
+
121
+ # 处理 DataFrame 结果
117
122
  if isinstance(result, pd.DataFrame):
118
- # 调整列顺序
119
123
  if set_type is not None:
120
124
  result = reorder_columns(result, set_type)
121
- # 设置默认值
122
- default_kwargs = {
125
+
126
+ # 合并参数
127
+ merged_kwargs = {
123
128
  'check_duplicate': False,
124
129
  'update_on_duplicate': True,
125
130
  'allow_null': False,
126
- 'transaction_mode': 'batch'
131
+ 'transaction_mode': 'batch',
132
+ **upload_kwargs
127
133
  }
128
- # 更新参数,优先使用装饰器参数
129
- merged_kwargs = {**default_kwargs, **upload_kwargs}
130
134
 
131
- # 上传数据
132
- uld.upload_data(
133
- data=result,
134
- **merged_kwargs
135
- )
135
+ uld.upload_data(data=result, **merged_kwargs)
136
136
  return True
137
-
138
- # 如果返回的是元组
137
+
138
+ # 处理元组结果
139
139
  elif isinstance(result, tuple):
140
- # 检查元组长度
141
140
  if len(result) < 2:
142
- logger.warning('函数返回的元组长度小于2,直接返回原结果,不执行上传', {'函数': func.__name__})
141
+ logger.warning('函数返回的元组长度小于2,直接返回原结果,不执行上传', {'函数': func.__name__, '库': db_name, '表': table_name})
143
142
  return result
144
-
145
- # 获取前两个元素
143
+
146
144
  df, extra_kwargs = result[0], result[1]
147
145
 
148
- # 检查第一个元素是否为DataFrame
149
146
  if not isinstance(df, pd.DataFrame):
150
- logger.warning('函数返回的元组第一个元素不是DataFrame,直接返回原结果,不执行上传', {'函数': func.__name__})
147
+ logger.warning('函数返回的元组第一个元素不是DataFrame,直接返回原结果,不执行上传', {'函数': func.__name__, '库': db_name, '表': table_name})
151
148
  return result
152
-
153
- # 调整列顺序
149
+
154
150
  if set_type is not None:
155
151
  df = reorder_columns(df, set_type)
156
- # 保持元组结构
157
152
  result = (df, extra_kwargs) + result[2:]
158
- # 合并装饰器参数和函数参数
159
- merged_kwargs = {**upload_kwargs}
160
- merged_kwargs.update(extra_kwargs)
161
-
162
- # 设置默认值
163
- default_kwargs = {
153
+
154
+ # 合并参数
155
+ merged_kwargs = {
164
156
  'check_duplicate': False,
165
157
  'update_on_duplicate': True,
166
158
  'allow_null': False,
167
- 'transaction_mode': 'batch'
159
+ 'transaction_mode': 'batch',
160
+ **upload_kwargs,
161
+ **extra_kwargs
168
162
  }
169
- # 更新参数,优先使用装饰器参数
170
- for key, value in default_kwargs.items():
171
- if key not in merged_kwargs:
172
- merged_kwargs[key] = value
173
163
 
174
- # 上传数据
175
- uld.upload_data(
176
- data=df,
177
- **merged_kwargs
178
- )
164
+ uld.upload_data(data=df, **merged_kwargs)
179
165
 
180
- # 如果元组长度大于2,返回完整元组
181
- if len(result) > 2:
182
- return result
183
- return True
184
-
185
- # 其他情况直接返回结果
166
+ return result if len(result) > 2 else True
167
+
186
168
  return result
187
-
169
+
188
170
  except Exception as e:
189
- logger.error('数据上传失败', {'函数': func.__name__, '错误': str(e)})
171
+ logger.error('数据上传失败', {'函数': func.__name__, '库': db_name, '表': table_name, '错误': str(e)})
190
172
  return False
191
-
173
+
192
174
  return wrapper
193
175
  return decorator
194
176
 
@@ -2019,7 +2001,7 @@ class MysqlDatasQuery:
2019
2001
  'partition_date_column': '日期', # 用于分表的日期列名,默认为'日期'
2020
2002
  'indexes': [], # 普通索引列
2021
2003
  'transaction_mode': 'batch', # 事务模式
2022
- 'unique_keys': [['日期', '店铺名称', '产品线', '触发sku_id', '跟单sku_id']], # 唯一约束列表
2004
+ 'unique_keys': [['日期', '店铺名称', '产品线', '触发sku_id', '跟单sku_id', '花费']], # 唯一约束列表
2023
2005
  }
2024
2006
 
2025
2007
  @try_except
@@ -3761,12 +3743,7 @@ def date_table():
3761
3743
  }
3762
3744
 
3763
3745
 
3764
- def query1(months=1, less_dict=None, download_manager=None):
3765
- if less_dict is None:
3766
- less_dict = []
3767
- if months == 0:
3768
- logger.info('months 不建议为 0')
3769
- return
3746
+ def query1(months=1, download_manager=None):
3770
3747
  sdq = MysqlDatasQuery(download_manager=download_manager) # 实例化数据处理类
3771
3748
  sdq.months = months # 设置数据周期, 1 表示近 2 个月
3772
3749
 
@@ -3793,12 +3770,7 @@ def query1(months=1, less_dict=None, download_manager=None):
3793
3770
  sdq.performance_concat(bb_tg=False, db_name='聚合数据', table_name='天猫_推广汇总') # _推广商品销售
3794
3771
 
3795
3772
 
3796
- def query2(months=1, less_dict=None, download_manager=None):
3797
- if less_dict is None:
3798
- less_dict = []
3799
- if months == 0:
3800
- logger.info('months 不建议为 0')
3801
- return
3773
+ def query2(months=1, download_manager=None):
3802
3774
  sdq = MysqlDatasQuery(download_manager=download_manager) # 实例化数据处理类
3803
3775
  sdq.months = months # 设置数据周期, 1 表示近 2 个月
3804
3776
  sdq.dplyd(db_name='聚合数据', table_name='店铺流量来源构成')
@@ -3811,18 +3783,13 @@ def query2(months=1, less_dict=None, download_manager=None):
3811
3783
  sdq.deeplink(db_name='聚合数据', table_name='达摩盘_deeplink人群洞察')
3812
3784
 
3813
3785
 
3814
- def query3(months=1, less_dict=None, download_manager=None):
3815
- if less_dict is None:
3816
- less_dict = []
3817
- if months == 0:
3818
- logger.info('months 不建议为 0')
3819
- return
3786
+ def query3(months=1, download_manager=None):
3820
3787
  sdq = MysqlDatasQuery(download_manager=download_manager) # 实例化数据处理类
3821
3788
  sdq.months = months # 设置数据周期, 1 表示近 2 个月
3822
3789
  sdq.spph(db_name='聚合数据', table_name='天猫_商品排行')
3823
3790
 
3824
3791
 
3825
- def main(days=150, months=3):
3792
+ def main(months=3):
3826
3793
  # 1. 更新日期表 更新货品年份基准表, 属性设置 3 - 货品年份基准
3827
3794
  date_table()
3828
3795
 
@@ -321,14 +321,14 @@ def main():
321
321
  # "sku榜单": [['日期', '平台', '店铺名称', '条码']],
322
322
  # "spu榜单": [['日期', '平台', '店铺名称', '商品款号', '访客量']],
323
323
  # },
324
- # "生意参谋3": {
325
- # "crm成交客户": [['客户id']],
326
- # "商品排行": [['日期', '店铺名称', '商品id']],
327
- # "流量来源构成": [['日期', '店铺名称', '来源构成', '类别', '一级来源', '二级来源', '三级来源']],
328
- # "手淘搜索": [['日期', '店铺名称', '搜索词', '词类型', '访客数']],
329
- # "新品追踪": [['日期', '店铺名称', '商品id']],
330
- # "直播分场次效果": [['场次id']],
331
- # },
324
+ "生意参谋3": {
325
+ # "crm成交客户": [['客户id']],
326
+ # "商品排行": [['日期', '店铺名称', '商品id']],
327
+ "流量来源构成": [['日期', '店铺名称', '来源构成', '类别', '一级来源', '二级来源', '三级来源']],
328
+ # "手淘搜索": [['日期', '店铺名称', '搜索词', '词类型', '访客数']],
329
+ # "新品追踪": [['日期', '店铺名称', '商品id']],
330
+ # "直播分场次效果": [['场次id']],
331
+ },
332
332
  # "生意经3": {
333
333
  # "sku销量_按名称": [['日期', '店铺名称', '宝贝id', 'sku名称', '销售额']],
334
334
  # "sku销量_按商家编码": [['日期', '店铺名称', '宝贝id', 'sku编码', '销售额']],
@@ -19,7 +19,7 @@ import math
19
19
  warnings.filterwarnings('ignore')
20
20
  logger = mylogger.MyLogger(
21
21
  logging_mode='file',
22
- log_level='info',
22
+ log_level='debug',
23
23
  log_format='json',
24
24
  max_log_size=50,
25
25
  backup_count=5,
@@ -528,7 +528,6 @@ class MySQLUploader:
528
528
  })
529
529
  return 0.0
530
530
  elif 'date' in column_type_lower or 'time' in column_type_lower:
531
- # 判断是date还是datetime/timestamp
532
531
  if 'datetime' in column_type_lower or 'timestamp' in column_type_lower:
533
532
  default_date = '2000-01-01 00:00:00'
534
533
  else:
@@ -545,14 +544,14 @@ class MySQLUploader:
545
544
  return None
546
545
  try:
547
546
  if isinstance(value, str) and value.strip().endswith('%'):
548
- try:
547
+ if re.match(r'^\d+(\.\d+)?%$', value.strip()):
549
548
  percent_str = value.strip().replace('%', '')
550
549
  percent_value = float(percent_str)
551
550
  decimal_value = percent_value / 100
552
551
  logger.debug('百分比字符串转小数', {'原始': value, '结果': decimal_value})
553
552
  return decimal_value
554
- except ValueError:
555
- logger.warning('百分比字符串转小数失败', {
553
+ else:
554
+ logger.warning('百分比字符串不符合格式,跳过转换', {
556
555
  '库': db_name, '表': table_name, '列': col_name, '原始': value
557
556
  })
558
557
  elif 'int' in column_type_lower:
@@ -582,10 +581,22 @@ class MySQLUploader:
582
581
  })
583
582
  raise ValueError(f"无效日期格式: `{value}` -> {str(e)}")
584
583
  return str(value)
585
- elif 'char' in column_type_lower or 'text' in column_type_lower:
584
+ elif 'varchar' in column_type_lower:
586
585
  if isinstance(value, str):
587
586
  return value.replace('\\', '\\\\').replace("'", "\\'")
588
- return str(value)
587
+ else:
588
+ return str(value)
589
+ elif 'text' in column_type_lower:
590
+ if isinstance(value, str):
591
+ max_length = 65535
592
+ if len(value) > max_length:
593
+ logger.warning(f'TEXT字符串长度不允许超过 {max_length},已截断', {
594
+ '库': db_name, '表': table_name, '列': col_name, '原始值': f'{value[:50]}...', '截断后值': f'{value[:50]}...'
595
+ })
596
+ value = value[:max_length]
597
+ return value.replace('\\', '\\\\').replace("'", "\\'")
598
+ else:
599
+ return str(value)
589
600
  elif 'json' in column_type_lower:
590
601
  return json.dumps(value) if value is not None else None
591
602
  else:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 4.0.4
3
+ Version: 4.0.6
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1 +0,0 @@
1
- VERSION = '4.0.4'
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes