mdbq 4.0.8__tar.gz → 4.0.9__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (36):
  1. {mdbq-4.0.8 → mdbq-4.0.9}/PKG-INFO +1 -1
  2. mdbq-4.0.9/mdbq/__version__.py +1 -0
  3. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/aggregation/query_data.py +81 -60
  4. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/mysql/uploader.py +58 -9
  5. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq.egg-info/PKG-INFO +1 -1
  6. mdbq-4.0.8/mdbq/__version__.py +0 -1
  7. {mdbq-4.0.8 → mdbq-4.0.9}/README.txt +0 -0
  8. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/__init__.py +0 -0
  9. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/aggregation/__init__.py +0 -0
  10. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/config/__init__.py +0 -0
  11. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/config/config.py +0 -0
  12. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/log/__init__.py +0 -0
  13. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/log/mylogger.py +0 -0
  14. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/log/spider_logging.py +0 -0
  15. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/mysql/__init__.py +0 -0
  16. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/mysql/deduplicator.py +0 -0
  17. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/mysql/mysql.py +0 -0
  18. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/mysql/s_query.py +0 -0
  19. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/mysql/unique_.py +0 -0
  20. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/other/__init__.py +0 -0
  21. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/other/download_sku_picture.py +0 -0
  22. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/other/otk.py +0 -0
  23. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/other/pov_city.py +0 -0
  24. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/other/ua_sj.py +0 -0
  25. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/pbix/__init__.py +0 -0
  26. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/pbix/pbix_refresh.py +0 -0
  27. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/pbix/refresh_all.py +0 -0
  28. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/redis/__init__.py +0 -0
  29. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/redis/getredis.py +0 -0
  30. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/spider/__init__.py +0 -0
  31. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq/spider/aikucun.py +0 -0
  32. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq.egg-info/SOURCES.txt +0 -0
  33. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq.egg-info/dependency_links.txt +0 -0
  34. {mdbq-4.0.8 → mdbq-4.0.9}/mdbq.egg-info/top_level.txt +0 -0
  35. {mdbq-4.0.8 → mdbq-4.0.9}/setup.cfg +0 -0
  36. {mdbq-4.0.8 → mdbq-4.0.9}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 4.0.8
3
+ Version: 4.0.9
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -0,0 +1 @@
1
+ VERSION = '4.0.9'
@@ -119,9 +119,16 @@ def upload_data_decorator(**upload_kwargs):
119
119
  return None
120
120
 
121
121
  # 处理 DataFrame 结果
122
- if isinstance(result, pd.DataFrame):
122
+ if isinstance(result, (pd.DataFrame, list, dict)):
123
123
  if set_type is not None:
124
- result = reorder_columns(result, set_type)
124
+ if isinstance(result, pd.DataFrame):
125
+ result = reorder_columns(result, set_type)
126
+ elif isinstance(result, list):
127
+ # 如果是list,转换为DataFrame以调整列顺序
128
+ result = reorder_columns(pd.DataFrame(result), set_type)
129
+ elif isinstance(result, dict):
130
+ # 如果是dict,转换为DataFrame以调整列顺序
131
+ result = reorder_columns(pd.DataFrame([result]), set_type)
125
132
 
126
133
  # 合并参数
127
134
  merged_kwargs = {
@@ -143,12 +150,19 @@ def upload_data_decorator(**upload_kwargs):
143
150
 
144
151
  df, extra_kwargs = result[0], result[1]
145
152
 
146
- if not isinstance(df, pd.DataFrame):
147
- logger.warning('函数返回的元组第一个元素不是DataFrame,直接返回原结果,不执行上传', {'函数': func.__name__, '库': db_name, '表': table_name})
153
+ if not isinstance(df, (pd.DataFrame, list, dict)):
154
+ logger.warning('函数返回的元组第一个元素不是DataFrame/list/dict,直接返回原结果,不执行上传', {'函数': func.__name__, '库': db_name, '表': table_name})
148
155
  return result
149
156
 
150
157
  if set_type is not None:
151
- df = reorder_columns(df, set_type)
158
+ if isinstance(df, pd.DataFrame):
159
+ df = reorder_columns(df, set_type)
160
+ elif isinstance(df, list):
161
+ # 如果是list,转换为DataFrame以调整列顺序
162
+ df = reorder_columns(pd.DataFrame(df), set_type)
163
+ elif isinstance(df, dict):
164
+ # 如果是dict,转换为DataFrame以调整列顺序
165
+ df = reorder_columns(pd.DataFrame([df]), set_type)
152
166
  result = (df, extra_kwargs) + result[2:]
153
167
 
154
168
  # 合并参数
@@ -2370,61 +2384,57 @@ class MysqlDatasQuery:
2370
2384
  '更新时间': 'timestamp',
2371
2385
  }
2372
2386
  logger.info('正在更新数据库', {'主机': f'{host}:{port}', '库': db_name, '表': table_name})
2373
- new_dict = {
2374
- '日期': '',
2375
- '店铺名称': '',
2376
- '场次信息': '',
2377
- '场次id': '',
2378
- '直播开播时间': '',
2379
- '开播时长': '',
2380
- '封面图点击率': '',
2381
- '观看人数': '',
2382
- '观看次数': '',
2383
- '新增粉丝数': '',
2384
- '流量券消耗': '',
2385
- '观看总时长': '',
2386
- '人均观看时长': '',
2387
- '次均观看时长': '',
2388
- '商品点击人数': '',
2389
- '商品点击次数': '',
2390
- '商品点击率': '',
2391
- '加购人数': '',
2392
- '加购件数': '',
2393
- '加购次数': '',
2394
- '成交金额': '',
2395
- '成交人数': '',
2396
- '成交件数': '',
2397
- '成交笔数': '',
2398
- '成交转化率': '',
2399
- '退款人数': '',
2400
- '退款笔数': '',
2401
- '退款件数': '',
2402
- '退款金额': '',
2403
- '预售定金支付金额': '',
2404
- '预售预估总金额': '',
2405
- }
2406
- _results = []
2407
- for dict_data in df.to_dict(orient='records'):
2408
- new_dict.update(dict_data)
2409
- _results.append(new_dict)
2410
- if _results:
2411
- return _results, {
2412
- 'db_name': db_name,
2413
- 'table_name': table_name,
2414
- 'set_typ': set_typ,
2415
- 'primary_keys': ['场次id'], # 创建唯一主键
2416
- 'check_duplicate': False, # 检查重复数据
2417
- 'duplicate_columns': [], # 指定排重的组合键
2418
- 'update_on_duplicate': True, # 更新旧数据
2419
- 'allow_null': False, # 允许插入空值
2420
- 'partition_by': None, # 分表方式
2421
- 'partition_date_column': '日期', # 用于分表的日期列名,默认为'日期'
2422
- 'indexes': [], # 普通索引列
2423
- 'transaction_mode': 'batch', # 事务模式
2424
- 'unique_keys': [['场次id']], # 唯一约束列表
2425
- }
2426
- else:
2427
- return None, None
2387
+ ordered_columns = [
2388
+ '日期',
2389
+ '店铺名称',
2390
+ '场次信息',
2391
+ '场次id',
2392
+ '直播开播时间',
2393
+ '开播时长',
2394
+ '封面图点击率',
2395
+ '观看人数',
2396
+ '观看次数',
2397
+ '新增粉丝数',
2398
+ '流量券消耗',
2399
+ '观看总时长',
2400
+ '人均观看时长',
2401
+ '次均观看时长',
2402
+ '商品点击人数',
2403
+ '商品点击次数',
2404
+ '商品点击率',
2405
+ '加购人数',
2406
+ '加购件数',
2407
+ '加购次数',
2408
+ '成交金额',
2409
+ '成交人数',
2410
+ '成交件数',
2411
+ '成交笔数',
2412
+ '成交转化率',
2413
+ '退款人数',
2414
+ '退款笔数',
2415
+ '退款件数',
2416
+ '退款金额',
2417
+ '预售定金支付金额',
2418
+ '预售预估总金额',
2419
+ ]
2420
+ # 使用reindex重排列顺序,未定义的列会自动放在最后
2421
+ df = df.reindex(columns=[col for col in ordered_columns if col in df.columns] +
2422
+ [col for col in df.columns if col not in ordered_columns])
2423
+ return df, {
2424
+ 'db_name': db_name,
2425
+ 'table_name': table_name,
2426
+ 'set_typ': set_typ,
2427
+ 'primary_keys': [], # 创建唯一主键
2428
+ 'check_duplicate': False, # 检查重复数据
2429
+ 'duplicate_columns': [], # 指定排重的组合键
2430
+ 'update_on_duplicate': True, # 更新旧数据
2431
+ 'allow_null': False, # 允许插入空值
2432
+ 'partition_by': None, # 分表方式
2433
+ 'partition_date_column': '日期', # 用于分表的日期列名,默认为'日期'
2434
+ 'indexes': [], # 普通索引列
2435
+ 'transaction_mode': 'batch', # 事务模式
2436
+ 'unique_keys': [['场次id']], # 唯一约束列表
2437
+ }
2428
2438
 
2429
2439
  # @try_except
2430
2440
  @upload_data_decorator()
@@ -3685,3 +3695,14 @@ def main(months=3):
3685
3695
  if __name__ == '__main__':
3686
3696
  main(months=3)
3687
3697
  pass
3698
+
3699
+ # download_manager = s_query.QueryDatas(
3700
+ # username=username,
3701
+ # password=password,
3702
+ # host=host,
3703
+ # port=port,
3704
+ # maxconnections=10,
3705
+ # )
3706
+ # sdq = MysqlDatasQuery(download_manager=download_manager)
3707
+ # sdq.months = 3
3708
+ # sdq.zb_ccfx(db_name='聚合数据', table_name='生意参谋_直播场次分析')
@@ -404,7 +404,15 @@ class MySQLUploader:
404
404
  raise ValueError('set_typ 未指定')
405
405
  # set_typ的键清洗
406
406
  set_typ = {self._normalize_col(k): v for k, v in set_typ.items()}
407
- column_defs = ["`id` INT NOT NULL AUTO_INCREMENT"]
407
+
408
+ # 处理id列和主键
409
+ column_defs = []
410
+
411
+ # 添加id列(仅在没有指定主键时)
412
+ if not primary_keys:
413
+ column_defs.append("`id` INT NOT NULL AUTO_INCREMENT")
414
+
415
+ # 添加其他列
408
416
  for col_name, col_type in set_typ.items():
409
417
  if col_name == 'id':
410
418
  continue
@@ -413,18 +421,23 @@ class MySQLUploader:
413
421
  if not allow_null and not col_type.lower().startswith('json'):
414
422
  col_def += " NOT NULL"
415
423
  column_defs.append(col_def)
424
+
416
425
  # 主键处理逻辑调整
417
426
  def _index_col_sql(col):
418
427
  col_type = set_typ.get(col, '').lower()
419
428
  if 'varchar' in col_type or 'text' in col_type:
420
429
  return f"`{self._normalize_col(col)}`(100)"
421
430
  return f"`{self._normalize_col(col)}`"
431
+
432
+ # 处理主键
422
433
  if primary_keys and len(primary_keys) > 0:
434
+ # 如果指定了主键,直接使用指定的主键
423
435
  safe_primary_keys = [_index_col_sql(pk) for pk in primary_keys]
424
436
  primary_key_sql = f"PRIMARY KEY ({','.join(safe_primary_keys)})"
425
437
  else:
426
- safe_primary_keys = [_index_col_sql('id')]
438
+ # 如果没有指定主键,使用id作为主键
427
439
  primary_key_sql = f"PRIMARY KEY (`id`)"
440
+
428
441
  # 索引统一在CREATE TABLE中定义
429
442
  index_defs = []
430
443
  if date_column and date_column in set_typ:
@@ -435,15 +448,28 @@ class MySQLUploader:
435
448
  if idx_col in set_typ:
436
449
  safe_idx_col = _index_col_sql(idx_col)
437
450
  index_defs.append(f"INDEX `idx_{self._normalize_col(idx_col)}` ({safe_idx_col})")
451
+
438
452
  # UNIQUE KEY定义
439
453
  unique_defs = []
440
454
  if unique_keys:
441
455
  for unique_cols in unique_keys:
442
456
  if not unique_cols:
443
457
  continue
458
+ # 检查唯一约束是否与主键冲突
459
+ if primary_keys:
460
+ # 如果唯一约束的列是主键的一部分,则跳过
461
+ if set(unique_cols).issubset(set(primary_keys)):
462
+ logger.warning('跳过与主键冲突的唯一约束', {
463
+ '库': db_name,
464
+ '表': table_name,
465
+ '唯一约束': unique_cols,
466
+ '主键': primary_keys
467
+ })
468
+ continue
444
469
  safe_unique_cols = [_index_col_sql(col) for col in unique_cols]
445
470
  unique_name = f"uniq_{'_'.join([self._normalize_col(c) for c in unique_cols])}"
446
471
  unique_defs.append(f"UNIQUE KEY `{unique_name}` ({','.join(safe_unique_cols)})")
472
+
447
473
  index_defs = list(set(index_defs))
448
474
  all_defs = column_defs + [primary_key_sql] + index_defs + unique_defs
449
475
  sql = f"""
@@ -1437,11 +1463,22 @@ class MySQLUploader:
1437
1463
  try:
1438
1464
  cursor.executemany(sql, values_list)
1439
1465
  conn.commit()
1440
- inserted = cursor.rowcount if cursor.rowcount is not None else 0
1441
- total_inserted += inserted
1442
- total_skipped += len(batch) - inserted
1466
+ # 在batch模式下,affected_rows表示实际影响的行数
1467
+ # 如果update_on_duplicate为True,则affected_rows包含更新的行数
1468
+ # 如果update_on_duplicate为False,则affected_rows只包含插入的行数
1469
+ affected = cursor.rowcount if cursor.rowcount is not None else 0
1470
+ if update_on_duplicate:
1471
+ # 当启用更新时,affected_rows包含插入和更新的行数
1472
+ # 我们需要区分插入和更新的行数
1473
+ # 由于无法准确区分,我们假设所有行都是插入的
1474
+ total_inserted += len(batch)
1475
+ else:
1476
+ # 当不启用更新时,affected_rows只包含插入的行数
1477
+ total_inserted += affected
1478
+ total_skipped += len(batch) - affected
1443
1479
  except pymysql.err.IntegrityError as e:
1444
1480
  conn.rollback()
1481
+ # 在唯一约束冲突时,所有行都被跳过
1445
1482
  total_skipped += len(batch)
1446
1483
  logger.debug('批量插入唯一约束冲突,全部跳过', {'库': db_name, '表': table_name, '错误': str(e)})
1447
1484
  except Exception as e:
@@ -1460,10 +1497,16 @@ class MySQLUploader:
1460
1497
  values += [row.get(col) for col in dup_cols]
1461
1498
  cursor.execute(sql, values)
1462
1499
  affected = cursor.rowcount if cursor.rowcount is not None else 0
1463
- if affected > 0:
1500
+ if update_on_duplicate:
1501
+ # 当启用更新时,affected_rows包含插入和更新的行数
1502
+ # 假设所有行都是插入的,因为无法区分插入和更新
1464
1503
  total_inserted += 1
1465
1504
  else:
1466
- total_skipped += 1
1505
+ # 当不启用更新时,affected_rows只包含插入的行数
1506
+ if affected > 0:
1507
+ total_inserted += 1
1508
+ else:
1509
+ total_skipped += 1
1467
1510
  except pymysql.err.IntegrityError as e:
1468
1511
  conn.rollback()
1469
1512
  total_skipped += 1
@@ -1482,10 +1525,16 @@ class MySQLUploader:
1482
1525
  values += [row.get(col) for col in dup_cols]
1483
1526
  cursor.execute(sql, values)
1484
1527
  affected = cursor.rowcount if cursor.rowcount is not None else 0
1485
- if affected > 0:
1528
+ if update_on_duplicate:
1529
+ # 当启用更新时,affected_rows包含插入和更新的行数
1530
+ # 假设所有行都是插入的,因为无法区分插入和更新
1486
1531
  total_inserted += 1
1487
1532
  else:
1488
- total_skipped += 1
1533
+ # 当不启用更新时,affected_rows只包含插入的行数
1534
+ if affected > 0:
1535
+ total_inserted += 1
1536
+ else:
1537
+ total_skipped += 1
1489
1538
  conn.commit()
1490
1539
  except pymysql.err.IntegrityError as e:
1491
1540
  conn.rollback()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mdbq
3
- Version: 4.0.8
3
+ Version: 4.0.9
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -1 +0,0 @@
1
- VERSION = '4.0.8'
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes