mdbq 3.6.9__py3-none-any.whl → 3.6.11__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
mdbq/redis/getredis.py CHANGED
@@ -271,7 +271,6 @@ class RedisData(object):
             temp_df[col] = temp_df[col].dt.strftime("%Y-%m-%d")
         return temp_df.to_json(orient="records", force_ascii=False)
 
-
 class RedisDataHash(object):
     """
     存储 hash
@@ -285,13 +284,6 @@ class RedisDataHash(object):
     """
 
     def __init__(self, redis_engine, download, cache_ttl: int):
-        """
-        初始化缓存处理器
-
-        :param redis_engine: Redis连接实例
-        :param download: 数据下载处理器(需实现data_to_df方法)
-        :param cache_ttl: 缓存存活时间(单位:分钟,内部转换为秒存储)
-        """
         self.redis_engine = redis_engine
         self.download = download
         self.cache_ttl = cache_ttl * 60 # 转换为秒存储
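For context, RedisDataHash is wired up the same way as the string-based RedisData class; the commented-out __main__ block at the bottom of the module shows the intended setup. A minimal sketch, with placeholder connection values and cache_ttl given in minutes:

import redis
from mdbq.mysql import s_query

r = redis.Redis(host='127.0.0.1', port=6379, db=0, password='...')            # placeholder credentials
d = s_query.QueryDatas(username='...', password='...', host='...', port=3306)  # placeholder credentials
m = RedisDataHash(redis_engine=r, download=d, cache_ttl=60)                    # 60 minutes -> 3600 s internally

df = m.get_from_redis(
    db_name='聚合数据',
    table_name='多店推广场景_按日聚合',
    set_year=False,
    start_date='2025-01-01',
    end_date='2025-01-31',
)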
@@ -304,20 +296,8 @@ class RedisDataHash(object):
             start_date,
             end_date
     ) -> pd.DataFrame:
-        """
-        从MySQL直接获取数据的核心方法
-
-        处理逻辑:
-        1. 当启用年份分表时(set_year=True),自动遍历2024到当前年份的所有分表
-        2. 合并所有符合条件的数据表内容
-        3. 自动处理日期列格式转换
-
-        :return: 合并后的DataFrame(可能包含多个分表数据)
-        """
-        # 原有实现保持不变
         dfs = []
         if set_year:
-            # 处理年份分表情况(例如 table_2024, table_2025...)
             current_year = datetime.datetime.today().year
             for year in range(2024, current_year + 1):
                 df = self._fetch_table_data(
@@ -326,12 +306,10 @@ class RedisDataHash(object):
                 if df is not None:
                     dfs.append(df)
         else:
-            # 单表查询模式
             df = self._fetch_table_data(db_name, table_name, start_date, end_date)
             if df is not None:
                 dfs.append(df)
 
-        # 合并结果并处理空数据情况
         combined_df = pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
         if combined_df.empty:
             logger.warn(f"warning: {db_name}.{table_name} 未读取到数据")
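When set_year is enabled, get_from_mysql fans the query out over per-year tables named {table_name}_{year}, starting at 2024. A standalone sketch of the same loop, where fetch_one is a hypothetical stand-in for self._fetch_table_data:

import datetime
import pandas as pd

def fetch_year_sharded(fetch_one, db_name, table_name, start_date, end_date) -> pd.DataFrame:
    dfs = []
    current_year = datetime.datetime.today().year
    for year in range(2024, current_year + 1):           # table_2024, table_2025, ...
        df = fetch_one(db_name, f"{table_name}_{year}", start_date, end_date)
        if df is not None and not df.empty:
            dfs.append(df)
    return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()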
@@ -347,69 +325,45 @@ class RedisDataHash(object):
             start_date,
             end_date
     ) -> pd.DataFrame:
-        """
-        带缓存策略的数据获取主入口
-
-        执行流程:
-        1. 生成缓存键并检查TTL(存活时间)
-        2. 当TTL<60秒时触发异步更新,同时直接访问MySQL获取最新数据
-        3. 从Redis获取历史数据并进行日期过滤
-        4. 若缓存数据不完整,触发异步更新并降级到MySQL查询
-        5. 异常时自动降级到MySQL查询
-
-        设计特点:
-        - 缓存预热:首次访问时异步更新缓存
-        - 降级机制:任何异常自动切换直连MySQL
-        - 过时缓存:当TTL不足时并行更新缓存
-        """
-        # 时分秒部分重置为 00:00:00 这是个巨坑,不可以省略
         start_dt = pd.to_datetime(start_date).floor('D')
         end_dt = pd.to_datetime(end_date).floor('D')
-        # 生成缓存键名
         cache_key = self._generate_cache_key(db_name, table_name, set_year)
 
         try:
-            # 检查缓存
             ttl = self.redis_engine.ttl(cache_key)
-            if ttl < 60: # 当剩余时间不足1分钟时触发更新
-                # 获取当前缓存
+            if ttl < 60:
                 cache_data = self._fetch_redis_data(cache_key)
-                # 异步更新缓存
                 self._trigger_async_cache_update(
                     cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
                 )
-                # 立即降级返回MySQL查询
                 return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
 
-            # 按年份范围获取缓存数据(优化大数据量时的读取效率)
-            start_year = start_dt.year
-            end_year = end_dt.year
-            cache_data = self._fetch_redis_data(cache_key, start_year, end_year)
-            # 空数据检查(缓存未命中)
+            # 生成月份范围
+            start_month = start_dt.to_period('M')
+            end_month = end_dt.to_period('M')
+            months = pd.period_range(start_month, end_month, freq='M').strftime("%Y%m").tolist()
+            cache_data = self._fetch_redis_data(cache_key, months)
             if cache_data.empty:
                 self._trigger_async_cache_update(
                     cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
                 )
                 return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
-            # 按请求范围过滤数据(应对按年存储的粗粒度缓存)
+
             filtered_df = self._filter_by_date_range(cache_data, start_dt, end_dt)
             if not filtered_df.empty:
                 if '日期' in filtered_df.columns.tolist():
-                    # 缓存数据的日期在请求日期范围内时,直接返回缓存数据
                     exsit_min_date = filtered_df['日期'].min()
                     if exsit_min_date <= start_dt:
                         return filtered_df
                 else:
                     return filtered_df
-            # 缓存数据不完整时触发异步更新缓存
+
             self._trigger_async_cache_update(
                 cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
             )
-            # 立即降级返回MySQL查询
             return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
 
         except Exception as e:
-            # 异常策略:立即返回MySQL查询,保障服务可用
             logger.error(f"Redis 连接异常: {e},直接访问 MySQL")
             return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
 
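The main change in get_from_redis is the switch from year-keyed to month-keyed hash fields: the requested date range is mapped to YYYYMM field names and only those fields are read. A minimal sketch of that read path, assuming a redis-py client and JSON-serialized records per field:

import json
import pandas as pd

def read_month_shards(redis_client, cache_key: str, start_date: str, end_date: str) -> pd.DataFrame:
    start_dt = pd.to_datetime(start_date).floor('D')
    end_dt = pd.to_datetime(end_date).floor('D')
    # e.g. '2025-01-15'..'2025-03-02' -> ['202501', '202502', '202503']
    months = pd.period_range(start_dt.to_period('M'), end_dt.to_period('M'), freq='M').strftime("%Y%m").tolist()

    frames = []
    # HMGET returns one value (or None) per requested field.
    for field, raw in zip(months, redis_client.hmget(cache_key, months)):
        if raw:
            frames.append(pd.DataFrame(json.loads(raw.decode("utf-8"))))
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()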
@@ -423,45 +377,41 @@ class RedisDataHash(object):
             end_date,
             existing_data: pd.DataFrame
     ) -> None:
-        """
-        异步缓存更新方法
-
-        核心逻辑:
-        1. 获取MySQL最新数据
-        2. 合并新旧数据(保留历史数据中不在新数据时间范围内的部分)
-        3. 智能存储策略:
-           - 无日期字段:全量存储到"all"字段
-           - 有日期字段:按年份分片存储(提升查询效率)
-
-        设计特点:
-        - 增量更新:仅合并必要数据,避免全量覆盖
-        - 数据分片:按年存储提升大数据的读取性能
-        - 容错处理:跳过无日期字段的异常情况
-        """
         try:
-            # 获取最新数据(使用最新查询条件)
             new_data = self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
             if new_data.empty:
                 return
 
-            # 合并缓存数据
             combined_data = self._merge_data(new_data, existing_data)
 
             if not combined_data.empty:
-                # 处理无日期字段的特殊情况
-                if '日期' not in combined_data.columns.tolist():
-                    # 数据序列化
-                    serialized_data = self._serialize_data(combined_data)
-                    self.redis_engine.hset(cache_key, "all", serialized_data)
-                    self.redis_engine.expire(cache_key, self.cache_ttl)
+                if '日期' not in combined_data.columns:
+                    # 原子化删除旧分片
+                    # 优化分片存储性能
+                    chunk_size = 5000
+                    with self.redis_engine.pipeline(transaction=False) as pipe:
+                        # 批量删除旧分片
+                        for key in self.redis_engine.hscan_iter(cache_key, match="all_*"):
+                            pipe.hdel(cache_key, key[0])
+
+                        # 批量写入新分片
+                        for idx in range(0, len(combined_data), chunk_size):
+                            chunk = combined_data.iloc[idx:idx + chunk_size]
+                            chunk_key = f"all_{idx // chunk_size:04d}"
+                            pipe.hset(cache_key, chunk_key, self._serialize_data(chunk))
+
+                        pipe.expire(cache_key, self.cache_ttl)
+                        pipe.execute()
+                    # serialized_data = self._serialize_data(combined_data)
+                    # self.redis_engine.hset(cache_key, "all", serialized_data)
+                    # self.redis_engine.expire(cache_key, self.cache_ttl)
                 else:
-                    # 按年份分片存储策略
-                    combined_data['年份'] = combined_data['日期'].dt.year
-                    # 分组存储到Redis哈希的不同字段(例如2024字段存储当年数据)
-                    for year, group in combined_data.groupby('年份'):
-                        year_str = str(year)
-                        serialized_data = self._serialize_data(group.drop(columns=['年份']))
-                        self.redis_engine.hset(cache_key, year_str, serialized_data)
+                    # 按月分片存储
+                    combined_data['month'] = combined_data['日期'].dt.to_period('M').dt.strftime("%Y%m")
+                    for month_str, group in combined_data.groupby('month'):
+                        group = group.drop(columns=['month'])
+                        serialized_data = self._serialize_data(group)
+                        self.redis_engine.hset(cache_key, month_str, serialized_data)
                     self.redis_engine.expire(cache_key, self.cache_ttl)
                 logger.info(f"缓存更新 {cache_key} | 数据量: {len(combined_data)}")
         except Exception as e:
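For frames without a 日期 column, the new set_redis splits the data into fixed-size chunks and rewrites the all_* hash fields through one non-transactional pipeline instead of storing a single "all" field. A minimal sketch of that write path, assuming a redis-py client and a DataFrame-to-bytes serializer:

import pandas as pd
import redis

def rewrite_all_shards(r: redis.Redis, cache_key: str, df: pd.DataFrame,
                       serialize, ttl_seconds: int, chunk_size: int = 5000) -> None:
    # serialize is a stand-in for RedisDataHash._serialize_data.
    with r.pipeline(transaction=False) as pipe:
        # Queue deletion of the previous chunk fields (all_0000, all_0001, ...).
        for field, _ in r.hscan_iter(cache_key, match="all_*"):
            pipe.hdel(cache_key, field)
        # Queue the new chunks.
        for idx in range(0, len(df), chunk_size):
            chunk = df.iloc[idx:idx + chunk_size]
            pipe.hset(cache_key, f"all_{idx // chunk_size:04d}", serialize(chunk))
        pipe.expire(cache_key, ttl_seconds)
        pipe.execute()  # one round trip for all queued commands

With transaction=False the pipeline batches commands into a single round trip but does not wrap them in MULTI/EXEC.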
@@ -474,7 +424,6 @@ class RedisDataHash(object):
             start_date,
             end_date
     ) -> pd.DataFrame:
-        """执行MySQL查询并返回DataFrame(带异常处理)"""
         try:
             return self.download.data_to_df(
                 db_name=db_name,
@@ -484,23 +433,77 @@ class RedisDataHash(object):
                 projection={}
             )
         except Exception as e:
-            logger.info(f"MySQL 查询异常 {db_name}.{table_name}: {e}")
+            logger.error(f"MySQL 查询异常 {db_name}.{table_name}: {e}")
             return pd.DataFrame()
 
-    def _fetch_redis_data(self, cache_key: str, start_year: int = None, end_year: int = None) -> pd.DataFrame:
-        """
-        从Redis哈希表读取数据
+    def _fetch_redis_data(self, cache_key: str, months: list = None) -> pd.DataFrame:
+        try:
+            dfs = []
 
-        优化策略:
-        - 当指定年份范围时,仅获取相关字段(hmget)
-        - 未指定范围时全量获取(hgetall)
-        -- 从mysql过来的表,虽然没有日期列,但也指定了 start_year/end_year,再redis中存储的键名是"all",所以要把 all也加进去
-        """
+            if months is not None:
+                # 1. 获取指定月份数据
+                month_fields = months.copy()
+                month_data = self.redis_engine.hmget(cache_key, month_fields)
+
+                # 处理月份数据
+                for data, field in zip(month_data, month_fields):
+                    if data:
+                        try:
+                            df = pd.DataFrame(json.loads(data.decode("utf-8")))
+                            df = self._convert_date_columns(df)
+                            dfs.append(df)
+                        except Exception as e:
+                            logger.error(f"月份数据解析失败 {field}: {e}")
+
+                # 2. 获取所有分片数据
+                # 优化分片数据获取
+                pipeline = self.redis_engine.pipeline()
+                cursor, keys = self.redis_engine.hscan(cache_key, match="all_*")
+                while True:
+                    for key in keys:
+                        pipeline.hget(cache_key, key)
+                    if cursor == 0:
+                        break
+                    cursor, keys = self.redis_engine.hscan(cache_key, cursor=cursor, match="all_*")
+                shard_values = pipeline.execute()
+
+                # 处理分片数据
+                for value in shard_values:
+                    if value:
+                        try:
+                            df = pd.DataFrame(json.loads(value.decode("utf-8")))
+                            dfs.append(self._convert_date_columns(df))
+                        except Exception as e:
+                            logger.error(f"分片数据解析失败: {e}")
+
+            else:
+                # 原有全量获取逻辑保持不变
+                data_dict = self.redis_engine.hgetall(cache_key)
+                for field, data in data_dict.items():
+                    try:
+                        df = pd.DataFrame(json.loads(data.decode("utf-8")))
+                        df = self._convert_date_columns(df)
+                        dfs.append(df)
+                    except Exception as e:
+                        logger.error(f"Redis 数据解析失败 {field.decode()}: {e}")
+
+            # 统一合并和排序处理
+            if dfs:
+                final_df = pd.concat(dfs, ignore_index=True)
+                if '日期' in final_df.columns:
+                    final_df = final_df.sort_values('日期', ascending=False)
+                return final_df
+            return pd.DataFrame()
+
+        except Exception as e:
+            logger.error(f"Redis 数据获取失败 {cache_key}: {e}")
+            return pd.DataFrame()
+
+    def _fetch_redis_data_bak(self, cache_key: str, months: list = None) -> pd.DataFrame:
         try:
-            if start_year is not None and end_year is not None:
-                # 按年份范围精确获取字段(提升性能)
-                fields = [str(y) for y in range(start_year, end_year + 1)]
-                fields += ['all']
+            if months is not None:
+                fields = months.copy()
+                fields.append('all')
                 data_list = self.redis_engine.hmget(cache_key, fields)
                 dfs = []
                 for data, field in zip(data_list, fields):
@@ -510,29 +513,40 @@ class RedisDataHash(object):
                         dfs.append(df)
                 return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
             else:
-                # 全量获取模式
-                data_dict = self.redis_engine.hgetall(cache_key)
+                # 优化分片数据获取
+                cursor, data = self.redis_engine.hscan(cache_key, match="all_*")
                 dfs = []
-                for field, data in data_dict.items():
-                    try:
-                        df = pd.DataFrame(json.loads(data.decode("utf-8")))
-                        df = self._convert_date_columns(df)
-                        dfs.append(df)
-                    except Exception as e:
-                        logger.info(f"Redis 数据解析失败 {cache_key} 字段 {field}: {e}")
+                while True:
+                    for field, value in data.items():
+                        try:
+                            df = pd.DataFrame(json.loads(value))
+                            dfs.append(self._convert_date_columns(df))
+                        except Exception as e:
+                            logger.error(f"分片解析失败 {field}: {e}")
+                    if cursor == 0:
+                        break
+                    cursor, data = self.redis_engine.hscan(cache_key, cursor=cursor, match="all_*")
+                return pd.concat(dfs) if dfs else pd.DataFrame()
+                # data_dict = self.redis_engine.hgetall(cache_key)
+                # dfs = []
+                # for field, data in data_dict.items():
+                #     try:
+                #         df = pd.DataFrame(json.loads(data.decode("utf-8")))
+                #         df = self._convert_date_columns(df)
+                #         dfs.append(df)
+                #     except Exception as e:
+                #         logger.error(f"Redis 数据解析失败 {cache_key} 字段 {field}: {e}")
                 return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
         except Exception as e:
-            logger.info(f"Redis 数据获取失败 {cache_key}: {e}")
+            logger.error(f"Redis 数据获取失败 {cache_key}: {e}")
             return pd.DataFrame()
 
     def _convert_date_columns(self, df: pd.DataFrame) -> pd.DataFrame:
-        """统一日期列格式转换"""
         if "日期" in df.columns:
             df["日期"] = pd.to_datetime(df["日期"], format="%Y-%m-%d", errors="coerce")
         return df
 
     def _generate_cache_key(self, db_name: str, table_name: str, set_year: bool) -> str:
-        """生成缓存键名"""
         return f"{db_name}:{table_name}_haveyear" if set_year else f"{db_name}:{table_name}"
 
     def _filter_by_date_range(
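On the read side, the all_* chunk fields are discovered with HSCAN and their payloads fetched back through a pipeline. A minimal standalone sketch of that cursor loop, assuming redis-py:

import json
import pandas as pd
import redis

def read_all_chunks(r: redis.Redis, cache_key: str) -> pd.DataFrame:
    # Walk the hash with HSCAN until the server returns cursor 0.
    fields, cursor = [], 0
    while True:
        cursor, batch = r.hscan(cache_key, cursor=cursor, match="all_*")
        fields.extend(batch.keys())          # batch maps field -> value
        if cursor == 0:
            break

    # Fetch the chunk payloads in one round trip and rebuild the frame.
    with r.pipeline(transaction=False) as pipe:
        for field in fields:
            pipe.hget(cache_key, field)
        values = pipe.execute()

    frames = [pd.DataFrame(json.loads(v.decode("utf-8"))) for v in values if v]
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

HSCAN already returns field/value pairs for hashes, which is why the package code indexes key[0] when it only needs the field names.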
@@ -541,7 +555,6 @@ class RedisDataHash(object):
             start_dt: datetime.datetime,
             end_dt: datetime.datetime
     ) -> pd.DataFrame:
-        """按日期范围精确过滤数据"""
         if "日期" not in df.columns:
             return df
         date_mask = (df["日期"] >= start_dt) & (df["日期"] <= end_dt)
@@ -557,7 +570,6 @@ class RedisDataHash(object):
             end_date: str,
             existing_data: pd.DataFrame
     ):
-        """启动异步线程执行缓存更新(不阻塞主流程)"""
         thread = threading.Thread(
             target=self.set_redis,
             args=(cache_key, db_name, table_name, set_year, start_date, end_date, existing_data),
@@ -566,17 +578,14 @@ class RedisDataHash(object):
         thread.start()
 
     def _merge_data(self, new_data: pd.DataFrame, existing_data: pd.DataFrame) -> pd.DataFrame:
-        """合并新旧数据集策略:保留现有数据中在新数据范围外的历史数据,并按日期排序"""
         if existing_data.empty or "日期" not in existing_data.columns:
             return new_data
         new_data["日期"] = pd.to_datetime(new_data["日期"])
         existing_data["日期"] = pd.to_datetime(existing_data["日期"])
 
-        # 计算新数据日期范围
         new_min = new_data["日期"].min()
         new_max = new_data["日期"].max()
 
-        # 保留现有数据中在新数据范围之外的部分
         valid_historical = existing_data[
             (existing_data["日期"] < new_min) | (existing_data["日期"] > new_max)
         ]
@@ -585,53 +594,35 @@ class RedisDataHash(object):
         return merged_data
 
     def _serialize_data(self, df: pd.DataFrame) -> bytes:
-        """
-        高性能数据序列化方法
-
-        处理要点:
-        1. 日期类型转换为字符串
-        2. Decimal类型转换为浮点数
-        3. NaN值统一转换为None
-        4. 优化JSON序列化性能
-        """
         if df.empty:
             return json.dumps([], ensure_ascii=False).encode("utf-8")
         temp_df = df.copy()
 
-        # 处理日期类型列(安全转换)
         date_cols = temp_df.select_dtypes(include=["datetime64[ns]"]).columns
         for col in date_cols:
-            # 处理全NaT列避免类型错误
             if temp_df[col].isna().all():
-                temp_df[col] = temp_df[col].astype(object) # 转换为object类型避免NaT
+                temp_df[col] = temp_df[col].astype(object)
             temp_df[col] = (
                 temp_df[col]
-                .dt.strftime("%Y-%m-%d") # 安全使用dt访问器(因类型强制为datetime)
+                .dt.strftime("%Y-%m-%d")
                 .where(temp_df[col].notna(), None)
             )
 
-        # 统一空值处理(保护全None列类型)
         def safe_null_convert(series):
-            """保留全None列的原始dtype"""
             if series.isna().all():
                 return series.astype(object).where(pd.notnull(series), None)
             return series.where(pd.notnull(series), None)
 
         temp_df = temp_df.apply(safe_null_convert)
 
-        # 类型处理函数(增强嵌套结构处理)
         def decimal_serializer(obj):
-            """递归序列化处理"""
-            # 提前处理None值
             if obj is None:
                 return None
-
-            # 按类型分发处理
             if isinstance(obj, Decimal):
                 return round(float(obj), 6)
             elif isinstance(obj, pd.Timestamp):
-                return obj.strftime("%Y-%m-%d %H:%M:%S") # 兜底处理漏网之鱼
-            elif isinstance(obj, np.generic): # 处理所有numpy标量类型
+                return obj.strftime("%Y-%m-%d %H:%M:%S")
+            elif isinstance(obj, np.generic):
                 return obj.item()
             elif isinstance(obj, (datetime.date, datetime.datetime)):
                 return obj.isoformat()
@@ -640,11 +631,10 @@ class RedisDataHash(object):
             elif isinstance(obj, dict):
                 return {decimal_serializer(k): decimal_serializer(v) for k, v in obj.items()}
             elif isinstance(obj, bytes):
-                return obj.decode("utf-8", errors="replace") # 二进制安全处理
-            elif isinstance(obj, pd.Series): # 防止意外传入Series对象
+                return obj.decode("utf-8", errors="replace")
+            elif isinstance(obj, pd.Series):
                 return obj.to_list()
             else:
-                # 尝试直接转换可序列化类型
                 try:
                     json.dumps(obj)
                     return obj
@@ -652,18 +642,15 @@ class RedisDataHash(object):
                     logger.error(f"无法序列化类型 {type(obj)}: {str(obj)}")
                     raise
 
-        # 序列化前防御性检查
         try:
             data_records = temp_df.to_dict(orient="records")
         except Exception as e:
             logger.error(f"数据转换字典失败: {str(e)}")
             raise
 
-        # 空记录特殊处理
         if not data_records:
             return json.dumps([], ensure_ascii=False).encode("utf-8")
 
-        # 执行序列化
         try:
             return json.dumps(
                 data_records,
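The json.dumps call at the end of _serialize_data relies on a default= hook to cover types the standard encoder rejects (Decimal, numpy scalars, timestamps, bytes). A minimal standalone sketch of the same idea, not the package's exact function:

import json
import datetime
from decimal import Decimal

import numpy as np
import pandas as pd

def json_default(obj):
    # Called by json.dumps only for objects it cannot encode natively.
    if isinstance(obj, Decimal):
        return round(float(obj), 6)
    if isinstance(obj, (pd.Timestamp, datetime.datetime, datetime.date)):
        return obj.isoformat()
    if isinstance(obj, np.generic):          # any numpy scalar -> plain Python scalar
        return obj.item()
    if isinstance(obj, bytes):
        return obj.decode("utf-8", errors="replace")
    raise TypeError(f"Unserializable type: {type(obj)}")

payload = json.dumps(
    [{"日期": pd.Timestamp("2025-01-01"), "花费": Decimal("12.30"), "点击量": np.int64(7)}],
    ensure_ascii=False,
    default=json_default,
).encode("utf-8")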
@@ -0,0 +1,710 @@
1
+ # -*- coding: UTF-8 –*-
2
+ import os.path
3
+ import redis
4
+ import socket
5
+ from mdbq.mysql import s_query
6
+ from mdbq.config import myconfig
7
+ import pandas as pd
8
+ import json
9
+ import datetime
10
+ import threading
11
+ import logging
12
+ from logging.handlers import RotatingFileHandler
13
+ import getpass
14
+ import platform
15
+ from decimal import Decimal
16
+
17
+ if platform.system() == 'Windows':
18
+ D_PATH = os.path.join(f'C:\\Users\\{getpass.getuser()}\\Downloads')
19
+ else:
20
+ D_PATH = os.path.join(f'/Users/{getpass.getuser()}/Downloads')
21
+
22
+
23
+ if socket.gethostname() == 'company' or socket.gethostname() == 'Mac2.local':
24
+ conf = myconfig.main()
25
+ conf_data = conf['Windows']['company']['mysql']['local']
26
+ username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
27
+ redis_password = conf['Windows']['company']['redis']['local']['password']
28
+ elif socket.gethostname() == 'MacBookPro':
29
+ conf = myconfig.main()
30
+ conf_data = conf['Windows']['xigua_lx']['mysql']['local']
31
+ username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
32
+ redis_password = conf['Windows']['company']['redis']['local']['password']
33
+ else:
34
+ conf = myconfig.main()
35
+ conf_data = conf['Windows']['xigua_lx']['mysql']['local']
36
+ username, password, host, port = conf_data['username'], conf_data['password'], conf_data['host'], conf_data['port']
37
+ redis_password = conf['Windows']['company']['redis']['local']['password'] # redis 使用本地数据,全部机子相同
38
+
39
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(message)s')
40
+
41
+ # 获取当前模块的日志记录器
42
+ logger = logging.getLogger(__name__)
43
+
44
+ # 创建一个文件处理器,用于将日志写入文件
45
+ # file_handler = logging.FileHandler(os.path.join(D_PATH, 'logfile', 'redis.log'))
46
+ if not os.path.isdir(os.path.join(D_PATH, 'logfile')):
47
+ os.makedirs(os.path.join(D_PATH, 'logfile'))
48
+ log_file = os.path.join(D_PATH, 'logfile', 'redis.log')
49
+ file_handler = RotatingFileHandler(log_file, maxBytes=3 * 1024 * 1024, backupCount=10) # 保留10个备份文件
50
+ file_handler.setLevel(logging.INFO) # 设置文件处理器的日志级别
51
+
52
+ # 创建一个日志格式器,并设置给文件处理器
53
+ formatter = logging.Formatter('[%(asctime)s] %(levelname)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
54
+ file_handler.setFormatter(formatter)
55
+
56
+ # 将文件处理器添加到日志记录器
57
+ logger.addHandler(file_handler)
58
+
59
+
60
+ class RedisData(object):
61
+ """
62
+ 存储 string
63
+ """
64
+ def __init__(self, redis_engine, download, cache_ttl: int):
65
+ self.redis_engine = redis_engine # Redis 数据处理引擎
66
+ self.download = download # MySQL 数据处理引擎
67
+ self.cache_ttl = cache_ttl * 60 # 缓存过期时间(秒)
68
+
69
+ def get_from_mysql(
70
+ self,
71
+ db_name: str,
72
+ table_name: str,
73
+ set_year: bool,
74
+ start_date,
75
+ end_date
76
+ ) -> pd.DataFrame:
77
+ """
78
+ 从 MySQL 读取数据并返回 DataFrame
79
+
80
+ Args:
81
+ set_year: 表名是否包含年份后缀
82
+ """
83
+ dfs = []
84
+ if set_year:
85
+ current_year = datetime.datetime.today().year
86
+ for year in range(2024, current_year + 1):
87
+ df = self._fetch_table_data(
88
+ db_name, f"{table_name}_{year}", start_date, end_date
89
+ )
90
+ if df is not None:
91
+ dfs.append(df)
92
+ else:
93
+ df = self._fetch_table_data(db_name, table_name, start_date, end_date)
94
+ if df is not None:
95
+ dfs.append(df)
96
+
97
+ combined_df = pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
98
+ if combined_df.empty:
99
+ logger.info(f"警告: {db_name}.{table_name} 未读取到数据")
100
+ else:
101
+ combined_df = self._convert_date_columns(combined_df)
102
+ return combined_df
103
+
104
+ def get_from_redis(
105
+ self,
106
+ db_name: str,
107
+ table_name: str,
108
+ set_year: bool,
109
+ start_date,
110
+ end_date
111
+ ) -> pd.DataFrame:
112
+ """
113
+ 从 Redis 获取数据,若缓存过期/不完整则触发异步更新
114
+ """
115
+ start_dt = pd.to_datetime(start_date)
116
+ end_dt = pd.to_datetime(end_date)
117
+ cache_key = self._generate_cache_key(db_name, table_name, set_year)
118
+
119
+ # 尝试获取缓存元数据
120
+ try:
121
+ ttl = self.redis_engine.ttl(cache_key)
122
+ cache_data = self._fetch_redis_data(cache_key)
123
+ except Exception as e:
124
+ logger.info(f"Redis 连接异常: {e},直接访问 MySQL")
125
+ return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
126
+
127
+ # 缓存失效处理逻辑
128
+ if ttl < 60 or cache_data.empty:
129
+ self._trigger_async_cache_update(
130
+ cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
131
+ )
132
+ return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
133
+
134
+ # 处理有效缓存数据
135
+ filtered_df = self._filter_by_date_range(cache_data, start_dt, end_dt)
136
+ if not filtered_df.empty:
137
+ return filtered_df
138
+
139
+ # 缓存数据不满足查询范围要求
140
+ self._trigger_async_cache_update(
141
+ cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
142
+ )
143
+ return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
144
+
145
+ def set_redis(
146
+ self,
147
+ cache_key: str,
148
+ db_name: str,
149
+ table_name: str,
150
+ set_year: bool,
151
+ start_date,
152
+ end_date,
153
+ existing_data: pd.DataFrame
154
+ ) -> pd.DataFrame:
155
+ """
156
+ 异步更新 Redis 缓存,合并新旧数据
157
+ """
158
+ try:
159
+ # 从 MySQL 获取新数据
160
+ new_data = self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
161
+ if new_data.empty:
162
+ return pd.DataFrame()
163
+
164
+ # 合并历史数据
165
+ combined_data = self._merge_data(new_data, existing_data)
166
+
167
+ # 序列化并存储到 Redis
168
+ serialized_data = self._serialize_data(combined_data)
169
+ self.redis_engine.set(cache_key, serialized_data)
170
+ self.redis_engine.expire(cache_key, self.cache_ttl)
171
+
172
+ logger.info(f"缓存更新 {cache_key} | 数据量: {len(combined_data)}")
173
+ return combined_data
174
+
175
+ except Exception as e:
176
+ logger.info(f"缓存更新失败: {cache_key} - {str(e)}")
177
+ return pd.DataFrame()
178
+
179
+ # Helper Methods ------------------------------------------------
180
+
181
+ def _fetch_table_data(
182
+ self,
183
+ db_name: str,
184
+ table_name: str,
185
+ start_date,
186
+ end_date
187
+ ) -> pd.DataFrame:
188
+ """封装 MySQL 数据获取逻辑"""
189
+ try:
190
+ return self.download.data_to_df(
191
+ db_name=db_name,
192
+ table_name=table_name,
193
+ start_date=start_date,
194
+ end_date=end_date,
195
+ projection={}
196
+ )
197
+ except Exception as e:
198
+ logger.info(f"MySQL 查询异常 {db_name}.{table_name}: {e}")
199
+ return pd.DataFrame()
200
+
201
+ def _fetch_redis_data(self, cache_key: str) -> pd.DataFrame:
202
+ """从 Redis 获取并解析数据(自动转换日期列)"""
203
+ try:
204
+ data = self.redis_engine.get(cache_key)
205
+ if not data:
206
+ return pd.DataFrame()
207
+ # 反序列化数据
208
+ df = pd.DataFrame(json.loads(data.decode("utf-8")))
209
+ return self._convert_date_columns(df)
210
+ except Exception as e:
211
+ logger.info(f"Redis 数据解析失败 {cache_key}: {e}")
212
+ return pd.DataFrame()
213
+
214
+ def _convert_date_columns(self, df: pd.DataFrame) -> pd.DataFrame:
215
+ """统一处理日期列转换"""
216
+ if "日期" in df.columns:
217
+ df["日期"] = pd.to_datetime(df["日期"], format="%Y-%m-%d", errors="coerce")
218
+ return df
219
+
220
+ def _generate_cache_key(self, db_name: str, table_name: str, set_year: bool) -> str:
221
+ """生成标准化的缓存键"""
222
+ return f"{db_name}:{table_name}_haveyear" if set_year else f"{db_name}:{table_name}"
223
+
224
+ def _filter_by_date_range(
225
+ self,
226
+ df: pd.DataFrame,
227
+ start_dt: datetime.datetime,
228
+ end_dt: datetime.datetime
229
+ ) -> pd.DataFrame:
230
+ """按日期范围筛选数据"""
231
+ if "日期" not in df.columns:
232
+ return df
233
+ date_mask = (df["日期"] >= start_dt) & (df["日期"] <= end_dt)
234
+ return df[date_mask].copy()
235
+
236
+ def _trigger_async_cache_update(
237
+ self,
238
+ cache_key: str,
239
+ db_name: str,
240
+ table_name: str,
241
+ set_year: bool,
242
+ start_date: str,
243
+ end_date: str,
244
+ existing_data: pd.DataFrame
245
+ ):
246
+ """启动异步缓存更新线程"""
247
+ thread = threading.Thread(
248
+ target=self.set_redis,
249
+ args=(cache_key, db_name, table_name, set_year, start_date, end_date, existing_data),
250
+ daemon=True
251
+ )
252
+ thread.start()
253
+
254
+ def _merge_data(self, new_data: pd.DataFrame, existing_data: pd.DataFrame) -> pd.DataFrame:
255
+ """合并新旧数据集"""
256
+ if existing_data.empty or "日期" not in existing_data.columns:
257
+ return new_data
258
+
259
+ new_min = new_data["日期"].min()
260
+ new_max = new_data["日期"].max()
261
+ valid_historical = existing_data[
262
+ (existing_data["日期"] < new_min) | (existing_data["日期"] > new_max)
263
+ ]
264
+ return pd.concat([new_data, valid_historical], ignore_index=True).drop_duplicates(subset=["日期"])
265
+
266
+ def _serialize_data(self, df: pd.DataFrame) -> str:
267
+ """序列化 DataFrame 并处理日期类型"""
268
+ temp_df = df.copy()
269
+ date_cols = temp_df.select_dtypes(include=["datetime64[ns]"]).columns
270
+ for col in date_cols:
271
+ temp_df[col] = temp_df[col].dt.strftime("%Y-%m-%d")
272
+ return temp_df.to_json(orient="records", force_ascii=False)
273
+
274
+
275
+ class RedisDataHash(object):
276
+ """
277
+ 存储 hash
278
+ Redis缓存与MySQL数据联合查询处理器
279
+
280
+ 功能特性:
281
+ - 支持带年份分表的MySQL数据查询
282
+ - 多级缓存策略(内存缓存+Redis缓存)
283
+ - 异步缓存更新机制
284
+ - 自动处理日期范围和数据类型转换
285
+ """
286
+
287
+ def __init__(self, redis_engine, download, cache_ttl: int):
288
+ """
289
+ 初始化缓存处理器
290
+
291
+ :param redis_engine: Redis连接实例
292
+ :param download: 数据下载处理器(需实现data_to_df方法)
293
+ :param cache_ttl: 缓存存活时间(单位:分钟,内部转换为秒存储)
294
+ """
295
+ self.redis_engine = redis_engine
296
+ self.download = download
297
+ self.cache_ttl = cache_ttl * 60 # 转换为秒存储
298
+
299
+ def get_from_mysql(
300
+ self,
301
+ db_name: str,
302
+ table_name: str,
303
+ set_year: bool,
304
+ start_date,
305
+ end_date
306
+ ) -> pd.DataFrame:
307
+ """
308
+ 从MySQL直接获取数据的核心方法
309
+
310
+ 处理逻辑:
311
+ 1. 当启用年份分表时(set_year=True),自动遍历2024到当前年份的所有分表
312
+ 2. 合并所有符合条件的数据表内容
313
+ 3. 自动处理日期列格式转换
314
+
315
+ :return: 合并后的DataFrame(可能包含多个分表数据)
316
+ """
317
+ # 原有实现保持不变
318
+ dfs = []
319
+ if set_year:
320
+ # 处理年份分表情况(例如 table_2024, table_2025...)
321
+ current_year = datetime.datetime.today().year
322
+ for year in range(2024, current_year + 1):
323
+ df = self._fetch_table_data(
324
+ db_name, f"{table_name}_{year}", start_date, end_date
325
+ )
326
+ if df is not None:
327
+ dfs.append(df)
328
+ else:
329
+ # 单表查询模式
330
+ df = self._fetch_table_data(db_name, table_name, start_date, end_date)
331
+ if df is not None:
332
+ dfs.append(df)
333
+
334
+ # 合并结果并处理空数据情况
335
+ combined_df = pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
336
+ if combined_df.empty:
337
+ logger.warn(f"warning: {db_name}.{table_name} 未读取到数据")
338
+ else:
339
+ combined_df = self._convert_date_columns(combined_df)
340
+ return combined_df
341
+
342
+ def get_from_redis(
343
+ self,
344
+ db_name: str,
345
+ table_name: str,
346
+ set_year: bool,
347
+ start_date,
348
+ end_date
349
+ ) -> pd.DataFrame:
350
+ """
351
+ 带缓存策略的数据获取主入口
352
+
353
+ 执行流程:
354
+ 1. 生成缓存键并检查TTL(存活时间)
355
+ 2. 当TTL<60秒时触发异步更新,同时直接访问MySQL获取最新数据
356
+ 3. 从Redis获取历史数据并进行日期过滤
357
+ 4. 若缓存数据不完整,触发异步更新并降级到MySQL查询
358
+ 5. 异常时自动降级到MySQL查询
359
+
360
+ 设计特点:
361
+ - 缓存预热:首次访问时异步更新缓存
362
+ - 降级机制:任何异常自动切换直连MySQL
363
+ - 过时缓存:当TTL不足时并行更新缓存
364
+ """
365
+ # 时分秒部分重置为 00:00:00 这是个巨坑,不可以省略
366
+ start_dt = pd.to_datetime(start_date).floor('D')
367
+ end_dt = pd.to_datetime(end_date).floor('D')
368
+ # 生成缓存键名
369
+ cache_key = self._generate_cache_key(db_name, table_name, set_year)
370
+
371
+ try:
372
+ # 检查缓存
373
+ ttl = self.redis_engine.ttl(cache_key)
374
+ if ttl < 60: # 当剩余时间不足1分钟时触发更新
375
+ # 获取当前缓存
376
+ cache_data = self._fetch_redis_data(cache_key)
377
+ # 异步更新缓存
378
+ self._trigger_async_cache_update(
379
+ cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
380
+ )
381
+ # 立即降级返回MySQL查询
382
+ return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
383
+
384
+ # 按年份范围获取缓存数据(优化大数据量时的读取效率)
385
+ start_year = start_dt.year
386
+ end_year = end_dt.year
387
+ cache_data = self._fetch_redis_data(cache_key, start_year, end_year)
388
+ # 空数据检查(缓存未命中)
389
+ if cache_data.empty:
390
+ self._trigger_async_cache_update(
391
+ cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
392
+ )
393
+ return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
394
+ # 按请求范围过滤数据(应对按年存储的粗粒度缓存)
395
+ filtered_df = self._filter_by_date_range(cache_data, start_dt, end_dt)
396
+ if not filtered_df.empty:
397
+ if '日期' in filtered_df.columns.tolist():
398
+ # 缓存数据的日期在请求日期范围内时,直接返回缓存数据
399
+ exsit_min_date = filtered_df['日期'].min()
400
+ if exsit_min_date <= start_dt:
401
+ return filtered_df
402
+ else:
403
+ return filtered_df
404
+ # 缓存数据不完整时触发异步更新缓存
405
+ self._trigger_async_cache_update(
406
+ cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
407
+ )
408
+ # 立即降级返回MySQL查询
409
+ return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
410
+
411
+ except Exception as e:
412
+ # 异常策略:立即返回MySQL查询,保障服务可用
413
+ logger.error(f"Redis 连接异常: {e},直接访问 MySQL")
414
+ return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
415
+
416
+ def set_redis(
417
+ self,
418
+ cache_key: str,
419
+ db_name: str,
420
+ table_name: str,
421
+ set_year: bool,
422
+ start_date,
423
+ end_date,
424
+ existing_data: pd.DataFrame
425
+ ) -> None:
426
+ """
427
+ 异步缓存更新方法
428
+
429
+ 核心逻辑:
430
+ 1. 获取MySQL最新数据
431
+ 2. 合并新旧数据(保留历史数据中不在新数据时间范围内的部分)
432
+ 3. 智能存储策略:
433
+ - 无日期字段:全量存储到"all"字段
434
+ - 有日期字段:按年份分片存储(提升查询效率)
435
+
436
+ 设计特点:
437
+ - 增量更新:仅合并必要数据,避免全量覆盖
438
+ - 数据分片:按年存储提升大数据的读取性能
439
+ - 容错处理:跳过无日期字段的异常情况
440
+ """
441
+ try:
442
+ # 获取最新数据(使用最新查询条件)
443
+ new_data = self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
444
+ if new_data.empty:
445
+ return
446
+
447
+ # 合并缓存数据
448
+ combined_data = self._merge_data(new_data, existing_data)
449
+
450
+ if not combined_data.empty:
451
+ # 处理无日期字段的特殊情况
452
+ if '日期' not in combined_data.columns.tolist():
453
+ # 数据序列化
454
+ serialized_data = self._serialize_data(combined_data)
455
+ self.redis_engine.hset(cache_key, "all", serialized_data)
456
+ self.redis_engine.expire(cache_key, self.cache_ttl)
457
+ else:
458
+ # 按年份分片存储策略
459
+ combined_data['年份'] = combined_data['日期'].dt.year
460
+ # 分组存储到Redis哈希的不同字段(例如2024字段存储当年数据)
461
+ for year, group in combined_data.groupby('年份'):
462
+ year_str = str(year)
463
+ serialized_data = self._serialize_data(group.drop(columns=['年份']))
464
+ self.redis_engine.hset(cache_key, year_str, serialized_data)
465
+ self.redis_engine.expire(cache_key, self.cache_ttl)
466
+ logger.info(f"缓存更新 {cache_key} | 数据量: {len(combined_data)}")
467
+ except Exception as e:
468
+ logger.error(f"缓存更新失败: {cache_key} - {str(e)}")
469
+
470
+ def _fetch_table_data(
471
+ self,
472
+ db_name: str,
473
+ table_name: str,
474
+ start_date,
475
+ end_date
476
+ ) -> pd.DataFrame:
477
+ """执行MySQL查询并返回DataFrame(带异常处理)"""
478
+ try:
479
+ return self.download.data_to_df(
480
+ db_name=db_name,
481
+ table_name=table_name,
482
+ start_date=start_date,
483
+ end_date=end_date,
484
+ projection={}
485
+ )
486
+ except Exception as e:
487
+ logger.info(f"MySQL 查询异常 {db_name}.{table_name}: {e}")
488
+ return pd.DataFrame()
489
+
490
+ def _fetch_redis_data(self, cache_key: str, start_year: int = None, end_year: int = None) -> pd.DataFrame:
491
+ """
492
+ 从Redis哈希表读取数据
493
+
494
+ 优化策略:
495
+ - 当指定年份范围时,仅获取相关字段(hmget)
496
+ - 未指定范围时全量获取(hgetall)
497
+ -- 从mysql过来的表,虽然没有日期列,但也指定了 start_year/end_year,再redis中存储的键名是"all",所以要把 all也加进去
498
+ """
499
+ try:
500
+ if start_year is not None and end_year is not None:
501
+ # 按年份范围精确获取字段(提升性能)
502
+ fields = [str(y) for y in range(start_year, end_year + 1)]
503
+ fields += ['all']
504
+ data_list = self.redis_engine.hmget(cache_key, fields)
505
+ dfs = []
506
+ for data, field in zip(data_list, fields):
507
+ if data:
508
+ df = pd.DataFrame(json.loads(data.decode("utf-8")))
509
+ df = self._convert_date_columns(df)
510
+ dfs.append(df)
511
+ return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
512
+ else:
513
+ # 全量获取模式
514
+ data_dict = self.redis_engine.hgetall(cache_key)
515
+ dfs = []
516
+ for field, data in data_dict.items():
517
+ try:
518
+ df = pd.DataFrame(json.loads(data.decode("utf-8")))
519
+ df = self._convert_date_columns(df)
520
+ dfs.append(df)
521
+ except Exception as e:
522
+ logger.info(f"Redis 数据解析失败 {cache_key} 字段 {field}: {e}")
523
+ return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
524
+ except Exception as e:
525
+ logger.info(f"Redis 数据获取失败 {cache_key}: {e}")
526
+ return pd.DataFrame()
527
+
528
+ def _convert_date_columns(self, df: pd.DataFrame) -> pd.DataFrame:
529
+ """统一日期列格式转换"""
530
+ if "日期" in df.columns:
531
+ df["日期"] = pd.to_datetime(df["日期"], format="%Y-%m-%d", errors="coerce")
532
+ return df
533
+
534
+ def _generate_cache_key(self, db_name: str, table_name: str, set_year: bool) -> str:
535
+ """生成缓存键名"""
536
+ return f"{db_name}:{table_name}_haveyear" if set_year else f"{db_name}:{table_name}"
537
+
538
+ def _filter_by_date_range(
539
+ self,
540
+ df: pd.DataFrame,
541
+ start_dt: datetime.datetime,
542
+ end_dt: datetime.datetime
543
+ ) -> pd.DataFrame:
544
+ """按日期范围精确过滤数据"""
545
+ if "日期" not in df.columns:
546
+ return df
547
+ date_mask = (df["日期"] >= start_dt) & (df["日期"] <= end_dt)
548
+ return df[date_mask].copy()
549
+
550
+ def _trigger_async_cache_update(
551
+ self,
552
+ cache_key: str,
553
+ db_name: str,
554
+ table_name: str,
555
+ set_year: bool,
556
+ start_date: str,
557
+ end_date: str,
558
+ existing_data: pd.DataFrame
559
+ ):
560
+ """启动异步线程执行缓存更新(不阻塞主流程)"""
561
+ thread = threading.Thread(
562
+ target=self.set_redis,
563
+ args=(cache_key, db_name, table_name, set_year, start_date, end_date, existing_data),
564
+ daemon=True
565
+ )
566
+ thread.start()
567
+
568
+ def _merge_data(self, new_data: pd.DataFrame, existing_data: pd.DataFrame) -> pd.DataFrame:
569
+ """合并新旧数据集策略:保留现有数据中在新数据范围外的历史数据,并按日期排序"""
570
+ if existing_data.empty or "日期" not in existing_data.columns:
571
+ return new_data
572
+ new_data["日期"] = pd.to_datetime(new_data["日期"])
573
+ existing_data["日期"] = pd.to_datetime(existing_data["日期"])
574
+
575
+ # 计算新数据日期范围
576
+ new_min = new_data["日期"].min()
577
+ new_max = new_data["日期"].max()
578
+
579
+ # 保留现有数据中在新数据范围之外的部分
580
+ valid_historical = existing_data[
581
+ (existing_data["日期"] < new_min) | (existing_data["日期"] > new_max)
582
+ ]
583
+ merged_data = pd.concat([new_data, valid_historical], ignore_index=True)
584
+ merged_data.sort_values(['日期'], ascending=[False], ignore_index=True, inplace=True)
585
+ return merged_data
586
+
587
+ def _serialize_data(self, df: pd.DataFrame) -> bytes:
588
+ """
589
+ 高性能数据序列化方法
590
+
591
+ 处理要点:
592
+ 1. 日期类型转换为字符串
593
+ 2. Decimal类型转换为浮点数
594
+ 3. NaN值统一转换为None
595
+ 4. 优化JSON序列化性能
596
+ """
597
+ if df.empty:
598
+ return json.dumps([], ensure_ascii=False).encode("utf-8")
599
+ temp_df = df.copy()
600
+
601
+ # 处理日期类型列(安全转换)
602
+ date_cols = temp_df.select_dtypes(include=["datetime64[ns]"]).columns
603
+ for col in date_cols:
604
+ # 处理全NaT列避免类型错误
605
+ if temp_df[col].isna().all():
606
+ temp_df[col] = temp_df[col].astype(object) # 转换为object类型避免NaT
607
+ temp_df[col] = (
608
+ temp_df[col]
609
+ .dt.strftime("%Y-%m-%d") # 安全使用dt访问器(因类型强制为datetime)
610
+ .where(temp_df[col].notna(), None)
611
+ )
612
+
613
+ # 统一空值处理(保护全None列类型)
614
+ def safe_null_convert(series):
615
+ """保留全None列的原始dtype"""
616
+ if series.isna().all():
617
+ return series.astype(object).where(pd.notnull(series), None)
618
+ return series.where(pd.notnull(series), None)
619
+
620
+ temp_df = temp_df.apply(safe_null_convert)
621
+
622
+ # 类型处理函数(增强嵌套结构处理)
623
+ def decimal_serializer(obj):
624
+ """递归序列化处理"""
625
+ # 提前处理None值
626
+ if obj is None:
627
+ return None
628
+
629
+ # 按类型分发处理
630
+ if isinstance(obj, Decimal):
631
+ return round(float(obj), 6)
632
+ elif isinstance(obj, pd.Timestamp):
633
+ return obj.strftime("%Y-%m-%d %H:%M:%S") # 兜底处理漏网之鱼
634
+ elif isinstance(obj, np.generic): # 处理所有numpy标量类型
635
+ return obj.item()
636
+ elif isinstance(obj, (datetime.date, datetime.datetime)):
637
+ return obj.isoformat()
638
+ elif isinstance(obj, (list, tuple, set)):
639
+ return [decimal_serializer(item) for item in obj]
640
+ elif isinstance(obj, dict):
641
+ return {decimal_serializer(k): decimal_serializer(v) for k, v in obj.items()}
642
+ elif isinstance(obj, bytes):
643
+ return obj.decode("utf-8", errors="replace") # 二进制安全处理
644
+ elif isinstance(obj, pd.Series): # 防止意外传入Series对象
645
+ return obj.to_list()
646
+ else:
647
+ # 尝试直接转换可序列化类型
648
+ try:
649
+ json.dumps(obj)
650
+ return obj
651
+ except TypeError:
652
+ logger.error(f"无法序列化类型 {type(obj)}: {str(obj)}")
653
+ raise
654
+
655
+ # 序列化前防御性检查
656
+ try:
657
+ data_records = temp_df.to_dict(orient="records")
658
+ except Exception as e:
659
+ logger.error(f"数据转换字典失败: {str(e)}")
660
+ raise
661
+
662
+ # 空记录特殊处理
663
+ if not data_records:
664
+ return json.dumps([], ensure_ascii=False).encode("utf-8")
665
+
666
+ # 执行序列化
667
+ try:
668
+ return json.dumps(
669
+ data_records,
670
+ ensure_ascii=False,
671
+ default=decimal_serializer
672
+ ).encode("utf-8")
673
+ except TypeError as e:
674
+ logger.error(f"序列化失败,请检查未处理的数据类型: {str(e)}")
675
+ raise
676
+
677
+
678
+ if __name__ == '__main__':
679
+ # # ****************************************************
680
+ # # 这一部分在外部定义,只需要定义一次,开始
681
+ # redis_config = {
682
+ # 'host': '127.0.0.1',
683
+ # 'port': 6379, # 默认Redis端口
684
+ # 'db': 0, # 默认Redis数据库索引
685
+ # # 'username': 'default',
686
+ # 'password': redis_password,
687
+ # }
688
+ # # redis 实例化
689
+ # r = redis.Redis(**redis_config)
690
+ # # mysql 实例化
691
+ # d = s_query.QueryDatas(username=username, password=password, host=host, port=port)
692
+ # # 将两个库的实例化对象传给 RedisData 类,并实例化数据处理引擎
693
+ # m = RedisData(redis_engin=r, download=d)
694
+ # # ****************************************************
695
+ #
696
+ # # 以下为动态获取数据库数据
697
+ # db_name = '聚合数据'
698
+ # table_name = '多店推广场景_按日聚合'
699
+ # set_year = False
700
+ # df = m.get_from_redis(
701
+ # db_name=db_name,
702
+ # table_name=table_name,
703
+ # set_year=set_year,
704
+ # start_date='2025-01-01',
705
+ # end_date='2025-01-31'
706
+ # )
707
+ # logger.info(df)
708
+ #
709
+
710
+ logger.info(socket.gethostname())
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mdbq
-Version: 3.6.9
+Version: 3.6.11
 Home-page: https://pypi.org/project/mdbq
 Author: xigua,
 Author-email: 2587125111@qq.com
@@ -34,10 +34,11 @@ mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,239
 mdbq/pbix/refresh_all.py,sha256=OBT9EewSZ0aRS9vL_FflVn74d4l2G00wzHiikCC4TC0,5926
 mdbq/pbix/refresh_all_old.py,sha256=_pq3WSQ728GPtEG5pfsZI2uTJhU8D6ra-htIk1JXYzw,7192
 mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
-mdbq/redis/getredis.py,sha256=q7omKJCPw_6Zr_r6WwTv4RGSXzZzpLPkIaqJ22svJhE,29104
+mdbq/redis/getredis.py,sha256=QAiqkxgrQf6AHgWQdIKah3FKkM5HE8TqwJdTXrlyR6c,28427
+mdbq/redis/getredis_优化hash.py,sha256=q7omKJCPw_6Zr_r6WwTv4RGSXzZzpLPkIaqJ22svJhE,29104
 mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
 mdbq/spider/aikucun.py,sha256=v7VO5gtEXR6_4Q6ujbTyu1FHu7TXHcwSQ6hIO249YH0,22208
-mdbq-3.6.9.dist-info/METADATA,sha256=m6rX1e31X7uhBfVC0ZE07nWd5EY4QVO6RZC93uAdr68,243
-mdbq-3.6.9.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
-mdbq-3.6.9.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
-mdbq-3.6.9.dist-info/RECORD,,
+mdbq-3.6.11.dist-info/METADATA,sha256=nf9h8l9QqT6ZrZ-J4cassVWcqRi2r3Oicu9eicCtCaA,244
+mdbq-3.6.11.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
+mdbq-3.6.11.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
+mdbq-3.6.11.dist-info/RECORD,,