mdbq 3.6.8__py3-none-any.whl → 3.6.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/mysql/s_query.py CHANGED
@@ -12,6 +12,7 @@ from sqlalchemy import create_engine
12
12
  import os
13
13
  import calendar
14
14
  from mdbq.dataframe import converter
15
+ from decimal import Decimal
15
16
 
16
17
  warnings.filterwarnings('ignore')
17
18
  """
@@ -49,9 +50,14 @@ class QueryDatas:
49
50
  return columns
50
51
 
51
52
  def data_to_df(self, db_name, table_name, start_date, end_date, projection: dict=[]):
52
-
53
- start_date = pd.to_datetime(start_date).strftime('%Y-%m-%d')
54
- end_date = pd.to_datetime(end_date).strftime('%Y-%m-%d')
53
+ if start_date:
54
+ start_date = pd.to_datetime(start_date).strftime('%Y-%m-%d')
55
+ else:
56
+ start_date = '1970-01-01'
57
+ if end_date:
58
+ end_date = pd.to_datetime(end_date).strftime('%Y-%m-%d')
59
+ else:
60
+ end_date = datetime.datetime.today().strftime('%Y-%m-%d')
55
61
  df = pd.DataFrame() # 初始化df
56
62
 
57
63
  if self.check_infos(db_name, table_name) == False:
@@ -97,6 +103,9 @@ class QueryDatas:
97
103
  rows = cursor.fetchall() # 获取查询结果
98
104
  columns = [desc[0] for desc in cursor.description]
99
105
  df = pd.DataFrame(rows, columns=columns) # 转为 df
106
+ # 使用applymap将每个Decimal转换为float
107
+ df_float = df.applymap(lambda x: float(x) if isinstance(x, Decimal) else x)
108
+
100
109
  if 'id' in df.columns.tolist():
101
110
  df.pop('id') # 默认不返回 id 列
102
111
  if len(df) == 0:
mdbq/redis/getredis.py CHANGED
@@ -12,6 +12,7 @@ import logging
12
12
  from logging.handlers import RotatingFileHandler
13
13
  import getpass
14
14
  import platform
15
+ from decimal import Decimal
15
16
 
16
17
  if platform.system() == 'Windows':
17
18
  D_PATH = os.path.join(f'C:\\Users\\{getpass.getuser()}\\Downloads')
@@ -57,6 +58,9 @@ logger.addHandler(file_handler)
57
58
 
58
59
 
59
60
  class RedisData(object):
61
+ """
62
+ 存储 string
63
+ """
60
64
  def __init__(self, redis_engine, download, cache_ttl: int):
61
65
  self.redis_engine = redis_engine # Redis 数据处理引擎
62
66
  self.download = download # MySQL 数据处理引擎
@@ -267,6 +271,304 @@ class RedisData(object):
267
271
  temp_df[col] = temp_df[col].dt.strftime("%Y-%m-%d")
268
272
  return temp_df.to_json(orient="records", force_ascii=False)
269
273
 
274
class RedisDataHash(object):
    """
    Serve MySQL table reads through a Redis hash cache.

    Every (database, table) pair maps to one Redis hash:

    - tables with a "日期" (date) column are stored one hash field per month,
      keyed "YYYYMM", so a date-range read only fetches the months it needs;
    - tables without that column are stored whole under the field "all".

    Reads fall back to MySQL whenever the cache is close to expiry, empty, or
    does not reach back to the requested start date; in those cases a daemon
    thread refreshes the cache in the background.
    """

    # First year for which a "<table>_<year>" partition is queried.
    _FIRST_PARTITION_YEAR = 2024
    # Column used for date filtering and monthly sharding.
    _DATE_COLUMN = "日期"
    # Hash field holding the whole table when it has no date column.
    _ALL_FIELD = "all"

    def __init__(self, redis_engine, download, cache_ttl: int):
        """
        :param redis_engine: Redis client (``ttl``/``hset``/``hmget``/``hgetall``/``expire`` are used).
        :param download: MySQL reader exposing ``data_to_df``.
        :param cache_ttl: cache lifetime in minutes.
        """
        self.redis_engine = redis_engine
        self.download = download
        self.cache_ttl = cache_ttl * 60  # stored in seconds, as Redis EXPIRE expects

    def get_from_mysql(
            self,
            db_name: str,
            table_name: str,
            set_year: bool,
            start_date,
            end_date
    ) -> pd.DataFrame:
        """
        Read the date range straight from MySQL.

        With ``set_year`` the table is treated as split by year and every
        ``<table_name>_<year>`` from ``_FIRST_PARTITION_YEAR`` up to the current
        year is queried and concatenated. Returns an empty DataFrame (after
        logging a warning) when nothing was read.
        """
        if set_year:
            current_year = datetime.datetime.today().year
            table_names = [
                f"{table_name}_{year}"
                for year in range(self._FIRST_PARTITION_YEAR, current_year + 1)
            ]
        else:
            table_names = [table_name]

        dfs = []
        for name in table_names:
            df = self._fetch_table_data(db_name, name, start_date, end_date)
            if df is not None:
                dfs.append(df)

        combined_df = pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
        if combined_df.empty:
            # Logger.warn is a deprecated alias; Logger.warning is the supported spelling.
            logger.warning(f"warning: {db_name}.{table_name} 未读取到数据")
        else:
            combined_df = self._convert_date_columns(combined_df)
        return combined_df

    def get_from_redis(
            self,
            db_name: str,
            table_name: str,
            set_year: bool,
            start_date,
            end_date
    ) -> pd.DataFrame:
        """
        Read the date range from Redis, falling back to MySQL when needed.

        A falsy ``start_date`` means 1970-01-01 and a falsy ``end_date`` means
        today. The cache is bypassed (and refreshed in the background) when its
        TTL is under 60 seconds, when it holds nothing for the requested months,
        or when the cached rows do not start at ``start_date``. Any exception
        inside the cache path is logged and answered from MySQL.
        """
        start_dt, end_dt = self._normalize_date_range(start_date, end_date)
        cache_key = self._generate_cache_key(db_name, table_name, set_year)

        try:
            ttl = self.redis_engine.ttl(cache_key)
            if ttl < 60:
                # About to expire (or missing: Redis reports a negative TTL).
                cache_data = self._fetch_redis_data(cache_key)
                self._trigger_async_cache_update(
                    cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
                )
                return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)

            # Only the monthly shards overlapping the request are fetched.
            months = pd.period_range(
                start_dt.to_period('M'), end_dt.to_period('M'), freq='M'
            ).strftime("%Y%m").tolist()
            cache_data = self._fetch_redis_data(cache_key, months)

            if cache_data.empty:
                self._trigger_async_cache_update(
                    cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
                )
                return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)

            filtered_df = self._filter_by_date_range(cache_data, start_dt, end_dt)
            if not filtered_df.empty:
                if self._DATE_COLUMN in filtered_df.columns.tolist():
                    # The cache is trusted only if it reaches back to the requested start.
                    exsit_min_date = filtered_df[self._DATE_COLUMN].min()
                    if exsit_min_date <= start_dt:
                        return filtered_df
                else:
                    return filtered_df

            self._trigger_async_cache_update(
                cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
            )
            return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)

        except Exception as e:
            logger.error(f"Redis 连接异常: {e},直接访问 MySQL")
            return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)

    def set_redis(
            self,
            cache_key: str,
            db_name: str,
            table_name: str,
            set_year: bool,
            start_date,
            end_date,
            existing_data: pd.DataFrame
    ) -> None:
        """
        Refresh the cache: read MySQL, merge with ``existing_data``, write to Redis.

        Frames with a date column are written one hash field per month; others
        go to the single "all" field. Failures are logged, never raised, because
        this runs on a background thread.
        """
        try:
            new_data = self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
            if new_data.empty:
                return

            combined_data = self._merge_data(new_data, existing_data)
            if combined_data.empty:
                return

            if self._DATE_COLUMN not in combined_data.columns.tolist():
                serialized_data = self._serialize_data(combined_data)
                self.redis_engine.hset(cache_key, self._ALL_FIELD, serialized_data)
                self.redis_engine.expire(cache_key, self.cache_ttl)
            else:
                # Group by an external key instead of a temporary 'month' column, so a
                # real table column called 'month' is neither overwritten nor dropped.
                month_keys = combined_data[self._DATE_COLUMN].dt.strftime("%Y%m")
                for month_str, group in combined_data.groupby(month_keys):
                    serialized_data = self._serialize_data(group)
                    self.redis_engine.hset(cache_key, month_str, serialized_data)
                    # Re-armed after every write so a partial refresh still expires.
                    self.redis_engine.expire(cache_key, self.cache_ttl)
            logger.info(f"缓存更新 {cache_key} | 数据量: {len(combined_data)}")
        except Exception as e:
            logger.error(f"缓存更新失败: {cache_key} - {str(e)}")

    def _fetch_table_data(
            self,
            db_name: str,
            table_name: str,
            start_date,
            end_date
    ) -> pd.DataFrame:
        """Query one physical table; return an empty DataFrame if the query raises."""
        try:
            return self.download.data_to_df(
                db_name=db_name,
                table_name=table_name,
                start_date=start_date,
                end_date=end_date,
                projection={}
            )
        except Exception as e:
            logger.error(f"MySQL 查询异常 {db_name}.{table_name}: {e}")
            return pd.DataFrame()

    def _fetch_redis_data(self, cache_key: str, months: list = None) -> pd.DataFrame:
        """
        Load cached frames from the hash at ``cache_key``.

        :param months: "YYYYMM" fields to fetch (the "all" field is always added);
            ``None`` fetches every field of the hash.
        :return: concatenation of all fields that parsed; empty DataFrame when
            nothing is cached or Redis fails. A field that fails to parse is
            logged and skipped instead of discarding the other fields.
        """
        try:
            if months is not None:
                fields = list(months)
                fields.append(self._ALL_FIELD)
                items = zip(fields, self.redis_engine.hmget(cache_key, fields))
            else:
                items = self.redis_engine.hgetall(cache_key).items()

            dfs = []
            for field, data in items:
                if not data:  # field absent from the hash
                    continue
                try:
                    dfs.append(self._decode_cached_frame(data))
                except Exception as e:
                    logger.error(f"Redis 数据解析失败 {cache_key} 字段 {field}: {e}")
            return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
        except Exception as e:
            logger.error(f"Redis 数据获取失败 {cache_key}: {e}")
            return pd.DataFrame()

    def _decode_cached_frame(self, data) -> pd.DataFrame:
        """Turn one cached JSON payload (bytes, or str from a decoding client) into a DataFrame."""
        text = data.decode("utf-8") if isinstance(data, (bytes, bytearray)) else data
        return self._convert_date_columns(pd.DataFrame(json.loads(text)))

    def _convert_date_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        """Parse the date column in place as ``%Y-%m-%d``; unparseable values become NaT."""
        if self._DATE_COLUMN in df.columns:
            df[self._DATE_COLUMN] = pd.to_datetime(
                df[self._DATE_COLUMN], format="%Y-%m-%d", errors="coerce"
            )
        return df

    def _generate_cache_key(self, db_name: str, table_name: str, set_year: bool) -> str:
        """Build the Redis key; year-partitioned tables get a ``_haveyear`` suffix."""
        return f"{db_name}:{table_name}_haveyear" if set_year else f"{db_name}:{table_name}"

    @staticmethod
    def _normalize_date_range(start_date, end_date):
        """
        Convert the requested bounds to day-floored timestamps.

        Falsy bounds get defaults (1970-01-01 / today) instead of crashing on
        ``pd.to_datetime(None)``, which returns ``None``.
        """
        start_dt = pd.to_datetime(start_date if start_date else '1970-01-01').floor('D')
        end_dt = pd.to_datetime(end_date if end_date else datetime.datetime.today()).floor('D')
        return start_dt, end_dt

    def _filter_by_date_range(
            self,
            df: pd.DataFrame,
            start_dt: datetime.datetime,
            end_dt: datetime.datetime
    ) -> pd.DataFrame:
        """Return a copy of the rows with start_dt <= date <= end_dt; frames without a date column pass through."""
        if self._DATE_COLUMN not in df.columns:
            return df
        date_mask = (df[self._DATE_COLUMN] >= start_dt) & (df[self._DATE_COLUMN] <= end_dt)
        return df[date_mask].copy()

    def _trigger_async_cache_update(
            self,
            cache_key: str,
            db_name: str,
            table_name: str,
            set_year: bool,
            start_date: str,
            end_date: str,
            existing_data: pd.DataFrame
    ):
        """Run ``set_redis`` on a daemon thread so the caller is not blocked by the refresh."""
        thread = threading.Thread(
            target=self.set_redis,
            args=(cache_key, db_name, table_name, set_year, start_date, end_date, existing_data),
            daemon=True
        )
        thread.start()

    def _merge_data(self, new_data: pd.DataFrame, existing_data: pd.DataFrame) -> pd.DataFrame:
        """
        Combine fresh MySQL rows with previously cached rows.

        Cached rows are kept only when they fall outside the date span covered
        by ``new_data``; inside that span the fresh rows win. The result is
        sorted by date, newest first. When either frame lacks the date column
        (or nothing was cached) ``new_data`` is returned unchanged. The inputs
        are not modified.
        """
        col = self._DATE_COLUMN
        if existing_data.empty or col not in existing_data.columns or col not in new_data.columns:
            return new_data

        new_dates = pd.to_datetime(new_data[col])
        old_dates = pd.to_datetime(existing_data[col])
        new_data = new_data.assign(**{col: new_dates})
        existing_data = existing_data.assign(**{col: old_dates})

        outside_new_span = (old_dates < new_dates.min()) | (old_dates > new_dates.max())
        merged_data = pd.concat([new_data, existing_data[outside_new_span]], ignore_index=True)
        merged_data.sort_values([col], ascending=[False], ignore_index=True, inplace=True)
        return merged_data

    def _serialize_data(self, df: pd.DataFrame) -> bytes:
        """
        Encode a DataFrame as UTF-8 JSON records for storage in Redis.

        datetime64 columns are written as "YYYY-MM-DD" strings and every missing
        value (NaN/NaT/None) is written as JSON ``null``.

        :raises TypeError: if a cell holds a type with no JSON representation.
        """
        if df.empty:
            return json.dumps([], ensure_ascii=False).encode("utf-8")
        temp_df = df.copy()

        date_cols = temp_df.select_dtypes(include=["datetime64[ns]"]).columns
        for col in date_cols:
            # Format first, then cast: ``.dt`` only exists while the column is still
            # datetime64, so casting an all-NaT column to object beforehand would raise.
            present = temp_df[col].notna()
            formatted = temp_df[col].dt.strftime("%Y-%m-%d")
            temp_df[col] = formatted.astype(object).where(present, None)

        temp_df = temp_df.apply(self._nulls_to_none)

        try:
            data_records = temp_df.to_dict(orient="records")
        except Exception as e:
            logger.error(f"数据转换字典失败: {str(e)}")
            raise

        if not data_records:
            return json.dumps([], ensure_ascii=False).encode("utf-8")

        try:
            return json.dumps(
                data_records,
                ensure_ascii=False,
                default=self._json_default
            ).encode("utf-8")
        except TypeError as e:
            logger.error(f"序列化失败,请检查未处理的数据类型: {str(e)}")
            raise

    @staticmethod
    def _nulls_to_none(series: pd.Series) -> pd.Series:
        """
        Replace missing values in a column with ``None``.

        The cast to object is required: on a float column ``where(..., None)``
        puts NaN back, which ``json.dumps`` would emit as the non-standard
        ``NaN`` token. Columns without missing values are returned untouched.
        """
        if series.isna().any():
            return series.astype(object).where(series.notna(), None)
        return series

    @staticmethod
    def _json_default(obj):
        """
        ``json.dumps`` fallback for values the encoder cannot handle natively.

        Containers are returned as plain lists; the encoder then walks them and
        calls this function again for any unsupported element.
        """
        if obj is pd.NaT or obj is pd.NA:
            return None
        if isinstance(obj, Decimal):
            return round(float(obj), 6)
        # Timestamp must be tested before datetime: it is a datetime subclass.
        if isinstance(obj, pd.Timestamp):
            return obj.strftime("%Y-%m-%d %H:%M:%S")
        if isinstance(obj, np.generic):
            return obj.item()
        if isinstance(obj, (datetime.date, datetime.datetime)):
            return obj.isoformat()
        if isinstance(obj, (list, tuple, set)):
            return list(obj)
        if isinstance(obj, bytes):
            return obj.decode("utf-8", errors="replace")
        if isinstance(obj, pd.Series):
            return obj.to_list()
        logger.error(f"无法序列化类型 {type(obj)}: {str(obj)}")
        raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")
571
+
270
572
 
271
573
  if __name__ == '__main__':
272
574
  # # ****************************************************