mdbq 3.6.8__py3-none-any.whl → 3.6.10__py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between those published versions.
- mdbq/mysql/mysql_bak.py +1808 -0
- mdbq/mysql/s_query.py +12 -3
- mdbq/redis/getredis.py +302 -0
- mdbq/redis/getredis_优化hash.py +710 -0
- {mdbq-3.6.8.dist-info → mdbq-3.6.10.dist-info}/METADATA +1 -1
- {mdbq-3.6.8.dist-info → mdbq-3.6.10.dist-info}/RECORD +8 -8
- mdbq/redis/getredis_bak20250131.py +0 -265
- mdbq/redis/getredis_deepseek.py +0 -235
- {mdbq-3.6.8.dist-info → mdbq-3.6.10.dist-info}/WHEEL +0 -0
- {mdbq-3.6.8.dist-info → mdbq-3.6.10.dist-info}/top_level.txt +0 -0
mdbq/mysql/s_query.py
CHANGED
@@ -12,6 +12,7 @@ from sqlalchemy import create_engine
 import os
 import calendar
 from mdbq.dataframe import converter
+from decimal import Decimal
 
 warnings.filterwarnings('ignore')
 """
@@ -49,9 +50,14 @@ class QueryDatas:
         return columns
 
     def data_to_df(self, db_name, table_name, start_date, end_date, projection: dict=[]):
-
-
-
+        if start_date:
+            start_date = pd.to_datetime(start_date).strftime('%Y-%m-%d')
+        else:
+            start_date = '1970-01-01'
+        if end_date:
+            end_date = pd.to_datetime(end_date).strftime('%Y-%m-%d')
+        else:
+            end_date = datetime.datetime.today().strftime('%Y-%m-%d')
         df = pd.DataFrame()  # initialize df
 
         if self.check_infos(db_name, table_name) == False:
@@ -97,6 +103,9 @@ class QueryDatas:
             rows = cursor.fetchall()  # fetch the query results
             columns = [desc[0] for desc in cursor.description]
             df = pd.DataFrame(rows, columns=columns)  # convert to df
+            # use applymap to convert each Decimal to float
+            df_float = df.applymap(lambda x: float(x) if isinstance(x, Decimal) else x)
+
         if 'id' in df.columns.tolist():
            df.pop('id')  # the id column is not returned by default
        if len(df) == 0:
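The applymap call added in the last hunk maps Decimal cells (as returned for MySQL DECIMAL columns) to float. Below is a minimal standalone sketch of that conversion, assuming only pandas and the standard-library decimal module; note that the hunk binds the result to df_float rather than back to df, and that DataFrame.applymap has been deprecated in favor of DataFrame.map since pandas 2.1:

    from decimal import Decimal
    import pandas as pd

    df = pd.DataFrame({"price": [Decimal("9.90"), Decimal("12.50")], "qty": [2, 3]})
    # bind the result back so the converted frame is the one used downstream
    df = df.applymap(lambda x: float(x) if isinstance(x, Decimal) else x)
    print(df.dtypes)  # price: float64, qty: int64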
mdbq/redis/getredis.py
CHANGED
@@ -12,6 +12,7 @@ import logging
 from logging.handlers import RotatingFileHandler
 import getpass
 import platform
+from decimal import Decimal
 
 if platform.system() == 'Windows':
     D_PATH = os.path.join(f'C:\\Users\\{getpass.getuser()}\\Downloads')
@@ -57,6 +58,9 @@ logger.addHandler(file_handler)
 
 
 class RedisData(object):
+    """
+    Stores strings
+    """
     def __init__(self, redis_engine, download, cache_ttl: int):
         self.redis_engine = redis_engine  # Redis data-handling engine
         self.download = download  # MySQL data-handling engine
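The docstrings added here and in the next hunk mark the storage split: RedisData serializes a whole table into one string value, while the new RedisDataHash keeps one Redis hash per table, with one field per month (plus an "all" field for tables without a 日期 column). A minimal redis-py sketch of the two layouts, with hypothetical key names chosen for illustration:

    import json
    import redis

    r = redis.Redis(host="localhost", port=6379)  # assumed local instance
    records = [{"日期": "2025-01-01", "销售额": 100.0}]
    payload = json.dumps(records, ensure_ascii=False)

    # string layout (RedisData): the whole table under one plain key
    r.set("my_db:my_table_str", payload)
    r.expire("my_db:my_table_str", 600)

    # hash layout (RedisDataHash): one field per month under one key
    r.hset("my_db:my_table", "202501", payload)
    r.expire("my_db:my_table", 600)
    months = [json.loads(v) for v in r.hmget("my_db:my_table", ["202501", "all"]) if v]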
@@ -267,6 +271,304 @@ class RedisData(object):
             temp_df[col] = temp_df[col].dt.strftime("%Y-%m-%d")
         return temp_df.to_json(orient="records", force_ascii=False)
 
+class RedisDataHash(object):
+    """
+    Stores hashes
+    Combined query processor for the Redis cache and MySQL data
+
+    Features:
+    - supports MySQL queries against year-sharded tables
+    - multi-level caching strategy (in-memory cache + Redis cache)
+    - asynchronous cache-update mechanism
+    - automatic handling of date ranges and data-type conversion
+    """
+
+    def __init__(self, redis_engine, download, cache_ttl: int):
+        self.redis_engine = redis_engine
+        self.download = download
+        self.cache_ttl = cache_ttl * 60  # converted to seconds for storage
+
+    def get_from_mysql(
+            self,
+            db_name: str,
+            table_name: str,
+            set_year: bool,
+            start_date,
+            end_date
+    ) -> pd.DataFrame:
+        dfs = []
+        if set_year:
+            current_year = datetime.datetime.today().year
+            for year in range(2024, current_year + 1):
+                df = self._fetch_table_data(
+                    db_name, f"{table_name}_{year}", start_date, end_date
+                )
+                if df is not None:
+                    dfs.append(df)
+        else:
+            df = self._fetch_table_data(db_name, table_name, start_date, end_date)
+            if df is not None:
+                dfs.append(df)
+
+        combined_df = pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
+        if combined_df.empty:
+            logger.warn(f"warning: {db_name}.{table_name} 未读取到数据")
+        else:
+            combined_df = self._convert_date_columns(combined_df)
+        return combined_df
+
+    def get_from_redis(
+            self,
+            db_name: str,
+            table_name: str,
+            set_year: bool,
+            start_date,
+            end_date
+    ) -> pd.DataFrame:
+        start_dt = pd.to_datetime(start_date).floor('D')
+        end_dt = pd.to_datetime(end_date).floor('D')
+        cache_key = self._generate_cache_key(db_name, table_name, set_year)
+
+        try:
+            ttl = self.redis_engine.ttl(cache_key)
+            if ttl < 60:
+                cache_data = self._fetch_redis_data(cache_key)
+                self._trigger_async_cache_update(
+                    cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
+                )
+                return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
+
+            # build the month range
+            start_month = start_dt.to_period('M')
+            end_month = end_dt.to_period('M')
+            months = pd.period_range(start_month, end_month, freq='M').strftime("%Y%m").tolist()
+            cache_data = self._fetch_redis_data(cache_key, months)
+
+            if cache_data.empty:
+                self._trigger_async_cache_update(
+                    cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
+                )
+                return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
+
+            filtered_df = self._filter_by_date_range(cache_data, start_dt, end_dt)
+            if not filtered_df.empty:
+                if '日期' in filtered_df.columns.tolist():
+                    exsit_min_date = filtered_df['日期'].min()
+                    if exsit_min_date <= start_dt:
+                        return filtered_df
+                else:
+                    return filtered_df
+
+            self._trigger_async_cache_update(
+                cache_key, db_name, table_name, set_year, start_date, end_date, cache_data
+            )
+            return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
+
+        except Exception as e:
+            logger.error(f"Redis 连接异常: {e},直接访问 MySQL")
+            return self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
+
+    def set_redis(
+            self,
+            cache_key: str,
+            db_name: str,
+            table_name: str,
+            set_year: bool,
+            start_date,
+            end_date,
+            existing_data: pd.DataFrame
+    ) -> None:
+        try:
+            new_data = self.get_from_mysql(db_name, table_name, set_year, start_date, end_date)
+            if new_data.empty:
+                return
+
+            combined_data = self._merge_data(new_data, existing_data)
+
+            if not combined_data.empty:
+                if '日期' not in combined_data.columns.tolist():
+                    serialized_data = self._serialize_data(combined_data)
+                    self.redis_engine.hset(cache_key, "all", serialized_data)
+                    self.redis_engine.expire(cache_key, self.cache_ttl)
+                else:
+                    # store sharded by month
+                    combined_data['month'] = combined_data['日期'].dt.to_period('M').dt.strftime("%Y%m")
+                    for month_str, group in combined_data.groupby('month'):
+                        group = group.drop(columns=['month'])
+                        serialized_data = self._serialize_data(group)
+                        self.redis_engine.hset(cache_key, month_str, serialized_data)
+                    self.redis_engine.expire(cache_key, self.cache_ttl)
+                logger.info(f"缓存更新 {cache_key} | 数据量: {len(combined_data)}")
+        except Exception as e:
+            logger.error(f"缓存更新失败: {cache_key} - {str(e)}")
+
+    def _fetch_table_data(
+            self,
+            db_name: str,
+            table_name: str,
+            start_date,
+            end_date
+    ) -> pd.DataFrame:
+        try:
+            return self.download.data_to_df(
+                db_name=db_name,
+                table_name=table_name,
+                start_date=start_date,
+                end_date=end_date,
+                projection={}
+            )
+        except Exception as e:
+            logger.error(f"MySQL 查询异常 {db_name}.{table_name}: {e}")
+            return pd.DataFrame()
+
+    def _fetch_redis_data(self, cache_key: str, months: list = None) -> pd.DataFrame:
+        try:
+            if months is not None:
+                fields = months.copy()
+                fields.append('all')
+                data_list = self.redis_engine.hmget(cache_key, fields)
+                dfs = []
+                for data, field in zip(data_list, fields):
+                    if data:
+                        df = pd.DataFrame(json.loads(data.decode("utf-8")))
+                        df = self._convert_date_columns(df)
+                        dfs.append(df)
+                return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
+            else:
+                data_dict = self.redis_engine.hgetall(cache_key)
+                dfs = []
+                for field, data in data_dict.items():
+                    try:
+                        df = pd.DataFrame(json.loads(data.decode("utf-8")))
+                        df = self._convert_date_columns(df)
+                        dfs.append(df)
+                    except Exception as e:
+                        logger.error(f"Redis 数据解析失败 {cache_key} 字段 {field}: {e}")
+                return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
+        except Exception as e:
+            logger.error(f"Redis 数据获取失败 {cache_key}: {e}")
+            return pd.DataFrame()
+
+    def _convert_date_columns(self, df: pd.DataFrame) -> pd.DataFrame:
+        if "日期" in df.columns:
+            df["日期"] = pd.to_datetime(df["日期"], format="%Y-%m-%d", errors="coerce")
+        return df
+
+    def _generate_cache_key(self, db_name: str, table_name: str, set_year: bool) -> str:
+        return f"{db_name}:{table_name}_haveyear" if set_year else f"{db_name}:{table_name}"
+
+    def _filter_by_date_range(
+            self,
+            df: pd.DataFrame,
+            start_dt: datetime.datetime,
+            end_dt: datetime.datetime
+    ) -> pd.DataFrame:
+        if "日期" not in df.columns:
+            return df
+        date_mask = (df["日期"] >= start_dt) & (df["日期"] <= end_dt)
+        return df[date_mask].copy()
+
+    def _trigger_async_cache_update(
+            self,
+            cache_key: str,
+            db_name: str,
+            table_name: str,
+            set_year: bool,
+            start_date: str,
+            end_date: str,
+            existing_data: pd.DataFrame
+    ):
+        thread = threading.Thread(
+            target=self.set_redis,
+            args=(cache_key, db_name, table_name, set_year, start_date, end_date, existing_data),
+            daemon=True
+        )
+        thread.start()
+
+    def _merge_data(self, new_data: pd.DataFrame, existing_data: pd.DataFrame) -> pd.DataFrame:
+        if existing_data.empty or "日期" not in existing_data.columns:
+            return new_data
+        new_data["日期"] = pd.to_datetime(new_data["日期"])
+        existing_data["日期"] = pd.to_datetime(existing_data["日期"])
+
+        new_min = new_data["日期"].min()
+        new_max = new_data["日期"].max()
+
+        valid_historical = existing_data[
+            (existing_data["日期"] < new_min) | (existing_data["日期"] > new_max)
+        ]
+        merged_data = pd.concat([new_data, valid_historical], ignore_index=True)
+        merged_data.sort_values(['日期'], ascending=[False], ignore_index=True, inplace=True)
+        return merged_data
+
+    def _serialize_data(self, df: pd.DataFrame) -> bytes:
+        if df.empty:
+            return json.dumps([], ensure_ascii=False).encode("utf-8")
+        temp_df = df.copy()
+
+        date_cols = temp_df.select_dtypes(include=["datetime64[ns]"]).columns
+        for col in date_cols:
+            if temp_df[col].isna().all():
+                temp_df[col] = temp_df[col].astype(object)
+            temp_df[col] = (
+                temp_df[col]
+                .dt.strftime("%Y-%m-%d")
+                .where(temp_df[col].notna(), None)
+            )
+
+        def safe_null_convert(series):
+            if series.isna().all():
+                return series.astype(object).where(pd.notnull(series), None)
+            return series.where(pd.notnull(series), None)
+
+        temp_df = temp_df.apply(safe_null_convert)
+
+        def decimal_serializer(obj):
+            if obj is None:
+                return None
+            if isinstance(obj, Decimal):
+                return round(float(obj), 6)
+            elif isinstance(obj, pd.Timestamp):
+                return obj.strftime("%Y-%m-%d %H:%M:%S")
+            elif isinstance(obj, np.generic):
+                return obj.item()
+            elif isinstance(obj, (datetime.date, datetime.datetime)):
+                return obj.isoformat()
+            elif isinstance(obj, (list, tuple, set)):
+                return [decimal_serializer(item) for item in obj]
+            elif isinstance(obj, dict):
+                return {decimal_serializer(k): decimal_serializer(v) for k, v in obj.items()}
+            elif isinstance(obj, bytes):
+                return obj.decode("utf-8", errors="replace")
+            elif isinstance(obj, pd.Series):
+                return obj.to_list()
+            else:
+                try:
+                    json.dumps(obj)
+                    return obj
+                except TypeError:
+                    logger.error(f"无法序列化类型 {type(obj)}: {str(obj)}")
+                    raise
+
+        try:
+            data_records = temp_df.to_dict(orient="records")
+        except Exception as e:
+            logger.error(f"数据转换字典失败: {str(e)}")
+            raise
+
+        if not data_records:
+            return json.dumps([], ensure_ascii=False).encode("utf-8")
+
+        try:
+            return json.dumps(
+                data_records,
+                ensure_ascii=False,
+                default=decimal_serializer
+            ).encode("utf-8")
+        except TypeError as e:
+            logger.error(f"序列化失败,请检查未处理的数据类型: {str(e)}")
+            raise
+
 
 if __name__ == '__main__':
     # # ****************************************************