mdbq 3.5.5__py3-none-any.whl → 3.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbq/mysql/s_query.py CHANGED
@@ -100,7 +100,7 @@ class QueryDatas:
100
100
  if 'id' in df.columns.tolist():
101
101
  df.pop('id') # 默认不返回 id 列
102
102
  if len(df) == 0:
103
- print(f'database: {db_name}, table: {table_name} 查询的数据为空1')
103
+ print(f's_query.py -> data_to_df -> database: {db_name}, table: {table_name} 查询的数据为空1')
104
104
  connection.close()
105
105
  return df
106
106
 
mdbq/redis/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+
2
+
3
+
4
+ # 缓存
mdbq/redis/getredis.py ADDED
@@ -0,0 +1,176 @@
1
+ # -*- coding: UTF-8 -*-
2
+ import redis
3
+ import socket
4
+ from mdbq.mysql import s_query
5
+ from mdbq.config import myconfig
6
+ import pandas as pd
7
+ import json
8
+ import datetime
9
+ import threading
10
+
11
+
12
# Load the project configuration once, then pick the MySQL credentials that
# match the machine this module is imported on.
conf = myconfig.main()
_mysql_profile = 'company' if socket.gethostname() in ('company', 'Mac2.local') else 'xigua_lx'
conf_data = conf['Windows'][_mysql_profile]['mysql']['local']
username = conf_data['username']
password = conf_data['password']
host = conf_data['host']
port = conf_data['port']
# Redis always uses the local instance; the password entry is the same on every machine.
redis_password = conf['Windows']['company']['redis']['local']['password']
22
+
23
+
24
+
25
class RedisData(object):
    """Serve MySQL query results through a Redis cache.

    ``redis_engin`` is the Redis client used for ``ttl`` / ``get`` / ``set``;
    ``download`` is the MySQL query engine and must expose
    ``data_to_df(db_name, table_name, start_date, end_date, projection)``.
    Cached payloads are JSON strings in ``records`` orientation stored under
    the key ``"<db>:<table>"`` (with a ``_haveyear`` suffix for per-year tables).
    """

    def __init__(self, redis_engin, download):
        self.redis_engin = redis_engin  # Redis client used for cache reads and writes
        self.download = download  # MySQL query engine
        self.minute = 60  # cache lifetime, in minutes

    @staticmethod
    def _parse_date_column(column):
        """Return ``column`` parsed as ``%Y-%m-%d`` datetimes, or unchanged if parsing fails.

        Explicit replacement for the deprecated ``errors='ignore'`` option of
        ``pd.to_datetime``: on failure the input is handed back untouched.
        """
        try:
            return pd.to_datetime(column, format='%Y-%m-%d')
        except (ValueError, TypeError):
            return column

    def _refresh_and_fetch(self, my_key, _db_name, _table_name, _set_year, start_date, end_date, before_df):
        """Start a background cache refresh and return the data straight from MySQL."""
        worker = threading.Thread(
            target=self.set_redis,
            args=(my_key, _db_name, _table_name, _set_year, start_date, end_date, before_df),
        )
        worker.start()
        return self.get_from_mysql(
            _db_name=_db_name,
            _table_name=_table_name,
            start_date=start_date,
            end_date=end_date,
            _set_year=_set_year,
        )

    def get_from_mysql(self, _db_name, _table_name, _set_year, start_date, end_date):
        """Query MySQL for the given date range and return a DataFrame.

        _set_year: when true, the data is split across tables named
            ``<_table_name>_<year>``; every year from 2024 through the current
            year is queried and the results are concatenated.

        Returns an empty DataFrame when nothing was read. When a ``日期``
        column is present it is converted to datetimes if possible.
        """
        if _set_year:
            yearly_frames = [
                self.download.data_to_df(
                    db_name=_db_name,
                    table_name=f'{_table_name}_{year}',
                    start_date=start_date,
                    end_date=end_date,
                    projection={},
                )
                for year in range(2024, datetime.datetime.today().year + 1)
            ]
            _df = pd.concat(yearly_frames, ignore_index=True)
        else:
            _df = self.download.data_to_df(
                db_name=_db_name,
                table_name=_table_name,
                start_date=start_date,
                end_date=end_date,
                projection={},
            )
        if len(_df) == 0:
            print(f'{_db_name} - {_table_name}: mysql读取的数据不能为空')
            return pd.DataFrame()
        if '日期' in _df.columns.tolist():
            _df['日期'] = self._parse_date_column(_df['日期'])
        return _df

    def get_from_redis(self, _db_name, _table_name, _set_year, start_date, end_date):
        """Return the requested date range, preferring the Redis cache over MySQL.

        _set_year: whether the table name carries a year suffix (see
            ``get_from_mysql``); it also selects the cache key.

        The ``日期`` column is returned as datetimes; it is stored in Redis as
        strings so the dates do not turn into integers when serialised.
        MySQL is queried directly (and, unless Redis is unreachable, a
        background refresh of the cache is started) when the key is missing or
        about to expire, or when the request starts before the earliest
        cached date.
        """
        start_date = pd.to_datetime(start_date)
        end_date = pd.to_datetime(end_date)
        if _set_year:
            my_key = f'{_db_name}:{_table_name}_haveyear'
        else:
            my_key = f'{_db_name}:{_table_name}'

        # ttl returns -2 for a missing key and -1 for a key without expiry.
        try:
            ttl_result = self.redis_engin.ttl(my_key)
            json_string = self.redis_engin.get(my_key) if ttl_result >= 60 else None
        except Exception:
            # Redis is unreachable: bypass the cache entirely.
            print('redis 连接失败, 绕过 redis 直接从 mysql 获取数据')
            return self.get_from_mysql(
                _db_name=_db_name,
                _table_name=_table_name,
                start_date=start_date,
                end_date=end_date,
                _set_year=_set_year,
            )

        # 1. Nothing usable in Redis. json_string is also None when the key
        #    expired between the ttl and get calls.
        if json_string is None:
            print('数据不存在或过期')
            return self._refresh_and_fetch(
                my_key, _db_name, _table_name, _set_year, start_date, end_date, pd.DataFrame()
            )

        # 2. Redis has data. json.loads accepts both bytes and str, so clients
        #    created with decode_responses=True work as well.
        _df = pd.DataFrame(json.loads(json_string))

        if '日期' in _df.columns.tolist():
            _df['日期'] = self._parse_date_column(_df['日期'])
            min_date = _df['日期'].min()
            # Known limitation: a request that starts before the table's first
            # date always falls through to MySQL, even though Redis holds
            # everything the table contains.
            if start_date < min_date:
                # 3. Redis has data but does not cover the requested start.
                print('数据日期不对劲需要更新')
                return self._refresh_and_fetch(
                    my_key, _db_name, _table_name, _set_year, start_date, end_date, _df
                )
            _df = _df[(_df['日期'] >= start_date) & (_df['日期'] <= end_date)]

        return _df

    def set_redis(self, my_key, _db_name, _table_name, _set_year, start_date, end_date, before_df):
        """Read the date range from MySQL and store it in Redis under ``my_key``.

        before_df: rows previously held in the cache (an empty DataFrame when
            there were none). Rows of ``before_df`` dated outside the freshly
            fetched range are kept and merged with the new rows.

        get_from_redis runs this on a background thread. When MySQL returns
        nothing the cache is left untouched, so an empty result cannot wipe
        the rows already cached. Returns the DataFrame that was written, or
        the empty DataFrame when nothing was written.
        """
        _df = self.get_from_mysql(
            _db_name=_db_name,
            _table_name=_table_name,
            start_date=start_date,
            end_date=end_date,
            _set_year=_set_year,
        )
        if len(_df) == 0:
            return _df

        has_date = '日期' in _df.columns.tolist()
        if has_date:
            _min_date = _df['日期'].min()
            _max_date = _df['日期'].max()
            if '日期' in before_df.columns.tolist():
                # Drop the cached rows inside the refreshed range, keep the rest.
                older = before_df[before_df['日期'] < _min_date]
                newer = before_df[before_df['日期'] > _max_date]
                _df = pd.concat([_df, older, newer], ignore_index=True, axis=0)
            # Serialise dates as strings so they do not become integers in JSON.
            _df['日期'] = _df['日期'].astype('str')

        jsondata = _df.to_json(orient='records', force_ascii=False)
        # Write value and expiry in one command so the key never exists without a TTL.
        self.redis_engin.set(my_key, jsondata, ex=self.minute * 60)

        if has_date:
            _df['日期'] = self._parse_date_column(_df['日期'])
        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print(f'{now}: 刷新 redis -> {_db_name}:{_table_name}')
        return _df
145
+
146
+
147
if __name__ == '__main__':
    # ------------------------------------------------------------------
    # One-time setup; callers normally build these objects once, outside
    # this module.
    # Redis client: default local port and database index.
    redis_client = redis.Redis(
        host='127.0.0.1',
        port=6379,
        db=0,
        password=redis_password,
    )
    # MySQL query engine.
    mysql_engine = s_query.QueryDatas(username=username, password=password, host=host, port=port)
    # Hand both engines to RedisData to build the data access object.
    cache = RedisData(redis_engin=redis_client, download=mysql_engine)
    # ------------------------------------------------------------------

    # Example: fetch one month of data from a table without a year suffix.
    request = {
        '_db_name': '聚合数据',
        '_table_name': '多店推广场景_按日聚合',
        '_set_year': False,
        'start_date': '2025-01-01',
        'end_date': '2025-01-31',
    }
    df = cache.get_from_redis(**request)
    print(df)
mdbq/spider/aikucun.py CHANGED
@@ -483,7 +483,7 @@ class AikuCunNew:
483
483
 
484
484
  if __name__ == '__main__':
485
485
  get_cookie_aikucun() # 登录并获取 cookies
486
- akucun(date_num=5, headless=True) # 下载数据
486
+ akucun(date_num=10, headless=True) # 下载数据
487
487
 
488
488
  # a = AikuCunNew(shop_name='aikucun')
489
489
  # a.akc()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mdbq
3
- Version: 3.5.5
3
+ Version: 3.5.7
4
4
  Home-page: https://pypi.org/project/mdbq
5
5
  Author: xigua,
6
6
  Author-email: 2587125111@qq.com
@@ -20,7 +20,7 @@ mdbq/mongo/mongo.py,sha256=M9DUeUCMPDngkwn9-ui0uTiFrvfNU1kLs22s5SmoNm0,31899
20
20
  mdbq/mysql/__init__.py,sha256=A_DPJyAoEvTSFojiI2e94zP0FKtCkkwKP1kYUCSyQzo,11
21
21
  mdbq/mysql/mysql.py,sha256=r5YkS1WnV9dGtEHFcwaekjtUBgFcvkdmwif-m52CyHI,99560
22
22
  mdbq/mysql/recheck_mysql.py,sha256=ppBTfBLgkRWirMVZ31e_ZPULiGPJU7K3PP9G6QBZ3QI,8605
23
- mdbq/mysql/s_query.py,sha256=6L5Cp90zq13noZHjzSA5mqms_hD01c8GO1_NfbYDu6w,9252
23
+ mdbq/mysql/s_query.py,sha256=Z0C3lQQcGtnizyVHa62HYIAqZ8R3KeSHP-N0O7Qb5eU,9280
24
24
  mdbq/mysql/year_month_day.py,sha256=VgewoE2pJxK7ErjfviL_SMTN77ki8GVbTUcao3vFUCE,1523
25
25
  mdbq/other/__init__.py,sha256=jso1oHcy6cJEfa7udS_9uO5X6kZLoPBF8l3wCYmr5dM,18
26
26
  mdbq/other/download_sku_picture.py,sha256=GdphR7Q3psXXVuZoyJ4u_6OWn_rWlcbT0iJ-1zPT6O0,45368
@@ -32,9 +32,11 @@ mdbq/pbix/__init__.py,sha256=Trtfaynu9RjoTyLLYBN2xdRxTvm_zhCniUkVTAYwcjo,24
32
32
  mdbq/pbix/pbix_refresh.py,sha256=JUjKW3bNEyoMVfVfo77UhguvS5AWkixvVhDbw4_MHco,2396
33
33
  mdbq/pbix/refresh_all.py,sha256=OBT9EewSZ0aRS9vL_FflVn74d4l2G00wzHiikCC4TC0,5926
34
34
  mdbq/pbix/refresh_all_old.py,sha256=_pq3WSQ728GPtEG5pfsZI2uTJhU8D6ra-htIk1JXYzw,7192
35
+ mdbq/redis/__init__.py,sha256=YtgBlVSMDphtpwYX248wGge1x-Ex_mMufz4-8W0XRmA,12
36
+ mdbq/redis/getredis.py,sha256=PlWAGMC-WqdZtyvtjjj-i0i8AiBsNP6zgAUb5Fdkark,8481
35
37
  mdbq/spider/__init__.py,sha256=RBMFXGy_jd1HXZhngB2T2XTvJqki8P_Fr-pBcwijnew,18
36
- mdbq/spider/aikucun.py,sha256=zOacjrJ3MvToyuugA68xB-oN6RKj8K3GxMKudnln9EA,22207
37
- mdbq-3.5.5.dist-info/METADATA,sha256=vh6KUSs4_g68EQt2S8VwB93eEhuEfNQ_i4aobYARvuQ,243
38
- mdbq-3.5.5.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
39
- mdbq-3.5.5.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
40
- mdbq-3.5.5.dist-info/RECORD,,
38
+ mdbq/spider/aikucun.py,sha256=v7VO5gtEXR6_4Q6ujbTyu1FHu7TXHcwSQ6hIO249YH0,22208
39
+ mdbq-3.5.7.dist-info/METADATA,sha256=2vbVtGiIZ0FKagUbtistkxCiI5m3Iwbh8c-kTpWEkSk,243
40
+ mdbq-3.5.7.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
41
+ mdbq-3.5.7.dist-info/top_level.txt,sha256=2FQ-uLnCSB-OwFiWntzmwosW3X2Xqsg0ewh1axsaylA,5
42
+ mdbq-3.5.7.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (70.1.0)
2
+ Generator: bdist_wheel (0.44.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5