kaq-quant-common 0.2.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kaq_quant_common/__init__.py +0 -0
- kaq_quant_common/api/__init__.py +0 -0
- kaq_quant_common/api/common/__init__.py +1 -0
- kaq_quant_common/api/common/api_interface.py +38 -0
- kaq_quant_common/api/common/auth.py +118 -0
- kaq_quant_common/api/rest/__init__.py +0 -0
- kaq_quant_common/api/rest/api_client_base.py +42 -0
- kaq_quant_common/api/rest/api_server_base.py +135 -0
- kaq_quant_common/api/rest/instruction/helper/order_helper.py +342 -0
- kaq_quant_common/api/rest/instruction/instruction_client.py +86 -0
- kaq_quant_common/api/rest/instruction/instruction_server_base.py +154 -0
- kaq_quant_common/api/rest/instruction/models/__init__.py +17 -0
- kaq_quant_common/api/rest/instruction/models/account.py +49 -0
- kaq_quant_common/api/rest/instruction/models/order.py +248 -0
- kaq_quant_common/api/rest/instruction/models/position.py +70 -0
- kaq_quant_common/api/rest/instruction/models/transfer.py +32 -0
- kaq_quant_common/api/ws/__init__.py +0 -0
- kaq_quant_common/api/ws/exchange/models.py +23 -0
- kaq_quant_common/api/ws/exchange/ws_exchange_client.py +31 -0
- kaq_quant_common/api/ws/exchange/ws_exchange_server.py +440 -0
- kaq_quant_common/api/ws/instruction/__init__.py +0 -0
- kaq_quant_common/api/ws/instruction/ws_instruction_client.py +82 -0
- kaq_quant_common/api/ws/instruction/ws_instruction_server_base.py +139 -0
- kaq_quant_common/api/ws/models.py +46 -0
- kaq_quant_common/api/ws/ws_client_base.py +235 -0
- kaq_quant_common/api/ws/ws_server_base.py +288 -0
- kaq_quant_common/common/__init__.py +0 -0
- kaq_quant_common/common/ddb_table_monitor.py +106 -0
- kaq_quant_common/common/http_monitor.py +69 -0
- kaq_quant_common/common/modules/funding_rate_helper.py +137 -0
- kaq_quant_common/common/modules/limit_order_helper.py +158 -0
- kaq_quant_common/common/modules/limit_order_symbol_monitor.py +76 -0
- kaq_quant_common/common/modules/limit_order_symbol_monitor_group.py +69 -0
- kaq_quant_common/common/monitor_base.py +84 -0
- kaq_quant_common/common/monitor_group.py +97 -0
- kaq_quant_common/common/redis_table_monitor.py +123 -0
- kaq_quant_common/common/statistics/funding_rate_history_statistics.py +208 -0
- kaq_quant_common/common/statistics/kline_history_statistics.py +211 -0
- kaq_quant_common/common/ws_wrapper.py +21 -0
- kaq_quant_common/config/config.yaml +5 -0
- kaq_quant_common/resources/__init__.py +0 -0
- kaq_quant_common/resources/kaq_ddb_pool_stream_read_resources.py +56 -0
- kaq_quant_common/resources/kaq_ddb_stream_init_resources.py +88 -0
- kaq_quant_common/resources/kaq_ddb_stream_read_resources.py +81 -0
- kaq_quant_common/resources/kaq_ddb_stream_write_resources.py +359 -0
- kaq_quant_common/resources/kaq_mysql_init_resources.py +23 -0
- kaq_quant_common/resources/kaq_mysql_resources.py +341 -0
- kaq_quant_common/resources/kaq_postgresql_resources.py +58 -0
- kaq_quant_common/resources/kaq_quant_hive_resources.py +107 -0
- kaq_quant_common/resources/kaq_redis_resources.py +117 -0
- kaq_quant_common/utils/__init__.py +0 -0
- kaq_quant_common/utils/dagster_job_check_utils.py +29 -0
- kaq_quant_common/utils/dagster_utils.py +19 -0
- kaq_quant_common/utils/date_util.py +204 -0
- kaq_quant_common/utils/enums_utils.py +79 -0
- kaq_quant_common/utils/error_utils.py +22 -0
- kaq_quant_common/utils/hash_utils.py +48 -0
- kaq_quant_common/utils/log_time_utils.py +32 -0
- kaq_quant_common/utils/logger_utils.py +97 -0
- kaq_quant_common/utils/mytt_utils.py +372 -0
- kaq_quant_common/utils/signal_utils.py +23 -0
- kaq_quant_common/utils/sqlite_utils.py +169 -0
- kaq_quant_common/utils/uuid_utils.py +5 -0
- kaq_quant_common/utils/yml_utils.py +148 -0
- kaq_quant_common-0.2.12.dist-info/METADATA +66 -0
- kaq_quant_common-0.2.12.dist-info/RECORD +67 -0
- kaq_quant_common-0.2.12.dist-info/WHEEL +4 -0
kaq_quant_common/resources/kaq_mysql_resources.py
@@ -0,0 +1,341 @@
import os
import threading
import time
import traceback
import datetime

import numpy as np
import pandas as pd
from natsort import natsorted
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker

from kaq_quant_common.utils import hash_utils, yml_utils
from kaq_quant_common.utils.logger_utils import get_logger

mutex = threading.Lock()


def append_id(df: pd.DataFrame):
    """
    Add an id column and a creation timestamp to every row.
    :param df:
    :return:
    """
    if df is not None and not df.empty:
        df = df.copy()
        df["id"] = df.apply(lambda i: hash_utils.generate_hash_id(i), axis=1)

        now_time = time.localtime()
        df["ctimestampe"] = pd.Timestamp(now_time.tm_year, now_time.tm_mon, now_time.tm_mday, now_time.tm_hour, now_time.tm_min, now_time.tm_sec)
    return df


class KaqQuantMysqlRepository:
    """
    Fetch aggregated trades over a past time window.
    Database creation statement:
    mysql -uroot -pxxxxxxxxxx -e "create database if not exists db_kaq_binance character set 'utf8mb4';"
    Note: under high concurrency this may raise (1205, 'Lock wait timeout exceeded; try restarting transaction');
    tune it with `SET GLOBAL innodb_lock_wait_timeout = 3000;`.
    Also allow reading data that has already been committed:
    SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
    SHOW VARIABLES LIKE 'autocommit';
    -- Disable autocommit so a batch can be committed in one go
    -- SET autocommit = 0;
    """

    def __init__(self, host, port, user, passwd, database, charset="utf8mb4", pool_size=3):
        self.logger = get_logger(self)
        # Create the write engine backed by a database connection pool
        self.conn_engine = create_engine(
            f"mysql+mysqldb://{user}:{passwd}@{host}:{port}/{database}?charset={charset}",
            # Connection pool size
            pool_size=pool_size,
            # Maximum number of overflow connections beyond the pool size
            max_overflow=5,
            # Maximum seconds to wait when the pool is exhausted
            pool_timeout=60,
            # Seconds after which a connection is recycled; -1 disables recycling
            pool_recycle=650,
            # Ping connections before handing them out
            pool_pre_ping=True,
            echo=False,  # Disable SQL statement logging
            echo_pool=False,  # Disable connection pool logging
        )
        # Session factory
        self.session_maker = sessionmaker(bind=self.conn_engine, autoflush=False)
        # TODO: remove
        self.session = self.session_maker()

    # Check whether a table exists
    def table_exists(self, table_name):
        # Get a connection from the pool
        session = self.session_maker()
        try:
            tables = session.execute(text("SHOW TABLES")).mappings().all()
            table_list = np.array([[v for _, v in table.items()] for table in tables]).flatten()
            return table_name in table_list
        except Exception as e:
            self.logger.error(f"【mysql-table_exists】error, {table_name} - {str(e)}")
            return False
        finally:
            session.close()

    # Execute a SQL statement
    def execute_sql(self, sql, need_commit=False):
        # Get a connection from the pool
        session = self.session_maker()
        ret = None
        try:
            ret = session.execute(text(sql))
            if need_commit:
                session.commit()
        except Exception as e:
            self.logger.error(f"【mysql.execute_sql】error, {sql} - {str(e)}")
            ret = None
            session.rollback()
        finally:
            session.close()
        return ret

    # Rename (archive) a table by appending a timestamp suffix
    def rename_table(self, table_name):
        # Get a connection from the pool
        session = self.session_maker()
        try:
            time_str = time.strftime("%Y_%m_%d_%H_%M", time.localtime(time.time()))
            sql = f"RENAME TABLE {table_name} TO {table_name}_{time_str};"
            session.execute(text(sql))
            return True
        except Exception as e:
            self.logger.error(f"【mysql-rename_table】error, {table_name} - {str(e)}")
            return False
        finally:
            session.close()

    def get_table_size(self, table_name, database="db_kaq_binance"):
        """
        Return the size of a table (data plus indexes) in MB.
        """
        # Get a connection from the pool
        session = self.session_maker()
        try:
            # Read the size from information_schema
            sql = f"""
            SELECT ROUND(SUM(data_length + index_length) / 1024 / 1024, 2)
            FROM information_schema.tables
            WHERE table_name = '{table_name}' AND table_schema = '{database}'
            """
            count = session.execute(text(sql)).fetchone()[0]
            return count
        except Exception as e:
            self.logger.error(f"【mysql-get_table_size】error, {table_name} - {str(e)}")
        finally:
            session.close()
        return 0

    def get_table_list(self):
        # Get a connection from the pool
        session = self.session_maker()
        try:
            # Fetch table names
            tables = (
                session.execute(
                    text(
                        "SELECT table_name, create_time FROM information_schema.tables WHERE table_schema = 'db_kaq_binance' ORDER BY create_time DESC"
                    )
                )
                .mappings()
                .all()
            )
            table_list = [table["TABLE_NAME"] for table in tables]
            table_list = list(reversed(natsorted(table_list)))
            return table_list
        except Exception as e:
            self.logger.error(f"【mysql-get_table_list】error, - {str(e)}")
        finally:
            session.close()
        return []

    def fetch_data(self, query):
        """
        Run a query and return the result as a DataFrame.
        """
        # Get a connection from the pool
        session = self.session_maker()
        try:
            # Read the rows through the session
            rows = session.execute(text(query)).mappings().all()
            df = pd.DataFrame(rows)
            return df
        except Exception as e:
            self.logger.error(f"【mysql-fetch_data】error, {query} - {str(e)}")
        finally:
            session.close()
        return pd.DataFrame()

    def fetch_data_count(self, query):
        """
        Run a count query and return the first column of the first row.
        """
        # Get a connection from the pool
        session = self.session_maker()
        try:
            # Read the scalar result through the session
            count = session.execute(text(query)).fetchone()[0]
            return count
        except Exception as e:
            self.logger.error(f"【mysql-fetch_data_count】error, {query} - {str(e)}")
        finally:
            session.close()
        return 0

    def get_conn_engine(self):
        """
        create_engine: plays well with pandas writes.
        """
        return self.conn_engine

    def get_exits_id_list(self, df, table_name):
        """
        Return the list of ids that already exist in the table.
        """
        if df is None or df.empty:
            return []
        id_list_str = ", ".join(["'" + _id + "'" for _id in df["id"].values.tolist()])
        id_df = self.fetch_data(f"select id from {table_name} where id in ({id_list_str})")
        exits_id_list = id_df["id"].values.tolist()
        return exits_id_list

    def insert_data(self, df, table_name):
        # Convert the DataFrame to rows and write them into MySQL
        session = None
        try:
            mutex.acquire(True)
            if df is None or df.empty:
                return
            if "id" not in df:
                df = append_id(df)
            # df = df[~df['id'].isin(self.get_exits_id_list(df, table_name))]
            # pandas variant
            # pd.io.sql.to_sql(df, table_name, self.conn_engine, if_exists='append', index=False, chunksize=100000)

            # Use INSERT IGNORE: rows whose primary key id already exists are skipped
            columns = ", ".join([_col for _col in df.columns.values])

            # MySQL accepts at most 16 MB per statement by default; larger payloads may block, so insert in small batches
            mysql_max_allowed_packet = 300
            total_rows = df.shape[0]
            # Number of batches to split into
            part_rows = total_rows // mysql_max_allowed_packet
            if total_rows % mysql_max_allowed_packet > 0:
                part_rows = part_rows + 1
            for i in range(part_rows):
                df_part = df.iloc[i * mysql_max_allowed_packet : (i + 1) * mysql_max_allowed_packet]

                # Build the INSERT statement
                value_list = ["(" + ", ".join(["'" + str(_r) + "'" for _r in row.values]) + ")" for index, row in df_part.iterrows()]
                value_list_str = ", ".join(value_list)
                query = f"INSERT IGNORE INTO {table_name} ({columns}) VALUES {value_list_str} ;"
                # Get a session from the pool
                if session is None:
                    session = self.session_maker()
                session.execute(text(query))
            # Commit all batches at once
            if session is not None:
                session.commit()
        except Exception as e:
            self.logger.error(f"【mysql-insert_data error】- {str(e)} - {str(traceback.format_exc())}")
            if session is not None:
                session.rollback()
        finally:
            mutex.release()
            if session is not None:
                session.close()

    def insert_data_duplicate(self, df, table_name, update_columns: list = None, mysql_max_allowed_packet=300):
        """
        Insert rows; on a primary-key conflict, update the given columns instead.
        """
        session = None
        try:
            mutex.acquire(True)
            if df is None or df.empty:
                return
            if "id" not in df:
                df = append_id(df)
            # pandas variant
            # pd.io.sql.to_sql(df, table_name, self.conn_engine, if_exists='append', index=False, chunksize=100000)

            # Use INSERT ... ON DUPLICATE KEY UPDATE: rows whose primary key id already exists update the given columns
            columns = ", ".join([_col for _col in df.columns.values])
            update_columns_str = ", ".join([f"{col}=IFNULL(VALUES({col}), {col})" for col in update_columns])

            # MySQL accepts at most 16 MB per statement by default; larger payloads may block, so insert in small batches

            total_rows = df.shape[0]
            # Number of batches to split into
            part_rows = total_rows // mysql_max_allowed_packet
            if total_rows % mysql_max_allowed_packet > 0:
                part_rows = part_rows + 1
            for i in range(part_rows):
                df_part = df.iloc[i * mysql_max_allowed_packet : (i + 1) * mysql_max_allowed_packet]

                # Build the INSERT statement, mapping NaT/nan values to NULL
                value_list = ["(" + ", ".join(["'" + str(_r) + "'" if str(_r) != 'NaT' and str(_r) != 'nan' else "NULL" for _r in row.values]) + ")" for index, row in df_part.iterrows()]
                value_list_str = ", ".join(value_list)
                query = f"INSERT INTO {table_name} ({columns}) VALUES {value_list_str} ON DUPLICATE KEY UPDATE {update_columns_str} ;"

                # Get a session from the pool
                if session is None:
                    session = self.session_maker()
                session.execute(text(query))
            # Commit all batches at once
            if session is not None:
                session.commit()
        except Exception as e:
            self.logger.error(f"【mysql-insert_data_duplicate error】- {str(e)} - {str(traceback.format_exc())}")
            if session is not None:
                session.rollback()
        finally:
            mutex.release()
            if session is not None:
                session.close()


def main(query="select * from kaq_binance_perpetual_klines_1h limit 5;"):
    host, port, user, passwd, database, charset = yml_utils.get_mysql_info(os.getcwd())
    repository = KaqQuantMysqlRepository(host, port, user, passwd, database, charset)

    print(repository.table_exists("kaq_binance_perpetual_klines_1h"))
    # print(repository.fetch_data(query).head(3))


# Exercise the database connection pool
def test_db_pool():
    host, port, user, passwd, database, charset = yml_utils.get_mysql_info(os.getcwd())
    repository = KaqQuantMysqlRepository(host, port, user, passwd, database, charset)

    # Open a session
    session1 = repository.session_maker()
    session1.execute(text("SELECT 1;")).mappings().all()
    # SHOW PROCESSLIST; shows 1 connection

    # The session above was not closed, so this one cannot reuse its connection
    session2 = repository.session_maker()
    session2.execute(text("SELECT 1;")).mappings().all()
    # SHOW PROCESSLIST; shows 2 connections

    # Close the sessions
    session1.close()
    session2.close()

    # After the sessions above are closed, a new session reuses one of the pooled connections
    session3 = repository.session_maker()
    session3.execute(text("SELECT 1;")).mappings().all()
    # SHOW PROCESSLIST; still shows 2 connections

    print("end")


if __name__ == "__main__":
    main()
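For orientation, here is a minimal standalone sketch (not part of the package) of the statement `insert_data_duplicate` assembles for a two-row frame; `demo_table` and its columns are illustrative only, and the `IFNULL(VALUES(col), col)` pattern keeps the stored value whenever the incoming one is NULL:

import pandas as pd

# Hypothetical two-row frame; 'demo_table' and its columns are illustrative, not from the package.
df = pd.DataFrame([
    {"id": "a1", "price": 1.23},
    {"id": "a2", "price": 4.56},
])
columns = ", ".join(df.columns)
update_columns = ["price"]
update_str = ", ".join(f"{c}=IFNULL(VALUES({c}), {c})" for c in update_columns)
values = ", ".join(
    "(" + ", ".join(f"'{v}'" if str(v) not in ("NaT", "nan") else "NULL" for v in row.values) + ")"
    for _, row in df.iterrows()
)
print(f"INSERT INTO demo_table ({columns}) VALUES {values} ON DUPLICATE KEY UPDATE {update_str} ;")
# INSERT INTO demo_table (id, price) VALUES ('a1', '1.23'), ('a2', '4.56')
#   ON DUPLICATE KEY UPDATE price=IFNULL(VALUES(price), price) ;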
kaq_quant_common/resources/kaq_postgresql_resources.py
@@ -0,0 +1,58 @@
import os
from kaq_quant_common.utils import yml_utils
import pandas as pd
import threading
import psycopg2
from kaq_quant_common.utils.logger_utils import get_logger

mutex = threading.Lock()

class KaqQuantPostgreSqlRepository:
    '''
    Access helper for TimescaleDB.
    '''
    def __init__(self, host, port, user, passwd, database, charset='utf8'):
        self.logger = get_logger(self)
        # Database connection parameters
        conn_params = {
            "dbname": database,
            "user": user,
            "password": passwd,
            "host": host,
            "port": port
        }
        # Create the write connection
        # Open the connection and cursor
        self.conn = psycopg2.connect(**conn_params)
        self.cur = self.conn.cursor()

        # Close the cursor and connection
        # self.cur.close()
        # self.conn.close()

    def fetch_data(self, query):
        '''
        Run a query and return the result as a DataFrame.
        '''
        try:
            # Execute the query
            self.cur.execute(query)
            rows = self.cur.fetchall()
            field_names = [desc[0] for desc in self.cur.description]
            df = pd.DataFrame(rows, columns=field_names)
            return df
        except Exception as e:
            self.logger.error(f'【postgresql-fetch_data】error, {query} - {str(e)}')
            return pd.DataFrame()


def main(query='select * from footprint_candle limit 5;'):

    # alias
    host, port, user, passwd, database, charset = yml_utils.get_posgresql_info(os.getcwd())
    kaqBtcTimeScaleDbRepository = KaqQuantPostgreSqlRepository(host, port, user, passwd, database)
    df = kaqBtcTimeScaleDbRepository.fetch_data(query)
    print(df.head(3))

if __name__ == '__main__':
    main()
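A minimal usage sketch, assuming the same YAML helper and table as the module's own `main`; the rollback after a failed query is an extra step a caller may want, since `fetch_data` only logs errors and psycopg2 then leaves the shared connection in an aborted transaction until it is rolled back:

import os
from kaq_quant_common.utils import yml_utils
from kaq_quant_common.resources.kaq_postgresql_resources import KaqQuantPostgreSqlRepository

host, port, user, passwd, database, charset = yml_utils.get_posgresql_info(os.getcwd())
repo = KaqQuantPostgreSqlRepository(host, port, user, passwd, database)

df = repo.fetch_data("select * from footprint_candle limit 5;")
if df.empty:
    # fetch_data swallowed the error; reset the transaction before reusing the connection
    repo.conn.rollback()
print(df.head(3))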
kaq_quant_common/resources/kaq_quant_hive_resources.py
@@ -0,0 +1,107 @@
from pyhive import hive
import pandas as pd

class KaqQuantHiveClient:
    def __init__(self, host="localhost", port=10000, username="hive", password=None, auth="NONE", database="default"):
        """
        Initialize the Hive connection.
        """
        self.conn = hive.Connection(
            host=host,
            port=port,
            username=username,
            password=password,
            auth=auth,
            database=database
        )
        self.cursor = self.conn.cursor()

    def query(self, sql):
        """
        Execute a query and return the raw result (list of tuples).
        """
        self.cursor.execute(sql)
        return self.cursor.fetchall()

    def query_df(self, sql):
        """
        Execute a query and return a pandas.DataFrame.
        """
        return pd.read_sql(sql, self.conn)

    def insert_many(self, table, columns, values_list, partition=None):
        """
        Insert rows in bulk.
        :param table: Hive table name
        :param columns: columns to insert, e.g. ["id", "name", "department"]
        :param values_list: row data, e.g. [(1, "Alice", "HR"), (2, "Bob", "IT")]
        :param partition: optional partition spec, e.g. {"dt": "2025-09-21"}
        """
        cols = ",".join(columns)
        values_str = ",".join(
            ["(" + ",".join([f"'{str(v)}'" if isinstance(v, str) else str(v) for v in row]) + ")"
             for row in values_list]
        )

        if partition:
            part_clause = " PARTITION (" + ",".join([f"{k}='{v}'" for k, v in partition.items()]) + ")"
        else:
            part_clause = ""

        sql = f"INSERT INTO {table}{part_clause} ({cols}) VALUES {values_str}"
        print("Executing SQL:", sql)
        self.cursor.execute(sql)

    def close(self):
        """
        Close the cursor and connection.
        """
        self.cursor.close()
        self.conn.close()

if __name__ == "__main__":
    '''
    CREATE TABLE employees (
        id INT,
        name STRING,
        position STRING,
        hire_date STRING
    )
    COMMENT 'Table for employee details'
    PARTITIONED BY (department STRING);

    INSERT INTO employees (id, name, position, hire_date, department) VALUES (6, 'Alice', 'Manager', '2020-05-12', 'HR');

    -- Insert the second row
    INSERT INTO employees (id, name, position, hire_date, department) VALUES (2, 'Bob', 'Software Engineer', '2018-11-05', 'Engineering');

    -- Insert the third row
    INSERT INTO employees (id, name, position, hire_date, department) VALUES (3, 'Charlie', 'Sales Representative', '2019-08-15', 'Sales');

    -- Insert the fourth row
    INSERT INTO employees (id, name, position, hire_date, department) VALUES (4, 'David', 'Data Analyst', '2021-01-22', 'Engineering');

    -- Insert the fifth row
    INSERT INTO employees (id, name, position, hire_date, department) VALUES (5, 'Eva', 'Marketing Specialist', '2020-02-17', 'Marketing');
    '''
    hive_client = KaqQuantHiveClient(
        host="127.0.0.1",
        port=10000,
        username="hive"
    )

    # Query data
    result = hive_client.query_df("SELECT * FROM employees")
    print(result)

    # Insert data
    hive_client.insert_many(
        table="employees",
        columns=["id", "name", "position", "hire_date", "department"],
        values_list=[
            (101, "Alice", "Manager", "2025-09-21", "HR"),
            (102, "Bob", "Engineer", "2025-09-21", "IT")
        ],
    )

    hive_client.close()
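As a rough illustration of the statement `insert_many` assembles when a partition is supplied; `employees_by_day` and the `dt` partition column are assumptions for the sketch, not tables the package defines:

from kaq_quant_common.resources.kaq_quant_hive_resources import KaqQuantHiveClient

# Hypothetical partitioned table; only the client class comes from the package.
client = KaqQuantHiveClient(host="127.0.0.1", port=10000, username="hive")
client.insert_many(
    table="employees_by_day",
    columns=["id", "name"],
    values_list=[(1, "Alice"), (2, "Bob")],
    partition={"dt": "2025-09-21"},
)
# insert_many assembles and executes roughly:
# INSERT INTO employees_by_day PARTITION (dt='2025-09-21') (id,name) VALUES (1,'Alice'),(2,'Bob')
client.close()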
kaq_quant_common/resources/kaq_redis_resources.py
@@ -0,0 +1,117 @@
import json
import os
from typing import Any, List
import pandas as pd
import redis
import threading
from kaq_quant_common.utils import yml_utils
from loguru import logger
import traceback

mutex = threading.Lock()

class KaqQuantRedisRepository:
    '''
    Redis operations helper.
    '''
    def __init__(self, host='localhost', port=6379, password=None, db=0):
        '''
        Redis connection pool.
        '''
        try:
            mutex.acquire()
            # Create the connection pool
            pool = redis.ConnectionPool(host=host, port=port, db=db, password=password, max_connections=3, decode_responses=True, health_check_interval=30)

            # Shared Redis client
            self.client = redis.StrictRedis(connection_pool=pool)
        except Exception as e:
            logger.error(f"【create redis connection】error: {str(e)} - {str(traceback.format_exc())}")
        finally:
            mutex.release()

    # Explicitly proxy the common Redis methods
    def set(self, name: str, value: Any, ex: int = None, px: int = None, nx: bool = False, xx: bool = False):
        return self.client.set(name, value, ex, px, nx, xx)

    def get(self, name: str) -> Any:
        return self.client.get(name)

    def delete(self, *names: str) -> int:
        return self.client.delete(*names)

    def keys(self, pattern: str) -> list:
        return self.client.keys(pattern)

    def exists(self, name: str) -> bool:
        return self.client.exists(name)

    def lrange(self, name: str) -> pd.DataFrame:
        result = self.client.lrange(name, 0, -1)
        return pd.DataFrame([json.loads(item) for item in result]) if result else pd.DataFrame()

    def rpush(self, name: str, df: pd.DataFrame) -> pd.DataFrame:
        # Delete the key first, then push the new rows
        self.client.delete(name)
        # Custom JSON serializer
        def timestamp_serializer(obj):
            if isinstance(obj, pd.Timestamp):
                return obj.isoformat()  # or int(obj.timestamp()) for an epoch value
            raise TypeError("Type not serializable")
        if df is not None and not df.empty:
            json_data = [json.dumps(row.to_dict(), default=timestamp_serializer) for _, row in df.iterrows()]
            self.client.rpush(name, *json_data)
        return

    def get_keys_by_pattern(self, pattern: str) -> List[str]:
        '''
        Return the Redis keys matching a pattern.
        :param pattern: match pattern; supports the * wildcard
        :return: list of matching keys
        '''
        try:
            # Fetch the matching keys
            keys = self.client.keys(pattern)
            return keys
        except Exception as e:
            logger.error(f"Error while fetching matching keys: {str(e)} - {str(traceback.format_exc())}")
            return []

    def get_values_by_pattern(self, pattern: str) -> pd.DataFrame:
        '''
        Bulk-fetch the JSON values of all keys matching a pattern and build a pandas DataFrame.
        :param pattern: match pattern; supports the * wildcard
        :return: the resulting pandas DataFrame
        '''
        try:
            # Fetch all matching keys
            keys = self.get_keys_by_pattern(pattern)

            # Bulk-fetch the corresponding JSON payloads
            json_data_list = self.client.mget(keys)

            # Parse the JSON payloads into dicts
            data_list = [json.loads(data.replace("'", '"')) if data else None for data in json_data_list]

            # Drop entries that were None
            data_list = [data for data in data_list if data is not None]

            # Build the pandas DataFrame
            df = pd.DataFrame(data_list)
            return df
        except Exception as e:
            logger.error(f"Error while fetching and parsing JSON values: {str(e)} - {str(traceback.format_exc())}")
            return pd.DataFrame()  # return an empty DataFrame

    def __getattr__(self, name):
        '''
        Fall back to the underlying Redis client for any method not proxied explicitly.
        '''
        return getattr(self.client, name)

if __name__ == '__main__':
    host, port, passwd = yml_utils.get_redis_info(os.getcwd())
    repository = KaqQuantRedisRepository(host=host, port=port, password=passwd)
    # value = repository.get('test')
    df = repository.get_values_by_pattern('kaq_binance_commision_rate_*')
    print(df)
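A round-trip sketch for the list helpers above; the `demo_rates` key and the sample rows are illustrative only, and the connection settings are read the same way as in the module's `__main__` block:

import os
import pandas as pd
from kaq_quant_common.utils import yml_utils
from kaq_quant_common.resources.kaq_redis_resources import KaqQuantRedisRepository

host, port, passwd = yml_utils.get_redis_info(os.getcwd())
repo = KaqQuantRedisRepository(host=host, port=port, password=passwd)

# 'demo_rates' is a hypothetical key, not one the package writes.
df = pd.DataFrame([
    {"symbol": "BTCUSDT", "rate": 0.0001, "ts": pd.Timestamp("2025-01-01 00:00:00")},
    {"symbol": "ETHUSDT", "rate": 0.0002, "ts": pd.Timestamp("2025-01-01 00:00:00")},
])
repo.rpush("demo_rates", df)          # deletes the key, then pushes one JSON string per row
restored = repo.lrange("demo_rates")  # reads the list back into a DataFrame
print(restored)                       # the Timestamp column comes back as an ISO-8601 string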
File without changes
kaq_quant_common/utils/dagster_job_check_utils.py
@@ -0,0 +1,29 @@
from dagster import DagsterRunStatus, RunsFilter


def dagster_job_check(context, job_name, jobs_number=1):
    # Check whether the same job is already running
    instance = context.instance
    runs = instance.get_runs(filters=RunsFilter(job_name=job_name, statuses=[DagsterRunStatus.STARTED, DagsterRunStatus.QUEUED]))
    # Keep only runs of the current job whose status is STARTED
    active_runs = [run for run in runs if run.job_name == job_name and run.status == DagsterRunStatus.STARTED]
    if len(active_runs) > jobs_number:
        # An instance is already running, so skip this execution
        context.log.info(f'【{job_name} already running. Active runs】{str(active_runs)}')
        return False
    context.log.info(f'【Current job name】{str(job_name)}')
    return True


def dagster_sensor_check(context, job_name, jobs_number=1):
    # Check whether the same job is already running
    instance = context.instance
    runs = instance.get_runs(filters=RunsFilter(job_name=job_name, statuses=[DagsterRunStatus.STARTED, DagsterRunStatus.STARTING, DagsterRunStatus.QUEUED]))
    # Keep only runs of the current job
    active_runs = [run for run in runs if run.job_name == job_name]
    if len(active_runs) > jobs_number:
        # An instance is already running, so skip this execution
        context.log.info(f'【{job_name} already running. Active runs】{str(active_runs)}')
        return False
    context.log.info(f'【Current job name】{str(job_name)}')
    return True
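A sketch of how `dagster_job_check` might guard an op so that overlapping runs of the same job skip their work; `guarded_op` and `demo_sync_job` are hypothetical names, not jobs shipped in the package:

from dagster import job, op
from kaq_quant_common.utils.dagster_job_check_utils import dagster_job_check


@op
def guarded_op(context):
    # Skip the heavy work if another run of this job is already STARTED.
    if not dagster_job_check(context, job_name="demo_sync_job"):
        context.log.info("Another run is active; skipping this execution.")
        return
    context.log.info("No concurrent run detected; doing the work here.")


@job
def demo_sync_job():
    guarded_op()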
kaq_quant_common/utils/dagster_utils.py
@@ -0,0 +1,19 @@
from dagster import DagsterRunStatus, RunsFilter, RunStatusSensorContext


# Check whether a Dagster job is currently running
def check_dagster_job_running(context: RunStatusSensorContext, job_name: str):
    # Look up the most recent run
    runs = context.instance.get_runs(filters=RunsFilter(job_name=job_name), limit=1)

    # STARTING and STARTED both count as running
    if runs and runs[0].status in [
        DagsterRunStatus.QUEUED,
        DagsterRunStatus.NOT_STARTED,
        DagsterRunStatus.MANAGED,
        DagsterRunStatus.STARTING,
        DagsterRunStatus.STARTED,
    ]:
        # context.log.info(f"Job {job_name} is already running")
        return False
    return True