kaq-quant-common 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

kaq_quant_common/common/modules/funding_rate_helper.py
@@ -4,9 +4,10 @@ import threading
 import time
 
 import pandas as pd
-
 from kaq_quant_common.resources.kaq_ddb_stream_write_resources import (
-    KaqQuantDdbStreamMTWWriteRepository, KaqQuantDdbStreamWriteRepository)
+    KaqQuantDdbStreamMTWWriteRepository,
+    KaqQuantDdbStreamWriteRepository,
+)
 from kaq_quant_common.utils import logger_utils
 
 
@@ -25,10 +26,10 @@ class FundingRateHelper:
         self._stop_event = threading.Event()
         self._flusher_thread = threading.Thread(target=self._flush_loop, daemon=True)
         self._flusher_thread.name = "FundingRateHelperFlusherThread"
-        self._is_df = False
 
         #
         self._ddb = ddb
+        self._isMtwDdb = isinstance(self._ddb, KaqQuantDdbStreamMTWWriteRepository)
         self._ddb_table_name = ddb_table_name
 
         #
@@ -65,39 +66,54 @@ class FundingRateHelper:
             now = int(datetime.datetime.now().timestamp() * 1000)
 
             for symbol, (data, arg) in to_process:
-                sub_df = self._build_data(symbol, data, arg)
-                if isinstance(sub_df, pd.DataFrame):
-                    self._is_df = True
-                    # log it
-                    data_first_now = int(sub_df["create_time"].iloc[0])
-                    if now - data_first_now > 2000:
-                        self._logger.debug(
-                            f"data time {data_first_now} vs now {now}: gap {now - data_first_now} exceeds 2000ms"
-                        )
-
-                    if df is None:
-                        df = sub_df
+                sub_data = self._build_data(symbol, data, arg)
+
+                if not self._isMtwDdb:
+                    if is_df:
+                        # a DataFrame is written via the DataFrame path
+                        # data_first_now = int(sub_data["create_time"].iloc[0])
+                        # if now - data_first_now > 2000:
+                        #     self._logger.warning(f"data time {data_first_now} vs now {now}: gap {now - data_first_now} exceeds 2000ms")
+                        # pass
+
+                        if df is None:
+                            df = sub_data
+                        else:
+                            df = pd.concat([df, sub_data], ignore_index=True)
                     else:
-                        df = pd.concat([df, sub_df], ignore_index=True)
+                        # an array is written via the array path
+                        # data_first_now = int(sub_data[0])
+                        # if now - data_first_now > 2000:
+                        #     self._logger.warning(f"data time {data_first_now} vs now {now}: gap {now - data_first_now} exceeds 2000ms")
+                        # pass
+
+                        list_data.append(sub_data)
                 else:
-                    list_data = sub_df
+                    # can only be an array
+                    if len(sub_data) > 0:
+                        # write directly via save2stream_list
+                        try:
+                            self._ddb.save2stream_list(sub_data)
+                        except Exception as e:
+                            # keep a flush failure from killing the thread
+                            self._logger.error(f"batch array write failed: {e}")
 
             # write to the database
-            if self._is_df:
+            if not self._isMtwDdb:
+                # handles both df and array
                 if df is not None and not df.empty:
                     try:
                         self._ddb.save2stream_batch(self._ddb_table_name, df=df)
                     except Exception as e:
                         # keep a flush failure from killing the thread
                         self._logger.error(f"batch df write failed: {e}")
-            else:
                 if len(list_data) > 0:
                     try:
-                        self._ddb.save2stream_list(list_data)
+                        self._ddb.save2stream_batch_list(self._ddb_table_name, data=list_data)
                     except Exception as e:
                         # keep a flush failure from killing the thread
-                        self._logger.error(f"batch array write failed: {e}")
+                        self._logger.error(f"batch list write failed: {e}")
 
-            # only the DataFrame path sleeps; arrays let ddb pace itself
-            if self._is_df:
+            # MTW pacing is left to ddb itself
+            if self._isMtwDdb:
                 time.sleep(self._flush_interval_ms / 1000.0)
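
Note: the change above replaces the per-flush `_is_df` flag with a single type check in `__init__`: when `ddb` is a `KaqQuantDdbStreamMTWWriteRepository`, each row goes straight to `save2stream_list`; otherwise rows are buffered and flushed through `save2stream_batch` (DataFrames) or the new `save2stream_batch_list` (arrays). One caveat visible in the diff: this file's new code reads `is_df` inside `_flush_loop` without a visible assignment in the changed lines (the counterpart in limit_order_helper.py adds `is_df = type(sub_data) is pd.DataFrame`). A minimal sketch of the dispatch, with hypothetical `*Stub` stand-ins for the real repositories:

    # Sketch only; the *Stub classes are hypothetical stand-ins, not package APIs.
    class MtwRepoStub:
        def save2stream_list(self, row):
            print("MTW row:", row)

    class BatchRepoStub:
        def save2stream_batch_list(self, table, data):
            print(f"batch -> {table}:", data)

    class HelperSketch:
        def __init__(self, ddb, table="funding_rate"):
            self._ddb = ddb
            self._table = table
            # decided once at construction, unlike the old per-batch _is_df flag
            self._isMtwDdb = isinstance(ddb, MtwRepoStub)

        def flush(self, rows):
            if self._isMtwDdb:
                for r in rows:  # MTW path: one row at a time, ddb paces itself
                    self._ddb.save2stream_list(r)
            else:
                # non-MTW path: one batched insert per flush cycle
                self._ddb.save2stream_batch_list(self._table, data=rows)

    HelperSketch(MtwRepoStub()).flush([[1767708564161, "BTC", 0.0001]])
    HelperSketch(BatchRepoStub()).flush([[1767708564161, "BTC", 0.0001]])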

kaq_quant_common/common/modules/limit_order_helper.py
@@ -4,19 +4,17 @@ import threading
 import time
 
 import pandas as pd
-
 from kaq_quant_common.resources.kaq_ddb_stream_write_resources import (
-    KaqQuantDdbStreamMTWWriteRepository, KaqQuantDdbStreamWriteRepository)
+    KaqQuantDdbStreamMTWWriteRepository,
+    KaqQuantDdbStreamWriteRepository,
+)
 from kaq_quant_common.utils import logger_utils
 
 
 class LimitOrderHelper:
 
     def __init__(
-        self,
-        ddb: KaqQuantDdbStreamWriteRepository | KaqQuantDdbStreamMTWWriteRepository,
-        ddb_table_name: str,
-        _flush_interval_ms:int = 100
+        self, ddb: KaqQuantDdbStreamWriteRepository | KaqQuantDdbStreamMTWWriteRepository, ddb_table_name: str, _flush_interval_ms: int = 100
     ):
         # latest-snapshot cache and flusher-thread control
         self._latest_snapshots: dict[str, tuple] = {}
@@ -26,10 +24,10 @@ class LimitOrderHelper:
         self._stop_event = threading.Event()
         self._flusher_thread = threading.Thread(target=self._flush_loop, daemon=True)
         self._flusher_thread.name = "LimitOrderHelperFlusherThread"
-        self._is_df = False
 
         #
         self._ddb = ddb
+        self._isMtwDdb = isinstance(self._ddb, KaqQuantDdbStreamMTWWriteRepository)
         self._ddb_table_name = ddb_table_name
 
         #
@@ -52,6 +50,10 @@ class LimitOrderHelper:
         self._flusher_thread.join()
 
     def _flush_loop(self):
+        cum_count = 0
+        cum_convert_time = 0
+        cum_write_ddb_time = 0
+        cum_total_use_time = 0
         # periodically batch-write each symbol's latest snapshot
         while not self._stop_event.is_set():
             to_process = None
@@ -66,39 +68,76 @@ class LimitOrderHelper:
             now = int(datetime.datetime.now().timestamp() * 1000)
 
             for symbol, (data, arg) in to_process:
-                sub_df = self._build_data(symbol, data, arg)
-                if isinstance(sub_df, pd.DataFrame):
-                    self._is_df = True
-                    # log it
-                    data_first_now = int(sub_df["create_time"].iloc[0])
-                    if now - data_first_now > 2000:
-                        self._logger.debug(
-                            f"data time {data_first_now} vs now {now}: gap {now - data_first_now} exceeds 2000ms"
-                        )
-
-                    if df is None:
-                        df = sub_df
+                sub_data = self._build_data(symbol, data, arg)
+
+                if not self._isMtwDdb:
+                    # may be an array or a DataFrame
+                    is_df = type(sub_data) is pd.DataFrame
+
+                    if is_df:
+                        # a DataFrame is written via the DataFrame path
+                        data_first_now = int(sub_data["create_time"].iloc[0])
+                        if now - data_first_now > 2000:
+                            self._logger.debug(f"data time {data_first_now} vs now {now}: gap {now - data_first_now} exceeds 2000ms")
+                        pass
+
+                        if df is None:
+                            df = sub_data
+                        else:
+                            df = pd.concat([df, sub_data], ignore_index=True)
                     else:
-                        df = pd.concat([df, sub_df], ignore_index=True)
+                        # an array is written via the array path
+                        data_first_now = int(sub_data[0])
+                        if now - data_first_now > 2000:
+                            self._logger.debug(f"data time {data_first_now} vs now {now}: gap {now - data_first_now} exceeds 2000ms")
+                        pass
+
+                        list_data.append(sub_data)
                 else:
-                    list_data = sub_df
+                    # can only be an array
+                    if len(sub_data) > 0:
+                        # write directly via save2stream_list
+                        try:
+                            self._ddb.save2stream_list(sub_data)
+                        except Exception as e:
+                            # keep a flush failure from killing the thread
+                            self._logger.error(f"batch array write failed: {e}")
+
+            convert_time = int(datetime.datetime.now().timestamp() * 1000)
 
             # write to the database
-            if self._is_df:
+            if not self._isMtwDdb:
+                # handles both df and array
                 if df is not None and not df.empty:
                     try:
                         self._ddb.save2stream_batch(self._ddb_table_name, df=df)
                     except Exception as e:
                         # keep a flush failure from killing the thread
                         self._logger.error(f"batch df write failed: {e}")
-            else:
                 if len(list_data) > 0:
                     try:
-                        self._ddb.save2stream_list(list_data)
+                        self._ddb.save2stream_batch_list(self._ddb_table_name, data=list_data)
                     except Exception as e:
                         # keep a flush failure from killing the thread
-                        self._logger.error(f"batch array write failed: {e}")
-
-            # only the DataFrame path sleeps; arrays let ddb pace itself
-            if self._is_df:
+                        self._logger.error(f"batch list write failed: {e}")
+
+            # stats
+            end = int(datetime.datetime.now().timestamp() * 1000)
+            total_use_time = end - now
+            convert_use = convert_time - now
+            write_ddb_use = total_use_time - convert_use
+
+            #
+            cum_count += len(to_process)
+            cum_convert_time += convert_use
+            cum_write_ddb_time += write_ddb_use
+            cum_total_use_time += total_use_time
+
+            if total_use_time > 500 and cum_count > 0:
+                self._logger.debug(
+                    f"batch write of {len(to_process)} rows took {total_use_time}ms (avg {cum_total_use_time / cum_count:.2f}ms), convert {convert_use}ms (avg {cum_convert_time / cum_count:.2f}ms), ddb write {write_ddb_use}ms (avg {cum_write_ddb_time / cum_count:.2f}ms)"
+                )
+
+            # MTW pacing is left to ddb itself
+            if self._isMtwDdb:
                 time.sleep(self._flush_interval_ms / 1000.0)
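
Note: `_flush_loop` now also keeps lifetime accumulators (`cum_count`, `cum_convert_time`, `cum_write_ddb_time`, `cum_total_use_time`) so the slow-flush log line can report running averages alongside the current batch. A standalone sketch of that accounting, assuming the same millisecond timestamps as the diff:

    import datetime

    def now_ms() -> int:
        return int(datetime.datetime.now().timestamp() * 1000)

    # lifetime totals mirroring cum_count / cum_convert_time / cum_write_ddb_time / cum_total_use_time
    cum_count = cum_convert = cum_write = cum_total = 0

    def record(batch_len: int, start: int, convert_done: int, end: int) -> None:
        global cum_count, cum_convert, cum_write, cum_total
        total = end - start                # whole flush cycle
        convert = convert_done - start     # building the df/list rows
        write = total - convert            # time spent inside the ddb write
        cum_count += batch_len
        cum_convert += convert
        cum_write += write
        cum_total += total
        if total > 500 and cum_count > 0:  # only slow flushes are logged, as in the diff
            print(f"{batch_len} rows: {total}ms total (avg {cum_total / cum_count:.2f}ms), "
                  f"convert {convert}ms, ddb write {write}ms")

    t0 = now_ms()
    record(3, t0, t0 + 40, t0 + 620)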

kaq_quant_common/resources/kaq_ddb_stream_write_resources.py
@@ -1,28 +1,31 @@
 import os
 import queue
+import threading
 import time
+import traceback
 from typing import Optional, Union
+
 import dolphindb as ddb
-from kaq_quant_common.utils import yml_utils
 import numpy as np
 import pandas as pd
-import threading
+from kaq_quant_common.utils import yml_utils
 from kaq_quant_common.utils.logger_utils import get_logger
-import traceback
 from typing_extensions import deprecated
 
 mutex = threading.Lock()
 
+
 # Approach 1: asynchronous calls
 class KaqQuantDdbStreamWriteRepository:
-    '''
+    """
     asynchronous writes without waiting
-    '''
+    """
+
     def __init__(self, host, port, user, passwd):
         self.logger = get_logger(self)
-        '''
+        """
         create the ddb connection && add ddb stream table support
-        '''
+        """
         try:
             mutex.acquire()
             self.session = ddb.session(enableASYNC=True)
@@ -34,14 +37,14 @@ class KaqQuantDdbStreamWriteRepository:
             # note that fetchSize must not be smaller than 8192 (record count)
             self.size = 8192
         except Exception as e:
-            self.logger.error(f'KaqQuantDdbStreamWriteRepository.__init__ is occured error: {str(e)} - {str(traceback.format_exc())}')
+            self.logger.error(f"KaqQuantDdbStreamWriteRepository.__init__ is occured error: {str(e)} - {str(traceback.format_exc())}")
         finally:
             mutex.release()
-
+
     def save_rows(self, ddb_table_name: str, rows: Optional[Union[np.ndarray, list]] = None):
-        '''
+        """
         before calling this method, string values in the dataframe must be quoted
-        '''
+        """
         if rows is None:
             return
         # get the dimensionality
@@ -51,77 +54,139 @@ class KaqQuantDdbStreamWriteRepository:
                 formatted_values = []
                 for row in rows:
                     # here row is np.array([1767708564161, "BTC", 92500.1])
-                    row = [f'\'{v}\'' if isinstance(v, str) else str(v) for v in row]
-                    formatted_values.append(f'({', '.join(row)})')
+                    row = [f"'{v}'" if isinstance(v, str) else str(v) for v in row]
+                    formatted_values.append(f"({', '.join(row)})")
                 script = f"insert into {ddb_table_name} values {', '.join(str(x) for x in formatted_values)}"
-
+
                 self.session.run(script, clearMemory=True)
             except Exception as e:
-                self.logger.error(f'KaqQuantDdbStreamWriteRepository.save_rows is occured error: tableName is {ddb_table_name} - {str(e)} - {str(traceback.format_exc())}')
+                self.logger.error(
+                    f"KaqQuantDdbStreamWriteRepository.save_rows is occured error: tableName is {ddb_table_name} - {str(e)} - {str(traceback.format_exc())}"
+                )
         else:
             # a single row of data; call insert
-            formatted_values = [f'\'{v}\'' if isinstance(v, str) else str(v) for v in rows]
+            formatted_values = [f"'{v}'" if isinstance(v, str) else str(v) for v in rows]
             script = f"insert into {ddb_table_name} values({', '.join(str(x) for x in formatted_values)})"
             try:
                 self.session.run(script, clearMemory=True)
             except Exception as e:
-                self.logger.error(f'KaqQuantDdbStreamWriteRepository.save_rows is occured error: tableName is {ddb_table_name} - {str(e)} - {str(traceback.format_exc())}')
-
-    def save2stream(self, ddb_table_name: str, df : pd.DataFrame):
-        '''
+                self.logger.error(
+                    f"KaqQuantDdbStreamWriteRepository.save_rows is occured error: tableName is {ddb_table_name} - {str(e)} - {str(traceback.format_exc())}"
+                )
+
+    def save2stream(self, ddb_table_name: str, df: pd.DataFrame):
+        """
         before calling this method, string values in the dataframe must be quoted
-        '''
+        """
         # iterate over each column's dtype
         for column, dtype in df.dtypes.items():
-            if dtype == 'object' or dtype == 'str':
-                df[column] = '\'' + df[column] + '\''
+            if dtype == "object" or dtype == "str":
+                df[column] = "'" + df[column] + "'"
         for index, row in df.iterrows():
            script = f"insert into {ddb_table_name} values({', '.join(str(x) for x in row.values)})"
            try:
                self.session.run(script, clearMemory=True)
            except Exception as e:
-                self.logger.error(f'KaqQuantDdbStreamWriteRepository.save2stream is occured error: tableName is {ddb_table_name} - {str(e)} - {str(traceback.format_exc())}')
-
-    def build_insert_values_fast(self, df):
-        if df.empty:
+                self.logger.error(
+                    f"KaqQuantDdbStreamWriteRepository.save2stream is occured error: tableName is {ddb_table_name} - {str(e)} - {str(traceback.format_exc())}"
+                )
+
+    def build_insert_values_fast(self, data: pd.DataFrame | list):
+        if data.empty:
             return []
-        dtypes = df.dtypes.tolist()
+        dtypes = data.dtypes.tolist()
         # determine up front which columns need quoting
-        str_idx = {i for i, dt in enumerate(dtypes) if dt == object or dt == 'object' or dt == 'str'}
+        str_idx = {i for i, dt in enumerate(dtypes) if dt == object or dt == "object" or dt == "str"}
         # convert to ndarray to reduce pandas involvement
-        arr = df.to_numpy()
-        rows = []
-        for row in arr:
-            parts = []
-            for i, v in enumerate(row):
-                if v is None or pd.isna(v):
-                    parts.append("NULL")
-                elif i in str_idx:
-                    parts.append(f"'{v}'")  # direct concatenation is fastest
-                else:
-                    parts.append(str(v))
-            rows.append("(" + ", ".join(parts) + ")")
-        return rows
-
-    def save2stream_batch(self, ddb_table_name: str, df : pd.DataFrame):
-        '''
+        arr = data.to_numpy()
+
+        # use an inner function instead of a lambda to avoid closure issues and improve performance
+        def format_value(i, v):
+            if v is None or (isinstance(v, float) and np.isnan(v)):
+                return "NULL"
+            elif i in str_idx:
+                return f"'{v}'"
+            else:
+                return str(v)
+
+        # build all rows in one list comprehension to avoid repeated appends
+        return ["(" + ", ".join(format_value(i, v) for i, v in enumerate(row)) + ")" for row in arr]
+
+    def build_insert_values_fast_list(self, data: list):
+        if not data:
+            return []
+        #
+        first_row = data[0]
+        str_idx = {i for i, v in enumerate(first_row) if type(v) is str}
+
+        # optimization: use type() instead of isinstance() to cut call overhead
+        def format_value(i, v):
+            if v is None:
+                return "NULL"
+            # a known string column: format directly (avoids isinstance)
+            if i in str_idx:
+                return f"'{v}'"
+
+            # type() is faster than isinstance()
+            v_type = type(v)
+            if v_type is float:
+                if np.isnan(v):
+                    return "NULL"
+                return str(v)
+            if v_type is str:
+                return f"'{v}'"  # the first row may have held None, misclassifying the column
+            return str(v)
+
+        # build all rows in one list comprehension
+        return ["(" + ", ".join(format_value(i, v) for i, v in enumerate(row)) + ")" for row in data]
+
+    def save2stream_batch(self, ddb_table_name: str, df: pd.DataFrame):
+        """
         before calling this method, string values in the dataframe must be quoted
-        '''
+        """
         try:
-            start1 = time.monotonic_ns()
+            # start1 = time.monotonic_ns()
            row = self.build_insert_values_fast(df)
-            values = ', '.join(row)
+            values = ", ".join(row)
            script = f"insert into {ddb_table_name} values {values}"
-            start2 = time.monotonic_ns()
+            # start2 = time.monotonic_ns()
            self.session.run(script, clearMemory=True)
-            end = time.monotonic_ns()
-            if "KAQ_QUANT_LOG" in os.environ:
-                diff = end - start2
-                if diff > 1_000_000_0:  # over 1ms
-                    self.logger.warning(f'KaqQuantDdbStreamWriteRepository.save2stream cost time is only write : {end - start2} ns, save2stream_batch :{end - start1} ns, batch size is {len(df)}, tableName is {ddb_table_name}')
+            # end = time.monotonic_ns()
+            # if "KAQ_QUANT_LOG" in os.environ:
+            #     diff = end - start2
+            #     if diff > 1_000_000_0:  # over 1ms
+            #         self.logger.warning(
+            #             f"KaqQuantDdbStreamWriteRepository.save2stream_batch cost time is only write : {end - start2} ns, save2stream_batch :{end - start1} ns, batch size is {len(df)}, tableName is {ddb_table_name}"
+            #         )
         except Exception as e:
-            self.logger.error(f'KaqQuantDdbStreamWriteRepository.save2stream_batch is occured error: tableName is {ddb_table_name} - {str(e)} - {str(traceback.format_exc())}')
-
+            self.logger.error(
+                f"KaqQuantDdbStreamWriteRepository.save2stream_batch is occured error: tableName is {ddb_table_name} - {str(e)} - {str(traceback.format_exc())}"
+            )
+
+    def save2stream_batch_list(self, ddb_table_name: str, data: list):
+        """
+        before calling this method, string values in the list must be quoted
+        """
+        try:
+            # start1 = time.monotonic_ns()
+            row = self.build_insert_values_fast_list(data)
+            values = ", ".join(row)
+            script = f"insert into {ddb_table_name} values {values}"
+            # start2 = time.monotonic_ns()
+            self.session.run(script, clearMemory=True)
+            # end = time.monotonic_ns()
+            # if "KAQ_QUANT_LOG" in os.environ:
+            #     diff = end - start2
+            #     if diff > 1_000_000_0:  # over 1ms
+            #         self.logger.warning(
+            #             f"KaqQuantDdbStreamWriteRepository.save2stream_batch_list cost time is only write : {end - start2} ns, save2stream_batch_list :{end - start1} ns, batch size is {len(data)}, tableName is {ddb_table_name}"
+            #         )
+        except Exception as e:
+            self.logger.error(
+                f"KaqQuantDdbStreamWriteRepository.save2stream_batch_list is occured error: tableName is {ddb_table_name} - {str(e)} - {str(traceback.format_exc())}"
+            )
+
+
 # Approach 2: synchronous calls, with a Python-side queue wait
 class DDBAsyncDFWriter:
     def __init__(self, appender, batch_size=1000, flush_interval_ms=80):
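
Note: `build_insert_values_fast_list` infers the string columns from the first row only, which is why its inner `format_value` re-checks `type(v) is str` later (a `None` in row 0 would otherwise misclassify the column). A self-contained sketch of the same quoting/NULL logic:

    import numpy as np

    def format_rows(data: list) -> list:
        # string columns inferred from row 0, as in build_insert_values_fast_list
        str_idx = {i for i, v in enumerate(data[0]) if type(v) is str}

        def fmt(i, v):
            if v is None:
                return "NULL"
            if i in str_idx or type(v) is str:  # second check catches a None in row 0
                return f"'{v}'"
            if type(v) is float and np.isnan(v):
                return "NULL"
            return str(v)

        return ["(" + ", ".join(fmt(i, v) for i, v in enumerate(row)) + ")" for row in data]

    # row 0 has None where row 1 has a string; the type re-check still quotes "BTC"
    print(format_rows([[1767708564161, None, 92500.1], [1767708564162, "BTC", float("nan")]]))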
@@ -129,10 +194,10 @@ class DDBAsyncDFWriter:
         self.appender = appender
         self.batch_size = batch_size
         self.flush_interval = flush_interval_ms / 1000.0
-
+
         self.queue = queue.Queue(maxsize=10000)
         self.running = True
-
+
         self.thread = threading.Thread(target=self._worker, daemon=True)
         self.thread.start()
 
@@ -165,9 +230,7 @@ class DDBAsyncDFWriter:
 
                 now = time.time()
                 # trigger: enough rows, interval elapsed, or the writer is stopping
-                if buffer and (current_rows >= self.batch_size or
-                               (now - last_flush_time) >= self.flush_interval or
-                               not self.running):
+                if buffer and (current_rows >= self.batch_size or (now - last_flush_time) >= self.flush_interval or not self.running):
                     self._do_flush(buffer)
                     buffer = []
                     current_rows = 0
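
Note: the reflowed condition flushes when any of three triggers fires: the buffer holds enough rows, the flush interval has elapsed, or the writer is shutting down. The same predicate in isolation (a sketch; names match the diff, defaults assumed from `__init__`):

    import time

    def should_flush(buffer, current_rows, last_flush_time,
                     batch_size=1000, flush_interval=0.08, running=True) -> bool:
        # mirrors DDBAsyncDFWriter._worker's flush trigger
        now = time.time()
        return bool(buffer) and (
            current_rows >= batch_size
            or (now - last_flush_time) >= flush_interval
            or not running
        )

    print(should_flush([object()], 3, time.time() - 1.0))  # True: interval elapsed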
@@ -190,67 +253,80 @@ class DDBAsyncDFWriter:
         """
         self.logger.warning("Stopping DDBAsyncDFWriter and draining remaining data...")
         self.running = False
-        self.thread.join() # wait for the worker thread to finish the last buffer
+        self.thread.join()  # wait for the worker thread to finish the last buffer
         self.logger.info("DDBAsyncDFWriter stopped safely.")
-
+
+
 class KaqQuantDdbStreamWriteSyncRepository:
-    '''
+    """
     writes directly via an appender
-    '''
+    """
+
     def __init__(self, host, port, user, passwd, tableName=None, batch_size=1000, flush_interval_ms=80):
         if tableName is None:
-            raise ValueError(f'Error tableName, please set. tableName={tableName}')
+            raise ValueError(f"Error tableName, please set. tableName={tableName}")
         self.tableName = tableName
         self.logger = get_logger(self)
-        '''
+        """
         create the ddb connection && add ddb stream table support
-        '''
+        """
         try:
             mutex.acquire()
             self.session = ddb.session()
             self.session.connect(host, port, user, passwd, tryReconnectNums=100, reconnect=True, keepAliveTime=1000, readTimeout=10, writeTimeout=5)
-
-            self.batch_writer = DDBAsyncDFWriter(ddb.TableAppender(table_name=self.tableName, conn=self.session), batch_size=batch_size, flush_interval_ms=flush_interval_ms)
+
+            self.batch_writer = DDBAsyncDFWriter(
+                ddb.TableAppender(table_name=self.tableName, conn=self.session), batch_size=batch_size, flush_interval_ms=flush_interval_ms
+            )
             # note that fetchSize must not be smaller than 8192 (record count)
             self.size = 8192
         except Exception as e:
-            self.logger.error(f'KaqQuantDdbTableStreamWriteRepository.__init__ is occured error: {str(e)} - {str(traceback.format_exc())}')
+            self.logger.error(f"KaqQuantDdbTableStreamWriteRepository.__init__ is occured error: {str(e)} - {str(traceback.format_exc())}")
         finally:
             mutex.release()
-
+
     # @deprecated("ensure the pandas dtypes match the ddb table.")
-    def insert(self, df : pd.DataFrame):
-        '''
+    def insert(self, df: pd.DataFrame):
+        """
         date-like columns in the dataframe must match the ddb stream table, e.g.:
         df['create_time'] = pd.to_datetime(df['create_time'], unit='ms')
         df['event_time'] = pd.to_datetime(df['event_time'], unit='ms')
-        '''
+        """
         try:
             self.batch_writer.add_df(df)
         except Exception as e:
-            self.logger.error(f'KaqQuantDdbTableStreamWriteRepository.insert is occured error: {str(e)} - {str(traceback.format_exc())}')
+            self.logger.error(f"KaqQuantDdbTableStreamWriteRepository.insert is occured error: {str(e)} - {str(traceback.format_exc())}")
+
 
 # Approach 3: asynchronous, using the ddb client's internal C++ multithreaded parsing and writing; suited to row-by-row inserts
-class KaqQuantDdbStreamMTWWriteRepository:
-    def __init__(self, host, port, user, passwd, tableName=None, batch_size=1000, throttle=50, partitionCol='', threadCount=1):
+class KaqQuantDdbStreamMTWWriteRepository:
+    def __init__(self, host, port, user, passwd, tableName=None, batch_size=1000, throttle=50, partitionCol="", threadCount=1):
         self.logger = get_logger(self)
-        '''
+        """
         create the ddb connection && add ddb stream table support
-        '''
+        """
         try:
             mutex.acquire()
             self.session = ddb.session(enableASYNC=True)
             self.session.connect(host, port, user, passwd, tryReconnectNums=100, reconnect=True, keepAliveTime=1000, readTimeout=10, writeTimeout=5)
             self.batch_writer = ddb.MultithreadedTableWriter(
-                host, port, user, passwd, tableName=tableName, dbPath='',
-                batchSize=batch_size, throttle=throttle, threadCount=threadCount, partitionCol=partitionCol
+                host,
+                port,
+                user,
+                passwd,
+                tableName=tableName,
+                dbPath="",
+                batchSize=batch_size,
+                throttle=throttle,
+                threadCount=threadCount,
+                partitionCol=partitionCol,
             )
         except Exception as e:
-            self.logger.error(f'KaqQuantDdbStreamMTWWriteRepository.__init__ is occured error: {str(e)} - {str(traceback.format_exc())}')
+            self.logger.error(f"KaqQuantDdbStreamMTWWriteRepository.__init__ is occured error: {str(e)} - {str(traceback.format_exc())}")
         finally:
             mutex.release()
-
-    def save2stream_batch(self, df:pd.DataFrame=pd.DataFrame(), cols:list=[]):
+
+    def save2stream_batch(self, df: pd.DataFrame = pd.DataFrame(), cols: list = []):
         try:
             if cols is None or len(cols) <= 0:
                 for _, row in df.iterrows():
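
Note: `save2stream_list` unpacks one flat row into `MultithreadedTableWriter.insert`, so callers pass one value per column. A hedged sketch of that call shape, with a stub in place of the real ddb writer:

    class WriterStub:
        # stands in for ddb.MultithreadedTableWriter; only insert()'s shape matters here
        def insert(self, *args):
            print("insert", args)

    class MtwRepoSketch:
        def __init__(self):
            self.batch_writer = WriterStub()

        def save2stream_list(self, row: list = []):
            if row is None or len(row) <= 0:
                return
            self.batch_writer.insert(*row)  # one positional arg per column, as in the diff

    MtwRepoSketch().save2stream_list([1767708564161, "BTC", 92500.1])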
@@ -261,23 +337,23 @@ class KaqQuantDdbStreamMTWWriteRepository:
                     _args = [row[i] for i in cols]
                     self.batch_writer.insert(*_args)
         except Exception as e:
-            self.logger.error(f'KaqQuantDdbStreamMTWWriteRepository.insert is occured error: {str(e)} - {str(traceback.format_exc())}')
-
-    def save2stream_list(self, row:list=[]):
+            self.logger.error(f"KaqQuantDdbStreamMTWWriteRepository.insert is occured error: {str(e)} - {str(traceback.format_exc())}")
+
+    def save2stream_list(self, row: list = []):
         try:
             if row is None or len(row) <= 0:
                 return
             self.batch_writer.insert(*row)
         except Exception as e:
-            self.logger.error(f'KaqQuantDdbStreamMTWWriteRepository.insert is occured error: {str(e)} - {str(traceback.format_exc())}')
-
+            self.logger.error(f"KaqQuantDdbStreamMTWWriteRepository.insert is occured error: {str(e)} - {str(traceback.format_exc())}")
+
     def stop(self):
-        '''
+        """
         final call when shutting down
-        '''
+        """
         self.batch_writer.waitForThreadCompletion()
-
-
-if __name__ == '__main__':
+
+
+if __name__ == "__main__":
     host, port, user, passwd = yml_utils.get_ddb_info(os.getcwd())
-    kaq = KaqQuantDdbStreamWriteRepository(host, port, user, passwd)
+    kaq = KaqQuantDdbStreamWriteRepository(host, port, user, passwd)
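
Note: putting the pieces together, a caller picks one of the repositories and either writes directly or hands it to one of the helper classes above. A usage sketch; the connection parameters and table name are placeholders, and a reachable DolphinDB node with a matching stream table is assumed:

    from kaq_quant_common.resources.kaq_ddb_stream_write_resources import (
        KaqQuantDdbStreamWriteRepository,
    )

    # placeholder credentials; in the package they come from yml_utils.get_ddb_info()
    repo = KaqQuantDdbStreamWriteRepository("127.0.0.1", 8848, "admin", "123456")

    # one INSERT statement for the whole batch, via build_insert_values_fast_list
    repo.save2stream_batch_list(
        "funding_rate_stream",  # hypothetical stream table
        data=[
            [1767708564161, "BTC", 0.0001],
            [1767708564161, "ETH", 0.0002],
        ],
    )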

kaq_quant_common-0.2.6.dist-info/METADATA → kaq_quant_common-0.2.8.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kaq_quant_common
-Version: 0.2.6
+Version: 0.2.8
 Summary: 
 Author: kevinfuture
 Author-email: liuenbofuture@foxmail.com

kaq_quant_common-0.2.6.dist-info/RECORD → kaq_quant_common-0.2.8.dist-info/RECORD
@@ -27,8 +27,8 @@ kaq_quant_common/api/ws/ws_server_base.py,sha256=-JFA5fnYHXPYBZ09aZmhYuhgDHFfJbk
 kaq_quant_common/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 kaq_quant_common/common/ddb_table_monitor.py,sha256=7Yihz_uGGujo_QqqPl45Gp8fwUMMw1auXx5egbzyYlE,3662
 kaq_quant_common/common/http_monitor.py,sha256=_yChiwfVv1c5g_lKgYUjWY40fX61BWVK8SL4kXwRfwk,2375
-kaq_quant_common/common/modules/funding_rate_helper.py,sha256=BVGNS6E7aVPITOz5u4g7vBUHOrm1S1Uri8psgWxp-BQ,3853
-kaq_quant_common/common/modules/limit_order_helper.py,sha256=2k54QO-WwUmFHKvThGcdDk79Xfs7_uJt9F5h2py7zMg,3905
+kaq_quant_common/common/modules/funding_rate_helper.py,sha256=jSlokGh2wYYOOTj83FrUeyrcngU-lgpjvK0FawFCxdo,4840
+kaq_quant_common/common/modules/limit_order_helper.py,sha256=3JHLP72RxmNiKNk8lC8nRb9yIDK2nhR-xbdXFTNCyrU,6044
 kaq_quant_common/common/modules/limit_order_symbol_monitor.py,sha256=TBK48qyeCSQvkfDMv3J_0UM7f3OuBRKRFYDcL9kG6Cs,2876
 kaq_quant_common/common/modules/limit_order_symbol_monitor_group.py,sha256=oEqHIwxhqAzckmluHJHZHiHUNmAyaS2JyK2nXO58UhY,2394
 kaq_quant_common/common/monitor_base.py,sha256=E4EUMsO3adNltCDNRgxkvUSbTTfKOL9S1zzN3WkZvpU,2467
@@ -42,7 +42,7 @@ kaq_quant_common/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
 kaq_quant_common/resources/kaq_ddb_pool_stream_read_resources.py,sha256=q4P96rSrEcWn9ki09UD0vw00iFq_bpgOFTrRzVG7uCA,2537
 kaq_quant_common/resources/kaq_ddb_stream_init_resources.py,sha256=0MxxbajocTFzcRD344rfZQPKOwwuqCbyoT6dQpIox-o,3349
 kaq_quant_common/resources/kaq_ddb_stream_read_resources.py,sha256=WShsXMoL8o-JZvrtAd7H2Cg-vrE47QbsdGgURaQwiZs,3165
-kaq_quant_common/resources/kaq_ddb_stream_write_resources.py,sha256=HaRWik7aJydoxGnfMn6-E6XAevJBv-PJgG1C-TobAeU,12642
+kaq_quant_common/resources/kaq_ddb_stream_write_resources.py,sha256=Z_-CHXh390OTMUx9Cf1I1rTWE7RLC_GwFVxopcYOKoQ,15148
 kaq_quant_common/resources/kaq_mysql_init_resources.py,sha256=UcqWey6LgoMqvLq1SxK33nS6-rkViGYhzUPxcrucOks,827
 kaq_quant_common/resources/kaq_mysql_resources.py,sha256=282jpXvYlEQNx-hicYTNBHDii85KYgN7BQQSMS9aPFM,13211
 kaq_quant_common/resources/kaq_postgresql_resources.py,sha256=iG1eYkciI0xUIBdEpGqKGOLBFxvVrfbBoTuaOmhQ0v0,1762
@@ -62,6 +62,6 @@ kaq_quant_common/utils/signal_utils.py,sha256=zBSyEltNTKqkQCsrETd47kEBb3Q_OWUBUn
 kaq_quant_common/utils/sqlite_utils.py,sha256=UDDFKfwL0N-jFifl40HdyOCENh2YQfW5so6hRaSJpv0,5722
 kaq_quant_common/utils/uuid_utils.py,sha256=pm_pnXpd8n9CI66x3A20cOEUiriJyqHaKGCeLrgkBxU,71
 kaq_quant_common/utils/yml_utils.py,sha256=gcKjb_-uuUajBGAl5QBPIZTg2wXm7qeeJvtHflj_zOE,4513
-kaq_quant_common-0.2.6.dist-info/METADATA,sha256=iie2yF9hawVHnby2gji5uT33bCORlgYdGxfYXSkQxw8,1970
-kaq_quant_common-0.2.6.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
-kaq_quant_common-0.2.6.dist-info/RECORD,,
+kaq_quant_common-0.2.8.dist-info/METADATA,sha256=Vpw8yaybO3loNes1UmtfsmT0saExZ9EJQjN_nhlKhTY,1970
+kaq_quant_common-0.2.8.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+kaq_quant_common-0.2.8.dist-info/RECORD,,