datahub_binary 1.7.10__cp312-cp312-win_amd64.whl → 1.7.12__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datahub/dbo/sr.pyi CHANGED
@@ -17,49 +17,565 @@ class StarRocks(Database):
17
17
  sftp_setting: Incomplete
18
18
  force_query: Incomplete
19
19
  def __init__(self, setting: StarRocksSetting) -> None: ...
20
- def query_with_cache(self, sql: str, return_format: Literal['dataframe', 'records'] = 'dataframe') -> pl.DataFrame | list[dict] | None: ...
20
+ def query_with_cache(self, sql: str, return_format: Literal['dataframe', 'records'] = 'dataframe') -> pl.DataFrame | list[dict] | None:
21
+ """
22
+ 使用查询缓存, 用于下载大量数据
23
+ 背景:
24
+ 由于数据库驱动与数据库存在大量的序列化和网络开销, 所以普通查询速度较慢,
25
+ 未来starrocks要添加arrow格式传输接口, 能解决此问题
26
+ 原理:
27
+ 1. 将查询请求转化为输出为parquet文件, 存放到FTP目录
28
+ 2. 从FTP目录下载parquet文件, 转化为dataframe
29
+
30
+ :param sql: SQL
31
+ :param return_format: 返回格式
32
+ :return: DataFrame
33
+ """
21
34
  def query_large_data(self, sql: str) -> pl.DataFrame: ...
22
- def get_indicator_type(self) -> pl.DataFrame: ...
35
+ def get_indicator_type(self) -> pl.DataFrame:
36
+ '''
37
+ 获取指标类型信息
38
+
39
+ :return:
40
+ {
41
+ "indicator_type": 指标类型,
42
+ "indicator_type_desc": 指标类型描述,
43
+ "update_time": 更新日期,
44
+ }
45
+ '''
23
46
  @property
24
47
  def factor_info(self) -> pl.DataFrame: ...
25
- def get_factor_info(self, factors: Sequence[str] = (), types: Sequence[str] = ()) -> pl.DataFrame: ...
26
- def get_seq_factor_info(self, resample_type: str, factors: Sequence[str] = ()) -> pl.DataFrame: ...
27
- def get_future_seq_factor_info(self, resample_type: str, factors: Sequence[str] = ()) -> pl.DataFrame: ...
28
- def get_seq_y_info(self, resample_type: str, factors: Sequence[str] = ()) -> pl.DataFrame: ...
29
- def get_factor_type(self) -> pl.DataFrame: ...
30
- def get_indicator_info(self, indicators: Sequence[str] = (), types: Sequence[str] = ()) -> pl.DataFrame: ...
31
- def get_future_domain_info(self, instrument_sub_type: str | None = None, start_date: date | None = None, end_date: date | None = None, market: str = 'CCFX', rank_id: int = 1, domain_factor: str = 'volume') -> pl.DataFrame: ...
32
- def get_future_snapshot(self, instrument_sub_type: str | Sequence[str], start_date: date, end_date: date, market: str = 'CCFX', rank_id: int = 1, domain_factor: str = 'volume') -> pl.DataFrame: ...
33
- def get_indicator_data(self, start_time: datetime, end_time: datetime, indicators: Sequence[str] = (), instruments: Sequence[str] = (), types: Sequence[str] = (), use_last: bool = False, realtime: bool = False) -> pl.DataFrame: ...
34
- def get_seq_factor_data(self, start_time: datetime, end_time: datetime, factors: Sequence[str] = (), instruments: Sequence[str] = ()) -> pl.DataFrame: ...
35
- def get_future_seq_factor_data(self, start_time: datetime, end_time: datetime, factors: Sequence[str] = (), instruments: Sequence[str] = ()) -> pl.DataFrame: ...
36
- def get_seq_factor_stat(self, start_time: datetime, end_time: datetime, stat_type: str, factors: Sequence[str] = (), instruments: Sequence[str] = ()) -> pl.DataFrame: ...
37
- def get_future_seq_factor_stat(self, start_time: datetime, end_time: datetime, stat_type: str, factors: Sequence[str] = (), instruments: Sequence[str] = ()) -> pl.DataFrame: ...
38
- def get_factor_data(self, start_time: datetime, end_time: datetime, factors: Sequence[str] = (), instruments: Sequence[str] = (), types: Sequence[str] = ()) -> pl.DataFrame: ...
39
- def get_risk_factor_data(self, version: str, start_time: datetime, end_time: datetime, factors: Sequence[str] = (), instruments: Sequence[str] = ()) -> pl.DataFrame: ...
40
- def get_return_data(self, start_time: datetime, end_time: datetime, factors: Sequence[str] = (), instruments: Sequence[str] = (), adj_method: Literal['forward'] = 'forward') -> pl.DataFrame: ...
41
- def get_seq_y_data(self, start_time: datetime, end_time: datetime, factors: Sequence[str] = (), instruments: Sequence[str] = ()) -> pl.DataFrame: ...
42
- def get_seq_y_stat(self, start_time: datetime, end_time: datetime, stat_type: str, factors: Sequence[str] = (), instruments: Sequence[str] = ()) -> pl.DataFrame: ...
43
- def get_ex_factor_info(self, instruments: Sequence[str] = (), trade_date: date | None = None) -> pl.DataFrame: ...
44
- def get_ex_split_info(self, instruments: Sequence[str] = (), trade_date: date | None = None) -> pl.DataFrame: ...
45
- def get_trading_days(self, start_date: date, end_date: date, market: str = 'XSHG') -> list[date]: ...
46
- def get_kline(self, freq: str, instruments: Sequence[str] = (), start_time: datetime | None = None, end_time: datetime | None = None, adj_method: str | None = None) -> pl.DataFrame: ...
47
- def get_fut_kline(self, freq: str, instrument_sub_type: Sequence[str], start_date: date, end_date: date, market: str = 'CCFX', rank_id: int = 1, domain_factor: str = 'volume') -> pl.DataFrame: ...
48
- def get_md_transaction(self, start_date: date, end_date: date, instruments: Sequence[str] = (), markets: Sequence[str] = ()) -> pl.DataFrame: ...
49
- def get_instrument_industry(self, trade_date: date, industry_source: str = 'sws', industry_level: Literal[1, 2, 3] = 1, use_last: bool = False) -> pl.DataFrame: ...
50
- def get_instrument_list(self, trade_date: date, indicators: Sequence[str] = ()) -> Sequence[str]: ...
51
- def get_universe(self, trade_date: date, universe: str | None = None) -> pl.DataFrame: ...
52
- def get_instrument_info(self, trade_date: date, fields: Sequence[str] | None = None, market: str | None = None, instrument_type: Literal['option', 'spot', 'future', 'etf'] | None = None) -> pl.DataFrame: ...
53
- def get_md_snapshot(self, start_date: date, end_date: date, instruments: Sequence[str] = (), markets: Sequence[str] = ()) -> pl.DataFrame: ...
54
- def get_seq_snapshot(self, start_date: date, end_date: date, instruments: Sequence[str] = ()) -> pl.DataFrame: ...
55
- def get_resample_lob(self, resample_type: str, start_date: date, end_date: date, instruments: Sequence[str] = ()) -> pl.DataFrame: ...
56
- def get_last_seq_snapshot(self, end_time: datetime, instruments: Sequence[str] = (), is_filter_limit: bool = False) -> pl.DataFrame: ...
57
- def get_predictor_basket_series(self, start_date: date, end_date: date, predictors: Sequence[int] = (), instruments: Sequence[str] = ()) -> pl.DataFrame: ...
58
- def get_index_weight(self, start_date: date, end_date: date, index_ids: Sequence[str] = (), instruments: Sequence[str] = ()) -> pl.DataFrame: ...
59
- def get_etf_component(self, start_date: date, end_date: date, instrument_ids: Sequence[str] = (), com_instrument_ids: Sequence[str] = ()) -> pl.DataFrame: ...
60
- def get_etf_cash_component(self, start_date: date, end_date: date, instrument_ids: Sequence[str] = ()) -> pl.DataFrame: ...
61
- def get_loads(self, label: str) -> pl.DataFrame: ...
62
- def stream_load(self, database: str, table: str, file_path: str): ...
63
- def broker_load_parquet(self, database: str, table: str, fields: Sequence[str], file_path: str, timeout: int = 3600, label: str | None = None, is_sync: bool = False) -> str: ...
64
- def broker_load_csv(self, database: str, table: str, fields: Sequence[str], file_path: str, timeout: int = 3600, label: str | None = None, is_sync: bool = False) -> str: ...
65
- def get_profile(self, sql: str) -> str: ...
48
+ def get_factor_info(self, factors: Sequence[str] = (), types: Sequence[str] = ()) -> pl.DataFrame:
49
+ '''
50
+ 获取因子信息
51
+
52
+ :param factors: id列表, 为空表示不过滤id
53
+ :param types: type列表, 为空表示不过滤type
54
+ :return:
55
+ "factor_id": "str"
56
+ "factor_no": "i64"
57
+ "factor_type": "str"
58
+ "process_function": "str"
59
+ "create_by": "str"
60
+ "maintain_by": "str"
61
+ "data_source": "str"
62
+ "create_time": "datetime[μs]"
63
+ "update_time": "datetime[μs]"
64
+ "factor_description": "str"
65
+ "interval_minutes": "i64"
66
+ '''
67
+ def get_seq_factor_info(self, resample_type: str, factors: Sequence[str] = ()) -> pl.DataFrame:
68
+ """
69
+ 获取SEQ因子信息
70
+
71
+ :param resample_type: time_interval_ms_500
72
+ :param factors: id列表, 为空表示不过滤id
73
+ :return:
74
+ """
75
+ def get_future_seq_factor_info(self, resample_type: str, factors: Sequence[str] = ()) -> pl.DataFrame:
76
+ """
77
+ 获取期货SEQ因子信息
78
+
79
+ :param resample_type: time_interval_ms_500
80
+ :param factors: id列表, 为空表示不过滤id
81
+ :return:
82
+ """
83
+ def get_seq_y_info(self, resample_type: str, factors: Sequence[str] = ()) -> pl.DataFrame:
84
+ """
85
+ 获取SEQ y信息
86
+
87
+ :param resample_type: time_interval_ms_500
88
+ :param factors: id列表, 为空表示不过滤id
89
+ :return:
90
+ """
91
+ def get_factor_type(self) -> pl.DataFrame:
92
+ '''
93
+ 获取因子类型信息
94
+
95
+ :return:
96
+ {
97
+ "factor_type": 因子类型,
98
+ "factor_type_desc": 因子类型描述,
99
+ "update_time": 更新日期,
100
+ }
101
+ '''
102
+ def get_indicator_info(self, indicators: Sequence[str] = (), types: Sequence[str] = ()) -> pl.DataFrame:
103
+ '''
104
+ 获取indicator信息
105
+
106
+ :param indicators: id列表, 为空表示不过滤id
107
+ :param types: type列表, 为空表示不过滤type
108
+ :return:
109
+ {
110
+ "indicator_id": 指标ID,
111
+ "indicator_type": 指标类型,
112
+ "process_function": 处理函数,
113
+ "create_by": 创建人,
114
+ "maintain_by": 维护人,
115
+ "data_source": 来源,
116
+ "create_time": 创建时间,
117
+ "update_time": 更新时间,
118
+ "indicator_desc": 指标描述,
119
+ "interval_ms": 计算间隔
120
+ }
121
+ '''
122
+ def get_future_domain_info(self, instrument_sub_type: str | None = None, start_date: date | None = None, end_date: date | None = None, market: str = 'CCFX', rank_id: int = 1, domain_factor: str = 'volume') -> pl.DataFrame:
123
+ """
124
+ 获取主力期货合约信息, 额外生成一列instrument_id_ext格式为: {instrument_sub_type}{rank_id:04d}.{market}
125
+
126
+ :param instrument_sub_type: 标的类型, IC IF IH, 默认返回全部
127
+ :param start_date: 开始日期(包含)
128
+ :param end_date: 结束日期(不包含), 与start_date相等时包含
129
+ :param market: 交易市场
130
+ :param rank_id: 合约序号, 1为主力
131
+ :param domain_factor: 主力合约计算方式, 默认按交易量计算
132
+ :return:
133
+ ┌───────────────────┬────────────┬───────────────┬───────────────┬─────────────────────┬────────┬───────────┬─────────┬───────────────┬─────────┬─────────────────────┐
134
+ │ instrument_id_ext ┆ trade_date ┆ instrument_id ┆ domain_factor ┆ instrument_sub_type ┆ market ┆ turnover ┆ volume ┆ open_interest ┆ rank_id ┆ update_time │
135
+ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
136
+ │ str ┆ date ┆ str ┆ str ┆ str ┆ str ┆ f64 ┆ f64 ┆ f64 ┆ i64 ┆ datetime[μs] │
137
+ ╞═══════════════════╪════════════╪═══════════════╪═══════════════╪═════════════════════╪════════╪═══════════╪═════════╪═══════════════╪═════════╪═════════════════════╡
138
+ │ IC0001.CCFX ┆ 2022-01-04 ┆ IC2201.CCFX ┆ volume ┆ IC ┆ CCFX ┆ 6.5508e10 ┆ 44534.0 ┆ 92724.0 ┆ 1 ┆ 2025-03-20 22:41:02 │
139
+ │ IF0001.CCFX ┆ 2022-01-04 ┆ IF2201.CCFX ┆ volume ┆ IF ┆ CCFX ┆ 9.6336e10 ┆ 65395.0 ┆ 82784.0 ┆ 1 ┆ 2025-03-20 22:41:02 │
140
+ └───────────────────┴────────────┴───────────────┴───────────────┴─────────────────────┴────────┴───────────┴─────────┴───────────────┴─────────┴─────────────────────┘
141
+ """
142
+ def get_future_snapshot(self, instrument_sub_type: str | Sequence[str], start_date: date, end_date: date, market: str = 'CCFX', rank_id: int = 1, domain_factor: str = 'volume') -> pl.DataFrame:
143
+ """
144
+ 获取期货快照
145
+
146
+ :param instrument_sub_type: 标的类型, IC IF IH, 支持单个字符串或字符串列表
147
+ :param start_date: 开始日期(包含)
148
+ :param end_date: 结束日期(不包含), 与start_date相等时包含
149
+ :param market: 交易市场
150
+ :param rank_id: 合约序号, 1为主力
151
+ :param domain_factor: 主力合约计算方式, 默认按交易量计算
152
+ :return:
153
+ """
154
+ def get_indicator_data(self, start_time: datetime, end_time: datetime, indicators: Sequence[str] = (), instruments: Sequence[str] = (), types: Sequence[str] = (), use_last: bool = False, realtime: bool = False) -> pl.DataFrame:
155
+ '''
156
+ 获取指标的数据
157
+
158
+ :param start_time: >= 开始时间
159
+ :param end_time: < 结束时间, 当开始与结束时间相同时, 返回数据时间=start_time
160
+ :param indicators: id列表, 为空表示不过滤id
161
+ :param instruments: 标的列表, 为空表示不过滤标的
162
+ :param types: type列表, 为空表示不过滤type
163
+ :param use_last: 使用最新的数据, 而不是历史
164
+ :param realtime: True 时使用实盘 indicator 数据, 默认使用清算数据
165
+ :return:
166
+ {
167
+ "instrument_id": 标的ID,
168
+ "trade_time": 因子时间,
169
+ "indicator_id": 指标ID,
170
+ "value": 值,
171
+ "update_time": 更新时间
172
+ }
173
+ '''
174
+ def get_seq_factor_data(self, start_time: datetime, end_time: datetime, factors: Sequence[str] = (), instruments: Sequence[str] = ()) -> pl.DataFrame:
175
+ '''
176
+ 获取SEQ因子的数据
177
+
178
+ :param start_time: >= 开始时间
179
+ :param end_time: < 结束时间
180
+ :param factors: id列表, 为空表示不过滤id
181
+ :param instruments: 标的列表, 为空表示不过滤标的
182
+ :return:
183
+ {
184
+ "instrument_id": 标的ID,
185
+ "trade_time": 因子时间,
186
+ "factor_id": 因子ID,
187
+ "value": 值,
188
+ "update_time": 更新时间
189
+ }
190
+ '''
191
+ def get_future_seq_factor_data(self, start_time: datetime, end_time: datetime, factors: Sequence[str] = (), instruments: Sequence[str] = ()) -> pl.DataFrame:
192
+ '''
193
+ 获取期货因子的数据
194
+
195
+ :param start_time: >= 开始时间
196
+ :param end_time: < 结束时间
197
+ :param factors: id列表, 为空表示不过滤id
198
+ :param instruments: 标的列表, 为空表示不过滤标的
199
+ :return:
200
+ {
201
+ "instrument_id": 标的ID,
202
+ "trade_time": 因子时间,
203
+ "factor_id": 因子ID,
204
+ "value": 值,
205
+ "update_time": 更新时间
206
+ }
207
+ '''
208
+ def get_seq_factor_stat(self, start_time: datetime, end_time: datetime, stat_type: str, factors: Sequence[str] = (), instruments: Sequence[str] = ()) -> pl.DataFrame:
209
+ '''
210
+ 获取seq factor 的统计值
211
+
212
+ :param start_time: >= 开始时间
213
+ :param end_time: < 结束时间
214
+ :param stat_type: "mean_5d", "std_5d" ...
215
+ :param factors: id列表, 为空表示不过滤id
216
+ :param instruments: 标的列表, 为空表示不过滤标的
217
+ :return:
218
+
219
+ '''
220
+ def get_future_seq_factor_stat(self, start_time: datetime, end_time: datetime, stat_type: str, factors: Sequence[str] = (), instruments: Sequence[str] = ()) -> pl.DataFrame:
221
+ '''
222
+ 获取期货seq factor 的统计值
223
+
224
+ :param start_time: >= 开始时间
225
+ :param end_time: < 结束时间
226
+ :param stat_type: "mean_5d", "std_5d" ...
227
+ :param factors: id列表, 为空表示不过滤id
228
+ :param instruments: 标的列表, 为空表示不过滤标的
229
+ :return:
230
+
231
+ '''
232
+ def get_factor_data(self, start_time: datetime, end_time: datetime, factors: Sequence[str] = (), instruments: Sequence[str] = (), types: Sequence[str] = ()) -> pl.DataFrame:
233
+ """
234
+ 获取因子的数据
235
+
236
+ :param start_time: >= 开始时间
237
+ :param end_time: < 结束时间, 当开始与结束时间相同时, 返回数据时间=start_time
238
+ :param factors: id列表, 为空表示不过滤id
239
+ :param instruments: 标的列表, 为空表示不过滤标的
240
+ :param types: type列表, 为空表示不过滤type
241
+ :return: 因子数据DataFrame
242
+ """
243
+ def get_risk_factor_data(self, version: str, start_time: datetime, end_time: datetime, factors: Sequence[str] = (), instruments: Sequence[str] = ()) -> pl.DataFrame:
244
+ '''
245
+ 获取风险因子的数据
246
+
247
+ :param version: 因子版本
248
+ :param start_time: >= 开始时间
249
+ :param end_time: < 结束时间, 当开始与结束时间相同时, 返回数据时间=start_time
250
+ :param factors: id列表, 为空表示不过滤id
251
+ :param instruments: 标的列表, 为空表示不过滤标的
252
+ :return:
253
+ {
254
+ "instrument_id": 标的ID,
255
+ "trade_time": 因子时间,
256
+ "factor_id": 因子ID,
257
+ "value": 值,
258
+ "update_time": 更新时间
259
+ }
260
+ '''
261
+ def get_return_data(self, start_time: datetime, end_time: datetime, factors: Sequence[str] = (), instruments: Sequence[str] = (), adj_method: Literal['forward'] = 'forward') -> pl.DataFrame:
262
+ '''
263
+ 获取收益率数据
264
+
265
+ :param start_time: >= 开始时间
266
+ :param end_time: < 结束时间, 当开始与结束时间相同时, 返回数据时间=start_time
267
+ :param factors: 因子id列表, 为空表示不过滤id
268
+ :param instruments: 标的列表, 为空表示不过滤标的
269
+ :param adj_method: 复权方式, None 不复权, forward 前复权, backward 后复权
270
+ :return:
271
+ {
272
+ "instrument_id": 标的ID,
273
+ "trade_time": 因子时间,
274
+ "factor_id": 因子ID,
275
+ "value": 值,
276
+ "update_time": 更新时间
277
+ }
278
+ '''
279
+ def get_seq_y_data(self, start_time: datetime, end_time: datetime, factors: Sequence[str] = (), instruments: Sequence[str] = ()) -> pl.DataFrame:
280
+ '''
281
+ 获取高频y的数据
282
+
283
+ :param start_time: >= 开始时间
284
+ :param end_time: < 结束时间, 当开始与结束时间相同时, 返回数据时间=start_time
285
+ :param factors: 因子id列表, 为空表示不过滤id
286
+ :param instruments: 标的列表, 为空表示不过滤标的
287
+ :return:
288
+ {
289
+ "instrument_id": 标的ID,
290
+ "trade_time": 因子时间,
291
+ "factor_id": 因子ID,
292
+ "value": 值,
293
+ "update_time": 更新时间
294
+ }
295
+ '''
296
+ def get_seq_y_stat(self, start_time: datetime, end_time: datetime, stat_type: str, factors: Sequence[str] = (), instruments: Sequence[str] = ()) -> pl.DataFrame:
297
+ '''
298
+ 获取seq y 的统计值
299
+
300
+ :param start_time: >= 开始时间
301
+ :param end_time: < 结束时间
302
+ :param stat_type: "mean_5d", "std_5d" ...
303
+ :param factors: id列表, 为空表示不过滤id
304
+ :param instruments: 标的列表, 为空表示不过滤标的
305
+ :return:
306
+
307
+ '''
308
+ def get_ex_factor_info(self, instruments: Sequence[str] = (), trade_date: date | None = None) -> pl.DataFrame:
309
+ """
310
+ 获取给定交易日最新的分红因子, 如果没有则为1
311
+
312
+ :param instruments: instrument_id列表, 如:[600519.XSHG,000001.XSHE], 空表示所有标的
313
+ :param trade_date: 交易日, 默认为最新日期
314
+ :return:
315
+ """
316
+ def get_ex_split_info(self, instruments: Sequence[str] = (), trade_date: date | None = None) -> pl.DataFrame:
317
+ """
318
+ 获取给定交易日最新的分股因子, 如果没有则为1
319
+
320
+ :param instruments: instrument_id列表, 如:[600519.XSHG,000001.XSHE], 空表示所有标的
321
+ :param trade_date: 交易日, 默认为最新日期
322
+ :return:
323
+ """
324
+ def get_trading_days(self, start_date: date, end_date: date, market: str = 'XSHG') -> list[date]:
325
+ """
326
+ 获取固定市场的交易日
327
+
328
+ :param start_date: >= 开始日期
329
+ :param end_date: < 结束日期, 当开始与结束日期相同时, 返回数据日期=start_date
330
+ :param market: 市场代码
331
+ :return: [交易日]
332
+ """
333
+ def get_kline(self, freq: str, instruments: Sequence[str] = (), start_time: datetime | None = None, end_time: datetime | None = None, adj_method: str | None = None) -> pl.DataFrame:
334
+ '''
335
+ 获取k线数据
336
+
337
+ :param freq: 1min, 5min, 10min, 15min, 1hour, 1day
338
+ :param instruments: instrument_id列表, 如:[600519.XSHG,000001.XSHE], 空表示所有标的
339
+ :param start_time: 开始时间, None 表示从1900年开始
340
+ :param end_time: 结束时间, None 表示当前时间
341
+ :param adj_method: 复权方式, None 不复权, forward 前复权(目前不支持), backward 后复权
342
+ :return:
343
+ {
344
+ "instrument_id": 600519.XSHG,
345
+ "open_price": 1500.0,
346
+ "high_price": 1500.0,
347
+ "low_price": 1500.0,
348
+ "close_price": 1500.0,
349
+ "volume": 100,
350
+ "amount": 10000,
351
+ "datetime": datetime(2020, 1, 1)
352
+ }
353
+ '''
354
+ def get_fut_kline(self, freq: str, instrument_sub_type: Sequence[str], start_date: date, end_date: date, market: str = 'CCFX', rank_id: int = 1, domain_factor: str = 'volume') -> pl.DataFrame:
355
+ """
356
+ 获取k线数据
357
+
358
+ :param freq: 1min, 5min, 10min, 15min, 1hour, 1day
359
+ :param instrument_sub_type: 标的类型, IC IF IH, 支持单个字符串或字符串列表
360
+ :param start_date: 开始日期(包含)
361
+ :param end_date: 结束日期(不包含), 与start_date相等时包含
362
+ :param market: 交易市场
363
+ :param rank_id: 合约序号, 1为主力
364
+ :param domain_factor: 主力合约计算方式, 默认按交易量计算
365
+ :return:
366
+ """
367
+ def get_md_transaction(self, start_date: date, end_date: date, instruments: Sequence[str] = (), markets: Sequence[str] = ()) -> pl.DataFrame:
368
+ '''
369
+ 获取逐笔成交数据
370
+
371
+ :param start_date: >= 开始日期
372
+ :param end_date: < 结束日期, 当开始与结束日期相同时, 返回数据日期=start_date
373
+ :param instruments: 标的列表, 为空表示不过滤标的
374
+ :param markets: 市场列表, 为空表示不过滤市场
375
+ :return:
376
+ `md_date` date NOT NULL COMMENT "",
377
+ `market` varchar(8) NOT NULL COMMENT "",
378
+ `channel_id` varchar(8) NOT NULL COMMENT "",
379
+ `instrument_id` varchar(64) NOT NULL COMMENT "",
380
+ `biz_index` bigint(20) NOT NULL COMMENT "",
381
+ `trade_time` datetime NOT NULL COMMENT "",
382
+ `last_timestamp` datetime NULL COMMENT "接收或者发送时间",
383
+ `trade_type` varchar(4) NOT NULL COMMENT "0-成交, C-撤单 AL-新增limit委托,AM-新增market委托 S-产品订单状态",
384
+ `bid_order_id` varchar(32) NULL COMMENT "",
385
+ `ask_order_id` varchar(32) NULL COMMENT "",
386
+ `trade_price` double NULL COMMENT "",
387
+ `trade_qty` double NULL COMMENT "",
388
+ `bs_flag` varchar(4) NULL COMMENT ""
389
+ '''
390
+ def get_instrument_industry(self, trade_date: date, industry_source: str = 'sws', industry_level: Literal[1, 2, 3] = 1, use_last: bool = False) -> pl.DataFrame:
391
+ """
392
+ 获取给定日期标的行业分类
393
+
394
+ :param trade_date: 交易日
395
+ :param industry_source: 数据源
396
+ :param industry_level: 分类级别
397
+ :param use_last: 如果给定的trade_date没有数据, 是否使用trade_date之前最新的数据
398
+ :return:
399
+ """
400
+ def get_instrument_list(self, trade_date: date, indicators: Sequence[str] = ()) -> Sequence[str]:
401
+ """
402
+ 获取给定日期和指标可用的标的列表
403
+
404
+ :param trade_date: 交易日
405
+ :param indicators: 可选, 为空表示所有indicator
406
+ :return:
407
+ """
408
+ def get_universe(self, trade_date: date, universe: str | None = None) -> pl.DataFrame:
409
+ """
410
+ 获取给定日期的标的池
411
+
412
+ :param trade_date: 交易日
413
+ :param universe: 可选, 为空表示所有universe
414
+ :return: trade_date, instrument_id, universe
415
+ """
416
+ def get_instrument_info(self, trade_date: date, fields: Sequence[str] | None = None, market: str | None = None, instrument_type: Literal['option', 'spot', 'future', 'etf'] | None = None) -> pl.DataFrame:
417
+ '''
418
+ 获取给定日期的标的信息
419
+
420
+ :param trade_date: 交易日
421
+ :param fields: 查询字段, 默认trade_date, instrument_id, lot_size, price_tick, contract_unit, symbol
422
+ :param market: 交易市场
423
+ :param instrument_type: 标的类型
424
+ :return:
425
+ `trade_date` date NOT NULL COMMENT "",
426
+ `instrument_id` varchar(32) NOT NULL COMMENT "合约ID",
427
+ `security_id` varchar(32) NULL COMMENT "证券ID",
428
+ `market` varchar(16) NULL COMMENT "市场",
429
+ `quote_currency_id` varchar(8) NULL COMMENT "报价币种",
430
+ `settle_currency_id` varchar(8) NULL COMMENT "结算币种",
431
+ `lot_size` decimal(20, 4) NULL COMMENT "最小交易单位",
432
+ `price_tick` decimal(20, 4) NULL COMMENT "价格步长",
433
+ `contract_unit` decimal(20, 4) NULL COMMENT "合约单位",
434
+ `intraday_trading` tinyint(4) NULL COMMENT "是否支持日内交易",
435
+ `delist_date` date NULL COMMENT "退市日期",
436
+ `list_date` date NULL COMMENT "上市日期",
437
+ `instrument_type` varchar(16) NULL COMMENT "合约类型",
438
+ `instrument_sub_type` varchar(16) NULL COMMENT "合约子类型",
439
+ `symbol` varchar(64) NULL COMMENT "合约名称",
440
+ `min_size` decimal(20, 4) NULL COMMENT "最小交易量",
441
+ `max_size` decimal(20, 4) NULL COMMENT "最大交易量",
442
+ `price_cage` decimal(20, 4) NULL COMMENT "涨跌幅限制",
443
+ `posi_qty_ratio_limit` decimal(20, 4) NULL COMMENT "持仓量限制比例",
444
+ `external_info` varchar(65533) NULL COMMENT "扩展信息",
445
+ `update_time` datetime NULL COMMENT "更新时间",
446
+ `enable` tinyint(4) NULL COMMENT "是否启用",
447
+ `underlying_instrument_id` varchar(32) NULL COMMENT "标的合约ID"
448
+ '''
449
+ def get_md_snapshot(self, start_date: date, end_date: date, instruments: Sequence[str] = (), markets: Sequence[str] = ()) -> pl.DataFrame:
450
+ """
451
+ 获取快照数据
452
+
453
+ :param start_date: >= 开始日期
454
+ :param end_date: < 结束日期, 当开始与结束日期相同时, 返回数据日期=start_date
455
+ :param instruments: 标的列表, 为空表示不过滤标的
456
+ :param markets: 市场列表, 为空表示不过滤市场
457
+ :return:
458
+ """
459
+ def get_seq_snapshot(self, start_date: date, end_date: date, instruments: Sequence[str] = ()) -> pl.DataFrame:
460
+ """
461
+ 获取自建快照数据
462
+
463
+ :param start_date: >= 开始日期
464
+ :param end_date: < 结束日期, 当开始与结束日期相同时, 返回数据日期=start_date
465
+ :param instruments: 标的列表, 为空表示不过滤标的
466
+ :return:
467
+ """
468
+ def get_resample_lob(self, resample_type: str, start_date: date, end_date: date, instruments: Sequence[str] = ()) -> pl.DataFrame:
469
+ """
470
+ 获取抽样快照数据
471
+
472
+ :param resample_type: time_interval_ms_500
473
+ :param start_date: >= 开始日期
474
+ :param end_date: < 结束日期, 当开始与结束日期相同时, 返回数据日期=start_date
475
+ :param instruments: 标的列表, 为空表示不过滤标的
476
+ :return:
477
+ """
478
+ def get_last_seq_snapshot(self, end_time: datetime, instruments: Sequence[str] = (), is_filter_limit: bool = False) -> pl.DataFrame:
479
+ """
480
+ 获取当日最新快照数据
481
+
482
+ :param end_time: 结束时间
483
+ :param instruments: 标的列表, 为空表示不过滤标的
484
+ :param is_filter_limit: 是否过滤涨跌停
485
+ :return:
486
+ """
487
+ def get_predictor_basket_series(self, start_date: date, end_date: date, predictors: Sequence[int] = (), instruments: Sequence[str] = ()) -> pl.DataFrame:
488
+ """
489
+ 获取predictor_table_backtest
490
+
491
+ :param start_date: 开始日期(包含)
492
+ :param end_date: 结束日期(不包含)
493
+ :param predictors: 筛选id, 为空表示全部
494
+ :param instruments: 标的id, 为空表示全部
495
+ :return:
496
+ """
497
+ def get_index_weight(self, start_date: date, end_date: date, index_ids: Sequence[str] = (), instruments: Sequence[str] = ()) -> pl.DataFrame:
498
+ """
499
+ 获取指数权重数据
500
+
501
+ :param start_date: 开始日期(包含)
502
+ :param end_date: 结束日期(不包含)
503
+ :param index_ids: 筛选指数id, 为空表示全部
504
+ :param instruments: 筛选指数成分标的id, 为空表示全部
505
+ :return:
506
+ """
507
+ def get_etf_info(self, etf_ids: Sequence[str] = ()) -> pl.DataFrame:
508
+ """
509
+ 获取etf基础信息
510
+
511
+ :param etf_ids: 筛选etf id, 为空表示全部
512
+ :return:
513
+ """
514
+ def get_etf_component(self, start_date: date, end_date: date, instrument_ids: Sequence[str] = (), com_instrument_ids: Sequence[str] = ()) -> pl.DataFrame:
515
+ """
516
+ 获取etf权重数据
517
+
518
+ :param start_date: 开始日期(包含)
519
+ :param end_date: 结束日期(不包含)
520
+ :param instrument_ids: 筛选ETF标的, 为空表示全部
521
+ :param com_instrument_ids: 筛选ETF成分标的, 为空表示全部
522
+ :return:
523
+ """
524
+ def get_etf_cash_component(self, start_date: date, end_date: date, instrument_ids: Sequence[str] = ()) -> pl.DataFrame:
525
+ """
526
+ 获取etf现金成分
527
+
528
+ :param start_date: 开始日期(包含)
529
+ :param end_date: 结束日期(不包含)
530
+ :param instrument_ids: 筛选ETF标的, 为空表示全部
531
+ :return:
532
+ """
533
+ def get_loads(self, label: str) -> pl.DataFrame:
534
+ """
535
+ 获取导入任务状态
536
+
537
+ :param label: 标签名
538
+ :return:
539
+ """
540
+ def stream_load(self, database: str, table: str, file_path: str):
541
+ """
542
+ 用stream_load写入csv文件到数据库
543
+
544
+ :param database: 数据库名
545
+ :param table: 表名
546
+ :param file_path: 路径
547
+ :return:
548
+ """
549
+ def broker_load_parquet(self, database: str, table: str, fields: Sequence[str], file_path: str, timeout: int = 3600, label: str | None = None, is_sync: bool = False) -> str:
550
+ """
551
+ 用broker_load写入parquet文件到数据库
552
+
553
+ :param database: 数据库名
554
+ :param table: 表名
555
+ :param fields: 列名
556
+ :param file_path: 文件路径
557
+ :param timeout: 超时时间
558
+ :param label: 标签名, 默认生成格式 {table}_{datetime.now().strftime('%Y%m%d%H%M%S')}
559
+ :param is_sync: 是否同步等待,默认异步执行
560
+ :return: 标签名
561
+ """
562
+ def broker_load_csv(self, database: str, table: str, fields: Sequence[str], file_path: str, timeout: int = 3600, label: str | None = None, is_sync: bool = False) -> str:
563
+ """
564
+ 用broker_load写入csv文件到数据库
565
+
566
+ :param database: 数据库名
567
+ :param table: 表名
568
+ :param fields: 列名
569
+ :param file_path: 文件路径
570
+ :param timeout: 超时时间
571
+ :param label: 标签名, 默认生成格式 {table}_{datetime.now().strftime('%Y%m%d%H%M%S')}
572
+ :param is_sync: 是否同步等待,默认异步执行
573
+ :return: 标签名
574
+ """
575
+ def get_profile(self, sql: str) -> str:
576
+ """
577
+ 获取sql执行效果 https://docs.starrocks.io/zh/docs/administration/query_profile_overview
578
+
579
+ :param sql: SQL
580
+ :return: profile
581
+ """
datahub/utils/logger.pyi CHANGED
@@ -1,10 +1,18 @@
1
1
  from .monitor import Feishu as Feishu
2
2
  from _typeshed import Incomplete
3
+ from datetime import datetime as datetime
4
+ from pathlib import Path as Path
3
5
  from typing import Callable, Literal
4
6
 
5
7
  LOGURU_LEVEL: Incomplete
6
8
 
7
- def timer_decorator(func: Callable) -> Callable: ...
9
+ def timer_decorator(func: Callable) -> Callable:
10
+ """
11
+ 简单计时装饰器,记录函数执行时间,并打印函数的传参值
12
+
13
+ :param func: 被测试函数
14
+ :return:
15
+ """
8
16
  def filter_log_level(record, level): ...
9
17
 
10
18
  class Logger:
@@ -22,6 +30,15 @@ class Logger:
22
30
  min_level_no: Incomplete
23
31
  monitor_type: Incomplete
24
32
  monitor: Incomplete
25
- def __init__(self, name: str, log_dir: str | None = None, retention: int = 5, monitor_type: Literal['Feishu'] = 'Feishu', prefix: str = '') -> None: ...
33
+ def __init__(self, name: str, log_dir: str | None = None, retention: int = 5, monitor_type: Literal['Feishu'] = 'Feishu', prefix: str = '') -> None:
34
+ """
35
+ 日志记录器
36
+
37
+ :param name: logger名称
38
+ :param log_dir: 输出目录, 无输出目录时不会记录log到文件
39
+ :param retention: 保留天数
40
+ :param monitor_type: 是否启用监控报警
41
+ :param prefix: 日志文件前缀,默认为空
42
+ """
26
43
 
27
44
  def main() -> None: ...