Qubx 0.0.1 (cp311-cp311-manylinux_2_35_x86_64.whl)

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.

Potentially problematic release: this version of Qubx has been flagged as possibly problematic.

qubx/core/strategy.py ADDED
@@ -0,0 +1,89 @@
+ """
+ # All interfaces related to strategy etc
+ """
+ from typing import Callable, Dict, List, Optional, Union
+ import numpy as np
+ from dataclasses import dataclass
+ from qubx.core.basics import ZERO_COSTS, Instrument, Position, Signal, TransactionCostsCalculator, dt_64
+
+ E_TIMER = 1
+ E_QUOTE = 2
+ E_TRADE = 3
+ E_OPENBOOK = 4
+ E_HIST_DATA_READY = 100
+ E_HIST_DATA_ERROR = -100
+ @dataclass
+ class Event:
+     time: dt_64
+     type: int  # ??
+     instrument: Instrument
+
+ DataListener = Callable[[Instrument, int], None]
+ ExchListener = Callable[[Instrument, int], None]
+
+ class DataProvider:
+     def add_data_listener(self, listener: DataListener):
+         pass
+
+     def request_historical_data(self,
+                                 instruments: List[Instrument],
+                                 timeframe: str,
+                                 start: Union[str, int, dt_64],
+                                 stop: Union[str, int, dt_64]):
+         pass
+
+
+ class ExchangeServiceProvider:
+     def add_exchange_listener(self, listener: ExchListener):
+         pass
+
+     def get_position(self, instrument: Instrument) -> Position:
+         pass
+
+     def get_tcc(self, instrument: Instrument) -> TransactionCostsCalculator:
+         return ZERO_COSTS
+
+     def time(self) -> dt_64:
+         """
+         Returns current time
+         """
+         pass
+
+
+ class IStrategy:
+     ctx: 'TradingContext'
+
+     def on_init(self):
+         pass
+
+     def process_event(self, time: dt_64, event: Event) -> Optional[List[Signal]]:
+         return None
+
+
+ class TradingContext:
+     strategy: IStrategy
+     exchange: ExchangeServiceProvider
+     data: DataProvider
+     instruments: List[Instrument]
+     positions: Dict[str, Position]
+
+     def __init__(
+         self,
+         strategy: IStrategy,
+         exchange: ExchangeServiceProvider,
+         data: DataProvider,
+         instruments: List[Instrument]
+     ) -> None:
+         self.strategy = strategy
+         self.exchange = exchange
+         self.data = data
+
+         self.instruments = []
+         self.positions = {}
+         for instr in instruments:
+             # process instruments - need to find convertors etc
+             # . . . .
+             self.instruments.append(instr)
+             p = exchange.get_position(instr)
+             self.positions[instr.symbol] = Position(instr)
+
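A minimal sketch of how these interfaces compose (illustrative only: no concrete DataProvider or ExchangeServiceProvider ships in this file, so PaperExchange and CsvFeed below are hypothetical stand-ins):

    from qubx.core.strategy import E_QUOTE, Event, IStrategy, TradingContext

    class EchoStrategy(IStrategy):
        # React to quote events; a real strategy would return a list of Signal
        def process_event(self, time, event: Event):
            if event.type == E_QUOTE:
                print(time, event.instrument)
            return None

    # Wiring, assuming concrete providers exist elsewhere:
    # ctx = TradingContext(EchoStrategy(), PaperExchange(), CsvFeed(), instruments)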
qubx/core/utils.pyx ADDED
@@ -0,0 +1,54 @@
+ from qubx.utils import convert_tf_str_td64
+ import numpy as np
+ cimport numpy as np
+
+
+ NS = 1_000_000_000
+
+ cpdef recognize_time(time):
+     return np.datetime64(time, 'ns') if isinstance(time, str) else np.datetime64(time, 'ms')
+
+
+ cpdef str time_to_str(long long t, str units = 'ns'):
+     return str(np.datetime64(t, units)) #.isoformat()
+
+
+ cpdef str time_delta_to_str(long long d):
+     """
+     Convert time delta to pretty print format
+
+     :param d: time delta in nanoseconds
+     :return: formatted string like '1D2H30Min'
+     """
+     days, seconds = divmod(d, 86400*NS)
+     hours, seconds = divmod(seconds, 3600*NS)
+     minutes, seconds = divmod(seconds, 60*NS)
+     seconds, rem = divmod(seconds, NS)
+     r = ''
+     if days > 0:
+         r += '%dD' % days
+     if hours > 0:
+         r += '%dH' % hours
+     if minutes > 0:
+         r += '%dMin' % minutes
+     if seconds > 0:
+         r += '%dS' % seconds
+     if rem > 0:
+         r += '%dmS' % (rem // 1000000)
+     return r
+
+
+ cpdef recognize_timeframe(timeframe):
+     tf = timeframe
+     if isinstance(timeframe, str):
+         tf = np.int64(convert_tf_str_td64(timeframe).item().total_seconds() * NS)
+
+     elif isinstance(timeframe, (int, float)) and timeframe >= 0:
+         tf = timeframe
+
+     elif isinstance(timeframe, np.timedelta64):
+         tf = np.int64(timeframe.item().total_seconds() * NS)
+
+     else:
+         raise ValueError('Unknown timeframe type !')
+     return tf
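A quick sanity check of the arithmetic above (a pure-Python sketch; the real functions are compiled Cython, so this only restates their expected results):

    import numpy as np

    NS = 1_000_000_000

    # time_delta_to_str cascades divmod by day/hour/minute/second in nanoseconds,
    # so 90 minutes expressed as 90 * 60 * NS should render as '1H30Min'
    d = 90 * 60 * NS

    # recognize_timeframe normalizes a np.timedelta64 to integer nanoseconds
    tf = np.int64(np.timedelta64(5, 'm').item().total_seconds() * NS)
    assert tf == 300 * NS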
qubx/data/readers.py ADDED
@@ -0,0 +1,387 @@
+ from typing import List, Union, Optional, Iterable, Any
+ from os.path import exists
+ import numpy as np
+ import pandas as pd
+ import pyarrow as pa
+ from pyarrow import csv
+
+ from qubx import logger
+ from qubx.core.series import TimeSeries, OHLCV, time_as_nsec, Quote, Trade
+ from qubx.utils.time import infer_series_frequency, handle_start_stop
+
+ _DT = lambda x: pd.Timedelta(x).to_numpy().item()
+ D1, H1 = _DT('1D'), _DT('1H')
+
+ DEFAULT_DAILY_SESSION = (_DT('00:00:00.100'), _DT('23:59:59.900'))
+ STOCK_DAILY_SESSION = (_DT('9:30:00.100'), _DT('15:59:59.900'))
+ CME_FUTURES_DAILY_SESSION = (_DT('8:30:00.100'), _DT('15:14:59.900'))
+
+
+ def _recognize_t(t: Union[int, str], defaultvalue, timeunit) -> int:
+     if isinstance(t, (str, pd.Timestamp)):
+         try:
+             return np.datetime64(t, timeunit)
+         except:
+             pass
+     return defaultvalue
+
+
+ def _find_column_index_in_list(xs, *args):
+     xs = [x.lower() for x in xs]
+     for a in args:
+         ai = a.lower()
+         if ai in xs:
+             return xs.index(ai)
+     raise IndexError(f"Can't find any from {args} in list: {xs}")
+
+
+ class DataProcessor:
+     """
+     Common interface for data processor with default aggregating into list implementation
+     """
+     def __init__(self) -> None:
+         self.buffer = {}
+         self._column_names = []
+
+     def start_processing(self, column_names: List[str]):
+         self._column_names = column_names
+         self.buffer = {c: [] for c in column_names}
+
+     def process_data(self, columns_data: list) -> Optional[Iterable]:
+         for i, c in enumerate(columns_data):
+             self.buffer[self._column_names[i]].append(c)
+         return None
+
+     def get_result(self) -> Any:
+         return self.buffer
+
+
+ class DataReader:
+     """
+     Common interface for data reader
+     """
+     _processor: DataProcessor
+
+     def __init__(self, processor=None) -> None:
+         self._processor = DataProcessor() if processor is None else processor
+
+     def read(self, start: Optional[str]=None, stop: Optional[str]=None) -> Any:
+         pass
+
+
+ class QuotesDataProcessor(DataProcessor):
+     """
+     Process quotes data and collect them as list
+     """
+     def start_processing(self, fieldnames: List[str]):
+         self.buffer = list()
+         self._time_idx = _find_column_index_in_list(fieldnames, 'time', 'timestamp', 'datetime')
+         self._bid_idx = _find_column_index_in_list(fieldnames, 'bid')
+         self._ask_idx = _find_column_index_in_list(fieldnames, 'ask')
+         self._bidvol_idx = _find_column_index_in_list(fieldnames, 'bidvol', 'bid_vol', 'bidsize', 'bid_size')
+         self._askvol_idx = _find_column_index_in_list(fieldnames, 'askvol', 'ask_vol', 'asksize', 'ask_size')
+
+     def process_data(self, columns_data: list) -> Optional[Iterable]:
+         tms = columns_data[self._time_idx]
+         bids = columns_data[self._bid_idx]
+         asks = columns_data[self._ask_idx]
+         bidvol = columns_data[self._bidvol_idx]
+         askvol = columns_data[self._askvol_idx]
+         for i in range(len(tms)):
+             self.buffer.append(Quote(tms[i], bids[i], asks[i], bidvol[i], askvol[i]))
+         return None
+
+
+ class QuotesFromOHLCVDataProcessor(DataProcessor):
+     """
+     Process OHLC and generate Quotes (+ Trades) from it
+     """
+     def __init__(self, trades: bool=False,
+                  default_bid_size=1e9,   # default bid/ask is big
+                  default_ask_size=1e9,   # default bid/ask is big
+                  daily_session_start_end=DEFAULT_DAILY_SESSION,
+                  spread=0.0,
+                  ) -> None:
+         super().__init__()
+         self._trades = trades
+         self._bid_size = default_bid_size
+         self._ask_size = default_ask_size
+         self._s2 = spread / 2.0
+         self._d_session_start = daily_session_start_end[0]
+         self._d_session_end = daily_session_start_end[1]
+
+     def start_processing(self, fieldnames: List[str]):
+         self._time_idx = _find_column_index_in_list(fieldnames, 'time', 'timestamp', 'datetime', 'date')
+         self._open_idx = _find_column_index_in_list(fieldnames, 'open')
+         self._high_idx = _find_column_index_in_list(fieldnames, 'high')
+         self._low_idx = _find_column_index_in_list(fieldnames, 'low')
+         self._close_idx = _find_column_index_in_list(fieldnames, 'close')
+         self._volume_idx = None
+         self._timeframe = None
+
+         try:
+             self._volume_idx = _find_column_index_in_list(fieldnames, 'volume', 'vol')
+         except:
+             pass
+
+         self.buffer = []
+
+     def process_data(self, data: list) -> Optional[Iterable]:
+         s2 = self._s2
+         if self._timeframe is None:
+             _freq = infer_series_frequency(data[self._time_idx])
+             self._timeframe = _freq.astype('timedelta64[s]')
+
+             # - timestamps when we emit simulated quotes
+             dt = _freq.astype('timedelta64[ns]').item()
+             if dt < D1:
+                 self._t_start = dt // 10
+                 self._t_mid1 = dt // 2 - dt // 10
+                 self._t_mid2 = dt // 2 + dt // 10
+                 self._t_end = dt - dt // 10
+             else:
+                 self._t_start = self._d_session_start
+                 self._t_mid1 = dt // 2 - H1
+                 self._t_mid2 = dt // 2 + H1
+                 self._t_end = self._d_session_end
+
+         # - input data
+         times = data[self._time_idx]
+         opens = data[self._open_idx]
+         highs = data[self._high_idx]
+         lows = data[self._low_idx]
+         closes = data[self._close_idx]
+         volumes = data[self._volume_idx] if self._volume_idx else None
+         if volumes is None and self._trades:
+             logger.warning("Input OHLC data doesn't contain volume information so trades can't be emulated !")
+             self._trades = False
+
+         for i in range(len(times)):
+             ti, o, h, l, c = times[i].astype('datetime64[ns]'), opens[i], highs[i], lows[i], closes[i]
+
+             if self._trades:
+                 rv = volumes[i] / (h - l)
+
+             # - opening quote
+             self.buffer.append(Quote(ti + self._t_start, o - s2, o + s2, self._bid_size, self._ask_size))
+
+             if c >= o:
+                 if self._trades:
+                     self.buffer.append(Trade(ti + self._t_start, o - s2, rv * (o - l)))   # sell 1
+                 self.buffer.append(Quote(ti + self._t_mid1, l - s2, l + s2, self._bid_size, self._ask_size))
+
+                 if self._trades:
+                     self.buffer.append(Trade(ti + self._t_mid1, l + s2, rv * (c - o)))   # buy 1
+                 self.buffer.append(Quote(ti + self._t_mid2, h - s2, h + s2, self._bid_size, self._ask_size))
+
+                 if self._trades:
+                     self.buffer.append(Trade(ti + self._t_mid2, h - s2, rv * (h - c)))   # sell 2
+             else:
+                 if self._trades:
+                     self.buffer.append(Trade(ti + self._t_start, o + s2, rv * (h - o)))   # buy 1
+                 self.buffer.append(Quote(ti + self._t_mid1, h - s2, h + s2, self._bid_size, self._ask_size))
+
+                 if self._trades:
+                     self.buffer.append(Trade(ti + self._t_mid1, h - s2, rv * (o - c)))   # sell 1
+                 self.buffer.append(Quote(ti + self._t_mid2, l - s2, l + s2, self._bid_size, self._ask_size))
+
+                 if self._trades:
+                     self.buffer.append(Trade(ti + self._t_mid2, l + s2, rv * (c - l)))   # buy 2
+
+             # - closing quote
+             self.buffer.append(Quote(ti + self._t_end, c - s2, c + s2, self._bid_size, self._ask_size))
+
+         return None
+
+     def get_result(self) -> Any:
+         return self.buffer
+
+
+ class OhlcvDataProcessor(DataProcessor):
+     """
+     Process data and convert it to Qube OHLCV timeseries
+     """
+     def __init__(self, name: str) -> None:
+         super().__init__()
+         self._name = name
+
+     def start_processing(self, fieldnames: List[str]):
+         self._time_idx = _find_column_index_in_list(fieldnames, 'time', 'timestamp', 'datetime', 'date')
+         self._open_idx = _find_column_index_in_list(fieldnames, 'open')
+         self._high_idx = _find_column_index_in_list(fieldnames, 'high')
+         self._low_idx = _find_column_index_in_list(fieldnames, 'low')
+         self._close_idx = _find_column_index_in_list(fieldnames, 'close')
+         self._volume_idx = None
+         self._timeframe = None
+
+         try:
+             self._volume_idx = _find_column_index_in_list(fieldnames, 'volume', 'vol')
+         except:
+             pass
+
+         self.ohlc = None
+
+     def process_data(self, data: list) -> Optional[Iterable]:
+         if self._timeframe is None:
+             self._timeframe = infer_series_frequency(data[self._time_idx]).astype('timedelta64[s]')
+
+             # - create instance after first data received
+             self.ohlc = OHLCV(self._name, self._timeframe)
+
+         self.ohlc.append_data(
+             data[self._time_idx],
+             data[self._open_idx], data[self._high_idx], data[self._low_idx], data[self._close_idx],
+             data[self._volume_idx] if self._volume_idx else []
+         )
+         return None
+
+     def get_result(self) -> Any:
+         return self.ohlc
+
+
+ class OhlcvPandasDataProcessor(DataProcessor):
+     """
+     Process data and convert it to pandas OHLCV dataframes
+     """
+     def __init__(self) -> None:
+         super().__init__()
+
+     def start_processing(self, fieldnames: List[str]):
+         self._time_idx = _find_column_index_in_list(fieldnames, 'time', 'timestamp', 'datetime', 'date')
+         self._open_idx = _find_column_index_in_list(fieldnames, 'open')
+         self._high_idx = _find_column_index_in_list(fieldnames, 'high')
+         self._low_idx = _find_column_index_in_list(fieldnames, 'low')
+         self._close_idx = _find_column_index_in_list(fieldnames, 'close')
+         self._volume_idx = None
+         self._timeframe = None
+
+         try:
+             self._volume_idx = _find_column_index_in_list(fieldnames, 'volume', 'vol')
+         except:
+             pass
+
+         # self.ohlc = pd.DataFrame()
+
+         self._time = np.array([], dtype=np.datetime64)
+         self._open = np.array([])
+         self._high = np.array([])
+         self._low = np.array([])
+         self._close = np.array([])
+         self._volume = np.array([])
+
+     def process_data(self, data: list) -> Optional[Iterable]:
+         # p = pd.DataFrame({
+         #     'open': data[self._open_idx],
+         #     'high': data[self._high_idx],
+         #     'low': data[self._low_idx],
+         #     'close': data[self._close_idx],
+         #     'volume': data[self._volume_idx] if self._volume_idx else []},
+         #     index = data[self._time_idx]
+         # )
+         # self.ohlc = pd.concat((self.ohlc, p), axis=0, sort=True, copy=True)
+         self._time = np.concatenate((self._time, data[self._time_idx]))
+         self._open = np.concatenate((self._open, data[self._open_idx]))
+         self._high = np.concatenate((self._high, data[self._high_idx]))
+         self._low = np.concatenate((self._low, data[self._low_idx]))
+         self._close = np.concatenate((self._close, data[self._close_idx]))
+         if self._volume_idx:
+             self._volume = np.concatenate((self._volume, data[self._volume_idx]))
+
+         return None
+
+     def get_result(self) -> Any:
+         # self.ohlc.index.name = 'time'
+         # return self.ohlc
+
+         return pd.DataFrame(
+             {
+                 'open': self._open,
+                 'high': self._high,
+                 'low': self._low,
+                 'close': self._close,
+                 'volume': self._volume if self._volume_idx else []
+             },
+             index = self._time
+         ).sort_index()
+
+
+ class CsvDataReader(DataReader):
+     """
+     CSV data file reader
+     """
+
+     def __init__(self, path: str, processor: DataProcessor=None, timestamp_parsers=None) -> None:
+         if not exists(path):
+             raise ValueError(f"CSV file not found at {path}")
+         super().__init__(processor)
+         self.time_parsers = timestamp_parsers
+         self.path = path
+
+     def __find_time_idx(self, arr: pa.ChunkedArray, v) -> int:
+         ix = arr.index(v).as_py()
+         if ix < 0:
+             for c in arr.iterchunks():
+                 a = c.to_numpy()
+                 ix = np.searchsorted(a, v, side='right')
+                 if ix > 0 and ix < len(c):
+                     ix = arr.index(a[ix]).as_py() - 1
+                     break
+         return ix
+
+     def read(self, start: Optional[str]=None, stop: Optional[str]=None) -> Any:
+         convert_options = None
+         if self.time_parsers:
+             convert_options = csv.ConvertOptions(timestamp_parsers=self.time_parsers)
+
+         table = csv.read_csv(
+             self.path,
+             parse_options=csv.ParseOptions(ignore_empty_lines=True),
+             convert_options=convert_options
+         )
+         fieldnames = table.column_names
+
+         # - try to find range to load
+         start_idx, stop_idx = 0, table.num_rows
+         try:
+             _time_field_idx = _find_column_index_in_list(fieldnames, 'time', 'timestamp', 'datetime', 'date')
+             _time_type = table.field(_time_field_idx).type
+             _time_unit = _time_type.unit if hasattr(_time_type, 'unit') else 's'
+             _time_data = table[_time_field_idx]
+
+             # - check if need convert time to primitive types (i.e. Date32 -> timestamp[x])
+             _time_cast_function = lambda xs: xs
+             if _time_type != pa.timestamp(_time_unit):
+                 _time_cast_function = lambda xs: xs.cast(pa.timestamp(_time_unit))
+                 _time_data = _time_cast_function(_time_data)
+
+             # - preprocessing start and stop
+             t_0, t_1 = handle_start_stop(start, stop, convert=lambda x: _recognize_t(x, None, _time_unit))
+
+             # - check requested range
+             if t_0:
+                 start_idx = self.__find_time_idx(_time_data, t_0)
+                 if start_idx >= table.num_rows:
+                     # no data for requested start date
+                     return None
+
+             if t_1:
+                 stop_idx = self.__find_time_idx(_time_data, t_1)
+                 if stop_idx < 0 or stop_idx < start_idx:
+                     stop_idx = table.num_rows
+
+         except Exception as exc:
+             logger.warning(exc)
+             logger.info('loading whole file')
+
+         length = (stop_idx - start_idx + 1)
+         self._processor.start_processing(fieldnames)
+         selected_table = table.slice(start_idx, length)
+         n_chunks = selected_table[table.column_names[0]].num_chunks
+         for n in range(n_chunks):
+             data = [
+                 # - in some cases we need to convert time index to primitive type
+                 _time_cast_function(selected_table[k].chunk(n)).to_numpy() if k == _time_field_idx else selected_table[k].chunk(n).to_numpy()
+                 for k in range(selected_table.num_columns)]
+             self._processor.process_data(data)
+         return self._processor.get_result()
+
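A usage sketch for the reader/processor pair (the CSV path and its column layout are hypothetical; any file with time/open/high/low/close/volume columns should fit):

    from qubx.data.readers import CsvDataReader, OhlcvPandasDataProcessor

    # Read a time slice of an OHLCV CSV into a pandas DataFrame indexed by time
    reader = CsvDataReader('data/BTCUSDT_1h.csv', processor=OhlcvPandasDataProcessor())
    ohlc = reader.read(start='2023-01-01', stop='2023-03-31')

With no processor given, DataReader falls back to the default DataProcessor, which returns a plain dict of per-column buffers.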
qubx/math/__init__.py ADDED
@@ -0,0 +1 @@
+ from .stats import percentile_rank
qubx/math/stats.py ADDED
@@ -0,0 +1,42 @@
+ import numpy as np
+ from qubx.utils import sbp
+
+
+ def percentile_rank(x: np.ndarray, v, pctls=np.arange(1, 101)):
+     """
+     Find percentile rank of value v
+     :param x: values array
+     :param v: value to be ranked
+     :param pctls: percentiles
+     :return: rank
+
+     >>> percentile_rank(np.random.randn(1000), 1.69)
+     95
+     >>> percentile_rank(np.random.randn(1000), 1.69, [10,50,100])
+     2
+     """
+     return np.argmax(np.sign(np.append(np.percentile(x, pctls), np.inf) - v))
+
+
+ def compare_to_norm(xs, xranges=None):
+     """
+     Compare distribution from xs against normal using estimated mean and std
+     """
+     import scipy.stats as stats
+     import matplotlib.pyplot as plt
+     import seaborn as sns
+
+     _m, _s = np.mean(xs), np.std(xs)
+     fit = stats.norm.pdf(sorted(xs), _m, _s)
+
+     sbp(12, 1)
+     plt.plot(sorted(xs), fit, 'r--', lw=2, label='N(%.2f, %.2f)' % (_m, _s))
+     plt.legend(loc='upper right')
+
+     sns.kdeplot(xs, color='g', label='Data', shade=True)
+     if xranges is not None and len(xranges) > 1:
+         plt.xlim(xranges)
+     plt.legend(loc='upper right')
+
+     sbp(12, 2)
+     stats.probplot(xs, dist="norm", sparams=(_m, _s), plot=plt)
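A worked example of percentile_rank: it appends +inf to the computed percentile edges and returns the index of the first edge at or above v, i.e. the bucket v falls into (the arrays here are illustrative):

    import numpy as np
    from qubx.math.stats import percentile_rank

    x = np.arange(1, 1001)                         # uniform sample 1..1000
    percentile_rank(x, 501, pctls=[10, 50, 100])   # -> 2: above the 50th edge, at or below the 100th
    percentile_rank(x, 1e6)                        # -> 100: beyond every edge, caught by +inf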
qubx/ta/__init__.py ADDED
File without changes