Qubx 0.0.1 (cp311-cp311-manylinux_2_35_x86_64.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of Qubx might be problematic.
- qubx/__init__.py +164 -0
- qubx/_nb_magic.py +69 -0
- qubx/core/__init__.py +0 -0
- qubx/core/basics.py +224 -0
- qubx/core/lookups.py +152 -0
- qubx/core/series.cpython-311-x86_64-linux-gnu.so +0 -0
- qubx/core/series.pxd +94 -0
- qubx/core/series.pyx +763 -0
- qubx/core/strategy.py +89 -0
- qubx/core/utils.cpython-311-x86_64-linux-gnu.so +0 -0
- qubx/core/utils.pyx +54 -0
- qubx/data/readers.py +387 -0
- qubx/math/__init__.py +1 -0
- qubx/math/stats.py +42 -0
- qubx/ta/__init__.py +0 -0
- qubx/ta/indicators.cpython-311-x86_64-linux-gnu.so +0 -0
- qubx/ta/indicators.pyx +258 -0
- qubx/utils/__init__.py +3 -0
- qubx/utils/_pyxreloader.py +271 -0
- qubx/utils/charting/mpl_helpers.py +182 -0
- qubx/utils/marketdata/binance.py +212 -0
- qubx/utils/misc.py +234 -0
- qubx/utils/pandas.py +206 -0
- qubx/utils/time.py +145 -0
- qubx-0.0.1.dist-info/METADATA +39 -0
- qubx-0.0.1.dist-info/RECORD +27 -0
- qubx-0.0.1.dist-info/WHEEL +4 -0
qubx/core/strategy.py
ADDED
@@ -0,0 +1,89 @@
"""
 # All interfaces related to strategy etc
"""
from typing import Callable, Dict, List, Optional, Union
import numpy as np
from dataclasses import dataclass
from qubx.core.basics import ZERO_COSTS, Instrument, Position, Signal, TransactionCostsCalculator, dt_64

E_TIMER = 1
E_QUOTE = 2
E_TRADE = 3
E_OPENBOOK = 4
E_HIST_DATA_READY = 100
E_HIST_DATA_ERROR = -100


@dataclass
class Event:
    time: dt_64
    type: int  # ??
    instrument: Instrument


DataListener = Callable[[Instrument, int], None]
ExchListener = Callable[[Instrument, int], None]


class DataProvider:
    def add_data_listener(self, listener: DataListener):
        pass

    def request_historical_data(self,
                                instruments: List[Instrument],
                                timeframe: str,
                                start: Union[str, int, dt_64],
                                stop: Union[str, int, dt_64]):
        pass


class ExchangeServiceProvider:
    def add_exchange_listener(self, listener: ExchListener):
        pass

    def get_position(self, instrument: Instrument) -> Position:
        pass

    def get_tcc(self, instrument: Instrument) -> TransactionCostsCalculator:
        return ZERO_COSTS

    def time(self) -> dt_64:
        """
        Returns current time
        """
        pass


class IStrategy:
    ctx: 'TradingContext'

    def on_init(self):
        pass

    def process_event(self, time: dt_64, event: Event) -> Optional[List[Signal]]:
        return None


class TradingContext:
    strategy: IStrategy
    exchange: ExchangeServiceProvider
    data: DataProvider
    instruments: List[Instrument]
    positions: Dict[str, Position]

    def __init__(
        self,
        strategy: IStrategy,
        exchange: ExchangeServiceProvider,
        data: DataProvider,
        instruments: List[Instrument]
    ) -> None:
        self.strategy = strategy
        self.exchange = exchange
        self.data = data

        self.instruments = []
        self.positions = {}
        for instr in instruments:
            # process instruments - need to find convertors etc
            # . . . .
            self.instruments.append(instr)
            p = exchange.get_position(instr)
            self.positions[instr.symbol] = Position(instr)
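To make the intended wiring concrete, here is a minimal sketch of how these interfaces fit together. MyStrategy and the bare provider instances below are hypothetical illustrations, not part of the package, and the history-request arguments are made up:

from qubx.core.strategy import (
    IStrategy, TradingContext, DataProvider, ExchangeServiceProvider, E_QUOTE)

class MyStrategy(IStrategy):
    def on_init(self):
        # ask the context's data provider for a month of hourly history (illustrative arguments)
        self.ctx.data.request_historical_data(self.ctx.instruments, '1h', '2023-01-01', '2023-02-01')

    def process_event(self, time, event):
        if event.type == E_QUOTE:
            pos = self.ctx.positions.get(event.instrument.symbol)
            # ... inspect pos and return a list of Signal objects, or None for no action
        return None

ctx = TradingContext(MyStrategy(), ExchangeServiceProvider(), DataProvider(), instruments=[])
ctx.strategy.ctx = ctx  # the constructor shown above never sets strategy.ctx, so the caller must

Note that TradingContext.__init__ fetches p = exchange.get_position(instr) but then builds a fresh Position(instr) anyway, so the exchange-side position is currently unused.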
qubx/core/utils.cpython-311-x86_64-linux-gnu.so
Binary file
qubx/core/utils.pyx
ADDED
@@ -0,0 +1,54 @@
from qubx.utils import convert_tf_str_td64
import numpy as np
cimport numpy as np


NS = 1_000_000_000

cpdef recognize_time(time):
    return np.datetime64(time, 'ns') if isinstance(time, str) else np.datetime64(time, 'ms')


cpdef str time_to_str(long long t, str units = 'ns'):
    return str(np.datetime64(t, units))  # .isoformat()


cpdef str time_delta_to_str(long long d):
    """
    Convert a time delta to pretty print format

    :param d: time delta in nanoseconds
    :return: formatted string, e.g. '1D2H3Min4S'
    """
    days, seconds = divmod(d, 86400 * NS)
    hours, seconds = divmod(seconds, 3600 * NS)
    minutes, seconds = divmod(seconds, 60 * NS)
    seconds, rem = divmod(seconds, NS)
    r = ''
    if days > 0:
        r += '%dD' % days
    if hours > 0:
        r += '%dH' % hours
    if minutes > 0:
        r += '%dMin' % minutes
    if seconds > 0:
        r += '%dS' % seconds
    if rem > 0:
        r += '%dmS' % (rem // 1_000_000)
    return r


cpdef recognize_timeframe(timeframe):
    tf = timeframe
    if isinstance(timeframe, str):
        tf = np.int64(convert_tf_str_td64(timeframe).item().total_seconds() * NS)

    elif isinstance(timeframe, (int, float)) and timeframe >= 0:
        tf = timeframe

    elif isinstance(timeframe, np.timedelta64):
        tf = np.int64(timeframe.item().total_seconds() * NS)

    else:
        raise ValueError('Unknown timeframe type!')
    return tf
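A quick usage sketch of these compiled helpers, assuming the built extension imports as qubx.core.utils; the exact value returned for a '1Min' string depends on convert_tf_str_td64:

from qubx.core.utils import time_delta_to_str, recognize_timeframe

NS = 1_000_000_000

# 1 hour, 30 minutes, 1.5 seconds expressed as integer nanoseconds
print(time_delta_to_str(3600*NS + 30*60*NS + NS + 500_000_000))  # '1H30Min1S500mS'

# strings and timedelta64 values are normalized to integer nanoseconds
print(recognize_timeframe('1Min'))  # 60000000000, assuming '1Min' parses as 60 seconds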
qubx/data/readers.py
ADDED
@@ -0,0 +1,387 @@
from typing import List, Union, Optional, Iterable, Any
from os.path import exists

import numpy as np
import pandas as pd
import pyarrow as pa
from pyarrow import csv

from qubx import logger
from qubx.core.series import TimeSeries, OHLCV, time_as_nsec, Quote, Trade
from qubx.utils.time import infer_series_frequency, handle_start_stop

_DT = lambda x: pd.Timedelta(x).to_numpy().item()
D1, H1 = _DT('1D'), _DT('1H')

DEFAULT_DAILY_SESSION = (_DT('00:00:00.100'), _DT('23:59:59.900'))
STOCK_DAILY_SESSION = (_DT('9:30:00.100'), _DT('15:59:59.900'))
CME_FUTURES_DAILY_SESSION = (_DT('8:30:00.100'), _DT('15:14:59.900'))


def _recognize_t(t: Union[int, str], defaultvalue, timeunit) -> int:
    if isinstance(t, (str, pd.Timestamp)):
        try:
            return np.datetime64(t, timeunit)
        except:
            pass
    return defaultvalue


def _find_column_index_in_list(xs, *args):
    xs = [x.lower() for x in xs]
    for a in args:
        ai = a.lower()
        if ai in xs:
            return xs.index(ai)
    raise IndexError(f"Can't find any from {args} in list: {xs}")


class DataProcessor:
    """
    Common interface for data processors; the default implementation aggregates incoming columns into lists.
    """
    def __init__(self) -> None:
        self.buffer = {}
        self._column_names = []

    def start_processing(self, column_names: List[str]):
        self._column_names = column_names
        self.buffer = {c: [] for c in column_names}

    def process_data(self, columns_data: list) -> Optional[Iterable]:
        for i, c in enumerate(columns_data):
            self.buffer[self._column_names[i]].append(c)
        return None

    def get_result(self) -> Any:
        return self.buffer


class DataReader:
    """
    Common interface for data readers
    """
    _processor: DataProcessor

    def __init__(self, processor=None) -> None:
        self._processor = DataProcessor() if processor is None else processor

    def read(self, start: Optional[str]=None, stop: Optional[str]=None) -> Any:
        pass


class QuotesDataProcessor(DataProcessor):
    """
    Process quote data and collect the records as a list
    """
    def start_processing(self, fieldnames: List[str]):
        self.buffer = list()
        self._time_idx = _find_column_index_in_list(fieldnames, 'time', 'timestamp', 'datetime')
        self._bid_idx = _find_column_index_in_list(fieldnames, 'bid')
        self._ask_idx = _find_column_index_in_list(fieldnames, 'ask')
        self._bidvol_idx = _find_column_index_in_list(fieldnames, 'bidvol', 'bid_vol', 'bidsize', 'bid_size')
        self._askvol_idx = _find_column_index_in_list(fieldnames, 'askvol', 'ask_vol', 'asksize', 'ask_size')

    def process_data(self, columns_data: list) -> Optional[Iterable]:
        tms = columns_data[self._time_idx]
        bids = columns_data[self._bid_idx]
        asks = columns_data[self._ask_idx]
        bidvol = columns_data[self._bidvol_idx]
        askvol = columns_data[self._askvol_idx]
        for i in range(len(tms)):
            self.buffer.append(Quote(tms[i], bids[i], asks[i], bidvol[i], askvol[i]))
        return None


class QuotesFromOHLCVDataProcessor(DataProcessor):
    """
    Process OHLC bars and generate Quotes (and optionally Trades) from them
    """
    def __init__(self, trades: bool=False,
                 default_bid_size=1e9,  # default bid/ask is big
                 default_ask_size=1e9,  # default bid/ask is big
                 daily_session_start_end=DEFAULT_DAILY_SESSION,
                 spread=0.0,
                 ) -> None:
        super().__init__()
        self._trades = trades
        self._bid_size = default_bid_size
        self._ask_size = default_ask_size
        self._s2 = spread / 2.0
        self._d_session_start = daily_session_start_end[0]
        self._d_session_end = daily_session_start_end[1]

    def start_processing(self, fieldnames: List[str]):
        self._time_idx = _find_column_index_in_list(fieldnames, 'time', 'timestamp', 'datetime', 'date')
        self._open_idx = _find_column_index_in_list(fieldnames, 'open')
        self._high_idx = _find_column_index_in_list(fieldnames, 'high')
        self._low_idx = _find_column_index_in_list(fieldnames, 'low')
        self._close_idx = _find_column_index_in_list(fieldnames, 'close')
        self._volume_idx = None
        self._timeframe = None

        try:
            self._volume_idx = _find_column_index_in_list(fieldnames, 'volume', 'vol')
        except:
            pass

        self.buffer = []

    def process_data(self, data: list) -> Optional[Iterable]:
        s2 = self._s2
        if self._timeframe is None:
            _freq = infer_series_frequency(data[self._time_idx])
            self._timeframe = _freq.astype('timedelta64[s]')

            # - timestamps when we emit simulated quotes
            dt = _freq.astype('timedelta64[ns]').item()
            if dt < D1:
                self._t_start = dt // 10
                self._t_mid1 = dt // 2 - dt // 10
                self._t_mid2 = dt // 2 + dt // 10
                self._t_end = dt - dt // 10
            else:
                self._t_start = self._d_session_start
                self._t_mid1 = dt // 2 - H1
                self._t_mid2 = dt // 2 + H1
                self._t_end = self._d_session_end

        # - input data
        times = data[self._time_idx]
        opens = data[self._open_idx]
        highs = data[self._high_idx]
        lows = data[self._low_idx]
        closes = data[self._close_idx]
        volumes = data[self._volume_idx] if self._volume_idx else None
        if volumes is None and self._trades:
            logger.warning("Input OHLC data doesn't contain volume information so trades can't be emulated!")
            self._trades = False

        for i in range(len(times)):
            ti, o, h, l, c = times[i].astype('datetime64[ns]'), opens[i], highs[i], lows[i], closes[i]

            if self._trades:
                rv = volumes[i] / (h - l)

            # - opening quote
            self.buffer.append(Quote(ti + self._t_start, o - s2, o + s2, self._bid_size, self._ask_size))

            if c >= o:
                if self._trades:
                    self.buffer.append(Trade(ti + self._t_start, o - s2, rv * (o - l)))  # sell 1
                self.buffer.append(Quote(ti + self._t_mid1, l - s2, l + s2, self._bid_size, self._ask_size))

                if self._trades:
                    self.buffer.append(Trade(ti + self._t_mid1, l + s2, rv * (c - o)))  # buy 1
                self.buffer.append(Quote(ti + self._t_mid2, h - s2, h + s2, self._bid_size, self._ask_size))

                if self._trades:
                    self.buffer.append(Trade(ti + self._t_mid2, h - s2, rv * (h - c)))  # sell 2
            else:
                if self._trades:
                    self.buffer.append(Trade(ti + self._t_start, o + s2, rv * (h - o)))  # buy 1
                self.buffer.append(Quote(ti + self._t_mid1, h - s2, h + s2, self._bid_size, self._ask_size))

                if self._trades:
                    self.buffer.append(Trade(ti + self._t_mid1, h - s2, rv * (o - c)))  # sell 1
                self.buffer.append(Quote(ti + self._t_mid2, l - s2, l + s2, self._bid_size, self._ask_size))

                if self._trades:
                    self.buffer.append(Trade(ti + self._t_mid2, l + s2, rv * (c - l)))  # buy 2

            # - closing quote
            self.buffer.append(Quote(ti + self._t_end, c - s2, c + s2, self._bid_size, self._ask_size))

        return None

    def get_result(self) -> Any:
        return self.buffer


class OhlcvDataProcessor(DataProcessor):
    """
    Process data and convert it to Qube OHLCV timeseries
    """
    def __init__(self, name: str) -> None:
        super().__init__()
        self._name = name

    def start_processing(self, fieldnames: List[str]):
        self._time_idx = _find_column_index_in_list(fieldnames, 'time', 'timestamp', 'datetime', 'date')
        self._open_idx = _find_column_index_in_list(fieldnames, 'open')
        self._high_idx = _find_column_index_in_list(fieldnames, 'high')
        self._low_idx = _find_column_index_in_list(fieldnames, 'low')
        self._close_idx = _find_column_index_in_list(fieldnames, 'close')
        self._volume_idx = None
        self._timeframe = None

        try:
            self._volume_idx = _find_column_index_in_list(fieldnames, 'volume', 'vol')
        except:
            pass

        self.ohlc = None

    def process_data(self, data: list) -> Optional[Iterable]:
        if self._timeframe is None:
            self._timeframe = infer_series_frequency(data[self._time_idx]).astype('timedelta64[s]')

            # - create instance after first data received
            self.ohlc = OHLCV(self._name, self._timeframe)

        self.ohlc.append_data(
            data[self._time_idx],
            data[self._open_idx], data[self._high_idx], data[self._low_idx], data[self._close_idx],
            data[self._volume_idx] if self._volume_idx else []
        )
        return None

    def get_result(self) -> Any:
        return self.ohlc


class OhlcvPandasDataProcessor(DataProcessor):
    """
    Process data and convert it to pandas OHLCV dataframes
    """
    def __init__(self) -> None:
        super().__init__()

    def start_processing(self, fieldnames: List[str]):
        self._time_idx = _find_column_index_in_list(fieldnames, 'time', 'timestamp', 'datetime', 'date')
        self._open_idx = _find_column_index_in_list(fieldnames, 'open')
        self._high_idx = _find_column_index_in_list(fieldnames, 'high')
        self._low_idx = _find_column_index_in_list(fieldnames, 'low')
        self._close_idx = _find_column_index_in_list(fieldnames, 'close')
        self._volume_idx = None
        self._timeframe = None

        try:
            self._volume_idx = _find_column_index_in_list(fieldnames, 'volume', 'vol')
        except:
            pass

        self._time = np.array([], dtype=np.datetime64)
        self._open = np.array([])
        self._high = np.array([])
        self._low = np.array([])
        self._close = np.array([])
        self._volume = np.array([])

    def process_data(self, data: list) -> Optional[Iterable]:
        self._time = np.concatenate((self._time, data[self._time_idx]))
        self._open = np.concatenate((self._open, data[self._open_idx]))
        self._high = np.concatenate((self._high, data[self._high_idx]))
        self._low = np.concatenate((self._low, data[self._low_idx]))
        self._close = np.concatenate((self._close, data[self._close_idx]))
        if self._volume_idx:
            self._volume = np.concatenate((self._volume, data[self._volume_idx]))

        return None

    def get_result(self) -> Any:
        return pd.DataFrame(
            {
                'open': self._open,
                'high': self._high,
                'low': self._low,
                'close': self._close,
                'volume': self._volume if self._volume_idx else []
            },
            index = self._time
        ).sort_index()


class CsvDataReader(DataReader):
    """
    CSV data file reader
    """

    def __init__(self, path: str, processor: Optional[DataProcessor]=None, timestamp_parsers=None) -> None:
        if not exists(path):
            raise ValueError(f"CSV file not found at {path}")
        super().__init__(processor)
        self.time_parsers = timestamp_parsers
        self.path = path

    def __find_time_idx(self, arr: pa.ChunkedArray, v) -> int:
        ix = arr.index(v).as_py()
        if ix < 0:
            for c in arr.iterchunks():
                a = c.to_numpy()
                ix = np.searchsorted(a, v, side='right')
                if ix > 0 and ix < len(c):
                    ix = arr.index(a[ix]).as_py() - 1
                    break
        return ix

    def read(self, start: Optional[str]=None, stop: Optional[str]=None) -> Any:
        convert_options = None
        if self.time_parsers:
            convert_options = csv.ConvertOptions(timestamp_parsers=self.time_parsers)

        table = csv.read_csv(
            self.path,
            parse_options=csv.ParseOptions(ignore_empty_lines=True),
            convert_options=convert_options
        )
        fieldnames = table.column_names

        # - try to find range to load
        start_idx, stop_idx = 0, table.num_rows
        try:
            _time_field_idx = _find_column_index_in_list(fieldnames, 'time', 'timestamp', 'datetime', 'date')
            _time_type = table.field(_time_field_idx).type
            _time_unit = _time_type.unit if hasattr(_time_type, 'unit') else 's'
            _time_data = table[_time_field_idx]

            # - check if we need to convert time to primitive types (i.e. Date32 -> timestamp[x])
            _time_cast_function = lambda xs: xs
            if _time_type != pa.timestamp(_time_unit):
                _time_cast_function = lambda xs: xs.cast(pa.timestamp(_time_unit))
                _time_data = _time_cast_function(_time_data)

            # - preprocess start and stop
            t_0, t_1 = handle_start_stop(start, stop, convert=lambda x: _recognize_t(x, None, _time_unit))

            # - check requested range
            if t_0:
                start_idx = self.__find_time_idx(_time_data, t_0)
                if start_idx >= table.num_rows:
                    # no data for requested start date
                    return None

            if t_1:
                stop_idx = self.__find_time_idx(_time_data, t_1)
                if stop_idx < 0 or stop_idx < start_idx:
                    stop_idx = table.num_rows

        except Exception as exc:
            logger.warning(exc)
            logger.info('loading whole file')

        length = (stop_idx - start_idx + 1)
        self._processor.start_processing(fieldnames)
        selected_table = table.slice(start_idx, length)
        n_chunks = selected_table[table.column_names[0]].num_chunks
        for n in range(n_chunks):
            data = [
                # - in some cases we need to convert the time index to a primitive type
                _time_cast_function(selected_table[k].chunk(n)).to_numpy() if k == _time_field_idx else selected_table[k].chunk(n).to_numpy()
                for k in range(selected_table.num_columns)
            ]
            self._processor.process_data(data)
        return self._processor.get_result()
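Putting reader and processors together, here is a minimal usage sketch; the file name and date range are hypothetical, and the CSV is assumed to carry time/open/high/low/close/volume columns:

from qubx.data.readers import CsvDataReader, OhlcvDataProcessor, QuotesFromOHLCVDataProcessor

# load a date slice of 1-minute candles into a Qube OHLCV series
reader = CsvDataReader('BTCUSDT_1min.csv', OhlcvDataProcessor('BTCUSDT'))
ohlc = reader.read(start='2023-01-01', stop='2023-01-31')

# or emulate a quote + trade stream from the same candles (spread is in price units)
reader = CsvDataReader('BTCUSDT_1min.csv', QuotesFromOHLCVDataProcessor(trades=True, spread=0.01))
ticks = reader.read(start='2023-01-01', stop='2023-01-31')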
qubx/math/__init__.py
ADDED
@@ -0,0 +1 @@
from .stats import percentile_rank
qubx/math/stats.py
ADDED
@@ -0,0 +1,42 @@
import numpy as np
from qubx.utils import sbp


def percentile_rank(x: np.ndarray, v, pctls=np.arange(1, 101)):
    """
    Find percentile rank of value v

    :param x: values array
    :param v: value to be ranked
    :param pctls: percentiles
    :return: rank

    >>> percentile_rank(np.random.randn(1000), 1.69)
    95
    >>> percentile_rank(np.random.randn(1000), 1.69, [10, 50, 100])
    2
    """
    return np.argmax(np.sign(np.append(np.percentile(x, pctls), np.inf) - v))


def compare_to_norm(xs, xranges=None):
    """
    Compare the distribution of xs against a normal distribution with estimated mean and std
    """
    import scipy.stats as stats
    import matplotlib.pyplot as plt
    import seaborn as sns

    _m, _s = np.mean(xs), np.std(xs)
    fit = stats.norm.pdf(sorted(xs), _m, _s)

    sbp(12, 1)
    plt.plot(sorted(xs), fit, 'r--', lw=2, label='N(%.2f, %.2f)' % (_m, _s))
    plt.legend(loc='upper right')

    sns.kdeplot(xs, color='g', label='Data', shade=True)
    if xranges is not None and len(xranges) > 1:
        plt.xlim(xranges)
    plt.legend(loc='upper right')

    sbp(12, 2)
    stats.probplot(xs, dist="norm", sparams=(_m, _s), plot=plt)
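The percentile_rank one-liner computes the requested percentiles of x, appends +inf as a sentinel, and takes the argmax of the sign of (percentile - v), which yields the 0-based index of the first percentile at or above v. A small deterministic check (unlike the randomized doctest above):

import numpy as np
from qubx.math.stats import percentile_rank

x = np.arange(1, 101)          # the q-th percentile of 1..100 is about 1 + 0.99*q
print(percentile_rank(x, 50))  # 49: the 50th percentile (value 50.5) is the first one >= 50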
qubx/ta/__init__.py
ADDED
File without changes (empty file)

qubx/ta/indicators.cpython-311-x86_64-linux-gnu.so
Binary file