Qubx 0.1.3-cp311-cp311-manylinux_2_35_x86_64.whl → 0.1.4-cp311-cp311-manylinux_2_35_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of Qubx has been flagged as potentially problematic.
- qubx/core/series.cpython-311-x86_64-linux-gnu.so +0 -0
- qubx/core/utils.cpython-311-x86_64-linux-gnu.so +0 -0
- qubx/data/readers.py +323 -325
- qubx/ta/indicators.cpython-311-x86_64-linux-gnu.so +0 -0
- {qubx-0.1.3.dist-info → qubx-0.1.4.dist-info}/METADATA +1 -1
- {qubx-0.1.3.dist-info → qubx-0.1.4.dist-info}/RECORD +7 -7
- {qubx-0.1.3.dist-info → qubx-0.1.4.dist-info}/WHEEL +0 -0
qubx/core/series.cpython-311-x86_64-linux-gnu.so CHANGED
Binary file

qubx/core/utils.cpython-311-x86_64-linux-gnu.so CHANGED
Binary file
qubx/data/readers.py CHANGED
@@ -1,6 +1,6 @@
-import re
-from typing import List, Union, Optional, Iterable, Any
-from os.path import exists
+import re, os
+from typing import Callable, List, Union, Optional, Iterable, Any
+from os.path import exists, join
 import numpy as np
 import pandas as pd
 import pyarrow as pa
@@ -38,80 +38,301 @@ def _find_column_index_in_list(xs, *args):
     raise IndexError(f"Can't find any from {args} in list: {xs}")


-class DataProcessor:
-    """
-    Common interface for data processor with default aggregating into list implementation
-    """
+class DataTransformer:
+
     def __init__(self) -> None:
-        self.buffer =
+        self.buffer = []
         self._column_names = []

-    def
+    def start_transform(self, name: str, column_names: List[str]):
         self._column_names = column_names
-        self.buffer =
-
-    def
-
-        self.buffer
-        return None
-
-    def process_data_rows(self, rows_data: list) -> Optional[Iterable]:
-        for r in rows_data:
-            c = []
-            for j, d in enumerate(r):
-                self.buffer[self._column_names[j]].append(d)
-        return None
+        self.buffer = []
+
+    def process_data(self, rows_data: Iterable) -> Any:
+        if rows_data is not None:
+            self.buffer.extend(rows_data)

-    def get_result(self) -> Any:
+    def collect(self) -> Any:
         return self.buffer


 class DataReader:
+
+    def get_names(self) -> List[str] :
+        raise NotImplemented()
+
+    def read(self, data_id: str, start: str | None=None, stop: str | None=None,
+             transform: DataTransformer = DataTransformer(),
+             chunksize=0,
+             **kwargs
+             ) -> Iterable | List:
+        raise NotImplemented()
+
+
+class CsvStorageDataReader(DataReader):
     """
-
+    Data reader for timeseries data stored as csv files in the specified directory
     """
-    _processor: DataProcessor

-    def __init__(self,
-
+    def __init__(self, path: str) -> None:
+        if not exists(path):
+            raise ValueError(f"Folder is not found at {path}")
+        self.path = path

-    def
-
+    def __find_time_idx(self, arr: pa.ChunkedArray, v) -> int:
+        ix = arr.index(v).as_py()
+        if ix < 0:
+            for c in arr.iterchunks():
+                a = c.to_numpy()
+                ix = np.searchsorted(a, v, side='right')
+                if ix > 0 and ix < len(c):
+                    ix = arr.index(a[ix]).as_py() - 1
+                    break
+        return ix

-
-
+    def __check_file_name(self, name: str) -> str | None:
+        _f = join(self.path, name)
+        for sfx in ['.csv', '.csv.gz', '']:
+            if exists(p:=(_f + sfx)):
+                return p
+        return None
+
+    def read(self, data_id: str, start: str | None=None, stop: str | None=None,
+             transform: DataTransformer = DataTransformer(),
+             chunksize=0,
+             timestamp_formatters = None
+             ) -> Iterable | Any:
+
+        f_path = self.__check_file_name(data_id)
+        if not f_path:
+            ValueError(f"Can't find any csv data for {data_id} in {self.path} !")
+
+        convert_options = None
+        if timestamp_formatters is not None:
+            convert_options = csv.ConvertOptions(timestamp_parsers=timestamp_formatters)
+
+        table = csv.read_csv(
+            f_path,
+            parse_options=csv.ParseOptions(ignore_empty_lines=True),
+            convert_options=convert_options
+        )
+        fieldnames = table.column_names
+
+        # - try to find range to load
+        start_idx, stop_idx = 0, table.num_rows
+        try:
+            _time_field_idx = _find_column_index_in_list(fieldnames, 'time', 'timestamp', 'datetime', 'date')
+            _time_type = table.field(_time_field_idx).type
+            _time_unit = _time_type.unit if hasattr(_time_type, 'unit') else 's'
+            _time_data = table[_time_field_idx]
+
+            # - check if need convert time to primitive types (i.e. Date32 -> timestamp[x])
+            _time_cast_function = lambda xs: xs
+            if _time_type != pa.timestamp(_time_unit):
+                _time_cast_function = lambda xs: xs.cast(pa.timestamp(_time_unit))
+                _time_data = _time_cast_function(_time_data)
+
+            # - preprocessing start and stop
+            t_0, t_1 = handle_start_stop(start, stop, convert=lambda x: _recognize_t(x, None, _time_unit))
+
+            # - check requested range
+            if t_0:
+                start_idx = self.__find_time_idx(_time_data, t_0)
+                if start_idx >= table.num_rows:
+                    # no data for requested start date
+                    return None
+
+            if t_1:
+                stop_idx = self.__find_time_idx(_time_data, t_1)
+                if stop_idx < 0 or stop_idx < start_idx:
+                    stop_idx = table.num_rows
+
+        except Exception as exc:
+            logger.warning(exc)
+            logger.info('loading whole file')
+
+        length = (stop_idx - start_idx + 1)
+        selected_table = table.slice(start_idx, length)
+
+        # - in this case we want to return iterable chunks of data
+        if chunksize > 0:
+            def _iter_chunks():
+                for n in range(0, length // chunksize + 1):
+                    transform.start_transform(data_id, fieldnames)
+                    raw_data = selected_table[n*chunksize : min((n+1)*chunksize, length)].to_pandas().to_numpy()
+                    transform.process_data(raw_data)
+                    yield transform.collect()
+            return _iter_chunks()
+
+        transform.start_transform(data_id, fieldnames)
+        raw_data = selected_table.to_pandas().to_numpy()
+        transform.process_data(raw_data)
+        return transform.collect()
+
+    def get_names(self) -> List[str] :
+        _n = []
+        for s in os.listdir(self.path):
+            if (m:=re.match(r'(.*)\.csv(.gz)?$', s)):
+                _n.append(m.group(1))
+        return _n
+
+
+class AsPandasFrame(DataTransformer):
     """
-
+    List of records to pandas dataframe transformer
     """
-
-
-        self._time_idx = _find_column_index_in_list(
-        self.
-        self.
-
-
-
-
-
-
-
-
-
-
-
+
+    def start_transform(self, name: str, column_names: List[str]):
+        self._time_idx = _find_column_index_in_list(column_names, 'time', 'timestamp', 'datetime', 'date')
+        self._column_names = column_names
+        self._frame = pd.DataFrame()
+
+    def process_data(self, rows_data: Iterable) -> Any:
+        self._frame
+        p = pd.DataFrame.from_records(rows_data, columns=self._column_names)
+        p.set_index(self._column_names[self._time_idx], drop=True, inplace=True)
+        p.sort_index(inplace=True)
+        self._frame = pd.concat((self._frame, p), axis=0, sort=True)
+        return p
+
+    def collect(self) -> Any:
+        return self._frame
+
+
+class AsOhlcvSeries(DataTransformer):
+
+    def __init__(self, timeframe: str | None = None, timestamp_units='ns') -> None:
+        super().__init__()
+        self.timeframe = timeframe
+        self._series = None
+        self._data_type = None
+        self.timestamp_units = timestamp_units
+
+    def start_transform(self, name: str, column_names: List[str]):
+        self._time_idx = _find_column_index_in_list(column_names, 'time', 'timestamp', 'datetime', 'date')
+        self._volume_idx = None
+        self._b_volume_idx = None
+        try:
+            self._close_idx = _find_column_index_in_list(column_names, 'close')
+            self._open_idx = _find_column_index_in_list(column_names, 'open')
+            self._high_idx = _find_column_index_in_list(column_names, 'high')
+            self._low_idx = _find_column_index_in_list(column_names, 'low')
+
+            try:
+                self._volume_idx = _find_column_index_in_list(column_names, 'quote_volume', 'volume', 'vol')
+            except: pass
+
+            try:
+                self._b_volume_idx = _find_column_index_in_list(column_names, 'taker_buy_volume', 'taker_buy_quote_volume', 'buy_volume')
+            except: pass
+
+            self._data_type = 'ohlc'
+        except:
+            try:
+                self._ask_idx = _find_column_index_in_list(column_names, 'ask')
+                self._bid_idx = _find_column_index_in_list(column_names, 'bid')
+                self._data_type = 'quotes'
+            except:
+
+                try:
+                    self._price_idx = _find_column_index_in_list(column_names, 'price')
+                    self._size_idx = _find_column_index_in_list(column_names, 'quote_qty', 'qty', 'size', 'amount', 'volume')
+                    self._taker_idx = None
+                    try:
+                        self._taker_idx = _find_column_index_in_list(column_names, 'is_buyer_maker', 'side', 'aggressive', 'taker', 'is_taker')
+                    except: pass
+
+                    self._data_type = 'trades'
+                except:
+                    raise ValueError(f"Can't recognize data for update from header: {column_names}")
+
+        self._column_names = column_names
+        self._name = name
+        if self.timeframe:
+            self._series = OHLCV(self._name, self.timeframe)
+
+    def _time(self, t) -> int:
+        if self.timestamp_units == 'ns':
+            return np.datetime64(t, 'ns').item()
+        return np.datetime64(t, self.timestamp_units).astype('datetime64[ns]').item()
+
+    def _proc_ohlc(self, rows_data: List[List]):
+        for d in rows_data:
+            self._series.update_by_bar(
+                self._time(d[self._time_idx]),
+                d[self._open_idx], d[self._high_idx], d[self._low_idx], d[self._close_idx],
+                d[self._volume_idx] if self._volume_idx else 0,
+                d[self._b_volume_idx] if self._b_volume_idx else 0
+            )
+
+    def _proc_quotes(self, rows_data: List[List]):
+        for d in rows_data:
+            self._series.update(
+                self._time(d[self._time_idx]),
+                (d[self._ask_idx] + d[self._bid_idx])/2
+            )
+
+    def _proc_trades(self, rows_data: List[List]):
+        for d in rows_data:
+            a = d[self._taker_idx] if self._taker_idx else 0
+            s = d[self._size_idx]
+            b = s if a else 0
+            self._series.update(self._time(d[self._time_idx]), d[self._price_idx], s, b)
+
+    def process_data(self, rows_data: List[List]) -> Any:
+        if self._series is None:
+            ts = [t[self._time_idx] for t in rows_data[:100]]
+            self.timeframe = pd.Timedelta(infer_series_frequency(ts)).asm8.item()
+
+            # - create instance after first data received if
+            self._series = OHLCV(self._name, self.timeframe)
+
+        match self._data_type:
+            case 'ohlc':
+                self._proc_ohlc(rows_data)
+            case 'quotes':
+                self._proc_quotes(rows_data)
+            case 'trades':
+                self._proc_trades(rows_data)
+
         return None

+    def collect(self) -> Any:
+        return self._series

-
+
+class AsQuotes(DataTransformer):
+
+    def start_transform(self, name: str, column_names: List[str]):
+        self.buffer = list()
+        self._time_idx = _find_column_index_in_list(column_names, 'time', 'timestamp', 'datetime')
+        self._bid_idx = _find_column_index_in_list(column_names, 'bid')
+        self._ask_idx = _find_column_index_in_list(column_names, 'ask')
+        self._bidvol_idx = _find_column_index_in_list(column_names, 'bidvol', 'bid_vol', 'bidsize', 'bid_size')
+        self._askvol_idx = _find_column_index_in_list(column_names, 'askvol', 'ask_vol', 'asksize', 'ask_size')
+
+    def process_data(self, rows_data: Iterable) -> Any:
+        if rows_data is not None:
+            for d in rows_data:
+                t = d[self._time_idx]
+                b = d[self._bid_idx]
+                a = d[self._ask_idx]
+                bv = d[self._bidvol_idx]
+                av = d[self._askvol_idx]
+                self.buffer.append(Quote(t.as_unit('ns').asm8.item(), b, a, bv, av))
+
+
+class RestoreTicksFromOHLC(DataTransformer):
     """
-
+    Emulates quotes (and trades) from OHLC bars
     """
-
+
+    def __init__(self,
+                 trades: bool=False,  # if we also wants 'trades'
                  default_bid_size=1e9,  # default bid/ask is big
                  default_ask_size=1e9,  # default bid/ask is big
                  daily_session_start_end=DEFAULT_DAILY_SESSION,
-                 spread=0.0
-                 ) -> None:
+                 spread=0.0):
         super().__init__()
         self._trades = trades
         self._bid_size = default_bid_size
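The hunk above replaces the old DataProcessor pipeline with a DataReader/DataTransformer pair: the reader locates and slices the raw records, then drives the transformer through a three-step handshake (start_transform(name, column_names), one or more process_data(rows) calls, then collect()). Note that read() builds, but does not raise, the ValueError when no csv matches data_id. A minimal usage sketch, assuming a hypothetical ./data folder holding BTCUSDT.csv with a time column:

# Folder and symbol are hypothetical; the calls follow the API added above.
from qubx.data.readers import CsvStorageDataReader, AsPandasFrame

reader = CsvStorageDataReader('./data')

# one-shot read: start_transform / process_data / collect run internally
frame = reader.read('BTCUSDT', '2023-01-01', '2023-02-01', transform=AsPandasFrame())

# chunked read: with chunksize > 0, read() returns a generator that yields
# one collect() result per chunk of rows
for chunk in reader.read('BTCUSDT', transform=AsPandasFrame(), chunksize=10_000):
    print(len(chunk))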
@@ -120,30 +341,36 @@ class QuotesFromOHLCVDataProcessor(DataProcessor):
         self._d_session_start = daily_session_start_end[0]
         self._d_session_end = daily_session_start_end[1]

-    def
-        self.
-
-        self.
-        self.
-        self.
+    def start_transform(self, name: str, column_names: List[str]):
+        self.buffer = []
+        # - it will fail if receive data doesn't look as ohlcv
+        self._time_idx = _find_column_index_in_list(column_names, 'time', 'timestamp', 'datetime', 'date')
+        self._open_idx = _find_column_index_in_list(column_names, 'open')
+        self._high_idx = _find_column_index_in_list(column_names, 'high')
+        self._low_idx = _find_column_index_in_list(column_names, 'low')
+        self._close_idx = _find_column_index_in_list(column_names, 'close')
         self._volume_idx = None
-        self.
-
+        self._freq = None
         try:
-            self._volume_idx = _find_column_index_in_list(
-        except:
-            pass
+            self._volume_idx = _find_column_index_in_list(column_names, 'volume', 'vol')
+        except: pass

-        self.
+        if self._volume_idx is None and self._trades:
+            logger.warning("Input OHLC data doesn't contain volume information so trades can't be emulated !")
+            self._trades = False
+
+    def process_data(self, rows_data: List[List]) -> Any:
+        if rows_data is None:
+            return

-    def process_data_columns(self, data: list) -> Optional[Iterable]:
         s2 = self._s2
-
-
-        self.
+
+        if self._freq is None:
+            ts = [t[self._time_idx] for t in rows_data[:100]]
+            self._freq = infer_series_frequency(ts)

         # - timestamps when we emit simulated quotes
-        dt = _freq.astype('timedelta64[ns]').item()
+        dt = self._freq.astype('timedelta64[ns]').item()
         if dt < D1:
             self._t_start = dt // 10
             self._t_mid1 = dt // 2 - dt // 10
@@ -156,21 +383,13 @@ class QuotesFromOHLCVDataProcessor(DataProcessor):
             self._t_end = self._d_session_end

         # - input data
-
-
-
-
-
-
-
-            logger.warning("Input OHLC data doesn't contain volume information so trades can't be emulated !")
-            self._trades = False
-
-        for i in range(len(times)):
-            ti, o, h, l, c = times[i].astype('datetime64[ns]'), opens[i], highs[i], lows[i], closes[i]
-
-            if self._trades:
-                rv = volumes[i] / (h - l)
+        for data in rows_data:
+            ti = pd.Timestamp(data[self._time_idx]).as_unit('ns').asm8.item()
+            o = data[self._open_idx]
+            h = data[self._high_idx]
+            l = data[self._low_idx]
+            c = data[self._close_idx]
+            rv = data[self._volume_idx] if self._volume_idx else 0

             # - opening quote
             self.buffer.append(Quote(ti + self._t_start, o - s2, o + s2, self._bid_size, self._ask_size))
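Within each bar, RestoreTicksFromOHLC spreads synthetic quotes over time: an opening quote shortly after the bar timestamp (for intraday bars t_start = dt // 10, i.e. six minutes into an hourly bar), quotes around the high and low in mid-bar (that code is unchanged and not shown in this hunk), and a closing quote near the bar end, each priced at the bar value plus or minus the half-spread s2. A sketch, assuming a hypothetical ./data/BTCUSDT.csv with OHLC columns:

# Expand bars into synthetic quotes; path and symbol are hypothetical.
from qubx.data.readers import CsvStorageDataReader, RestoreTicksFromOHLC

reader = CsvStorageDataReader('./data')
quotes = reader.read(
    'BTCUSDT', '2023-01-01', '2023-01-02',
    transform=RestoreTicksFromOHLC(trades=False, spread=0.5),
)
# collect() returns the transformer's buffer: a flat list of Quote objects
print(quotes[:4])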
@@ -201,238 +420,6 @@ class QuotesFromOHLCVDataProcessor(DataProcessor):
             # - closing quote
             self.buffer.append(Quote(ti + self._t_end, c - s2, c + s2, self._bid_size, self._ask_size))

-        return None
-
-    def get_result(self) -> Any:
-        return self.buffer
-
-
-class OhlcvDataProcessor(DataProcessor):
-    """
-    Process data and convert it to Qube OHLCV timeseries
-    """
-    def __init__(self, name: str | None = None) -> None:
-        super().__init__()
-        self._name = name
-
-    def start_processing(self, fieldnames: List[str], name: str | None = None):
-        self._time_idx = _find_column_index_in_list(fieldnames, 'time', 'timestamp', 'datetime', 'date')
-        self._open_idx = _find_column_index_in_list(fieldnames, 'open')
-        self._high_idx = _find_column_index_in_list(fieldnames, 'high')
-        self._low_idx = _find_column_index_in_list(fieldnames, 'low')
-        self._close_idx = _find_column_index_in_list(fieldnames, 'close')
-        self._volume_idx = None
-        self._b_volume_idx = None
-        self._timeframe = None
-        self._name = name if name else self._name
-
-        try:
-            self._volume_idx = _find_column_index_in_list(fieldnames, 'quote_volume', 'volume', 'vol')
-        except: pass
-
-        try:
-            self._b_volume_idx = _find_column_index_in_list(fieldnames, 'taker_buy_volume', 'taker_buy_quote_volume', 'buy_volume')
-        except: pass
-
-        self.ohlc = None
-
-    def process_data_columns(self, data: list) -> Optional[Iterable]:
-        if self._timeframe is None:
-            self._timeframe = infer_series_frequency(data[self._time_idx]).astype('timedelta64[s]')
-
-            # - create instance after first data received
-            self.ohlc = OHLCV(self._name, self._timeframe)
-
-        self.ohlc.append_data(
-            data[self._time_idx],
-            data[self._open_idx], data[self._high_idx], data[self._low_idx], data[self._close_idx],
-            data[self._volume_idx] if self._volume_idx else np.empty(0),
-            data[self._b_volume_idx] if self._b_volume_idx else np.empty(0)
-        )
-        return None
-
-    def process_data_rows(self, data: List[list]) -> Iterable | None:
-        if self._timeframe is None:
-            ts = [t[self._time_idx] for t in data[:100]]
-            self._timeframe = pd.Timedelta(infer_series_frequency(ts)).asm8.item()
-
-            # - create instance after first data received
-            self.ohlc = OHLCV(self._name, self._timeframe)
-
-        for d in data:
-            self.ohlc.update_by_bar(
-                np.datetime64(d[self._time_idx], 'ns').item(),
-                d[self._open_idx], d[self._high_idx], d[self._low_idx], d[self._close_idx],
-                d[self._volume_idx] if self._volume_idx else 0,
-                d[self._b_volume_idx] if self._b_volume_idx else 0
-            )
-        return None
-
-    def get_result(self) -> Any:
-        return self.ohlc
-
-
-class OhlcvPandasDataProcessor(DataProcessor):
-    """
-    Process data and convert it to pandas OHLCV dataframes
-    """
-    def __init__(self) -> None:
-        super().__init__()
-        self._fieldnames: List = []
-
-    def start_processing(self, fieldnames: List[str], name: str | None = None):
-        self._time_idx = _find_column_index_in_list(fieldnames, 'time', 'timestamp', 'datetime', 'date')
-        self._open_idx = _find_column_index_in_list(fieldnames, 'open')
-        self._high_idx = _find_column_index_in_list(fieldnames, 'high')
-        self._low_idx = _find_column_index_in_list(fieldnames, 'low')
-        self._close_idx = _find_column_index_in_list(fieldnames, 'close')
-        self._volume_idx = None
-        self._b_volume_idx = None
-        self._timeframe = None
-
-        try:
-            self._volume_idx = _find_column_index_in_list(fieldnames, 'quote_volume', 'volume', 'vol')
-        except: pass
-
-        try:
-            self._b_volume_idx = _find_column_index_in_list(fieldnames, 'taker_buy_volume', 'taker_buy_quote_volume', 'buy_volume')
-        except: pass
-
-        self._time = np.array([], dtype=np.datetime64)
-        self._open = np.array([])
-        self._high = np.array([])
-        self._low = np.array([])
-        self._close = np.array([])
-        self._volume = np.array([])
-        self._bvolume = np.array([])
-        self._fieldnames = fieldnames
-        self._ohlc = pd.DataFrame()
-
-    def process_data_rows(self, data: List[list]) -> Optional[Iterable]:
-        p = pd.DataFrame.from_records(data, columns=self._fieldnames)
-        p.set_index(self._fieldnames[self._time_idx], drop=True, inplace=True)
-        self._ohlc = pd.concat((self._ohlc, p), axis=0, sort=True, copy=True)
-        return None
-
-    def process_data_columns(self, data: list) -> Optional[Iterable]:
-        # p = pd.DataFrame({
-        #     'open': data[self._open_idx],
-        #     'high': data[self._high_idx],
-        #     'low': data[self._low_idx],
-        #     'close': data[self._close_idx],
-        #     'volume': data[self._volume_idx] if self._volume_idx else []},
-        #     index = data[self._time_idx]
-        # )
-        # self.ohlc = pd.concat((self.ohlc, p), axis=0, sort=True, copy=True)
-        self._time = np.concatenate((self._time, data[self._time_idx]))
-        self._open = np.concatenate((self._open, data[self._open_idx]))
-        self._high = np.concatenate((self._high, data[self._high_idx]))
-        self._low = np.concatenate((self._low, data[self._low_idx]))
-        self._close = np.concatenate((self._close, data[self._close_idx]))
-        if self._volume_idx:
-            self._volume = np.concatenate((self._volume, data[self._volume_idx]))
-        if self._b_volume_idx:
-            self._bvolume = np.concatenate((self._bvolume, data[self._b_volume_idx]))
-
-        return None
-
-    def get_result(self) -> Any:
-        if not self._ohlc.empty:
-            return self._ohlc
-
-        rd = {
-            'open': self._open, 'high': self._high, 'low': self._low, 'close': self._close,
-        }
-
-        if self._volume_idx:
-            rd['volume'] = self._volume
-
-        if self._b_volume_idx:
-            rd['taker_buy_quote_volume'] = self._bvolume
-
-        return pd.DataFrame(rd, index = self._time).sort_index()
-
-
-class CsvDataReader(DataReader):
-    """
-    CSV data file reader
-    """
-
-    def __init__(self, path: str, processor: DataProcessor|None=None, timestamp_parsers=None) -> None:
-        if not exists(path):
-            raise ValueError(f"CSV file not found at {path}")
-        super().__init__(processor)
-        self.time_parsers = timestamp_parsers
-        self.path = path
-
-    def __find_time_idx(self, arr: pa.ChunkedArray, v) -> int:
-        ix = arr.index(v).as_py()
-        if ix < 0:
-            for c in arr.iterchunks():
-                a = c.to_numpy()
-                ix = np.searchsorted(a, v, side='right')
-                if ix > 0 and ix < len(c):
-                    ix = arr.index(a[ix]).as_py() - 1
-                    break
-        return ix
-
-    def read(self, start: Optional[str]=None, stop: Optional[str]=None) -> Any:
-        convert_options = None
-        if self.time_parsers:
-            convert_options=csv.ConvertOptions(timestamp_parsers=self.time_parsers)
-
-        table = csv.read_csv(
-            self.path,
-            parse_options=csv.ParseOptions(ignore_empty_lines=True),
-            convert_options=convert_options
-        )
-        fieldnames = table.column_names
-
-        # - try to find range to load
-        start_idx, stop_idx = 0, table.num_rows
-        try:
-            _time_field_idx = _find_column_index_in_list(fieldnames, 'time', 'timestamp', 'datetime', 'date')
-            _time_type = table.field(_time_field_idx).type
-            _time_unit = _time_type.unit if hasattr(_time_type, 'unit') else 's'
-            _time_data = table[_time_field_idx]
-
-            # - check if need convert time to primitive types (i.e. Date32 -> timestamp[x])
-            _time_cast_function = lambda xs: xs
-            if _time_type != pa.timestamp(_time_unit):
-                _time_cast_function = lambda xs: xs.cast(pa.timestamp(_time_unit))
-                _time_data = _time_cast_function(_time_data)
-
-            # - preprocessing start and stop
-            t_0, t_1 = handle_start_stop(start, stop, convert=lambda x: _recognize_t(x, None, _time_unit))
-
-            # - check requested range
-            if t_0:
-                start_idx = self.__find_time_idx(_time_data, t_0)
-                if start_idx >= table.num_rows:
-                    # no data for requested start date
-                    return None
-
-            if t_1:
-                stop_idx = self.__find_time_idx(_time_data, t_1)
-                if stop_idx < 0 or stop_idx < start_idx:
-                    stop_idx = table.num_rows
-
-        except Exception as exc:
-            logger.warning(exc)
-            logger.info('loading whole file')
-
-        length = (stop_idx - start_idx + 1)
-        self._processor.start_processing(fieldnames)
-        selected_table = table.slice(start_idx, length)
-        n_chunks = selected_table[table.column_names[0]].num_chunks
-        for n in range(n_chunks):
-            data = [
-                # - in some cases we need to convert time index to primitive type
-                _time_cast_function(selected_table[k].chunk(n)).to_numpy() if k == _time_field_idx else selected_table[k].chunk(n).to_numpy()
-                for k in range(selected_table.num_columns)]
-            self._processor.process_data_columns(data)
-        return self._processor.get_result()
-

 def _retry(fn):
     @wraps(fn)
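The deletions above remove the 0.1.3 processor classes; each maps onto a new transformer: OhlcvDataProcessor becomes AsOhlcvSeries, OhlcvPandasDataProcessor becomes AsPandasFrame, QuotesFromOHLCVDataProcessor becomes RestoreTicksFromOHLC, and the file-bound CsvDataReader is superseded by the folder-bound CsvStorageDataReader. A hedged migration sketch (paths and symbol are hypothetical):

# 0.1.3 style (removed): processor fixed at construction, read(start, stop)
#   reader = CsvDataReader('data/BTCUSDT.csv', OhlcvDataProcessor('BTCUSDT'))
#   ohlc = reader.read('2023-01-01', '2023-02-01')

# 0.1.4 style: one reader per storage folder, transformer chosen per call
from qubx.data.readers import CsvStorageDataReader, AsOhlcvSeries

reader = CsvStorageDataReader('data')
ohlc = reader.read('BTCUSDT', '2023-01-01', '2023-02-01', transform=AsOhlcvSeries())
print(reader.get_names())  # names discovered from *.csv / *.csv.gz files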
@@ -456,13 +443,12 @@ class QuestDBConnector(DataReader):

     # Connect to an existing QuestDB instance
     >>> db = QuestDBConnector('user=admin password=quest host=localhost port=8812', OhlcvPandasDataProcessor())
-    >>> db.read('BINANCEF.ETHUSDT', '
+    >>> db.read('BINANCEF.ETHUSDT', '2024-01-01')
     """
     _reconnect_tries = 5
     _reconnect_idle = 0.1  # wait seconds before retying

-    def __init__(self, connection_url: str
-        super().__init__(processor)
+    def __init__(self, connection_url: str) -> None:
         self._connection = None
         self._cursor = None
         self.connection_url = connection_url
@@ -474,12 +460,18 @@ class QuestDBConnector(DataReader):
         self._cursor = self._connection.cursor()

     @_retry
-    def read(self,
+    def read(self, data_id: str, start: str|None=None, stop: str|None=None,
+             transform: DataTransformer = DataTransformer(),
+             chunksize=0,  # TODO: use self._cursor.fetchmany in this case !!!!
+             timeframe: str='1m') -> Any:
         start, end = handle_start_stop(start, stop)
         w0 = f"timestamp >= '{start}'" if start else ''
         w1 = f"timestamp <= '{end}'" if end else ''
         where = f'where {w0} and {w1}' if (w0 and w1) else f"where {(w0 or w1)}"

+        # just a temp hack - actually we need to discuss symbology etc
+        symbol = data_id  #.split('.')[-1]
+
         self._cursor.execute(
             f"""
             select timestamp,
@@ -496,14 +488,20 @@ class QuestDBConnector(DataReader):
             SAMPLE by {timeframe};
             """ # type: ignore
         )
-        records = self._cursor.fetchall()
+        records = self._cursor.fetchall()  # TODO: for chunksize > 0 use fetchmany etc
         names = [d.name for d in self._cursor.description]

-
+        transform.start_transform(data_id, names)

         # d = np.array(records)
-
-        return
+        transform.process_data(records)
+        return transform.collect()
+
+    @_retry
+    def get_names(self) -> List[str] :
+        self._cursor.execute("select table_name from tables()")
+        records = self._cursor.fetchall()
+        return [r[0] for r in records]

     def __del__(self):
         for c in (self._cursor, self._connection):
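QuestDBConnector now follows the same contract: __init__ no longer takes a processor (the doctest above still shows the old constructor), read() accepts data_id, start/stop and a transform, and a get_names() helper lists tables. A sketch, assuming a local QuestDB instance and a hypothetical table BINANCEF.ETHUSDT:

# Connection string and table name are assumptions for illustration.
from qubx.data.readers import QuestDBConnector, AsPandasFrame

db = QuestDBConnector('user=admin password=quest host=localhost port=8812')
print(db.get_names())  # runs: select table_name from tables()

# aggregation happens in the query via SAMPLE BY {timeframe}; the records
# are then pushed through the transformer (chunksize is accepted but unused)
ohlc = db.read('BINANCEF.ETHUSDT', '2024-01-01', timeframe='1h', transform=AsPandasFrame())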
qubx/ta/indicators.cpython-311-x86_64-linux-gnu.so CHANGED
Binary file
{qubx-0.1.3.dist-info → qubx-0.1.4.dist-info}/RECORD CHANGED

@@ -6,13 +6,13 @@ qubx/core/basics.py,sha256=2u7WV5KX-RbTmzoKfi1yT4HNLDPfQcFMCUZ1pVsM_VE,14777
 qubx/core/helpers.py,sha256=gPE78dO718NBY0-JbfqNGCzIvr4BVatFntNIy2RUrEY,11559
 qubx/core/loggers.py,sha256=HpgavBZegoDv9ssihtqX0pitXKULVAPHUpoE_volJw0,11910
 qubx/core/lookups.py,sha256=4aEC7b2AyEXFqHHGDenex3Z1FZGrpDSb8IwzBZrSqIA,13688
-qubx/core/series.cpython-311-x86_64-linux-gnu.so,sha256=
+qubx/core/series.cpython-311-x86_64-linux-gnu.so,sha256=lwzgrbIdmfZiMopIPJkOlh3tzcS-zIbBudvqXnduwdU,698952
 qubx/core/series.pxd,sha256=IS89NQ5FYp3T0YIHe1lELKZIAKrNvX8K6WlLyac44I4,2847
 qubx/core/series.pyx,sha256=WEAjn4j3zn540Cxx68X5gRXilvwa7NGdbki6myzZbIM,28108
 qubx/core/strategy.py,sha256=Fs4fFyHaEGYuz7mBeQHBWFu3Ipg0yFzcxXhskgsPxJE,30330
-qubx/core/utils.cpython-311-x86_64-linux-gnu.so,sha256=
+qubx/core/utils.cpython-311-x86_64-linux-gnu.so,sha256=u2eUKlnC06lHG3LTg-92Om_32Mk7ZGAleV7As7--G1U,74216
 qubx/core/utils.pyx,sha256=6dQ8R02bl8V3f-W3Wk9-e86D9OvDz-5-4NA_dlF_xwc,1368
-qubx/data/readers.py,sha256=
+qubx/data/readers.py,sha256=r5_DhzyaTMNGHr9sDjbIgK2kMcSC8fHYeDrb2ep1NLU,19648
 qubx/impl/ccxt_connector.py,sha256=NqF-tgxfTATnmVqKUonNXCAzECrDU8YrgqM3Nq06fw8,9150
 qubx/impl/ccxt_customizations.py,sha256=kK_4KmOyKvDVgd4MTkVg4CyqdjE-6r41siZIvLj-A-Q,3488
 qubx/impl/ccxt_trading.py,sha256=cmg4P-zd78w-V8j3-IGS2LFxikGhxFPgmCvz3sr065Q,9097
@@ -20,7 +20,7 @@ qubx/impl/ccxt_utils.py,sha256=n6nicE2C_7tVF3soQJYgK0Er0hZrfY0NnN5c84vGiZc,3565
 qubx/math/__init__.py,sha256=AavTKCtU7gRffG9T10Z0uv4LdI31bVvBn-L_Iv81FRk,33
 qubx/math/stats.py,sha256=LnZZFe_3_vj1yW-wcQdtOmI9t5yGkiYfLWa4kVFXkjA,1176
 qubx/ta/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-qubx/ta/indicators.cpython-311-x86_64-linux-gnu.so,sha256=
+qubx/ta/indicators.cpython-311-x86_64-linux-gnu.so,sha256=skr9RSXP2ypQ3oFXzrOiMx4iUhuLfvhVAb3KRaYPoSc,284552
 qubx/ta/indicators.pyx,sha256=P-GEYUks2lSHo6hbtUFAB7TWE1AunjLR4jIjwqPHrwU,7708
 qubx/trackers/__init__.py,sha256=1y_yvIy0OQwBqfhAW_EY33NxFzFSWvI0qNAPU6zchYc,60
 qubx/trackers/rebalancers.py,sha256=QCzANCooZBi2VMCBjjCPMq_Dt1h1zrBelATnfmVve74,5522
@@ -32,6 +32,6 @@ qubx/utils/misc.py,sha256=bK9cqNKIt_qER8FnSs23L3RMSVhnJIZ5n4tyLNm5n3s,9837
 qubx/utils/pandas.py,sha256=8gf0hgrkRfuOOiANZxKcSPgj8-KL9FlVlfSvNrCar6A,18605
 qubx/utils/runner.py,sha256=ZUk7jgqx3JYUDZ_ZJLEZv0ug3m2Da-c4Ud2CwfOvC8Q,9277
 qubx/utils/time.py,sha256=mdQ02PGoUBm9iH_wvFIhAhOkBoJOpO24ZanWcGU8oms,4884
-qubx-0.1.
-qubx-0.1.
-qubx-0.1.
+qubx-0.1.4.dist-info/METADATA,sha256=GfHFjg69n066xlpMMnpOyMAYlhXjVsyqpTlzUzzFguc,2144
+qubx-0.1.4.dist-info/WHEEL,sha256=MLOa6LysROdjgj4FVxsHitAnIh8Be2D_c9ZSBHKrz2M,110
+qubx-0.1.4.dist-info/RECORD,,

{qubx-0.1.3.dist-info → qubx-0.1.4.dist-info}/WHEEL
File without changes