Qubx 0.5.7__cp312-cp312-manylinux_2_39_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Qubx might be problematic.
- qubx/__init__.py +207 -0
- qubx/_nb_magic.py +100 -0
- qubx/backtester/__init__.py +5 -0
- qubx/backtester/account.py +145 -0
- qubx/backtester/broker.py +87 -0
- qubx/backtester/data.py +296 -0
- qubx/backtester/management.py +378 -0
- qubx/backtester/ome.py +296 -0
- qubx/backtester/optimization.py +201 -0
- qubx/backtester/simulated_data.py +558 -0
- qubx/backtester/simulator.py +362 -0
- qubx/backtester/utils.py +780 -0
- qubx/cli/__init__.py +0 -0
- qubx/cli/commands.py +67 -0
- qubx/connectors/ccxt/__init__.py +0 -0
- qubx/connectors/ccxt/account.py +495 -0
- qubx/connectors/ccxt/broker.py +132 -0
- qubx/connectors/ccxt/customizations.py +193 -0
- qubx/connectors/ccxt/data.py +612 -0
- qubx/connectors/ccxt/exceptions.py +17 -0
- qubx/connectors/ccxt/factory.py +93 -0
- qubx/connectors/ccxt/utils.py +307 -0
- qubx/core/__init__.py +0 -0
- qubx/core/account.py +251 -0
- qubx/core/basics.py +850 -0
- qubx/core/context.py +420 -0
- qubx/core/exceptions.py +38 -0
- qubx/core/helpers.py +480 -0
- qubx/core/interfaces.py +1150 -0
- qubx/core/loggers.py +514 -0
- qubx/core/lookups.py +475 -0
- qubx/core/metrics.py +1512 -0
- qubx/core/mixins/__init__.py +13 -0
- qubx/core/mixins/market.py +94 -0
- qubx/core/mixins/processing.py +428 -0
- qubx/core/mixins/subscription.py +203 -0
- qubx/core/mixins/trading.py +88 -0
- qubx/core/mixins/universe.py +270 -0
- qubx/core/series.cpython-312-x86_64-linux-gnu.so +0 -0
- qubx/core/series.pxd +125 -0
- qubx/core/series.pyi +118 -0
- qubx/core/series.pyx +988 -0
- qubx/core/utils.cpython-312-x86_64-linux-gnu.so +0 -0
- qubx/core/utils.pyi +6 -0
- qubx/core/utils.pyx +62 -0
- qubx/data/__init__.py +25 -0
- qubx/data/helpers.py +416 -0
- qubx/data/readers.py +1562 -0
- qubx/data/tardis.py +100 -0
- qubx/gathering/simplest.py +88 -0
- qubx/math/__init__.py +3 -0
- qubx/math/stats.py +129 -0
- qubx/pandaz/__init__.py +23 -0
- qubx/pandaz/ta.py +2757 -0
- qubx/pandaz/utils.py +638 -0
- qubx/resources/instruments/symbols-binance.cm.json +1 -0
- qubx/resources/instruments/symbols-binance.json +1 -0
- qubx/resources/instruments/symbols-binance.um.json +1 -0
- qubx/resources/instruments/symbols-bitfinex.f.json +1 -0
- qubx/resources/instruments/symbols-bitfinex.json +1 -0
- qubx/resources/instruments/symbols-kraken.f.json +1 -0
- qubx/resources/instruments/symbols-kraken.json +1 -0
- qubx/ta/__init__.py +0 -0
- qubx/ta/indicators.cpython-312-x86_64-linux-gnu.so +0 -0
- qubx/ta/indicators.pxd +149 -0
- qubx/ta/indicators.pyi +41 -0
- qubx/ta/indicators.pyx +787 -0
- qubx/trackers/__init__.py +3 -0
- qubx/trackers/abvanced.py +236 -0
- qubx/trackers/composite.py +146 -0
- qubx/trackers/rebalancers.py +129 -0
- qubx/trackers/riskctrl.py +641 -0
- qubx/trackers/sizers.py +235 -0
- qubx/utils/__init__.py +5 -0
- qubx/utils/_pyxreloader.py +281 -0
- qubx/utils/charting/lookinglass.py +1057 -0
- qubx/utils/charting/mpl_helpers.py +1183 -0
- qubx/utils/marketdata/binance.py +284 -0
- qubx/utils/marketdata/ccxt.py +90 -0
- qubx/utils/marketdata/dukas.py +130 -0
- qubx/utils/misc.py +541 -0
- qubx/utils/ntp.py +63 -0
- qubx/utils/numbers_utils.py +7 -0
- qubx/utils/orderbook.py +491 -0
- qubx/utils/plotting/__init__.py +0 -0
- qubx/utils/plotting/dashboard.py +150 -0
- qubx/utils/plotting/data.py +137 -0
- qubx/utils/plotting/interfaces.py +25 -0
- qubx/utils/plotting/renderers/__init__.py +0 -0
- qubx/utils/plotting/renderers/plotly.py +0 -0
- qubx/utils/runner/__init__.py +1 -0
- qubx/utils/runner/_jupyter_runner.pyt +60 -0
- qubx/utils/runner/accounts.py +88 -0
- qubx/utils/runner/configs.py +65 -0
- qubx/utils/runner/runner.py +470 -0
- qubx/utils/time.py +312 -0
- qubx-0.5.7.dist-info/METADATA +105 -0
- qubx-0.5.7.dist-info/RECORD +100 -0
- qubx-0.5.7.dist-info/WHEEL +4 -0
- qubx-0.5.7.dist-info/entry_points.txt +3 -0
qubx/data/readers.py
ADDED
@@ -0,0 +1,1562 @@
import itertools
import os
import re
from functools import wraps
from os.path import exists, join
from typing import Any, Iterable, Iterator, List, Set, Union

import numpy as np
import pandas as pd
import psycopg as pg
import pyarrow as pa
from pyarrow import csv, table

from qubx import logger
from qubx.core.basics import DataType, TimestampedDict
from qubx.core.series import OHLCV, Bar, Quote, Trade
from qubx.pandaz.utils import ohlc_resample, srows
from qubx.utils.time import handle_start_stop, infer_series_frequency


def convert_timedelta_to_numpy(x: str) -> int:
    return pd.Timedelta(x).to_numpy().item()


D1, H1 = convert_timedelta_to_numpy("1D"), convert_timedelta_to_numpy("1h")
MS1 = 1_000_000
S1 = 1000 * MS1
M1 = 60 * S1

DEFAULT_DAILY_SESSION = (convert_timedelta_to_numpy("00:00:00.100"), convert_timedelta_to_numpy("23:59:59.900"))
STOCK_DAILY_SESSION = (convert_timedelta_to_numpy("9:30:00.100"), convert_timedelta_to_numpy("15:59:59.900"))
CME_FUTURES_DAILY_SESSION = (convert_timedelta_to_numpy("8:30:00.100"), convert_timedelta_to_numpy("15:14:59.900"))


def _recognize_t(t: Union[int, str], defaultvalue, timeunit) -> int:
    if isinstance(t, (str, pd.Timestamp)):
        try:
            return np.datetime64(t, timeunit)
        except (ValueError, TypeError) as e:
            logger.debug(f"Failed to convert time {t} to datetime64: {e}")
    return defaultvalue


def _time(t, timestamp_units: str) -> int:
    t = int(t) if isinstance(t, float) else t
    if timestamp_units == "ns":
        return np.datetime64(t, "ns").item()
    return np.datetime64(t, timestamp_units).astype("datetime64[ns]").item()


def _find_column_index_in_list(xs, *args):
    xs = [x.lower() for x in xs]
    for a in args:
        ai = a.lower()
        if ai in xs:
            return xs.index(ai)
    raise IndexError(f"Can't find any specified columns from [{args}] in provided list: {xs}")


def _list_to_chunked_iterator(data: list[Any], chunksize: int) -> Iterable:
    it = iter(data)
    chunk = list(itertools.islice(it, chunksize))
    while chunk:
        yield chunk
        chunk = list(itertools.islice(it, chunksize))


def _find_time_col_idx(column_names):
    return _find_column_index_in_list(column_names, "time", "timestamp", "datetime", "date", "open_time", "ts")


class DataTransformer:
    def __init__(self) -> None:
        self.buffer = []
        self._column_names = []

    def start_transform(
        self,
        name: str,
        column_names: List[str],
        start: str | None = None,
        stop: str | None = None,
    ):
        self._column_names = column_names
        self.buffer = []

    def process_data(self, rows_data: Iterable) -> Any:
        if rows_data is not None:
            self.buffer.extend(rows_data)

    def collect(self) -> Any:
        return self.buffer


class DataReader:
    def get_names(self, **kwargs) -> List[str]:
        """
        TODO: not sure we really need this !
        """
        raise NotImplementedError("get_names() method is not implemented")

    def read(
        self,
        data_id: str,
        start: str | None = None,
        stop: str | None = None,
        transform: DataTransformer = DataTransformer(),
        chunksize=0,
        **kwargs,
    ) -> Iterator | List:
        raise NotImplementedError("read() method is not implemented")

    def get_aux_data_ids(self) -> Set[str]:
        """
        Returns list of all auxiliary data IDs available for this data reader
        """

        def _list_methods(cls):
            _meth = []
            for k, s in cls.__dict__.items():
                if (
                    k.startswith("get_")
                    and k not in ["get_names", "get_symbols", "get_time_ranges", "get_aux_data_ids", "get_aux_data"]
                    and callable(s)
                ):
                    _meth.append(k[4:])
            return _meth

        _d_ids = _list_methods(self.__class__)
        for bc in self.__class__.__bases__:
            _d_ids.extend(_list_methods(bc))
        return set(_d_ids)

    def get_aux_data(self, data_id: str, **kwargs) -> Any:
        """
        Returns auxiliary data for the specified data ID
        """
        if hasattr(self, f"get_{data_id}"):
            return getattr(self, f"get_{data_id}")(**kwargs)
        raise ValueError(
            f"{self.__class__.__name__} doesn't have getter for '{data_id}' auxiliary data. Available data: {self.get_aux_data_ids()}"
        )

    def get_symbols(self, exchange: str, dtype: str) -> list[str]:
        raise NotImplementedError("get_symbols() method is not implemented")

    def get_time_ranges(self, symbol: str, dtype: str) -> tuple[np.datetime64, np.datetime64]:
        """
        Returns first and last time for the specified symbol and data type in the reader's storage
        """
        raise NotImplementedError("get_time_ranges() method is not implemented")

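# - Illustrative sketch (not part of the package): a custom transformer only needs to
#   implement start_transform / process_data / collect on top of DataTransformer.
#   The class name and column layout below are hypothetical.
#
#   class AsClosePrices(DataTransformer):
#       def start_transform(self, name, column_names, **kwargs):
#           self.buffer = []
#           self._close_idx = _find_column_index_in_list(column_names, "close")
#
#       def process_data(self, rows_data):
#           if rows_data is not None:
#               self.buffer.extend(row[self._close_idx] for row in rows_data)
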
class CsvStorageDataReader(DataReader):
    """
    Data reader for timeseries data stored as csv files in the specified directory
    """

    def __init__(self, path: str) -> None:
        _path = os.path.expanduser(path)
        if not exists(_path):
            raise ValueError(f"Folder is not found at {path}")
        self.path = _path

    def __find_time_idx(self, arr: pa.ChunkedArray, v) -> int:
        ix = arr.index(v).as_py()
        if ix < 0:
            for c in arr.iterchunks():
                a = c.to_numpy()
                ix = np.searchsorted(a, v, side="right")
                if ix > 0 and ix < len(c):
                    ix = arr.index(a[ix]).as_py() - 1
                    break
        return ix

    def __check_file_name(self, name: str) -> str | None:
        _f = join(self.path, name.replace(":", os.sep))
        for sfx in [".csv", ".csv.gz", ""]:
            if exists(p := (_f + sfx)):
                return p
        return None

    def __try_read_data(
        self, data_id: str, start: str | None = None, stop: str | None = None, timestamp_formatters=None
    ) -> tuple[table, np.ndarray, Any, list[str], int, int]:
        f_path = self.__check_file_name(data_id)
        if not f_path:
            raise ValueError(f"Can't find any csv data for {data_id} in {self.path} !")

        convert_options = None
        if timestamp_formatters is not None:
            convert_options = csv.ConvertOptions(timestamp_parsers=timestamp_formatters)

        table = csv.read_csv(
            f_path,
            parse_options=csv.ParseOptions(ignore_empty_lines=True),
            convert_options=convert_options,
        )
        fieldnames = table.column_names

        # - try to find range to load
        start_idx, stop_idx = 0, table.num_rows
        try:
            _time_field_idx = _find_time_col_idx(fieldnames)
            _time_type = table.field(_time_field_idx).type
            _time_unit = _time_type.unit if hasattr(_time_type, "unit") else "ms"
            _time_data = table[_time_field_idx]

            # - check if need convert time to primitive types (i.e. Date32 -> timestamp[x])
            _time_cast_function = lambda xs: xs
            if _time_type != pa.timestamp(_time_unit):
                _time_cast_function = lambda xs: xs.cast(pa.timestamp(_time_unit))
                _time_data = _time_cast_function(_time_data)

            # - preprocessing start and stop
            t_0, t_1 = handle_start_stop(start, stop, convert=lambda x: _recognize_t(x, None, _time_unit))

            # - check requested range
            if t_0:
                start_idx = self.__find_time_idx(_time_data, t_0)
                if start_idx >= table.num_rows:
                    # - no data for requested start date
                    return table, _time_data, _time_unit, fieldnames, -1, -1

            if t_1:
                stop_idx = self.__find_time_idx(_time_data, t_1)
                if stop_idx < 0 or stop_idx < start_idx:
                    stop_idx = table.num_rows

        except Exception as exc:
            logger.warning(f"exception [{exc}] during preprocessing '{f_path}'")

        return table, _time_data, _time_unit, fieldnames, start_idx, stop_idx

    def read(
        self,
        data_id: str,
        start: str | None = None,
        stop: str | None = None,
        transform: DataTransformer = DataTransformer(),
        chunksize=0,
        timestamp_formatters=None,
        timeframe=None,
        **kwargs,
    ) -> Iterable | Any:
        table, _, _, fieldnames, start_idx, stop_idx = self.__try_read_data(data_id, start, stop, timestamp_formatters)
        if start_idx < 0 or stop_idx < 0:
            return None
        length = stop_idx - start_idx + 1
        selected_table = table.slice(start_idx, length)

        # - in this case we want to return iterable chunks of data
        if chunksize > 0:

            def _iter_chunks():
                for n in range(0, length // chunksize + 1):
                    transform.start_transform(data_id, fieldnames, start=start, stop=stop)
                    raw_data = selected_table[n * chunksize : min((n + 1) * chunksize, length)].to_pandas().to_numpy()
                    transform.process_data(raw_data)
                    yield transform.collect()

            return _iter_chunks()

        transform.start_transform(data_id, fieldnames, start=start, stop=stop)
        raw_data = selected_table.to_pandas().to_numpy()
        transform.process_data(raw_data)
        return transform.collect()

    def get_candles(
        self,
        exchange: str,
        symbols: list[str],
        start: str | pd.Timestamp,
        stop: str | pd.Timestamp,
        timeframe: str | None = None,
    ) -> pd.DataFrame:
        """
        Returns pandas DataFrame of candles for given exchange and symbols within specified time range and timeframe
        """
        _r = []
        for symbol in symbols:
            x = self.read(
                f"{exchange}:{symbol}", start=start, stop=stop, timeframe=timeframe, transform=AsPandasFrame()
            )
            if x is not None:
                if timeframe is not None:
                    x = ohlc_resample(x, timeframe)
                _r.append(x.assign(symbol=symbol.upper(), timestamp=x.index))  # type: ignore
        return srows(*_r).set_index(["timestamp", "symbol"]) if _r else pd.DataFrame()

    def get_names(self, **kwargs) -> List[str]:
        _n = []
        for root, _, files in os.walk(self.path):
            path = root.split(os.sep)
            for file in files:
                if re.match(r"(.*)\.csv(.gz)?$", file):
                    f = path[-1]
                    n = file.split(".")[0]
                    if f == self.path:
                        name = n
                    else:
                        name = f"{f}:{n}" if f else n
                    _n.append(name)
        return _n

    def get_symbols(self, exchange: str, dtype: str) -> list[str]:
        return self.get_names()

    def get_time_ranges(self, symbol: str, dtype: str) -> tuple[np.datetime64, np.datetime64]:
        _, _time_data, _time_unit, _, start_idx, stop_idx = self.__try_read_data(symbol, None, None, None)
        return (
            np.datetime64(_time_data[start_idx].value, _time_unit),
            np.datetime64(_time_data[stop_idx - 1].value, _time_unit),
        )

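# - Usage sketch for CsvStorageDataReader (illustrative; the directory and symbol are assumptions):
#   >>> reader = CsvStorageDataReader("~/data/csv")
#   >>> quotes = reader.read("BINANCE:BTCUSDT", start="2023-01-01", stop="2023-01-02", transform=AsQuotes())
#   Since ':' in the data id is replaced with the path separator, this expects a file like
#   <path>/BINANCE/BTCUSDT.csv or .csv.gz (see __check_file_name above).
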
class InMemoryDataFrameReader(DataReader):
    """
    Data reader for pandas DataFrames
    """

    exchange: str | None
    _data: dict[str, pd.DataFrame | pd.Series]

    def __init__(self, data: dict[str, pd.DataFrame | pd.Series], exchange: str | None = None) -> None:
        if not isinstance(data, dict):
            raise ValueError("data must be a dictionary of pandas DataFrames")
        self._data = data
        self.exchange = exchange

    def get_names(self, **kwargs) -> list[str]:
        keys = list(self._data.keys())
        if self.exchange:
            return [f"{self.exchange}:{k}" for k in keys]
        return keys

    def _get_data_by_key(self, data_id: str) -> tuple[str, pd.DataFrame | pd.Series]:
        if data_id not in self._data:
            if self.exchange and data_id.startswith(self.exchange):
                data_id = data_id.split(":")[1]
        if (d := self._data.get(data_id)) is None:
            raise ValueError(f"No data found for {data_id}")
        return data_id, d

    def read(
        self,
        data_id: str,
        start: str | None = None,
        stop: str | None = None,
        transform: DataTransformer = DataTransformer(),
        chunksize=0,
        **kwargs,
    ) -> Iterable | list:
        """
        Read and transform data for a given data_id within a specified time range.

        Parameters:
        -----------
        data_id : str
            The identifier for the data to be read.
        start : str | None, optional
            The start time for the data range (inclusive). If None, start from the earliest available data.
        stop : str | None, optional
            The stop time for the data range (inclusive). If None, include data up to the latest available.
        transform : DataTransformer, optional
            An instance of DataTransformer to process the retrieved data. Defaults to DataTransformer().
        chunksize : int, optional
            The size of data chunks to process at a time. If 0, process all data at once. Defaults to 0.
        **kwargs : dict
            Additional keyword arguments for future extensions.

        Returns:
        --------
        Iterable | List
            The processed and transformed data, either as an iterable (if chunksize > 0) or as a list.

        Raises:
        -------
        ValueError
            If no data is found for the given data_id.
        """
        start, stop = handle_start_stop(start, stop)
        data_id, _stored_data = self._get_data_by_key(data_id)

        _sliced_data = _stored_data.loc[start:stop].copy()
        if _tf := kwargs.get("timeframe"):
            _sliced_data = ohlc_resample(_sliced_data, _tf)
            assert isinstance(_sliced_data, pd.DataFrame), "Resampled data should be a DataFrame"
        _sliced_data = _sliced_data.reset_index()

        def _do_transform(values: Iterable, columns: list[str]) -> Iterable:
            transform.start_transform(data_id, columns, start=start, stop=stop)
            transform.process_data(values)
            return transform.collect()

        if chunksize > 0:
            # returns chunked frames
            def _chunked_dataframe(data: np.ndarray, columns: list[str], chunksize: int) -> Iterable:
                it = iter(data)
                chunk = list(itertools.islice(it, chunksize))
                while chunk:
                    yield _do_transform(chunk, columns)
                    chunk = list(itertools.islice(it, chunksize))

            return _chunked_dataframe(_sliced_data.values, list(_sliced_data.columns), chunksize)

        return _do_transform(_sliced_data.values, list(_sliced_data.columns))

    def get_symbols(self, exchange: str, dtype: str) -> list[str]:
        return self.get_names()

    def get_time_ranges(self, symbol: str, dtype: DataType) -> tuple[np.datetime64 | None, np.datetime64 | None]:
        try:
            _, _stored_data = self._get_data_by_key(symbol)
            return _stored_data.index[0], _stored_data.index[-1]
        except ValueError:
            return None, None

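# - Usage sketch for InMemoryDataFrameReader (illustrative; the frame below is an assumption):
#   >>> ohlc = pd.DataFrame(
#   ...     {"open": [1.0, 2.0], "high": [2.0, 3.0], "low": [0.5, 1.5], "close": [1.5, 2.5]},
#   ...     index=pd.date_range("2024-01-01", periods=2, freq="1h"),
#   ... )
#   >>> reader = InMemoryDataFrameReader({"BTCUSDT": ohlc}, exchange="BINANCE")
#   >>> reader.read("BINANCE:BTCUSDT", "2024-01-01", "2024-01-02", transform=AsPandasFrame())
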
class AsPandasFrame(DataTransformer):
    """
    List of records to pandas dataframe transformer
    """

    def __init__(self, timestamp_units=None) -> None:
        self.timestamp_units = timestamp_units

    def start_transform(self, name: str, column_names: List[str], **kwargs):
        self._time_idx = _find_time_col_idx(column_names)
        self._column_names = column_names
        self._frame = pd.DataFrame()

    def process_data(self, rows_data: Iterable) -> Any:
        p = pd.DataFrame.from_records(rows_data, columns=self._column_names)
        p.set_index(self._column_names[self._time_idx], drop=True, inplace=True)
        p.index = pd.to_datetime(p.index, unit=self.timestamp_units) if self.timestamp_units else p.index
        p.index.rename("timestamp", inplace=True)
        p.sort_index(inplace=True)
        self._frame = pd.concat((self._frame, p), axis=0, sort=True)
        return p

    def collect(self) -> Any:
        return self._frame


class AsOhlcvSeries(DataTransformer):
    """
    Convert incoming data into OHLCV series.

    Incoming data may have one of the following structures:

    ```
    ohlcv: time,open,high,low,close,volume|quote_volume,(buy_volume)
    quotes: time,bid,ask,bidsize,asksize
    trades (TAS): time,price,size,(is_taker)
    ```
    """

    timeframe: str | None
    _series: OHLCV | None
    _data_type: str | None

    def __init__(self, timeframe: str | None = None, timestamp_units="ns") -> None:
        super().__init__()
        self.timeframe = timeframe
        self._series = None
        self._data_type = None
        self.timestamp_units = timestamp_units

    def start_transform(self, name: str, column_names: List[str], **kwargs):
        self._time_idx = _find_time_col_idx(column_names)
        self._volume_idx = None
        self._b_volume_idx = None
        try:
            self._close_idx = _find_column_index_in_list(column_names, "close")
            self._open_idx = _find_column_index_in_list(column_names, "open")
            self._high_idx = _find_column_index_in_list(column_names, "high")
            self._low_idx = _find_column_index_in_list(column_names, "low")

            try:
                self._volume_idx = _find_column_index_in_list(column_names, "quote_volume", "volume", "vol")
            except:
                pass

            try:
                self._b_volume_idx = _find_column_index_in_list(
                    column_names,
                    "taker_buy_volume",
                    "taker_buy_quote_volume",
                    "buy_volume",
                )
            except:
                pass

            self._data_type = "ohlc"
        except:
            try:
                self._ask_idx = _find_column_index_in_list(column_names, "ask")
                self._bid_idx = _find_column_index_in_list(column_names, "bid")
                self._data_type = "quotes"
            except:
                try:
                    self._price_idx = _find_column_index_in_list(column_names, "price")
                    self._size_idx = _find_column_index_in_list(
                        column_names, "quote_qty", "qty", "size", "amount", "volume"
                    )
                    self._taker_idx = None
                    try:
                        self._taker_idx = _find_column_index_in_list(
                            column_names,
                            "is_buyer_maker",
                            "side",
                            "aggressive",
                            "taker",
                            "is_taker",
                        )
                    except:
                        pass

                    self._data_type = "trades"
                except:
                    raise ValueError(f"Can't recognize data for update from header: {column_names}")

        self._column_names = column_names
        self._name = name
        if self.timeframe:
            self._series = OHLCV(self._name, self.timeframe)

    def _proc_ohlc(self, rows_data: List[List]):
        for d in rows_data:
            self._series.update_by_bar(
                _time(d[self._time_idx], self.timestamp_units),
                d[self._open_idx],
                d[self._high_idx],
                d[self._low_idx],
                d[self._close_idx],
                d[self._volume_idx] if self._volume_idx else 0,
                d[self._b_volume_idx] if self._b_volume_idx else 0,
            )

    def _proc_quotes(self, rows_data: List[List]):
        for d in rows_data:
            self._series.update(
                _time(d[self._time_idx], self.timestamp_units),
                (d[self._ask_idx] + d[self._bid_idx]) / 2,
            )

    def _proc_trades(self, rows_data: List[List]):
        for d in rows_data:
            a = d[self._taker_idx] if self._taker_idx else 0
            s = d[self._size_idx]
            b = s if a else 0
            self._series.update(_time(d[self._time_idx], self.timestamp_units), d[self._price_idx], s, b)

    def process_data(self, rows_data: List[List]) -> Any:
        if self._series is None:
            ts = [t[self._time_idx] for t in rows_data[:100]]
            self.timeframe = pd.Timedelta(infer_series_frequency(ts)).asm8.item()

            # - create instance after first data received if
            self._series = OHLCV(self._name, self.timeframe)

        match self._data_type:
            case "ohlc":
                self._proc_ohlc(rows_data)
            case "quotes":
                self._proc_quotes(rows_data)
            case "trades":
                self._proc_trades(rows_data)

        return None

    def collect(self) -> Any:
        return self._series


class AsBars(AsOhlcvSeries):
    """
    Convert incoming data into Bars sequence.

    Incoming data may have one of the following structures:

    ```
    ohlcv: time,open,high,low,close,volume|quote_volume,(buy_volume)
    quotes: time,bid,ask,bidsize,asksize
    trades (TAS): time,price,size,(is_taker)
    ```
    """

    def collect(self) -> Any:
        return self._series[::-1] if self._series is not None else None

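# - Usage sketch (illustrative): AsOhlcvSeries / AsBars detect the ohlcv, quotes or trades layout
#   from the column names alone, so the same call works for different source files
#   (reader path and symbol below are assumptions):
#   >>> reader = CsvStorageDataReader("~/data/csv")
#   >>> series = reader.read("BINANCE:BTCUSDT", transform=AsOhlcvSeries("1h"))  # -> OHLCV series
#   >>> bars = reader.read("BINANCE:BTCUSDT", transform=AsBars("1h"))           # -> reversed sequence of Bar
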
class AsQuotes(DataTransformer):
    """
    Tries to convert incoming data to list of Quote's
    Data must have appropriate structure: bid, ask, bidsize, asksize and time
    """

    def start_transform(self, name: str, column_names: List[str], **kwargs):
        self.buffer = list()
        self._time_idx = _find_time_col_idx(column_names)
        self._bid_idx = _find_column_index_in_list(column_names, "bid")
        self._ask_idx = _find_column_index_in_list(column_names, "ask")
        self._bidvol_idx = _find_column_index_in_list(column_names, "bidvol", "bid_vol", "bidsize", "bid_size")
        self._askvol_idx = _find_column_index_in_list(column_names, "askvol", "ask_vol", "asksize", "ask_size")

    def process_data(self, rows_data: Iterable) -> Any:
        if rows_data is not None:
            for d in rows_data:
                t = d[self._time_idx]
                b = d[self._bid_idx]
                a = d[self._ask_idx]
                bv = d[self._bidvol_idx]
                av = d[self._askvol_idx]
                self.buffer.append(Quote(_time(t, "ns"), b, a, bv, av))


class AsTrades(DataTransformer):
    """
    Tries to convert incoming data to list of Trades
    Data must have appropriate structure: price, size, market_maker (optional).
    Market maker column specifies if buyer is a maker or taker.
    """

    def start_transform(self, name: str, column_names: List[str], **kwargs):
        self.buffer: list[Trade] = list()
        self._time_idx = _find_time_col_idx(column_names)
        self._price_idx = _find_column_index_in_list(column_names, "price")
        self._size_idx = _find_column_index_in_list(column_names, "size")
        try:
            self._side_idx = _find_column_index_in_list(column_names, "market_maker")
        except:
            self._side_idx = None

    def process_data(self, rows_data: Iterable) -> Any:
        if rows_data is not None:
            for d in rows_data:
                t = d[self._time_idx]
                price = d[self._price_idx]
                size = d[self._size_idx]
                side = d[self._side_idx] if self._side_idx else -1
                self.buffer.append(Trade(_time(t, "ns"), price, size, side))


class AsTimestampedRecords(DataTransformer):
    """
    Convert incoming data to list or dictionaries with preprocessed timestamps ('timestamp_ns' and 'timestamp')
    ```
    [
        {
            'open_time': 1711944240000.0,
            'open': 203.219,
            'high': 203.33,
            'low': 203.134,
            'close': 203.175,
            'volume': 10060.0,
            ....
            'timestamp_ns': 1711944240000000000,
            'timestamp': Timestamp('2024-04-01 04:04:00')
        },
        ...
    ] ```
    """

    def __init__(self, timestamp_units: str | None = None) -> None:
        self.timestamp_units = timestamp_units

    def start_transform(self, name: str, column_names: List[str], **kwargs):
        self.buffer = list()
        self._time_idx = _find_time_col_idx(column_names)
        self._column_names = column_names

    def process_data(self, rows_data: Iterable) -> Any:
        self.buffer.extend(rows_data)

    def collect(self) -> Any:
        res = []
        for r in self.buffer:
            t = r[self._time_idx]
            if self.timestamp_units:
                t = _time(t, self.timestamp_units)
            di = dict(zip(self._column_names, r)) | {
                "timestamp_ns": t,
                "timestamp": pd.Timestamp(t),
            }
            res.append(di)
        return res

class RestoredEmulatorHelper(DataTransformer):
    _freq: np.timedelta64 | None = None
    _t_start: int
    _t_mid1: int
    _t_mid2: int
    _t_end: int
    _open_close_time_shift_secs: int

    def __init__(self, daily_session_start_end: tuple, timestamp_units: str, open_close_time_shift_secs: int):
        super().__init__()
        self._d_session_start = daily_session_start_end[0]
        self._d_session_end = daily_session_start_end[1]
        self._timestamp_units = timestamp_units
        self._open_close_time_shift_secs = open_close_time_shift_secs  # type: ignore

    def _detect_emulation_timestamps(self, rows_data: list[list]):
        if self._freq is None:
            ts = [t[self._time_idx] for t in rows_data]
            try:
                self._freq = infer_series_frequency(ts)
            except ValueError:
                logger.warning("Can't determine frequency of incoming data")
                return

            # - timestamps when we emit simulated quotes
            dt = self._freq.astype("timedelta64[ns]").item()
            dt10 = dt // 10

            # - adjust open-close time shift to avoid overlapping timestamps
            if self._open_close_time_shift_secs * S1 >= (dt // 2 - dt10):
                self._open_close_time_shift_secs = (dt // 2 - 2 * dt10) // S1

            if dt < D1:
                self._t_start = self._open_close_time_shift_secs * S1
                self._t_mid1 = dt // 2 - dt10
                self._t_mid2 = dt // 2 + dt10
                self._t_end = dt - self._open_close_time_shift_secs * S1
            else:
                self._t_start = self._d_session_start + self._open_close_time_shift_secs * S1
                self._t_mid1 = dt // 2 - H1
                self._t_mid2 = dt // 2 + H1
                self._t_end = self._d_session_end - self._open_close_time_shift_secs * S1

    def start_transform(self, name: str, column_names: List[str], **kwargs):
        self.buffer = []
        # - it will fail if received data doesn't look like ohlcv
        self._time_idx = _find_time_col_idx(column_names)
        self._open_idx = _find_column_index_in_list(column_names, "open")
        self._high_idx = _find_column_index_in_list(column_names, "high")
        self._low_idx = _find_column_index_in_list(column_names, "low")
        self._close_idx = _find_column_index_in_list(column_names, "close")
        self._volume_idx = None
        self._freq = None
        try:
            self._volume_idx = _find_column_index_in_list(column_names, "volume", "vol")
        except:  # noqa: E722
            pass


class RestoreTicksFromOHLC(RestoredEmulatorHelper):
    """
    Emulates quotes (and trades) from OHLC bars
    """

    def __init__(
        self,
        trades: bool = False,  # if we also want 'trades'
        default_bid_size=1e9,  # default bid/ask is big
        default_ask_size=1e9,  # default bid/ask is big
        daily_session_start_end=DEFAULT_DAILY_SESSION,
        timestamp_units="ns",
        spread=0.0,
        open_close_time_shift_secs=1.0,
        quotes=True,
    ):
        super().__init__(daily_session_start_end, timestamp_units, open_close_time_shift_secs)
        assert trades or quotes or trades and trades, "Either trades or quotes or both must be enabled"
        self._trades = trades
        self._quotes = quotes
        self._bid_size = default_bid_size
        self._ask_size = default_ask_size
        self._s2 = spread / 2.0

    def start_transform(self, name: str, column_names: list[str], **kwargs):
        super().start_transform(name, column_names, **kwargs)
        # - disable trades if no volume information is available
        if self._volume_idx is None and self._trades:
            logger.warning("Input OHLC data doesn't contain volume information so trades can't be emulated !")
            self._trades = False

    def process_data(self, rows_data: list[list]) -> Any:
        if rows_data is None:
            return

        s2 = self._s2
        if self._freq is None:
            self._detect_emulation_timestamps(rows_data[:100])

        # - input data
        for data in rows_data:
            # ti = pd.Timestamp(data[self._time_idx]).as_unit("ns").asm8.item()
            ti = _time(data[self._time_idx], self._timestamp_units)
            o = data[self._open_idx]
            h = data[self._high_idx]
            l = data[self._low_idx]
            c = data[self._close_idx]
            rv = data[self._volume_idx] if self._volume_idx else 0
            rv = rv / (h - l) if h > l else rv

            # - opening quote
            if self._quotes:
                self.buffer.append(Quote(ti + self._t_start, o - s2, o + s2, self._bid_size, self._ask_size))

            if c >= o:
                if self._trades:
                    self.buffer.append(Trade(ti + self._t_start, o - s2, rv * (o - l)))  # sell 1

                if self._quotes:
                    self.buffer.append(
                        Quote(
                            ti + self._t_mid1,
                            l - s2,
                            l + s2,
                            self._bid_size,
                            self._ask_size,
                        )
                    )

                if self._trades:
                    self.buffer.append(Trade(ti + self._t_mid1, l + s2, rv * (c - o)))  # buy 1

                if self._quotes:
                    self.buffer.append(
                        Quote(
                            ti + self._t_mid2,
                            h - s2,
                            h + s2,
                            self._bid_size,
                            self._ask_size,
                        )
                    )

                if self._trades:
                    self.buffer.append(Trade(ti + self._t_mid2, h - s2, rv * (h - c)))  # sell 2
            else:
                if self._trades:
                    self.buffer.append(Trade(ti + self._t_start, o + s2, rv * (h - o)))  # buy 1

                if self._quotes:
                    self.buffer.append(
                        Quote(
                            ti + self._t_mid1,
                            h - s2,
                            h + s2,
                            self._bid_size,
                            self._ask_size,
                        )
                    )

                if self._trades:
                    self.buffer.append(Trade(ti + self._t_mid1, h - s2, rv * (o - c)))  # sell 1

                if self._quotes:
                    self.buffer.append(
                        Quote(
                            ti + self._t_mid2,
                            l - s2,
                            l + s2,
                            self._bid_size,
                            self._ask_size,
                        )
                    )

                if self._trades:
                    self.buffer.append(Trade(ti + self._t_mid2, l + s2, rv * (c - l)))  # buy 2

            # - closing quote
            if self._quotes:
                self.buffer.append(Quote(ti + self._t_end, c - s2, c + s2, self._bid_size, self._ask_size))

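# - Illustrative note: for an up bar (close >= open) RestoreTicksFromOHLC emits quotes at
#   open -> low -> high -> close across the bar interval (the mirrored path for a down bar),
#   so one candle becomes four quotes plus optional trades. A minimal sketch
#   (path and symbol are assumptions):
#   >>> ticks = CsvStorageDataReader("~/data/csv").read(
#   ...     "BINANCE:BTCUSDT", transform=RestoreTicksFromOHLC(trades=True, spread=0.5))
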
class RestoreQuotesFromOHLC(RestoreTicksFromOHLC):
    """
    Restore (emulate) quotes from OHLC bars
    """

    def __init__(
        self,
        default_bid_size=1e9,  # default bid/ask is big
        default_ask_size=1e9,  # default bid/ask is big
        daily_session_start_end=DEFAULT_DAILY_SESSION,
        timestamp_units="ns",
        spread=0.0,
        open_close_time_shift_secs=1.0,
    ):
        super().__init__(
            trades=False,
            default_bid_size=default_bid_size,
            default_ask_size=default_ask_size,
            daily_session_start_end=daily_session_start_end,
            timestamp_units=timestamp_units,
            spread=spread,
            open_close_time_shift_secs=open_close_time_shift_secs,
            quotes=True,
        )


class RestoreTradesFromOHLC(RestoreTicksFromOHLC):
    """
    Restore (emulate) trades from OHLC bars
    """

    def __init__(
        self,
        daily_session_start_end=DEFAULT_DAILY_SESSION,
        timestamp_units="ns",
        open_close_time_shift_secs=1.0,
    ):
        super().__init__(
            trades=True,
            default_bid_size=0,
            default_ask_size=0,
            daily_session_start_end=daily_session_start_end,
            timestamp_units=timestamp_units,
            spread=0,
            open_close_time_shift_secs=open_close_time_shift_secs,
            quotes=False,
        )


class RestoredBarsFromOHLC(RestoredEmulatorHelper):
    """
    Transforms OHLC data into a sequence of bars trying to mimic real-world market data updates
    """

    def __init__(
        self, daily_session_start_end=DEFAULT_DAILY_SESSION, timestamp_units="ns", open_close_time_shift_secs=1.0
    ):
        super().__init__(daily_session_start_end, timestamp_units, open_close_time_shift_secs)

    def process_data(self, rows_data: List[List]) -> Any:
        if rows_data is None:
            return

        if self._freq is None:
            self._detect_emulation_timestamps(rows_data[:100])

        # - input data
        for data in rows_data:
            ti = _time(data[self._time_idx], self._timestamp_units)
            o = data[self._open_idx]
            h = data[self._high_idx]
            l = data[self._low_idx]
            c = data[self._close_idx]

            vol = data[self._volume_idx] if self._volume_idx is not None else 0
            rvol = vol / (h - l) if h > l else vol

            # - opening bar (o,h,l,c=o, v=0)
            self.buffer.append(Bar(ti + self._t_start, o, o, o, o, 0))

            if c >= o:
                v1 = rvol * (o - l)
                self.buffer.append(Bar(ti + self._t_mid1, o, o, l, l, v1))

                v2 = v1 + rvol * (c - o)
                self.buffer.append(Bar(ti + self._t_mid2, o, h, l, h, v2))

            else:
                v1 = rvol * (h - o)
                self.buffer.append(Bar(ti + self._t_mid1, o, h, o, h, v1))

                v2 = v1 + rvol * (o - c)
                self.buffer.append(Bar(ti + self._t_mid2, o, h, l, l, v2))

            # - full bar
            self.buffer.append(Bar(ti + self._t_end, o, h, l, c, vol))


class AsDict(DataTransformer):
    """
    Tries to keep incoming data as list of dictionaries with preprocessed time
    """

    def start_transform(self, name: str, column_names: List[str], **kwargs):
        self.buffer = list()
        self._time_idx = _find_time_col_idx(column_names)
        self._column_names = column_names
        self._time_name = column_names[self._time_idx]

    def process_data(self, rows_data: Iterable):
        if rows_data is not None:
            for d in rows_data:
                _r_dict = dict(zip(self._column_names, d))
                self.buffer.append(TimestampedDict(_time(d[self._time_idx], "ns"), _r_dict))  # type: ignore


def _retry(fn):
    @wraps(fn)
    def wrapper(*args, **kw):
        cls = args[0]
        for x in range(cls._reconnect_tries):
            # print(x, cls._reconnect_tries)
            try:
                return fn(*args, **kw)
            except (pg.InterfaceError, pg.OperationalError, AttributeError) as e:
                logger.debug("Database Connection [InterfaceError or OperationalError]")
                # print ("Idle for %s seconds" % (cls._reconnect_idle))
                # time.sleep(cls._reconnect_idle)
                cls._connect()

    return wrapper

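# - Illustrative note on _retry: it assumes the decorated callable is a method of an object
#   exposing _reconnect_tries and _connect(), as QuestDBConnector below does, and simply
#   reconnects and retries when the psycopg connection drops. Hypothetical sketch:
#
#   class MyDbReader:
#       _reconnect_tries = 3
#
#       def _connect(self): ...
#
#       @_retry
#       def execute(self, query): ...
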
class QuestDBSqlBuilder:
    """
    Generic sql builder for QuestDB data
    """

    _aliases = {"um": "umfutures", "cm": "cmfutures", "f": "futures"}

    def get_table_name(self, data_id: str, sfx: str = "") -> str:
        """
        Get table name for data_id
        data_id can have format <exchange>.<type>:<symbol>
        for example:
        BINANCE.UM:BTCUSDT or BINANCE:BTCUSDT for spot
        """
        sfx = sfx or "candles_1m"
        table_name = data_id
        _exch, _symb, _mktype = self._get_exchange_symbol_market_type(data_id)
        if _exch and _symb:
            parts = [_exch.lower(), _mktype]
            if "candles" not in sfx:
                parts.append(_symb)
            parts.append(sfx)
            table_name = ".".join(filter(lambda x: x, parts))

        return table_name

    def _get_exchange_symbol_market_type(self, data_id: str) -> tuple[str | None, str | None, str | None]:
        _ss = data_id.split(":")
        if len(_ss) > 1:
            _exch, symb = _ss
            _mktype = "spot"
            _ss = _exch.split(".")
            if len(_ss) > 1:
                _exch = _ss[0]
                _mktype = _ss[1]
            _mktype = _mktype.lower()
            return _exch.lower(), symb.lower(), self._aliases.get(_mktype, _mktype)
        return None, None, None

    def prepare_data_sql(
        self,
        data_id: str,
        start: str | None,
        end: str | None,
        resample: str | None,
        data_type: str,
    ) -> str | None:
        pass

    def prepare_names_sql(self) -> str:
        return "select table_name from tables()"

    def prepare_symbols_sql(self, exchange: str, dtype: str) -> str:
        _table = self.get_table_name(f"{exchange}:BTCUSDT", dtype)
        return f"select distinct(symbol) from {_table}"

    def prepare_data_ranges_sql(self, data_id: str) -> str:
        raise NotImplementedError()


class QuestDBSqlCandlesBuilder(QuestDBSqlBuilder):
    """
    Sql builder for candles data
    """

    def prepare_names_sql(self) -> str:
        return "select table_name from tables() where table_name like '%candles%'"

    @staticmethod
    def _convert_time_delta_to_qdb_resample_format(c_tf: str):
        if c_tf:
            _t = re.match(r"(\d+)(\w+)", c_tf)
            if _t and len(_t.groups()) > 1:
                c_tf = f"{_t[1]}{_t[2][0].lower()}"
        return c_tf

    def prepare_data_sql(
        self,
        data_id: str,
        start: str | None,
        end: str | None,
        resample: str | None,
        data_type: str,
    ) -> str:
        _exch, _symb, _mktype = self._get_exchange_symbol_market_type(data_id)
        if _symb is None:
            _symb = data_id

        where = f"where symbol = '{_symb}'"
        w0 = f"timestamp >= '{start}'" if start else ""
        w1 = f"timestamp < '{end}'" if end else ""

        # - fix: when no data ranges are provided we must skip the empty where keyword
        if w0 or w1:
            where = f"{where} and {w0} and {w1}" if (w0 and w1) else f"{where} and {(w0 or w1)}"

        # - filter out candles without any volume
        where = f"{where} and volume > 0"

        # - check resample format
        resample = (
            QuestDBSqlCandlesBuilder._convert_time_delta_to_qdb_resample_format(resample)
            if resample
            else "1m"  # if resample is empty let's use 1 minute timeframe
        )
        _rsmpl = f"SAMPLE by {resample} FILL(NONE)" if resample else ""

        table_name = self.get_table_name(data_id, data_type)
        return f"""
            select timestamp,
                first(open) as open, max(high) as high, min(low) as low, last(close) as close,
                sum(volume) as volume,
                sum(quote_volume) as quote_volume,
                sum(count) as count,
                sum(taker_buy_volume) as taker_buy_volume,
                sum(taker_buy_quote_volume) as taker_buy_quote_volume
            from "{table_name}" {where} {_rsmpl};
        """

    def prepare_data_ranges_sql(self, data_id: str) -> str:
        _exch, _symb, _mktype = self._get_exchange_symbol_market_type(data_id)
        if _exch is None:
            raise ValueError(f"Can't get exchange name from data id: {data_id} !")
        return f"""(SELECT timestamp FROM "{_exch}.{_mktype}.candles_1m" WHERE symbol='{_symb}' ORDER BY timestamp ASC LIMIT 1)
            UNION
            (SELECT timestamp FROM "{_exch}.{_mktype}.candles_1m" WHERE symbol='{_symb}' ORDER BY timestamp DESC LIMIT 1)
        """


class QuestDBSqlTOBBilder(QuestDBSqlBuilder):
    def prepare_data_ranges_sql(self, data_id: str) -> str:
        _exch, _symb, _mktype = self._get_exchange_symbol_market_type(data_id)
        if _exch is None:
            raise ValueError(f"Can't get exchange name from data id: {data_id} !")
        # TODO: ????
        return f"""(SELECT timestamp FROM "{_exch}.{_mktype}.{_symb}.orderbook" ORDER BY timestamp ASC LIMIT 1)
            UNION
            (SELECT timestamp FROM "{_exch}.{_mktype}.{_symb}.orderbook" ORDER BY timestamp DESC LIMIT 1)
        """

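# - Illustrative examples of the naming conventions implemented above (values follow from the code):
#   >>> QuestDBSqlCandlesBuilder().get_table_name("BINANCE.UM:BTCUSDT")
#   'binance.umfutures.candles_1m'
#   >>> QuestDBSqlCandlesBuilder().get_table_name("BINANCE:BTCUSDT")
#   'binance.spot.candles_1m'
#   >>> QuestDBSqlCandlesBuilder._convert_time_delta_to_qdb_resample_format("15Min")
#   '15m'
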
class QuestDBConnector(DataReader):
|
|
1148
|
+
"""
|
|
1149
|
+
Very first version of QuestDB connector
|
|
1150
|
+
|
|
1151
|
+
### Connect to an existing QuestDB instance
|
|
1152
|
+
>>> db = QuestDBConnector()
|
|
1153
|
+
>>> db.read('BINANCE.UM:ETHUSDT', '2024-01-01', transform=AsPandasFrame())
|
|
1154
|
+
"""
|
|
1155
|
+
|
|
1156
|
+
_reconnect_tries = 5
|
|
1157
|
+
_reconnect_idle = 0.1 # wait seconds before retying
|
|
1158
|
+
_builder: QuestDBSqlBuilder
|
|
1159
|
+
|
|
1160
|
+
def __init__(
|
|
1161
|
+
self,
|
|
1162
|
+
builder: QuestDBSqlBuilder = QuestDBSqlCandlesBuilder(),
|
|
1163
|
+
host="localhost",
|
|
1164
|
+
user="admin",
|
|
1165
|
+
password="quest",
|
|
1166
|
+
port=8812,
|
|
1167
|
+
) -> None:
|
|
1168
|
+
self._connection = None
|
|
1169
|
+
self._host = host
|
|
1170
|
+
self._port = port
|
|
1171
|
+
self.connection_url = f"user={user} password={password} host={host} port={port}"
|
|
1172
|
+
self._builder = builder
|
|
1173
|
+
self._connect()
|
|
1174
|
+
|
|
1175
|
+
def __getstate__(self):
|
|
1176
|
+
if self._connection:
|
|
1177
|
+
self._connection.close()
|
|
1178
|
+
self._connection = None
|
|
1179
|
+
state = self.__dict__.copy()
|
|
1180
|
+
return state
|
|
1181
|
+
|
|
1182
|
+
def _connect(self):
|
|
1183
|
+
self._connection = pg.connect(self.connection_url, autocommit=True)
|
|
1184
|
+
logger.debug(f"Connected to QuestDB at {self._host}:{self._port}")
|
|
1185
|
+
|
|
1186
|
+
def read(
|
|
1187
|
+
self,
|
|
1188
|
+
data_id: str,
|
|
1189
|
+
start: str | None = None,
|
|
1190
|
+
stop: str | None = None,
|
|
1191
|
+
transform: DataTransformer = DataTransformer(),
|
|
1192
|
+
chunksize=0,
|
|
1193
|
+
timeframe: str | None = "1m",
|
|
1194
|
+
data_type="candles_1m",
|
|
1195
|
+
) -> Any:
|
|
1196
|
+
return self._read(
|
|
1197
|
+
data_id,
|
|
1198
|
+
start,
|
|
1199
|
+
stop,
|
|
1200
|
+
transform,
|
|
1201
|
+
chunksize,
|
|
1202
|
+
timeframe,
|
|
1203
|
+
data_type,
|
|
1204
|
+
self._builder,
|
|
1205
|
+
)
|
|
1206
|
+
|
|
1207
|
+
def get_candles(
|
|
1208
|
+
self,
|
|
1209
|
+
exchange: str,
|
|
1210
|
+
symbols: list[str],
|
|
1211
|
+
start: str | pd.Timestamp,
|
|
1212
|
+
stop: str | pd.Timestamp,
|
|
1213
|
+
timeframe: str = "1d",
|
|
1214
|
+
) -> pd.DataFrame:
|
|
1215
|
+
assert len(symbols) > 0, "No symbols provided"
|
|
1216
|
+
quoted_symbols = [f"'{s.lower()}'" for s in symbols]
|
|
1217
|
+
where = f"where symbol in ({', '.join(quoted_symbols)}) and timestamp >= '{start}' and timestamp < '{stop}'"
|
|
1218
|
+
table_name = QuestDBSqlCandlesBuilder().get_table_name(f"{exchange}:{list(symbols)[0]}")
|
|
1219
|
+
|
|
1220
|
+
_rsmpl = f"sample by {QuestDBSqlCandlesBuilder._convert_time_delta_to_qdb_resample_format(timeframe)}"
|
|
1221
|
+
|
|
1222
|
+
query = f"""
|
|
1223
|
+
select timestamp,
|
|
1224
|
+
upper(symbol) as symbol,
|
|
1225
|
+
first(open) as open,
|
|
1226
|
+
max(high) as high,
|
|
1227
|
+
min(low) as low,
|
|
1228
|
+
last(close) as close,
|
|
1229
|
+
sum(volume) as volume,
|
|
1230
|
+
sum(quote_volume) as quote_volume,
|
|
1231
|
+
sum(count) as count,
|
|
1232
|
+
sum(taker_buy_volume) as taker_buy_volume,
|
|
1233
|
+
sum(taker_buy_quote_volume) as taker_buy_quote_volume
|
|
1234
|
+
from "{table_name}" {where} {_rsmpl};
|
|
1235
|
+
"""
|
|
1236
|
+
res = self.execute(query)
|
|
1237
|
+
if res.empty:
|
|
1238
|
+
return res
|
|
1239
|
+
return res.set_index(["timestamp", "symbol"])
|
|
1240
|
+
|
|
1241
|
+
def get_average_quote_volume(
|
|
1242
|
+
self,
|
|
1243
|
+
exchange: str,
|
|
1244
|
+
start: str | pd.Timestamp,
|
|
1245
|
+
stop: str | pd.Timestamp,
|
|
1246
|
+
timeframe: str = "1d",
|
|
1247
|
+
) -> pd.Series:
|
|
1248
|
+
table_name = QuestDBSqlCandlesBuilder().get_table_name(f"{exchange}:BTCUSDT")
|
|
1249
|
+
query = f"""
|
|
1250
|
+
WITH sampled as (
|
|
1251
|
+
select timestamp, symbol, sum(quote_volume) as qvolume
|
|
1252
|
+
from "{table_name}"
|
|
1253
|
+
where timestamp >= '{start}' and timestamp < '{stop}'
|
|
1254
|
+
SAMPLE BY {QuestDBSqlCandlesBuilder._convert_time_delta_to_qdb_resample_format(timeframe)}
|
|
1255
|
+
)
|
|
1256
|
+
select upper(symbol) as symbol, avg(qvolume) as quote_volume from sampled
|
|
1257
|
+
group by symbol
|
|
1258
|
+
order by quote_volume desc;
|
|
1259
|
+
"""
|
|
1260
|
+
vol_stats = self.execute(query)
|
|
1261
|
+
if vol_stats.empty:
|
|
1262
|
+
return pd.Series()
|
|
1263
|
+
return vol_stats.set_index("symbol")["quote_volume"]
|
|
1264
|
+
|
|
1265
|
+
def get_fundamental_data(
|
|
1266
|
+
self,
|
|
1267
|
+
exchange: str,
|
|
1268
|
+
symbols: list[str] | None = None,
|
|
1269
|
+
start: str | pd.Timestamp | None = None,
|
|
1270
|
+
stop: str | pd.Timestamp | None = None,
|
|
1271
|
+
timeframe: str = "1d",
|
|
1272
|
+
) -> pd.DataFrame:
|
|
1273
|
+
table_name = {"BINANCE.UM": "binance.umfutures.fundamental"}[exchange]
|
|
1274
|
+
query = f"select timestamp, symbol, metric, last(value) as value from {table_name}"
|
|
1275
|
+
# TODO: fix handling without start/stop, where needs to be added
|
|
1276
|
+
if start or stop:
|
|
1277
|
+
conditions = []
|
|
1278
|
+
if start:
|
|
1279
|
+
conditions.append(f"timestamp >= '{start}'")
|
|
1280
|
+
if stop:
|
|
1281
|
+
conditions.append(f"timestamp < '{stop}'")
|
|
1282
|
+
query += " where " + " and ".join(conditions)
|
|
1283
|
+
if symbols:
|
|
1284
|
+
query += f" and symbol in ({', '.join(symbols)})"
|
|
1285
|
+
_rsmpl = f"sample by {QuestDBSqlCandlesBuilder._convert_time_delta_to_qdb_resample_format(timeframe)}"
|
|
1286
|
+
query += f" {_rsmpl}"
|
|
1287
|
+
df = self.execute(query)
|
|
1288
|
+
if df.empty:
|
|
1289
|
+
return pd.DataFrame()
|
|
1290
|
+
return df.set_index(["timestamp", "symbol", "metric"]).value.unstack("metric")
|
|
1291
|
+
|
|
1292
|
+
def get_names(self) -> List[str]:
|
|
1293
|
+
return self._get_names(self._builder)
|
|
1294
|
+
|
|
1295
|
+
@_retry
|
|
1296
|
+
def execute(self, query: str) -> pd.DataFrame:
|
|
1297
|
+
_cursor = self._connection.cursor() # type: ignore
|
|
1298
|
+
_cursor.execute(query) # type: ignore
|
|
1299
|
+
names = [d.name for d in _cursor.description] # type: ignore
|
|
1300
|
+
records = _cursor.fetchall()
|
|
1301
|
+
if not records:
|
|
1302
|
+
return pd.DataFrame()
|
|
1303
|
+
return pd.DataFrame(records, columns=names)
|
|
1304
|
+
|
|
1305
|
+
    @_retry
    def _read(
        self,
        data_id: str,
        start: str | None,
        stop: str | None,
        transform: DataTransformer,
        chunksize: int,
        timeframe: str | None,
        data_type: str,
        builder: QuestDBSqlBuilder,
    ) -> Any:
        start, end = handle_start_stop(start, stop)
        _req = builder.prepare_data_sql(data_id, start, end, timeframe, data_type)

        _cursor = self._connection.cursor()  # type: ignore
        _cursor.execute(_req)  # type: ignore
        names = [d.name for d in _cursor.description]  # type: ignore

        if chunksize > 0:
            # - chunked mode: return a generator that streams fetchmany() batches
            #   through the transformer and closes the cursor once exhausted
            def _iter_chunks():
                while True:
                    records = _cursor.fetchmany(chunksize)
                    if not records:
                        _cursor.close()
                        break
                    transform.start_transform(data_id, names, start=start, stop=stop)
                    transform.process_data(records)
                    yield transform.collect()

            return _iter_chunks()

        # - non-chunked mode: fetch everything at once and run a single transform pass
        try:
            records = _cursor.fetchall()
            if not records:
                return None
            transform.start_transform(data_id, names, start=start, stop=stop)
            transform.process_data(records)
            return transform.collect()
        finally:
            _cursor.close()

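Because _read() returns a generator when chunksize > 0, callers can stream large ranges instead of materializing them in one pass; a sketch using the public read() wrapper defined further below (the symbol, dates, and process() handler are placeholders).

# Hypothetical streaming read: each yielded item is one transformed chunk
# (a DataFrame here, since AsPandasFrame() is used as the transformer).
for chunk in qdb.read(
    "BINANCE.UM:BTCUSDT",
    "2023-01-01 00:00",
    "2023-01-02 00:00",
    transform=AsPandasFrame(),
    chunksize=10_000,        # rows fetched per cursor.fetchmany() call
    data_type="trade",
):
    process(chunk)           # placeholder for caller-side handling
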
    @_retry
    def _get_names(self, builder: QuestDBSqlBuilder) -> list[str]:
        _cursor = None
        try:
            _cursor = self._connection.cursor()  # type: ignore
            _cursor.execute(builder.prepare_names_sql())  # type: ignore
            records = _cursor.fetchall()
        finally:
            if _cursor:
                _cursor.close()
        return [r[0] for r in records]

    @_retry
    def _get_symbols(self, builder: QuestDBSqlBuilder, exchange: str, dtype: str) -> list[str]:
        _cursor = None
        try:
            _cursor = self._connection.cursor()  # type: ignore
            _cursor.execute(builder.prepare_symbols_sql(exchange, dtype))  # type: ignore
            records = _cursor.fetchall()
        finally:
            if _cursor:
                _cursor.close()
        return [f"{exchange}:{r[0].upper()}" for r in records]

    @_retry
    def _get_range(self, builder: QuestDBSqlBuilder, data_id: str) -> tuple[Any] | None:
        _cursor = None
        try:
            _cursor = self._connection.cursor()  # type: ignore
            _cursor.execute(builder.prepare_data_ranges_sql(data_id))  # type: ignore
            return tuple([np.datetime64(r[0]) for r in _cursor.fetchall()])
        finally:
            if _cursor:
                _cursor.close()

    def __del__(self):
        try:
            if self._connection is not None:
                logger.debug("Closing connection")
                self._connection.close()
        except:  # noqa: E722
            pass


class QuestDBSqlOrderBookBuilder(QuestDBSqlCandlesBuilder):
    """
    Sql builder for snapshot data
    """

    SNAPSHOT_DELTA = pd.Timedelta("1h")
    MIN_DELTA = pd.Timedelta("1s")

    def prepare_data_sql(
        self,
        data_id: str,
        start: str | None,
        end: str | None,
        resample: str,
        data_type: str,
    ) -> str:
        if not start or not end:
            raise ValueError("Start and end dates must be provided for orderbook data!")
        start_dt, end_dt = pd.Timestamp(start), pd.Timestamp(end)
        delta = end_dt - start_dt

        raw_start_dt = start_dt.floor(self.SNAPSHOT_DELTA) - self.MIN_DELTA

        table_name = self.get_table_name(data_id, data_type)
        query = f"""
        SELECT * FROM {table_name}
        WHERE timestamp BETWEEN '{raw_start_dt}' AND '{end_dt}'
        """
        return query


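The requested start is widened back to the previous hourly snapshot boundary (minus one second) so that the preceding full orderbook snapshot is always included in the range; a small standalone sketch of that arithmetic, using the same constants as the class above.

import pandas as pd

SNAPSHOT_DELTA = pd.Timedelta("1h")
MIN_DELTA = pd.Timedelta("1s")

start_dt = pd.Timestamp("2023-01-01 12:37:45")
raw_start_dt = start_dt.floor(SNAPSHOT_DELTA) - MIN_DELTA
# raw_start_dt == Timestamp("2023-01-01 11:59:59"), i.e. just before the 12:00 snapshot
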
class TradeSql(QuestDBSqlCandlesBuilder):
    def prepare_data_sql(
        self,
        data_id: str,
        start: str | None,
        end: str | None,
        resample: str,
        data_type: str,
    ) -> str:
        table_name = self.get_table_name(data_id, data_type)
        where = ""
        w0 = f"timestamp >= '{start}'" if start else ""
        w1 = f"timestamp <= '{end}'" if end else ""

        # - fix: when no data ranges are provided we must skip the empty where keyword
        if w0 or w1:
            where = f"where {w0} and {w1}" if (w0 and w1) else f"where {(w0 or w1)}"

        resample = (
            QuestDBSqlCandlesBuilder._convert_time_delta_to_qdb_resample_format(resample) if resample else resample
        )
        if resample:
            sql = f"""
                select timestamp, first(price) as open, max(price) as high, min(price) as low, last(price) as close,
                sum(size) as volume from "{table_name}" {where} SAMPLE by {resample};"""
        else:
            sql = f"""select timestamp, price, size, market_maker from "{table_name}" {where};"""

        return sql

    def prepare_symbols_sql(self, exchange: str, dtype: str) -> str:
        # TODO:
        raise NotImplementedError("Not implemented yet")


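The builder produces two query shapes: resampled OHLCV when a timeframe is given, raw trade rows otherwise. A standalone sketch of the where-clause assembly above (the helper name here is illustrative, not part of the package):

def _trade_where_clause(start: str | None, end: str | None) -> str:
    """Mirror of the clause assembly in TradeSql.prepare_data_sql (illustrative only)."""
    w0 = f"timestamp >= '{start}'" if start else ""
    w1 = f"timestamp <= '{end}'" if end else ""
    if not (w0 or w1):
        return ""                       # no bounds -> no 'where' keyword at all
    return f"where {w0} and {w1}" if (w0 and w1) else f"where {w0 or w1}"

assert _trade_where_clause(None, None) == ""
assert _trade_where_clause("2023-01-01", None) == "where timestamp >= '2023-01-01'"
assert (
    _trade_where_clause("2023-01-01", "2023-01-02")
    == "where timestamp >= '2023-01-01' and timestamp <= '2023-01-02'"
)
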
class MultiQdbConnector(QuestDBConnector):
    """
    Data connector for QuestDB which provides access to the following data types:
        - candles
        - trades
        - orderbook snapshots
        - liquidations
        - funding rate

    Examples:
    1. Retrieving trades:
    qdb.read(
        "BINANCE.UM:BTCUSDT",
        "2023-01-01 00:00",
        "2023-01-01 10:00",
        timeframe="15Min",
        transform=AsPandasFrame(),
        data_type="trade"
    )
    """

    _TYPE_TO_BUILDER = {
        "candles_1m": QuestDBSqlCandlesBuilder(),
        "tob": QuestDBSqlTOBBilder(),
        "trade": TradeSql(),
        "agg_trade": TradeSql(),
        "orderbook": QuestDBSqlOrderBookBuilder(),
    }

    _TYPE_MAPPINGS = {
        "candles": "candles_1m",
        "ohlc": "candles_1m",
        "trades": "trade",
        "ob": "orderbook",
        "trd": "trade",
        "td": "trade",
        "quote": "tob",
        "aggTrade": "agg_trade",
        "agg_trades": "agg_trade",
        "aggTrades": "agg_trade",
    }

    def __init__(
        self,
        host="localhost",
        user="admin",
        password="quest",
        port=8812,
    ) -> None:
        self._connection = None
        self._host = host
        self._port = port
        self._user = user
        self._password = password
        self._connect()

    @property
    def connection_url(self):
        return " ".join(
            [
                f"user={self._user}",
                f"password={self._password}",
                f"host={self._host}",
                f"port={self._port}",
            ]
        )

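The defaults target a local QuestDB instance over its PostgreSQL wire port; a construction sketch, with the libpq-style DSN produced by connection_url (the host name is a placeholder):

# Hypothetical connection to a remote QuestDB host; defaults mirror the constructor above.
qdb = MultiQdbConnector(host="quest.internal", port=8812, user="admin", password="quest")
# qdb.connection_url -> "user=admin password=quest host=quest.internal port=8812"
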
    def read(
        self,
        data_id: str,
        start: str | None = None,
        stop: str | None = None,
        transform: DataTransformer = DataTransformer(),
        chunksize: int = 0,
        timeframe: str | None = None,
        data_type: str = "candles",
    ) -> Any:
        _mapped_data_type = self._TYPE_MAPPINGS.get(data_type, data_type)
        return self._read(
            data_id,
            start,
            stop,
            transform,
            chunksize,
            timeframe,
            _mapped_data_type,
            self._TYPE_TO_BUILDER[_mapped_data_type],
        )

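read() first normalizes the requested data_type through _TYPE_MAPPINGS and then dispatches to the matching SQL builder; a sketch of an OHLC request (symbol and dates are placeholders), complementing the trade example in the class docstring:

# Hypothetical candle request: "ohlc" is an alias that _TYPE_MAPPINGS resolves to
# "candles_1m", so QuestDBSqlCandlesBuilder ends up building the query.
ohlc = qdb.read(
    "BINANCE.UM:ETHUSDT",
    "2023-06-01",
    "2023-06-08",
    timeframe="1h",
    transform=AsPandasFrame(),
    data_type="ohlc",
)
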
    def get_names(self, data_type: str) -> list[str]:
        return self._get_names(self._TYPE_TO_BUILDER[self._TYPE_MAPPINGS.get(data_type, data_type)])

    def get_symbols(self, exchange: str, dtype: str) -> list[str]:
        return self._get_symbols(
            self._TYPE_TO_BUILDER[self._TYPE_MAPPINGS.get(dtype, dtype)],
            exchange,
            self._TYPE_MAPPINGS.get(dtype, dtype),
        )

    def get_time_ranges(self, symbol: str, dtype: str) -> tuple[np.datetime64, np.datetime64]:
        try:
            _xr = self._get_range(self._TYPE_TO_BUILDER[self._TYPE_MAPPINGS.get(dtype, dtype)], symbol)
            return (None, None) if not _xr else _xr  # type: ignore
        except Exception:
            return (None, None)  # type: ignore
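A final discovery sketch (illustrative only): listing candle symbols for an exchange and checking the stored time range before requesting data; get_time_ranges degrades to (None, None) when the symbol or table is missing.

# Hypothetical discovery flow before a data pull.
symbols = qdb.get_symbols("BINANCE.UM", "candles")          # e.g. ["BINANCE.UM:BTCUSDT", ...]
start, stop = qdb.get_time_ranges("BINANCE.UM:BTCUSDT", "candles")
if start is not None:
    print(f"candles available from {start} to {stop}")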