qubx-0.5.7-cp312-cp312-manylinux_2_39_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of Qubx might be problematic.

Files changed (100)
  1. qubx/__init__.py +207 -0
  2. qubx/_nb_magic.py +100 -0
  3. qubx/backtester/__init__.py +5 -0
  4. qubx/backtester/account.py +145 -0
  5. qubx/backtester/broker.py +87 -0
  6. qubx/backtester/data.py +296 -0
  7. qubx/backtester/management.py +378 -0
  8. qubx/backtester/ome.py +296 -0
  9. qubx/backtester/optimization.py +201 -0
  10. qubx/backtester/simulated_data.py +558 -0
  11. qubx/backtester/simulator.py +362 -0
  12. qubx/backtester/utils.py +780 -0
  13. qubx/cli/__init__.py +0 -0
  14. qubx/cli/commands.py +67 -0
  15. qubx/connectors/ccxt/__init__.py +0 -0
  16. qubx/connectors/ccxt/account.py +495 -0
  17. qubx/connectors/ccxt/broker.py +132 -0
  18. qubx/connectors/ccxt/customizations.py +193 -0
  19. qubx/connectors/ccxt/data.py +612 -0
  20. qubx/connectors/ccxt/exceptions.py +17 -0
  21. qubx/connectors/ccxt/factory.py +93 -0
  22. qubx/connectors/ccxt/utils.py +307 -0
  23. qubx/core/__init__.py +0 -0
  24. qubx/core/account.py +251 -0
  25. qubx/core/basics.py +850 -0
  26. qubx/core/context.py +420 -0
  27. qubx/core/exceptions.py +38 -0
  28. qubx/core/helpers.py +480 -0
  29. qubx/core/interfaces.py +1150 -0
  30. qubx/core/loggers.py +514 -0
  31. qubx/core/lookups.py +475 -0
  32. qubx/core/metrics.py +1512 -0
  33. qubx/core/mixins/__init__.py +13 -0
  34. qubx/core/mixins/market.py +94 -0
  35. qubx/core/mixins/processing.py +428 -0
  36. qubx/core/mixins/subscription.py +203 -0
  37. qubx/core/mixins/trading.py +88 -0
  38. qubx/core/mixins/universe.py +270 -0
  39. qubx/core/series.cpython-312-x86_64-linux-gnu.so +0 -0
  40. qubx/core/series.pxd +125 -0
  41. qubx/core/series.pyi +118 -0
  42. qubx/core/series.pyx +988 -0
  43. qubx/core/utils.cpython-312-x86_64-linux-gnu.so +0 -0
  44. qubx/core/utils.pyi +6 -0
  45. qubx/core/utils.pyx +62 -0
  46. qubx/data/__init__.py +25 -0
  47. qubx/data/helpers.py +416 -0
  48. qubx/data/readers.py +1562 -0
  49. qubx/data/tardis.py +100 -0
  50. qubx/gathering/simplest.py +88 -0
  51. qubx/math/__init__.py +3 -0
  52. qubx/math/stats.py +129 -0
  53. qubx/pandaz/__init__.py +23 -0
  54. qubx/pandaz/ta.py +2757 -0
  55. qubx/pandaz/utils.py +638 -0
  56. qubx/resources/instruments/symbols-binance.cm.json +1 -0
  57. qubx/resources/instruments/symbols-binance.json +1 -0
  58. qubx/resources/instruments/symbols-binance.um.json +1 -0
  59. qubx/resources/instruments/symbols-bitfinex.f.json +1 -0
  60. qubx/resources/instruments/symbols-bitfinex.json +1 -0
  61. qubx/resources/instruments/symbols-kraken.f.json +1 -0
  62. qubx/resources/instruments/symbols-kraken.json +1 -0
  63. qubx/ta/__init__.py +0 -0
  64. qubx/ta/indicators.cpython-312-x86_64-linux-gnu.so +0 -0
  65. qubx/ta/indicators.pxd +149 -0
  66. qubx/ta/indicators.pyi +41 -0
  67. qubx/ta/indicators.pyx +787 -0
  68. qubx/trackers/__init__.py +3 -0
  69. qubx/trackers/abvanced.py +236 -0
  70. qubx/trackers/composite.py +146 -0
  71. qubx/trackers/rebalancers.py +129 -0
  72. qubx/trackers/riskctrl.py +641 -0
  73. qubx/trackers/sizers.py +235 -0
  74. qubx/utils/__init__.py +5 -0
  75. qubx/utils/_pyxreloader.py +281 -0
  76. qubx/utils/charting/lookinglass.py +1057 -0
  77. qubx/utils/charting/mpl_helpers.py +1183 -0
  78. qubx/utils/marketdata/binance.py +284 -0
  79. qubx/utils/marketdata/ccxt.py +90 -0
  80. qubx/utils/marketdata/dukas.py +130 -0
  81. qubx/utils/misc.py +541 -0
  82. qubx/utils/ntp.py +63 -0
  83. qubx/utils/numbers_utils.py +7 -0
  84. qubx/utils/orderbook.py +491 -0
  85. qubx/utils/plotting/__init__.py +0 -0
  86. qubx/utils/plotting/dashboard.py +150 -0
  87. qubx/utils/plotting/data.py +137 -0
  88. qubx/utils/plotting/interfaces.py +25 -0
  89. qubx/utils/plotting/renderers/__init__.py +0 -0
  90. qubx/utils/plotting/renderers/plotly.py +0 -0
  91. qubx/utils/runner/__init__.py +1 -0
  92. qubx/utils/runner/_jupyter_runner.pyt +60 -0
  93. qubx/utils/runner/accounts.py +88 -0
  94. qubx/utils/runner/configs.py +65 -0
  95. qubx/utils/runner/runner.py +470 -0
  96. qubx/utils/time.py +312 -0
  97. qubx-0.5.7.dist-info/METADATA +105 -0
  98. qubx-0.5.7.dist-info/RECORD +100 -0
  99. qubx-0.5.7.dist-info/WHEEL +4 -0
  100. qubx-0.5.7.dist-info/entry_points.txt +3 -0
qubx/data/readers.py ADDED
@@ -0,0 +1,1562 @@
+ import itertools
+ import os
+ import re
+ from functools import wraps
+ from os.path import exists, join
+ from typing import Any, Iterable, Iterator, List, Set, Union
+
+ import numpy as np
+ import pandas as pd
+ import psycopg as pg
+ import pyarrow as pa
+ from pyarrow import csv, table
+
+ from qubx import logger
+ from qubx.core.basics import DataType, TimestampedDict
+ from qubx.core.series import OHLCV, Bar, Quote, Trade
+ from qubx.pandaz.utils import ohlc_resample, srows
+ from qubx.utils.time import handle_start_stop, infer_series_frequency
+
+
+ def convert_timedelta_to_numpy(x: str) -> int:
+     return pd.Timedelta(x).to_numpy().item()
+
+
+ D1, H1 = convert_timedelta_to_numpy("1D"), convert_timedelta_to_numpy("1h")
+ MS1 = 1_000_000
+ S1 = 1000 * MS1
+ M1 = 60 * S1
+
+ DEFAULT_DAILY_SESSION = (convert_timedelta_to_numpy("00:00:00.100"), convert_timedelta_to_numpy("23:59:59.900"))
+ STOCK_DAILY_SESSION = (convert_timedelta_to_numpy("9:30:00.100"), convert_timedelta_to_numpy("15:59:59.900"))
+ CME_FUTURES_DAILY_SESSION = (convert_timedelta_to_numpy("8:30:00.100"), convert_timedelta_to_numpy("15:14:59.900"))
+
+
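The session constants are plain integers: nanosecond offsets from midnight that the emulator classes further down use to place synthetic open/close timestamps inside a trading day. A quick sanity check of the defaults:

```python
import pandas as pd

# DEFAULT_DAILY_SESSION opens 100 ms after midnight and closes 100 ms before it;
# both values are nanosecond offsets within the day
assert pd.Timedelta("00:00:00.100").to_numpy().item() == 100 * 1_000_000
assert pd.Timedelta("23:59:59.900").to_numpy().item() == 24 * 3600 * 1_000_000_000 - 100 * 1_000_000
```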
+ def _recognize_t(t: Union[int, str], defaultvalue, timeunit) -> int:
+     if isinstance(t, (str, pd.Timestamp)):
+         try:
+             return np.datetime64(t, timeunit)
+         except (ValueError, TypeError) as e:
+             logger.debug(f"Failed to convert time {t} to datetime64: {e}")
+     return defaultvalue
+
+
+ def _time(t, timestamp_units: str) -> int:
+     t = int(t) if isinstance(t, float) else t
+     if timestamp_units == "ns":
+         return np.datetime64(t, "ns").item()
+     return np.datetime64(t, timestamp_units).astype("datetime64[ns]").item()
+
+
+ def _find_column_index_in_list(xs, *args):
+     xs = [x.lower() for x in xs]
+     for a in args:
+         ai = a.lower()
+         if ai in xs:
+             return xs.index(ai)
+     raise IndexError(f"Can't find any specified columns from [{args}] in provided list: {xs}")
+
+
+ def _list_to_chunked_iterator(data: list[Any], chunksize: int) -> Iterable:
+     it = iter(data)
+     chunk = list(itertools.islice(it, chunksize))
+     while chunk:
+         yield chunk
+         chunk = list(itertools.islice(it, chunksize))
+
+
+ def _find_time_col_idx(column_names):
+     return _find_column_index_in_list(column_names, "time", "timestamp", "datetime", "date", "open_time", "ts")
+
+
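For reference, the private helpers above behave as follows: column matching is case-insensitive and tried in argument order, and the chunked iterator yields lists of at most `chunksize` items. A small illustration (runnable inside this module):

```python
cols = ["Timestamp", "Open", "High", "Low", "Close"]
assert _find_column_index_in_list(cols, "time", "timestamp") == 0  # matches "Timestamp"
assert _find_time_col_idx(cols) == 0

assert list(_list_to_chunked_iterator([1, 2, 3, 4, 5], 2)) == [[1, 2], [3, 4], [5]]
```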
+ class DataTransformer:
+     def __init__(self) -> None:
+         self.buffer = []
+         self._column_names = []
+
+     def start_transform(
+         self,
+         name: str,
+         column_names: List[str],
+         start: str | None = None,
+         stop: str | None = None,
+     ):
+         self._column_names = column_names
+         self.buffer = []
+
+     def process_data(self, rows_data: Iterable) -> Any:
+         if rows_data is not None:
+             self.buffer.extend(rows_data)
+
+     def collect(self) -> Any:
+         return self.buffer
+
+
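DataTransformer fixes the three-step protocol every transformer in this module follows: readers call start_transform() once per request, feed rows through process_data() (possibly several times when reading in chunks) and take the result from collect(). A minimal sketch of a custom transformer (the subclass is hypothetical):

```python
class CloseOnlyTransformer(DataTransformer):  # hypothetical example
    """Collects only the close prices from incoming rows."""

    def start_transform(self, name, column_names, start=None, stop=None):
        self.buffer = []
        self._close_idx = _find_column_index_in_list(column_names, "close")

    def process_data(self, rows_data):
        if rows_data is not None:
            self.buffer.extend(r[self._close_idx] for r in rows_data)
```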
+ class DataReader:
+     def get_names(self, **kwargs) -> List[str]:
+         """
+         TODO: not sure we really need this!
+         """
+         raise NotImplementedError("get_names() method is not implemented")
+
+     def read(
+         self,
+         data_id: str,
+         start: str | None = None,
+         stop: str | None = None,
+         transform: DataTransformer = DataTransformer(),
+         chunksize=0,
+         **kwargs,
+     ) -> Iterator | List:
+         raise NotImplementedError("read() method is not implemented")
+
+     def get_aux_data_ids(self) -> Set[str]:
+         """
+         Returns the set of all auxiliary data IDs available for this data reader
+         """
+
+         def _list_methods(cls):
+             _meth = []
+             for k, s in cls.__dict__.items():
+                 if (
+                     k.startswith("get_")
+                     and k not in ["get_names", "get_symbols", "get_time_ranges", "get_aux_data_ids", "get_aux_data"]
+                     and callable(s)
+                 ):
+                     _meth.append(k[4:])
+             return _meth
+
+         _d_ids = _list_methods(self.__class__)
+         for bc in self.__class__.__bases__:
+             _d_ids.extend(_list_methods(bc))
+         return set(_d_ids)
+
+     def get_aux_data(self, data_id: str, **kwargs) -> Any:
+         """
+         Returns auxiliary data for the specified data ID
+         """
+         if hasattr(self, f"get_{data_id}"):
+             return getattr(self, f"get_{data_id}")(**kwargs)
+         raise ValueError(
+             f"{self.__class__.__name__} doesn't have getter for '{data_id}' auxiliary data. Available data: {self.get_aux_data_ids()}"
+         )
+
+     def get_symbols(self, exchange: str, dtype: str) -> list[str]:
+         raise NotImplementedError("get_symbols() method is not implemented")
+
+     def get_time_ranges(self, symbol: str, dtype: str) -> tuple[np.datetime64, np.datetime64]:
+         """
+         Returns first and last time for the specified symbol and data type in the reader's storage
+         """
+         raise NotImplementedError("get_time_ranges() method is not implemented")
+
+
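get_aux_data() dispatches by naming convention: any `get_<id>` method that is not one of the core reader methods is exposed as an auxiliary data id. A sketch of the convention (the subclass and its getter are hypothetical):

```python
class FundingReader(DataReader):  # hypothetical subclass
    def get_funding(self, symbol: str) -> str:
        return f"funding for {symbol}"

r = FundingReader()
assert r.get_aux_data_ids() == {"funding"}
assert r.get_aux_data("funding", symbol="BTCUSDT") == "funding for BTCUSDT"
```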
+ class CsvStorageDataReader(DataReader):
+     """
+     Data reader for timeseries data stored as csv files in the specified directory
+     """
+
+     def __init__(self, path: str) -> None:
+         _path = os.path.expanduser(path)
+         if not exists(_path):
+             raise ValueError(f"Folder not found at {path}")
+         self.path = _path
+
+     def __find_time_idx(self, arr: pa.ChunkedArray, v) -> int:
+         ix = arr.index(v).as_py()
+         if ix < 0:
+             for c in arr.iterchunks():
+                 a = c.to_numpy()
+                 ix = np.searchsorted(a, v, side="right")
+                 if ix > 0 and ix < len(c):
+                     ix = arr.index(a[ix]).as_py() - 1
+                     break
+         return ix
+
+     def __check_file_name(self, name: str) -> str | None:
+         _f = join(self.path, name.replace(":", os.sep))
+         for sfx in [".csv", ".csv.gz", ""]:
+             if exists(p := (_f + sfx)):
+                 return p
+         return None
+
+     def __try_read_data(
+         self, data_id: str, start: str | None = None, stop: str | None = None, timestamp_formatters=None
+     ) -> tuple[table, np.ndarray, Any, list[str], int, int]:
+         f_path = self.__check_file_name(data_id)
+         if not f_path:
+             raise ValueError(f"Can't find any csv data for {data_id} in {self.path}!")
+
+         convert_options = None
+         if timestamp_formatters is not None:
+             convert_options = csv.ConvertOptions(timestamp_parsers=timestamp_formatters)
+
+         table = csv.read_csv(
+             f_path,
+             parse_options=csv.ParseOptions(ignore_empty_lines=True),
+             convert_options=convert_options,
+         )
+         fieldnames = table.column_names
+
+         # - try to find range to load
+         start_idx, stop_idx = 0, table.num_rows
+         try:
+             _time_field_idx = _find_time_col_idx(fieldnames)
+             _time_type = table.field(_time_field_idx).type
+             _time_unit = _time_type.unit if hasattr(_time_type, "unit") else "ms"
+             _time_data = table[_time_field_idx]
+
+             # - check if we need to convert time to primitive types (i.e. Date32 -> timestamp[x])
+             _time_cast_function = lambda xs: xs
+             if _time_type != pa.timestamp(_time_unit):
+                 _time_cast_function = lambda xs: xs.cast(pa.timestamp(_time_unit))
+             _time_data = _time_cast_function(_time_data)
+
+             # - preprocessing start and stop
+             t_0, t_1 = handle_start_stop(start, stop, convert=lambda x: _recognize_t(x, None, _time_unit))
+
+             # - check requested range
+             if t_0:
+                 start_idx = self.__find_time_idx(_time_data, t_0)
+                 if start_idx >= table.num_rows:
+                     # - no data for requested start date
+                     return table, _time_data, _time_unit, fieldnames, -1, -1
+
+             if t_1:
+                 stop_idx = self.__find_time_idx(_time_data, t_1)
+                 if stop_idx < 0 or stop_idx < start_idx:
+                     stop_idx = table.num_rows
+
+         except Exception as exc:
+             logger.warning(f"exception [{exc}] during preprocessing '{f_path}'")
+
+         return table, _time_data, _time_unit, fieldnames, start_idx, stop_idx
+
+     def read(
+         self,
+         data_id: str,
+         start: str | None = None,
+         stop: str | None = None,
+         transform: DataTransformer = DataTransformer(),
+         chunksize=0,
+         timestamp_formatters=None,
+         timeframe=None,
+         **kwargs,
+     ) -> Iterable | Any:
+         table, _, _, fieldnames, start_idx, stop_idx = self.__try_read_data(data_id, start, stop, timestamp_formatters)
+         if start_idx < 0 or stop_idx < 0:
+             return None
+         length = stop_idx - start_idx + 1
+         selected_table = table.slice(start_idx, length)
+
+         # - in this case we want to return iterable chunks of data
+         if chunksize > 0:
+
+             def _iter_chunks():
+                 for n in range(0, length // chunksize + 1):
+                     transform.start_transform(data_id, fieldnames, start=start, stop=stop)
+                     raw_data = selected_table[n * chunksize : min((n + 1) * chunksize, length)].to_pandas().to_numpy()
+                     transform.process_data(raw_data)
+                     yield transform.collect()
+
+             return _iter_chunks()
+
+         transform.start_transform(data_id, fieldnames, start=start, stop=stop)
+         raw_data = selected_table.to_pandas().to_numpy()
+         transform.process_data(raw_data)
+         return transform.collect()
+
+     def get_candles(
+         self,
+         exchange: str,
+         symbols: list[str],
+         start: str | pd.Timestamp,
+         stop: str | pd.Timestamp,
+         timeframe: str | None = None,
+     ) -> pd.DataFrame:
+         """
+         Returns pandas DataFrame of candles for given exchange and symbols within specified time range and timeframe
+         """
+         _r = []
+         for symbol in symbols:
+             x = self.read(
+                 f"{exchange}:{symbol}", start=start, stop=stop, timeframe=timeframe, transform=AsPandasFrame()
+             )
+             if x is not None:
+                 if timeframe is not None:
+                     x = ohlc_resample(x, timeframe)
+                 _r.append(x.assign(symbol=symbol.upper(), timestamp=x.index))  # type: ignore
+         return srows(*_r).set_index(["timestamp", "symbol"]) if _r else pd.DataFrame()
+
+     def get_names(self, **kwargs) -> List[str]:
+         _n = []
+         for root, _, files in os.walk(self.path):
+             path = root.split(os.sep)
+             for file in files:
+                 if re.match(r"(.*)\.csv(.gz)?$", file):
+                     f = path[-1]
+                     n = file.split(".")[0]
+                     if f == self.path:
+                         name = n
+                     else:
+                         name = f"{f}:{n}" if f else n
+                     _n.append(name)
+         return _n
+
+     def get_symbols(self, exchange: str, dtype: str) -> list[str]:
+         return self.get_names()
+
+     def get_time_ranges(self, symbol: str, dtype: str) -> tuple[np.datetime64, np.datetime64]:
+         _, _time_data, _time_unit, _, start_idx, stop_idx = self.__try_read_data(symbol, None, None, None)
+         return (
+             np.datetime64(_time_data[start_idx].value, _time_unit),
+             np.datetime64(_time_data[stop_idx - 1].value, _time_unit),
+         )
+
+
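A usage sketch for CsvStorageDataReader (path and symbol are hypothetical): a data id like "BINANCE:BTCUSDT" is resolved to <path>/BINANCE/BTCUSDT.csv or .csv.gz, and passing chunksize > 0 turns read() into a generator:

```python
from qubx.data.readers import AsPandasFrame, CsvStorageDataReader

reader = CsvStorageDataReader("~/data/csv")  # hypothetical folder
ohlc = reader.read("BINANCE:BTCUSDT", "2023-01-01", "2023-02-01", transform=AsPandasFrame())

for frame in reader.read("BINANCE:BTCUSDT", chunksize=10_000, transform=AsPandasFrame()):
    print(frame.shape)  # one DataFrame per chunk
```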
+ class InMemoryDataFrameReader(DataReader):
+     """
+     Data reader for pandas DataFrames
+     """
+
+     exchange: str | None
+     _data: dict[str, pd.DataFrame | pd.Series]
+
+     def __init__(self, data: dict[str, pd.DataFrame | pd.Series], exchange: str | None = None) -> None:
+         if not isinstance(data, dict):
+             raise ValueError("data must be a dictionary of pandas DataFrames")
+         self._data = data
+         self.exchange = exchange
+
+     def get_names(self, **kwargs) -> list[str]:
+         keys = list(self._data.keys())
+         if self.exchange:
+             return [f"{self.exchange}:{k}" for k in keys]
+         return keys
+
+     def _get_data_by_key(self, data_id: str) -> tuple[str, pd.DataFrame | pd.Series]:
+         if data_id not in self._data:
+             if self.exchange and data_id.startswith(self.exchange):
+                 data_id = data_id.split(":")[1]
+         if (d := self._data.get(data_id)) is None:
+             raise ValueError(f"No data found for {data_id}")
+         return data_id, d
+
+     def read(
+         self,
+         data_id: str,
+         start: str | None = None,
+         stop: str | None = None,
+         transform: DataTransformer = DataTransformer(),
+         chunksize=0,
+         **kwargs,
+     ) -> Iterable | list:
+         """
+         Read and transform data for a given data_id within a specified time range.
+
+         Parameters:
+         -----------
+         data_id : str
+             The identifier for the data to be read.
+         start : str | None, optional
+             The start time for the data range (inclusive). If None, start from the earliest available data.
+         stop : str | None, optional
+             The stop time for the data range (inclusive). If None, include data up to the latest available.
+         transform : DataTransformer, optional
+             An instance of DataTransformer to process the retrieved data. Defaults to DataTransformer().
+         chunksize : int, optional
+             The size of data chunks to process at a time. If 0, process all data at once. Defaults to 0.
+         **kwargs : dict
+             Additional keyword arguments for future extensions.
+
+         Returns:
+         --------
+         Iterable | List
+             The processed and transformed data, either as an iterable (if chunksize > 0) or as a list.
+
+         Raises:
+         -------
+         ValueError
+             If no data is found for the given data_id.
+         """
+         start, stop = handle_start_stop(start, stop)
+         data_id, _stored_data = self._get_data_by_key(data_id)
+
+         _sliced_data = _stored_data.loc[start:stop].copy()
+         if _tf := kwargs.get("timeframe"):
+             _sliced_data = ohlc_resample(_sliced_data, _tf)
+             assert isinstance(_sliced_data, pd.DataFrame), "Resampled data should be a DataFrame"
+         _sliced_data = _sliced_data.reset_index()
+
+         def _do_transform(values: Iterable, columns: list[str]) -> Iterable:
+             transform.start_transform(data_id, columns, start=start, stop=stop)
+             transform.process_data(values)
+             return transform.collect()
+
+         if chunksize > 0:
+             # returns chunked frames
+             def _chunked_dataframe(data: np.ndarray, columns: list[str], chunksize: int) -> Iterable:
+                 it = iter(data)
+                 chunk = list(itertools.islice(it, chunksize))
+                 while chunk:
+                     yield _do_transform(chunk, columns)
+                     chunk = list(itertools.islice(it, chunksize))
+
+             return _chunked_dataframe(_sliced_data.values, list(_sliced_data.columns), chunksize)
+
+         return _do_transform(_sliced_data.values, list(_sliced_data.columns))
+
+     def get_symbols(self, exchange: str, dtype: str) -> list[str]:
+         return self.get_names()
+
+     def get_time_ranges(self, symbol: str, dtype: DataType) -> tuple[np.datetime64 | None, np.datetime64 | None]:
+         try:
+             _, _stored_data = self._get_data_by_key(symbol)
+             return _stored_data.index[0], _stored_data.index[-1]
+         except ValueError:
+             return None, None
+
+
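A usage sketch for InMemoryDataFrameReader. The frame below is synthetic; note the index is named "timestamp" so the transformers can locate the time column after reset_index():

```python
import pandas as pd
from qubx.data.readers import AsPandasFrame, InMemoryDataFrameReader

idx = pd.date_range("2023-01-01", periods=240, freq="1min", name="timestamp")
btc = pd.DataFrame(
    {"open": 100.0, "high": 101.0, "low": 99.0, "close": 100.5, "volume": 1.0}, index=idx
)
reader = InMemoryDataFrameReader({"BTCUSDT": btc}, exchange="BINANCE")
assert reader.get_names() == ["BINANCE:BTCUSDT"]

# slice by time and resample to hourly bars (the timeframe kwarg goes through ohlc_resample)
hourly = reader.read(
    "BINANCE:BTCUSDT", "2023-01-01 00:00", "2023-01-01 02:00",
    transform=AsPandasFrame(), timeframe="1h",
)
```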
+ class AsPandasFrame(DataTransformer):
+     """
+     List of records to pandas dataframe transformer
+     """
+
+     def __init__(self, timestamp_units=None) -> None:
+         self.timestamp_units = timestamp_units
+
+     def start_transform(self, name: str, column_names: List[str], **kwargs):
+         self._time_idx = _find_time_col_idx(column_names)
+         self._column_names = column_names
+         self._frame = pd.DataFrame()
+
+     def process_data(self, rows_data: Iterable) -> Any:
+         p = pd.DataFrame.from_records(rows_data, columns=self._column_names)
+         p.set_index(self._column_names[self._time_idx], drop=True, inplace=True)
+         p.index = pd.to_datetime(p.index, unit=self.timestamp_units) if self.timestamp_units else p.index
+         p.index.rename("timestamp", inplace=True)
+         p.sort_index(inplace=True)
+         self._frame = pd.concat((self._frame, p), axis=0, sort=True)
+         return p
+
+     def collect(self) -> Any:
+         return self._frame
+
+
+ class AsOhlcvSeries(DataTransformer):
+     """
+     Convert incoming data into OHLCV series.
+
+     Incoming data may have one of the following structures:
+
+     ```
+     ohlcv: time,open,high,low,close,volume|quote_volume,(buy_volume)
+     quotes: time,bid,ask,bidsize,asksize
+     trades (TAS): time,price,size,(is_taker)
+     ```
+     """
+
+     timeframe: str | None
+     _series: OHLCV | None
+     _data_type: str | None
+
+     def __init__(self, timeframe: str | None = None, timestamp_units="ns") -> None:
+         super().__init__()
+         self.timeframe = timeframe
+         self._series = None
+         self._data_type = None
+         self.timestamp_units = timestamp_units
+
+     def start_transform(self, name: str, column_names: List[str], **kwargs):
+         self._time_idx = _find_time_col_idx(column_names)
+         self._volume_idx = None
+         self._b_volume_idx = None
+         try:
+             self._close_idx = _find_column_index_in_list(column_names, "close")
+             self._open_idx = _find_column_index_in_list(column_names, "open")
+             self._high_idx = _find_column_index_in_list(column_names, "high")
+             self._low_idx = _find_column_index_in_list(column_names, "low")
+
+             try:
+                 self._volume_idx = _find_column_index_in_list(column_names, "quote_volume", "volume", "vol")
+             except:
+                 pass
+
+             try:
+                 self._b_volume_idx = _find_column_index_in_list(
+                     column_names,
+                     "taker_buy_volume",
+                     "taker_buy_quote_volume",
+                     "buy_volume",
+                 )
+             except:
+                 pass
+
+             self._data_type = "ohlc"
+         except:
+             try:
+                 self._ask_idx = _find_column_index_in_list(column_names, "ask")
+                 self._bid_idx = _find_column_index_in_list(column_names, "bid")
+                 self._data_type = "quotes"
+             except:
+                 try:
+                     self._price_idx = _find_column_index_in_list(column_names, "price")
+                     self._size_idx = _find_column_index_in_list(
+                         column_names, "quote_qty", "qty", "size", "amount", "volume"
+                     )
+                     self._taker_idx = None
+                     try:
+                         self._taker_idx = _find_column_index_in_list(
+                             column_names,
+                             "is_buyer_maker",
+                             "side",
+                             "aggressive",
+                             "taker",
+                             "is_taker",
+                         )
+                     except:
+                         pass
+
+                     self._data_type = "trades"
+                 except:
+                     raise ValueError(f"Can't recognize data for update from header: {column_names}")
+
+         self._column_names = column_names
+         self._name = name
+         if self.timeframe:
+             self._series = OHLCV(self._name, self.timeframe)
+
+     def _proc_ohlc(self, rows_data: List[List]):
+         for d in rows_data:
+             self._series.update_by_bar(
+                 _time(d[self._time_idx], self.timestamp_units),
+                 d[self._open_idx],
+                 d[self._high_idx],
+                 d[self._low_idx],
+                 d[self._close_idx],
+                 d[self._volume_idx] if self._volume_idx else 0,
+                 d[self._b_volume_idx] if self._b_volume_idx else 0,
+             )
+
+     def _proc_quotes(self, rows_data: List[List]):
+         for d in rows_data:
+             self._series.update(
+                 _time(d[self._time_idx], self.timestamp_units),
+                 (d[self._ask_idx] + d[self._bid_idx]) / 2,
+             )
+
+     def _proc_trades(self, rows_data: List[List]):
+         for d in rows_data:
+             a = d[self._taker_idx] if self._taker_idx else 0
+             s = d[self._size_idx]
+             b = s if a else 0
+             self._series.update(_time(d[self._time_idx], self.timestamp_units), d[self._price_idx], s, b)
+
+     def process_data(self, rows_data: List[List]) -> Any:
+         if self._series is None:
+             ts = [t[self._time_idx] for t in rows_data[:100]]
+             self.timeframe = pd.Timedelta(infer_series_frequency(ts)).asm8.item()
+
+             # - create instance after first data is received
+             self._series = OHLCV(self._name, self.timeframe)
+
+         match self._data_type:
+             case "ohlc":
+                 self._proc_ohlc(rows_data)
+             case "quotes":
+                 self._proc_quotes(rows_data)
+             case "trades":
+                 self._proc_trades(rows_data)
+
+         return None
+
+     def collect(self) -> Any:
+         return self._series
+
+
+ class AsBars(AsOhlcvSeries):
+     """
+     Convert incoming data into Bars sequence.
+
+     Incoming data may have one of the following structures:
+
+     ```
+     ohlcv: time,open,high,low,close,volume|quote_volume,(buy_volume)
+     quotes: time,bid,ask,bidsize,asksize
+     trades (TAS): time,price,size,(is_taker)
+     ```
+     """
+
+     def collect(self) -> Any:
+         return self._series[::-1] if self._series is not None else None
+
+
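A sketch of AsOhlcvSeries: when no timeframe is given, it is inferred from the first rows via infer_series_frequency(); the header decides whether rows are treated as ohlc, quotes or trades. AsBars collects the same series reversed, as a sequence of Bar objects:

```python
from qubx.data.readers import AsOhlcvSeries, CsvStorageDataReader

reader = CsvStorageDataReader("~/data/csv")  # hypothetical folder
hourly_series = reader.read(
    "BINANCE:BTCUSDT", "2023-01-01", "2023-01-02", transform=AsOhlcvSeries("1h")
)
```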
+ class AsQuotes(DataTransformer):
+     """
+     Tries to convert incoming data to list of Quotes.
+     Data must have appropriate structure: bid, ask, bidsize, asksize and time
+     """
+
+     def start_transform(self, name: str, column_names: List[str], **kwargs):
+         self.buffer = list()
+         self._time_idx = _find_time_col_idx(column_names)
+         self._bid_idx = _find_column_index_in_list(column_names, "bid")
+         self._ask_idx = _find_column_index_in_list(column_names, "ask")
+         self._bidvol_idx = _find_column_index_in_list(column_names, "bidvol", "bid_vol", "bidsize", "bid_size")
+         self._askvol_idx = _find_column_index_in_list(column_names, "askvol", "ask_vol", "asksize", "ask_size")
+
+     def process_data(self, rows_data: Iterable) -> Any:
+         if rows_data is not None:
+             for d in rows_data:
+                 t = d[self._time_idx]
+                 b = d[self._bid_idx]
+                 a = d[self._ask_idx]
+                 bv = d[self._bidvol_idx]
+                 av = d[self._askvol_idx]
+                 self.buffer.append(Quote(_time(t, "ns"), b, a, bv, av))
+
+
+ class AsTrades(DataTransformer):
+     """
+     Tries to convert incoming data to list of Trades.
+     Data must have appropriate structure: price, size, market_maker (optional).
+     Market maker column specifies if buyer is a maker or taker.
+     """
+
+     def start_transform(self, name: str, column_names: List[str], **kwargs):
+         self.buffer: list[Trade] = list()
+         self._time_idx = _find_time_col_idx(column_names)
+         self._price_idx = _find_column_index_in_list(column_names, "price")
+         self._size_idx = _find_column_index_in_list(column_names, "size")
+         try:
+             self._side_idx = _find_column_index_in_list(column_names, "market_maker")
+         except:
+             self._side_idx = None
+
+     def process_data(self, rows_data: Iterable) -> Any:
+         if rows_data is not None:
+             for d in rows_data:
+                 t = d[self._time_idx]
+                 price = d[self._price_idx]
+                 size = d[self._size_idx]
+                 side = d[self._side_idx] if self._side_idx else -1
+                 self.buffer.append(Trade(_time(t, "ns"), price, size, side))
+
+
+ class AsTimestampedRecords(DataTransformer):
+     """
+     Convert incoming data to a list of dictionaries with preprocessed timestamps ('timestamp_ns' and 'timestamp')
+     ```
+     [
+         {
+             'open_time': 1711944240000.0,
+             'open': 203.219,
+             'high': 203.33,
+             'low': 203.134,
+             'close': 203.175,
+             'volume': 10060.0,
+             ....
+             'timestamp_ns': 1711944240000000000,
+             'timestamp': Timestamp('2024-04-01 04:04:00')
+         },
+         ...
+     ]
+     ```
+     """
+
+     def __init__(self, timestamp_units: str | None = None) -> None:
+         self.timestamp_units = timestamp_units
+
+     def start_transform(self, name: str, column_names: List[str], **kwargs):
+         self.buffer = list()
+         self._time_idx = _find_time_col_idx(column_names)
+         self._column_names = column_names
+
+     def process_data(self, rows_data: Iterable) -> Any:
+         self.buffer.extend(rows_data)
+
+     def collect(self) -> Any:
+         res = []
+         for r in self.buffer:
+             t = r[self._time_idx]
+             if self.timestamp_units:
+                 t = _time(t, self.timestamp_units)
+             di = dict(zip(self._column_names, r)) | {
+                 "timestamp_ns": t,
+                 "timestamp": pd.Timestamp(t),
+             }
+             res.append(di)
+         return res
+
+
+ class RestoredEmulatorHelper(DataTransformer):
+     _freq: np.timedelta64 | None = None
+     _t_start: int
+     _t_mid1: int
+     _t_mid2: int
+     _t_end: int
+     _open_close_time_shift_secs: int
+
+     def __init__(self, daily_session_start_end: tuple, timestamp_units: str, open_close_time_shift_secs: int):
+         super().__init__()
+         self._d_session_start = daily_session_start_end[0]
+         self._d_session_end = daily_session_start_end[1]
+         self._timestamp_units = timestamp_units
+         self._open_close_time_shift_secs = open_close_time_shift_secs  # type: ignore
+
+     def _detect_emulation_timestamps(self, rows_data: list[list]):
+         if self._freq is None:
+             ts = [t[self._time_idx] for t in rows_data]
+             try:
+                 self._freq = infer_series_frequency(ts)
+             except ValueError:
+                 logger.warning("Can't determine frequency of incoming data")
+                 return
+
+             # - timestamps when we emit simulated quotes
+             dt = self._freq.astype("timedelta64[ns]").item()
+             dt10 = dt // 10
+
+             # - adjust open-close time shift to avoid overlapping timestamps
+             if self._open_close_time_shift_secs * S1 >= (dt // 2 - dt10):
+                 self._open_close_time_shift_secs = (dt // 2 - 2 * dt10) // S1
+
+             if dt < D1:
+                 self._t_start = self._open_close_time_shift_secs * S1
+                 self._t_mid1 = dt // 2 - dt10
+                 self._t_mid2 = dt // 2 + dt10
+                 self._t_end = dt - self._open_close_time_shift_secs * S1
+             else:
+                 self._t_start = self._d_session_start + self._open_close_time_shift_secs * S1
+                 self._t_mid1 = dt // 2 - H1
+                 self._t_mid2 = dt // 2 + H1
+                 self._t_end = self._d_session_end - self._open_close_time_shift_secs * S1
+
+     def start_transform(self, name: str, column_names: List[str], **kwargs):
+         self.buffer = []
+         # - it will fail if received data doesn't look like ohlcv
+         self._time_idx = _find_time_col_idx(column_names)
+         self._open_idx = _find_column_index_in_list(column_names, "open")
+         self._high_idx = _find_column_index_in_list(column_names, "high")
+         self._low_idx = _find_column_index_in_list(column_names, "low")
+         self._close_idx = _find_column_index_in_list(column_names, "close")
+         self._volume_idx = None
+         self._freq = None
+         try:
+             self._volume_idx = _find_column_index_in_list(column_names, "volume", "vol")
+         except:  # noqa: E722
+             pass
+
+
+ class RestoreTicksFromOHLC(RestoredEmulatorHelper):
+     """
+     Emulates quotes (and trades) from OHLC bars
+     """
+
+     def __init__(
+         self,
+         trades: bool = False,  # if we also want 'trades'
+         default_bid_size=1e9,  # default bid/ask is big
+         default_ask_size=1e9,  # default bid/ask is big
+         daily_session_start_end=DEFAULT_DAILY_SESSION,
+         timestamp_units="ns",
+         spread=0.0,
+         open_close_time_shift_secs=1.0,
+         quotes=True,
+     ):
+         super().__init__(daily_session_start_end, timestamp_units, open_close_time_shift_secs)
+         assert trades or quotes, "Either trades or quotes or both must be enabled"
+         self._trades = trades
+         self._quotes = quotes
+         self._bid_size = default_bid_size
+         self._ask_size = default_ask_size
+         self._s2 = spread / 2.0
+
+     def start_transform(self, name: str, column_names: list[str], **kwargs):
+         super().start_transform(name, column_names, **kwargs)
+         # - disable trades if no volume information is available
+         if self._volume_idx is None and self._trades:
+             logger.warning("Input OHLC data doesn't contain volume information so trades can't be emulated!")
+             self._trades = False
+
+     def process_data(self, rows_data: list[list]) -> Any:
+         if rows_data is None:
+             return
+
+         s2 = self._s2
+         if self._freq is None:
+             self._detect_emulation_timestamps(rows_data[:100])
+
+         # - input data
+         for data in rows_data:
+             # ti = pd.Timestamp(data[self._time_idx]).as_unit("ns").asm8.item()
+             ti = _time(data[self._time_idx], self._timestamp_units)
+             o = data[self._open_idx]
+             h = data[self._high_idx]
+             l = data[self._low_idx]
+             c = data[self._close_idx]
+             rv = data[self._volume_idx] if self._volume_idx else 0
+             rv = rv / (h - l) if h > l else rv
+
+             # - opening quote
+             if self._quotes:
+                 self.buffer.append(Quote(ti + self._t_start, o - s2, o + s2, self._bid_size, self._ask_size))
+
+             if c >= o:
+                 if self._trades:
+                     self.buffer.append(Trade(ti + self._t_start, o - s2, rv * (o - l)))  # sell 1
+
+                 if self._quotes:
+                     self.buffer.append(
+                         Quote(
+                             ti + self._t_mid1,
+                             l - s2,
+                             l + s2,
+                             self._bid_size,
+                             self._ask_size,
+                         )
+                     )
+
+                 if self._trades:
+                     self.buffer.append(Trade(ti + self._t_mid1, l + s2, rv * (c - o)))  # buy 1
+
+                 if self._quotes:
+                     self.buffer.append(
+                         Quote(
+                             ti + self._t_mid2,
+                             h - s2,
+                             h + s2,
+                             self._bid_size,
+                             self._ask_size,
+                         )
+                     )
+
+                 if self._trades:
+                     self.buffer.append(Trade(ti + self._t_mid2, h - s2, rv * (h - c)))  # sell 2
+             else:
+                 if self._trades:
+                     self.buffer.append(Trade(ti + self._t_start, o + s2, rv * (h - o)))  # buy 1
+
+                 if self._quotes:
+                     self.buffer.append(
+                         Quote(
+                             ti + self._t_mid1,
+                             h - s2,
+                             h + s2,
+                             self._bid_size,
+                             self._ask_size,
+                         )
+                     )
+
+                 if self._trades:
+                     self.buffer.append(Trade(ti + self._t_mid1, h - s2, rv * (o - c)))  # sell 1
+
+                 if self._quotes:
+                     self.buffer.append(
+                         Quote(
+                             ti + self._t_mid2,
+                             l - s2,
+                             l + s2,
+                             self._bid_size,
+                             self._ask_size,
+                         )
+                     )
+
+                 if self._trades:
+                     self.buffer.append(Trade(ti + self._t_mid2, l + s2, rv * (c - l)))  # buy 2
+
+             # - closing quote
+             if self._quotes:
+                 self.buffer.append(Quote(ti + self._t_end, c - s2, c + s2, self._bid_size, self._ask_size))
+
+
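A usage sketch for the tick emulator: every OHLC bar expands into four quotes (open -> low -> high -> close for an up bar, open -> high -> low -> close for a down bar) at synthetic timestamps inside the bar, and, when trades are enabled, into trades whose sizes split the bar volume proportionally to the price path:

```python
from qubx.data.readers import CsvStorageDataReader, RestoreTicksFromOHLC

reader = CsvStorageDataReader("~/data/csv")  # hypothetical folder
ticks = reader.read(
    "BINANCE:BTCUSDT", "2023-01-01", "2023-01-02",
    transform=RestoreTicksFromOHLC(trades=True, spread=0.1),
)  # -> list of interleaved Quote and Trade objects
```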
+ class RestoreQuotesFromOHLC(RestoreTicksFromOHLC):
+     """
+     Restore (emulate) quotes from OHLC bars
+     """
+
+     def __init__(
+         self,
+         default_bid_size=1e9,  # default bid/ask is big
+         default_ask_size=1e9,  # default bid/ask is big
+         daily_session_start_end=DEFAULT_DAILY_SESSION,
+         timestamp_units="ns",
+         spread=0.0,
+         open_close_time_shift_secs=1.0,
+     ):
+         super().__init__(
+             trades=False,
+             default_bid_size=default_bid_size,
+             default_ask_size=default_ask_size,
+             daily_session_start_end=daily_session_start_end,
+             timestamp_units=timestamp_units,
+             spread=spread,
+             open_close_time_shift_secs=open_close_time_shift_secs,
+             quotes=True,
+         )
+
+
+ class RestoreTradesFromOHLC(RestoreTicksFromOHLC):
+     """
+     Restore (emulate) trades from OHLC bars
+     """
+
+     def __init__(
+         self,
+         daily_session_start_end=DEFAULT_DAILY_SESSION,
+         timestamp_units="ns",
+         open_close_time_shift_secs=1.0,
+     ):
+         super().__init__(
+             trades=True,
+             default_bid_size=0,
+             default_ask_size=0,
+             daily_session_start_end=daily_session_start_end,
+             timestamp_units=timestamp_units,
+             spread=0,
+             open_close_time_shift_secs=open_close_time_shift_secs,
+             quotes=False,
+         )
+
+
+ class RestoredBarsFromOHLC(RestoredEmulatorHelper):
+     """
+     Transforms OHLC data into a sequence of bars trying to mimic real-world market data updates
+     """
+
+     def __init__(
+         self, daily_session_start_end=DEFAULT_DAILY_SESSION, timestamp_units="ns", open_close_time_shift_secs=1.0
+     ):
+         super().__init__(daily_session_start_end, timestamp_units, open_close_time_shift_secs)
+
+     def process_data(self, rows_data: List[List]) -> Any:
+         if rows_data is None:
+             return
+
+         if self._freq is None:
+             self._detect_emulation_timestamps(rows_data[:100])
+
+         # - input data
+         for data in rows_data:
+             ti = _time(data[self._time_idx], self._timestamp_units)
+             o = data[self._open_idx]
+             h = data[self._high_idx]
+             l = data[self._low_idx]
+             c = data[self._close_idx]
+
+             vol = data[self._volume_idx] if self._volume_idx is not None else 0
+             rvol = vol / (h - l) if h > l else vol
+
+             # - opening bar (o,h,l,c=o, v=0)
+             self.buffer.append(Bar(ti + self._t_start, o, o, o, o, 0))
+
+             if c >= o:
+                 v1 = rvol * (o - l)
+                 self.buffer.append(Bar(ti + self._t_mid1, o, o, l, l, v1))
+
+                 v2 = v1 + rvol * (c - o)
+                 self.buffer.append(Bar(ti + self._t_mid2, o, h, l, h, v2))
+
+             else:
+                 v1 = rvol * (h - o)
+                 self.buffer.append(Bar(ti + self._t_mid1, o, h, o, h, v1))
+
+                 v2 = v1 + rvol * (o - c)
+                 self.buffer.append(Bar(ti + self._t_mid2, o, h, l, l, v2))
+
+             # - full bar
+             self.buffer.append(Bar(ti + self._t_end, o, h, l, c, vol))
+
+
+ class AsDict(DataTransformer):
+     """
+     Tries to keep incoming data as list of dictionaries with preprocessed time
+     """
+
+     def start_transform(self, name: str, column_names: List[str], **kwargs):
+         self.buffer = list()
+         self._time_idx = _find_time_col_idx(column_names)
+         self._column_names = column_names
+         self._time_name = column_names[self._time_idx]
+
+     def process_data(self, rows_data: Iterable):
+         if rows_data is not None:
+             for d in rows_data:
+                 _r_dict = dict(zip(self._column_names, d))
+                 self.buffer.append(TimestampedDict(_time(d[self._time_idx], "ns"), _r_dict))  # type: ignore
+
+
+ def _retry(fn):
+     @wraps(fn)
+     def wrapper(*args, **kw):
+         cls = args[0]
+         for x in range(cls._reconnect_tries):
+             # print(x, cls._reconnect_tries)
+             try:
+                 return fn(*args, **kw)
+             except (pg.InterfaceError, pg.OperationalError, AttributeError) as e:
+                 logger.debug("Database Connection [InterfaceError or OperationalError]")
+                 # print ("Idle for %s seconds" % (cls._reconnect_idle))
+                 # time.sleep(cls._reconnect_idle)
+                 cls._connect()
+
+     return wrapper
+
+
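_retry assumes the decorated method's first positional argument (the instance) exposes _reconnect_tries and _connect(); on a psycopg connection error it reconnects and retries, and if every attempt fails it silently falls through and returns None. A minimal sketch of an owner class (hypothetical):

```python
class _Db:  # hypothetical owner class
    _reconnect_tries = 3

    def _connect(self):
        ...  # (re)open the connection here

    @_retry
    def fetch_version(self):
        ...  # run a query; may raise pg.OperationalError on a dropped link
```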
+ class QuestDBSqlBuilder:
+     """
+     Generic sql builder for QuestDB data
+     """
+
+     _aliases = {"um": "umfutures", "cm": "cmfutures", "f": "futures"}
+
+     def get_table_name(self, data_id: str, sfx: str = "") -> str:
+         """
+         Get table name for data_id
+         data_id can have format <exchange>.<type>:<symbol>
+         for example:
+             BINANCE.UM:BTCUSDT or BINANCE:BTCUSDT for spot
+         """
+         sfx = sfx or "candles_1m"
+         table_name = data_id
+         _exch, _symb, _mktype = self._get_exchange_symbol_market_type(data_id)
+         if _exch and _symb:
+             parts = [_exch.lower(), _mktype]
+             if "candles" not in sfx:
+                 parts.append(_symb)
+             parts.append(sfx)
+             table_name = ".".join(filter(lambda x: x, parts))
+
+         return table_name
+
+     def _get_exchange_symbol_market_type(self, data_id: str) -> tuple[str | None, str | None, str | None]:
+         _ss = data_id.split(":")
+         if len(_ss) > 1:
+             _exch, symb = _ss
+             _mktype = "spot"
+             _ss = _exch.split(".")
+             if len(_ss) > 1:
+                 _exch = _ss[0]
+                 _mktype = _ss[1]
+             _mktype = _mktype.lower()
+             return _exch.lower(), symb.lower(), self._aliases.get(_mktype, _mktype)
+         return None, None, None
+
+     def prepare_data_sql(
+         self,
+         data_id: str,
+         start: str | None,
+         end: str | None,
+         resample: str | None,
+         data_type: str,
+     ) -> str | None:
+         pass
+
+     def prepare_names_sql(self) -> str:
+         return "select table_name from tables()"
+
+     def prepare_symbols_sql(self, exchange: str, dtype: str) -> str:
+         _table = self.get_table_name(f"{exchange}:BTCUSDT", dtype)
+         return f"select distinct(symbol) from {_table}"
+
+     def prepare_data_ranges_sql(self, data_id: str) -> str:
+         raise NotImplementedError()
+
+
+ class QuestDBSqlCandlesBuilder(QuestDBSqlBuilder):
+     """
+     Sql builder for candles data
+     """
+
+     def prepare_names_sql(self) -> str:
+         return "select table_name from tables() where table_name like '%candles%'"
+
+     @staticmethod
+     def _convert_time_delta_to_qdb_resample_format(c_tf: str):
+         if c_tf:
+             _t = re.match(r"(\d+)(\w+)", c_tf)
+             if _t and len(_t.groups()) > 1:
+                 c_tf = f"{_t[1]}{_t[2][0].lower()}"
+         return c_tf
+
+     def prepare_data_sql(
+         self,
+         data_id: str,
+         start: str | None,
+         end: str | None,
+         resample: str | None,
+         data_type: str,
+     ) -> str:
+         _exch, _symb, _mktype = self._get_exchange_symbol_market_type(data_id)
+         if _symb is None:
+             _symb = data_id
+
+         where = f"where symbol = '{_symb}'"
+         w0 = f"timestamp >= '{start}'" if start else ""
+         w1 = f"timestamp < '{end}'" if end else ""
+
+         # - fix: when no data ranges are provided we must skip the empty where keyword
+         if w0 or w1:
+             where = f"{where} and {w0} and {w1}" if (w0 and w1) else f"{where} and {(w0 or w1)}"
+
+         # - filter out candles without any volume
+         where = f"{where} and volume > 0"
+
+         # - check resample format
+         resample = (
+             QuestDBSqlCandlesBuilder._convert_time_delta_to_qdb_resample_format(resample)
+             if resample
+             else "1m"  # if resample is empty let's use 1 minute timeframe
+         )
+         _rsmpl = f"SAMPLE by {resample} FILL(NONE)" if resample else ""
+
+         table_name = self.get_table_name(data_id, data_type)
+         return f"""
+             select timestamp,
+             first(open) as open, max(high) as high, min(low) as low, last(close) as close,
+             sum(volume) as volume,
+             sum(quote_volume) as quote_volume,
+             sum(count) as count,
+             sum(taker_buy_volume) as taker_buy_volume,
+             sum(taker_buy_quote_volume) as taker_buy_quote_volume
+             from "{table_name}" {where} {_rsmpl};
+         """
+
+     def prepare_data_ranges_sql(self, data_id: str) -> str:
+         _exch, _symb, _mktype = self._get_exchange_symbol_market_type(data_id)
+         if _exch is None:
+             raise ValueError(f"Can't get exchange name from data id: {data_id}!")
+         return f"""(SELECT timestamp FROM "{_exch}.{_mktype}.candles_1m" WHERE symbol='{_symb}' ORDER BY timestamp ASC LIMIT 1)
+             UNION
+             (SELECT timestamp FROM "{_exch}.{_mktype}.candles_1m" WHERE symbol='{_symb}' ORDER BY timestamp DESC LIMIT 1)
+         """
+
+
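For reference, the builder shortens pandas-style timeframes to QuestDB SAMPLE BY units by keeping the digits and the first letter of the unit, and derives table names from the data id:

```python
assert QuestDBSqlCandlesBuilder._convert_time_delta_to_qdb_resample_format("15Min") == "15m"
assert QuestDBSqlCandlesBuilder._convert_time_delta_to_qdb_resample_format("1h") == "1h"

# candle tables don't embed the symbol (it is a column filtered in the where clause)
assert QuestDBSqlCandlesBuilder().get_table_name("BINANCE.UM:BTCUSDT") == "binance.umfutures.candles_1m"
```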
+ class QuestDBSqlTOBBilder(QuestDBSqlBuilder):
+     def prepare_data_ranges_sql(self, data_id: str) -> str:
+         _exch, _symb, _mktype = self._get_exchange_symbol_market_type(data_id)
+         if _exch is None:
+             raise ValueError(f"Can't get exchange name from data id: {data_id}!")
+         # TODO: ????
+         return f"""(SELECT timestamp FROM "{_exch}.{_mktype}.{_symb}.orderbook" ORDER BY timestamp ASC LIMIT 1)
+             UNION
+             (SELECT timestamp FROM "{_exch}.{_mktype}.{_symb}.orderbook" ORDER BY timestamp DESC LIMIT 1)
+         """
+
+
+ class QuestDBConnector(DataReader):
+     """
+     Very first version of QuestDB connector
+
+     ### Connect to an existing QuestDB instance
+     >>> db = QuestDBConnector()
+     >>> db.read('BINANCE.UM:ETHUSDT', '2024-01-01', transform=AsPandasFrame())
+     """
+
+     _reconnect_tries = 5
+     _reconnect_idle = 0.1  # wait seconds before retrying
+     _builder: QuestDBSqlBuilder
+
+     def __init__(
+         self,
+         builder: QuestDBSqlBuilder = QuestDBSqlCandlesBuilder(),
+         host="localhost",
+         user="admin",
+         password="quest",
+         port=8812,
+     ) -> None:
+         self._connection = None
+         self._host = host
+         self._port = port
+         self.connection_url = f"user={user} password={password} host={host} port={port}"
+         self._builder = builder
+         self._connect()
+
+     def __getstate__(self):
+         if self._connection:
+             self._connection.close()
+             self._connection = None
+         state = self.__dict__.copy()
+         return state
+
+     def _connect(self):
+         self._connection = pg.connect(self.connection_url, autocommit=True)
+         logger.debug(f"Connected to QuestDB at {self._host}:{self._port}")
+
+     def read(
+         self,
+         data_id: str,
+         start: str | None = None,
+         stop: str | None = None,
+         transform: DataTransformer = DataTransformer(),
+         chunksize=0,
+         timeframe: str | None = "1m",
+         data_type="candles_1m",
+     ) -> Any:
+         return self._read(
+             data_id,
+             start,
+             stop,
+             transform,
+             chunksize,
+             timeframe,
+             data_type,
+             self._builder,
+         )
+
+     def get_candles(
+         self,
+         exchange: str,
+         symbols: list[str],
+         start: str | pd.Timestamp,
+         stop: str | pd.Timestamp,
+         timeframe: str = "1d",
+     ) -> pd.DataFrame:
+         assert len(symbols) > 0, "No symbols provided"
+         quoted_symbols = [f"'{s.lower()}'" for s in symbols]
+         where = f"where symbol in ({', '.join(quoted_symbols)}) and timestamp >= '{start}' and timestamp < '{stop}'"
+         table_name = QuestDBSqlCandlesBuilder().get_table_name(f"{exchange}:{list(symbols)[0]}")
+
+         _rsmpl = f"sample by {QuestDBSqlCandlesBuilder._convert_time_delta_to_qdb_resample_format(timeframe)}"
+
+         query = f"""
+             select timestamp,
+             upper(symbol) as symbol,
+             first(open) as open,
+             max(high) as high,
+             min(low) as low,
+             last(close) as close,
+             sum(volume) as volume,
+             sum(quote_volume) as quote_volume,
+             sum(count) as count,
+             sum(taker_buy_volume) as taker_buy_volume,
+             sum(taker_buy_quote_volume) as taker_buy_quote_volume
+             from "{table_name}" {where} {_rsmpl};
+         """
+         res = self.execute(query)
+         if res.empty:
+             return res
+         return res.set_index(["timestamp", "symbol"])
+
+     def get_average_quote_volume(
+         self,
+         exchange: str,
+         start: str | pd.Timestamp,
+         stop: str | pd.Timestamp,
+         timeframe: str = "1d",
+     ) -> pd.Series:
+         table_name = QuestDBSqlCandlesBuilder().get_table_name(f"{exchange}:BTCUSDT")
+         query = f"""
+             WITH sampled as (
+                 select timestamp, symbol, sum(quote_volume) as qvolume
+                 from "{table_name}"
+                 where timestamp >= '{start}' and timestamp < '{stop}'
+                 SAMPLE BY {QuestDBSqlCandlesBuilder._convert_time_delta_to_qdb_resample_format(timeframe)}
+             )
+             select upper(symbol) as symbol, avg(qvolume) as quote_volume from sampled
+             group by symbol
+             order by quote_volume desc;
+         """
+         vol_stats = self.execute(query)
+         if vol_stats.empty:
+             return pd.Series()
+         return vol_stats.set_index("symbol")["quote_volume"]
+
+     def get_fundamental_data(
+         self,
+         exchange: str,
+         symbols: list[str] | None = None,
+         start: str | pd.Timestamp | None = None,
+         stop: str | pd.Timestamp | None = None,
+         timeframe: str = "1d",
+     ) -> pd.DataFrame:
+         table_name = {"BINANCE.UM": "binance.umfutures.fundamental"}[exchange]
+         query = f"select timestamp, symbol, metric, last(value) as value from {table_name}"
+         # TODO: fix handling without start/stop, where needs to be added
+         if start or stop:
+             conditions = []
+             if start:
+                 conditions.append(f"timestamp >= '{start}'")
+             if stop:
+                 conditions.append(f"timestamp < '{stop}'")
+             query += " where " + " and ".join(conditions)
+         if symbols:
+             query += f" and symbol in ({', '.join(symbols)})"
+         _rsmpl = f"sample by {QuestDBSqlCandlesBuilder._convert_time_delta_to_qdb_resample_format(timeframe)}"
+         query += f" {_rsmpl}"
+         df = self.execute(query)
+         if df.empty:
+             return pd.DataFrame()
+         return df.set_index(["timestamp", "symbol", "metric"]).value.unstack("metric")
+
+     def get_names(self) -> List[str]:
+         return self._get_names(self._builder)
+
+     @_retry
+     def execute(self, query: str) -> pd.DataFrame:
+         _cursor = self._connection.cursor()  # type: ignore
+         _cursor.execute(query)  # type: ignore
+         names = [d.name for d in _cursor.description]  # type: ignore
+         records = _cursor.fetchall()
+         if not records:
+             return pd.DataFrame()
+         return pd.DataFrame(records, columns=names)
+
+     @_retry
+     def _read(
+         self,
+         data_id: str,
+         start: str | None,
+         stop: str | None,
+         transform: DataTransformer,
+         chunksize: int,
+         timeframe: str | None,
+         data_type: str,
+         builder: QuestDBSqlBuilder,
+     ) -> Any:
+         start, end = handle_start_stop(start, stop)
+         _req = builder.prepare_data_sql(data_id, start, end, timeframe, data_type)
+
+         _cursor = self._connection.cursor()  # type: ignore
+         _cursor.execute(_req)  # type: ignore
+         names = [d.name for d in _cursor.description]  # type: ignore
+
+         if chunksize > 0:
+
+             def _iter_chunks():
+                 while True:
+                     records = _cursor.fetchmany(chunksize)
+                     if not records:
+                         _cursor.close()
+                         break
+                     transform.start_transform(data_id, names, start=start, stop=stop)
+                     transform.process_data(records)
+                     yield transform.collect()
+
+             return _iter_chunks()
+
+         try:
+             records = _cursor.fetchall()
+             if not records:
+                 return None
+             transform.start_transform(data_id, names, start=start, stop=stop)
+             transform.process_data(records)
+             return transform.collect()
+         finally:
+             _cursor.close()
+
+     @_retry
+     def _get_names(self, builder: QuestDBSqlBuilder) -> list[str]:
+         _cursor = None
+         try:
+             _cursor = self._connection.cursor()  # type: ignore
+             _cursor.execute(builder.prepare_names_sql())  # type: ignore
+             records = _cursor.fetchall()
+         finally:
+             if _cursor:
+                 _cursor.close()
+         return [r[0] for r in records]
+
+     @_retry
+     def _get_symbols(self, builder: QuestDBSqlBuilder, exchange: str, dtype: str) -> list[str]:
+         _cursor = None
+         try:
+             _cursor = self._connection.cursor()  # type: ignore
+             _cursor.execute(builder.prepare_symbols_sql(exchange, dtype))  # type: ignore
+             records = _cursor.fetchall()
+         finally:
+             if _cursor:
+                 _cursor.close()
+         return [f"{exchange}:{r[0].upper()}" for r in records]
+
+     @_retry
+     def _get_range(self, builder: QuestDBSqlBuilder, data_id: str) -> tuple[Any] | None:
+         _cursor = None
+         try:
+             _cursor = self._connection.cursor()  # type: ignore
+             _cursor.execute(builder.prepare_data_ranges_sql(data_id))  # type: ignore
+             return tuple([np.datetime64(r[0]) for r in _cursor.fetchall()])
+         finally:
+             if _cursor:
+                 _cursor.close()
+
+     def __del__(self):
+         try:
+             if self._connection is not None:
+                 logger.debug("Closing connection")
+                 self._connection.close()
+         except:  # noqa: E722
+             pass
+
+
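A usage sketch, assuming a local QuestDB instance populated with 1-minute candle tables. With chunksize > 0 the connector streams the cursor via fetchmany() instead of materializing the whole result:

```python
from qubx.data.readers import AsPandasFrame, QuestDBConnector

db = QuestDBConnector()  # defaults: localhost:8812, user "admin"
for frame in db.read(
    "BINANCE.UM:ETHUSDT", "2024-01-01", "2024-02-01",
    transform=AsPandasFrame(), chunksize=50_000,
):
    print(frame.index[0], len(frame))
```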
+ class QuestDBSqlOrderBookBuilder(QuestDBSqlCandlesBuilder):
+     """
+     Sql builder for snapshot data
+     """
+
+     SNAPSHOT_DELTA = pd.Timedelta("1h")
+     MIN_DELTA = pd.Timedelta("1s")
+
+     def prepare_data_sql(
+         self,
+         data_id: str,
+         start: str | None,
+         end: str | None,
+         resample: str,
+         data_type: str,
+     ) -> str:
+         if not start or not end:
+             raise ValueError("Start and end dates must be provided for orderbook data!")
+         start_dt, end_dt = pd.Timestamp(start), pd.Timestamp(end)
+         delta = end_dt - start_dt
+
+         raw_start_dt = start_dt.floor(self.SNAPSHOT_DELTA) - self.MIN_DELTA
+
+         table_name = self.get_table_name(data_id, data_type)
+         query = f"""
+             SELECT * FROM {table_name}
+             WHERE timestamp BETWEEN '{raw_start_dt}' AND '{end_dt}'
+         """
+         return query
+
+
+ class TradeSql(QuestDBSqlCandlesBuilder):
+     def prepare_data_sql(
+         self,
+         data_id: str,
+         start: str | None,
+         end: str | None,
+         resample: str,
+         data_type: str,
+     ) -> str:
+         table_name = self.get_table_name(data_id, data_type)
+         where = ""
+         w0 = f"timestamp >= '{start}'" if start else ""
+         w1 = f"timestamp <= '{end}'" if end else ""
+
+         # - fix: when no data ranges are provided we must skip the empty where keyword
+         if w0 or w1:
+             where = f"where {w0} and {w1}" if (w0 and w1) else f"where {(w0 or w1)}"
+
+         resample = (
+             QuestDBSqlCandlesBuilder._convert_time_delta_to_qdb_resample_format(resample) if resample else resample
+         )
+         if resample:
+             sql = f"""
+                 select timestamp, first(price) as open, max(price) as high, min(price) as low, last(price) as close,
+                 sum(size) as volume from "{table_name}" {where} SAMPLE by {resample};"""
+         else:
+             sql = f"""select timestamp, price, size, market_maker from "{table_name}" {where};"""
+
+         return sql
+
+     def prepare_symbols_sql(self, exchange: str, dtype: str) -> str:
+         # TODO:
+         raise NotImplementedError("Not implemented yet")
+
+
+ class MultiQdbConnector(QuestDBConnector):
+     """
+     Data connector for QuestDB which provides access to the following data types:
+     - candles
+     - trades
+     - orderbook snapshots
+     - liquidations
+     - funding rate
+
+     Examples:
+     1. Retrieving trades:
+     qdb.read(
+         "BINANCE.UM:BTCUSDT",
+         "2023-01-01 00:00",
+         "2023-01-01 10:00",
+         timeframe="15Min",
+         transform=AsPandasFrame(),
+         data_type="trade"
+     )
+     """
+
+     _TYPE_TO_BUILDER = {
+         "candles_1m": QuestDBSqlCandlesBuilder(),
+         "tob": QuestDBSqlTOBBilder(),
+         "trade": TradeSql(),
+         "agg_trade": TradeSql(),
+         "orderbook": QuestDBSqlOrderBookBuilder(),
+     }
+
+     _TYPE_MAPPINGS = {
+         "candles": "candles_1m",
+         "ohlc": "candles_1m",
+         "trades": "trade",
+         "ob": "orderbook",
+         "trd": "trade",
+         "td": "trade",
+         "quote": "tob",
+         "aggTrade": "agg_trade",
+         "agg_trades": "agg_trade",
+         "aggTrades": "agg_trade",
+     }
+
+     def __init__(
+         self,
+         host="localhost",
+         user="admin",
+         password="quest",
+         port=8812,
+     ) -> None:
+         self._connection = None
+         self._host = host
+         self._port = port
+         self._user = user
+         self._password = password
+         self._connect()
+
+     @property
+     def connection_url(self):
+         return " ".join(
+             [
+                 f"user={self._user}",
+                 f"password={self._password}",
+                 f"host={self._host}",
+                 f"port={self._port}",
+             ]
+         )
+
+     def read(
+         self,
+         data_id: str,
+         start: str | None = None,
+         stop: str | None = None,
+         transform: DataTransformer = DataTransformer(),
+         chunksize: int = 0,
+         timeframe: str | None = None,
+         data_type: str = "candles",
+     ) -> Any:
+         _mapped_data_type = self._TYPE_MAPPINGS.get(data_type, data_type)
+         return self._read(
+             data_id,
+             start,
+             stop,
+             transform,
+             chunksize,
+             timeframe,
+             _mapped_data_type,
+             self._TYPE_TO_BUILDER[_mapped_data_type],
+         )
+
+     def get_names(self, data_type: str) -> list[str]:
+         return self._get_names(self._TYPE_TO_BUILDER[self._TYPE_MAPPINGS.get(data_type, data_type)])
+
+     def get_symbols(self, exchange: str, dtype: str) -> list[str]:
+         return self._get_symbols(
+             self._TYPE_TO_BUILDER[self._TYPE_MAPPINGS.get(dtype, dtype)],
+             exchange,
+             self._TYPE_MAPPINGS.get(dtype, dtype),
+         )
+
+     def get_time_ranges(self, symbol: str, dtype: str) -> tuple[np.datetime64, np.datetime64]:
+         try:
+             _xr = self._get_range(self._TYPE_TO_BUILDER[self._TYPE_MAPPINGS.get(dtype, dtype)], symbol)
+             return (None, None) if not _xr else _xr  # type: ignore
+         except Exception:
+             return (None, None)  # type: ignore
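A usage sketch for MultiQdbConnector, assuming the same local QuestDB setup: short data_type aliases are resolved through _TYPE_MAPPINGS before picking the SQL builder:

```python
from qubx.data.readers import AsPandasFrame, MultiQdbConnector

qdb = MultiQdbConnector(host="localhost")
candles = qdb.read(
    "BINANCE.UM:BTCUSDT", "2023-01-01", "2023-01-02",
    transform=AsPandasFrame(), data_type="ohlc",      # -> "candles_1m"
)
trades = qdb.read(
    "BINANCE.UM:BTCUSDT", "2023-01-01 00:00", "2023-01-01 01:00",
    transform=AsPandasFrame(), data_type="trades",    # -> "trade" (raw trades, no resample)
)
```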