Qubx 0.5.7__cp312-cp312-manylinux_2_39_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Qubx might be problematic.
- qubx/__init__.py +207 -0
- qubx/_nb_magic.py +100 -0
- qubx/backtester/__init__.py +5 -0
- qubx/backtester/account.py +145 -0
- qubx/backtester/broker.py +87 -0
- qubx/backtester/data.py +296 -0
- qubx/backtester/management.py +378 -0
- qubx/backtester/ome.py +296 -0
- qubx/backtester/optimization.py +201 -0
- qubx/backtester/simulated_data.py +558 -0
- qubx/backtester/simulator.py +362 -0
- qubx/backtester/utils.py +780 -0
- qubx/cli/__init__.py +0 -0
- qubx/cli/commands.py +67 -0
- qubx/connectors/ccxt/__init__.py +0 -0
- qubx/connectors/ccxt/account.py +495 -0
- qubx/connectors/ccxt/broker.py +132 -0
- qubx/connectors/ccxt/customizations.py +193 -0
- qubx/connectors/ccxt/data.py +612 -0
- qubx/connectors/ccxt/exceptions.py +17 -0
- qubx/connectors/ccxt/factory.py +93 -0
- qubx/connectors/ccxt/utils.py +307 -0
- qubx/core/__init__.py +0 -0
- qubx/core/account.py +251 -0
- qubx/core/basics.py +850 -0
- qubx/core/context.py +420 -0
- qubx/core/exceptions.py +38 -0
- qubx/core/helpers.py +480 -0
- qubx/core/interfaces.py +1150 -0
- qubx/core/loggers.py +514 -0
- qubx/core/lookups.py +475 -0
- qubx/core/metrics.py +1512 -0
- qubx/core/mixins/__init__.py +13 -0
- qubx/core/mixins/market.py +94 -0
- qubx/core/mixins/processing.py +428 -0
- qubx/core/mixins/subscription.py +203 -0
- qubx/core/mixins/trading.py +88 -0
- qubx/core/mixins/universe.py +270 -0
- qubx/core/series.cpython-312-x86_64-linux-gnu.so +0 -0
- qubx/core/series.pxd +125 -0
- qubx/core/series.pyi +118 -0
- qubx/core/series.pyx +988 -0
- qubx/core/utils.cpython-312-x86_64-linux-gnu.so +0 -0
- qubx/core/utils.pyi +6 -0
- qubx/core/utils.pyx +62 -0
- qubx/data/__init__.py +25 -0
- qubx/data/helpers.py +416 -0
- qubx/data/readers.py +1562 -0
- qubx/data/tardis.py +100 -0
- qubx/gathering/simplest.py +88 -0
- qubx/math/__init__.py +3 -0
- qubx/math/stats.py +129 -0
- qubx/pandaz/__init__.py +23 -0
- qubx/pandaz/ta.py +2757 -0
- qubx/pandaz/utils.py +638 -0
- qubx/resources/instruments/symbols-binance.cm.json +1 -0
- qubx/resources/instruments/symbols-binance.json +1 -0
- qubx/resources/instruments/symbols-binance.um.json +1 -0
- qubx/resources/instruments/symbols-bitfinex.f.json +1 -0
- qubx/resources/instruments/symbols-bitfinex.json +1 -0
- qubx/resources/instruments/symbols-kraken.f.json +1 -0
- qubx/resources/instruments/symbols-kraken.json +1 -0
- qubx/ta/__init__.py +0 -0
- qubx/ta/indicators.cpython-312-x86_64-linux-gnu.so +0 -0
- qubx/ta/indicators.pxd +149 -0
- qubx/ta/indicators.pyi +41 -0
- qubx/ta/indicators.pyx +787 -0
- qubx/trackers/__init__.py +3 -0
- qubx/trackers/abvanced.py +236 -0
- qubx/trackers/composite.py +146 -0
- qubx/trackers/rebalancers.py +129 -0
- qubx/trackers/riskctrl.py +641 -0
- qubx/trackers/sizers.py +235 -0
- qubx/utils/__init__.py +5 -0
- qubx/utils/_pyxreloader.py +281 -0
- qubx/utils/charting/lookinglass.py +1057 -0
- qubx/utils/charting/mpl_helpers.py +1183 -0
- qubx/utils/marketdata/binance.py +284 -0
- qubx/utils/marketdata/ccxt.py +90 -0
- qubx/utils/marketdata/dukas.py +130 -0
- qubx/utils/misc.py +541 -0
- qubx/utils/ntp.py +63 -0
- qubx/utils/numbers_utils.py +7 -0
- qubx/utils/orderbook.py +491 -0
- qubx/utils/plotting/__init__.py +0 -0
- qubx/utils/plotting/dashboard.py +150 -0
- qubx/utils/plotting/data.py +137 -0
- qubx/utils/plotting/interfaces.py +25 -0
- qubx/utils/plotting/renderers/__init__.py +0 -0
- qubx/utils/plotting/renderers/plotly.py +0 -0
- qubx/utils/runner/__init__.py +1 -0
- qubx/utils/runner/_jupyter_runner.pyt +60 -0
- qubx/utils/runner/accounts.py +88 -0
- qubx/utils/runner/configs.py +65 -0
- qubx/utils/runner/runner.py +470 -0
- qubx/utils/time.py +312 -0
- qubx-0.5.7.dist-info/METADATA +105 -0
- qubx-0.5.7.dist-info/RECORD +100 -0
- qubx-0.5.7.dist-info/WHEEL +4 -0
- qubx-0.5.7.dist-info/entry_points.txt +3 -0
qubx/pandaz/utils.py
ADDED
@@ -0,0 +1,638 @@
from datetime import timedelta
from typing import Any, Callable, Dict, Iterable, List, Literal, Optional, Set, Union

import numpy as np
import pandas as pd
from numpy.lib.stride_tricks import as_strided as stride

from qubx.core.series import OHLCV
from qubx.utils.misc import Struct

def has_columns(x, *args):
    return isinstance(x, pd.DataFrame) and sum(x.columns.isin(args)) == len(args)


def check_frame_columns(x, *args):
    if not isinstance(x, pd.DataFrame):
        raise ValueError(f"Input data must be DataFrame but {type(x)} received !")

    if sum(x.columns.isin(args)) != len(args):
        required = [y for y in args if y not in x.columns]
        raise ValueError(f"> Required {required} columns not found in dataframe !")

def rolling_forward_test_split(
    x: pd.Series | pd.DataFrame, training_period: int, test_period: int, units: str | None = None
):
    """
    Split data into training and testing **rolling** periods.

    Example:

    >>> for train_idx, test_idx in rolling_forward_test_split(np.array(range(15)), 5, 3):
    >>> print('Train:', train_idx, ' Test:', test_idx)

    Train: [1 2 3 4 5]  Test: [6 7 8]
    Train: [4 5 6 7 8]  Test: [9 10 11]
    Train: [7 8 9 10 11]  Test: [12 13 14]

    It also allows splitting by calendar periods (see units for that).
    Example of 2w / 1w splitting:

    >>> Y = pd.Series(np.arange(30), index=pd.date_range('2000-01-01', periods=30))
    >>> for train_idx, test_idx in rolling_forward_test_split(Y, 2, 1, units='W'):
    >>> print('Train:', Y.loc[train_idx], '\n Test:', Y.loc[test_idx])

    :param x: data
    :param training_period: number of observations in the training period
    :param test_period: number of observations in the test period
    :param units: period units if training_period and test_period are calendar periods: {'H', 'D', 'W', 'M', 'Q', 'Y'}
    :return: generator of (train_index, test_index) pairs
    """
    # unit formats from pd.TimeDelta and formats for pd.resample
    units_format = {"H": "H", "D": "D", "W": "W", "M": "MS", "Q": "QS", "Y": "AS"}

    if units:
        if units.upper() not in units_format:
            raise ValueError(
                'Wrong value for "units" parameter. Only %s values are valid' % ",".join(units_format.keys())
            )
        else:
            if not isinstance(x, (pd.Series, pd.DataFrame)) or not isinstance(x.index, pd.DatetimeIndex):
                raise ValueError('Data must be passed as pd.DataFrame or pd.Series when "units" specified')

            if isinstance(x, pd.Series):
                x = x.to_frame()

            resampled = x.resample(units_format[units.upper()]).mean().index
            resampled = resampled - pd.DateOffset(seconds=1)

            for i in range(0, len(resampled), test_period):
                if len(resampled) - 1 < i + training_period or resampled[i + training_period] > x.index[-1]:
                    # no data for next training period
                    break
                training_df = x[resampled[i] : resampled[i + training_period]]
                whole_period = i + training_period + test_period
                if len(resampled) - 1 < whole_period or resampled[whole_period] > x.index[-1]:
                    # if there is not enough data for the test period or it's just the last month,
                    # we don't need to restrict the end date
                    test_df = x[resampled[i + training_period] :]
                else:
                    test_df = x[resampled[i + training_period] : resampled[whole_period]]

                if training_df.empty or test_df.empty:
                    continue
                yield (np.array(training_df.index), np.array(test_df.index))
    else:
        n_obs = x.shape[0]
        i_shift = (n_obs - training_period - test_period) % test_period
        for i in range(i_shift + training_period, n_obs, test_period):
            yield (np.array(range(i - training_period, i)), np.array(range(i, i + test_period)))

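A minimal usage sketch for the calendar-unit mode of rolling_forward_test_split (illustrative only, not part of the released file; it assumes the wheel is installed so that qubx.pandaz.utils is importable):

import numpy as np
import pandas as pd
from qubx.pandaz.utils import rolling_forward_test_split

# daily series split into 2-week training / 1-week test windows
y = pd.Series(np.arange(60), index=pd.date_range("2024-01-01", periods=60, freq="D"))
for train_idx, test_idx in rolling_forward_test_split(y, 2, 1, units="W"):
    # the yielded arrays hold index values of the original series, usable with .loc
    print(len(y.loc[train_idx]), "train rows /", len(y.loc[test_idx]), "test rows")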
def generate_equal_date_ranges(start: str | pd.Timestamp, end: str | pd.Timestamp, freq, units):
    """
    Generator for equal date ranges:

    for s, e in generate_equal_date_ranges('2019-01-01', '2022-05-17', 1, 'Y'):
        print(s, e)
    ------------------
    2019-01-01 2019-12-31
    2020-01-01 2020-12-31
    2021-01-01 2021-12-31
    2022-01-01 2022-05-17
    """
    _as_f = lambda x: pd.Timestamp(x).strftime("%Y-%m-%d")
    if units in ["M", "Y"]:
        offset = pd.DateOffset(years=freq) if units == "Y" else pd.DateOffset(months=freq)
    else:
        offset = pd.Timedelta(f"{freq}{units}")

    if pd.Timestamp(end) - offset < pd.Timestamp(start):
        b = [start, end]

    for a, b in rolling_forward_test_split(pd.Series(0, pd.date_range(start, end)), freq, freq, units=units):
        yield _as_f(a[0]), _as_f(a[-1])

    yield _as_f(b[0]), _as_f(b[-1])

def drop_duplicated_indexes(df: pd.DataFrame, keep: Literal["first", "last", False] = "first"):
    """
    Drops duplicated indexes in a dataframe/series.
    Keeps either the first or the last occurrence (parameter keep).
    """
    return df[~df.index.duplicated(keep=keep)]


def process_duplicated_indexes(data: pd.DataFrame | pd.Series, ns=1) -> pd.DataFrame | pd.Series:
    """
    Finds duplicated indexes in a frame/series and adds a shift (in nanoseconds) to every repeating one
    :param data: time indexed dataframe/series
    :param ns: shift constant in nanoseconds
    :return: dataframe without duplicated rows (each duplicate gets its own unique index)
    """
    values = data.index.duplicated(keep="first").astype(float)
    values[values == 0] = np.NaN

    missings = np.isnan(values)
    cumsum = np.cumsum(~missings)
    diff = np.diff(np.concatenate(([0.0], cumsum[missings])))
    values[missings] = -diff

    # set new index (duplicates shifted by `ns` nanoseconds)
    data.index = data.index.values + np.cumsum(values).astype(np.timedelta64) * ns
    return data

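A short illustrative sketch (not part of the released file) of process_duplicated_indexes, assuming the package is installed:

import pandas as pd
from qubx.pandaz.utils import process_duplicated_indexes

t = pd.Timestamp("2024-01-01 12:00:00")
# two rows share exactly the same timestamp
quotes = pd.DataFrame({"price": [100.0, 100.5, 101.0]}, index=[t, t, t + pd.Timedelta("1s")])
fixed = process_duplicated_indexes(quotes, ns=1)
print(fixed.index)  # the second occurrence is shifted by 1 nanosecond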
def scols(*xs, keys=None, names=None, keep="all") -> pd.DataFrame:
    """
    Concat dataframes/series from xs into a single dataframe along axis 1
    :param keys: keys of the new dataframe (see pd.concat's keys parameter)
    :param names: new column names or dict with replacements
    :return: combined dataframe

    Example
    -------
    >>> scols(
        pd.DataFrame([1,2,3,4,-4], list('abcud')),
        pd.DataFrame([111,21,31,14], list('xyzu')),
        pd.DataFrame([11,21,31,124], list('ertu')),
        pd.DataFrame([11,21,31,14], list('WERT')),
        names=['x', 'y', 'z', 'w'])
    """
    r = pd.concat((xs), axis=1, keys=keys)
    if names:
        if isinstance(names, (list, tuple)):
            if len(names) == len(r.columns):
                r.columns = names
            else:
                raise ValueError(
                    f"if 'names' contains new column names it must have same length as resulting df ({len(r.columns)})"
                )
        elif isinstance(names, dict):
            r = r.rename(columns=names)
    return r


def srows(*xs, keep="all", sort=True) -> Union[pd.DataFrame, pd.Series]:
    """
    Concat dataframes/series from xs into a single dataframe along axis 0
    :param sort: if true it sorts the resulting dataframe by index (default)
    :param keep: how to deal with duplicated indexes.
        If set to 'all' it doesn't do anything (default). Otherwise keeps the first or last occurrences
    :return: combined dataframe

    Example
    -------
    >>> srows(
        pd.DataFrame([1,2,3,4,-4], list('abcud')),
        pd.DataFrame([111,21,31,14], list('xyzu')),
        pd.DataFrame([11,21,31,124], list('ertu')),
        pd.DataFrame([11,21,31,14], list('WERT')),
        sort=True, keep='last')
    """
    r = pd.concat((xs), axis=0)
    r = r.sort_index() if sort else r
    if keep != "all":
        r = drop_duplicated_indexes(r, keep=keep)
    return r


def retain_columns_and_join(data: Dict[str, pd.DataFrame], columns: str | List[str]) -> pd.DataFrame:
    """
    Retains given columns from every value of the data dictionary and concatenates them into a single data frame

    from qube.datasource import DataSource
    from qube.analysis.tools import retain_columns_and_join

    ds = DataSource('yahoo::daily')
    data = ds.load_data(['aapl', 'msft', 'spy'], '2000-01-01', 'now')

    closes = retain_columns_and_join(data, 'close')
    hi_lo = retain_columns_and_join(data, ['high', 'low'])

    :param data: dictionary with dataframes
    :param columns: column names to be retained
    :return: data frame
    """
    if not isinstance(data, dict):
        raise ValueError("Data must be passed as dictionary")

    return pd.concat([data[k][columns] for k in data.keys()], axis=1, keys=data.keys())


def continuous_periods(xs: pd.Series, cond: pd.Series) -> Struct:
    """
    Detect continuous periods on series xs based on condition cond
    """
    df = scols(xs, cond, keys=["_XS_", "sig"])
    df["block"] = (df.sig.shift(1) != df.sig).astype(int).cumsum()
    idx_col_name = xs.index.name

    blk = df[df.sig].reset_index().groupby("block")[idx_col_name].apply(np.array)
    starts = blk.apply(lambda x: x[0])
    ends = blk.apply(lambda x: x[-1])
    se_info = scols(starts, ends, keys=["start", "end"])
    return Struct(blocks=blk.reset_index(drop=True), periods=se_info)

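An illustrative sketch (not part of the released file) of continuous_periods; it assumes the input index is named (the helper groups by xs.index.name) and that Struct exposes its fields as attributes:

import pandas as pd
from qubx.pandaz.utils import continuous_periods

idx = pd.date_range("2024-01-01", periods=8, freq="1h", name="time")
xs = pd.Series([1, 2, 3, 2, 1, 2, 3, 4], index=idx)
res = continuous_periods(xs, xs > 1)
print(res.periods)  # start / end timestamps of every stretch where xs > 1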
def roll(df: pd.DataFrame, w: int, **kwargs):
    """
    Rolling window on dataframe using multiple columns

    >>> roll(pd.DataFrame(np.random.randn(10,3), index=list('ABCDEFGHIJ')), 3).apply(print)

    or alternatively

    >>> pd.DataFrame(np.random.randn(10,3), index=list('ABCDEFGHIJ')).pipe(roll, 3).apply(lambda x: print(x[2]))

    :param df: pandas DataFrame
    :param w: window size (only integers)
    :return: rolling window
    """
    if w > len(df):
        raise ValueError("Window size exceeds number of rows !")

    v = df.values
    d0, d1 = v.shape
    s0, s1 = v.strides
    a = stride(v, (d0 - (w - 1), w, d1), (s0, s0, s1))
    rolled_df = pd.concat({row: pd.DataFrame(values, columns=df.columns) for row, values in zip(df.index, a)})

    return rolled_df.groupby(level=0, **kwargs)

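A small illustrative sketch (not part of the released file) of roll, computing a rolling correlation across two columns, assuming the package is installed:

import numpy as np
import pandas as pd
from qubx.pandaz.utils import roll

df = pd.DataFrame(
    np.random.randn(20, 2), columns=["x", "y"],
    index=pd.date_range("2024-01-01", periods=20, freq="D"),
)
# each group is a 5-row window carrying both columns
corr = roll(df, 5).apply(lambda w: w["x"].corr(w["y"]))
print(corr.tail())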
def dict_to_frame(x: dict, index_type=None, orient="index", columns=None, column_types=dict()) -> pd.DataFrame:
    """
    Utility to convert a dictionary to an indexed DataFrame
    It's possible to pass column names and the type of index
    """
    y = pd.DataFrame().from_dict(x, orient=orient)
    if index_type:
        if index_type in ["ns", "nano"]:
            index_type = "M8[ns]"
        y.index = y.index.astype(index_type)

    # rename if needed
    if columns:
        columns = [columns] if not isinstance(columns, (list, tuple, set)) else columns
        if len(columns) == len(y.columns):
            y.rename(columns=dict(zip(y.columns, columns)), inplace=True)
        else:
            raise ValueError("dict_to_frame> columns argument must contain %d elements" % len(y.columns))

    # if additional conversion is required
    if column_types:
        _existing_cols_conversion = {c: v for c, v in column_types.items() if c in y.columns}
        y = y.astype(_existing_cols_conversion)

    return y

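An illustrative sketch (not part of the released file) of dict_to_frame turning a nanosecond-keyed dict into a time-indexed frame:

from qubx.pandaz.utils import dict_to_frame

raw = {
    1704067200000000000: {"price": 42000.0, "size": 0.5},  # keys are epoch nanoseconds
    1704067260000000000: {"price": 42010.0, "size": 0.7},
}
frame = dict_to_frame(raw, index_type="ns", column_types={"size": "float32"})
print(frame)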
def select_column_and_join(data: Dict[str, pd.DataFrame], column: str) -> pd.DataFrame:
    """
    Select given column from every value of the data dictionary and concatenate them into a single data frame

    from qube.datasource import DataSource
    from qube.analysis.tools import retain_columns_and_join

    ds = DataSource('yahoo::daily')
    data = ds.load_data(['aapl', 'msft', 'spy'], '2000-01-01', 'now')

    closes = select_column_and_join(data, 'close')
    hi_lo = select_column_and_join(data, ['high', 'low'])

    :param data: dictionary with dataframes
    :param column: column name to be selected
    :return: pandas data frame
    """
    if not isinstance(data, dict):
        raise ValueError("Data must be passed as dictionary of pandas dataframes")

    return pd.concat([data[k][column] for k in data.keys()], axis=1, keys=data.keys())


def merge_columns_by_op(x: pd.DataFrame, y: pd.DataFrame, op):
    """
    Merge 2 dataframes into one, performing an operation on the intersected columns

    merge_columns_by_op(
        pd.DataFrame({'A': [1,2,3], 'B': [100,200,300]}),
        pd.DataFrame({'B': [5,6,7], 'C': [10,20,30]}),
        lambda x,y: x + y
    )

        B    A   C
    0   105  1   10
    1   206  2   20
    2   307  3   30

    """
    if x is None or x.empty:
        return y
    if y is None:
        return x
    r = []
    uc = set(x.columns & y.columns)
    for c in uc:
        r.append(op(x[c], y[c]))

    for c in set(x.columns) - uc:
        r.append(x[c])

    for c in set(y.columns) - uc:
        r.append(y[c])

    return scols(*r)

def bands_signals(
    prices: pd.DataFrame,
    score: pd.DataFrame,
    entry,
    exit,
    stop: Optional[float] = np.inf,
    entry_method="cross-out",  # 'cross-in', 'revert-to-band'
    position_sizes_fn=lambda time, score, prices, side: np.zeros(len(prices)),
) -> pd.DataFrame:
    """
    Generate trading signals using score and entry / exit thresholds
    """
    if not isinstance(prices, pd.DataFrame):
        raise ValueError("Prices must be a pandas DataFrame")

    _as_series = lambda xs, index, name: pd.Series(xs, index=index, name=name) if np.isscalar(xs) else xs

    # - entry function
    ent_fn: Callable[[float, float, float, float], int] = lambda t, s2, s1, s0: 0

    match entry_method:
        case "cross-in":
            ent_fn = lambda t, s2, s1, s0: (
                +1 if (s2 < -t and s1 <= -t and s0 > -t) else -1 if (s2 > +t and s1 >= +t and s0 < +t) else 0
            )

        case "cross-out":
            ent_fn = lambda t, s2, s1, s0: (
                +1 if (s2 >= -t and s1 >= -t and s0 < -t) else -1 if (s2 <= +t and s1 <= +t and s0 > +t) else 0
            )

        case "revert-to-band":
            ent_fn = lambda t, s2, s1, s0: (
                +1 if (s1 <= -t and s0 < -t and s0 > s1) else -1 if (s1 >= +t and s0 > +t and s0 < s1) else 0
            )

        case _:
            raise ValueError(
                f"Unknown entry_method: {entry_method}, supported methods are: cross-out, cross-in, revert-to-band"
            )

    entry = abs(_as_series(entry, score.index, "entry"))
    exit = _as_series(exit, score.index, "exit")
    stop = abs(_as_series(stop, score.index, "stop"))

    mx = scols(prices, scols(score.rename("score"), entry, exit, stop), keys=["price", "service"]).dropna()
    px: pd.DataFrame = mx["price"]
    sx: pd.DataFrame = mx["service"]
    P0 = np.zeros(px.shape[1])  # zero position

    signals = {}
    pos = 0
    s1, s2 = np.nan, np.nan

    for t, pi, (s0, en, ex, stp) in zip(px.index, px.values, sx.values):
        if pos == 0:  # no positions yet
            # - only if there are at least 2 past scores available
            if not np.isnan(s1) and not np.isnan(s2):
                if (side := ent_fn(en, s2, s1, s0)) != 0:
                    signals[t] = position_sizes_fn(t, s0, pi, side)
                    pos = side  # - sell or buy spread

        elif pos == -1:
            # - check for stop or exit
            if s0 >= +stp or s0 <= +ex:
                signals[t] = P0
                pos = 0

        elif pos == +1:
            # - check for stop or exit
            if s0 <= -stp or s0 >= -ex:
                signals[t] = P0
                pos = 0

        s1, s2 = s0, s1

    return pd.DataFrame.from_dict(signals, orient="index", columns=px.columns)

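An illustrative sketch (not part of the released file) of bands_signals on a two-leg spread. The symbols, the hedge ratio and the one-unit sizing are made up for the example; the score is passed as a z-score series:

import numpy as np
import pandas as pd
from qubx.pandaz.utils import bands_signals

idx = pd.date_range("2024-01-01", periods=500, freq="1h")
rng = np.random.default_rng(0)
prices = pd.DataFrame({
    "BTCUSDT": 42_000 + rng.normal(0, 50, 500).cumsum(),
    "ETHUSDT": 2_500 + rng.normal(0, 5, 500).cumsum(),
}, index=idx)

spread = prices["BTCUSDT"] - 16.8 * prices["ETHUSDT"]  # hypothetical hedge ratio
zscore = (spread - spread.rolling(48).mean()) / spread.rolling(48).std()

signals = bands_signals(
    prices, zscore, entry=2.0, exit=0.0, entry_method="cross-out",
    # one unit of the first leg hedged by 16.8 units of the second, signed by trade side
    position_sizes_fn=lambda time, score, px, side: side * np.array([1.0, -16.8]),
)
print(signals.head())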
def ohlc_resample(
    df: pd.DataFrame | pd.Series | Dict[str, pd.DataFrame],
    new_freq: str = "1h",
    vmpt: bool = False,
    resample_tz=None,
    non_ohlc_columns_aggregator="sum",
) -> pd.DataFrame | dict:
    """
    Resample OHLCV/tick series to a new timeframe.

    Example:
    >>> d = pd.DataFrame({
    >>>     'open' : np.random.randn(30),
    >>>     'high' : np.random.randn(30),
    >>>     'low' : np.random.randn(30),
    >>>     'close' : np.random.randn(30)
    >>> }, index=pd.date_range('2000-01-01 00:00', freq='5Min', periods=30))
    >>>
    >>> ohlc_resample(d, '15Min')
    >>>
    >>> # if we need to resample quotes
    >>> from qube.datasource import DataSource
    >>> with DataSource('kdb::dukas') as ds:
    >>>     quotes = ds.load_data(['EURUSD', 'GBPUSD'], '2018-05-07', '2018-05-11')
    >>> ohlc_resample(quotes, '1Min', vmpt=True)

    :param df: input ohlc or bid/ask quotes or dict
    :param new_freq: resampling rule (see pandas.DataFrame::resample)
    :param vmpt: use volume weighted price for quotes (if false midprice will be used)
    :param resample_tz: timezone for resampling. For example, to create daily bars in the EET timezone
    :param non_ohlc_columns_aggregator: how to aggregate unknown columns
    :return: resampled ohlc / dict
    """

    def __mx_rsmpl(d, freq: str, is_vmpt: bool = False, resample_tz=None) -> pd.DataFrame:
        _cols = d.columns
        _source_tz = d.index.tz

        # if we have trades
        if all([i in _cols for i in ["price", "side", "amount"]]):
            result = _tz_convert(d.price, resample_tz, _source_tz)
            result = result.resample(freq).agg("ohlc")
            result["volume"] = d.amount.resample(freq).sum()
            result["quote_volume"] = (d.amount * d.price).resample(freq).sum()
            result["taker_buy_volume"] = d[d.side == "buy"].amount.resample(freq).sum()
            result["taker_buy_quote_volume"] = (
                (d[d.side == "buy"].amount * d[d.side == "buy"].price).resample(freq).sum()
            )
            return result if not resample_tz else result.tz_convert(_source_tz)

        # if we have a bid/ask frame
        if "ask" in _cols and "bid" in _cols:
            # if sizes are present we can calc vmpt if needed
            if is_vmpt and "askvol" in _cols and "bidvol" in _cols:
                mp = (d.ask * d.bidvol + d.bid * d.askvol) / (d.askvol + d.bidvol)
                return mp.resample(freq).agg("ohlc")

            # if there are only asks and bids and we don't need vmpt
            result = _tz_convert(d[["ask", "bid"]].mean(axis=1), resample_tz, _source_tz)
            result = result.resample(freq).agg("ohlc")
            # Convert timezone back if it changed
            return result if not resample_tz else result.tz_convert(_source_tz)

        # for OHLC case or just a simple series
        if all([i in _cols for i in ["open", "high", "low", "close"]]) or isinstance(d, pd.Series):
            ohlc_rules = {
                "open": "first",
                "high": "max",
                "low": "min",
                "close": "last",
                "ask_vol": "sum",
                "bid_vol": "sum",
                "volume": "sum",
            }
            result = _tz_convert(d, resample_tz, _source_tz)
            # result = result.resample(freq).apply(dict(i for i in ohlc_rules.items() if i[0] in d.columns)).dropna()
            # 25-Oct-2022: we allow other columns to be included in the transformation (just use last value)
            result = (
                result.resample(freq)
                .apply({c: ohlc_rules.get(c, non_ohlc_columns_aggregator) for c in d.columns})
                .dropna()
            )

            # Convert timezone back if it changed
            return result if not resample_tz else result.tz_convert(_source_tz)

        raise ValueError("Can't recognize structure of input data !")

    def _tz_convert(df, tz, source_tz):
        if tz:
            if not source_tz:
                df = df.tz_localize("GMT")
            return df.tz_convert(tz)
        else:
            return df

    if isinstance(df, (pd.DataFrame, pd.Series)):
        return __mx_rsmpl(df, new_freq, vmpt, resample_tz)
    elif isinstance(df, dict):
        return {k: __mx_rsmpl(v, new_freq, vmpt, resample_tz) for k, v in df.items()}
    else:
        raise ValueError("Type [%s] is not supported in ohlc_resample" % str(type(df)))

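A self-contained illustrative sketch (not part of the released file) of ohlc_resample on plain OHLCV bars:

import numpy as np
import pandas as pd
from qubx.pandaz.utils import ohlc_resample

idx = pd.date_range("2024-01-01", periods=120, freq="1min")
close = 100 + np.random.randn(120).cumsum()
bars = pd.DataFrame({
    "open": close, "high": close + 0.5, "low": close - 0.5, "close": close,
    "volume": np.random.randint(1, 100, 120),  # non-OHLC column, aggregated with the 'sum' rule
}, index=idx)
print(ohlc_resample(bars, "15min").head())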
def shift_series(
    sigs: pd.Series | pd.DataFrame,
    forward: str | None = None,
    days=0,
    hours=0,
    minutes=0,
    seconds=0,
) -> Union[pd.Series, pd.DataFrame]:
    """
    Shift timeseries into future.

    :return: shifted frame or series
    """
    n_sigs = sigs.copy()
    f0 = pd.Timedelta(forward if forward is not None else 0)
    n_sigs.index = n_sigs.index + f0 + pd.DateOffset(days=days, hours=hours, minutes=minutes, seconds=seconds)
    return n_sigs

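A tiny illustrative sketch (not part of the released file) of shift_series, e.g. to model a delay between signal time and execution time:

import pandas as pd
from qubx.pandaz.utils import shift_series

sigs = pd.Series([1, 0, -1], index=pd.date_range("2024-01-01 10:00", periods=3, freq="1h"))
print(shift_series(sigs, minutes=5).index)  # every timestamp moved 5 minutes forward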
def _frame_to_str(data: pd.DataFrame | pd.Series, name: str, start=4, end=4, time_info=True) -> str:
    r = ""
    if isinstance(data, (pd.DataFrame, pd.Series)):
        t_info = f"{len(data)} records"
        if time_info:
            t_info += f" | {data.index[0]}:{data.index[-1]}"
        hdr = f"- - -({name} {t_info})- - -"
        sep = " -" * 50
        r += hdr[: len(sep)] + "\n"
        r += data.head(start).to_string(header=True) + "\n"
        if start < len(data) and end > 0:
            r += "\t. . . . . . \n"
            _s = data.tail(end).to_string(header=True)
            r += "\n".join(_s.split("\n")[2:]) + "\n"
    else:
        r = str(data)
    return r


class OhlcDict(dict):
    """
    Extended dictionary with a method to perform resampling on OHLCV (Open, High, Low, Close, Volume) data.

    Example:
    -------
    >>> index = pd.date_range('2020-01-01', periods=10, freq='15min')
        nc = np.random.rand(10).cumsum()
        d = OhlcDict({
            "A": pd.DataFrame({"open": nc, "high": nc, "low": nc, "close": nc}, index=index),
            "B": pd.DataFrame({"open": nc, "high": nc, "low": nc, "close": nc}, index=index),
        })
        print(d.close)
        print(d("1h").close)
        # - just show full info about this dict
        print(str(d))
    """

    _fields: Set[str]

    def __init__(self, orig: dict[str, pd.DataFrame | pd.Series | OHLCV]):
        _o_copy = {}
        _lst = []
        if isinstance(orig, dict):
            for k, o in orig.items():
                if not isinstance(o, (pd.DataFrame | pd.Series | OHLCV)):
                    raise ValueError(
                        f"All values in the dictionary must be pandas Series, DataFrames or OHLCV, but {k} is {type(o)}"
                    )

                if isinstance(o, OHLCV):
                    o = o.pd()

                # - skip empty data
                if not o.empty:
                    _o_copy[k] = o
                    _lst.extend(o.columns.values)

        self._fields = set(_lst)
        super().__init__(_o_copy)

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        if args:
            try:
                return OhlcDict(ohlc_resample(self, pd.Timedelta(args[0]), **kwds))
            except Exception as e:
                raise ValueError(str(e))
        return self

    def __getattribute__(self, name: str) -> Any:
        if name != "_fields":
            if name in self._fields:
                return retain_columns_and_join(self, name)
        return super().__getattribute__(name)

    def display(self, heads=-1, tails=0) -> str:
        r = ""
        _h, _t = (len(self), 0) if heads == -1 else (heads, tails)
        for k, v in self.items():
            r += _frame_to_str(v, name=k, start=_h, end=_t) + "\n"
        return r

    def __str__(self) -> str:
        return self.display(3, 3)

    def __repr__(self) -> str:
        return self.display(3, 3)