siglab-py 0.1.29__py3-none-any.whl → 0.6.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of siglab-py might be problematic.
- siglab_py/constants.py +26 -1
- siglab_py/exchanges/binance.py +38 -0
- siglab_py/exchanges/deribit.py +83 -0
- siglab_py/exchanges/futubull.py +12 -2
- siglab_py/market_data_providers/candles_provider.py +2 -2
- siglab_py/market_data_providers/candles_ta_provider.py +3 -3
- siglab_py/market_data_providers/ccxt_candles_ta_to_csv.py +4 -4
- siglab_py/market_data_providers/futu_candles_ta_to_csv.py +7 -2
- siglab_py/market_data_providers/google_monitor.py +320 -0
- siglab_py/market_data_providers/orderbooks_provider.py +15 -12
- siglab_py/market_data_providers/tg_monitor.py +428 -0
- siglab_py/market_data_providers/{test_provider.py → trigger_provider.py} +9 -8
- siglab_py/ordergateway/client.py +172 -41
- siglab_py/ordergateway/encrypt_keys_util.py +1 -1
- siglab_py/ordergateway/gateway.py +456 -347
- siglab_py/ordergateway/test_ordergateway.py +8 -7
- siglab_py/tests/integration/market_data_util_tests.py +35 -1
- siglab_py/tests/unit/analytic_util_tests.py +47 -12
- siglab_py/tests/unit/simple_math_tests.py +235 -0
- siglab_py/tests/unit/trading_util_tests.py +65 -0
- siglab_py/util/analytic_util.py +478 -69
- siglab_py/util/market_data_util.py +487 -100
- siglab_py/util/notification_util.py +78 -0
- siglab_py/util/retry_util.py +11 -3
- siglab_py/util/simple_math.py +240 -0
- siglab_py/util/slack_notification_util.py +59 -0
- siglab_py/util/trading_util.py +118 -0
- {siglab_py-0.1.29.dist-info → siglab_py-0.6.12.dist-info}/METADATA +5 -9
- siglab_py-0.6.12.dist-info/RECORD +44 -0
- {siglab_py-0.1.29.dist-info → siglab_py-0.6.12.dist-info}/WHEEL +1 -1
- siglab_py-0.1.29.dist-info/RECORD +0 -34
- {siglab_py-0.1.29.dist-info → siglab_py-0.6.12.dist-info}/top_level.txt +0 -0
@@ -1,13 +1,19 @@
+import logging
+import incremental
 import tzlocal
 from datetime import datetime, timezone
+import time
 from typing import List, Dict, Union, NoReturn, Any, Tuple
 from pathlib import Path
 import math
 import pandas as pd
 import numpy as np
+import asyncio
+from tabulate import tabulate

 from ccxt.base.exchange import Exchange as CcxtExchange
-
+import ccxt
+import ccxt.pro as ccxtpro

 # https://www.analyticsvidhya.com/blog/2021/06/download-financial-dataset-using-yahoo-finance-in-python-a-complete-guide/
 from yahoofinancials import YahooFinancials
@@ -15,72 +21,271 @@ from yahoofinancials import YahooFinancials
 # yfinance allows intervals '1m', '5m', '15m', '1h', '1d', '1wk', '1mo'. yahoofinancials not as flexible
 import yfinance as yf

+from siglab_py.util.retry_util import retry
 from siglab_py.exchanges.futubull import Futubull
+from siglab_py.exchanges.any_exchange import AnyExchange
+from siglab_py.exchanges.deribit import Deribit, DeribitAsync
+from siglab_py.exchanges.binance import Binance, BinanceAsync
+
+def instantiate_exchange(
+    exchange_name : str,
+    api_key : Union[str, None] = None,
+    secret : Union[str, None] = None,
+    passphrase : Union[str, None] = None,
+    default_type : Union[str, None] = 'spot',
+    default_sub_type : Union[str, None] = None,
+    rate_limit_ms : float = 100
+) -> Union[AnyExchange, None]:
+    exchange_name = exchange_name.lower().strip()
+
+    # Look at ccxt exchange.describe, under 'options' \ 'defaultType' (and 'defaultSubType'), for what markets the exchange supports.
+    # https://docs.ccxt.com/en/latest/manual.html#instantiation
+    exchange_params : Dict[str, Any] = {
+        'apiKey' : api_key,
+        'secret' : secret,
+        'enableRateLimit' : True,
+        'rateLimit' : rate_limit_ms,
+        'options' : {
+            'defaultType' : default_type
+        }
+    }
+
+    if default_sub_type:
+        exchange_params['defaultSubType'] = default_sub_type
+
+    if api_key:
+        exchange_params['apiKey'] = api_key
+    if secret:
+        exchange_params['secret'] = secret
+    if passphrase:
+        exchange_params['passphrase'] = passphrase
+
+    if exchange_name=='binance':
+        exchange = Binance(exchange_params) # type: ignore
+    elif exchange_name=='bybit':
+        exchange = ccxt.bybit(exchange_params) # type: ignore
+    elif exchange_name=='okx':
+        exchange = ccxt.okx(exchange_params) # type: ignore
+    elif exchange_name=='deribit':
+        exchange = Deribit(exchange_params) # type: ignore
+    elif exchange_name=='hyperliquid':
+        exchange = ccxt.hyperliquid(
+            {
+                "walletAddress" : api_key, # type: ignore
+                "privateKey" : secret,
+                'enableRateLimit' : True,
+                'rateLimit' : rate_limit_ms
+            }
+        )
+    else:
+        raise ValueError(f"Unsupported exchange {exchange_name}.")
+
+    exchange.load_markets() # type: ignore
+
+    return exchange # type: ignore
+
+async def async_instantiate_exchange(
+    gateway_id : str,
+    api_key : str,
+    secret : str,
+    passphrase : str,
+    default_type : Union[str, None] = 'spot',
+    default_sub_type : Union[str, None] = None,
+    rate_limit_ms : float = 100,
+    verbose : bool = False
+) -> Union[AnyExchange, None]:
+    exchange : Union[AnyExchange, None] = None
+    exchange_name : str = gateway_id.split('_')[0]
+    exchange_name = exchange_name.lower().strip()
+
+    # Look at ccxt exchange.describe, under 'options' \ 'defaultType' (and 'defaultSubType'), for what markets the exchange supports.
+    # https://docs.ccxt.com/en/latest/manual.html#instantiation
+    exchange_params : Dict[str, Any] = {
+        'apiKey' : api_key,
+        'secret' : secret,
+        'enableRateLimit' : True,
+        'rateLimit' : rate_limit_ms,
+        'options' : {
+            'defaultType' : default_type
+        },
+        'verbose': verbose
+    }
+
+    if default_sub_type:
+        exchange_params['defaultSubType'] = default_sub_type
+
+    if exchange_name=='binance':
+        # spot, future, margin, delivery, option
+        # https://github.com/ccxt/ccxt/blob/master/python/ccxt/binance.py#L1298
+        exchange = BinanceAsync(exchange_params) # type: ignore
+    elif exchange_name=='bybit':
+        # spot, linear, inverse, futures
+        # https://github.com/ccxt/ccxt/blob/master/python/ccxt/bybit.py#L1041
+        exchange = ccxtpro.bybit(exchange_params) # type: ignore
+    elif exchange_name=='okx':
+        # 'funding', spot, margin, future, swap, option
+        # https://github.com/ccxt/ccxt/blob/master/python/ccxt/okx.py#L1144
+        exchange_params['password'] = passphrase
+        exchange = ccxtpro.okx(exchange_params) # type: ignore
+    elif exchange_name=='deribit':
+        # spot, swap, future
+        # https://github.com/ccxt/ccxt/blob/master/python/ccxt/deribit.py#L360
+        exchange = DeribitAsync(exchange_params) # type: ignore
+    elif exchange_name=='kraken':
+        exchange = ccxtpro.kraken(exchange_params) # type: ignore
+    elif exchange_name=='hyperliquid':
+        '''
+        https://app.hyperliquid.xyz/API
+
+        defaultType from ccxt: swap
+        https://github.com/ccxt/ccxt/blob/master/python/ccxt/hyperliquid.py#L225
+
+        How to integrate? You can skip the first 6 min: https://www.youtube.com/watch?v=UuBr331wxr4&t=363s
+
+        Example,
+            API credentials created under "\ More \ API":
+                Ledger Arbitrum Wallet Address: 0xAAAAA <-- This is your Ledger Arbitrum wallet address with which you connect to Hyperliquid.
+                API Wallet Address 0xBBBBB <-- Generated
+                privateKey 0xCCCCC
+
+        Basic connection via CCXT:
+            import asyncio
+            import ccxt.pro as ccxtpro
+
+            async def main():
+                rate_limit_ms = 100
+                exchange_params = {
+                    "walletAddress" : "0xAAAAA", # Ledger Arbitrum Wallet Address here! Not the generated address.
+                    "privateKey" : "0xCCCCC"
+                }
+                exchange = ccxtpro.hyperliquid(exchange_params)
+                balances = await exchange.fetch_balance()
+                print(balances)
+
+            asyncio.run(main())
+        '''
+        exchange = ccxtpro.hyperliquid(
+            {
+                "walletAddress" : api_key,
+                "privateKey" : secret,
+                'enableRateLimit' : True,
+                'rateLimit' : rate_limit_ms,
+                'verbose': verbose
+            } # type: ignore
+        )
+    else:
+        raise ValueError(f"Unsupported exchange {exchange_name}, check gateway_id {gateway_id}.")
+
+    await exchange.load_markets() # type: ignore
+
+    '''
+    Is this necessary? The added trouble is that, for example, bybit.authenticate requires arg 'url' while binance doesn't. And fetch_balance already tests credentials.
+
+    try:
+        await exchange.authenticate() # type: ignore
+    except Exception as swallow_this_error:
+        pass
+    '''
+
+    return exchange

 def timestamp_to_datetime_cols(pd_candles : pd.DataFrame):
-
-
-
+    def _fix_timestamp_ms(x):
+        if isinstance(x, pd.Timestamp):
+            return int(x.value // 10**6)
+        elif isinstance(x, np.datetime64):
+            return int(x.astype('int64') // 10**6)
+        elif isinstance(x, (int, float)):
+            x = int(x)
+            if len(str(abs(x))) == 13:
+                return x
+            else:
+                return int(x * 1000)
+        else:
+            raise ValueError(f"Unsupported type {type(x)} for timestamp conversion")
+    pd_candles['timestamp_ms'] = pd_candles['timestamp_ms'].apply(_fix_timestamp_ms)
+    pd_candles['datetime'] = pd_candles['timestamp_ms'].apply(lambda x: datetime.fromtimestamp(int(x/1000)))
     pd_candles['datetime'] = pd.to_datetime(pd_candles['datetime'])
-    pd_candles['datetime'] = pd_candles['datetime'].dt.tz_localize(None)
+    pd_candles['datetime'] = pd_candles['datetime'].dt.tz_localize(None) # type: ignore
     pd_candles['datetime_utc'] = pd_candles['timestamp_ms'].apply(
         lambda x: datetime.fromtimestamp(int(x.timestamp()) if isinstance(x, pd.Timestamp) else int(x / 1000), tz=timezone.utc)
     )

     # This is to make it easy to do grouping with Excel pivot table
-    pd_candles['year'] = pd_candles['datetime'].dt.year
-    pd_candles['month'] = pd_candles['datetime'].dt.month
-    pd_candles['day'] = pd_candles['datetime'].dt.day
-    pd_candles['hour'] = pd_candles['datetime'].dt.hour
-    pd_candles['minute'] = pd_candles['datetime'].dt.minute
-    pd_candles['dayofweek'] = pd_candles['datetime'].dt.dayofweek # dayofweek: Monday is 0 and Sunday is 6
+    pd_candles['year'] = pd_candles['datetime'].dt.year # type: ignore
+    pd_candles['month'] = pd_candles['datetime'].dt.month # type: ignore
+    pd_candles['day'] = pd_candles['datetime'].dt.day # type: ignore
+    pd_candles['hour'] = pd_candles['datetime'].dt.hour # type: ignore
+    pd_candles['minute'] = pd_candles['datetime'].dt.minute # type: ignore
+    pd_candles['dayofweek'] = pd_candles['datetime'].dt.dayofweek # type: ignore dayofweek: Monday is 0 and Sunday is 6

     pd_candles['week_of_month'] = pd_candles['timestamp_ms'].apply(
-        lambda x: timestamp_to_week_of_month(x)
+        lambda x: timestamp_to_week_of_month(int(x/1000))
     )

     pd_candles['apac_trading_hr'] = pd_candles['timestamp_ms'].apply(
-        lambda x: "APAC" in timestamp_to_active_trading_regions(x)
+        lambda x: "APAC" in timestamp_to_active_trading_regions(int(x/1000))
     )
     pd_candles['emea_trading_hr'] = pd_candles['timestamp_ms'].apply(
-        lambda x: "EMEA" in timestamp_to_active_trading_regions(x)
+        lambda x: "EMEA" in timestamp_to_active_trading_regions(int(x/1000))
     )
     pd_candles['amer_trading_hr'] = pd_candles['timestamp_ms'].apply(
-        lambda x: "AMER" in timestamp_to_active_trading_regions(x)
+        lambda x: "AMER" in timestamp_to_active_trading_regions(int(x/1000))
     )

+    pd_candles['timestamp_ms_gap'] = pd_candles['timestamp_ms'] - pd_candles['timestamp_ms'].shift(1)
+
+    # Depending on asset, minute bars may have gaps
+    timestamp_ms_gap_median = pd_candles['timestamp_ms_gap'].median()
+    NUM_MS_IN_1HR = 60*60*1000
+    if timestamp_ms_gap_median>=NUM_MS_IN_1HR:
+        num_rows_with_expected_gap = pd_candles[~pd_candles.timestamp_ms_gap.isna()][pd_candles.timestamp_ms_gap==timestamp_ms_gap_median].shape[0]
+        assert(num_rows_with_expected_gap/pd_candles.shape[0]>0.9)
+    pd_candles.drop(columns=['timestamp_ms_gap'], inplace=True)
+
+'''
+APAC (Asia-Pacific) Trading Hours
+    UTC 21:00 - 09:00 (approximate range)
+    Major financial centers: Tokyo, Hong Kong, Singapore, Sydney
+
+EMEA (Europe, Middle East, Africa) Trading Hours
+    UTC 07:00 - 16:00 (approximate range)
+    Major financial centers: London, Frankfurt, Paris, Zurich, Dubai
+
+US Trading Hours
+    UTC 13:00 - 22:00 (approximate range)
+    Major financial centers: New York, Chicago
+    Key markets: NYSE, NASDAQ
+
+utcnow and utcfromtimestamp have been deprecated in Python 3.12
+https://www.pythonmorsels.com/converting-to-utc-time/
+
+Example: UTC 23:00 is the 3rd hour in the APAC trading session
+    utc_hour = 23
+    i = get_regions_trading_utc_hours()['APAC'].index(utc_hour)
+    assert(i==2)
+'''
+def get_regions_trading_utc_hours():
+    return {
+        'APAC' : [21,22,23,0,1,2,3,4,5,6,7,8,9],
+        'EMEA' : [7,8,9,10,11,12,13,14,15,16],
+        'AMER' : [13,14,15,16,17,18,19,20,21,22]
+    }
+
 def timestamp_to_active_trading_regions(
     timestamp_ms : int
 ) -> List[str]:
-
-    '''
-    APAC (Asia-Pacific) Trading Hours
-        UTC 22:00 - 09:00 (approximate range)
-        Major financial centers: Tokyo, Hong Kong, Singapore, Sydney
-
-    EMEA (Europe, Middle East, Africa) Trading Hours
-        UTC 07:00 - 16:00 (approximate range)
-        Major financial centers: London, Frankfurt, Paris, Zurich, Dubai
-
-    US Trading Hours
-        UTC 13:30 - 20:00 (approximate range)
-        Major financial centers: New York, Chicago
-        Key markets: NYSE, NASDAQ
-
-    utcnow and utcfromtimestamp been deprecated in Python 3.12
-    https://www.pythonmorsels.com/converting-to-utc-time/
-    '''
     active_trading_regions : List[str] = []

     dt_utc = datetime.fromtimestamp(int(timestamp_ms / 1000), tz=timezone.utc)
     utc_hour = dt_utc.hour
-    if
+    if utc_hour in get_regions_trading_utc_hours()['APAC']:
         active_trading_regions.append("APAC")

-    if
+    if utc_hour in get_regions_trading_utc_hours()['EMEA']:
         active_trading_regions.append("EMEA")

-    if
+    if utc_hour in get_regions_trading_utc_hours()['AMER']:
         active_trading_regions.append("AMER")

     return active_trading_regions
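For illustration, a minimal usage sketch of the helpers added above. It assumes siglab-py 0.6.12 is installed with network access, and that this hunk is siglab_py/util/market_data_util.py (the file list entry whose size matches); the symbol is an example, not part of the diff:

    from datetime import datetime, timezone
    from siglab_py.util.market_data_util import instantiate_exchange, timestamp_to_active_trading_regions

    # Public market data needs no credentials.
    exchange = instantiate_exchange(exchange_name='binance', default_type='spot')
    if exchange:
        print(exchange.fetch_ticker('BTC/USDT')['last'])

    # 23:00 UTC falls only inside the APAC window (21:00-09:00 UTC).
    ts_ms = int(datetime(2025, 1, 1, 23, 0, tzinfo=timezone.utc).timestamp() * 1000)
    print(timestamp_to_active_trading_regions(ts_ms))  # ['APAC']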
@@ -113,6 +318,17 @@ def fix_column_types(pd_candles : pd.DataFrame):
     pd_candles.reset_index(drop=True, inplace=True)
     pd_candles.sort_values("datetime", inplace=True)

+def interval_to_ms(interval : str) -> int:
+    interval_ms : int = 0
+    if interval=="d":
+        interval_ms = 24*60*60*1000
+    elif interval=="h":
+        interval_ms = 60*60*1000
+    elif interval=="m":
+        interval_ms = 60*1000
+
+    return interval_ms
+
 '''
 https://polygon.io/docs/stocks
 '''
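Note that interval_to_ms takes the bare unit character rather than a full interval string like '1h', and that unrecognized units fall through to 0; for example (import path assumed per the file list):

    from siglab_py.util.market_data_util import interval_to_ms

    assert interval_to_ms('h') == 60*60*1000   # 3,600,000 ms
    assert interval_to_ms('m') == 60*1000
    assert interval_to_ms('x') == 0            # no else branch: unknown units yield 0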
@@ -168,7 +384,7 @@ class NASDAQExchange:
         pd_daily_candles['low'] = pd_daily_candles['low'].astype(str).str.replace('$','')
         pd_daily_candles['close'] = pd_daily_candles['close'].astype(str).str.replace('$','')
         pd_daily_candles['datetime']= pd.to_datetime(pd_daily_candles['datetime'])
-        pd_daily_candles['timestamp_ms'] = pd_daily_candles.datetime.values.astype(np.int64) // 10 ** 6
+        pd_daily_candles['timestamp_ms'] = pd_daily_candles.datetime.values.astype(np.int64) // 10 ** 6 # type: ignore
         pd_daily_candles['symbol'] = symbol
         pd_daily_candles['exchange'] = 'nasdaq'
         fix_column_types(pd_daily_candles)
@@ -187,7 +403,7 @@ class NASDAQExchange:
         )

         # When you fill forward, a few candles before start date can have null values (open, high, low, close, volume ...)
-        first_candle_dt = pd_hourly_candles[(~pd_hourly_candles.close.isna()) & (pd_hourly_candles['datetime'].dt.time == pd.Timestamp('00:00:00').time())].iloc[0]['datetime']
+        first_candle_dt = pd_hourly_candles[(~pd_hourly_candles.close.isna()) & (pd_hourly_candles['datetime'].dt.time == pd.Timestamp('00:00:00').time())].iloc[0]['datetime'] # type: ignore
         pd_hourly_candles = pd_hourly_candles[pd_hourly_candles.datetime>=first_candle_dt]
         exchange_candles[symbol] = pd_hourly_candles

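For illustration, the trimming above drops leading rows that are still null after the forward-fill; a standalone toy sketch of the same idea (toy data, not the library's API):

    import pandas as pd

    pd_hourly_candles = pd.DataFrame({
        'datetime': pd.date_range('2025-01-01 21:00', periods=6, freq='h'),
        'close': [None, None, None, 100.0, 101.0, 102.0],
    })
    # The first midnight row with a real close marks the usable start of the series.
    mask = (~pd_hourly_candles.close.isna()) & (pd_hourly_candles['datetime'].dt.time == pd.Timestamp('00:00:00').time())
    first_candle_dt = pd_hourly_candles[mask].iloc[0]['datetime']
    pd_hourly_candles = pd_hourly_candles[pd_hourly_candles.datetime >= first_candle_dt]  # 3 rows remain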
@@ -251,6 +467,45 @@ class YahooExchange:

         return exchange_candles

+def aggregate_candles(
+    interval : str,
+    pd_candles : pd.DataFrame
+) -> pd.DataFrame:
+    if interval[-1]=='m':
+        # 'm' for pandas means months!
+        interval = interval.replace('m','min')
+    pd_candles.set_index('datetime', inplace=True)
+    pd_candles_aggregated = pd_candles.resample(interval).agg({
+        'exchange' : 'first',
+        'symbol' : 'first',
+        'timestamp_ms' : 'first',
+
+        'open': 'first',
+        'high': 'max',
+        'low': 'min',
+        'close': 'last',
+        'volume': 'sum',
+
+        'datetime_utc' : 'first',
+        'year' : 'first',
+        'month' : 'first',
+        'day' : 'first',
+        'hour' : 'first',
+        'minute' : 'first',
+        'dayofweek' : 'first',
+        'week_of_month' : 'first',
+
+        'apac_trading_hr' : 'first',
+        'emea_trading_hr' : 'first',
+        'amer_trading_hr' : 'first',
+
+        'pct_chg_on_close' : 'sum',
+
+    })
+    pd_candles.reset_index(inplace=True)
+    pd_candles_aggregated.reset_index(inplace=True)
+    return pd_candles_aggregated
+
 def fetch_historical_price(
     exchange,
     normalized_symbol : str,
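For illustration, aggregate_candles is the standard pandas OHLCV resample recipe plus pass-through columns; the core of it, on toy data:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({
        'datetime': pd.date_range('2025-01-01', periods=60, freq='min'),
        'open': np.arange(60.0), 'high': np.arange(60.0) + 1,
        'low': np.arange(60.0) - 1, 'close': np.arange(60.0), 'volume': 1.0,
    }).set_index('datetime')

    # Note the 'm' -> 'min' translation above: in pandas offset aliases, 'M' means month-end.
    df_15m = df.resample('15min').agg({
        'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum',
    })
    assert df_15m.shape[0] == 4  # four 15-minute bars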
@@ -297,15 +552,21 @@ def fetch_candles(
     validation_max_gaps : int = 10,
     validation_max_end_date_intervals : int = 1
 ) -> Dict[str, Union[pd.DataFrame, None]]:
+    exchange_candles = { '' : None }
+    num_intervals = int(candle_size.replace(candle_size[-1],''))
+
+    if end_ts>datetime.now().timestamp():
+        end_ts = int(datetime.now().timestamp())
+
     if type(exchange) is YahooExchange:
-
+        exchange_candles = exchange.fetch_candles(
             start_ts=start_ts,
             end_ts=end_ts,
             symbols=normalized_symbols,
             candle_size=candle_size
         )
     elif type(exchange) is NASDAQExchange:
-
+        exchange_candles = exchange.fetch_candles(
             start_ts=start_ts,
             end_ts=end_ts,
             symbols=normalized_symbols,
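For illustration, the new preamble derives the interval multiplier by stripping the trailing unit character, and clamps a future end_ts to "now":

    from datetime import datetime

    candle_size = '15m'
    num_intervals = int(candle_size.replace(candle_size[-1], ''))  # '15m' -> 15

    end_ts = 4102444800  # 2100-01-01 UTC, deliberately in the future
    if end_ts > datetime.now().timestamp():
        end_ts = int(datetime.now().timestamp())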
@@ -322,79 +583,153 @@ def fetch_candles(
             pd_candles = exchange_candles[symbol]
             if not pd_candles is None:
                 fix_column_types(pd_candles) # You don't want to do this from Futubull as you'd need import Futubull from there: Circular references
-
+
     elif issubclass(exchange.__class__, CcxtExchange):
-
+        exchange_candles = _fetch_candles_ccxt(
             start_ts=start_ts,
             end_ts=end_ts,
             exchange=exchange,
             normalized_symbols=normalized_symbols,
             candle_size=candle_size,
-
-            num_candles_limit=num_candles_limit,
-            cache_dir=cache_dir,
-            list_ts_field=list_ts_field
+            num_candles_limit=num_candles_limit
         )
-
+    if num_intervals!=1:
+        for symbol in exchange_candles:
+            if not exchange_candles[symbol] is None:
+                exchange_candles[symbol] = aggregate_candles(candle_size, exchange_candles[symbol]) # type: ignore
+
+    # For invalid rows missing timestamps, o/h/l/c/v, fill forward close, set volume to zero.
+    for symbol in exchange_candles:
+        pd_candles = exchange_candles[symbol]
+
+        if pd_candles is not None:
+            mask_invalid_candles = pd_candles["timestamp_ms"].isna()
+            if mask_invalid_candles.any():
+                pd_invalid_candles = pd_candles[mask_invalid_candles]
+
+                if logger is not None:
+                    logger.warning(f"Dropping {pd_invalid_candles.shape[0]}/{pd_candles.shape[0]} rows from {symbol} candles (null timestamp_ms)") # type: ignore
+                    logger.warning(f"{tabulate(pd_invalid_candles, headers='keys', tablefmt='psql')}") # type: ignore
+
+                def _to_timestamp_ms(dt):
+                    if pd.isna(dt):
+                        return pd.NA
+                    if isinstance(dt, str):
+                        dt = pd.to_datetime(dt)
+                    return int(dt.timestamp() * 1000)
+
+                pd_candles.loc[mask_invalid_candles, "timestamp_ms"] = pd_candles.loc[mask_invalid_candles, "datetime"].apply(_to_timestamp_ms)
+
+                pd_candles["close"] = pd_candles["close"].ffill()
+                pd_candles.loc[mask_invalid_candles, ["open", "high", "low"]] = pd_candles.loc[
+                    mask_invalid_candles, ["close"]
+                ]
+                pd_candles.loc[mask_invalid_candles, "volume"] = 0.0
+
+    return exchange_candles # type: ignore
+
+'''
+Find listing date https://gist.github.com/mr-easy/5185b1dcdd5f9f908ff196446f092e9b
+
+Usage:
+    listing_ts = search_listing_ts(exchange, 'HYPE/USDT:USDT', int(datetime(2024,1,1).timestamp()*1000), int(datetime(2025,5,1).timestamp()*1000), '1h')

+Caveats:
+    1) If listing date lies outside [start_time, end_time], this function will overflow the stack.
+    2) Even if not, it's still very time consuming.
+
+Alternative: market['created']
+'''
+def search_listing_ts(exchange, symbol, start_time, end_time, timeframe):
+    mid_time = (start_time + end_time)//2
+    if(mid_time == start_time): return mid_time+1
+    ohlcv = exchange.fetch_ohlcv(symbol, timeframe, mid_time, limit=1)
+    time.sleep(1)
+    if(len(ohlcv) == 0):
+        return search_listing_ts(exchange, symbol, mid_time, end_time, timeframe)
+    else:
+        return search_listing_ts(exchange, symbol, start_time, mid_time, timeframe)
+
 def _fetch_candles_ccxt(
     start_ts : int,
     end_ts : int,
     exchange,
     normalized_symbols : List[str],
     candle_size : str,
-    num_candles_limit : int = 100,
-    logger = None,
-    cache_dir : Union[str, None] = None,
-    list_ts_field : Union[str, None] = None
-) -> Dict[str, Union[pd.DataFrame, None]]:
-    ticker = normalized_symbols[0]
-    pd_candles = _fetch_candles(
-        symbol = ticker,
-        exchange = exchange,
-        start_ts = start_ts,
-        end_ts = end_ts,
-        candle_size = candle_size,
-    )
-    return {
-        ticker : pd_candles
-    }
-
-def _fetch_candles(
-    symbol : str,
-    exchange : CcxtExchange,
-    start_ts : int,
-    end_ts : int,
-    candle_size : str = '1d',
     num_candles_limit : int = 100
-):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+) -> Dict[str, Union[pd.DataFrame, None]]:
+    logger = logging.getLogger()
+
+    rsp = {}
+
+    exchange.load_markets()
+
+    num_tickers = len(normalized_symbols)
+    i = 0
+    for ticker in normalized_symbols:
+        @retry(num_attempts=3, pause_between_retries_ms=1000)
+        def _fetch_ohlcv(exchange, symbol, timeframe, since, limit, params) -> Union[List, NoReturn]:
+            one_timeframe = f"1{timeframe[-1]}"
+            candles = exchange.fetch_ohlcv(symbol=symbol, timeframe=one_timeframe, since=since, limit=limit, params=params)
+            if candles and len(candles)>0:
+                candles.sort(key=lambda x : x[0], reverse=False)
+
+            return candles
+
+        def _calc_increment(candle_size):
+            increment = 1
+            num_intervals = int(candle_size.replace(candle_size[-1],''))
+            interval_type = candle_size[-1]
+            if interval_type == "m":
+                increment = 60
+            elif interval_type == "h":
+                increment = 60*60
+            elif interval_type == "d":
+                increment = 60*60*24
+            else:
+                raise ValueError(f"Invalid candle_size {candle_size}")
+            return num_intervals * increment
+
+        logger.info(f"{i}/{num_tickers} Fetching {candle_size} candles for {ticker}.")
+
+        '''
+        It uses a while loop to implement a sliding window to download candles between start_ts and end_ts.
+        However, start_ts can for example be 1 Jan 2021 for a given ticker.
+        But if that ticker's listing date is 1 Jan 2025, this while loop would waste a lot of time looping from 1 Jan 2021 thru 31 Dec 2024, slowly incrementing this_cutoff += _calc_increment(candle_size).
+        A more efficient way is to find the listing date and start looping from there.
+        '''
+        market = exchange.markets[ticker]
+        this_ticker_start_ts = start_ts
+        if market['created']:
+            this_ticker_start_ts = max(this_ticker_start_ts, int(market['created']/1000))
+
+        all_candles = []
+        params = {}
+        this_cutoff = this_ticker_start_ts
+        while this_cutoff<end_ts:
+            candles = _fetch_ohlcv(exchange=exchange, symbol=ticker, timeframe=candle_size, since=int(this_cutoff * 1000), limit=num_candles_limit, params=params)
+            if candles and len(candles)>0:
+                all_candles = all_candles + [[ int(x[0]), float(x[1]), float(x[2]), float(x[3]), float(x[4]), float(x[5]) ] for x in candles if x[1] and x[2] and x[3] and x[4] and x[5] ]
+
+                record_ts = max([int(record[0]) for record in candles])
+                record_ts_str : str = str(record_ts)
+                if len(record_ts_str)==13:
+                    record_ts = int(int(record_ts_str)/1000) # Convert from milliseconds to seconds
+
+                this_cutoff = record_ts + _calc_increment(candle_size)
+            else:
+                this_cutoff += _calc_increment(candle_size)
+
+        columns = ['exchange', 'symbol', 'timestamp_ms', 'open', 'high', 'low', 'close', 'volume']
+        pd_all_candles = pd.DataFrame([ [ exchange.name, ticker, x[0], x[1], x[2], x[3], x[4], x[5] ] for x in all_candles], columns=columns)
+        fix_column_types(pd_all_candles)
+        pd_all_candles['pct_chg_on_close'] = pd_all_candles['close'].pct_change()
+
+        rsp[ticker] = pd_all_candles
+
+        i+=1
+
+    return rsp

 def fetch_deribit_btc_option_expiries(
     market: str = 'BTC'
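For illustration, a stripped-down version of the sliding-window paging in _fetch_candles_ccxt, written against plain ccxt (network access assumed; venue and symbol are examples):

    import ccxt

    exchange = ccxt.binance()
    exchange.load_markets()

    start_ts, end_ts = 1735689600, 1735776000  # 2025-01-01 .. 2025-01-02 UTC, in seconds
    step_s = 60 * 60                           # 1h candles
    all_candles, this_cutoff = [], start_ts
    while this_cutoff < end_ts:
        candles = exchange.fetch_ohlcv('BTC/USDT', '1h', since=this_cutoff * 1000, limit=100)
        candles = [c for c in candles if c[0] // 1000 < end_ts]  # keep bars inside the window
        if candles:
            all_candles += candles
            this_cutoff = max(c[0] for c in candles) // 1000 + step_s  # resume after the last bar
        else:
            this_cutoff += step_s                                      # no data yet: inch forward
    print(len(all_candles))  # 24 hourly bars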
@@ -404,7 +739,7 @@ def fetch_deribit_btc_option_expiries(
         Dict[str, Dict[str, Union[str, float]]]
     ]
 ]:
-    exchange = deribit()
+    exchange = ccxt.deribit()
     instruments = exchange.public_get_get_instruments({
         'currency': market,
         'kind': 'option',
@@ -456,4 +791,56 @@ def fetch_deribit_btc_option_expiries(
         'index_price' : index_price,
         'by_expiry' : sorted_expiry_data, # type: ignore Otherwise, Error: Type "dict[str, list[tuple[str, float]] | dict[str, Dict[Unknown, Unknown]]]" is not assignable to return type "Dict[str, Dict[str, float] | Dict[str, Dict[str, str | float]]]"
         'by_expiry_and_strike' : expiry_data_breakdown_by_strike
-    }
+    }
+
+def build_pair_candles(
+    pd_candles1 : pd.DataFrame,
+    pd_candles2 : pd.DataFrame,
+    left_columns_postfix : str = "_1",
+    right_columns_postfix : str = "_2"
+) -> pd.DataFrame:
+    min_timestamp_ms1 = int(pd_candles1.iloc[0]['timestamp_ms'])
+    max_timestamp_ms1 = int(pd_candles1.iloc[-1]['timestamp_ms'])
+    min_timestamp_ms2 = int(pd_candles2.iloc[0]['timestamp_ms'])
+    max_timestamp_ms2 = int(pd_candles2.iloc[-1]['timestamp_ms'])
+
+    pd_candles1 = pd_candles1[(pd_candles1.timestamp_ms>=min_timestamp_ms2) & (pd_candles1.timestamp_ms<=max_timestamp_ms2) & (~pd_candles1.timestamp_ms.isna())]
+    pd_candles2 = pd_candles2[(pd_candles2.timestamp_ms>=min_timestamp_ms1) & (pd_candles2.timestamp_ms<=max_timestamp_ms1) & (~pd_candles2.timestamp_ms.isna())]
+    assert(pd_candles1.shape[0]==pd_candles2.shape[0])
+
+    pd_candles1['timestamp_ms_gap'] = pd_candles1['timestamp_ms'] - pd_candles1['timestamp_ms'].shift(1)
+    timestamp_ms_gap = pd_candles1.iloc[-1]['timestamp_ms_gap']
+
+    assert(pd_candles1[~pd_candles1.timestamp_ms_gap.isna()][pd_candles1.timestamp_ms_gap!=timestamp_ms_gap].shape[0]==0)
+    pd_candles1.drop(columns=['timestamp_ms_gap'], inplace=True)
+
+    pd_candles2['timestamp_ms_gap'] = pd_candles2['timestamp_ms'] - pd_candles2['timestamp_ms'].shift(1)
+    timestamp_ms_gap = pd_candles2.iloc[-1]['timestamp_ms_gap']
+    assert(pd_candles2[~pd_candles2.timestamp_ms_gap.isna()][pd_candles2.timestamp_ms_gap!=timestamp_ms_gap].shape[0]==0)
+    pd_candles2.drop(columns=['timestamp_ms_gap'], inplace=True)
+
+    min_timestamp_ms1 = int(pd_candles1.iloc[0]['timestamp_ms'])
+    max_timestamp_ms1 = int(pd_candles1.iloc[-1]['timestamp_ms'])
+    min_timestamp_ms2 = int(pd_candles2.iloc[0]['timestamp_ms'])
+    max_timestamp_ms2 = int(pd_candles2.iloc[-1]['timestamp_ms'])
+    assert(min_timestamp_ms1==min_timestamp_ms2)
+    assert(max_timestamp_ms1==max_timestamp_ms2)
+    assert(pd_candles1.shape[0]==pd_candles2.shape[0])
+
+    if len([ col for col in pd_candles1.columns if col[-2:]==left_columns_postfix ]) == 0:
+        pd_candles1.columns = [str(col) + left_columns_postfix for col in pd_candles1.columns]
+
+    if len([ col for col in pd_candles2.columns if col[-2:]==right_columns_postfix ]) == 0:
+        pd_candles2.columns = [str(col) + right_columns_postfix for col in pd_candles2.columns]
+
+    pd_candles1.reset_index(drop=True, inplace=True)
+    pd_candles2.reset_index(drop=True, inplace=True)
+    pd_candles = pd.concat([pd_candles1, pd_candles2], axis=1)
+    pd_candles['timestamp_ms_gap'] = pd_candles[f'timestamp_ms{left_columns_postfix}'] - pd_candles[f'timestamp_ms{right_columns_postfix}']
+    assert(pd_candles[pd_candles.timestamp_ms_gap!=0].shape[0]==0)
+
+    pd_candles.drop(pd_candles.columns[pd_candles.columns.str.contains('unnamed',case = False)],axis = 1, inplace = True)
+
+    return pd_candles
+
+
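For illustration, a hypothetical pairing of the same symbol across two venues with build_pair_candles. This is a sketch only: it assumes siglab-py 0.6.12, network access, and uses keyword arguments visible in the diff above (unlisted fetch_candles parameters are left at their defaults); venues and symbol are examples:

    from datetime import datetime, timedelta
    from siglab_py.util.market_data_util import (
        instantiate_exchange, fetch_candles, build_pair_candles,
    )

    start_ts = int((datetime.now() - timedelta(days=7)).timestamp())
    end_ts = int(datetime.now().timestamp())

    binance = instantiate_exchange(exchange_name='binance')
    okx = instantiate_exchange(exchange_name='okx')

    candles1 = fetch_candles(start_ts=start_ts, end_ts=end_ts, exchange=binance,
                             normalized_symbols=['BTC/USDT'], candle_size='1h')['BTC/USDT']
    candles2 = fetch_candles(start_ts=start_ts, end_ts=end_ts, exchange=okx,
                             normalized_symbols=['BTC/USDT'], candle_size='1h')['BTC/USDT']

    pd_pair = build_pair_candles(candles1, candles2)  # columns suffixed _1 / _2
    print(pd_pair[['datetime_1', 'close_1', 'close_2']].tail())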