siglab-py 0.1.29__py3-none-any.whl → 0.6.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (32)
  1. siglab_py/constants.py +26 -1
  2. siglab_py/exchanges/binance.py +38 -0
  3. siglab_py/exchanges/deribit.py +83 -0
  4. siglab_py/exchanges/futubull.py +12 -2
  5. siglab_py/market_data_providers/candles_provider.py +2 -2
  6. siglab_py/market_data_providers/candles_ta_provider.py +3 -3
  7. siglab_py/market_data_providers/ccxt_candles_ta_to_csv.py +4 -4
  8. siglab_py/market_data_providers/futu_candles_ta_to_csv.py +7 -2
  9. siglab_py/market_data_providers/google_monitor.py +320 -0
  10. siglab_py/market_data_providers/orderbooks_provider.py +15 -12
  11. siglab_py/market_data_providers/tg_monitor.py +428 -0
  12. siglab_py/market_data_providers/{test_provider.py → trigger_provider.py} +9 -8
  13. siglab_py/ordergateway/client.py +172 -41
  14. siglab_py/ordergateway/encrypt_keys_util.py +1 -1
  15. siglab_py/ordergateway/gateway.py +456 -347
  16. siglab_py/ordergateway/test_ordergateway.py +8 -7
  17. siglab_py/tests/integration/market_data_util_tests.py +35 -1
  18. siglab_py/tests/unit/analytic_util_tests.py +47 -12
  19. siglab_py/tests/unit/simple_math_tests.py +235 -0
  20. siglab_py/tests/unit/trading_util_tests.py +65 -0
  21. siglab_py/util/analytic_util.py +478 -69
  22. siglab_py/util/market_data_util.py +487 -100
  23. siglab_py/util/notification_util.py +78 -0
  24. siglab_py/util/retry_util.py +11 -3
  25. siglab_py/util/simple_math.py +240 -0
  26. siglab_py/util/slack_notification_util.py +59 -0
  27. siglab_py/util/trading_util.py +118 -0
  28. {siglab_py-0.1.29.dist-info → siglab_py-0.6.12.dist-info}/METADATA +5 -9
  29. siglab_py-0.6.12.dist-info/RECORD +44 -0
  30. {siglab_py-0.1.29.dist-info → siglab_py-0.6.12.dist-info}/WHEEL +1 -1
  31. siglab_py-0.1.29.dist-info/RECORD +0 -34
  32. {siglab_py-0.1.29.dist-info → siglab_py-0.6.12.dist-info}/top_level.txt +0 -0
@@ -1,13 +1,19 @@
+import logging
+import incremental
 import tzlocal
 from datetime import datetime, timezone
+import time
 from typing import List, Dict, Union, NoReturn, Any, Tuple
 from pathlib import Path
 import math
 import pandas as pd
 import numpy as np
+import asyncio
+from tabulate import tabulate

 from ccxt.base.exchange import Exchange as CcxtExchange
-from ccxt import deribit
+import ccxt
+import ccxt.pro as ccxtpro

 # https://www.analyticsvidhya.com/blog/2021/06/download-financial-dataset-using-yahoo-finance-in-python-a-complete-guide/
 from yahoofinancials import YahooFinancials
@@ -15,72 +21,271 @@ from yahoofinancials import YahooFinancials
 # yfinance allows intervals '1m', '5m', '15m', '1h', '1d', '1wk', '1mo'. yahoofinancials not as flexible
 import yfinance as yf

+from siglab_py.util.retry_util import retry
 from siglab_py.exchanges.futubull import Futubull
+from siglab_py.exchanges.any_exchange import AnyExchange
+from siglab_py.exchanges.deribit import Deribit, DeribitAsync
+from siglab_py.exchanges.binance import Binance, BinanceAsync
+
+def instantiate_exchange(
+    exchange_name : str,
+    api_key : Union[str, None] = None,
+    secret : Union[str, None] = None,
+    passphrase : Union[str, None] = None,
+    default_type : Union[str, None] = 'spot',
+    default_sub_type : Union[str, None] = None,
+    rate_limit_ms : float = 100
+) -> Union[AnyExchange, None]:
+    exchange_name = exchange_name.lower().strip()
+
+    # Look at ccxt exchange.describe(), under 'options' \ 'defaultType' (and 'defaultSubType'), for the markets an exchange supports.
+    # https://docs.ccxt.com/en/latest/manual.html#instantiation
+    exchange_params : Dict[str, Any] = {
+        'apiKey' : api_key,
+        'secret' : secret,
+        'enableRateLimit' : True,
+        'rateLimit' : rate_limit_ms,
+        'options' : {
+            'defaultType' : default_type
+        }
+    }
+
+    if default_sub_type:
+        exchange_params['defaultSubType'] = default_sub_type
+
+    if api_key:
+        exchange_params['apiKey'] = api_key
+    if secret:
+        exchange_params['secret'] = secret
+    if passphrase:
+        exchange_params['passphrase'] = passphrase
+
+    if exchange_name=='binance':
+        exchange = Binance(exchange_params) # type: ignore
+    elif exchange_name=='bybit':
+        exchange = ccxt.bybit(exchange_params) # type: ignore
+    elif exchange_name=='okx':
+        exchange = ccxt.okx(exchange_params) # type: ignore
+    elif exchange_name=='deribit':
+        exchange = Deribit(exchange_params) # type: ignore
+    elif exchange_name=='hyperliquid':
+        exchange = ccxt.hyperliquid(
+            {
+                "walletAddress" : api_key, # type: ignore
+                "privateKey" : secret,
+                'enableRateLimit' : True,
+                'rateLimit' : rate_limit_ms
+            }
+        )
+    else:
+        raise ValueError(f"Unsupported exchange {exchange_name}.")
+
+    exchange.load_markets() # type: ignore
+
+    return exchange # type: ignore
+
+async def async_instantiate_exchange(
+    gateway_id : str,
+    api_key : str,
+    secret : str,
+    passphrase : str,
+    default_type : Union[str, None] = 'spot',
+    default_sub_type : Union[str, None] = None,
+    rate_limit_ms : float = 100,
+    verbose : bool = False
+) -> Union[AnyExchange, None]:
+    exchange : Union[AnyExchange, None] = None
+    exchange_name : str = gateway_id.split('_')[0]
+    exchange_name = exchange_name.lower().strip()
+
+    # Look at ccxt exchange.describe(), under 'options' \ 'defaultType' (and 'defaultSubType'), for the markets an exchange supports.
+    # https://docs.ccxt.com/en/latest/manual.html#instantiation
+    exchange_params : Dict[str, Any] = {
+        'apiKey' : api_key,
+        'secret' : secret,
+        'enableRateLimit' : True,
+        'rateLimit' : rate_limit_ms,
+        'options' : {
+            'defaultType' : default_type
+        },
+        'verbose': verbose
+    }
+
+    if default_sub_type:
+        exchange_params['defaultSubType'] = default_sub_type
+
+    if exchange_name=='binance':
+        # spot, future, margin, delivery, option
+        # https://github.com/ccxt/ccxt/blob/master/python/ccxt/binance.py#L1298
+        exchange = BinanceAsync(exchange_params) # type: ignore
+    elif exchange_name=='bybit':
+        # spot, linear, inverse, futures
+        # https://github.com/ccxt/ccxt/blob/master/python/ccxt/bybit.py#L1041
+        exchange = ccxtpro.bybit(exchange_params) # type: ignore
+    elif exchange_name=='okx':
+        # 'funding', spot, margin, future, swap, option
+        # https://github.com/ccxt/ccxt/blob/master/python/ccxt/okx.py#L1144
+        exchange_params['password'] = passphrase
+        exchange = ccxtpro.okx(exchange_params) # type: ignore
+    elif exchange_name=='deribit':
+        # spot, swap, future
+        # https://github.com/ccxt/ccxt/blob/master/python/ccxt/deribit.py#L360
+        exchange = DeribitAsync(exchange_params) # type: ignore
+    elif exchange_name=='kraken':
+        exchange = ccxtpro.kraken(exchange_params) # type: ignore
+    elif exchange_name=='hyperliquid':
+        '''
+        https://app.hyperliquid.xyz/API
+
+        defaultType from ccxt: swap
+        https://github.com/ccxt/ccxt/blob/master/python/ccxt/hyperliquid.py#L225
+
+        How to integrate? You can skip the first 6 min: https://www.youtube.com/watch?v=UuBr331wxr4&t=363s
+
+        Example,
+        API credentials created under "\ More \ API":
+            Ledger Arbitrum Wallet Address: 0xAAAAA <-- The Ledger Arbitrum wallet address with which you connect to Hyperliquid.
+            API Wallet Address: 0xBBBBB <-- Generated
+            privateKey: 0xCCCCC
+
+        Basic connection via CCXT:
+            import asyncio
+            import ccxt.pro as ccxtpro
+
+            async def main():
+                rate_limit_ms = 100
+                exchange_params = {
+                    "walletAddress" : "0xAAAAA", # Ledger Arbitrum Wallet Address here! Not the generated address.
+                    "privateKey" : "0xCCCCC"
+                }
+                exchange = ccxtpro.hyperliquid(exchange_params)
+                balances = await exchange.fetch_balance()
+                print(balances)
+
+            asyncio.run(main())
+        '''
+        exchange = ccxtpro.hyperliquid(
+            {
+                "walletAddress" : api_key,
+                "privateKey" : secret,
+                'enableRateLimit' : True,
+                'rateLimit' : rate_limit_ms,
+                'verbose': verbose
+            } # type: ignore
+        )
+    else:
+        raise ValueError(f"Unsupported exchange {exchange_name}, check gateway_id {gateway_id}.")
+
+    await exchange.load_markets() # type: ignore
+
+    '''
+    Is this necessary? The added trouble is that, for example, bybit.authenticate requires the arg 'url' while binance's doesn't, and fetch_balance already exercises the credentials.
+
+    try:
+        await exchange.authenticate() # type: ignore
+    except Exception as swallow_this_error:
+        pass
+    '''
+
+    return exchange

 def timestamp_to_datetime_cols(pd_candles : pd.DataFrame):
-    pd_candles['datetime'] = pd_candles['timestamp_ms'].apply(
-        lambda x: datetime.fromtimestamp(int(x.timestamp()) if isinstance(x, pd.Timestamp) else int(x / 1000))
-    )
+    def _fix_timestamp_ms(x):
+        if isinstance(x, pd.Timestamp):
+            return int(x.value // 10**6)
+        elif isinstance(x, np.datetime64):
+            return int(x.astype('int64') // 10**6)
+        elif isinstance(x, (int, float)):
+            x = int(x)
+            if len(str(abs(x))) == 13:
+                return x
+            else:
+                return int(x * 1000)
+        else:
+            raise ValueError(f"Unsupported type {type(x)} for timestamp conversion")
+    pd_candles['timestamp_ms'] = pd_candles['timestamp_ms'].apply(_fix_timestamp_ms)
+    pd_candles['datetime'] = pd_candles['timestamp_ms'].apply(lambda x: datetime.fromtimestamp(int(x/1000)))
     pd_candles['datetime'] = pd.to_datetime(pd_candles['datetime'])
-    pd_candles['datetime'] = pd_candles['datetime'].dt.tz_localize(None)
+    pd_candles['datetime'] = pd_candles['datetime'].dt.tz_localize(None) # type: ignore
     pd_candles['datetime_utc'] = pd_candles['timestamp_ms'].apply(
         lambda x: datetime.fromtimestamp(int(x.timestamp()) if isinstance(x, pd.Timestamp) else int(x / 1000), tz=timezone.utc)
     )

     # This is to make it easy to do grouping with Excel pivot table
-    pd_candles['year'] = pd_candles['datetime'].dt.year
-    pd_candles['month'] = pd_candles['datetime'].dt.month
-    pd_candles['day'] = pd_candles['datetime'].dt.day
-    pd_candles['hour'] = pd_candles['datetime'].dt.hour
-    pd_candles['minute'] = pd_candles['datetime'].dt.minute
-    pd_candles['dayofweek'] = pd_candles['datetime'].dt.dayofweek # dayofweek: Monday is 0 and Sunday is 6
+    pd_candles['year'] = pd_candles['datetime'].dt.year # type: ignore
+    pd_candles['month'] = pd_candles['datetime'].dt.month # type: ignore
+    pd_candles['day'] = pd_candles['datetime'].dt.day # type: ignore
+    pd_candles['hour'] = pd_candles['datetime'].dt.hour # type: ignore
+    pd_candles['minute'] = pd_candles['datetime'].dt.minute # type: ignore
+    pd_candles['dayofweek'] = pd_candles['datetime'].dt.dayofweek # type: ignore dayofweek: Monday is 0 and Sunday is 6

     pd_candles['week_of_month'] = pd_candles['timestamp_ms'].apply(
-        lambda x: timestamp_to_week_of_month(x)
+        lambda x: timestamp_to_week_of_month(int(x/1000))
     )

     pd_candles['apac_trading_hr'] = pd_candles['timestamp_ms'].apply(
-        lambda x: "APAC" in timestamp_to_active_trading_regions(x)
+        lambda x: "APAC" in timestamp_to_active_trading_regions(int(x/1000))
     )
     pd_candles['emea_trading_hr'] = pd_candles['timestamp_ms'].apply(
-        lambda x: "EMEA" in timestamp_to_active_trading_regions(x)
+        lambda x: "EMEA" in timestamp_to_active_trading_regions(int(x/1000))
     )
     pd_candles['amer_trading_hr'] = pd_candles['timestamp_ms'].apply(
-        lambda x: "AMER" in timestamp_to_active_trading_regions(x)
+        lambda x: "AMER" in timestamp_to_active_trading_regions(int(x/1000))
     )

+    pd_candles['timestamp_ms_gap'] = pd_candles['timestamp_ms'] - pd_candles['timestamp_ms'].shift(1)
+
+    # Depending on the asset, minute bars may have gaps
+    timestamp_ms_gap_median = pd_candles['timestamp_ms_gap'].median()
+    NUM_MS_IN_1HR = 60*60*1000
+    if timestamp_ms_gap_median>=NUM_MS_IN_1HR:
+        num_rows_with_expected_gap = pd_candles[~pd_candles.timestamp_ms_gap.isna()][pd_candles.timestamp_ms_gap==timestamp_ms_gap_median].shape[0]
+        assert(num_rows_with_expected_gap/pd_candles.shape[0]>0.9)
+    pd_candles.drop(columns=['timestamp_ms_gap'], inplace=True)
+
+'''
+APAC (Asia-Pacific) Trading Hours
+    UTC 21:00 - 09:00 (approximate range)
+    Major financial centers: Tokyo, Hong Kong, Singapore, Sydney
+
+EMEA (Europe, Middle East, Africa) Trading Hours
+    UTC 07:00 - 16:00 (approximate range)
+    Major financial centers: London, Frankfurt, Paris, Zurich, Dubai
+
+US Trading Hours
+    UTC 13:00 - 22:00 (approximate range)
+    Major financial centers: New York, Chicago
+    Key markets: NYSE, NASDAQ
+
+utcnow and utcfromtimestamp have been deprecated in Python 3.12
+https://www.pythonmorsels.com/converting-to-utc-time/
+
+Example: UTC 23:00 is the 3rd hour of the APAC trading session
+    utc_hour = 23
+    i = get_regions_trading_utc_hours()['APAC'].index(utc_hour)
+    assert(i==2)
+'''
+def get_regions_trading_utc_hours():
+    return {
+        'APAC' : [21,22,23,0,1,2,3,4,5,6,7,8,9],
+        'EMEA' : [7,8,9,10,11,12,13,14,15,16],
+        'AMER' : [13,14,15,16,17,18,19,20,21,22]
+    }
+
 def timestamp_to_active_trading_regions(
     timestamp_ms : int
 ) -> List[str]:
-
-    '''
-    APAC (Asia-Pacific) Trading Hours
-    UTC 22:00 - 09:00 (approximate range)
-    Major financial centers: Tokyo, Hong Kong, Singapore, Sydney
-
-    EMEA (Europe, Middle East, Africa) Trading Hours
-    UTC 07:00 - 16:00 (approximate range)
-    Major financial centers: London, Frankfurt, Paris, Zurich, Dubai
-
-    US Trading Hours
-    UTC 13:30 - 20:00 (approximate range)
-    Major financial centers: New York, Chicago
-    Key markets: NYSE, NASDAQ
-
-    utcnow and utcfromtimestamp been deprecated in Python 3.12
-    https://www.pythonmorsels.com/converting-to-utc-time/
-    '''
     active_trading_regions : List[str] = []

     dt_utc = datetime.fromtimestamp(int(timestamp_ms / 1000), tz=timezone.utc)
     utc_hour = dt_utc.hour
-    if (utc_hour >= 22) or (utc_hour <= 9):
+    if utc_hour in get_regions_trading_utc_hours()['APAC']:
         active_trading_regions.append("APAC")

-    if 7 <= utc_hour <= 16:
+    if utc_hour in get_regions_trading_utc_hours()['EMEA']:
         active_trading_regions.append("EMEA")

-    if 13 <= utc_hour <= 20:
+    if utc_hour in get_regions_trading_utc_hours()['AMER']:
         active_trading_regions.append("AMER")

     return active_trading_regions
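Note: the new instantiate_exchange helper wraps synchronous ccxt plus the package's own Binance/Deribit subclasses, while async_instantiate_exchange mirrors it on ccxt.pro and derives the exchange name from the gateway_id prefix. A minimal usage sketch, assuming the module path as packaged and network access to public endpoints (the timestamp below is illustrative):

    from siglab_py.util.market_data_util import (
        instantiate_exchange,
        timestamp_to_active_trading_regions,
    )

    # Keyless instantiation is enough for public endpoints; credentials are optional,
    # and load_markets() already runs inside the helper.
    exchange = instantiate_exchange(exchange_name='binance', default_type='spot')
    assert exchange is not None and exchange.markets

    # 1700002800000 ms is 2023-11-14 23:00 UTC; hour 23 falls in the APAC window only.
    assert timestamp_to_active_trading_regions(1700002800000) == ['APAC']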
@@ -113,6 +318,17 @@ def fix_column_types(pd_candles : pd.DataFrame):
     pd_candles.reset_index(drop=True, inplace=True)
     pd_candles.sort_values("datetime", inplace=True)

+def interval_to_ms(interval : str) -> int:
+    interval_ms : int = 0
+    if interval=="d":
+        interval_ms = 24*60*60*1000
+    elif interval=="h":
+        interval_ms = 60*60*1000
+    elif interval=="m":
+        interval_ms = 60*1000
+
+    return interval_ms
+
 '''
 https://polygon.io/docs/stocks
 '''
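The unit-to-milliseconds arithmetic in interval_to_ms is easy to sanity-check; note that unknown units fall through to the 0 default rather than raising, so callers are expected to validate first:

    assert interval_to_ms("m") == 60 * 1000            # 60_000 ms per minute
    assert interval_to_ms("h") == 60 * 60 * 1000       # 3_600_000 ms per hour
    assert interval_to_ms("d") == 24 * 60 * 60 * 1000  # 86_400_000 ms per day
    assert interval_to_ms("w") == 0                    # unhandled unit: returns the 0 default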
@@ -168,7 +384,7 @@ class NASDAQExchange:
         pd_daily_candles['low'] = pd_daily_candles['low'].astype(str).str.replace('$','')
         pd_daily_candles['close'] = pd_daily_candles['close'].astype(str).str.replace('$','')
         pd_daily_candles['datetime'] = pd.to_datetime(pd_daily_candles['datetime'])
-        pd_daily_candles['timestamp_ms'] = pd_daily_candles.datetime.values.astype(np.int64) // 10 ** 6
+        pd_daily_candles['timestamp_ms'] = pd_daily_candles.datetime.values.astype(np.int64) // 10 ** 6 # type: ignore
         pd_daily_candles['symbol'] = symbol
         pd_daily_candles['exchange'] = 'nasdaq'
         fix_column_types(pd_daily_candles)
@@ -187,7 +403,7 @@ class NASDAQExchange:
         )

         # When you fill forward, a few candles before the start date can have null values (open, high, low, close, volume ...)
-        first_candle_dt = pd_hourly_candles[(~pd_hourly_candles.close.isna()) & (pd_hourly_candles['datetime'].dt.time == pd.Timestamp('00:00:00').time())].iloc[0]['datetime']
+        first_candle_dt = pd_hourly_candles[(~pd_hourly_candles.close.isna()) & (pd_hourly_candles['datetime'].dt.time == pd.Timestamp('00:00:00').time())].iloc[0]['datetime'] # type: ignore
         pd_hourly_candles = pd_hourly_candles[pd_hourly_candles.datetime>=first_candle_dt]
         exchange_candles[symbol] = pd_hourly_candles

@@ -251,6 +467,45 @@ class YahooExchange:

         return exchange_candles

+def aggregate_candles(
+    interval : str,
+    pd_candles : pd.DataFrame
+) -> pd.DataFrame:
+    if interval[-1]=='m':
+        # 'm' for pandas means months!
+        interval = interval.replace('m','min')
+    pd_candles.set_index('datetime', inplace=True)
+    pd_candles_aggregated = pd_candles.resample(interval).agg({
+        'exchange' : 'first',
+        'symbol' : 'first',
+        'timestamp_ms' : 'first',
+
+        'open': 'first',
+        'high': 'max',
+        'low': 'min',
+        'close': 'last',
+        'volume': 'sum',
+
+        'datetime_utc' : 'first',
+        'year' : 'first',
+        'month' : 'first',
+        'day' : 'first',
+        'hour' : 'first',
+        'minute' : 'first',
+        'dayofweek' : 'first',
+        'week_of_month' : 'first',
+
+        'apac_trading_hr' : 'first',
+        'emea_trading_hr' : 'first',
+        'amer_trading_hr' : 'first',
+
+        'pct_chg_on_close' : 'sum',
+
+    })
+    pd_candles.reset_index(inplace=True)
+    pd_candles_aggregated.reset_index(inplace=True)
+    return pd_candles_aggregated
+
 def fetch_historical_price(
     exchange,
     normalized_symbol : str,
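aggregate_candles downsamples by resampling on the datetime index; the 'm' suffix is rewritten to 'min' because a bare 'm' offset means months to pandas. A usage sketch, where pd_1m_candles is a hypothetical 1-minute frame carrying exactly the columns in the agg map (frames straight out of fetch_candles fit):

    from siglab_py.util.market_data_util import aggregate_candles

    # '15m' becomes '15min' internally before the resample.
    pd_15m_candles = aggregate_candles(interval='15m', pd_candles=pd_1m_candles)
    assert pd_15m_candles.shape[0] <= pd_1m_candles.shape[0]
    # Per 15-minute bucket: open is the first 1m open, high the max, low the min,
    # close the last, volume the sum -- standard OHLCV downsampling.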
@@ -297,15 +552,21 @@ def fetch_candles(
     validation_max_gaps : int = 10,
     validation_max_end_date_intervals : int = 1
 ) -> Dict[str, Union[pd.DataFrame, None]]:
+    exchange_candles = { '' : None }
+    num_intervals = int(candle_size.replace(candle_size[-1],''))
+
+    if end_ts>datetime.now().timestamp():
+        end_ts = int(datetime.now().timestamp())
+
     if type(exchange) is YahooExchange:
-        return exchange.fetch_candles(
+        exchange_candles = exchange.fetch_candles(
             start_ts=start_ts,
             end_ts=end_ts,
             symbols=normalized_symbols,
             candle_size=candle_size
         )
     elif type(exchange) is NASDAQExchange:
-        return exchange.fetch_candles(
+        exchange_candles = exchange.fetch_candles(
             start_ts=start_ts,
             end_ts=end_ts,
             symbols=normalized_symbols,
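The new preamble derives num_intervals by stripping the trailing unit letter from candle_size and clamps end_ts to the present. A quick sketch of that parsing, assuming candle_size always ends in its unit (e.g. '15m', '4h', '1d'):

    from datetime import datetime

    candle_size = '15m'
    num_intervals = int(candle_size.replace(candle_size[-1], ''))   # '15m' -> 15
    assert num_intervals == 15

    end_ts = 4102444800                              # far future (2100-01-01)
    if end_ts > datetime.now().timestamp():
        end_ts = int(datetime.now().timestamp())     # clamped: exchanges return nothing past 'now'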
@@ -322,79 +583,153 @@ def fetch_candles(
             pd_candles = exchange_candles[symbol]
             if not pd_candles is None:
                 fix_column_types(pd_candles) # You don't want to do this from Futubull, as you'd need to import Futubull from there: circular references
-        return exchange_candles
+
     elif issubclass(exchange.__class__, CcxtExchange):
-        return _fetch_candles_ccxt(
+        exchange_candles = _fetch_candles_ccxt(
             start_ts=start_ts,
             end_ts=end_ts,
             exchange=exchange,
             normalized_symbols=normalized_symbols,
             candle_size=candle_size,
-            logger=logger,
-            num_candles_limit=num_candles_limit,
-            cache_dir=cache_dir,
-            list_ts_field=list_ts_field
+            num_candles_limit=num_candles_limit
         )
-    return { '' : None }
+    if num_intervals!=1:
+        for symbol in exchange_candles:
+            if not exchange_candles[symbol] is None:
+                exchange_candles[symbol] = aggregate_candles(candle_size, exchange_candles[symbol]) # type: ignore
+
+    # For invalid rows (missing timestamps, o/h/l/c/v): fill forward close, set volume to zero.
+    for symbol in exchange_candles:
+        pd_candles = exchange_candles[symbol]
+
+        if pd_candles is not None:
+            mask_invalid_candles = pd_candles["timestamp_ms"].isna()
+            if mask_invalid_candles.any():
+                pd_invalid_candles = pd_candles[mask_invalid_candles]
+
+                if logger is not None:
+                    logger.warning(f"Dropping {pd_invalid_candles.shape[0]}/{pd_candles.shape[0]} rows from {symbol} candles (null timestamp_ms)") # type: ignore
+                    logger.warning(f"{tabulate(pd_invalid_candles, headers='keys', tablefmt='psql')}") # type: ignore
+
+                def _to_timestamp_ms(dt):
+                    if pd.isna(dt):
+                        return pd.NA
+                    if isinstance(dt, str):
+                        dt = pd.to_datetime(dt)
+                    return int(dt.timestamp() * 1000)
+
+                pd_candles.loc[mask_invalid_candles, "timestamp_ms"] = pd_candles.loc[mask_invalid_candles, "datetime"].apply(_to_timestamp_ms)
+
+                pd_candles["close"] = pd_candles["close"].ffill()
+                pd_candles.loc[mask_invalid_candles, ["open", "high", "low"]] = pd_candles.loc[
+                    mask_invalid_candles, ["close"]
+                ]
+                pd_candles.loc[mask_invalid_candles, "volume"] = 0.0
+
+    return exchange_candles # type: ignore
+
+'''
+Find the listing date: https://gist.github.com/mr-easy/5185b1dcdd5f9f908ff196446f092e9b
+
+Usage:
+    listing_ts = search_listing_ts(exchange, 'HYPE/USDT:USDT', int(datetime(2024,1,1).timestamp()*1000), int(datetime(2025,5,1).timestamp()*1000), '1h')

+Caveats:
+    1) If the listing date lies outside [start_time, end_time], this function recurses without bound and overflows the stack.
+    2) Even when it doesn't, it's still very time consuming.
+
+Alternative: market['created']
+'''
+def search_listing_ts(exchange, symbol, start_time, end_time, timeframe):
+    mid_time = (start_time + end_time)//2
+    if(mid_time == start_time): return mid_time+1
+    ohlcv = exchange.fetch_ohlcv(symbol, timeframe, mid_time, limit=1)
+    time.sleep(1)
+    if(len(ohlcv) == 0):
+        return search_listing_ts(exchange, symbol, mid_time, end_time, timeframe)
+    else:
+        return search_listing_ts(exchange, symbol, start_time, mid_time, timeframe)
+
 def _fetch_candles_ccxt(
     start_ts : int,
     end_ts : int,
     exchange,
     normalized_symbols : List[str],
     candle_size : str,
-    num_candles_limit : int = 100,
-    logger = None,
-    cache_dir : Union[str, None] = None,
-    list_ts_field : Union[str, None] = None
-) -> Dict[str, Union[pd.DataFrame, None]]:
-    ticker = normalized_symbols[0]
-    pd_candles = _fetch_candles(
-        symbol = ticker,
-        exchange = exchange,
-        start_ts = start_ts,
-        end_ts = end_ts,
-        candle_size = candle_size,
-    )
-    return {
-        ticker : pd_candles
-    }
-
-def _fetch_candles(
-    symbol : str,
-    exchange : CcxtExchange,
-    start_ts : int,
-    end_ts : int,
-    candle_size : str = '1d',
     num_candles_limit : int = 100
-):
-    def _fetch_ohlcv(exchange, symbol, timeframe, since, limit, params) -> Union[List, NoReturn]:
-        one_timeframe = f"1{timeframe[-1]}"
-        candles = exchange.fetch_ohlcv(symbol=symbol, timeframe=one_timeframe, since=since, limit=limit, params=params)
-        if candles and len(candles)>0:
-            candles.sort(key=lambda x : x[0], reverse=False)
-
-        return candles
-
-    all_candles = []
-    params = {}
-    this_cutoff = start_ts
-    while this_cutoff<=end_ts:
-        candles = _fetch_ohlcv(exchange=exchange, symbol=symbol, timeframe=candle_size, since=int(this_cutoff * 1000), limit=num_candles_limit, params=params)
-        if candles and len(candles)>0:
-            all_candles = all_candles + [[ int(x[0]), float(x[1]), float(x[2]), float(x[3]), float(x[4]), float(x[5]) ] for x in candles if x[1] and x[2] and x[3] and x[4] and x[5] ]
-
-            record_ts = max([int(record[0]) for record in candles])
-            record_ts_str : str = str(record_ts)
-            if len(record_ts_str)==13:
-                record_ts = int(int(record_ts_str)/1000) # Convert from milliseconds to seconds
-
-            this_cutoff = record_ts + 1
-    columns = ['exchange', 'symbol', 'timestamp_ms', 'open', 'high', 'low', 'close', 'volume']
-    pd_all_candles = pd.DataFrame([ [ exchange.name, symbol, x[0], x[1], x[2], x[3], x[4], x[5] ] for x in all_candles], columns=columns)
-    fix_column_types(pd_all_candles)
-    pd_all_candles['pct_chg_on_close'] = pd_all_candles['close'].pct_change()
-    return pd_all_candles
+) -> Dict[str, Union[pd.DataFrame, None]]:
+    logger = logging.getLogger()
+
+    rsp = {}
+
+    exchange.load_markets()
+
+    num_tickers = len(normalized_symbols)
+    i = 0
+    for ticker in normalized_symbols:
+        @retry(num_attempts=3, pause_between_retries_ms=1000)
+        def _fetch_ohlcv(exchange, symbol, timeframe, since, limit, params) -> Union[List, NoReturn]:
+            one_timeframe = f"1{timeframe[-1]}"
+            candles = exchange.fetch_ohlcv(symbol=symbol, timeframe=one_timeframe, since=since, limit=limit, params=params)
+            if candles and len(candles)>0:
+                candles.sort(key=lambda x : x[0], reverse=False)
+
+            return candles
+
+        def _calc_increment(candle_size):
+            increment = 1
+            num_intervals = int(candle_size.replace(candle_size[-1],''))
+            interval_type = candle_size[-1]
+            if interval_type == "m":
+                increment = 60
+            elif interval_type == "h":
+                increment = 60*60
+            elif interval_type == "d":
+                increment = 60*60*24
+            else:
+                raise ValueError(f"Invalid candle_size {candle_size}")
+            return num_intervals * increment
+
+        logger.info(f"{i}/{num_tickers} Fetching {candle_size} candles for {ticker}.")
+
+        '''
+        A while loop implements a sliding window to download candles between start_ts and end_ts.
+        However, start_ts can be, say, 1 Jan 2021 for a given ticker.
+        If that ticker's listing date is 1 Jan 2025, the loop would waste a lot of time crawling from 1 Jan 2021 through 31 Dec 2024, slowly incrementing this_cutoff += _calc_increment(candle_size).
+        It's more efficient to find the listing date and start looping from there.
+        '''
+        market = exchange.markets[ticker]
+        this_ticker_start_ts = start_ts
+        if market['created']:
+            this_ticker_start_ts = max(this_ticker_start_ts, int(market['created']/1000))
+
+        all_candles = []
+        params = {}
+        this_cutoff = this_ticker_start_ts
+        while this_cutoff<end_ts:
+            candles = _fetch_ohlcv(exchange=exchange, symbol=ticker, timeframe=candle_size, since=int(this_cutoff * 1000), limit=num_candles_limit, params=params)
+            if candles and len(candles)>0:
+                all_candles = all_candles + [[ int(x[0]), float(x[1]), float(x[2]), float(x[3]), float(x[4]), float(x[5]) ] for x in candles if x[1] and x[2] and x[3] and x[4] and x[5] ]
+
+                record_ts = max([int(record[0]) for record in candles])
+                record_ts_str : str = str(record_ts)
+                if len(record_ts_str)==13:
+                    record_ts = int(int(record_ts_str)/1000) # Convert from milliseconds to seconds
+
+                this_cutoff = record_ts + _calc_increment(candle_size)
+            else:
+                this_cutoff += _calc_increment(candle_size)
+
+        columns = ['exchange', 'symbol', 'timestamp_ms', 'open', 'high', 'low', 'close', 'volume']
+        pd_all_candles = pd.DataFrame([ [ exchange.name, ticker, x[0], x[1], x[2], x[3], x[4], x[5] ] for x in all_candles], columns=columns)
+        fix_column_types(pd_all_candles)
+        pd_all_candles['pct_chg_on_close'] = pd_all_candles['close'].pct_change()
+
+        rsp[ticker] = pd_all_candles
+
+        i += 1
+
+    return rsp

 def fetch_deribit_btc_option_expiries(
     market: str = 'BTC'
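The rewritten pager fetches 1-unit candles (one_timeframe = f"1{timeframe[-1]}"), advances this_cutoff in seconds, and leaves aggregation to the num_intervals != 1 pass in fetch_candles. A standalone check of the increment arithmetic (calc_increment_s below re-derives the nested _calc_increment helper; the timestamp is illustrative):

    def calc_increment_s(candle_size: str) -> int:
        # Seconds spanned by one candle_size, mirroring the nested _calc_increment.
        units = {"m": 60, "h": 60*60, "d": 60*60*24}
        return int(candle_size[:-1]) * units[candle_size[-1]]

    assert calc_increment_s('15m') == 900
    assert calc_increment_s('1d') == 86400

    # Exchanges return 13-digit millisecond timestamps; the loop folds them back to
    # seconds, then resumes one interval past the newest candle it received:
    record_ts_ms = 1700000000000
    this_cutoff = record_ts_ms // 1000 + calc_increment_s('15m')
    assert this_cutoff == 1700000900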
@@ -404,7 +739,7 @@ def fetch_deribit_btc_option_expiries(
         Dict[str, Dict[str, Union[str, float]]]
     ]
 ]:
-    exchange = deribit()
+    exchange = ccxt.deribit()
     instruments = exchange.public_get_get_instruments({
         'currency': market,
         'kind': 'option',
@@ -456,4 +791,56 @@ def fetch_deribit_btc_option_expiries(
         'index_price' : index_price,
         'by_expiry' : sorted_expiry_data, # type: ignore Otherwise, Error: Type "dict[str, list[tuple[str, float]] | dict[str, Dict[Unknown, Unknown]]]" is not assignable to return type "Dict[str, Dict[str, float] | Dict[str, Dict[str, str | float]]]"
         'by_expiry_and_strike' : expiry_data_breakdown_by_strike
-    }
+    }
+
+def build_pair_candles(
+    pd_candles1 : pd.DataFrame,
+    pd_candles2 : pd.DataFrame,
+    left_columns_postfix : str = "_1",
+    right_columns_postfix : str = "_2"
+) -> pd.DataFrame:
+    min_timestamp_ms1 = int(pd_candles1.iloc[0]['timestamp_ms'])
+    max_timestamp_ms1 = int(pd_candles1.iloc[-1]['timestamp_ms'])
+    min_timestamp_ms2 = int(pd_candles2.iloc[0]['timestamp_ms'])
+    max_timestamp_ms2 = int(pd_candles2.iloc[-1]['timestamp_ms'])
+
+    pd_candles1 = pd_candles1[(pd_candles1.timestamp_ms>=min_timestamp_ms2) & (pd_candles1.timestamp_ms<=max_timestamp_ms2) & (~pd_candles1.timestamp_ms.isna())]
+    pd_candles2 = pd_candles2[(pd_candles2.timestamp_ms>=min_timestamp_ms1) & (pd_candles2.timestamp_ms<=max_timestamp_ms1) & (~pd_candles2.timestamp_ms.isna())]
+    assert(pd_candles1.shape[0]==pd_candles2.shape[0])
+
+    pd_candles1['timestamp_ms_gap'] = pd_candles1['timestamp_ms'] - pd_candles1['timestamp_ms'].shift(1)
+    timestamp_ms_gap = pd_candles1.iloc[-1]['timestamp_ms_gap']
+    assert(pd_candles1[~pd_candles1.timestamp_ms_gap.isna()][pd_candles1.timestamp_ms_gap!=timestamp_ms_gap].shape[0]==0)
+    pd_candles1.drop(columns=['timestamp_ms_gap'], inplace=True)
+
+    pd_candles2['timestamp_ms_gap'] = pd_candles2['timestamp_ms'] - pd_candles2['timestamp_ms'].shift(1)
+    timestamp_ms_gap = pd_candles2.iloc[-1]['timestamp_ms_gap']
+    assert(pd_candles2[~pd_candles2.timestamp_ms_gap.isna()][pd_candles2.timestamp_ms_gap!=timestamp_ms_gap].shape[0]==0)
+    pd_candles2.drop(columns=['timestamp_ms_gap'], inplace=True)
+
+    min_timestamp_ms1 = int(pd_candles1.iloc[0]['timestamp_ms'])
+    max_timestamp_ms1 = int(pd_candles1.iloc[-1]['timestamp_ms'])
+    min_timestamp_ms2 = int(pd_candles2.iloc[0]['timestamp_ms'])
+    max_timestamp_ms2 = int(pd_candles2.iloc[-1]['timestamp_ms'])
+    assert(min_timestamp_ms1==min_timestamp_ms2)
+    assert(max_timestamp_ms1==max_timestamp_ms2)
+    assert(pd_candles1.shape[0]==pd_candles2.shape[0])
+
+    if len([ col for col in pd_candles1.columns if col[-2:]==left_columns_postfix ]) == 0:
+        pd_candles1.columns = [str(col) + left_columns_postfix for col in pd_candles1.columns]
+
+    if len([ col for col in pd_candles2.columns if col[-2:]==right_columns_postfix ]) == 0:
+        pd_candles2.columns = [str(col) + right_columns_postfix for col in pd_candles2.columns]
+
+    pd_candles1.reset_index(drop=True, inplace=True)
+    pd_candles2.reset_index(drop=True, inplace=True)
+    pd_candles = pd.concat([pd_candles1, pd_candles2], axis=1)
+    pd_candles['timestamp_ms_gap'] = pd_candles[f'timestamp_ms{left_columns_postfix}'] - pd_candles[f'timestamp_ms{right_columns_postfix}']
+    assert(pd_candles[pd_candles.timestamp_ms_gap!=0].shape[0]==0)
+
+    pd_candles.drop(pd_candles.columns[pd_candles.columns.str.contains('unnamed', case=False)], axis=1, inplace=True)
+
+    return pd_candles
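A usage sketch for the new build_pair_candles, with two tiny hypothetical legs on the same fixed interval (the function asserts both legs are gapless and perfectly aligned on timestamp_ms before concatenating them side by side):

    import pandas as pd
    from siglab_py.util.market_data_util import build_pair_candles

    ts = [1000, 2000, 3000]   # one fixed interval, no gaps: the asserts require this
    leg1 = pd.DataFrame({'timestamp_ms': ts, 'close': [100.0, 101.0, 102.0]})
    leg2 = pd.DataFrame({'timestamp_ms': ts, 'close': [10.0, 10.5, 11.0]})

    pair = build_pair_candles(leg1, leg2)
    # Columns come back suffixed: close_1 is the left leg, close_2 the right,
    # so row-wise arithmetic like a spread is timestamp-safe.
    spread = pair['close_1'] - pair['close_2']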