siglab-py 0.5.30__py3-none-any.whl → 0.6.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of siglab-py might be problematic. Click here for more details.

@@ -9,9 +9,9 @@ import math
9
9
  import pandas as pd
10
10
  import numpy as np
11
11
  import asyncio
12
+ from tabulate import tabulate
12
13
 
13
14
  from ccxt.base.exchange import Exchange as CcxtExchange
14
- from ccxt import deribit
15
15
  import ccxt
16
16
  import ccxt.pro as ccxtpro
17
17
 
@@ -24,6 +24,8 @@ import yfinance as yf
24
24
  from siglab_py.util.retry_util import retry
25
25
  from siglab_py.exchanges.futubull import Futubull
26
26
  from siglab_py.exchanges.any_exchange import AnyExchange
27
+ from siglab_py.exchanges.deribit import Deribit, DeribitAsync
28
+ from siglab_py.exchanges.binance import Binance, BinanceAsync
27
29
 
28
30
  def instantiate_exchange(
29
31
  exchange_name : str,
@@ -31,6 +33,7 @@ def instantiate_exchange(
31
33
  secret : Union[str, None] = None,
32
34
  passphrase : Union[str, None] = None,
33
35
  default_type : Union[str, None] = 'spot',
36
+ default_sub_type : Union[str, None] = None,
34
37
  rate_limit_ms : float = 100
35
38
  ) -> Union[AnyExchange, None]:
36
39
  exchange_name = exchange_name.lower().strip()
@@ -46,6 +49,10 @@ def instantiate_exchange(
46
49
  'defaultType' : default_type
47
50
  }
48
51
  }
52
+
53
+ if default_sub_type:
54
+ exchange_params['defaultSubType'] = default_sub_type
55
+
49
56
  if api_key:
50
57
  exchange_params['apiKey'] = api_key
51
58
  if secret:
@@ -54,13 +61,13 @@ def instantiate_exchange(
54
61
  exchange_params['passphrase'] = passphrase
55
62
 
56
63
  if exchange_name=='binance':
57
- exchange = ccxt.binance(exchange_params) # type: ignore
64
+ exchange = Binance(exchange_params) # type: ignore
58
65
  elif exchange_name=='bybit':
59
66
  exchange = ccxt.bybit(exchange_params) # type: ignore
60
67
  elif exchange_name=='okx':
61
68
  exchange = ccxt.okx(exchange_params) # type: ignore
62
69
  elif exchange_name=='deribit':
63
- exchange = ccxt.deribit(exchange_params) # type: ignore
70
+ exchange = Deribit(exchange_params) # type: ignore
64
71
  elif exchange_name=='hyperliquid':
65
72
  exchange = ccxt.hyperliquid(
66
73
  {
@@ -82,8 +89,10 @@ async def async_instantiate_exchange(
82
89
  api_key : str,
83
90
  secret : str,
84
91
  passphrase : str,
85
- default_type : str,
86
- rate_limit_ms : float = 100
92
+ default_type : Union[str, None] = 'spot',
93
+ default_sub_type : Union[str, None] = None,
94
+ rate_limit_ms : float = 100,
95
+ verbose : bool = False
87
96
  ) -> Union[AnyExchange, None]:
88
97
  exchange : Union[AnyExchange, None] = None
89
98
  exchange_name : str = gateway_id.split('_')[0]
@@ -98,13 +107,17 @@ async def async_instantiate_exchange(
98
107
  'rateLimit' : rate_limit_ms,
99
108
  'options' : {
100
109
  'defaultType' : default_type
101
- }
110
+ },
111
+ 'verbose': verbose
102
112
  }
113
+
114
+ if default_sub_type:
115
+ exchange_params['defaultSubType'] = default_sub_type
103
116
 
104
117
  if exchange_name=='binance':
105
118
  # spot, future, margin, delivery, option
106
119
  # https://github.com/ccxt/ccxt/blob/master/python/ccxt/binance.py#L1298
107
- exchange = ccxtpro.binance(exchange_params) # type: ignore
120
+ exchange = BinanceAsync(exchange_params) # type: ignore
108
121
  elif exchange_name=='bybit':
109
122
  # spot, linear, inverse, futures
110
123
  # https://github.com/ccxt/ccxt/blob/master/python/ccxt/bybit.py#L1041
@@ -117,7 +130,7 @@ async def async_instantiate_exchange(
117
130
  elif exchange_name=='deribit':
118
131
  # spot, swap, future
119
132
  # https://github.com/ccxt/ccxt/blob/master/python/ccxt/deribit.py#L360
120
- exchange = ccxtpro.deribit(exchange_params) # type: ignore
133
+ exchange = DeribitAsync(exchange_params) # type: ignore
121
134
  elif exchange_name=='kraken':
122
135
  exchange = ccxtpro.kraken(exchange_params) # type: ignore
123
136
  elif exchange_name=='hyperliquid':
@@ -156,9 +169,10 @@ async def async_instantiate_exchange(
156
169
  "walletAddress" : api_key,
157
170
  "privateKey" : secret,
158
171
  'enableRateLimit' : True,
159
- 'rateLimit' : rate_limit_ms
160
- }
161
- ) # type: ignore
172
+ 'rateLimit' : rate_limit_ms,
173
+ 'verbose': verbose
174
+ } # type: ignore
175
+ )
162
176
  else:
163
177
  raise ValueError(f"Unsupported exchange {exchange_name}, check gateway_id {gateway_id}.")
164
178
 
@@ -192,18 +206,18 @@ def timestamp_to_datetime_cols(pd_candles : pd.DataFrame):
192
206
  pd_candles['timestamp_ms'] = pd_candles['timestamp_ms'].apply(_fix_timestamp_ms)
193
207
  pd_candles['datetime'] = pd_candles['timestamp_ms'].apply(lambda x: datetime.fromtimestamp(int(x/1000)))
194
208
  pd_candles['datetime'] = pd.to_datetime(pd_candles['datetime'])
195
- pd_candles['datetime'] = pd_candles['datetime'].dt.tz_localize(None)
209
+ pd_candles['datetime'] = pd_candles['datetime'].dt.tz_localize(None) # type: ignore
196
210
  pd_candles['datetime_utc'] = pd_candles['timestamp_ms'].apply(
197
211
  lambda x: datetime.fromtimestamp(int(x.timestamp()) if isinstance(x, pd.Timestamp) else int(x / 1000), tz=timezone.utc)
198
212
  )
199
213
 
200
214
  # This is to make it easy to do grouping with Excel pivot table
201
- pd_candles['year'] = pd_candles['datetime'].dt.year
202
- pd_candles['month'] = pd_candles['datetime'].dt.month
203
- pd_candles['day'] = pd_candles['datetime'].dt.day
204
- pd_candles['hour'] = pd_candles['datetime'].dt.hour
205
- pd_candles['minute'] = pd_candles['datetime'].dt.minute
206
- pd_candles['dayofweek'] = pd_candles['datetime'].dt.dayofweek # dayofweek: Monday is 0 and Sunday is 6
215
+ pd_candles['year'] = pd_candles['datetime'].dt.year # type: ignore
216
+ pd_candles['month'] = pd_candles['datetime'].dt.month # type: ignore
217
+ pd_candles['day'] = pd_candles['datetime'].dt.day # type: ignore
218
+ pd_candles['hour'] = pd_candles['datetime'].dt.hour # type: ignore
219
+ pd_candles['minute'] = pd_candles['datetime'].dt.minute # type: ignore
220
+ pd_candles['dayofweek'] = pd_candles['datetime'].dt.dayofweek # type: ignore dayofweek: Monday is 0 and Sunday is 6
207
221
 
208
222
  pd_candles['week_of_month'] = pd_candles['timestamp_ms'].apply(
209
223
  lambda x: timestamp_to_week_of_month(int(x/1000))
@@ -220,42 +234,58 @@ def timestamp_to_datetime_cols(pd_candles : pd.DataFrame):
220
234
  )
221
235
 
222
236
  pd_candles['timestamp_ms_gap'] = pd_candles['timestamp_ms'] - pd_candles['timestamp_ms'].shift(1)
223
- timestamp_ms_gap = pd_candles.iloc[-1]['timestamp_ms_gap']
224
- assert(pd_candles[~pd_candles.timestamp_ms_gap.isna()][pd_candles.timestamp_ms_gap!=timestamp_ms_gap].shape[0]==0)
237
+
238
+ # Depending on asset, minutes bar may have gaps
239
+ timestamp_ms_gap_median = pd_candles['timestamp_ms_gap'].median()
240
+ NUM_MS_IN_1HR = 60*60*1000
241
+ if timestamp_ms_gap_median>=NUM_MS_IN_1HR:
242
+ num_rows_with_expected_gap = pd_candles[~pd_candles.timestamp_ms_gap.isna()][pd_candles.timestamp_ms_gap==timestamp_ms_gap_median].shape[0]
243
+ assert(num_rows_with_expected_gap/pd_candles.shape[0]>0.9)
225
244
  pd_candles.drop(columns=['timestamp_ms_gap'], inplace=True)
226
245
 
246
def get_regions_trading_utc_hours():
    '''
    Return the approximate trading hours of each region as lists of whole UTC hours.

    Keys are 'APAC', 'EMEA' and 'AMER' (in that order). Each value lists the UTC hours
    in session order, so the position of an hour in the list is its zero-based offset
    into that region's session. A fresh dict is built on every call.

    APAC (Asia-Pacific) Trading Hours
        UTC 21:00 - 09:00 (approximate range)
        Major financial centers: Tokyo, Hong Kong, Singapore, Sydney

    EMEA (Europe, Middle East, Africa) Trading Hours
        UTC 07:00 - 16:00 (approximate range)
        Major financial centers: London, Frankfurt, Paris, Zurich, Dubai

    US Trading Hours
        UTC 13:00 - 22:00 (approximate range)
        Major financial centers: New York, Chicago
        Key markets: NYSE, NASDAQ

    utcnow and utcfromtimestamp have been deprecated since Python 3.12
        https://www.pythonmorsels.com/converting-to-utc-time/

    Example, UTC 23:00 is the 3rd hour of the APAC trading session
        utc_hour = 23
        i = get_regions_trading_utc_hours()['APAC'].index(utc_hour)
        assert(i==2)
    '''
    # APAC wraps around midnight UTC: 21..23 followed by 0..9.
    apac_hours = [*range(21, 24), *range(0, 10)]
    emea_hours = list(range(7, 17))
    amer_hours = list(range(13, 23))

    regions_utc_hours = {}
    regions_utc_hours['APAC'] = apac_hours
    regions_utc_hours['EMEA'] = emea_hours
    regions_utc_hours['AMER'] = amer_hours
    return regions_utc_hours
274
+
227
275
  def timestamp_to_active_trading_regions(
228
276
  timestamp_ms : int
229
277
  ) -> List[str]:
230
-
231
- '''
232
- APAC (Asia-Pacific) Trading Hours
233
- UTC 22:00 - 09:00 (approximate range)
234
- Major financial centers: Tokyo, Hong Kong, Singapore, Sydney
235
-
236
- EMEA (Europe, Middle East, Africa) Trading Hours
237
- UTC 07:00 - 16:00 (approximate range)
238
- Major financial centers: London, Frankfurt, Paris, Zurich, Dubai
239
-
240
- US Trading Hours
241
- UTC 13:30 - 20:00 (approximate range)
242
- Major financial centers: New York, Chicago
243
- Key markets: NYSE, NASDAQ
244
-
245
- utcnow and utcfromtimestamp been deprecated in Python 3.12
246
- https://www.pythonmorsels.com/converting-to-utc-time/
247
- '''
248
278
  active_trading_regions : List[str] = []
249
279
 
250
280
  dt_utc = datetime.fromtimestamp(int(timestamp_ms / 1000), tz=timezone.utc)
251
281
  utc_hour = dt_utc.hour
252
- if (utc_hour >= 22) or (utc_hour <= 9):
282
+ if utc_hour in get_regions_trading_utc_hours()['APAC']:
253
283
  active_trading_regions.append("APAC")
254
284
 
255
- if 7 <= utc_hour <= 16:
285
+ if utc_hour in get_regions_trading_utc_hours()['EMEA']:
256
286
  active_trading_regions.append("EMEA")
257
287
 
258
- if 13 <= utc_hour <= 20:
288
+ if utc_hour in get_regions_trading_utc_hours()['AMER']:
259
289
  active_trading_regions.append("AMER")
260
290
 
261
291
  return active_trading_regions
@@ -288,6 +318,17 @@ def fix_column_types(pd_candles : pd.DataFrame):
288
318
  pd_candles.reset_index(drop=True, inplace=True)
289
319
  pd_candles.sort_values("datetime", inplace=True)
290
320
 
321
def interval_to_ms(interval : str) -> int:
    '''
    Convert a candle interval string to its length in milliseconds.

    Accepts a bare unit ("d", "h", "m" for day, hour, minute) or a unit preceded
    by a non-negative integer count, e.g. "15m", "1h", "2d".

    Returns 0 when the interval is empty, is not a string, uses an unsupported
    unit, or has a non-numeric count. Callers that need to distinguish
    "unsupported" from a real duration should check for 0.
    '''
    ms_per_unit = {
        "d" : 24*60*60*1000,
        "h" : 60*60*1000,
        "m" : 60*1000,
    }

    if not isinstance(interval, str) or not interval:
        return 0

    count_text, unit = interval[:-1], interval[-1]
    if unit not in ms_per_unit:
        return 0

    if count_text == "":
        # Bare unit such as "h" means exactly one of that unit.
        count = 1
    elif count_text.isdecimal():
        # isdecimal (not isdigit) so that every accepted character is parseable by int().
        count = int(count_text)
    else:
        return 0

    return count * ms_per_unit[unit]
331
+
291
332
  '''
292
333
  https://polygon.io/docs/stocks
293
334
  '''
@@ -343,7 +384,7 @@ class NASDAQExchange:
343
384
  pd_daily_candles['low'] = pd_daily_candles['low'].astype(str).str.replace('$','')
344
385
  pd_daily_candles['close'] = pd_daily_candles['close'].astype(str).str.replace('$','')
345
386
  pd_daily_candles['datetime']= pd.to_datetime(pd_daily_candles['datetime'])
346
- pd_daily_candles['timestamp_ms'] = pd_daily_candles.datetime.values.astype(np.int64) // 10 ** 6
387
+ pd_daily_candles['timestamp_ms'] = pd_daily_candles.datetime.values.astype(np.int64) // 10 ** 6 # type: ignore
347
388
  pd_daily_candles['symbol'] = symbol
348
389
  pd_daily_candles['exchange'] = 'nasdaq'
349
390
  fix_column_types(pd_daily_candles)
@@ -362,7 +403,7 @@ class NASDAQExchange:
362
403
  )
363
404
 
364
405
  # When you fill forward, a few candles before start date can have null values (open, high, low, close, volume ...)
365
- first_candle_dt = pd_hourly_candles[(~pd_hourly_candles.close.isna()) & (pd_hourly_candles['datetime'].dt.time == pd.Timestamp('00:00:00').time())].iloc[0]['datetime']
406
+ first_candle_dt = pd_hourly_candles[(~pd_hourly_candles.close.isna()) & (pd_hourly_candles['datetime'].dt.time == pd.Timestamp('00:00:00').time())].iloc[0]['datetime'] # type: ignore
366
407
  pd_hourly_candles = pd_hourly_candles[pd_hourly_candles.datetime>=first_candle_dt]
367
408
  exchange_candles[symbol] = pd_hourly_candles
368
409
 
@@ -426,6 +467,45 @@ class YahooExchange:
426
467
 
427
468
  return exchange_candles
428
469
 
470
def aggregate_candles(
    interval : str,
    pd_candles : pd.DataFrame
) -> pd.DataFrame:
    '''
    Resample candles into larger bars of size interval (e.g. "5m", "4h").

    pd_candles must have a datetime-like 'datetime' column plus every column named in
    the aggregation map below; pandas raises if one is missing. OHLC are combined as
    first/max/min/last, 'volume' and 'pct_chg_on_close' are summed, and all remaining
    descriptive columns take the first value of each bar.

    The input frame is left untouched: resampling runs on an indexed copy, so the
    caller's column order and index survive even if aggregation raises.

    Returns a new DataFrame with 'datetime' restored as a regular column.
    '''
    if interval[-1]=='m':
        # 'm' for pandas means months! Only the trailing unit is rewritten.
        interval = interval[:-1] + 'min'

    column_aggregations = {
        'exchange' : 'first',
        'symbol' : 'first',
        'timestamp_ms' : 'first',

        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',

        'datetime_utc' : 'first',
        'year' : 'first',
        'month' : 'first',
        'day' : 'first',
        'hour' : 'first',
        'minute' : 'first',
        'dayofweek' : 'first',
        'week_of_month' : 'first',

        'apac_trading_hr' : 'first',
        'emea_trading_hr' : 'first',
        'amer_trading_hr' : 'first',

        'pct_chg_on_close' : 'sum',
    }

    # set_index without inplace returns a new frame, so the caller's data is not mutated.
    pd_candles_indexed = pd_candles.set_index('datetime')
    pd_candles_aggregated = pd_candles_indexed.resample(interval).agg(column_aggregations)
    pd_candles_aggregated.reset_index(inplace=True)
    return pd_candles_aggregated
508
+
429
509
  def fetch_historical_price(
430
510
  exchange,
431
511
  normalized_symbol : str,
@@ -472,19 +552,21 @@ def fetch_candles(
472
552
  validation_max_gaps : int = 10,
473
553
  validation_max_end_date_intervals : int = 1
474
554
  ) -> Dict[str, Union[pd.DataFrame, None]]:
475
-
555
+ exchange_candles = { '' : None }
556
+ num_intervals = int(candle_size.replace(candle_size[-1],''))
557
+
476
558
  if end_ts>datetime.now().timestamp():
477
559
  end_ts = int(datetime.now().timestamp())
478
560
 
479
561
  if type(exchange) is YahooExchange:
480
- return exchange.fetch_candles(
562
+ exchange_candles = exchange.fetch_candles(
481
563
  start_ts=start_ts,
482
564
  end_ts=end_ts,
483
565
  symbols=normalized_symbols,
484
566
  candle_size=candle_size
485
567
  )
486
568
  elif type(exchange) is NASDAQExchange:
487
- return exchange.fetch_candles(
569
+ exchange_candles = exchange.fetch_candles(
488
570
  start_ts=start_ts,
489
571
  end_ts=end_ts,
490
572
  symbols=normalized_symbols,
@@ -501,9 +583,9 @@ def fetch_candles(
501
583
  pd_candles = exchange_candles[symbol]
502
584
  if not pd_candles is None:
503
585
  fix_column_types(pd_candles) # You don't want to do this from Futubull as you'd need import Futubull from there: Circular references
504
- return exchange_candles
586
+
505
587
  elif issubclass(exchange.__class__, CcxtExchange):
506
- return _fetch_candles_ccxt(
588
+ exchange_candles = _fetch_candles_ccxt(
507
589
  start_ts=start_ts,
508
590
  end_ts=end_ts,
509
591
  exchange=exchange,
@@ -511,7 +593,40 @@ def fetch_candles(
511
593
  candle_size=candle_size,
512
594
  num_candles_limit=num_candles_limit
513
595
  )
514
- return { '' : None }
596
+ if num_intervals!=1:
597
+ for symbol in exchange_candles:
598
+ if not exchange_candles[symbol] is None:
599
+ exchange_candles[symbol] = aggregate_candles(candle_size, exchange_candles[symbol]) # type: ignore
600
+
601
+ # For invalid rows missing timestamps, o/h/l/c/v, fill forward close, set volume to zero.
602
+ for symbol in exchange_candles:
603
+ pd_candles = exchange_candles[symbol]
604
+
605
+ if pd_candles is not None:
606
+ mask_invalid_candles = pd_candles["timestamp_ms"].isna()
607
+ if mask_invalid_candles.any():
608
+ pd_invalid_candles = pd_candles[mask_invalid_candles]
609
+
610
+ if logger is not None:
611
+ logger.warning(f"Dropping {pd_invalid_candles.shape[0]}/{pd_candles.shape[0]} rows from {symbol} candles (null timestamp_ms)") # type: ignore
612
+ logger.warning(f"{tabulate(pd_invalid_candles, headers='keys', tablefmt='psql')}") # type: ignore
613
+
614
+ def _to_timestamp_ms(dt):
615
+ if pd.isna(dt):
616
+ return pd.NA
617
+ if isinstance(dt, str):
618
+ dt = pd.to_datetime(dt)
619
+ return int(dt.timestamp() * 1000)
620
+
621
+ pd_candles.loc[mask_invalid_candles, "timestamp_ms"] = pd_candles.loc[mask_invalid_candles, "datetime"].apply(_to_timestamp_ms)
622
+
623
+ pd_candles["close"] = pd_candles["close"].ffill()
624
+ pd_candles.loc[mask_invalid_candles, ["open", "high", "low"]] = pd_candles.loc[
625
+ mask_invalid_candles, ["close"]
626
+ ]
627
+ pd_candles.loc[mask_invalid_candles, "volume"] = 0.0
628
+
629
+ return exchange_candles # type: ignore
515
630
 
516
631
  '''
517
632
  Find listing date https://gist.github.com/mr-easy/5185b1dcdd5f9f908ff196446f092e9b
@@ -563,7 +678,7 @@ def _fetch_candles_ccxt(
563
678
 
564
679
  def _calc_increment(candle_size):
565
680
  increment = 1
566
- num_intervals = int(candle_size[0])
681
+ num_intervals = int(candle_size.replace(candle_size[-1],''))
567
682
  interval_type = candle_size[-1]
568
683
  if interval_type == "m":
569
684
  increment = 60
@@ -584,12 +699,13 @@ def _fetch_candles_ccxt(
584
699
  A more efficient way is to find listing date. Start looping from there.
585
700
  '''
586
701
  market = exchange.markets[ticker]
702
+ this_ticker_start_ts = start_ts
587
703
  if market['created']:
588
- start_ts = max(start_ts, int(market['created']/1000))
704
+ this_ticker_start_ts = max(this_ticker_start_ts, int(market['created']/1000))
589
705
 
590
706
  all_candles = []
591
707
  params = {}
592
- this_cutoff = start_ts
708
+ this_cutoff = this_ticker_start_ts
593
709
  while this_cutoff<end_ts:
594
710
  candles = _fetch_ohlcv(exchange=exchange, symbol=ticker, timeframe=candle_size, since=int(this_cutoff * 1000), limit=num_candles_limit, params=params)
595
711
  if candles and len(candles)>0:
@@ -623,7 +739,7 @@ def fetch_deribit_btc_option_expiries(
623
739
  Dict[str, Dict[str, Union[str, float]]]
624
740
  ]
625
741
  ]:
626
- exchange = deribit()
742
+ exchange = ccxt.deribit()
627
743
  instruments = exchange.public_get_get_instruments({
628
744
  'currency': market,
629
745
  'kind': 'option',
@@ -679,7 +795,9 @@ def fetch_deribit_btc_option_expiries(
679
795
 
680
796
  def build_pair_candles(
681
797
  pd_candles1 : pd.DataFrame,
682
- pd_candles2 : pd.DataFrame
798
+ pd_candles2 : pd.DataFrame,
799
+ left_columns_postfix : str = "_1",
800
+ right_columns_postfix : str = "_2"
683
801
  ) -> pd.DataFrame:
684
802
  min_timestamp_ms1 = int(pd_candles1.iloc[0]['timestamp_ms'])
685
803
  max_timestamp_ms1 = int(pd_candles1.iloc[-1]['timestamp_ms'])
@@ -709,16 +827,16 @@ def build_pair_candles(
709
827
  assert(max_timestamp_ms1==max_timestamp_ms2)
710
828
  assert(pd_candles1.shape[0]==pd_candles2.shape[0])
711
829
 
712
- if len([ col for col in pd_candles1.columns if col[-2:]=='_1' ]) == 0:
713
- pd_candles1.columns = [str(col) + '_1' for col in pd_candles1.columns]
830
+ if len([ col for col in pd_candles1.columns if col[-2:]==left_columns_postfix ]) == 0:
831
+ pd_candles1.columns = [str(col) + left_columns_postfix for col in pd_candles1.columns]
714
832
 
715
- if len([ col for col in pd_candles2.columns if col[-2:]=='_2' ]) == 0:
716
- pd_candles2.columns = [str(col) + '_2' for col in pd_candles2.columns]
833
+ if len([ col for col in pd_candles2.columns if col[-2:]==right_columns_postfix ]) == 0:
834
+ pd_candles2.columns = [str(col) + right_columns_postfix for col in pd_candles2.columns]
717
835
 
718
836
  pd_candles1.reset_index(drop=True, inplace=True)
719
837
  pd_candles2.reset_index(drop=True, inplace=True)
720
838
  pd_candles = pd.concat([pd_candles1, pd_candles2], axis=1)
721
- pd_candles['timestamp_ms_gap'] = pd_candles['timestamp_ms_1'] - pd_candles['timestamp_ms_2']
839
+ pd_candles['timestamp_ms_gap'] = pd_candles[f'timestamp_ms{left_columns_postfix}'] - pd_candles[f'timestamp_ms{right_columns_postfix}']
722
840
  assert(pd_candles[pd_candles.timestamp_ms_gap!=0].shape[0]==0)
723
841
 
724
842
  pd_candles.drop(pd_candles.columns[pd_candles.columns.str.contains('unnamed',case = False)],axis = 1, inplace = True)
@@ -5,7 +5,7 @@ import pandas as pd
5
5
  import numpy as np
6
6
  from tabulate import tabulate
7
7
 
8
- from util.slack_notification_util import slack_dispatch_notification
8
+ from siglab_py.util.slack_notification_util import slack_dispatch_notification
9
9
 
10
10
  from siglab_py.constants import LogLevel
11
11