siglab-py 0.1.19__py3-none-any.whl → 0.6.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. siglab_py/algo/__init__.py +0 -0
  2. siglab_py/algo/macdrsi_crosses_15m_tc_strategy.py +107 -0
  3. siglab_py/algo/strategy_base.py +122 -0
  4. siglab_py/algo/strategy_executor.py +1308 -0
  5. siglab_py/algo/tp_algo.py +529 -0
  6. siglab_py/backtests/__init__.py +0 -0
  7. siglab_py/backtests/backtest_core.py +2405 -0
  8. siglab_py/backtests/coinflip_15m_crypto.py +432 -0
  9. siglab_py/backtests/fibonacci_d_mv_crypto.py +541 -0
  10. siglab_py/backtests/macdrsi_crosses_15m_tc_crypto.py +473 -0
  11. siglab_py/constants.py +26 -1
  12. siglab_py/exchanges/binance.py +38 -0
  13. siglab_py/exchanges/deribit.py +83 -0
  14. siglab_py/exchanges/futubull.py +33 -3
  15. siglab_py/market_data_providers/candles_provider.py +11 -10
  16. siglab_py/market_data_providers/candles_ta_provider.py +5 -5
  17. siglab_py/market_data_providers/ccxt_candles_ta_to_csv.py +238 -0
  18. siglab_py/market_data_providers/futu_candles_ta_to_csv.py +224 -0
  19. siglab_py/market_data_providers/google_monitor.py +320 -0
  20. siglab_py/market_data_providers/orderbooks_provider.py +15 -12
  21. siglab_py/market_data_providers/tg_monitor.py +428 -0
  22. siglab_py/market_data_providers/{test_provider.py → trigger_provider.py} +9 -8
  23. siglab_py/ordergateway/client.py +172 -41
  24. siglab_py/ordergateway/encrypt_keys_util.py +1 -1
  25. siglab_py/ordergateway/gateway.py +456 -344
  26. siglab_py/ordergateway/test_ordergateway.py +8 -7
  27. siglab_py/tests/integration/market_data_util_tests.py +80 -6
  28. siglab_py/tests/unit/analytic_util_tests.py +67 -4
  29. siglab_py/tests/unit/market_data_util_tests.py +96 -0
  30. siglab_py/tests/unit/simple_math_tests.py +252 -0
  31. siglab_py/tests/unit/trading_util_tests.py +65 -0
  32. siglab_py/util/analytic_util.py +484 -66
  33. siglab_py/util/datetime_util.py +39 -0
  34. siglab_py/util/market_data_util.py +564 -74
  35. siglab_py/util/module_util.py +40 -0
  36. siglab_py/util/notification_util.py +78 -0
  37. siglab_py/util/retry_util.py +16 -3
  38. siglab_py/util/simple_math.py +262 -0
  39. siglab_py/util/slack_notification_util.py +59 -0
  40. siglab_py/util/trading_util.py +118 -0
  41. {siglab_py-0.1.19.dist-info → siglab_py-0.6.33.dist-info}/METADATA +5 -13
  42. siglab_py-0.6.33.dist-info/RECORD +56 -0
  43. {siglab_py-0.1.19.dist-info → siglab_py-0.6.33.dist-info}/WHEEL +1 -1
  44. siglab_py-0.1.19.dist-info/RECORD +0 -31
  45. {siglab_py-0.1.19.dist-info → siglab_py-0.6.33.dist-info}/top_level.txt +0 -0
@@ -1,13 +1,19 @@
1
+ import logging
2
+ import incremental
1
3
  import tzlocal
2
4
  from datetime import datetime, timezone
5
+ import time
3
6
  from typing import List, Dict, Union, NoReturn, Any, Tuple
4
7
  from pathlib import Path
5
8
  import math
6
9
  import pandas as pd
7
10
  import numpy as np
11
+ import asyncio
12
+ from tabulate import tabulate
8
13
 
9
14
  from ccxt.base.exchange import Exchange as CcxtExchange
10
- from ccxt import deribit
15
+ import ccxt
16
+ import ccxt.pro as ccxtpro
11
17
 
12
18
  # https://www.analyticsvidhya.com/blog/2021/06/download-financial-dataset-using-yahoo-finance-in-python-a-complete-guide/
13
19
  from yahoofinancials import YahooFinancials
@@ -15,25 +21,284 @@ from yahoofinancials import YahooFinancials
15
21
  # yfinance allows intervals '1m', '5m', '15m', '1h', '1d', '1wk', '1mo'. yahoofinancials not as flexible
16
22
  import yfinance as yf
17
23
 
18
- from exchanges.futubull import Futubull
24
+ from siglab_py.util.retry_util import retry
25
+ from siglab_py.exchanges.futubull import Futubull
26
+ from siglab_py.exchanges.any_exchange import AnyExchange
27
+ from siglab_py.exchanges.deribit import Deribit, DeribitAsync
28
+ from siglab_py.exchanges.binance import Binance, BinanceAsync
29
+
30
+ def instantiate_exchange(
31
+ exchange_name : str,
32
+ api_key : Union[str, None] = None,
33
+ secret : Union[str, None] = None,
34
+ passphrase : Union[str, None] = None,
35
+ default_type : Union[str, None] = 'spot',
36
+ default_sub_type : Union[str, None] = None,
37
+ rate_limit_ms : float = 100
38
+ ) -> Union[AnyExchange, None]:
39
+ exchange_name = exchange_name.lower().strip()
40
+
41
+ # Look at ccxt exchange.describe. under 'options' \ 'defaultType' (and 'defaultSubType') for what markets the exchange supports.
42
+ # https://docs.ccxt.com/en/latest/manual.html#instantiation
43
+ exchange_params : Dict[str, Any]= {
44
+ 'apiKey' : api_key,
45
+ 'secret' : secret,
46
+ 'enableRateLimit' : True,
47
+ 'rateLimit' : rate_limit_ms,
48
+ 'options' : {
49
+ 'defaultType' : default_type
50
+ }
51
+ }
52
+
53
+ if default_sub_type:
54
+ exchange_params['defaultSubType'] = default_sub_type
55
+
56
+ if api_key:
57
+ exchange_params['apiKey'] = api_key
58
+ if secret:
59
+ exchange_params['secret'] = secret
60
+ if passphrase:
61
+ exchange_params['passphrase'] = passphrase
62
+
63
+ if exchange_name=='binance':
64
+ exchange = Binance(exchange_params) # type: ignore
65
+ elif exchange_name=='bybit':
66
+ exchange = ccxt.bybit(exchange_params) # type: ignore
67
+ elif exchange_name=='okx':
68
+ exchange = ccxt.okx(exchange_params) # type: ignore
69
+ elif exchange_name=='deribit':
70
+ exchange = Deribit(exchange_params) # type: ignore
71
+ elif exchange_name=='hyperliquid':
72
+ exchange = ccxt.hyperliquid(
73
+ {
74
+ "walletAddress" : api_key, # type: ignore
75
+ "privateKey" : secret,
76
+ 'enableRateLimit' : True,
77
+ 'rateLimit' : rate_limit_ms
78
+ }
79
+ )
80
+ else:
81
+ raise ValueError(f"Unsupported exchange {exchange_name}.")
82
+
83
+ exchange.load_markets() # type: ignore
84
+
85
+ return exchange # type: ignore
86
+
87
+ async def async_instantiate_exchange(
88
+ gateway_id : str,
89
+ api_key : str,
90
+ secret : str,
91
+ passphrase : str,
92
+ default_type : Union[str, None] = 'spot',
93
+ default_sub_type : Union[str, None] = None,
94
+ rate_limit_ms : float = 100,
95
+ verbose : bool = False
96
+ ) -> Union[AnyExchange, None]:
97
+ exchange : Union[AnyExchange, None] = None
98
+ exchange_name : str = gateway_id.split('_')[0]
99
+ exchange_name =exchange_name.lower().strip()
100
+
101
+ # Look at ccxt exchange.describe. under 'options' \ 'defaultType' (and 'defaultSubType') for what markets the exchange supports.
102
+ # https://docs.ccxt.com/en/latest/manual.html#instantiation
103
+ exchange_params : Dict[str, Any]= {
104
+ 'apiKey' : api_key,
105
+ 'secret' : secret,
106
+ 'enableRateLimit' : True,
107
+ 'rateLimit' : rate_limit_ms,
108
+ 'options' : {
109
+ 'defaultType' : default_type
110
+ },
111
+ 'verbose': verbose
112
+ }
113
+
114
+ if default_sub_type:
115
+ exchange_params['defaultSubType'] = default_sub_type
116
+
117
+ if exchange_name=='binance':
118
+ # spot, future, margin, delivery, option
119
+ # https://github.com/ccxt/ccxt/blob/master/python/ccxt/binance.py#L1298
120
+ exchange = BinanceAsync(exchange_params) # type: ignore
121
+ elif exchange_name=='bybit':
122
+ # spot, linear, inverse, futures
123
+ # https://github.com/ccxt/ccxt/blob/master/python/ccxt/bybit.py#L1041
124
+ exchange = ccxtpro.bybit(exchange_params) # type: ignore
125
+ elif exchange_name=='okx':
126
+ # 'funding', spot, margin, future, swap, option
127
+ # https://github.com/ccxt/ccxt/blob/master/python/ccxt/okx.py#L1144
128
+ exchange_params['password'] = passphrase
129
+ exchange = ccxtpro.okx(exchange_params) # type: ignore
130
+ elif exchange_name=='deribit':
131
+ # spot, swap, future
132
+ # https://github.com/ccxt/ccxt/blob/master/python/ccxt/deribit.py#L360
133
+ exchange = DeribitAsync(exchange_params) # type: ignore
134
+ elif exchange_name=='kraken':
135
+ exchange = ccxtpro.kraken(exchange_params) # type: ignore
136
+ elif exchange_name=='hyperliquid':
137
+ '''
138
+ https://app.hyperliquid.xyz/API
139
+
140
+ defaultType from ccxt: swap
141
+ https://github.com/ccxt/ccxt/blob/master/python/ccxt/hyperliquid.py#L225
142
+
143
+ How to integrate? You can skip first 6 min: https://www.youtube.com/watch?v=UuBr331wxr4&t=363s
144
+
145
+ Example,
146
+ API credentials created under "\ More \ API":
147
+ Ledger Arbitrum Wallet Address: 0xAAAAA <-- This is your Ledger Arbitrum wallet address with which you connect to Hyperliquid.
148
+ API Wallet Address 0xBBBBB <-- Generated
149
+ privateKey 0xCCCCC
150
+
151
+ Basic connection via CCXT:
152
+ import asyncio
153
+ import ccxt.pro as ccxtpro
154
+
155
+ async def main():
156
+ rate_limit_ms = 100
157
+ exchange_params = {
158
+ "walletAddress" : "0xAAAAA", # Ledger Arbitrum Wallet Address here! Not the generated address.
159
+ "privateKey" : "0xCCCCC"
160
+ }
161
+ exchange = ccxtpro.hyperliquid(exchange_params)
162
+ balances = await exchange.fetch_balance()
163
+ print(balances)
164
+
165
+ asyncio.run(main())
166
+ '''
167
+ exchange = ccxtpro.hyperliquid(
168
+ {
169
+ "walletAddress" : api_key,
170
+ "privateKey" : secret,
171
+ 'enableRateLimit' : True,
172
+ 'rateLimit' : rate_limit_ms,
173
+ 'verbose': verbose
174
+ } # type: ignore
175
+ )
176
+ else:
177
+ raise ValueError(f"Unsupported exchange {exchange_name}, check gateway_id {gateway_id}.")
178
+
179
+ await exchange.load_markets() # type: ignore
180
+
181
+ '''
182
+ Is this necessary? The added trouble is that, for example, bybit.authenticate requires arg 'url' while binance doesn't. And fetch_balance already tests credentials.
183
+
184
+ try:
185
+ await exchange.authenticate() # type: ignore
186
+ except Exception as swallow_this_error:
187
+ pass
188
+ '''
189
+
190
+ return exchange
19
191
 
20
192
  def timestamp_to_datetime_cols(pd_candles : pd.DataFrame):
21
- pd_candles['datetime'] = pd_candles['timestamp_ms'].apply(
22
- lambda x: datetime.fromtimestamp(int(x.timestamp()) if isinstance(x, pd.Timestamp) else int(x / 1000))
23
- )
193
+ def _fix_timestamp_ms(x):
194
+ if isinstance(x, pd.Timestamp):
195
+ return int(x.value // 10**6)
196
+ elif isinstance(x, np.datetime64):
197
+ return int(x.astype('int64') // 10**6)
198
+ elif isinstance(x, (int, float)):
199
+ x = int(x)
200
+ if len(str(abs(x))) == 13:
201
+ return x
202
+ else:
203
+ return int(x * 1000)
204
+ else:
205
+ raise ValueError(f"Unsupported type {type(x)} for timestamp conversion")
206
+ pd_candles['timestamp_ms'] = pd_candles['timestamp_ms'].apply(_fix_timestamp_ms)
207
+ pd_candles['datetime'] = pd_candles['timestamp_ms'].apply(lambda x: datetime.fromtimestamp(int(x/1000)))
24
208
  pd_candles['datetime'] = pd.to_datetime(pd_candles['datetime'])
25
- pd_candles['datetime'] = pd_candles['datetime'].dt.tz_localize(None)
209
+ pd_candles['datetime'] = pd_candles['datetime'].dt.tz_localize(None) # type: ignore
26
210
  pd_candles['datetime_utc'] = pd_candles['timestamp_ms'].apply(
27
211
  lambda x: datetime.fromtimestamp(int(x.timestamp()) if isinstance(x, pd.Timestamp) else int(x / 1000), tz=timezone.utc)
28
212
  )
29
213
 
30
214
  # This is to make it easy to do grouping with Excel pivot table
31
- pd_candles['year'] = pd_candles['datetime'].dt.year
32
- pd_candles['month'] = pd_candles['datetime'].dt.month
33
- pd_candles['day'] = pd_candles['datetime'].dt.day
34
- pd_candles['hour'] = pd_candles['datetime'].dt.hour
35
- pd_candles['minute'] = pd_candles['datetime'].dt.minute
36
- pd_candles['dayofweek'] = pd_candles['datetime'].dt.dayofweek # dayofweek: Monday is 0 and Sunday is 6
215
+ pd_candles['year'] = pd_candles['datetime'].dt.year # type: ignore
216
+ pd_candles['month'] = pd_candles['datetime'].dt.month # type: ignore
217
+ pd_candles['day'] = pd_candles['datetime'].dt.day # type: ignore
218
+ pd_candles['hour'] = pd_candles['datetime'].dt.hour # type: ignore
219
+ pd_candles['minute'] = pd_candles['datetime'].dt.minute # type: ignore
220
+ pd_candles['dayofweek'] = pd_candles['datetime'].dt.dayofweek # type: ignore dayofweek: Monday is 0 and Sunday is 6
221
+
222
+ pd_candles['week_of_month'] = pd_candles['timestamp_ms'].apply(
223
+ lambda x: timestamp_to_week_of_month(int(x/1000))
224
+ )
225
+
226
+ pd_candles['apac_trading_hr'] = pd_candles['timestamp_ms'].apply(
227
+ lambda x: "APAC" in timestamp_to_active_trading_regions(int(x/1000))
228
+ )
229
+ pd_candles['emea_trading_hr'] = pd_candles['timestamp_ms'].apply(
230
+ lambda x: "EMEA" in timestamp_to_active_trading_regions(int(x/1000))
231
+ )
232
+ pd_candles['amer_trading_hr'] = pd_candles['timestamp_ms'].apply(
233
+ lambda x: "AMER" in timestamp_to_active_trading_regions(int(x/1000))
234
+ )
235
+
236
+ pd_candles['timestamp_ms_gap'] = pd_candles['timestamp_ms'] - pd_candles['timestamp_ms'].shift(1)
237
+
238
+ # Depending on asset, minutes bar may have gaps
239
+ timestamp_ms_gap_median = pd_candles['timestamp_ms_gap'].median()
240
+ NUM_MS_IN_1HR = 60*60*1000
241
+ if timestamp_ms_gap_median>=NUM_MS_IN_1HR:
242
+ num_rows_with_expected_gap = pd_candles[~pd_candles.timestamp_ms_gap.isna()][pd_candles.timestamp_ms_gap==timestamp_ms_gap_median].shape[0]
243
+ assert(num_rows_with_expected_gap/pd_candles.shape[0]>0.9)
244
+ pd_candles.drop(columns=['timestamp_ms_gap'], inplace=True)
245
+
246
+ '''
247
+ APAC (Asia-Pacific) Trading Hours
248
+ UTC 21:00 - 09:00 (approximate range)
249
+ Major financial centers: Tokyo, Hong Kong, Singapore, Sydney
250
+
251
+ EMEA (Europe, Middle East, Africa) Trading Hours
252
+ UTC 07:00 - 16:00 (approximate range)
253
+ Major financial centers: London, Frankfurt, Paris, Zurich, Dubai
254
+
255
+ US Trading Hours
256
+ UTC 13:00 - 22:00 (approximate range)
257
+ Major financial centers: New York, Chicago
258
+ Key markets: NYSE, NASDAQ
259
+
260
+ utcnow and utcfromtimestamp been deprecated in Python 3.12
261
+ https://www.pythonmorsels.com/converting-to-utc-time/
262
+
263
+ Example, UTC 23:00 is 3rd hour in APAC trading session
264
+ utc_hour = 23
265
+ i = get_regions_trading_utc_hours()['APAC'].index(utc_hour)
266
+ assert(i==2)
267
+ '''
268
+ def get_regions_trading_utc_hours():
269
+ return {
270
+ 'APAC' : [21,22,23,0,1,2,3,4,5,6,7,8,9],
271
+ 'EMEA' : [7,8,9,10,11,12,13,14,15,16],
272
+ 'AMER' : [13,14,15,16,17,18,19,20,21,22]
273
+ }
274
+
275
+ def timestamp_to_active_trading_regions(
276
+ timestamp_ms : int
277
+ ) -> List[str]:
278
+ active_trading_regions : List[str] = []
279
+
280
+ dt_utc = datetime.fromtimestamp(int(timestamp_ms / 1000), tz=timezone.utc)
281
+ utc_hour = dt_utc.hour
282
+ if utc_hour in get_regions_trading_utc_hours()['APAC']:
283
+ active_trading_regions.append("APAC")
284
+
285
+ if utc_hour in get_regions_trading_utc_hours()['EMEA']:
286
+ active_trading_regions.append("EMEA")
287
+
288
+ if utc_hour in get_regions_trading_utc_hours()['AMER']:
289
+ active_trading_regions.append("AMER")
290
+
291
+ return active_trading_regions
292
+
293
+ def timestamp_to_week_of_month(timestamp_ms: int) -> int:
294
+ """
295
+ Returns:
296
+ int: Week of the month (0 = first week, 1 = second week, etc.).
297
+ """
298
+ dt = datetime.fromtimestamp(timestamp_ms / 1000)
299
+ day_of_month = dt.day
300
+ week_of_month = (day_of_month - 1) // 7
301
+ return week_of_month
37
302
 
38
303
  def fix_column_types(pd_candles : pd.DataFrame):
39
304
  pd_candles['open'] = pd_candles['open'].astype(float)
@@ -53,6 +318,17 @@ def fix_column_types(pd_candles : pd.DataFrame):
53
318
  pd_candles.reset_index(drop=True, inplace=True)
54
319
  pd_candles.sort_values("datetime", inplace=True)
55
320
 
321
+ def interval_to_ms(interval : str) -> int:
322
+ interval_ms : int = 0
323
+ if interval=="d":
324
+ interval_ms = 24*60*60*1000
325
+ elif interval=="h":
326
+ interval_ms = 60*60*1000
327
+ elif interval=="m":
328
+ interval_ms = 60*1000
329
+
330
+ return interval_ms
331
+
56
332
  '''
57
333
  https://polygon.io/docs/stocks
58
334
  '''
@@ -108,7 +384,7 @@ class NASDAQExchange:
108
384
  pd_daily_candles['low'] = pd_daily_candles['low'].astype(str).str.replace('$','')
109
385
  pd_daily_candles['close'] = pd_daily_candles['close'].astype(str).str.replace('$','')
110
386
  pd_daily_candles['datetime']= pd.to_datetime(pd_daily_candles['datetime'])
111
- pd_daily_candles['timestamp_ms'] = pd_daily_candles.datetime.values.astype(np.int64) // 10 ** 6
387
+ pd_daily_candles['timestamp_ms'] = pd_daily_candles.datetime.values.astype(np.int64) // 10 ** 6 # type: ignore
112
388
  pd_daily_candles['symbol'] = symbol
113
389
  pd_daily_candles['exchange'] = 'nasdaq'
114
390
  fix_column_types(pd_daily_candles)
@@ -127,7 +403,7 @@ class NASDAQExchange:
127
403
  )
128
404
 
129
405
  # When you fill foward, a few candles before start date can have null values (open, high, low, close, volume ...)
130
- first_candle_dt = pd_hourly_candles[(~pd_hourly_candles.close.isna()) & (pd_hourly_candles['datetime'].dt.time == pd.Timestamp('00:00:00').time())].iloc[0]['datetime']
406
+ first_candle_dt = pd_hourly_candles[(~pd_hourly_candles.close.isna()) & (pd_hourly_candles['datetime'].dt.time == pd.Timestamp('00:00:00').time())].iloc[0]['datetime'] # type: ignore
131
407
  pd_hourly_candles = pd_hourly_candles[pd_hourly_candles.datetime>=first_candle_dt]
132
408
  exchange_candles[symbol] = pd_hourly_candles
133
409
 
@@ -191,6 +467,45 @@ class YahooExchange:
191
467
 
192
468
  return exchange_candles
193
469
 
470
+ def aggregate_candles(
471
+ interval : str,
472
+ pd_candles : pd.DataFrame
473
+ ) -> pd.DataFrame:
474
+ if interval[-1]=='m':
475
+ # 'm' for pandas means months!
476
+ interval = interval.replace('m','min')
477
+ pd_candles.set_index('datetime', inplace=True)
478
+ pd_candles_aggregated = pd_candles.resample(interval).agg({
479
+ 'exchange' : 'first',
480
+ 'symbol' : 'first',
481
+ 'timestamp_ms' : 'first',
482
+
483
+ 'open': 'first',
484
+ 'high': 'max',
485
+ 'low': 'min',
486
+ 'close': 'last',
487
+ 'volume': 'sum',
488
+
489
+ 'datetime_utc' : 'first',
490
+ 'year' : 'first',
491
+ 'month' : 'first',
492
+ 'day' : 'first',
493
+ 'hour' : 'first',
494
+ 'minute' : 'first',
495
+ 'dayofweek' : 'first',
496
+ 'week_of_month' : 'first',
497
+
498
+ 'apac_trading_hr' : 'first',
499
+ 'emea_trading_hr' : 'first',
500
+ 'amer_trading_hr' : 'first',
501
+
502
+ 'pct_chg_on_close' : 'sum',
503
+
504
+ })
505
+ pd_candles.reset_index(inplace=True)
506
+ pd_candles_aggregated.reset_index(inplace=True)
507
+ return pd_candles_aggregated
508
+
194
509
  def fetch_historical_price(
195
510
  exchange,
196
511
  normalized_symbol : str,
@@ -230,6 +545,8 @@ def fetch_candles(
230
545
 
231
546
  num_candles_limit : int = 100,
232
547
 
548
+ ticker_change_map : List[Dict[str, Union[str, int]]] = [],
549
+
233
550
  cache_dir : Union[str, None] = None,
234
551
 
235
552
  list_ts_field : Union[str, None] = None,
@@ -237,15 +554,21 @@ def fetch_candles(
237
554
  validation_max_gaps : int = 10,
238
555
  validation_max_end_date_intervals : int = 1
239
556
  ) -> Dict[str, Union[pd.DataFrame, None]]:
557
+ exchange_candles = { '' : None }
558
+ num_intervals = int(candle_size.replace(candle_size[-1],''))
559
+
560
+ if end_ts>datetime.now().timestamp():
561
+ end_ts = int(datetime.now().timestamp())
562
+
240
563
  if type(exchange) is YahooExchange:
241
- return exchange.fetch_candles(
564
+ exchange_candles = exchange.fetch_candles(
242
565
  start_ts=start_ts,
243
566
  end_ts=end_ts,
244
567
  symbols=normalized_symbols,
245
568
  candle_size=candle_size
246
569
  )
247
570
  elif type(exchange) is NASDAQExchange:
248
- return exchange.fetch_candles(
571
+ exchange_candles = exchange.fetch_candles(
249
572
  start_ts=start_ts,
250
573
  end_ts=end_ts,
251
574
  symbols=normalized_symbols,
@@ -262,21 +585,75 @@ def fetch_candles(
262
585
  pd_candles = exchange_candles[symbol]
263
586
  if not pd_candles is None:
264
587
  fix_column_types(pd_candles) # You don't want to do this from Futubull as you'd need import Futubull from there: Circular references
265
- return exchange_candles
588
+
266
589
  elif issubclass(exchange.__class__, CcxtExchange):
267
- return _fetch_candles_ccxt(
590
+ exchange_candles = _fetch_candles_ccxt(
268
591
  start_ts=start_ts,
269
592
  end_ts=end_ts,
270
593
  exchange=exchange,
271
594
  normalized_symbols=normalized_symbols,
272
595
  candle_size=candle_size,
273
- logger=logger,
274
596
  num_candles_limit=num_candles_limit,
275
- cache_dir=cache_dir,
276
- list_ts_field=list_ts_field
597
+ ticker_change_map=ticker_change_map,
598
+ logger=logger
277
599
  )
278
- return { '' : None }
600
+ if num_intervals!=1:
601
+ for symbol in exchange_candles:
602
+ if not exchange_candles[symbol] is None:
603
+ exchange_candles[symbol] = aggregate_candles(candle_size, exchange_candles[symbol]) # type: ignore
604
+
605
+ # For invalid rows missing timestamps, o/h/l/c/v, fill forward close, set volume to zero.
606
+ for symbol in exchange_candles:
607
+ pd_candles = exchange_candles[symbol]
608
+
609
+ if pd_candles is not None:
610
+ mask_invalid_candles = pd_candles["timestamp_ms"].isna()
611
+ if mask_invalid_candles.any():
612
+ pd_invalid_candles = pd_candles[mask_invalid_candles]
613
+
614
+ if logger is not None:
615
+ logger.warning(f"Dropping {pd_invalid_candles.shape[0]}/{pd_candles.shape[0]} rows from {symbol} candles (null timestamp_ms)") # type: ignore
616
+ logger.warning(f"{tabulate(pd_invalid_candles, headers='keys', tablefmt='psql')}") # type: ignore
617
+
618
+ def _to_timestamp_ms(dt):
619
+ if pd.isna(dt):
620
+ return pd.NA
621
+ if isinstance(dt, str):
622
+ dt = pd.to_datetime(dt)
623
+ return int(dt.timestamp() * 1000)
624
+
625
+ pd_candles.loc[mask_invalid_candles, "timestamp_ms"] = pd_candles.loc[mask_invalid_candles, "datetime"].apply(_to_timestamp_ms)
626
+
627
+ pd_candles["close"] = pd_candles["close"].ffill()
628
+ pd_candles.loc[mask_invalid_candles, ["open", "high", "low"]] = pd_candles.loc[
629
+ mask_invalid_candles, ["close"]
630
+ ]
631
+ pd_candles.loc[mask_invalid_candles, "volume"] = 0.0
632
+
633
+ return exchange_candles # type: ignore
279
634
 
635
+ '''
636
+ Find listing date https://gist.github.com/mr-easy/5185b1dcdd5f9f908ff196446f092e9b
637
+
638
+ Usage:
639
+ listing_ts = find_start_time(exchange, 'HYPE/USDT:USDT', int(datetime(2024,1,1).timestamp()*1000), int(datetime(2025,5,1).timestamp()*1000), '1h')
640
+
641
+ Caveats:
642
+ 1) If listing date lies outside [start_time, end_time], this function will stack overflow,
643
+ 2) Even if not, it's still very time consuming.
644
+
645
+ Alternative: market['created']
646
+ '''
647
+ def search_listing_ts(exchange, symbol, start_time, end_time, timeframe):
648
+ mid_time = (start_time + end_time)//2
649
+ if(mid_time == start_time): return mid_time+1
650
+ ohlcv = exchange.fetch_ohlcv(symbol, timeframe, mid_time, limit=1)
651
+ time.sleep(1)
652
+ if(len(ohlcv) == 0):
653
+ return search_listing_ts(exchange, symbol, mid_time, end_time, timeframe)
654
+ else:
655
+ return search_listing_ts(exchange, symbol, start_time, mid_time, timeframe)
656
+
280
657
  def _fetch_candles_ccxt(
281
658
  start_ts : int,
282
659
  end_ts : int,
@@ -284,57 +661,93 @@ def _fetch_candles_ccxt(
284
661
  normalized_symbols : List[str],
285
662
  candle_size : str,
286
663
  num_candles_limit : int = 100,
287
- logger = None,
288
- cache_dir : Union[str, None] = None,
289
- list_ts_field : Union[str, None] = None
664
+ ticker_change_map : List[Dict[str, Union[str, int]]] = [],
665
+ logger = None
290
666
  ) -> Dict[str, Union[pd.DataFrame, None]]:
291
- ticker = normalized_symbols[0]
292
- pd_candles = _fetch_candles(
293
- symbol = ticker,
294
- exchange = exchange,
295
- start_ts = start_ts,
296
- end_ts = end_ts,
297
- candle_size = candle_size,
298
- )
299
- return {
300
- ticker : pd_candles
301
- }
302
-
303
- def _fetch_candles(
304
- symbol : str,
305
- exchange : CcxtExchange,
306
- start_ts : int,
307
- end_ts : int,
308
- candle_size : str = '1d',
309
- num_candles_limit : int = 100
310
- ):
311
- def _fetch_ohlcv(exchange, symbol, timeframe, since, limit, params) -> Union[List, NoReturn]:
312
- one_timeframe = f"1{timeframe[-1]}"
313
- candles = exchange.fetch_ohlcv(symbol=symbol, timeframe=one_timeframe, since=since, limit=limit, params=params)
314
- if candles and len(candles)>0:
315
- candles.sort(key=lambda x : x[0], reverse=False)
316
-
317
- return candles
318
-
319
- all_candles = []
320
- params = {}
321
- this_cutoff = start_ts
322
- while this_cutoff<=end_ts:
323
- candles = _fetch_ohlcv(exchange=exchange, symbol=symbol, timeframe=candle_size, since=int(this_cutoff * 1000), limit=num_candles_limit, params=params)
324
- if candles and len(candles)>0:
325
- all_candles = all_candles + [[ int(x[0]), float(x[1]), float(x[2]), float(x[3]), float(x[4]), float(x[5]) ] for x in candles if x[1] and x[2] and x[3] and x[4] and x[5] ]
326
-
327
- record_ts = max([int(record[0]) for record in candles])
328
- record_ts_str : str = str(record_ts)
329
- if len(record_ts_str)==13:
330
- record_ts = int(int(record_ts_str)/1000) # Convert from milli-seconds to seconds
331
-
332
- this_cutoff = record_ts + 1
333
- columns = ['exchange', 'symbol', 'timestamp_ms', 'open', 'high', 'low', 'close', 'volume']
334
- pd_all_candles = pd.DataFrame([ [ exchange.name, symbol, x[0], x[1], x[2], x[3], x[4], x[5] ] for x in all_candles], columns=columns)
335
- fix_column_types(pd_all_candles)
336
- pd_all_candles['pct_chg_on_close'] = pd_all_candles['close'].pct_change()
337
- return pd_all_candles
667
+ rsp = {}
668
+
669
+ exchange.load_markets()
670
+
671
+ num_tickers = len(normalized_symbols)
672
+ i = 0
673
+ for ticker in normalized_symbols:
674
+ old_ticker = get_old_ticker(ticker, ticker_change_map)
675
+ ticker_change_mapping = get_ticker_map(ticker, ticker_change_map)
676
+
677
+ @retry(num_attempts=3, pause_between_retries_ms=1000, logger=logger)
678
+ def _fetch_ohlcv(exchange, symbol, timeframe, since, limit, params) -> Union[List, NoReturn]:
679
+ one_timeframe = f"1{timeframe[-1]}"
680
+ candles = exchange.fetch_ohlcv(symbol=symbol, timeframe=one_timeframe, since=since, limit=limit, params=params)
681
+ if candles and len(candles)>0:
682
+ candles.sort(key=lambda x : x[0], reverse=False)
683
+
684
+ return candles
685
+
686
+ def _calc_increment(candle_size):
687
+ increment = 1
688
+ num_intervals = int(candle_size.replace(candle_size[-1],''))
689
+ interval_type = candle_size[-1]
690
+ if interval_type == "m":
691
+ increment = 60
692
+ elif interval_type == "h":
693
+ increment = 60*60
694
+ elif interval_type == "d":
695
+ increment = 60*60*24
696
+ else:
697
+ raise ValueError(f"Invalid candle_size {candle_size}")
698
+ return num_intervals * increment
699
+
700
+ if logger:
701
+ logger.info(f"{i}/{num_tickers} Fetching {candle_size} candles for {ticker}.")
702
+
703
+ '''
704
+ It uses a while loop to implement a sliding window to download candles between start_ts and end_ts.
705
+ However, start_ts for example can be 1 Jan 2021 for a given ticker.
706
+ But if that ticker listing date is 1 Jan 2025, this while loop would waste a lot of time looping between 1 Jan 2021 thru 31 Dec 2024, slowly incrementing this_cutoff += _calc_increment(candle_size).
707
+ A more efficient way is to find listing date. Start looping from there.
708
+ '''
709
+ market = exchange.markets[ticker] if ticker in exchange.markets else None
710
+ if not market:
711
+ market = exchange.markets[old_ticker] if old_ticker else None
712
+ if not market:
713
+ raise ValueError(f"market {ticker} not support by exchange {exchange.name}!")
714
+
715
+ this_ticker_start_ts = start_ts
716
+ if market['created']:
717
+ this_ticker_start_ts = max(this_ticker_start_ts, int(market['created']/1000))
718
+
719
+ all_candles = []
720
+ params = {}
721
+ this_cutoff = this_ticker_start_ts
722
+ while this_cutoff<end_ts:
723
+ _ticker = ticker
724
+ if ticker_change_mapping:
725
+ ticker_change_cutoff_sec = int(ticker_change_mapping['cutoff_ms']) / 1000
726
+ if this_cutoff<ticker_change_cutoff_sec:
727
+ _ticker = old_ticker
728
+ candles = _fetch_ohlcv(exchange=exchange, symbol=_ticker, timeframe=candle_size, since=int(this_cutoff * 1000), limit=num_candles_limit, params=params)
729
+ if candles and len(candles)>0:
730
+ all_candles = all_candles + [[ int(x[0]), float(x[1]), float(x[2]), float(x[3]), float(x[4]), float(x[5]) ] for x in candles if x[1] and x[2] and x[3] and x[4] and x[5] ]
731
+
732
+ record_ts = max([int(record[0]) for record in candles])
733
+ record_ts_str : str = str(record_ts)
734
+ if len(record_ts_str)==13:
735
+ record_ts = int(int(record_ts_str)/1000) # Convert from milli-seconds to seconds
736
+
737
+ this_cutoff = record_ts + _calc_increment(candle_size)
738
+ else:
739
+ this_cutoff += _calc_increment(candle_size)
740
+
741
+ columns = ['exchange', 'symbol', 'timestamp_ms', 'open', 'high', 'low', 'close', 'volume']
742
+ pd_all_candles = pd.DataFrame([ [ exchange.name, ticker, x[0], x[1], x[2], x[3], x[4], x[5] ] for x in all_candles], columns=columns)
743
+ fix_column_types(pd_all_candles)
744
+ pd_all_candles['pct_chg_on_close'] = pd_all_candles['close'].pct_change()
745
+
746
+ rsp[ticker] = pd_all_candles
747
+
748
+ i+=1
749
+
750
+ return rsp
338
751
 
339
752
  def fetch_deribit_btc_option_expiries(
340
753
  market: str = 'BTC'
@@ -344,7 +757,7 @@ def fetch_deribit_btc_option_expiries(
344
757
  Dict[str, Dict[str, Union[str, float]]]
345
758
  ]
346
759
  ]:
347
- exchange = deribit()
760
+ exchange = ccxt.deribit()
348
761
  instruments = exchange.public_get_get_instruments({
349
762
  'currency': market,
350
763
  'kind': 'option',
@@ -396,4 +809,81 @@ def fetch_deribit_btc_option_expiries(
396
809
  'index_price' : index_price,
397
810
  'by_expiry' : sorted_expiry_data, # type: ignore Otherwise, Error: Type "dict[str, list[tuple[str, float]] | dict[str, Dict[Unknown, Unknown]]]" is not assignable to return type "Dict[str, Dict[str, float] | Dict[str, Dict[str, str | float]]]"
398
811
  'by_expiry_and_strike' : expiry_data_breakdown_by_strike
399
- }
812
+ }
813
+
814
+ def build_pair_candles(
815
+ pd_candles1 : pd.DataFrame,
816
+ pd_candles2 : pd.DataFrame,
817
+ left_columns_postfix : str = "_1",
818
+ right_columns_postfix : str = "_2"
819
+ ) -> pd.DataFrame:
820
+ min_timestamp_ms1 = int(pd_candles1.iloc[0]['timestamp_ms'])
821
+ max_timestamp_ms1 = int(pd_candles1.iloc[-1]['timestamp_ms'])
822
+ min_timestamp_ms2 = int(pd_candles2.iloc[0]['timestamp_ms'])
823
+ max_timestamp_ms2 = int(pd_candles2.iloc[-1]['timestamp_ms'])
824
+
825
+ pd_candles1 = pd_candles1[(pd_candles1.timestamp_ms>=min_timestamp_ms2) & (pd_candles1.timestamp_ms<=max_timestamp_ms2) & (~pd_candles1.timestamp_ms.isna()) ]
826
+ pd_candles2 = pd_candles2[(pd_candles2.timestamp_ms>=min_timestamp_ms1) & (pd_candles2.timestamp_ms<=max_timestamp_ms1) & (~pd_candles2.timestamp_ms.isna())]
827
+ assert(pd_candles1.shape[0]==pd_candles2.shape[0])
828
+
829
+ pd_candles1['timestamp_ms_gap'] = pd_candles1['timestamp_ms'] - pd_candles1['timestamp_ms'].shift(1)
830
+ timestamp_ms_gap = pd_candles1.iloc[-1]['timestamp_ms_gap']
831
+
832
+ assert(pd_candles1[~pd_candles1.timestamp_ms_gap.isna()][pd_candles1.timestamp_ms_gap!=timestamp_ms_gap].shape[0]==0)
833
+ pd_candles1.drop(columns=['timestamp_ms_gap'], inplace=True)
834
+
835
+ pd_candles2['timestamp_ms_gap'] = pd_candles2['timestamp_ms'] - pd_candles2['timestamp_ms'].shift(1)
836
+ timestamp_ms_gap = pd_candles2.iloc[-1]['timestamp_ms_gap']
837
+ assert(pd_candles2[~pd_candles2.timestamp_ms_gap.isna()][pd_candles2.timestamp_ms_gap!=timestamp_ms_gap].shape[0]==0)
838
+ pd_candles2.drop(columns=['timestamp_ms_gap'], inplace=True)
839
+
840
+ min_timestamp_ms1 = int(pd_candles1.iloc[0]['timestamp_ms'])
841
+ max_timestamp_ms1 = int(pd_candles1.iloc[-1]['timestamp_ms'])
842
+ min_timestamp_ms2 = int(pd_candles2.iloc[0]['timestamp_ms'])
843
+ max_timestamp_ms2 = int(pd_candles2.iloc[-1]['timestamp_ms'])
844
+ assert(min_timestamp_ms1==min_timestamp_ms2)
845
+ assert(max_timestamp_ms1==max_timestamp_ms2)
846
+ assert(pd_candles1.shape[0]==pd_candles2.shape[0])
847
+
848
+ if len([ col for col in pd_candles1.columns if col[-2:]==left_columns_postfix ]) == 0:
849
+ pd_candles1.columns = [str(col) + left_columns_postfix for col in pd_candles1.columns]
850
+
851
+ if len([ col for col in pd_candles2.columns if col[-2:]==right_columns_postfix ]) == 0:
852
+ pd_candles2.columns = [str(col) + right_columns_postfix for col in pd_candles2.columns]
853
+
854
+ pd_candles1.reset_index(drop=True, inplace=True)
855
+ pd_candles2.reset_index(drop=True, inplace=True)
856
+ pd_candles = pd.concat([pd_candles1, pd_candles2], axis=1)
857
+ pd_candles['timestamp_ms_gap'] = pd_candles[f'timestamp_ms{left_columns_postfix}'] - pd_candles[f'timestamp_ms{right_columns_postfix}']
858
+ assert(pd_candles[pd_candles.timestamp_ms_gap!=0].shape[0]==0)
859
+
860
+ pd_candles.drop(pd_candles.columns[pd_candles.columns.str.contains('unnamed',case = False)],axis = 1, inplace = True)
861
+
862
+ return pd_candles
863
+
864
+ def get_old_ticker(
865
+ ticker : str,
866
+ ticker_change_map : List[Dict[str, Union[str, int]]]
867
+ ) -> Union[str, None]:
868
+ if not ticker_change_map:
869
+ return None
870
+
871
+ mapping = get_ticker_map(ticker, ticker_change_map)
872
+ if mapping:
873
+ return str(mapping['old_ticker'])
874
+
875
+ return None
876
+
877
+ def get_ticker_map(
878
+ ticker : str,
879
+ ticker_change_map : List[Dict[str, Union[str, int]]]
880
+ ) -> Union[None, Dict[str, Union[str, int]]]:
881
+ if not ticker_change_map:
882
+ return None
883
+
884
+ for mapping in ticker_change_map:
885
+ new_ticker = mapping['new_ticker']
886
+ if new_ticker==ticker:
887
+ return mapping
888
+
889
+ return None