siglab-py 0.1.29__py3-none-any.whl → 0.6.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (32)
  1. siglab_py/constants.py +26 -1
  2. siglab_py/exchanges/binance.py +38 -0
  3. siglab_py/exchanges/deribit.py +83 -0
  4. siglab_py/exchanges/futubull.py +12 -2
  5. siglab_py/market_data_providers/candles_provider.py +2 -2
  6. siglab_py/market_data_providers/candles_ta_provider.py +3 -3
  7. siglab_py/market_data_providers/ccxt_candles_ta_to_csv.py +4 -4
  8. siglab_py/market_data_providers/futu_candles_ta_to_csv.py +7 -2
  9. siglab_py/market_data_providers/google_monitor.py +320 -0
  10. siglab_py/market_data_providers/orderbooks_provider.py +15 -12
  11. siglab_py/market_data_providers/tg_monitor.py +428 -0
  12. siglab_py/market_data_providers/{test_provider.py → trigger_provider.py} +9 -8
  13. siglab_py/ordergateway/client.py +172 -41
  14. siglab_py/ordergateway/encrypt_keys_util.py +1 -1
  15. siglab_py/ordergateway/gateway.py +456 -347
  16. siglab_py/ordergateway/test_ordergateway.py +8 -7
  17. siglab_py/tests/integration/market_data_util_tests.py +35 -1
  18. siglab_py/tests/unit/analytic_util_tests.py +47 -12
  19. siglab_py/tests/unit/simple_math_tests.py +235 -0
  20. siglab_py/tests/unit/trading_util_tests.py +65 -0
  21. siglab_py/util/analytic_util.py +478 -69
  22. siglab_py/util/market_data_util.py +487 -100
  23. siglab_py/util/notification_util.py +78 -0
  24. siglab_py/util/retry_util.py +11 -3
  25. siglab_py/util/simple_math.py +240 -0
  26. siglab_py/util/slack_notification_util.py +59 -0
  27. siglab_py/util/trading_util.py +118 -0
  28. {siglab_py-0.1.29.dist-info → siglab_py-0.6.12.dist-info}/METADATA +5 -9
  29. siglab_py-0.6.12.dist-info/RECORD +44 -0
  30. {siglab_py-0.1.29.dist-info → siglab_py-0.6.12.dist-info}/WHEEL +1 -1
  31. siglab_py-0.1.29.dist-info/RECORD +0 -34
  32. {siglab_py-0.1.29.dist-info → siglab_py-0.6.12.dist-info}/top_level.txt +0 -0
@@ -1,13 +1,19 @@
+import logging
+import incremental
 import tzlocal
 from datetime import datetime, timezone
+import time
 from typing import List, Dict, Union, NoReturn, Any, Tuple
 from pathlib import Path
 import math
 import pandas as pd
 import numpy as np
+import asyncio
+from tabulate import tabulate

 from ccxt.base.exchange import Exchange as CcxtExchange
-from ccxt import deribit
+import ccxt
+import ccxt.pro as ccxtpro

 # https://www.analyticsvidhya.com/blog/2021/06/download-financial-dataset-using-yahoo-finance-in-python-a-complete-guide/
 from yahoofinancials import YahooFinancials
@@ -15,72 +21,271 @@ from yahoofinancials import YahooFinancials
 # yfinance allows intervals '1m', '5m', '15m', '1h', '1d', '1wk', '1mo'. yahoofinancials not as flexible
 import yfinance as yf

+from siglab_py.util.retry_util import retry
 from siglab_py.exchanges.futubull import Futubull
+from siglab_py.exchanges.any_exchange import AnyExchange
+from siglab_py.exchanges.deribit import Deribit, DeribitAsync
+from siglab_py.exchanges.binance import Binance, BinanceAsync
+
+def instantiate_exchange(
+    exchange_name : str,
+    api_key : Union[str, None] = None,
+    secret : Union[str, None] = None,
+    passphrase : Union[str, None] = None,
+    default_type : Union[str, None] = 'spot',
+    default_sub_type : Union[str, None] = None,
+    rate_limit_ms : float = 100
+) -> Union[AnyExchange, None]:
+    exchange_name = exchange_name.lower().strip()
+
+    # Look at ccxt exchange.describe(), under 'options' \ 'defaultType' (and 'defaultSubType'), for the markets an exchange supports.
+    # https://docs.ccxt.com/en/latest/manual.html#instantiation
+    exchange_params : Dict[str, Any] = {
+        'apiKey' : api_key,
+        'secret' : secret,
+        'enableRateLimit' : True,
+        'rateLimit' : rate_limit_ms,
+        'options' : {
+            'defaultType' : default_type
+        }
+    }
+
+    if default_sub_type:
+        exchange_params['defaultSubType'] = default_sub_type
+
+    if api_key:
+        exchange_params['apiKey'] = api_key
+    if secret:
+        exchange_params['secret'] = secret
+    if passphrase:
+        exchange_params['passphrase'] = passphrase
+
+    if exchange_name=='binance':
+        exchange = Binance(exchange_params) # type: ignore
+    elif exchange_name=='bybit':
+        exchange = ccxt.bybit(exchange_params) # type: ignore
+    elif exchange_name=='okx':
+        exchange = ccxt.okx(exchange_params) # type: ignore
+    elif exchange_name=='deribit':
+        exchange = Deribit(exchange_params) # type: ignore
+    elif exchange_name=='hyperliquid':
+        exchange = ccxt.hyperliquid(
+            {
+                "walletAddress" : api_key, # type: ignore
+                "privateKey" : secret,
+                'enableRateLimit' : True,
+                'rateLimit' : rate_limit_ms
+            }
+        )
+    else:
+        raise ValueError(f"Unsupported exchange {exchange_name}.")
+
+    exchange.load_markets() # type: ignore
+
+    return exchange # type: ignore
+
+async def async_instantiate_exchange(
+    gateway_id : str,
+    api_key : str,
+    secret : str,
+    passphrase : str,
+    default_type : Union[str, None] = 'spot',
+    default_sub_type : Union[str, None] = None,
+    rate_limit_ms : float = 100,
+    verbose : bool = False
+) -> Union[AnyExchange, None]:
+    exchange : Union[AnyExchange, None] = None
+    exchange_name : str = gateway_id.split('_')[0]
+    exchange_name = exchange_name.lower().strip()
+
+    # Look at ccxt exchange.describe(), under 'options' \ 'defaultType' (and 'defaultSubType'), for the markets an exchange supports.
+    # https://docs.ccxt.com/en/latest/manual.html#instantiation
+    exchange_params : Dict[str, Any] = {
+        'apiKey' : api_key,
+        'secret' : secret,
+        'enableRateLimit' : True,
+        'rateLimit' : rate_limit_ms,
+        'options' : {
+            'defaultType' : default_type
+        },
+        'verbose': verbose
+    }
+
+    if default_sub_type:
+        exchange_params['defaultSubType'] = default_sub_type
+
+    if exchange_name=='binance':
+        # spot, future, margin, delivery, option
+        # https://github.com/ccxt/ccxt/blob/master/python/ccxt/binance.py#L1298
+        exchange = BinanceAsync(exchange_params) # type: ignore
+    elif exchange_name=='bybit':
+        # spot, linear, inverse, futures
+        # https://github.com/ccxt/ccxt/blob/master/python/ccxt/bybit.py#L1041
+        exchange = ccxtpro.bybit(exchange_params) # type: ignore
+    elif exchange_name=='okx':
+        # 'funding', spot, margin, future, swap, option
+        # https://github.com/ccxt/ccxt/blob/master/python/ccxt/okx.py#L1144
+        exchange_params['password'] = passphrase
+        exchange = ccxtpro.okx(exchange_params) # type: ignore
+    elif exchange_name=='deribit':
+        # spot, swap, future
+        # https://github.com/ccxt/ccxt/blob/master/python/ccxt/deribit.py#L360
+        exchange = DeribitAsync(exchange_params) # type: ignore
+    elif exchange_name=='kraken':
+        exchange = ccxtpro.kraken(exchange_params) # type: ignore
+    elif exchange_name=='hyperliquid':
+        '''
+        https://app.hyperliquid.xyz/API
+
+        defaultType from ccxt: swap
+        https://github.com/ccxt/ccxt/blob/master/python/ccxt/hyperliquid.py#L225
+
+        How to integrate? You can skip the first 6 min: https://www.youtube.com/watch?v=UuBr331wxr4&t=363s
+
+        Example,
+        API credentials created under "\ More \ API":
+            Ledger Arbitrum Wallet Address: 0xAAAAA <-- The Ledger Arbitrum wallet address with which you connect to Hyperliquid.
+            API Wallet Address: 0xBBBBB <-- Generated
+            privateKey: 0xCCCCC
+
+        Basic connection via CCXT:
+            import asyncio
+            import ccxt.pro as ccxtpro
+
+            async def main():
+                rate_limit_ms = 100
+                exchange_params = {
+                    "walletAddress" : "0xAAAAA", # Ledger Arbitrum Wallet Address here! Not the generated address.
+                    "privateKey" : "0xCCCCC"
+                }
+                exchange = ccxtpro.hyperliquid(exchange_params)
+                balances = await exchange.fetch_balance()
+                print(balances)
+
+            asyncio.run(main())
+        '''
+        exchange = ccxtpro.hyperliquid(
+            {
+                "walletAddress" : api_key,
+                "privateKey" : secret,
+                'enableRateLimit' : True,
+                'rateLimit' : rate_limit_ms,
+                'verbose': verbose
+            } # type: ignore
+        )
+    else:
+        raise ValueError(f"Unsupported exchange {exchange_name}, check gateway_id {gateway_id}.")
+
+    await exchange.load_markets() # type: ignore
+
+    '''
+    Is this necessary? The added trouble is that, for example, bybit.authenticate requires the arg 'url' while binance's doesn't, and fetch_balance already exercises the credentials.
+
+    try:
+        await exchange.authenticate() # type: ignore
+    except Exception as swallow_this_error:
+        pass
+    '''
+
+    return exchange

 def timestamp_to_datetime_cols(pd_candles : pd.DataFrame):
-    pd_candles['datetime'] = pd_candles['timestamp_ms'].apply(
-        lambda x: datetime.fromtimestamp(int(x.timestamp()) if isinstance(x, pd.Timestamp) else int(x / 1000))
-    )
+    def _fix_timestamp_ms(x):
+        if isinstance(x, pd.Timestamp):
+            return int(x.value // 10**6)
+        elif isinstance(x, np.datetime64):
+            return int(x.astype('int64') // 10**6)
+        elif isinstance(x, (int, float)):
+            x = int(x)
+            if len(str(abs(x))) == 13:
+                return x
+            else:
+                return int(x * 1000)
+        else:
+            raise ValueError(f"Unsupported type {type(x)} for timestamp conversion")
+    pd_candles['timestamp_ms'] = pd_candles['timestamp_ms'].apply(_fix_timestamp_ms)
+    pd_candles['datetime'] = pd_candles['timestamp_ms'].apply(lambda x: datetime.fromtimestamp(int(x/1000)))
     pd_candles['datetime'] = pd.to_datetime(pd_candles['datetime'])
-    pd_candles['datetime'] = pd_candles['datetime'].dt.tz_localize(None)
+    pd_candles['datetime'] = pd_candles['datetime'].dt.tz_localize(None) # type: ignore
     pd_candles['datetime_utc'] = pd_candles['timestamp_ms'].apply(
         lambda x: datetime.fromtimestamp(int(x.timestamp()) if isinstance(x, pd.Timestamp) else int(x / 1000), tz=timezone.utc)
     )

     # This is to make it easy to do grouping with Excel pivot table
-    pd_candles['year'] = pd_candles['datetime'].dt.year
-    pd_candles['month'] = pd_candles['datetime'].dt.month
-    pd_candles['day'] = pd_candles['datetime'].dt.day
-    pd_candles['hour'] = pd_candles['datetime'].dt.hour
-    pd_candles['minute'] = pd_candles['datetime'].dt.minute
-    pd_candles['dayofweek'] = pd_candles['datetime'].dt.dayofweek # dayofweek: Monday is 0 and Sunday is 6
+    pd_candles['year'] = pd_candles['datetime'].dt.year # type: ignore
+    pd_candles['month'] = pd_candles['datetime'].dt.month # type: ignore
+    pd_candles['day'] = pd_candles['datetime'].dt.day # type: ignore
+    pd_candles['hour'] = pd_candles['datetime'].dt.hour # type: ignore
+    pd_candles['minute'] = pd_candles['datetime'].dt.minute # type: ignore
+    pd_candles['dayofweek'] = pd_candles['datetime'].dt.dayofweek # type: ignore dayofweek: Monday is 0 and Sunday is 6

     pd_candles['week_of_month'] = pd_candles['timestamp_ms'].apply(
-        lambda x: timestamp_to_week_of_month(x)
+        lambda x: timestamp_to_week_of_month(int(x/1000))
     )

     pd_candles['apac_trading_hr'] = pd_candles['timestamp_ms'].apply(
-        lambda x: "APAC" in timestamp_to_active_trading_regions(x)
+        lambda x: "APAC" in timestamp_to_active_trading_regions(int(x/1000))
     )
     pd_candles['emea_trading_hr'] = pd_candles['timestamp_ms'].apply(
-        lambda x: "EMEA" in timestamp_to_active_trading_regions(x)
+        lambda x: "EMEA" in timestamp_to_active_trading_regions(int(x/1000))
     )
     pd_candles['amer_trading_hr'] = pd_candles['timestamp_ms'].apply(
-        lambda x: "AMER" in timestamp_to_active_trading_regions(x)
+        lambda x: "AMER" in timestamp_to_active_trading_regions(int(x/1000))
     )

+    pd_candles['timestamp_ms_gap'] = pd_candles['timestamp_ms'] - pd_candles['timestamp_ms'].shift(1)
+
+    # Depending on the asset, minute bars may have gaps
+    timestamp_ms_gap_median = pd_candles['timestamp_ms_gap'].median()
+    NUM_MS_IN_1HR = 60*60*1000
+    if timestamp_ms_gap_median>=NUM_MS_IN_1HR:
+        num_rows_with_expected_gap = pd_candles[~pd_candles.timestamp_ms_gap.isna()][pd_candles.timestamp_ms_gap==timestamp_ms_gap_median].shape[0]
+        assert(num_rows_with_expected_gap/pd_candles.shape[0]>0.9)
+    pd_candles.drop(columns=['timestamp_ms_gap'], inplace=True)
+
+'''
+APAC (Asia-Pacific) Trading Hours
+    UTC 21:00 - 09:00 (approximate range)
+    Major financial centers: Tokyo, Hong Kong, Singapore, Sydney
+
+EMEA (Europe, Middle East, Africa) Trading Hours
+    UTC 07:00 - 16:00 (approximate range)
+    Major financial centers: London, Frankfurt, Paris, Zurich, Dubai
+
+US Trading Hours
+    UTC 13:00 - 22:00 (approximate range)
+    Major financial centers: New York, Chicago
+    Key markets: NYSE, NASDAQ
+
+utcnow and utcfromtimestamp have been deprecated in Python 3.12
+https://www.pythonmorsels.com/converting-to-utc-time/
+
+Example: UTC 23:00 is the 3rd hour of the APAC trading session
+    utc_hour = 23
+    i = get_regions_trading_utc_hours()['APAC'].index(utc_hour)
+    assert(i==2)
+'''
+def get_regions_trading_utc_hours():
+    return {
+        'APAC' : [21,22,23,0,1,2,3,4,5,6,7,8,9],
+        'EMEA' : [7,8,9,10,11,12,13,14,15,16],
+        'AMER' : [13,14,15,16,17,18,19,20,21,22]
+    }
+
 def timestamp_to_active_trading_regions(
     timestamp_ms : int
 ) -> List[str]:
-
-    '''
-    APAC (Asia-Pacific) Trading Hours
-    UTC 22:00 - 09:00 (approximate range)
-    Major financial centers: Tokyo, Hong Kong, Singapore, Sydney
-
-    EMEA (Europe, Middle East, Africa) Trading Hours
-    UTC 07:00 - 16:00 (approximate range)
-    Major financial centers: London, Frankfurt, Paris, Zurich, Dubai
-
-    US Trading Hours
-    UTC 13:30 - 20:00 (approximate range)
-    Major financial centers: New York, Chicago
-    Key markets: NYSE, NASDAQ
-
-    utcnow and utcfromtimestamp been deprecated in Python 3.12
-    https://www.pythonmorsels.com/converting-to-utc-time/
-    '''
     active_trading_regions : List[str] = []

     dt_utc = datetime.fromtimestamp(int(timestamp_ms / 1000), tz=timezone.utc)
     utc_hour = dt_utc.hour
-    if (utc_hour >= 22) or (utc_hour <= 9):
+    if utc_hour in get_regions_trading_utc_hours()['APAC']:
         active_trading_regions.append("APAC")

-    if 7 <= utc_hour <= 16:
+    if utc_hour in get_regions_trading_utc_hours()['EMEA']:
         active_trading_regions.append("EMEA")

-    if 13 <= utc_hour <= 20:
+    if utc_hour in get_regions_trading_utc_hours()['AMER']:
         active_trading_regions.append("AMER")

     return active_trading_regions
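Note: the new instantiate_exchange helper wraps synchronous ccxt plus the package's own Binance/Deribit subclasses, while async_instantiate_exchange mirrors it on ccxt.pro and derives the exchange name from the gateway_id prefix. A minimal usage sketch, assuming the module path as packaged and network access to public endpoints (the timestamp below is illustrative):

    from siglab_py.util.market_data_util import (
        instantiate_exchange,
        timestamp_to_active_trading_regions,
    )

    # Keyless instantiation is enough for public endpoints; credentials are optional,
    # and load_markets() already runs inside the helper.
    exchange = instantiate_exchange(exchange_name='binance', default_type='spot')
    assert exchange is not None and exchange.markets

    # 1700002800000 ms is 2023-11-14 23:00 UTC; hour 23 falls in the APAC window only.
    assert timestamp_to_active_trading_regions(1700002800000) == ['APAC']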
@@ -113,6 +318,17 @@ def fix_column_types(pd_candles : pd.DataFrame):
     pd_candles.reset_index(drop=True, inplace=True)
     pd_candles.sort_values("datetime", inplace=True)

+def interval_to_ms(interval : str) -> int:
+    interval_ms : int = 0
+    if interval=="d":
+        interval_ms = 24*60*60*1000
+    elif interval=="h":
+        interval_ms = 60*60*1000
+    elif interval=="m":
+        interval_ms = 60*1000
+
+    return interval_ms
+
 '''
 https://polygon.io/docs/stocks
 '''
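The unit-to-milliseconds arithmetic in interval_to_ms is easy to sanity-check; note that unknown units fall through to the 0 default rather than raising, so callers are expected to validate first:

    assert interval_to_ms("m") == 60 * 1000            # 60_000 ms per minute
    assert interval_to_ms("h") == 60 * 60 * 1000       # 3_600_000 ms per hour
    assert interval_to_ms("d") == 24 * 60 * 60 * 1000  # 86_400_000 ms per day
    assert interval_to_ms("w") == 0                    # unhandled unit: returns the 0 default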
@@ -168,7 +384,7 @@ class NASDAQExchange:
         pd_daily_candles['low'] = pd_daily_candles['low'].astype(str).str.replace('$','')
         pd_daily_candles['close'] = pd_daily_candles['close'].astype(str).str.replace('$','')
         pd_daily_candles['datetime'] = pd.to_datetime(pd_daily_candles['datetime'])
-        pd_daily_candles['timestamp_ms'] = pd_daily_candles.datetime.values.astype(np.int64) // 10 ** 6
+        pd_daily_candles['timestamp_ms'] = pd_daily_candles.datetime.values.astype(np.int64) // 10 ** 6 # type: ignore
         pd_daily_candles['symbol'] = symbol
         pd_daily_candles['exchange'] = 'nasdaq'
         fix_column_types(pd_daily_candles)
@@ -187,7 +403,7 @@ class NASDAQExchange:
         )

         # When you fill forward, a few candles before the start date can have null values (open, high, low, close, volume ...)
-        first_candle_dt = pd_hourly_candles[(~pd_hourly_candles.close.isna()) & (pd_hourly_candles['datetime'].dt.time == pd.Timestamp('00:00:00').time())].iloc[0]['datetime']
+        first_candle_dt = pd_hourly_candles[(~pd_hourly_candles.close.isna()) & (pd_hourly_candles['datetime'].dt.time == pd.Timestamp('00:00:00').time())].iloc[0]['datetime'] # type: ignore
         pd_hourly_candles = pd_hourly_candles[pd_hourly_candles.datetime>=first_candle_dt]
         exchange_candles[symbol] = pd_hourly_candles

@@ -251,6 +467,45 @@ class YahooExchange:

         return exchange_candles

+def aggregate_candles(
+    interval : str,
+    pd_candles : pd.DataFrame
+) -> pd.DataFrame:
+    if interval[-1]=='m':
+        # 'm' for pandas means months!
+        interval = interval.replace('m','min')
+    pd_candles.set_index('datetime', inplace=True)
+    pd_candles_aggregated = pd_candles.resample(interval).agg({
+        'exchange' : 'first',
+        'symbol' : 'first',
+        'timestamp_ms' : 'first',
+
+        'open': 'first',
+        'high': 'max',
+        'low': 'min',
+        'close': 'last',
+        'volume': 'sum',
+
+        'datetime_utc' : 'first',
+        'year' : 'first',
+        'month' : 'first',
+        'day' : 'first',
+        'hour' : 'first',
+        'minute' : 'first',
+        'dayofweek' : 'first',
+        'week_of_month' : 'first',
+
+        'apac_trading_hr' : 'first',
+        'emea_trading_hr' : 'first',
+        'amer_trading_hr' : 'first',
+
+        'pct_chg_on_close' : 'sum',
+
+    })
+    pd_candles.reset_index(inplace=True)
+    pd_candles_aggregated.reset_index(inplace=True)
+    return pd_candles_aggregated
+
 def fetch_historical_price(
     exchange,
     normalized_symbol : str,
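aggregate_candles downsamples by resampling on the datetime index; the 'm' suffix is rewritten to 'min' because a bare 'm' offset means months to pandas. A usage sketch, where pd_1m_candles is a hypothetical 1-minute frame carrying exactly the columns in the agg map (frames straight out of fetch_candles fit):

    from siglab_py.util.market_data_util import aggregate_candles

    # '15m' becomes '15min' internally before the resample.
    pd_15m_candles = aggregate_candles(interval='15m', pd_candles=pd_1m_candles)
    assert pd_15m_candles.shape[0] <= pd_1m_candles.shape[0]
    # Per 15-minute bucket: open is the first 1m open, high the max, low the min,
    # close the last, volume the sum -- standard OHLCV downsampling.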
@@ -297,15 +552,21 @@ def fetch_candles(
     validation_max_gaps : int = 10,
     validation_max_end_date_intervals : int = 1
 ) -> Dict[str, Union[pd.DataFrame, None]]:
+    exchange_candles = { '' : None }
+    num_intervals = int(candle_size.replace(candle_size[-1],''))
+
+    if end_ts>datetime.now().timestamp():
+        end_ts = int(datetime.now().timestamp())
+
     if type(exchange) is YahooExchange:
-        return exchange.fetch_candles(
+        exchange_candles = exchange.fetch_candles(
             start_ts=start_ts,
             end_ts=end_ts,
             symbols=normalized_symbols,
             candle_size=candle_size
         )
     elif type(exchange) is NASDAQExchange:
-        return exchange.fetch_candles(
+        exchange_candles = exchange.fetch_candles(
             start_ts=start_ts,
             end_ts=end_ts,
             symbols=normalized_symbols,
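The new preamble derives num_intervals by stripping the trailing unit letter from candle_size and clamps end_ts to the present. A quick sketch of that parsing, assuming candle_size always ends in its unit (e.g. '15m', '4h', '1d'):

    from datetime import datetime

    candle_size = '15m'
    num_intervals = int(candle_size.replace(candle_size[-1], ''))   # '15m' -> 15
    assert num_intervals == 15

    end_ts = 4102444800                              # far future (2100-01-01)
    if end_ts > datetime.now().timestamp():
        end_ts = int(datetime.now().timestamp())     # clamped: exchanges return nothing past 'now'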
@@ -322,79 +583,153 @@ def fetch_candles(
             pd_candles = exchange_candles[symbol]
             if not pd_candles is None:
                 fix_column_types(pd_candles) # You don't want to do this from Futubull, as you'd need to import Futubull from there: circular references
-        return exchange_candles
+
     elif issubclass(exchange.__class__, CcxtExchange):
-        return _fetch_candles_ccxt(
+        exchange_candles = _fetch_candles_ccxt(
             start_ts=start_ts,
             end_ts=end_ts,
             exchange=exchange,
             normalized_symbols=normalized_symbols,
             candle_size=candle_size,
-            logger=logger,
-            num_candles_limit=num_candles_limit,
-            cache_dir=cache_dir,
-            list_ts_field=list_ts_field
+            num_candles_limit=num_candles_limit
         )
-    return { '' : None }
+    if num_intervals!=1:
+        for symbol in exchange_candles:
+            if not exchange_candles[symbol] is None:
+                exchange_candles[symbol] = aggregate_candles(candle_size, exchange_candles[symbol]) # type: ignore
+
+    # For invalid rows (missing timestamps, o/h/l/c/v): fill forward close, set volume to zero.
+    for symbol in exchange_candles:
+        pd_candles = exchange_candles[symbol]
+
+        if pd_candles is not None:
+            mask_invalid_candles = pd_candles["timestamp_ms"].isna()
+            if mask_invalid_candles.any():
+                pd_invalid_candles = pd_candles[mask_invalid_candles]
+
+                if logger is not None:
+                    logger.warning(f"Dropping {pd_invalid_candles.shape[0]}/{pd_candles.shape[0]} rows from {symbol} candles (null timestamp_ms)") # type: ignore
+                    logger.warning(f"{tabulate(pd_invalid_candles, headers='keys', tablefmt='psql')}") # type: ignore
+
+                def _to_timestamp_ms(dt):
+                    if pd.isna(dt):
+                        return pd.NA
+                    if isinstance(dt, str):
+                        dt = pd.to_datetime(dt)
+                    return int(dt.timestamp() * 1000)
+
+                pd_candles.loc[mask_invalid_candles, "timestamp_ms"] = pd_candles.loc[mask_invalid_candles, "datetime"].apply(_to_timestamp_ms)
+
+                pd_candles["close"] = pd_candles["close"].ffill()
+                pd_candles.loc[mask_invalid_candles, ["open", "high", "low"]] = pd_candles.loc[
+                    mask_invalid_candles, ["close"]
+                ]
+                pd_candles.loc[mask_invalid_candles, "volume"] = 0.0
+
+    return exchange_candles # type: ignore
+
+'''
+Find the listing date: https://gist.github.com/mr-easy/5185b1dcdd5f9f908ff196446f092e9b
+
+Usage:
+    listing_ts = search_listing_ts(exchange, 'HYPE/USDT:USDT', int(datetime(2024,1,1).timestamp()*1000), int(datetime(2025,5,1).timestamp()*1000), '1h')

+Caveats:
+    1) If the listing date lies outside [start_time, end_time], this function recurses without bound and overflows the stack.
+    2) Even when it doesn't, it's still very time consuming.
+
+Alternative: market['created']
+'''
+def search_listing_ts(exchange, symbol, start_time, end_time, timeframe):
+    mid_time = (start_time + end_time)//2
+    if(mid_time == start_time): return mid_time+1
+    ohlcv = exchange.fetch_ohlcv(symbol, timeframe, mid_time, limit=1)
+    time.sleep(1)
+    if(len(ohlcv) == 0):
+        return search_listing_ts(exchange, symbol, mid_time, end_time, timeframe)
+    else:
+        return search_listing_ts(exchange, symbol, start_time, mid_time, timeframe)
+
 def _fetch_candles_ccxt(
     start_ts : int,
     end_ts : int,
     exchange,
     normalized_symbols : List[str],
     candle_size : str,
-    num_candles_limit : int = 100,
-    logger = None,
-    cache_dir : Union[str, None] = None,
-    list_ts_field : Union[str, None] = None
-) -> Dict[str, Union[pd.DataFrame, None]]:
-    ticker = normalized_symbols[0]
-    pd_candles = _fetch_candles(
-        symbol = ticker,
-        exchange = exchange,
-        start_ts = start_ts,
-        end_ts = end_ts,
-        candle_size = candle_size,
-    )
-    return {
-        ticker : pd_candles
-    }
-
-def _fetch_candles(
-    symbol : str,
-    exchange : CcxtExchange,
-    start_ts : int,
-    end_ts : int,
-    candle_size : str = '1d',
     num_candles_limit : int = 100
-):
-    def _fetch_ohlcv(exchange, symbol, timeframe, since, limit, params) -> Union[List, NoReturn]:
-        one_timeframe = f"1{timeframe[-1]}"
-        candles = exchange.fetch_ohlcv(symbol=symbol, timeframe=one_timeframe, since=since, limit=limit, params=params)
-        if candles and len(candles)>0:
-            candles.sort(key=lambda x : x[0], reverse=False)
-
-        return candles
-
-    all_candles = []
-    params = {}
-    this_cutoff = start_ts
-    while this_cutoff<=end_ts:
-        candles = _fetch_ohlcv(exchange=exchange, symbol=symbol, timeframe=candle_size, since=int(this_cutoff * 1000), limit=num_candles_limit, params=params)
-        if candles and len(candles)>0:
-            all_candles = all_candles + [[ int(x[0]), float(x[1]), float(x[2]), float(x[3]), float(x[4]), float(x[5]) ] for x in candles if x[1] and x[2] and x[3] and x[4] and x[5] ]
-
-            record_ts = max([int(record[0]) for record in candles])
-            record_ts_str : str = str(record_ts)
-            if len(record_ts_str)==13:
-                record_ts = int(int(record_ts_str)/1000) # Convert from milliseconds to seconds
-
-            this_cutoff = record_ts + 1
-    columns = ['exchange', 'symbol', 'timestamp_ms', 'open', 'high', 'low', 'close', 'volume']
-    pd_all_candles = pd.DataFrame([ [ exchange.name, symbol, x[0], x[1], x[2], x[3], x[4], x[5] ] for x in all_candles], columns=columns)
-    fix_column_types(pd_all_candles)
-    pd_all_candles['pct_chg_on_close'] = pd_all_candles['close'].pct_change()
-    return pd_all_candles
+) -> Dict[str, Union[pd.DataFrame, None]]:
+    logger = logging.getLogger()
+
+    rsp = {}
+
+    exchange.load_markets()
+
+    num_tickers = len(normalized_symbols)
+    i = 0
+    for ticker in normalized_symbols:
+        @retry(num_attempts=3, pause_between_retries_ms=1000)
+        def _fetch_ohlcv(exchange, symbol, timeframe, since, limit, params) -> Union[List, NoReturn]:
+            one_timeframe = f"1{timeframe[-1]}"
+            candles = exchange.fetch_ohlcv(symbol=symbol, timeframe=one_timeframe, since=since, limit=limit, params=params)
+            if candles and len(candles)>0:
+                candles.sort(key=lambda x : x[0], reverse=False)
+
+            return candles
+
+        def _calc_increment(candle_size):
+            increment = 1
+            num_intervals = int(candle_size.replace(candle_size[-1],''))
+            interval_type = candle_size[-1]
+            if interval_type == "m":
+                increment = 60
+            elif interval_type == "h":
+                increment = 60*60
+            elif interval_type == "d":
+                increment = 60*60*24
+            else:
+                raise ValueError(f"Invalid candle_size {candle_size}")
+            return num_intervals * increment
+
+        logger.info(f"{i}/{num_tickers} Fetching {candle_size} candles for {ticker}.")
+
+        '''
+        A while loop implements a sliding window to download candles between start_ts and end_ts.
+        However, start_ts can be, say, 1 Jan 2021 for a given ticker.
+        If that ticker's listing date is 1 Jan 2025, the loop would waste a lot of time crawling from 1 Jan 2021 through 31 Dec 2024, slowly incrementing this_cutoff += _calc_increment(candle_size).
+        It's more efficient to find the listing date and start looping from there.
+        '''
+        market = exchange.markets[ticker]
+        this_ticker_start_ts = start_ts
+        if market['created']:
+            this_ticker_start_ts = max(this_ticker_start_ts, int(market['created']/1000))
+
+        all_candles = []
+        params = {}
+        this_cutoff = this_ticker_start_ts
+        while this_cutoff<end_ts:
+            candles = _fetch_ohlcv(exchange=exchange, symbol=ticker, timeframe=candle_size, since=int(this_cutoff * 1000), limit=num_candles_limit, params=params)
+            if candles and len(candles)>0:
+                all_candles = all_candles + [[ int(x[0]), float(x[1]), float(x[2]), float(x[3]), float(x[4]), float(x[5]) ] for x in candles if x[1] and x[2] and x[3] and x[4] and x[5] ]
+
+                record_ts = max([int(record[0]) for record in candles])
+                record_ts_str : str = str(record_ts)
+                if len(record_ts_str)==13:
+                    record_ts = int(int(record_ts_str)/1000) # Convert from milliseconds to seconds
+
+                this_cutoff = record_ts + _calc_increment(candle_size)
+            else:
+                this_cutoff += _calc_increment(candle_size)
+
+        columns = ['exchange', 'symbol', 'timestamp_ms', 'open', 'high', 'low', 'close', 'volume']
+        pd_all_candles = pd.DataFrame([ [ exchange.name, ticker, x[0], x[1], x[2], x[3], x[4], x[5] ] for x in all_candles], columns=columns)
+        fix_column_types(pd_all_candles)
+        pd_all_candles['pct_chg_on_close'] = pd_all_candles['close'].pct_change()
+
+        rsp[ticker] = pd_all_candles
+
+        i += 1
+
+    return rsp

 def fetch_deribit_btc_option_expiries(
     market: str = 'BTC'
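The rewritten pager fetches 1-unit candles (one_timeframe = f"1{timeframe[-1]}"), advances this_cutoff in seconds, and leaves aggregation to the num_intervals != 1 pass in fetch_candles. A standalone check of the increment arithmetic (calc_increment_s below re-derives the nested _calc_increment helper; the timestamp is illustrative):

    def calc_increment_s(candle_size: str) -> int:
        # Seconds spanned by one candle_size, mirroring the nested _calc_increment.
        units = {"m": 60, "h": 60*60, "d": 60*60*24}
        return int(candle_size[:-1]) * units[candle_size[-1]]

    assert calc_increment_s('15m') == 900
    assert calc_increment_s('1d') == 86400

    # Exchanges return 13-digit millisecond timestamps; the loop folds them back to
    # seconds, then resumes one interval past the newest candle it received:
    record_ts_ms = 1700000000000
    this_cutoff = record_ts_ms // 1000 + calc_increment_s('15m')
    assert this_cutoff == 1700000900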
@@ -404,7 +739,7 @@ def fetch_deribit_btc_option_expiries(
         Dict[str, Dict[str, Union[str, float]]]
     ]
 ]:
-    exchange = deribit()
+    exchange = ccxt.deribit()
     instruments = exchange.public_get_get_instruments({
         'currency': market,
         'kind': 'option',
@@ -456,4 +791,56 @@ def fetch_deribit_btc_option_expiries(
         'index_price' : index_price,
         'by_expiry' : sorted_expiry_data, # type: ignore Otherwise, Error: Type "dict[str, list[tuple[str, float]] | dict[str, Dict[Unknown, Unknown]]]" is not assignable to return type "Dict[str, Dict[str, float] | Dict[str, Dict[str, str | float]]]"
         'by_expiry_and_strike' : expiry_data_breakdown_by_strike
-    }
+    }
+
+def build_pair_candles(
+    pd_candles1 : pd.DataFrame,
+    pd_candles2 : pd.DataFrame,
+    left_columns_postfix : str = "_1",
+    right_columns_postfix : str = "_2"
+) -> pd.DataFrame:
+    min_timestamp_ms1 = int(pd_candles1.iloc[0]['timestamp_ms'])
+    max_timestamp_ms1 = int(pd_candles1.iloc[-1]['timestamp_ms'])
+    min_timestamp_ms2 = int(pd_candles2.iloc[0]['timestamp_ms'])
+    max_timestamp_ms2 = int(pd_candles2.iloc[-1]['timestamp_ms'])
+
+    pd_candles1 = pd_candles1[(pd_candles1.timestamp_ms>=min_timestamp_ms2) & (pd_candles1.timestamp_ms<=max_timestamp_ms2) & (~pd_candles1.timestamp_ms.isna())]
+    pd_candles2 = pd_candles2[(pd_candles2.timestamp_ms>=min_timestamp_ms1) & (pd_candles2.timestamp_ms<=max_timestamp_ms1) & (~pd_candles2.timestamp_ms.isna())]
+    assert(pd_candles1.shape[0]==pd_candles2.shape[0])
+
+    pd_candles1['timestamp_ms_gap'] = pd_candles1['timestamp_ms'] - pd_candles1['timestamp_ms'].shift(1)
+    timestamp_ms_gap = pd_candles1.iloc[-1]['timestamp_ms_gap']
+    assert(pd_candles1[~pd_candles1.timestamp_ms_gap.isna()][pd_candles1.timestamp_ms_gap!=timestamp_ms_gap].shape[0]==0)
+    pd_candles1.drop(columns=['timestamp_ms_gap'], inplace=True)
+
+    pd_candles2['timestamp_ms_gap'] = pd_candles2['timestamp_ms'] - pd_candles2['timestamp_ms'].shift(1)
+    timestamp_ms_gap = pd_candles2.iloc[-1]['timestamp_ms_gap']
+    assert(pd_candles2[~pd_candles2.timestamp_ms_gap.isna()][pd_candles2.timestamp_ms_gap!=timestamp_ms_gap].shape[0]==0)
+    pd_candles2.drop(columns=['timestamp_ms_gap'], inplace=True)
+
+    min_timestamp_ms1 = int(pd_candles1.iloc[0]['timestamp_ms'])
+    max_timestamp_ms1 = int(pd_candles1.iloc[-1]['timestamp_ms'])
+    min_timestamp_ms2 = int(pd_candles2.iloc[0]['timestamp_ms'])
+    max_timestamp_ms2 = int(pd_candles2.iloc[-1]['timestamp_ms'])
+    assert(min_timestamp_ms1==min_timestamp_ms2)
+    assert(max_timestamp_ms1==max_timestamp_ms2)
+    assert(pd_candles1.shape[0]==pd_candles2.shape[0])
+
+    if len([ col for col in pd_candles1.columns if col[-2:]==left_columns_postfix ]) == 0:
+        pd_candles1.columns = [str(col) + left_columns_postfix for col in pd_candles1.columns]
+
+    if len([ col for col in pd_candles2.columns if col[-2:]==right_columns_postfix ]) == 0:
+        pd_candles2.columns = [str(col) + right_columns_postfix for col in pd_candles2.columns]
+
+    pd_candles1.reset_index(drop=True, inplace=True)
+    pd_candles2.reset_index(drop=True, inplace=True)
+    pd_candles = pd.concat([pd_candles1, pd_candles2], axis=1)
+    pd_candles['timestamp_ms_gap'] = pd_candles[f'timestamp_ms{left_columns_postfix}'] - pd_candles[f'timestamp_ms{right_columns_postfix}']
+    assert(pd_candles[pd_candles.timestamp_ms_gap!=0].shape[0]==0)
+
+    pd_candles.drop(pd_candles.columns[pd_candles.columns.str.contains('unnamed', case=False)], axis=1, inplace=True)
+
+    return pd_candles
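A usage sketch for the new build_pair_candles, with two tiny hypothetical legs on the same fixed interval (the function asserts both legs are gapless and perfectly aligned on timestamp_ms before concatenating them side by side):

    import pandas as pd
    from siglab_py.util.market_data_util import build_pair_candles

    ts = [1000, 2000, 3000]   # one fixed interval, no gaps: the asserts require this
    leg1 = pd.DataFrame({'timestamp_ms': ts, 'close': [100.0, 101.0, 102.0]})
    leg2 = pd.DataFrame({'timestamp_ms': ts, 'close': [10.0, 10.5, 11.0]})

    pair = build_pair_candles(leg1, leg2)
    # Columns come back suffixed: close_1 is the left leg, close_2 the right,
    # so row-wise arithmetic like a spread is timestamp-safe.
    spread = pair['close_1'] - pair['close_2']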