quantjourney_bidask-0.5.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quantjourney_bidask/__init__.py +8 -0
- quantjourney_bidask/_version.py +7 -0
- quantjourney_bidask/data_fetcher.py +160 -0
- quantjourney_bidask/edge.py +148 -0
- quantjourney_bidask/edge_expanding.py +59 -0
- quantjourney_bidask/edge_rolling.py +202 -0
- quantjourney_bidask/websocket_fetcher.py +308 -0
- quantjourney_bidask-0.5.0.dist-info/METADATA +183 -0
- quantjourney_bidask-0.5.0.dist-info/RECORD +12 -0
- quantjourney_bidask-0.5.0.dist-info/WHEEL +5 -0
- quantjourney_bidask-0.5.0.dist-info/licenses/LICENSE +21 -0
- quantjourney_bidask-0.5.0.dist-info/top_level.txt +1 -0

quantjourney_bidask/__init__.py
@@ -0,0 +1,8 @@
from .edge import edge
from .edge_rolling import edge_rolling
from .edge_expanding import edge_expanding
from .data_fetcher import fetch_binance_data, fetch_yfinance_data
from .websocket_fetcher import LiveSpreadMonitor
from ._version import __version__, __author__, __email__, __license__

__all__ = ['edge', 'edge_rolling', 'edge_expanding', 'fetch_binance_data', 'fetch_yfinance_data', 'LiveSpreadMonitor']
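
For orientation, a minimal sketch of the public surface defined above once the wheel is installed; the commented results are illustrative, not captured output:

```python
import quantjourney_bidask as qjb

print(qjb.__version__)       # version string re-exported from _version.py
print(sorted(qjb.__all__))   # the six supported entry points listed in __all__

edge_fn = qjb.edge                   # full-sample estimator
rolling_fn = qjb.edge_rolling        # vectorized rolling-window variant
monitor_cls = qjb.LiveSpreadMonitor  # WebSocket-based live monitor
```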

quantjourney_bidask/data_fetcher.py
@@ -0,0 +1,160 @@
import pandas as pd
import requests
import yfinance as yf
from typing import Optional, List
from datetime import datetime

def fetch_binance_data(
    symbols: List[str],
    timeframe: str,
    start: str,
    end: str,
    api_key: str,
    api_url: str = "http://localhost:8000"
) -> pd.DataFrame:
    """
    Fetch OHLCV data from Binance using the provided FastAPI server.

    Parameters
    ----------
    symbols : List[str]
        List of trading pairs (e.g., ["BTCUSDT", "ETHUSDT"]).
    timeframe : str
        Data timeframe (e.g., "1m", "1h", "1d").
    start : str
        Start time in ISO 8601 format (e.g., "2024-01-01T00:00:00Z").
    end : str
        End time in ISO 8601 format (e.g., "2024-01-02T00:00:00Z").
    api_key : str
        API key for authentication.
    api_url : str, default "http://localhost:8000"
        Base URL of the FastAPI server.

    Returns
    -------
    pd.DataFrame
        DataFrame with columns ['timestamp', 'symbol', 'open', 'high', 'low', 'close', 'volume'].

    Raises
    ------
    ValueError
        If the API request fails or returns an error.
    """
    payload = {
        "exchange": "binance",
        "symbols": symbols,
        "start": start,
        "end": end,
        "timeframe": timeframe,
        "upload_d1": False,
        "force": False
    }
    headers = {"X-API-Key": api_key}

    # Initiate fetch request
    response = requests.post(f"{api_url}/fetch", json=payload, headers=headers)
    if response.status_code != 200:
        raise ValueError(f"Fetch request failed: {response.text}")

    task_id = response.json().get("task_id")
    if not task_id:
        raise ValueError("No task ID returned from fetch request")

    # Poll task status
    while True:
        status_response = requests.get(f"{api_url}/tasks/{task_id}")
        if status_response.status_code != 200:
            raise ValueError(f"Task status check failed: {status_response.text}")

        task = status_response.json().get("task")
        if task["status"] in ["completed", "failed"]:
            if task["status"] == "failed":
                raise ValueError(f"Task failed: {task.get('message')}")
            break

    # Query data
    data = []
    for symbol in symbols:
        query_payload = {
            "symbol": symbol,
            "timeframe": timeframe,
            "start": start,
            "end": end
        }
        query_response = requests.post(f"{api_url}/d1/query", json=query_payload)
        if query_response.status_code != 200:
            raise ValueError(f"Data query failed for {symbol}: {query_response.text}")

        rows = query_response.json().get("data", [])
        df = pd.DataFrame(rows)
        if not df.empty:
            df['symbol'] = symbol
            data.append(df)

    if not data:
        raise ValueError("No data retrieved for the specified parameters")

    result = pd.concat(data, ignore_index=True)
    result['timestamp'] = pd.to_datetime(result['timestamp'])
    return result[['timestamp', 'symbol', 'open', 'high', 'low', 'close', 'volume']]

def fetch_yfinance_data(
    tickers: List[str],
    period: str = "1mo",
    interval: str = "1d",
    start: Optional[str] = None,
    end: Optional[str] = None
) -> pd.DataFrame:
    """
    Fetch OHLCV data from Yahoo Finance using yfinance.

    Parameters
    ----------
    tickers : List[str]
        List of ticker symbols (e.g., ["AAPL", "MSFT"]).
    period : str, default "1mo"
        Data period (e.g., "1d", "1mo", "1y"). Ignored if start and end are provided.
    interval : str, default "1d"
        Data interval (e.g., "1m", "1h", "1d").
    start : str, optional
        Start date (e.g., "2024-01-01"). Overrides period if provided.
    end : str, optional
        End date (e.g., "2024-01-31"). Overrides period if provided.

    Returns
    -------
    pd.DataFrame
        DataFrame with columns ['timestamp', 'symbol', 'open', 'high', 'low', 'close', 'volume'].

    Raises
    ------
    ValueError
        If no data is retrieved for the specified parameters.
    """
    data = []
    for ticker in tickers:
        stock = yf.Ticker(ticker)
        if start and end:
            df = stock.history(start=start, end=end, interval=interval)
        else:
            df = stock.history(period=period, interval=interval)

        if df.empty:
            continue

        df = df.reset_index()
        df['symbol'] = ticker
        df = df.rename(columns={
            'Date': 'timestamp',
            'Open': 'open',
            'High': 'high',
            'Low': 'low',
            'Close': 'close',
            'Volume': 'volume'
        })
        data.append(df[['timestamp', 'symbol', 'open', 'high', 'low', 'close', 'volume']])

    if not data:
        raise ValueError("No data retrieved for the specified parameters")

    return pd.concat(data, ignore_index=True)
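
A short usage sketch for the Yahoo Finance helper above, feeding its tidy output into the spread estimator; the tickers and period are illustrative and network access is assumed. Daily bars are used so that yfinance's `Date` index maps onto the `timestamp` rename performed by the helper.

```python
from quantjourney_bidask import edge, fetch_yfinance_data

# Illustrative tickers and period; the helper returns lowercase columns:
# timestamp, symbol, open, high, low, close, volume.
df = fetch_yfinance_data(["AAPL", "MSFT"], period="3mo", interval="1d")

# One full-sample spread estimate per ticker.
for symbol, g in df.groupby("symbol"):
    spread = edge(g["open"], g["high"], g["low"], g["close"])
    print(f"{symbol}: estimated spread {spread:.6f}")
```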

quantjourney_bidask/edge.py
@@ -0,0 +1,148 @@
import numpy as np
import warnings
from typing import Union, List, Tuple, Any

def edge(
    open: Union[List[float], Any],
    high: Union[List[float], Any],
    low: Union[List[float], Any],
    close: Union[List[float], Any],
    sign: bool = False
) -> float:
    """
    Estimate the effective bid-ask spread from open, high, low, and close (OHLC) prices.

    Implements the efficient estimator described in Ardia, Guidotti, & Kroencke (2024):
    https://doi.org/10.1016/j.jfineco.2024.103916. The estimator computes the root mean square
    effective spread within the sample period using log-returns and indicator variables.

    Parameters
    ----------
    open : array-like
        Vector of open prices, sorted in ascending order of timestamp.
    high : array-like
        Vector of high prices, sorted in ascending order of timestamp.
    low : array-like
        Vector of low prices, sorted in ascending order of timestamp.
    close : array-like
        Vector of close prices, sorted in ascending order of timestamp.
    sign : bool, default False
        If True, returns signed estimates (negative values possible). If False, returns
        absolute values to reduce small-sample bias in averaging or regression studies.

    Returns
    -------
    float
        Estimated bid-ask spread as a fraction of price (e.g., 0.01 = 1% spread).
        Returns np.nan if the estimate cannot be computed (e.g., insufficient data).

    Notes
    -----
    - Requires at least 3 observations for a valid estimate.
    - Handles missing values (NaNs) automatically by excluding them from calculations.
    - The estimator assumes prices are positive and non-zero to compute log-prices.
    - For optimal results, use high-frequency data (e.g., minute or hourly) for frequently
      traded assets, or lower frequency (e.g., daily) for less liquid assets.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.read_csv("https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv")
    >>> spread = edge(df.Open, df.High, df.Low, df.Close)
    >>> print(f"Estimated spread: {spread:.6f}")
    Estimated spread: 0.010185
    """
    # Convert inputs to numpy arrays
    open = np.asarray(open, dtype=float)
    high = np.asarray(high, dtype=float)
    low = np.asarray(low, dtype=float)
    close = np.asarray(close, dtype=float)

    # Validate input lengths
    nobs = len(open)
    if len(high) != nobs or len(low) != nobs or len(close) != nobs:
        raise ValueError("Open, high, low, and close must have the same length")

    # Return NaN if insufficient observations
    if nobs < 3:
        return np.nan

    # Compute log-prices, handling non-positive prices
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RuntimeWarning)
        o = np.log(np.where(open > 0, open, np.nan))
        h = np.log(np.where(high > 0, high, np.nan))
        l = np.log(np.where(low > 0, low, np.nan))
        c = np.log(np.where(close > 0, close, np.nan))
        m = (h + l) / 2.0  # Mid-price log

    # Shift log-prices by one period
    h1, l1, c1, m1 = h[:-1], l[:-1], c[:-1], m[:-1]
    o, h, l, c, m = o[1:], h[1:], l[1:], c[1:], m[1:]

    # Compute log-returns
    r1 = m - o   # Mid - Open
    r2 = o - m1  # Open - Previous Mid
    r3 = m - c1  # Mid - Previous Close
    r4 = c1 - m1 # Previous Close - Previous Mid
    r5 = o - c1  # Open - Previous Close

    # Compute indicator variables
    # tau: Indicator for valid price variation (1 if high != low or low != previous close)
    tau = np.where(np.isnan(h) | np.isnan(l) | np.isnan(c1), np.nan,
                   ((h != l) | (l != c1)).astype(float))

    # po1: Indicator for open price not equal to high, scaled by tau
    po1 = tau * np.where(np.isnan(o) | np.isnan(h), np.nan, (o != h).astype(float))

    # po2: Indicator for open price not equal to low, scaled by tau
    po2 = tau * np.where(np.isnan(o) | np.isnan(l), np.nan, (o != l).astype(float))

    # pc1: Indicator for previous close not equal to previous high, scaled by tau
    pc1 = tau * np.where(np.isnan(c1) | np.isnan(h1), np.nan, (c1 != h1).astype(float))

    # pc2: Indicator for previous close not equal to previous low, scaled by tau
    pc2 = tau * np.where(np.isnan(c1) | np.isnan(l1), np.nan, (c1 != l1).astype(float))

    # Compute probabilities with NaN handling
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RuntimeWarning)
        pt = np.nanmean(tau)
        po = np.nanmean(po1) + np.nanmean(po2)
        pc = np.nanmean(pc1) + np.nanmean(pc2)

    # Return NaN if insufficient valid periods or probabilities are zero
    if np.nansum(tau) < 2 or po == 0 or pc == 0:
        return np.nan

    # Compute de-meaned log-returns
    d1 = r1 - np.nanmean(r1) / pt * tau
    d3 = r3 - np.nanmean(r3) / pt * tau
    d5 = r5 - np.nanmean(r5) / pt * tau

    # Compute input vectors for GMM estimation
    # x1: First moment condition combining open-high-low and close-high-low effects
    x1 = -4.0 / po * d1 * r2 + -4.0 / pc * d3 * r4  # Scaled by probability of open/close extremes
    # x2: Second moment condition combining open-high-low-close and close-high-low-open effects
    x2 = -4.0 / po * d1 * r5 + -4.0 / pc * d5 * r4

    # Compute expectations (means) of the moment conditions
    e1 = np.nanmean(x1)  # First moment expectation
    e2 = np.nanmean(x2)  # Second moment expectation

    # Compute variances of the moment conditions for optimal weighting
    v1 = np.nanmean(x1**2) - e1**2  # Variance of first moment
    v2 = np.nanmean(x2**2) - e2**2  # Variance of second moment

    # Compute squared spread estimate using optimal GMM weights
    vt = v1 + v2  # Total variance for weighting
    # If total variance is positive, use optimal weighted average
    # Otherwise fall back to simple average of the two estimates
    s2 = (v2 * e1 + v1 * e2) / vt if vt > 0 else (e1 + e2) / 2.0

    # Compute signed root
    s = np.sqrt(np.abs(s2))
    if sign:
        s *= np.sign(s2)

    return float(s)
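
As a sanity check on the estimator above, a small simulation sketch: trade prices bounce around a random-walk efficient price with a known 1% round-trip spread, OHLC bars are formed from the simulated ticks, and `edge` should recover a value near the assumed spread, up to sampling error. The simulation parameters below are arbitrary assumptions, not values from the paper.

```python
import numpy as np
from quantjourney_bidask import edge

rng = np.random.default_rng(42)
n_bars, ticks_per_bar = 500, 50
true_spread = 0.01   # assumed relative round-trip spread (1%)
tick_vol = 0.001     # assumed per-tick volatility of the efficient log-price

# Efficient log-price random walk, observed alternately at bid or ask.
log_mid = np.cumsum(rng.normal(0.0, tick_vol, n_bars * ticks_per_bar))
side = rng.choice([-0.5, 0.5], size=log_mid.size)   # -0.5 = bid, +0.5 = ask
trades = np.exp(log_mid + side * true_spread)

# Aggregate ticks into OHLC bars and estimate the spread.
bars = trades.reshape(n_bars, ticks_per_bar)
estimate = edge(bars[:, 0], bars.max(axis=1), bars.min(axis=1), bars[:, -1])
print(f"assumed spread: {true_spread:.4f}, estimated: {estimate:.4f}")
```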

quantjourney_bidask/edge_expanding.py
@@ -0,0 +1,59 @@
import pandas as pd
from typing import Union
from .edge import edge
from .edge_rolling import edge_rolling

def edge_expanding(
    df: pd.DataFrame,
    min_periods: int = 1,
    sign: bool = False
) -> pd.Series:
    """
    Compute expanding window estimates of the bid-ask spread from OHLC prices.

    Uses the efficient estimator from Ardia, Guidotti, & Kroencke (2024):
    https://doi.org/10.1016/j.jfineco.2024.103916. Calculates spreads over
    expanding windows starting from the first observation.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with columns 'open', 'high', 'low', 'close' (case-insensitive).
    min_periods : int, default 1
        Minimum number of observations required for an estimate. Note that
        at least 3 observations are needed for a non-NaN result.
    sign : bool, default False
        If True, returns signed estimates. If False, returns absolute values.

    Returns
    -------
    pd.Series
        Series of expanding spread estimates, indexed by the DataFrame's index.
        A value of 0.01 corresponds to a 1% spread. NaN for periods with
        insufficient data.

    Notes
    -----
    - The function leverages `edge_rolling` with a window equal to the DataFrame length.
    - Missing values are handled automatically.
    - The estimator is most reliable with sufficient data (e.g., 20+ observations).

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.read_csv("https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv")
    >>> spreads = edge_expanding(df, min_periods=21)
    >>> print(spreads.head())
    """
    # Standardize column names
    df = df.rename(columns=str.lower).copy()
    required_cols = ['open', 'high', 'low', 'close']
    if not all(col in df.columns for col in required_cols):
        raise ValueError("DataFrame must contain 'open', 'high', 'low', 'close' columns")

    return edge_rolling(
        df=df,
        window=len(df),
        min_periods=max(min_periods, 3),
        sign=sign
    )
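
A brief sketch of the expanding estimator on the sample OHLC file already referenced in the docstring (network access assumed); the `min_periods` choice is illustrative:

```python
import pandas as pd
from quantjourney_bidask import edge_expanding

df = pd.read_csv("https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv")

# Estimates stay NaN until roughly min_periods bars have accumulated
# (and never fewer than the 3 observations the estimator requires).
spreads = edge_expanding(df, min_periods=21)
print(spreads.iloc[18:24])   # around the point where estimates start appearing
print(spreads.iloc[-1])      # estimate using the full sample
```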

quantjourney_bidask/edge_rolling.py
@@ -0,0 +1,202 @@
import numpy as np
import pandas as pd
from typing import Union, Dict
from .edge import edge

def edge_rolling(
    df: pd.DataFrame,
    window: Union[int, str, pd.offsets.BaseOffset],
    sign: bool = False,
    **kwargs
) -> pd.Series:
    """
    Compute rolling window estimates of the bid-ask spread from OHLC prices.

    Uses the efficient estimator from Ardia, Guidotti, & Kroencke (2024):
    https://doi.org/10.1016/j.jfineco.2024.103916. Optimized for fast computation
    over rolling windows using vectorized operations.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with columns 'open', 'high', 'low', 'close' (case-insensitive).
    window : int, str, or pd.offsets.BaseOffset
        Size of the rolling window. Can be an integer (number of periods),
        a string (e.g., '30D' for 30 days), or a pandas offset object.
        See pandas.DataFrame.rolling for details.
    sign : bool, default False
        If True, returns signed estimates. If False, returns absolute values.
    **kwargs
        Additional arguments to pass to pandas.DataFrame.rolling, such as
        min_periods, step, or center.

    Returns
    -------
    pd.Series
        Series of rolling spread estimates, indexed by the DataFrame's index.
        A value of 0.01 corresponds to a 1% spread. NaN for periods with
        insufficient data.

    Notes
    -----
    - The function accounts for missing values by masking invalid periods.
    - The first observation is masked due to the need for lagged prices.
    - For large datasets, this implementation is significantly faster than
      applying `edge` repeatedly over windows.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.read_csv("https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv")
    >>> spreads = edge_rolling(df, window=21)
    >>> print(spreads.head())
    """
    # Standardize column names
    df = df.rename(columns=str.lower).copy()
    required_cols = ['open', 'high', 'low', 'close']
    if not all(col in df.columns for col in required_cols):
        raise ValueError("DataFrame must contain 'open', 'high', 'low', 'close' columns")

    # Compute log-prices, handling non-positive prices by replacing them with NaN
    # This prevents errors from taking log of zero or negative values
    o = np.log(df['open'].where(df['open'] > 0))    # Log of open prices
    h = np.log(df['high'].where(df['high'] > 0))    # Log of high prices
    l = np.log(df['low'].where(df['low'] > 0))      # Log of low prices
    c = np.log(df['close'].where(df['close'] > 0))  # Log of close prices
    m = (h + l) / 2.0  # Log of geometric mid-price each period

    # Get lagged (previous period) log-prices using pandas shift
    # These are needed to compute overnight returns and indicators
    h1 = h.shift(1)  # Previous period's high
    l1 = l.shift(1)  # Previous period's low
    c1 = c.shift(1)  # Previous period's close
    m1 = m.shift(1)  # Previous period's mid-price

    # Compute log-returns:
    r1 = m - o    # Mid-price minus open (intraday return from open to mid)
    r2 = o - m1   # Open minus previous mid (overnight return from prev mid to open)
    r3 = m - c1   # Mid-price minus previous close (return from prev close to mid)
    r4 = c1 - m1  # Previous close minus previous mid (prev intraday return from mid to close)
    r5 = o - c1   # Open minus previous close (overnight return from prev close to open)

    # Compute indicator variables for price variation and extremes
    # tau: Indicator for valid price variation (1 if high != low or low != previous close)
    tau = np.where(np.isnan(h) | np.isnan(l) | np.isnan(c1), np.nan,
                   ((h != l) | (l != c1)).astype(float))

    # po1: Indicator for open price not equal to high, scaled by tau
    po1 = tau * np.where(np.isnan(o) | np.isnan(h), np.nan, (o != h).astype(float))

    # po2: Indicator for open price not equal to low, scaled by tau
    po2 = tau * np.where(np.isnan(o) | np.isnan(l), np.nan, (o != l).astype(float))

    # pc1: Indicator for previous close not equal to previous high, scaled by tau
    pc1 = tau * np.where(np.isnan(c1) | np.isnan(h1), np.nan, (c1 != h1).astype(float))

    # pc2: Indicator for previous close not equal to previous low, scaled by tau
    pc2 = tau * np.where(np.isnan(c1) | np.isnan(l1), np.nan, (c1 != l1).astype(float))

    # Compute base products needed for rolling means
    # Products of log-returns for covariance calculations
    r12 = r1 * r2  # Mid-Open × Open-PrevMid
    r15 = r1 * r5  # Mid-Open × Open-PrevClose
    r34 = r3 * r4  # Mid-PrevClose × PrevClose-PrevMid
    r45 = r4 * r5  # PrevClose-PrevMid × Open-PrevClose

    # Products with tau indicator for valid periods
    tr1 = tau * r1  # Scaled Mid-Open
    tr2 = tau * r2  # Scaled Open-PrevMid
    tr4 = tau * r4  # Scaled PrevClose-PrevMid
    tr5 = tau * r5  # Scaled Open-PrevClose

    # Set up DataFrame for efficient rolling mean calculations
    # Includes all products needed for moment conditions and variance calculations
    x = pd.DataFrame({
        # Basic return products
        'r12': r12, 'r34': r34, 'r15': r15, 'r45': r45,
        'tau': tau,  # Price variation indicator
        # Individual returns
        'r1': r1, 'tr2': tr2, 'r3': r3, 'tr4': tr4, 'r5': r5,
        # Squared terms for variance
        'r12_sq': r12**2, 'r34_sq': r34**2, 'r15_sq': r15**2, 'r45_sq': r45**2,
        # Cross products for covariance
        'r12_r34': r12 * r34, 'r15_r45': r15 * r45,
        # Products with tau-scaled returns
        'tr2_r2': tr2 * r2, 'tr4_r4': tr4 * r4, 'tr5_r5': tr5 * r5,
        'tr2_r12': tr2 * r12, 'tr4_r34': tr4 * r34,
        'tr5_r15': tr5 * r15, 'tr4_r45': tr4 * r45,
        'tr4_r12': tr4 * r12, 'tr2_r34': tr2 * r34,
        'tr2_r4': tr2 * r4, 'tr1_r45': tr1 * r45,
        'tr5_r45': tr5 * r45, 'tr4_r5': tr4 * r5,
        'tr5': tr5,
        # Extreme price indicators
        'po1': po1, 'po2': po2, 'pc1': pc1, 'pc2': pc2
    }, index=df.index)

    # Handle first observation and adjust window parameters
    x.iloc[0] = np.nan  # Mask first row due to lagged values
    if isinstance(window, (int, np.integer)):
        window = max(0, window - 1)  # Adjust window size for lag
    if 'min_periods' in kwargs and isinstance(kwargs['min_periods'], (int, np.integer)):
        kwargs['min_periods'] = max(0, kwargs['min_periods'] - 1)

    # Compute rolling means for all variables
    m = x.rolling(window=window, **kwargs).mean()

    # Calculate probabilities of price extremes
    pt = m['tau']             # Probability of valid price variation
    po = m['po1'] + m['po2']  # Probability of open being extreme
    pc = m['pc1'] + m['pc2']  # Probability of close being extreme

    # Mask periods with insufficient data or zero probabilities
    nt = x['tau'].rolling(window=window, **kwargs).sum()
    m[(nt < 2) | (po == 0) | (pc == 0)] = np.nan

    # Compute coefficients for moment conditions
    a1 = -4.0 / po  # Scaling for open price moments
    a2 = -4.0 / pc  # Scaling for close price moments
    a3 = m['r1'] / pt   # Mean-adjustment for Mid-Open
    a4 = m['tr4'] / pt  # Mean-adjustment for PrevClose-PrevMid
    a5 = m['r3'] / pt   # Mean-adjustment for Mid-PrevClose
    a6 = m['r5'] / pt   # Mean-adjustment for Open-PrevClose

    # Pre-compute squared and product terms
    a12 = 2 * a1 * a2
    a11 = a1**2
    a22 = a2**2
    a33 = a3**2
    a55 = a5**2
    a66 = a6**2

    # Calculate moment condition expectations
    e1 = a1 * (m['r12'] - a3 * m['tr2']) + a2 * (m['r34'] - a4 * m['r3'])  # First moment
    e2 = a1 * (m['r15'] - a3 * m['tr5']) + a2 * (m['r45'] - a4 * m['r5'])  # Second moment

    # Calculate variances of moment conditions
    # v1: Variance of first moment condition
    v1 = -e1**2 + (
        a11 * (m['r12_sq'] - 2 * a3 * m['tr2_r12'] + a33 * m['tr2_r2']) +
        a22 * (m['r34_sq'] - 2 * a5 * m['tr4_r34'] + a55 * m['tr4_r4']) +
        a12 * (m['r12_r34'] - a3 * m['tr2_r34'] - a5 * m['tr4_r12'] + a3 * a5 * m['tr2_r4'])
    )
    # v2: Variance of second moment condition
    v2 = -e2**2 + (
        a11 * (m['r15_sq'] - 2 * a3 * m['tr5_r15'] + a33 * m['tr5_r5']) +
        a22 * (m['r45_sq'] - 2 * a6 * m['tr4_r45'] + a66 * m['tr4_r4']) +
        a12 * (m['r15_r45'] - a3 * m['tr5_r45'] - a6 * m['tr1_r45'] + a3 * a6 * m['tr4_r5'])
    )

    # Compute squared spread using optimal GMM weights
    vt = v1 + v2  # Total variance
    s2 = pd.Series(np.where(
        vt > 0,
        (v2 * e1 + v1 * e2) / vt,  # Optimal weighted average if variance is positive
        (e1 + e2) / 2.0            # Simple average if variance is zero/negative
    ), index=df.index)

    # Compute signed root
    s = np.sqrt(np.abs(s2))
    if sign:
        s *= np.sign(s2)

    return pd.Series(s, index=df.index, name=f"EDGE_rolling_{window}")
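
A usage sketch for the rolling estimator on the same sample file; the window length and the `min_periods` pass-through to `pandas.DataFrame.rolling` are illustrative:

```python
import pandas as pd
from quantjourney_bidask import edge_rolling

df = pd.read_csv("https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv")

# 21-bar window; keyword arguments such as min_periods are forwarded to DataFrame.rolling.
spread_21 = edge_rolling(df, window=21, min_periods=21)
print(spread_21.dropna().describe())

# With a DatetimeIndex, time-based windows such as window="30D" can be used instead.
```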

quantjourney_bidask/websocket_fetcher.py
@@ -0,0 +1,308 @@
"""
WebSocket Live Data Fetcher

Real-time data fetching for cryptocurrency exchanges using WebSockets.
"""

import json
import threading
import time
import pandas as pd
import numpy as np
from datetime import datetime, timezone
from typing import Dict, List, Callable, Optional
import websocket
from collections import deque
from .edge_rolling import edge_rolling

class LiveSpreadMonitor:
    """
    Real-time spread monitoring using WebSocket connections.

    Supports Binance WebSocket streams for live OHLC data and real-time
    spread calculation with configurable alerts.
    """

    def __init__(self, symbols: List[str], window: int = 20, buffer_size: int = 1000):
        """
        Initialize the live spread monitor.

        Parameters
        ----------
        symbols : List[str]
            List of trading symbols to monitor (e.g., ['BTCUSDT', 'ETHUSDT'])
        window : int
            Rolling window size for spread calculation
        buffer_size : int
            Maximum number of candles to keep in memory
        """
        self.symbols = [s.lower() for s in symbols]
        self.window = window
        self.buffer_size = buffer_size

        # Data storage
        self.data_buffers = {symbol: deque(maxlen=buffer_size) for symbol in self.symbols}
        self.spread_buffers = {symbol: deque(maxlen=buffer_size) for symbol in self.symbols}

        # WebSocket connections
        self.ws_connections = {}
        self.running = False

        # Callbacks
        self.data_callbacks = []
        self.alert_callbacks = []

        # Alert thresholds (in basis points)
        self.alert_thresholds = {symbol: {'high': 100, 'low': 5} for symbol in self.symbols}

    def add_data_callback(self, callback: Callable):
        """Add callback function for new data events."""
        self.data_callbacks.append(callback)

    def add_alert_callback(self, callback: Callable):
        """Add callback function for alert events."""
        self.alert_callbacks.append(callback)

    def set_alert_threshold(self, symbol: str, high_bps: float, low_bps: float):
        """Set alert thresholds for a symbol (in basis points)."""
        symbol = symbol.lower()
        if symbol in self.alert_thresholds:
            self.alert_thresholds[symbol] = {'high': high_bps, 'low': low_bps}

    def _create_websocket_url(self, symbols: List[str]) -> str:
        """Create Binance WebSocket URL for multiple symbols."""
        streams = []
        for symbol in symbols:
            streams.append(f"{symbol}@kline_1m")  # 1-minute klines

        if len(streams) == 1:
            return f"wss://stream.binance.com:9443/ws/{streams[0]}"
        else:
            stream_string = "/".join(streams)
            return f"wss://stream.binance.com:9443/stream?streams={stream_string}"

    def _on_message(self, ws, message):
        """Handle incoming WebSocket messages."""
        try:
            data = json.loads(message)

            # Handle multi-stream format
            if 'stream' in data:
                stream_data = data['data']
                symbol = stream_data['s'].lower()
            else:
                stream_data = data
                symbol = stream_data['s'].lower()

            # Extract kline data
            kline = stream_data['k']
            is_closed = kline['x']  # Whether kline is closed

            if is_closed:  # Only process closed candles
                candle_data = {
                    'timestamp': pd.to_datetime(kline['t'], unit='ms'),
                    'symbol': symbol,
                    'open': float(kline['o']),
                    'high': float(kline['h']),
                    'low': float(kline['l']),
                    'close': float(kline['c']),
                    'volume': float(kline['v'])
                }

                self._process_candle(candle_data)

        except Exception as e:
            print(f"Error processing message: {e}")

    def _process_candle(self, candle_data: Dict):
        """Process new candle data and update spreads."""
        symbol = candle_data['symbol']

        # Add to buffer
        self.data_buffers[symbol].append(candle_data)

        # Calculate spread if we have enough data
        if len(self.data_buffers[symbol]) >= self.window:
            # Convert buffer to DataFrame for spread calculation
            df = pd.DataFrame(list(self.data_buffers[symbol])[-self.window:])

            # Calculate current spread over the buffered window and take the latest value
            try:
                current_spread = edge_rolling(df, window=min(len(df), self.window)).iloc[-1]

                if not pd.isna(current_spread):
                    spread_bps = current_spread * 10000  # Convert to basis points

                    spread_data = {
                        'timestamp': candle_data['timestamp'],
                        'symbol': symbol,
                        'spread_bps': spread_bps,
                        'price': candle_data['close']
                    }

                    self.spread_buffers[symbol].append(spread_data)

                    # Check for alerts
                    self._check_alerts(spread_data)

                    # Notify callbacks
                    for callback in self.data_callbacks:
                        callback(candle_data, spread_data)

            except Exception as e:
                print(f"Error calculating spread for {symbol}: {e}")

    def _check_alerts(self, spread_data: Dict):
        """Check if spread triggers any alerts."""
        symbol = spread_data['symbol']
        spread_bps = spread_data['spread_bps']
        thresholds = self.alert_thresholds[symbol]

        alert_type = None
        if spread_bps > thresholds['high']:
            alert_type = 'HIGH'
        elif spread_bps < thresholds['low']:
            alert_type = 'LOW'

        if alert_type:
            alert_data = {
                'type': alert_type,
                'symbol': symbol,
                'spread_bps': spread_bps,
                'threshold': thresholds[alert_type.lower()],
                'timestamp': spread_data['timestamp'],
                'price': spread_data['price']
            }

            for callback in self.alert_callbacks:
                callback(alert_data)

    def _on_error(self, ws, error):
        """Handle WebSocket errors."""
        print(f"WebSocket error: {error}")

    def _on_close(self, ws, close_status_code, close_msg):
        """Handle WebSocket connection close."""
        print("WebSocket connection closed")

    def _on_open(self, ws):
        """Handle WebSocket connection open."""
        print(f"WebSocket connected for symbols: {', '.join(self.symbols)}")

    def start(self):
        """Start the live monitoring."""
        if self.running:
            print("Monitor is already running")
            return

        self.running = True

        # Create WebSocket URL
        ws_url = self._create_websocket_url(self.symbols)

        # Create WebSocket connection
        self.ws = websocket.WebSocketApp(
            ws_url,
            on_message=self._on_message,
            on_error=self._on_error,
            on_close=self._on_close,
            on_open=self._on_open
        )

        # Start WebSocket in a separate thread
        self.ws_thread = threading.Thread(target=self.ws.run_forever)
        self.ws_thread.daemon = True
        self.ws_thread.start()

        print("Live spread monitoring started...")

    def stop(self):
        """Stop the live monitoring."""
        if not self.running:
            return

        self.running = False
        if hasattr(self, 'ws'):
            self.ws.close()

        print("Live spread monitoring stopped.")

    def get_current_data(self) -> Dict[str, pd.DataFrame]:
        """Get current data for all symbols."""
        result = {}
        for symbol in self.symbols:
            if len(self.data_buffers[symbol]) > 0:
                result[symbol] = pd.DataFrame(list(self.data_buffers[symbol]))
        return result

    def get_current_spreads(self) -> Dict[str, pd.DataFrame]:
        """Get current spread data for all symbols."""
        result = {}
        for symbol in self.symbols:
            if len(self.spread_buffers[symbol]) > 0:
                result[symbol] = pd.DataFrame(list(self.spread_buffers[symbol]))
        return result

def create_live_dashboard_example():
    """
    Example of creating a live dashboard (console-based).
    """
    import time

    def data_callback(candle_data, spread_data):
        """Print new data to console."""
        symbol = spread_data['symbol'].upper()
        timestamp = spread_data['timestamp'].strftime('%H:%M:%S')
        price = spread_data['price']
        spread_bps = spread_data['spread_bps']

        print(f"[{timestamp}] {symbol}: ${price:.2f} | Spread: {spread_bps:.2f}bps")

    def alert_callback(alert_data):
        """Print alerts to console."""
        symbol = alert_data['symbol'].upper()
        alert_type = alert_data['type']
        spread_bps = alert_data['spread_bps']
        threshold = alert_data['threshold']
        timestamp = alert_data['timestamp'].strftime('%H:%M:%S')

        print(f"🚨 [{timestamp}] {alert_type} SPREAD ALERT for {symbol}: "
              f"{spread_bps:.2f}bps (threshold: {threshold}bps)")

    # Create monitor
    monitor = LiveSpreadMonitor(['BTCUSDT', 'ETHUSDT'], window=10)

    # Set custom thresholds
    monitor.set_alert_threshold('BTCUSDT', high_bps=50, low_bps=2)
    monitor.set_alert_threshold('ETHUSDT', high_bps=60, low_bps=3)

    # Add callbacks
    monitor.add_data_callback(data_callback)
    monitor.add_alert_callback(alert_callback)

    return monitor

if __name__ == "__main__":
    print("Live Spread Monitor Example")
    print("==========================")
    print("This example demonstrates real-time spread monitoring using WebSockets.")
    print("Note: This requires an active internet connection and will connect to Binance WebSocket.")
    print()

    try:
        # Create and start monitor
        monitor = create_live_dashboard_example()

        print("Starting live monitor... (Press Ctrl+C to stop)")
        monitor.start()

        # Run for a demo period
        time.sleep(60)  # Run for 1 minute

    except KeyboardInterrupt:
        print("\nStopping monitor...")
    finally:
        if 'monitor' in locals():
            monitor.stop()

    print("Example completed.")
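
Because the monitor only computes spreads inside its WebSocket callback, a quick offline sketch can exercise the same path by feeding synthetic closed candles straight into `_process_candle`; the prices below are made up and no network connection is opened:

```python
import numpy as np
import pandas as pd
from quantjourney_bidask import LiveSpreadMonitor

monitor = LiveSpreadMonitor(["BTCUSDT"], window=5)
monitor.set_alert_threshold("BTCUSDT", high_bps=80, low_bps=1)
monitor.add_data_callback(lambda candle, spread: print(candle["timestamp"], f"{spread['spread_bps']:.2f} bps"))
monitor.add_alert_callback(lambda alert: print("ALERT", alert["type"], f"{alert['spread_bps']:.2f} bps"))

rng = np.random.default_rng(0)
price = 50_000.0
for i in range(10):
    price *= float(np.exp(rng.normal(0.0, 0.001)))
    monitor._process_candle({
        "timestamp": pd.Timestamp("2024-01-01") + pd.Timedelta(minutes=i),
        "symbol": "btcusdt",              # buffers are keyed by lowercase symbol
        "open": price * 0.999, "high": price * 1.002,
        "low": price * 0.998, "close": price, "volume": 1.0,
    })                                    # same method the WebSocket handler calls

print(monitor.get_current_spreads())      # per-symbol DataFrames of spread_bps
```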

quantjourney_bidask-0.5.0.dist-info/METADATA
@@ -0,0 +1,183 @@
Metadata-Version: 2.4
Name: quantjourney-bidask
Version: 0.5.0
Summary: Efficient bid-ask spread estimator from OHLC prices
Author-email: Jakub Polec <jakub@quantjourney.pro>
License-Expression: MIT
Project-URL: Homepage, https://github.com/QuantJourneyOrg/qj_bidask
Project-URL: Repository, https://github.com/QuantJourneyOrg/qj_bidask
Project-URL: Bug Tracker, https://github.com/QuantJourneyOrg/qj_bidask/issues
Keywords: finance,bid-ask,spread,trading,quantitative,OHLC
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Financial and Insurance Industry
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Office/Business :: Financial :: Investment
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Requires-Python: <3.15,>=3.11
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: numpy>=1.20
Requires-Dist: pandas>=1.5
Requires-Dist: requests>=2.28
Requires-Dist: yfinance>=0.2
Requires-Dist: matplotlib>=3.5
Requires-Dist: plotly>=5.0
Requires-Dist: websocket-client>=1.0
Provides-Extra: dev
Requires-Dist: pytest>=7.0; extra == "dev"
Requires-Dist: pytest-mock>=3.10; extra == "dev"
Requires-Dist: pytest-cov>=4.0; extra == "dev"
Requires-Dist: ruff>=0.1; extra == "dev"
Requires-Dist: mypy>=1.0; extra == "dev"
Requires-Dist: black>=22.0; extra == "dev"
Requires-Dist: isort>=5.0; extra == "dev"
Provides-Extra: examples
Requires-Dist: jupyter>=1.0; extra == "examples"
Requires-Dist: ipywidgets>=7.0; extra == "examples"
Dynamic: license-file

# QuantJourney Bid-Ask Spread Estimator





The `quantjourney-bidask` library provides an efficient estimator for calculating bid-ask spreads from open, high, low, and close (OHLC) prices, based on the methodology described in:

> Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. *Journal of Financial Economics*, 161, 103916. [doi:10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916)

This library is designed for quantitative finance professionals, researchers, and traders who need accurate and computationally efficient spread estimates for equities, cryptocurrencies, and other assets.

## Features

- **Efficient Spread Estimation**: Implements the EDGE estimator for single, rolling, and expanding windows.
- **Data Integration**: Fetch OHLC data from Binance (via custom FastAPI server) and Yahoo Finance (via yfinance).
- **Robust Handling**: Supports missing values, non-positive prices, and various data frequencies.
- **Comprehensive Tests**: Extensive unit tests with known test cases from the original paper.
- **Clear Documentation**: Detailed docstrings and usage examples.

## Installation

Install the library via pip:

```bash
pip install quantjourney-bidask
```

## Quick Start

### Basic Usage

```python
from quantjourney_bidask import edge

# Example OHLC data (as lists or numpy arrays)
open_prices = [100.0, 101.5, 99.8, 102.1, 100.9]
high_prices = [102.3, 103.0, 101.2, 103.5, 102.0]
low_prices = [99.5, 100.8, 98.9, 101.0, 100.1]
close_prices = [101.2, 100.2, 101.8, 100.5, 101.5]

# Calculate bid-ask spread
spread = edge(open_prices, high_prices, low_prices, close_prices)
print(f"Estimated bid-ask spread: {spread:.6f}")
```

### Rolling Window Analysis

```python
import pandas as pd
from quantjourney_bidask import edge_rolling

# Calculate rolling spreads with a 20-period window on an OHLC DataFrame
# (estimates are NaN until the window has enough observations)
df = pd.DataFrame({
    'open': open_prices, 'high': high_prices,
    'low': low_prices, 'close': close_prices
})
rolling_spreads = edge_rolling(df, window=20)
print(f"Rolling spreads:\n{rolling_spreads}")
```

### Data Fetching Integration

```python
from quantjourney_bidask import fetch_yfinance_data, edge

# Fetch daily OHLC data for a stock (returned with lowercase column names)
data = fetch_yfinance_data(["AAPL"], period="1mo", interval="1d")

# Calculate spread from fetched data
spread = edge(data['open'], data['high'], data['low'], data['close'])
print(f"AAPL spread estimate: {spread:.6f}")
```

### Live Monitoring

```python
from quantjourney_bidask import LiveSpreadMonitor

# Monitor live spreads for cryptocurrencies (1-minute klines from Binance)
monitor = LiveSpreadMonitor(["BTCUSDT"], window=100)
monitor.add_data_callback(
    lambda candle, spread: print(f"{spread['symbol']}: {spread['spread_bps']:.2f} bps")
)
monitor.start()

# The monitor runs in a background thread; latest estimates are kept per symbol
current_spreads = monitor.get_current_spreads()

monitor.stop()
```

## API Reference

### Core Functions

- `edge(open, high, low, close, sign=False)`: Spread estimate from a full sample of OHLC prices
- `edge_rolling(df, window, sign=False, **kwargs)`: Rolling-window estimation on an OHLC DataFrame (extra keyword arguments are passed to `pandas.DataFrame.rolling`)
- `edge_expanding(df, min_periods=1, sign=False)`: Expanding-window estimation on an OHLC DataFrame

### Data Fetching

- `fetch_yfinance_data(tickers, period="1mo", interval="1d", start=None, end=None)`: Fetch OHLCV data from Yahoo Finance
- `fetch_binance_data(symbols, timeframe, start, end, api_key, api_url="http://localhost:8000")`: Fetch OHLCV data from Binance via the companion FastAPI server

### Live Monitoring

- `LiveSpreadMonitor(symbols, window=20, buffer_size=1000)`: Real-time spread monitoring via Binance WebSocket streams

## Requirements

- Python >= 3.11, < 3.15
- numpy >= 1.20
- pandas >= 1.5
- requests >= 2.28
- yfinance >= 0.2
- websocket-client >= 1.0
- matplotlib >= 3.5
- plotly >= 5.0

## Academic Citation

If you use this library in academic research, please cite:

```bibtex
@article{ardia2024efficient,
  title={Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices},
  author={Ardia, David and Guidotti, Emanuele and Kroencke, Tim A},
  journal={Journal of Financial Economics},
  volume={161},
  pages={103916},
  year={2024},
  publisher={Elsevier}
}
```

## License

This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

## Contributing

Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.

## Support

- **Documentation**: [GitHub Repository](https://github.com/QuantJourneyOrg/qj_bidask)
- **Issues**: [Bug Tracker](https://github.com/QuantJourneyOrg/qj_bidask/issues)
- **Contact**: jakub@quantjourney.pro

quantjourney_bidask-0.5.0.dist-info/RECORD
@@ -0,0 +1,12 @@
quantjourney_bidask/__init__.py,sha256=vumoRDEDOTclYapknfSwKpCZi9IdfJbukdp7S1-kphA,409
quantjourney_bidask/_version.py,sha256=FG3XKw_Vb0JfvroFMn303BEuhI10eKAvkGzI0gQT-LY,235
quantjourney_bidask/data_fetcher.py,sha256=GMVf4wRVwIE2JJ2sYAR_CCo56JQnReNhTWTSrZc0-L0,4931
quantjourney_bidask/edge.py,sha256=z-uRUH3Rot6Zw-dPa2pNlQu0hY1YJu6d0c18IyqbiNs,6105
quantjourney_bidask/edge_expanding.py,sha256=bN6lBetJdqC2xSdRc1RTjHfSI1XXVKegl0GQaD8eanY,2047
quantjourney_bidask/edge_rolling.py,sha256=CAZW_wBF7G6mGLenoEwlq4yB_1x1-PsQ4TgwL-zdM7w,8910
quantjourney_bidask/websocket_fetcher.py,sha256=xMS_qLbSW9hCS3RbNKvkn5HTK0XGmAO4wpaAl4_Mxb4,10895
quantjourney_bidask-0.5.0.dist-info/licenses/LICENSE,sha256=vny3AM3KIslUu5fdooMsdxVKghoZhDKnBCsLvMDHqLg,1081
quantjourney_bidask-0.5.0.dist-info/METADATA,sha256=17qESjK2WZZl89pukiZF21IzX4xlbP_n14lZWNsfDbg,6234
quantjourney_bidask-0.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
quantjourney_bidask-0.5.0.dist-info/top_level.txt,sha256=rOBM4GxA87iQv-mR8-WZdu3-Yj5ESyggRICpUhJ-4Dg,20
quantjourney_bidask-0.5.0.dist-info/RECORD,,

quantjourney_bidask-0.5.0.dist-info/licenses/LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 Jakub Polec, QuantJourney

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

quantjourney_bidask-0.5.0.dist-info/top_level.txt
@@ -0,0 +1 @@
quantjourney_bidask