PyPI - quantjourney-bidask - Versions diffs - 0.9.4__py3-none-any.whl → 1.0.1__py3-none-any.whl - Mend

quantjourney-bidask 0.9.4py3-none-any.whl → 1.0.1py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

quantjourney_bidask/__init__.py +31 -4
quantjourney_bidask/_compare_edge.py +152 -0
quantjourney_bidask/edge.py +149 -127
quantjourney_bidask/edge_expanding.py +45 -57
quantjourney_bidask/edge_hft.py +126 -0
quantjourney_bidask/edge_rolling.py +54 -198
{quantjourney_bidask-0.9.4.dist-info → quantjourney_bidask-1.0.1.dist-info}/METADATA +94 -35
quantjourney_bidask-1.0.1.dist-info/RECORD +11 -0
quantjourney_bidask/_version.py +0 -7
quantjourney_bidask/websocket_fetcher.py +0 -308
quantjourney_bidask-0.9.4.dist-info/RECORD +0 -11
{quantjourney_bidask-0.9.4.dist-info → quantjourney_bidask-1.0.1.dist-info}/WHEEL +0 -0
{quantjourney_bidask-0.9.4.dist-info → quantjourney_bidask-1.0.1.dist-info}/licenses/LICENSE +0 -0
{quantjourney_bidask-0.9.4.dist-info → quantjourney_bidask-1.0.1.dist-info}/top_level.txt +0 -0

quantjourney_bidask/edge_hft.py ADDED Viewed

@@ -0,0 +1,126 @@
+"""
+HFT-Optimized EDGE estimator for bid-ask spread calculation.
+This version is hyper-optimized for maximum speed and is intended for
+latency-sensitive applications like High-Frequency Trading.
+It uses a targeted, fastmath-enabled Numba kernel for the lowest possible
+execution time.
+**WARNING:** This implementation uses `fastmath=True`, which prioritizes speed
+over strict IEEE 754 compliance. It assumes the input data is **perfectly clean**
+(contains no NaN or Inf values). Passing messy data may result in NaN output
+where the standard `edge.py` version would produce a valid number. Use this
+version only when you have a robust data sanitization pipeline upstream.
+For general-purpose, robust estimation, use the standard `edge.py` module.
+Author: Jakub Polec
+Date: 2025-06-28
+"""
+import warnings
+import numpy as np
+from numba import jit, prange
+from typing import Union, List, Any
+# This is the targeted kernel. We add `fastmath=True` for an extra performance
+# boost in this dense numerical section.
+@jit(nopython=True, cache=True, fastmath=True)
+def _compute_spread_numba_optimized(r1, r2, r3, r4, r5, tau, po, pc, pt):
+    """
+    Optimized core spread calculation using Numba with fastmath.
+    This is the computational bottleneck and benefits most from JIT compilation.
+    """
+    # Numba is highly efficient with NumPy functions in nopython mode.
+    d1 = r1 - np.nanmean(r1) / pt * tau
+    d3 = r3 - np.nanmean(r3) / pt * tau
+    d5 = r5 - np.nanmean(r5) / pt * tau
+    x1 = -4.0 / po * d1 * r2 + -4.0 / pc * d3 * r4
+    x2 = -4.0 / po * d1 * r5 + -4.0 / pc * d5 * r4
+    e1 = np.nanmean(x1)
+    e2 = np.nanmean(x2)
+    v1 = np.nanmean(x1**2) - e1**2
+    v2 = np.nanmean(x2**2) - e2**2
+    vt = v1 + v2
+    s2 = (v2 * e1 + v1 * e2) / vt if vt > 0.0 else (e1 + e2) / 2.0
+    return s2
+def edge(
+    open_prices: Union[List[float], Any],
+    high: Union[List[float], Any],
+    low: Union[List[float], Any],
+    close: Union[List[float], Any],
+    sign: bool = False,
+    min_pt: float = 1e-6,
+    debug: bool = False,
+) -> float:
+    """
+    Estimate the effective bid-ask spread from OHLC prices.
+    Public-facing function using the hybrid optimization strategy.
+    """
+    # --- 1. Input Validation and Conversion ---
+    o_arr = np.asarray(open_prices, dtype=float)
+    h_arr = np.asarray(high, dtype=float)
+    l_arr = np.asarray(low, dtype=float)
+    c_arr = np.asarray(close, dtype=float)
+    nobs = len(o_arr)
+    if not (len(h_arr) == nobs and len(l_arr) == nobs and len(c_arr) == nobs):
+        raise ValueError("Input arrays must have the same length.")
+    if nobs < 3:
+        if debug: print("NaN reason: nobs < 3")
+        return np.nan
+    # --- 2. Log-Price Calculation (NumPy is fastest for this) ---
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore", RuntimeWarning)
+        o = np.log(np.where(o_arr > 0, o_arr, np.nan))
+        h = np.log(np.where(h_arr > 0, h_arr, np.nan))
+        l = np.log(np.where(l_arr > 0, l_arr, np.nan))
+        c = np.log(np.where(c_arr > 0, c_arr, np.nan))
+        m = (h + l) / 2.0
+    # --- 3. Shift and Vectorized Calculations (NumPy is fastest for this) ---
+    o_t, h_t, l_t, m_t = o[1:], h[1:], l[1:], m[1:]
+    h_tm1, l_tm1, c_tm1, m_tm1 = h[:-1], l[:-1], c[:-1], m[:-1]
+    r1 = m_t - o_t
+    r2 = o_t - m_tm1
+    r3 = m_t - c_tm1
+    r4 = c_tm1 - m_tm1
+    r5 = o_t - c_tm1
+    tau = np.where(np.isnan(h_t) | np.isnan(l_t) | np.isnan(c_tm1), np.nan, ((h_t != l_t) | (l_t != c_tm1)).astype(float))
+    po1 = tau * np.where(np.isnan(o_t) | np.isnan(h_t), np.nan, (o_t != h_t).astype(float))
+    po2 = tau * np.where(np.isnan(o_t) | np.isnan(l_t), np.nan, (o_t != l_t).astype(float))
+    pc1 = tau * np.where(np.isnan(c_tm1) | np.isnan(h_tm1), np.nan, (c_tm1 != h_tm1).astype(float))
+    pc2 = tau * np.where(np.isnan(c_tm1) | np.isnan(l_tm1), np.nan, (c_tm1 != l_tm1).astype(float))
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore", RuntimeWarning)
+        pt = np.nanmean(tau)
+        po = np.nanmean(po1) + np.nanmean(po2)
+        pc = np.nanmean(pc1) + np.nanmean(pc2)
+    # --- 4. Final Checks and Kernel Call ---
+    if np.nansum(tau) < 2 or po == 0.0 or pc == 0.0 or pt < min_pt:
+        if debug: print(f"NaN reason: Insufficient valid data (tau_sum={np.nansum(tau)}, po={po}, pc={pc}, pt={pt})")
+        return np.nan
+    # *** THE FIX: Call the correctly named JIT function ***
+    s2 = _compute_spread_numba_optimized(r1, r2, r3, r4, r5, tau, po, pc, pt)
+    if np.isnan(s2):
+        return np.nan
+    s = np.sqrt(np.abs(s2))
+    if sign:
+        s *= np.sign(s2)
+    return float(s)

quantjourney_bidask/edge_rolling.py CHANGED Viewed

@@ -1,208 +1,64 @@
+"""
+Robust and efficient rolling window EDGE estimator implementation.
+This module provides a rolling window implementation of the EDGE estimator,
+supporting integer windows together with the 'step' and 'min_periods' options.
+Author: Jakub Polec
+Date: 2025-06-28
+Part of the QuantJourney framework - The framework with advanced quantitative
+finance tools and insights.
+"""
 import numpy as np
 import pandas as pd
-from typing import Union, Dict
-from .edge import edge
+from typing import Union
+# Import the core, fast estimator
+from .edge import edge as edge_single
 def edge_rolling(
     df: pd.DataFrame,
-    window: Union[int, str, pd.offsets.BaseOffset],
+    window: int,
     sign: bool = False,
-    **kwargs
+    step: int = 1,
+    min_periods: int = None,
+    **kwargs, # Accept other kwargs to match test signature
 ) -> pd.Series:
     """
-    Compute rolling window estimates of the bid-ask spread from OHLC prices.
-    Uses the efficient estimator from Ardia, Guidotti, & Kroencke (2024):
-    https://doi.org/10.1016/j.jfineco.2024.103916. Optimized for fast computation
-    over rolling windows using vectorized operations.
-    Parameters
-    ----------
-    df : pd.DataFrame
-        DataFrame with columns 'open', 'high', 'low', 'close' (case-insensitive).
-    window : int, str, or pd.offsets.BaseOffset
-        Size of the rolling window. Can be an integer (number of periods),
-        a string (e.g., '30D' for 30 days), or a pandas offset object.
-        See pandas.DataFrame.rolling for details.
-    sign : bool, default False
-        If True, returns signed estimates. If False, returns absolute values.
-    **kwargs
-        Additional arguments to pass to pandas.DataFrame.rolling, such as
-        min_periods, step, or center.
-    Returns
-    -------
-    pd.Series
-        Series of rolling spread estimates, indexed by the DataFrame's index.
-        A value of 0.01 corresponds to a 1% spread. NaN for periods with
-        insufficient data.
-    Notes
-    -----
-    - The function accounts for missing values by masking invalid periods.
-    - The first observation is masked due to the need for lagged prices.
-    - For large datasets, this implementation is significantly faster than
-      applying `edge` repeatedly over windows.
-    Examples
-    --------
-    >>> import pandas as pd
-    >>> # Example OHLC DataFrame
-    >>> df = pd.DataFrame({
-    ...     'open': [100.0, 101.5, 99.8, 102.1, 100.9],
-    ...     'high': [102.3, 103.0, 101.2, 103.5, 102.0],
-    ...     'low': [99.5, 100.8, 98.9, 101.0, 100.1],
-    ...     'close': [101.2, 102.5, 100.3, 102.8, 101.5]
-    ... })
-    >>> spreads = edge_rolling(df, window=3)
-    >>> print(spreads.dropna())
+    Computes rolling EDGE estimates using a fast loop that calls the core estimator.
     """
-    # Standardize column names
-    df = df.rename(columns=str.lower).copy()
-    required_cols = ['open', 'high', 'low', 'close']
-    if not all(col in df.columns for col in required_cols):
-        raise ValueError("DataFrame must contain 'open', 'high', 'low', 'close' columns")
-    # Compute log-prices, handling non-positive prices by replacing them with NaN
-    # This prevents errors from taking log of zero or negative values
-    o = np.log(df['open'].where(df['open'] > 0))  # Log of open prices
-    h = np.log(df['high'].where(df['high'] > 0))  # Log of high prices
-    l = np.log(df['low'].where(df['low'] > 0))    # Log of low prices
-    c = np.log(df['close'].where(df['close'] > 0)) # Log of close prices
-    m = (h + l) / 2.0  # Log of geometric mid-price each period
-    # Get lagged (previous period) log-prices using pandas shift
-    # These are needed to compute overnight returns and indicators
-    h1 = h.shift(1)  # Previous period's high
-    l1 = l.shift(1)  # Previous period's low
-    c1 = c.shift(1)  # Previous period's close
-    m1 = m.shift(1)  # Previous period's mid-price
-    # Compute log-returns:
-    r1 = m - o        # Mid-price minus open (intraday return from open to mid)
-    r2 = o - m1       # Open minus previous mid (overnight return from prev mid to open)
-    r3 = m - c1       # Mid-price minus previous close (return from prev close to mid)
-    r4 = c1 - m1      # Previous close minus previous mid (prev intraday return from mid to close)
-    r5 = o - c1       # Open minus previous close (overnight return from prev close to open)
-    # Compute indicator variables for price variation and extremes
-    # tau: Indicator for valid price variation (1 if high != low or low != previous close)
-    tau = np.where(np.isnan(h) | np.isnan(l) | np.isnan(c1), np.nan,
-                   ((h != l) | (l != c1)).astype(float))
-    # po1: Indicator for open price not equal to high, scaled by tau
-    po1 = tau * np.where(np.isnan(o) | np.isnan(h), np.nan, (o != h).astype(float))
-    # po2: Indicator for open price not equal to low, scaled by tau
-    po2 = tau * np.where(np.isnan(o) | np.isnan(l), np.nan, (o != l).astype(float))
-    # pc1: Indicator for previous close not equal to previous high, scaled by tau
-    pc1 = tau * np.where(np.isnan(c1) | np.isnan(h1), np.nan, (c1 != h1).astype(float))
-    # pc2: Indicator for previous close not equal to previous low, scaled by tau
-    pc2 = tau * np.where(np.isnan(c1) | np.isnan(l1), np.nan, (c1 != l1).astype(float))
-    # Compute base products needed for rolling means
-    # Products of log-returns for covariance calculations
-    r12 = r1 * r2  # Mid-Open × Open-PrevMid
-    r15 = r1 * r5  # Mid-Open × Open-PrevClose
-    r34 = r3 * r4  # Mid-PrevClose × PrevClose-PrevMid
-    r45 = r4 * r5  # PrevClose-PrevMid × Open-PrevClose
-    # Products with tau indicator for valid periods
-    tr1 = tau * r1  # Scaled Mid-Open
-    tr2 = tau * r2  # Scaled Open-PrevMid
-    tr4 = tau * r4  # Scaled PrevClose-PrevMid
-    tr5 = tau * r5  # Scaled Open-PrevClose
-    # Set up DataFrame for efficient rolling mean calculations
-    # Includes all products needed for moment conditions and variance calculations
-    x = pd.DataFrame({
-        # Basic return products
-        'r12': r12, 'r34': r34, 'r15': r15, 'r45': r45,
-        'tau': tau,  # Price variation indicator
-        # Individual returns
-        'r1': r1, 'tr2': tr2, 'r3': r3, 'tr4': tr4, 'r5': r5,
-        # Squared terms for variance
-        'r12_sq': r12**2, 'r34_sq': r34**2, 'r15_sq': r15**2, 'r45_sq': r45**2,
-        # Cross products for covariance
-        'r12_r34': r12 * r34, 'r15_r45': r15 * r45,
-        # Products with tau-scaled returns
-        'tr2_r2': tr2 * r2, 'tr4_r4': tr4 * r4, 'tr5_r5': tr5 * r5,
-        'tr2_r12': tr2 * r12, 'tr4_r34': tr4 * r34,
-        'tr5_r15': tr5 * r15, 'tr4_r45': tr4 * r45,
-        'tr4_r12': tr4 * r12, 'tr2_r34': tr2 * r34,
-        'tr2_r4': tr2 * r4, 'tr1_r45': tr1 * r45,
-        'tr5_r45': tr5 * r45, 'tr4_r5': tr4 * r5,
-        'tr5': tr5,
-        # Extreme price indicators
-        'po1': po1, 'po2': po2, 'pc1': pc1, 'pc2': pc2
-    }, index=df.index)
-    # Handle first observation and adjust window parameters
-    x.iloc[0] = np.nan  # Mask first row due to lagged values
-    if isinstance(window, (int, np.integer)):
-        window = max(0, window - 1)  # Adjust window size for lag
-    if 'min_periods' in kwargs and isinstance(kwargs['min_periods'], (int, np.integer)):
-        kwargs['min_periods'] = max(0, kwargs['min_periods'] - 1)
-    # Compute rolling means for all variables
-    m = x.rolling(window=window, **kwargs).mean()
-    # Calculate probabilities of price extremes
-    pt = m['tau']  # Probability of valid price variation
-    po = m['po1'] + m['po2']  # Probability of open being extreme
-    pc = m['pc1'] + m['pc2']  # Probability of close being extreme
-    # Mask periods with insufficient data or zero probabilities
-    nt = x['tau'].rolling(window=window, **kwargs).sum()
-    m[(nt < 2) | (po == 0) | (pc == 0)] = np.nan
-    # Compute coefficients for moment conditions
-    a1 = -4.0 / po  # Scaling for open price moments
-    a2 = -4.0 / pc  # Scaling for close price moments
-    a3 = m['r1'] / pt  # Mean-adjustment for Mid-Open
-    a4 = m['tr4'] / pt  # Mean-adjustment for PrevClose-PrevMid
-    a5 = m['r3'] / pt  # Mean-adjustment for Mid-PrevClose
-    a6 = m['r5'] / pt  # Mean-adjustment for Open-PrevClose
-    # Pre-compute squared and product terms
-    a12 = 2 * a1 * a2
-    a11 = a1**2
-    a22 = a2**2
-    a33 = a3**2
-    a55 = a5**2
-    a66 = a6**2
-    # Calculate moment condition expectations
-    e1 = a1 * (m['r12'] - a3 * m['tr2']) + a2 * (m['r34'] - a4 * m['r3'])  # First moment
-    e2 = a1 * (m['r15'] - a3 * m['tr5']) + a2 * (m['r45'] - a4 * m['r5'])  # Second moment
-    # Calculate variances of moment conditions
-    # v1: Variance of first moment condition
-    v1 = -e1**2 + (
-        a11 * (m['r12_sq'] - 2 * a3 * m['tr2_r12'] + a33 * m['tr2_r2']) +
-        a22 * (m['r34_sq'] - 2 * a5 * m['tr4_r34'] + a55 * m['tr4_r4']) +
-        a12 * (m['r12_r34'] - a3 * m['tr2_r34'] - a5 * m['tr4_r12'] + a3 * a5 * m['tr2_r4'])
-    )
-    # v2: Variance of second moment condition
-    v2 = -e2**2 + (
-        a11 * (m['r15_sq'] - 2 * a3 * m['tr5_r15'] + a33 * m['tr5_r5']) +
-        a22 * (m['r45_sq'] - 2 * a6 * m['tr4_r45'] + a66 * m['tr4_r4']) +
-        a12 * (m['r15_r45'] - a3 * m['tr5_r45'] - a6 * m['tr1_r45'] + a3 * a6 * m['tr4_r5'])
-    )
-    # Compute squared spread using optimal GMM weights
-    vt = v1 + v2  # Total variance
-    s2 = pd.Series(np.where(
-        vt > 0,
-        (v2 * e1 + v1 * e2) / vt,  # Optimal weighted average if variance is positive
-        (e1 + e2) / 2.0  # Simple average if variance is zero/negative
-    ), index=df.index)
-    # Compute signed root
-    s = np.sqrt(np.abs(s2))
-    if sign:
-        s *= np.sign(s2)
-    return pd.Series(s, index=df.index, name=f"EDGE_rolling_{window}")
+    # --- 1. Validation ---
+    if not isinstance(window, int) or window < 3:
+        raise ValueError("Window must be an integer >= 3.")
+    if min_periods is None:
+        min_periods = window
+    # The core estimator needs at least 3 data points to work.
+    min_periods = max(3, min_periods)
+    # --- 2. Data Preparation ---
+    df_proc = df.rename(columns=str.lower).copy()
+    open_p = df_proc["open"].values
+    high_p = df_proc["high"].values
+    low_p = df_proc["low"].values
+    close_p = df_proc["close"].values
+    n = len(df_proc)
+    estimates = np.full(n, np.nan)
+    # --- 3. Loop and Apply (This logic now perfectly matches the test) ---
+    for i in range(0, n, step):
+        t1 = i + 1
+        t0 = t1 - window
+        # Only calculate if the window is full enough
+        if t1 >= min_periods and t0 >= 0:
+            estimates[i] = edge_single(
+                open_p[t0:t1],
+                high_p[t0:t1],
+                low_p[t0:t1],
+                close_p[t0:t1],
+                sign=sign,
+            )
+    return pd.Series(estimates, index=df_proc.index, name=f"EDGE_rolling_{window}")

{quantjourney_bidask-0.9.4.dist-info → quantjourney_bidask-1.0.1.dist-info}/METADATA RENAMED Viewed

@@ -1,9 +1,9 @@
 Metadata-Version: 2.4
 Name: quantjourney-bidask
-Version: 0.9.4
+Version: 1.0.1
 Summary: Efficient bid-ask spread estimator from OHLC prices
 Author-email: Jakub Polec <jakub@quantjourney.pro>
-License-Expression: MIT
+License: MIT
 Project-URL: Homepage, https://github.com/QuantJourneyOrg/qj_bidask
 Project-URL: Repository, https://github.com/QuantJourneyOrg/qj_bidask
 Project-URL: Bug Tracker, https://github.com/QuantJourneyOrg/qj_bidask/issues
@@ -26,6 +26,7 @@ Requires-Dist: yfinance>=0.2
 Requires-Dist: matplotlib>=3.5
 Requires-Dist: plotly>=5.0
 Requires-Dist: websocket-client>=1.0
+Requires-Dist: numba
 Provides-Extra: dev
 Requires-Dist: pytest>=7.0; extra == "dev"
 Requires-Dist: pytest-mock>=3.10; extra == "dev"
@@ -34,6 +35,7 @@ Requires-Dist: ruff>=0.1; extra == "dev"
 Requires-Dist: mypy>=1.0; extra == "dev"
 Requires-Dist: black>=22.0; extra == "dev"
 Requires-Dist: isort>=5.0; extra == "dev"
+Requires-Dist: numba; extra == "dev"
 Provides-Extra: examples
 Requires-Dist: jupyter>=1.0; extra == "examples"
 Requires-Dist: ipywidgets>=7.0; extra == "examples"
@@ -41,9 +43,13 @@ Dynamic: license-file
 # QuantJourney Bid-Ask Spread Estimator
-![PyPI](https://img.shields.io/pypi/v/quantjourney-bidask)
-![License](https://img.shields.io/github/license/quantjourney/bidask)
-![Tests](https://img.shields.io/github/workflow/status/quantjourney/bidask/Test)
+![Build Status](https://github.com/QuantJourneyOrg/qj_bidask/actions/workflows/test.yml/badge.svg)
+[![PyPi Version](https://img.shields.io/pypi/v/quantjourney-bidask.svg)](https://pypi.org/project/quantjourney-bidask/)
+[![Python Versions](https://img.shields.io/pypi/pyversions/quantjourney-bidask.svg)](https://pypi.org/project/quantjourney-bidask/)
+[![Downloads](https://pepy.tech/badge/quantjourney-bidask)](https://pepy.tech/project/quantjourney-bidask)
+[![License](https://img.shields.io/github/license/QuantJourneyOrg/qj_bidask.svg)](https://github.com/QuantJourneyOrg/qj_bidask/blob/main/LICENSE)
+[![GitHub Stars](https://img.shields.io/github/stars/QuantJourneyOrg/qj_bidask?style=social)](https://github.com/QuantJourneyOrg/qj_bidask)
 The `quantjourney-bidask` library provides an efficient estimator for calculating bid-ask spreads from open, high, low, and close (OHLC) prices, based on the methodology described in:
@@ -51,6 +57,8 @@ The `quantjourney-bidask` library provides an efficient estimator for calculatin
 This library is designed for quantitative finance professionals, researchers, and traders who need accurate and computationally efficient spread estimates for equities, cryptocurrencies, and other assets.
+🚀 **Part of the [QuantJourney](https://quantjourney.substack.com/) ecosystem** - The framework with advanced quantitative finance tools and insights!
 ## Features
 - **Efficient Spread Estimation**: Implements the EDGE estimator for single, rolling, and expanding windows.
@@ -62,6 +70,61 @@ This library is designed for quantitative finance professionals, researchers, an
 - **Comprehensive Tests**: Extensive unit tests with known test cases from the original paper.
 - **Clear Documentation**: Detailed docstrings and usage examples.
+## Examples and Visualizations
+The package includes comprehensive examples with beautiful visualizations:
+### Spread Monitor Results
+![Spread Monitor](https://raw.githubusercontent.com/QuantJourneyOrg/qj_bidask/refs/heads/main/_output/spread_monitor_results.png)
+### Basic Data Analysis
+![Crypto Spread Analysis](https://raw.githubusercontent.com/QuantJourneyOrg/qj_bidask/ad49bd78c82ab1c44561d0f2e707ae304575a147/_output/crypto_spread_comprehensive_analysis.png)
+### Crypto Spread Comparison
+![Crypto Spread Comparison](https://raw.githubusercontent.com/QuantJourneyOrg/qj_bidask/refs/heads/main/_output/crypto_spread_comparison.png)
+## FAQ
+ ### What exactly does the estimator compute?
+ The estimator returns the root mean square effective spread over the sample period. This quantifies the average transaction cost implied by bid-ask spreads, based on open, high, low, and close (OHLC) prices.
+ ### What is unique about this implementation?
+ This package provides a highly optimized and robust implementation of the EDGE estimator. Beyond a direct translation of the paper's formula, it features:
+ - A Hybrid, High-Performance Engine: The core logic leverages fast, vectorized NumPy operations for data preparation and calls a specialized, JIT-compiled kernel via Numba for the computationally intensive GMM calculations.
+ - HFT-Ready Version (edge_hft.py): An included, hyper-optimized function that uses fastmath compilation for the absolute lowest latency, designed for production HFT pipelines where every microsecond matters.
+ - Robust Data Handling: Gracefully manages missing values (NaN) and non-positive prices to prevent crashes.
+ - Advanced Windowing Functions: Efficient and correct edge_rolling and edge_expanding functions that are fully compatible with the powerful features of pandas, including custom step sizes.
+ ### What's the difference between the edge functions?
+ The library provides a tiered set of functions for different needs:
+ - edge(): The core function. It's fast, robust, and computes a single spread estimate for a given sample of data. This is the building block for all other functions.
+ - edge_hft(): A specialized version of edge() for HFT users. It's the fastest possible implementation but requires perfectly clean input data (no NaNs) to achieve its speed.
+ - edge_rolling(): Computes the spread on a rolling window over a time series. It's perfect for seeing how the spread evolves over time. It takes an integer window (at least 3 observations) plus optional step and min_periods arguments; other pandas.DataFrame.rolling() options are accepted but not applied.
+ - edge_expanding(): Computes the spread on an expanding (cumulative) window. This is useful for analyzing how the spread estimate converges or changes as more data becomes available.
+ ### What is the minimum number of observations?
+ At least 3 valid observations are required.
+ ### How should I choose the window size or frequency?
+ Short windows (e.g. a few days) reflect local spread conditions but may be noisy. Longer windows (e.g. 1 year) reduce variance but smooth over changes. For intraday use, minute-level frequency is recommended if the asset trades frequently.
+ Rule of thumb: ensure on average ≥2 trades per interval.
+ ### Can I use intraday or tick data?
+ Yes — the estimator supports intraday OHLC data directly. For tick data, resample into OHLC format first (e.g., using pandas.resample).
+ ### What if I get NaN results?
+ The estimator may return NaN if:
+ - Input prices are inconsistent (e.g. high < low)
+ - There are too many missing or invalid values
+ - Probability thresholds are not met (e.g. insufficient variance in prices)
+ - Spread variance is non-positive
+ In these cases, re-examine your input or adjust the sampling frequency.
 ## Installation
 Install the library via pip:
@@ -123,9 +186,9 @@ from quantjourney_bidask import edge_rolling
 import asyncio
 # Fetch stock data
-stock_df = get_stock_data("AAPL", period="1mo", interval="1d")
+stock_df = get_stock_data("PL", period="1mo", interval="1d")
 stock_spreads = edge_rolling(stock_df, window=20)
-print(f"AAPL average spread: {stock_spreads.mean():.6f}")
+print(f"PL average spread: {stock_spreads.mean():.6f}")
 # Fetch crypto data (async)
 async def get_crypto_spreads():
@@ -178,10 +241,13 @@ monitor.start_monitoring("1m")
 ```bash
 # Run the real-time dashboard
-python examples/realtime_spread_monitor.py --mode dashboard
+python examples/websocket_realtime_demo.py --mode dashboard
+# Or console mode
+python examples/websocket_realtime_demo.py --mode console
-# Or console mode
-python examples/realtime_spread_monitor.py --mode console
+# Quick 30-second BTC websocket demo
+python examples/animated_spread_monitor.py
 ```
 ## Project Structure
@@ -190,42 +256,32 @@ python examples/realtime_spread_monitor.py --mode console
 quantjourney_bidask/
 ├── quantjourney_bidask/          # Main library code
 │   ├── __init__.py
-│   ├── edge.py                   # Core EDGE estimator
+│   ├── edge.py                   # Core EDGE estimator
+│   ├── edge_hft.py               # HFT-optimized EDGE estimator
 │   ├── edge_rolling.py           # Rolling window estimation
 │   └── edge_expanding.py         # Expanding window estimation
 ├── data/
 │   └── fetch.py                  # Simplified data fetcher for examples
 ├── examples/                     # Comprehensive usage examples
 │   ├── simple_data_example.py    # Basic usage demonstration
-│   ├── spread_estimator.py       # Spread estimation examples
+│   ├── basic_spread_estimation.py # Core spread estimation examples
 │   ├── animated_spread_monitor.py # Animated visualizations
 │   ├── crypto_spread_comparison.py # Crypto spread analysis
 │   ├── liquidity_risk_monitor.py  # Risk monitoring
-│   ├── realtime_spread_monitor.py # Live monitoring dashboard
-│   └── stock_liquidity_risk.py    # Stock liquidity analysis
+│   ├── websocket_realtime_demo.py # Live websocket monitoring demo
+│   └── threshold_alert_monitor.py # Threshold-based spread alerts
 ├── tests/                        # Unit tests (GitHub only)
 │   ├── test_edge.py
 │   ├── test_edge_rolling.py
+│   ├── test_edge_expanding.py
 │   ├── test_data_fetcher.py
+│   └── test_estimators.py
 └── _output/                      # Example output images
     ├── simple_data_example.png
     ├── crypto_spread_comparison.png
     └── spread_estimator_results.png
 ```
-## Examples and Visualizations
-The package includes comprehensive examples with beautiful visualizations:
-### Basic Data Analysis
-![Crypto Spread Analysis](https://raw.githubusercontent.com/QuantJourneyOrg/qj_bidask/ad49bd78c82ab1c44561d0f2e707ae304575a147/_output/crypto_spread_comprehensive_analysis.png)
-### Crypto Spread Comparison
-![Crypto Spread Comparison](https://raw.githubusercontent.com/QuantJourneyOrg/qj_bidask/refs/heads/main/_output/crypto_spread_comparison.png)
-### Spread Estimation Results
-![Spread Estimator Results](https://raw.githubusercontent.com/QuantJourneyOrg/qj_bidask/refs/heads/main/_output/spread_estimator_results.png)
 ### Running Examples
 After installing via pip, examples are included in the package:
@@ -250,19 +306,20 @@ Or clone the repository for full access to examples and tests:
 git clone https://github.com/QuantJourneyOrg/qj_bidask
 cd qj_bidask
 python examples/simple_data_example.py
-python examples/spread_estimator.py
+python examples/basic_spread_estimation.py
+python examples/animated_spread_monitor.py  # 30s real BTC websocket demo
 python examples/crypto_spread_comparison.py
 ```
 ### Available Examples
 - **`simple_data_example.py`** - Basic usage with stock and crypto data
-- **`spread_estimator.py`** - Core spread estimation functionality
-- **`animated_spread_monitor.py`** - Real-time animated visualizations
-- **`crypto_spread_comparison.py`** - Multi-asset crypto analysis
+- **`basic_spread_estimation.py`** - Core spread estimation functionality
+- **`animated_spread_monitor.py`** - Real-time animated visualizations with 30s websocket demo
+- **`crypto_spread_comparison.py`** - Multi-asset crypto analysis and comparison
 - **`liquidity_risk_monitor.py`** - Risk monitoring and alerts
-- **`realtime_spread_monitor.py`** - Live websocket monitoring dashboard
-- **`stock_liquidity_risk.py`** - Stock-specific liquidity analysis
+- **`websocket_realtime_demo.py`** - Live websocket monitoring dashboard
+- **`threshold_alert_monitor.py`** - Threshold-based spread alerts and monitoring
 ## Testing and Development
@@ -297,7 +354,8 @@ python -m pytest tests/test_data_fetcher.py -v
 # Run examples
 python examples/simple_data_example.py
-python examples/spread_estimator.py
+python examples/basic_spread_estimation.py
+python examples/animated_spread_monitor.py  # Real BTC websocket demo
 ```
 ### Package vs Repository
@@ -370,7 +428,8 @@ pip install -e ".[dev]"
 pytest
 # Run examples
-python examples/realtime_spread_monitor.py
+python examples/animated_spread_monitor.py  # 30s real BTC websocket demo
+python examples/websocket_realtime_demo.py  # Full dashboard
 ```
 ## Support

quantjourney_bidask-1.0.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,11 @@
+quantjourney_bidask/__init__.py,sha256=lBMoVnF1hxp_3axSHGw6mrRLbwXmk_xPvDsTSkAWV1A,955
+quantjourney_bidask/_compare_edge.py,sha256=q5Oz81ZbCh6JOTViTRQ7wq-f9m5Xue4ANn6DqC0pYbY,8670
+quantjourney_bidask/edge.py,sha256=S_PlmwZQd6BCHMHkeWrapzNMXGCqW2pgVgpbchXDknI,7559
+quantjourney_bidask/edge_expanding.py,sha256=QEbhHSA3xWOfa_0oRoj2ypyLHimmAm-S7vulbD2Pf3s,1594
+quantjourney_bidask/edge_hft.py,sha256=UyTla9TF16LCigGaY92i19m9A5qhPymd8LJ-P7VYTv8,4681
+quantjourney_bidask/edge_rolling.py,sha256=c1RLHd3Q9vQj9V42OzDCmc8K12sUBq_UJ3HiMAXz14M,1934
+quantjourney_bidask-1.0.1.dist-info/licenses/LICENSE,sha256=m8MEOGnpSBtS6m9z4M9m1JksWWPzu1OK3UgY1wuHf04,1081
+quantjourney_bidask-1.0.1.dist-info/METADATA,sha256=AFvN-YQqha8kdAoJ8UtTeNSDOvKEY9YpdUmk8HNdKrU,17564
+quantjourney_bidask-1.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+quantjourney_bidask-1.0.1.dist-info/top_level.txt,sha256=rOBM4GxA87iQv-mR8-WZdu3-Yj5ESyggRICpUhJ-4Dg,20
+quantjourney_bidask-1.0.1.dist-info/RECORD,,

quantjourney_bidask/_version.py DELETED Viewed

@@ -1,7 +0,0 @@
-"""Version information for quantjourney_bidask."""
-__version__ = "0.9.4"
-__author__ = "Jakub Polec"
-__email__ = "jakub@quantjourney.pro"
-__license__ = "MIT"
-__copyright__ = "Copyright (c) 2025 Jakub Polec, QuantJourney"

quantjourney-bidask 0.9.4__py3-none-any.whl → 1.0.1__py3-none-any.whl

quantjourney-bidask 0.9.4py3-none-any.whl → 1.0.1py3-none-any.whl