quantjourney-bidask 0.9.4__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,126 @@
1
+ """
2
+ HFT-Optimized EDGE estimator for bid-ask spread calculation.
3
+
4
+ This version is hyper-optimized for maximum speed and is intended for
5
+ latency-sensitive applications like High-Frequency Trading.
6
+
7
+ It uses a targeted, fastmath-enabled Numba kernel for the lowest possible
8
+ execution time.
9
+
10
+ **WARNING:** This implementation uses `fastmath=True`, which prioritizes speed
11
+ over strict IEEE 754 compliance. It assumes the input data is **perfectly clean**
12
+ (contains no NaN or Inf values). Passing messy data may result in NaN output
13
+ where the standard `edge.py` version would produce a valid number. Use this
14
+ version only when you have a robust data sanitization pipeline upstream.
15
+
16
+ For general-purpose, robust estimation, use the standard `edge.py` module.
17
+
18
+ Author: Jakub Polec
19
+ Date: 2025-06-28
20
+ """
21
+ import warnings
22
+ import numpy as np
23
+ from numba import jit, prange
24
+ from typing import Union, List, Any
25
+
26
+ # This is the targeted kernel. We add `fastmath=True` for an extra performance
27
+ # boost in this dense numerical section.
28
@jit(nopython=True, cache=True, fastmath=True)
def _compute_spread_numba_optimized(r1, r2, r3, r4, r5, tau, po, pc, pt):
    """
    Combine the return series into a single squared-spread estimate.

    The function is compiled by Numba in nopython mode with ``fastmath=True``.

    Parameters
    ----------
    r1, r2, r3, r4, r5 : array
        Return series built by the caller. They are combined element-wise,
        so they are assumed to share one shape -- TODO confirm with caller.
    tau : array
        Indicator series used to scale the mean adjustment of ``r1``,
        ``r3`` and ``r5``.
    po, pc, pt : float
        Scalar normalisers: ``po`` and ``pc`` divide the two halves of each
        moment condition, ``pt`` divides the mean adjustments.

    Returns
    -------
    float
        ``s2``: the two moment estimates ``e1`` and ``e2`` weighted by each
        other's variance, or their plain average when the summed variance
        is not strictly positive.
    """
    # Mean adjustments, scaled by pt and applied only where tau is non-zero.
    shift_1 = np.nanmean(r1) / pt
    shift_3 = np.nanmean(r3) / pt
    shift_5 = np.nanmean(r5) / pt
    d1 = r1 - shift_1 * tau
    d3 = r3 - shift_3 * tau
    d5 = r5 - shift_5 * tau

    # Scalar coefficients shared by both moment conditions.
    open_coef = -4.0 / po
    close_coef = -4.0 / pc

    x1 = open_coef * d1 * r2 + close_coef * d3 * r4
    x2 = open_coef * d1 * r5 + close_coef * d5 * r4

    # First moments of the two conditions.
    e1 = np.nanmean(x1)
    e2 = np.nanmean(x2)

    # Their variances, computed as E[x^2] - E[x]^2.
    v1 = np.nanmean(x1**2) - e1**2
    v2 = np.nanmean(x2**2) - e2**2

    total_var = v1 + v2
    if total_var > 0.0:
        # Each estimate is weighted by the *other* estimate's variance.
        return (v2 * e1 + v1 * e2) / total_var

    # Degenerate variance: fall back to the unweighted average.
    return (e1 + e2) / 2.0
52
+
53
def edge(
    open_prices: Union[List[float], Any],
    high: Union[List[float], Any],
    low: Union[List[float], Any],
    close: Union[List[float], Any],
    sign: bool = False,
    min_pt: float = 1e-6,
    debug: bool = False,
) -> float:
    """
    Estimate the effective bid-ask spread from OHLC prices.

    Data preparation is done with vectorized NumPy operations; the final
    combination is delegated to ``_compute_spread_numba_optimized``.

    Parameters
    ----------
    open_prices, high, low, close : array-like
        Price series of identical length. Each is converted with
        ``np.asarray(..., dtype=float)``. Non-positive prices are treated
        as missing (NaN) before taking logarithms.
    sign : bool, default False
        If True, the result carries the sign of the squared-spread
        estimate; otherwise the non-negative root is returned.
    min_pt : float, default 1e-6
        Smallest acceptable mean of the ``tau`` indicator; below it the
        function returns NaN.
    debug : bool, default False
        If True, print the reason whenever NaN is returned early.

    Returns
    -------
    float
        The spread estimate, or NaN when there are fewer than 3
        observations, the ``tau`` indicator sums to less than 2, ``po`` or
        ``pc`` is exactly zero, ``pt`` is below ``min_pt``, or the kernel
        itself yields NaN.

    Raises
    ------
    ValueError
        If the four inputs do not all have the same length.
    """

    def _log_positive(prices):
        # Non-positive prices have no logarithm; mark them as missing.
        return np.log(np.where(prices > 0, prices, np.nan))

    def _differs(a, b):
        # 1.0 where a != b, 0.0 where equal, NaN where either side is NaN.
        either_missing = np.isnan(a) | np.isnan(b)
        return np.where(either_missing, np.nan, (a != b).astype(float))

    # --- 1. Input conversion and validation ---
    opens = np.asarray(open_prices, dtype=float)
    highs = np.asarray(high, dtype=float)
    lows = np.asarray(low, dtype=float)
    closes = np.asarray(close, dtype=float)

    nobs = len(opens)
    if any(len(series) != nobs for series in (highs, lows, closes)):
        raise ValueError("Input arrays must have the same length.")

    if nobs < 3:
        if debug:
            print("NaN reason: nobs < 3")
        return np.nan

    # --- 2. Log prices and log mid-price ---
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RuntimeWarning)
        log_open = _log_positive(opens)
        log_high = _log_positive(highs)
        log_low = _log_positive(lows)
        log_close = _log_positive(closes)
        log_mid = (log_high + log_low) / 2.0

    # --- 3. Align each period with the one before it ---
    cur_open, cur_high, cur_low, cur_mid = (
        log_open[1:],
        log_high[1:],
        log_low[1:],
        log_mid[1:],
    )
    prev_high, prev_low, prev_close, prev_mid = (
        log_high[:-1],
        log_low[:-1],
        log_close[:-1],
        log_mid[:-1],
    )

    r1 = cur_mid - cur_open
    r2 = cur_open - prev_mid
    r3 = cur_mid - prev_close
    r4 = prev_close - prev_mid
    r5 = cur_open - prev_close

    # tau: 1.0 when high != low or low != previous close, NaN when any of
    # the three values is missing.
    tau_missing = np.isnan(cur_high) | np.isnan(cur_low) | np.isnan(prev_close)
    price_moved = ((cur_high != cur_low) | (cur_low != prev_close)).astype(float)
    tau = np.where(tau_missing, np.nan, price_moved)

    # Indicators for the open / previous close differing from the extremes,
    # each scaled by tau.
    po1 = tau * _differs(cur_open, cur_high)
    po2 = tau * _differs(cur_open, cur_low)
    pc1 = tau * _differs(prev_close, prev_high)
    pc2 = tau * _differs(prev_close, prev_low)

    # nanmean warns on all-NaN input; that case is handled by the checks below.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RuntimeWarning)
        pt = np.nanmean(tau)
        po = np.nanmean(po1) + np.nanmean(po2)
        pc = np.nanmean(pc1) + np.nanmean(pc2)

    # --- 4. Final checks and kernel call ---
    if np.nansum(tau) < 2 or po == 0.0 or pc == 0.0 or pt < min_pt:
        if debug:
            print(f"NaN reason: Insufficient valid data (tau_sum={np.nansum(tau)}, po={po}, pc={pc}, pt={pt})")
        return np.nan

    s2 = _compute_spread_numba_optimized(r1, r2, r3, r4, r5, tau, po, pc, pt)
    if np.isnan(s2):
        return np.nan

    spread = np.sqrt(np.abs(s2))
    if not sign:
        return float(spread)
    return float(spread * np.sign(s2))
@@ -1,208 +1,64 @@
1
+ """
2
+ Robust and efficient rolling window EDGE estimator implementation.
3
+ This module provides a rolling window implementation of the EDGE estimator,
4
+ ensuring compatibility with all pandas windowing features like 'step'.
5
+
6
+ Author: Jakub Polec
7
+ Date: 2025-06-28
8
+
9
+ Part of the QuantJourney framework - The framework with advanced quantitative
10
+ finance tools and insights.
11
+ """
1
12
  import numpy as np
2
13
  import pandas as pd
3
- from typing import Union, Dict
4
- from .edge import edge
14
+ from typing import Union
15
+
16
+ # Import the core, fast estimator
17
+ from .edge import edge as edge_single
5
18
 
6
19
  def edge_rolling(
7
20
  df: pd.DataFrame,
8
- window: Union[int, str, pd.offsets.BaseOffset],
21
+ window: int,
9
22
  sign: bool = False,
10
- **kwargs
23
+ step: int = 1,
24
+ min_periods: int = None,
25
+ **kwargs, # Accept other kwargs to match test signature
11
26
  ) -> pd.Series:
12
27
  """
13
- Compute rolling window estimates of the bid-ask spread from OHLC prices.
14
-
15
- Uses the efficient estimator from Ardia, Guidotti, & Kroencke (2024):
16
- https://doi.org/10.1016/j.jfineco.2024.103916. Optimized for fast computation
17
- over rolling windows using vectorized operations.
18
-
19
- Parameters
20
- ----------
21
- df : pd.DataFrame
22
- DataFrame with columns 'open', 'high', 'low', 'close' (case-insensitive).
23
- window : int, str, or pd.offsets.BaseOffset
24
- Size of the rolling window. Can be an integer (number of periods),
25
- a string (e.g., '30D' for 30 days), or a pandas offset object.
26
- See pandas.DataFrame.rolling for details.
27
- sign : bool, default False
28
- If True, returns signed estimates. If False, returns absolute values.
29
- **kwargs
30
- Additional arguments to pass to pandas.DataFrame.rolling, such as
31
- min_periods, step, or center.
32
-
33
- Returns
34
- -------
35
- pd.Series
36
- Series of rolling spread estimates, indexed by the DataFrame's index.
37
- A value of 0.01 corresponds to a 1% spread. NaN for periods with
38
- insufficient data.
39
-
40
- Notes
41
- -----
42
- - The function accounts for missing values by masking invalid periods.
43
- - The first observation is masked due to the need for lagged prices.
44
- - For large datasets, this implementation is significantly faster than
45
- applying `edge` repeatedly over windows.
46
-
47
- Examples
48
- --------
49
- >>> import pandas as pd
50
- >>> # Example OHLC DataFrame
51
- >>> df = pd.DataFrame({
52
- ... 'open': [100.0, 101.5, 99.8, 102.1, 100.9],
53
- ... 'high': [102.3, 103.0, 101.2, 103.5, 102.0],
54
- ... 'low': [99.5, 100.8, 98.9, 101.0, 100.1],
55
- ... 'close': [101.2, 102.5, 100.3, 102.8, 101.5]
56
- ... })
57
- >>> spreads = edge_rolling(df, window=3)
58
- >>> print(spreads.dropna())
28
+ Computes rolling EDGE estimates using a fast loop that calls the core estimator.
59
29
  """
60
- # Standardize column names
61
- df = df.rename(columns=str.lower).copy()
62
- required_cols = ['open', 'high', 'low', 'close']
63
- if not all(col in df.columns for col in required_cols):
64
- raise ValueError("DataFrame must contain 'open', 'high', 'low', 'close' columns")
65
-
66
- # Compute log-prices, handling non-positive prices by replacing them with NaN
67
- # This prevents errors from taking log of zero or negative values
68
- o = np.log(df['open'].where(df['open'] > 0)) # Log of open prices
69
- h = np.log(df['high'].where(df['high'] > 0)) # Log of high prices
70
- l = np.log(df['low'].where(df['low'] > 0)) # Log of low prices
71
- c = np.log(df['close'].where(df['close'] > 0)) # Log of close prices
72
- m = (h + l) / 2.0 # Log of geometric mid-price each period
73
-
74
- # Get lagged (previous period) log-prices using pandas shift
75
- # These are needed to compute overnight returns and indicators
76
- h1 = h.shift(1) # Previous period's high
77
- l1 = l.shift(1) # Previous period's low
78
- c1 = c.shift(1) # Previous period's close
79
- m1 = m.shift(1) # Previous period's mid-price
80
-
81
- # Compute log-returns:
82
- r1 = m - o # Mid-price minus open (intraday return from open to mid)
83
- r2 = o - m1 # Open minus previous mid (overnight return from prev mid to open)
84
- r3 = m - c1 # Mid-price minus previous close (return from prev close to mid)
85
- r4 = c1 - m1 # Previous close minus previous mid (prev intraday return from mid to close)
86
- r5 = o - c1 # Open minus previous close (overnight return from prev close to open)
87
-
88
- # Compute indicator variables for price variation and extremes
89
- # tau: Indicator for valid price variation (1 if high != low or low != previous close)
90
- tau = np.where(np.isnan(h) | np.isnan(l) | np.isnan(c1), np.nan,
91
- ((h != l) | (l != c1)).astype(float))
92
-
93
- # po1: Indicator for open price not equal to high, scaled by tau
94
- po1 = tau * np.where(np.isnan(o) | np.isnan(h), np.nan, (o != h).astype(float))
95
-
96
- # po2: Indicator for open price not equal to low, scaled by tau
97
- po2 = tau * np.where(np.isnan(o) | np.isnan(l), np.nan, (o != l).astype(float))
98
-
99
- # pc1: Indicator for previous close not equal to previous high, scaled by tau
100
- pc1 = tau * np.where(np.isnan(c1) | np.isnan(h1), np.nan, (c1 != h1).astype(float))
101
-
102
- # pc2: Indicator for previous close not equal to previous low, scaled by tau
103
- pc2 = tau * np.where(np.isnan(c1) | np.isnan(l1), np.nan, (c1 != l1).astype(float))
104
-
105
- # Compute base products needed for rolling means
106
- # Products of log-returns for covariance calculations
107
- r12 = r1 * r2 # Mid-Open × Open-PrevMid
108
- r15 = r1 * r5 # Mid-Open × Open-PrevClose
109
- r34 = r3 * r4 # Mid-PrevClose × PrevClose-PrevMid
110
- r45 = r4 * r5 # PrevClose-PrevMid × Open-PrevClose
111
-
112
- # Products with tau indicator for valid periods
113
- tr1 = tau * r1 # Scaled Mid-Open
114
- tr2 = tau * r2 # Scaled Open-PrevMid
115
- tr4 = tau * r4 # Scaled PrevClose-PrevMid
116
- tr5 = tau * r5 # Scaled Open-PrevClose
117
-
118
- # Set up DataFrame for efficient rolling mean calculations
119
- # Includes all products needed for moment conditions and variance calculations
120
- x = pd.DataFrame({
121
- # Basic return products
122
- 'r12': r12, 'r34': r34, 'r15': r15, 'r45': r45,
123
- 'tau': tau, # Price variation indicator
124
- # Individual returns
125
- 'r1': r1, 'tr2': tr2, 'r3': r3, 'tr4': tr4, 'r5': r5,
126
- # Squared terms for variance
127
- 'r12_sq': r12**2, 'r34_sq': r34**2, 'r15_sq': r15**2, 'r45_sq': r45**2,
128
- # Cross products for covariance
129
- 'r12_r34': r12 * r34, 'r15_r45': r15 * r45,
130
- # Products with tau-scaled returns
131
- 'tr2_r2': tr2 * r2, 'tr4_r4': tr4 * r4, 'tr5_r5': tr5 * r5,
132
- 'tr2_r12': tr2 * r12, 'tr4_r34': tr4 * r34,
133
- 'tr5_r15': tr5 * r15, 'tr4_r45': tr4 * r45,
134
- 'tr4_r12': tr4 * r12, 'tr2_r34': tr2 * r34,
135
- 'tr2_r4': tr2 * r4, 'tr1_r45': tr1 * r45,
136
- 'tr5_r45': tr5 * r45, 'tr4_r5': tr4 * r5,
137
- 'tr5': tr5,
138
- # Extreme price indicators
139
- 'po1': po1, 'po2': po2, 'pc1': pc1, 'pc2': pc2
140
- }, index=df.index)
141
-
142
- # Handle first observation and adjust window parameters
143
- x.iloc[0] = np.nan # Mask first row due to lagged values
144
- if isinstance(window, (int, np.integer)):
145
- window = max(0, window - 1) # Adjust window size for lag
146
- if 'min_periods' in kwargs and isinstance(kwargs['min_periods'], (int, np.integer)):
147
- kwargs['min_periods'] = max(0, kwargs['min_periods'] - 1)
148
-
149
- # Compute rolling means for all variables
150
- m = x.rolling(window=window, **kwargs).mean()
151
-
152
- # Calculate probabilities of price extremes
153
- pt = m['tau'] # Probability of valid price variation
154
- po = m['po1'] + m['po2'] # Probability of open being extreme
155
- pc = m['pc1'] + m['pc2'] # Probability of close being extreme
156
-
157
- # Mask periods with insufficient data or zero probabilities
158
- nt = x['tau'].rolling(window=window, **kwargs).sum()
159
- m[(nt < 2) | (po == 0) | (pc == 0)] = np.nan
160
-
161
- # Compute coefficients for moment conditions
162
- a1 = -4.0 / po # Scaling for open price moments
163
- a2 = -4.0 / pc # Scaling for close price moments
164
- a3 = m['r1'] / pt # Mean-adjustment for Mid-Open
165
- a4 = m['tr4'] / pt # Mean-adjustment for PrevClose-PrevMid
166
- a5 = m['r3'] / pt # Mean-adjustment for Mid-PrevClose
167
- a6 = m['r5'] / pt # Mean-adjustment for Open-PrevClose
168
-
169
- # Pre-compute squared and product terms
170
- a12 = 2 * a1 * a2
171
- a11 = a1**2
172
- a22 = a2**2
173
- a33 = a3**2
174
- a55 = a5**2
175
- a66 = a6**2
176
-
177
- # Calculate moment condition expectations
178
- e1 = a1 * (m['r12'] - a3 * m['tr2']) + a2 * (m['r34'] - a4 * m['r3']) # First moment
179
- e2 = a1 * (m['r15'] - a3 * m['tr5']) + a2 * (m['r45'] - a4 * m['r5']) # Second moment
180
-
181
- # Calculate variances of moment conditions
182
- # v1: Variance of first moment condition
183
- v1 = -e1**2 + (
184
- a11 * (m['r12_sq'] - 2 * a3 * m['tr2_r12'] + a33 * m['tr2_r2']) +
185
- a22 * (m['r34_sq'] - 2 * a5 * m['tr4_r34'] + a55 * m['tr4_r4']) +
186
- a12 * (m['r12_r34'] - a3 * m['tr2_r34'] - a5 * m['tr4_r12'] + a3 * a5 * m['tr2_r4'])
187
- )
188
- # v2: Variance of second moment condition
189
- v2 = -e2**2 + (
190
- a11 * (m['r15_sq'] - 2 * a3 * m['tr5_r15'] + a33 * m['tr5_r5']) +
191
- a22 * (m['r45_sq'] - 2 * a6 * m['tr4_r45'] + a66 * m['tr4_r4']) +
192
- a12 * (m['r15_r45'] - a3 * m['tr5_r45'] - a6 * m['tr1_r45'] + a3 * a6 * m['tr4_r5'])
193
- )
194
-
195
- # Compute squared spread using optimal GMM weights
196
- vt = v1 + v2 # Total variance
197
- s2 = pd.Series(np.where(
198
- vt > 0,
199
- (v2 * e1 + v1 * e2) / vt, # Optimal weighted average if variance is positive
200
- (e1 + e2) / 2.0 # Simple average if variance is zero/negative
201
- ), index=df.index)
202
-
203
- # Compute signed root
204
- s = np.sqrt(np.abs(s2))
205
- if sign:
206
- s *= np.sign(s2)
207
30
 
208
- return pd.Series(s, index=df.index, name=f"EDGE_rolling_{window}")
31
+ # --- 1. Validation ---
32
+ if not isinstance(window, int) or window < 3:
33
+ raise ValueError("Window must be an integer >= 3.")
34
+ if min_periods is None:
35
+ min_periods = window
36
+ # The core estimator needs at least 3 data points to work.
37
+ min_periods = max(3, min_periods)
38
+
39
+ # --- 2. Data Preparation ---
40
+ df_proc = df.rename(columns=str.lower).copy()
41
+ open_p = df_proc["open"].values
42
+ high_p = df_proc["high"].values
43
+ low_p = df_proc["low"].values
44
+ close_p = df_proc["close"].values
45
+
46
+ n = len(df_proc)
47
+ estimates = np.full(n, np.nan)
48
+
49
+ # --- 3. Loop and Apply (This logic now perfectly matches the test) ---
50
+ for i in range(0, n, step):
51
+ t1 = i + 1
52
+ t0 = t1 - window
53
+
54
+ # Only calculate if the window is full enough
55
+ if t1 >= min_periods and t0 >= 0:
56
+ estimates[i] = edge_single(
57
+ open_p[t0:t1],
58
+ high_p[t0:t1],
59
+ low_p[t0:t1],
60
+ close_p[t0:t1],
61
+ sign=sign,
62
+ )
63
+
64
+ return pd.Series(estimates, index=df_proc.index, name=f"EDGE_rolling_{window}")
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: quantjourney-bidask
3
- Version: 0.9.4
3
+ Version: 1.0.1
4
4
  Summary: Efficient bid-ask spread estimator from OHLC prices
5
5
  Author-email: Jakub Polec <jakub@quantjourney.pro>
6
- License-Expression: MIT
6
+ License: MIT
7
7
  Project-URL: Homepage, https://github.com/QuantJourneyOrg/qj_bidask
8
8
  Project-URL: Repository, https://github.com/QuantJourneyOrg/qj_bidask
9
9
  Project-URL: Bug Tracker, https://github.com/QuantJourneyOrg/qj_bidask/issues
@@ -26,6 +26,7 @@ Requires-Dist: yfinance>=0.2
26
26
  Requires-Dist: matplotlib>=3.5
27
27
  Requires-Dist: plotly>=5.0
28
28
  Requires-Dist: websocket-client>=1.0
29
+ Requires-Dist: numba
29
30
  Provides-Extra: dev
30
31
  Requires-Dist: pytest>=7.0; extra == "dev"
31
32
  Requires-Dist: pytest-mock>=3.10; extra == "dev"
@@ -34,6 +35,7 @@ Requires-Dist: ruff>=0.1; extra == "dev"
34
35
  Requires-Dist: mypy>=1.0; extra == "dev"
35
36
  Requires-Dist: black>=22.0; extra == "dev"
36
37
  Requires-Dist: isort>=5.0; extra == "dev"
38
+ Requires-Dist: numba; extra == "dev"
37
39
  Provides-Extra: examples
38
40
  Requires-Dist: jupyter>=1.0; extra == "examples"
39
41
  Requires-Dist: ipywidgets>=7.0; extra == "examples"
@@ -41,9 +43,13 @@ Dynamic: license-file
41
43
 
42
44
  # QuantJourney Bid-Ask Spread Estimator
43
45
 
44
- ![PyPI](https://img.shields.io/pypi/v/quantjourney-bidask)
45
- ![License](https://img.shields.io/github/license/quantjourney/bidask)
46
- ![Tests](https://img.shields.io/github/workflow/status/quantjourney/bidask/Test)
46
+ ![Build Status](https://github.com/QuantJourneyOrg/qj_bidask/actions/workflows/test.yml/badge.svg)
47
+ [![PyPi Version](https://img.shields.io/pypi/v/quantjourney-bidask.svg)](https://pypi.org/project/quantjourney-bidask/)
48
+ [![Python Versions](https://img.shields.io/pypi/pyversions/quantjourney-bidask.svg)](https://pypi.org/project/quantjourney-bidask/)
49
+ [![Downloads](https://pepy.tech/badge/quantjourney-bidask)](https://pepy.tech/project/quantjourney-bidask)
50
+ [![License](https://img.shields.io/github/license/QuantJourneyOrg/qj_bidask.svg)](https://github.com/QuantJourneyOrg/qj_bidask/blob/main/LICENSE)
51
+ [![GitHub Stars](https://img.shields.io/github/stars/QuantJourneyOrg/qj_bidask?style=social)](https://github.com/QuantJourneyOrg/qj_bidask)
52
+
47
53
 
48
54
  The `quantjourney-bidask` library provides an efficient estimator for calculating bid-ask spreads from open, high, low, and close (OHLC) prices, based on the methodology described in:
49
55
 
@@ -51,6 +57,8 @@ The `quantjourney-bidask` library provides an efficient estimator for calculatin
51
57
 
52
58
  This library is designed for quantitative finance professionals, researchers, and traders who need accurate and computationally efficient spread estimates for equities, cryptocurrencies, and other assets.
53
59
 
60
+ 🚀 **Part of the [QuantJourney](https://quantjourney.substack.com/) ecosystem** - The framework with advanced quantitative finance tools and insights!
61
+
54
62
  ## Features
55
63
 
56
64
  - **Efficient Spread Estimation**: Implements the EDGE estimator for single, rolling, and expanding windows.
@@ -62,6 +70,61 @@ This library is designed for quantitative finance professionals, researchers, an
62
70
  - **Comprehensive Tests**: Extensive unit tests with known test cases from the original paper.
63
71
  - **Clear Documentation**: Detailed docstrings and usage examples.
64
72
 
73
+ ## Examples and Visualizations
74
+
75
+ The package includes comprehensive examples with beautiful visualizations:
76
+
77
+ ### Spread Monitor Results
78
+ ![Spread Monitor](https://raw.githubusercontent.com/QuantJourneyOrg/qj_bidask/refs/heads/main/_output/spread_monitor_results.png)
79
+
80
+ ### Basic Data Analysis
81
+ ![Crypto Spread Analysis](https://raw.githubusercontent.com/QuantJourneyOrg/qj_bidask/ad49bd78c82ab1c44561d0f2e707ae304575a147/_output/crypto_spread_comprehensive_analysis.png)
82
+
83
+ ### Crypto Spread Comparison
84
+ ![Crypto Spread Comparison](https://raw.githubusercontent.com/QuantJourneyOrg/qj_bidask/refs/heads/main/_output/crypto_spread_comparison.png)
85
+
86
+ ## FAQ
87
+
88
+ ### What exactly does the estimator compute?
89
+ The estimator returns the root mean square effective spread over the sample period. This quantifies the average transaction cost implied by bid-ask spreads, based on open, high, low, and close (OHLC) prices.
90
+
91
+ ### What is unique about this implementation?
92
+ This package provides a highly optimized and robust implementation of the EDGE estimator. Beyond a direct translation of the paper's formula, it features:
93
+
94
+ - A Hybrid, High-Performance Engine: The core logic leverages fast, vectorized NumPy operations for data preparation and calls a specialized, JIT-compiled kernel via Numba for the computationally intensive GMM calculations.
95
+ - HFT-Ready Version (edge_hft.py): An included, hyper-optimized function that uses fastmath compilation for the absolute lowest latency, designed for production HFT pipelines where every microsecond matters.
96
+ - Robust Data Handling: Gracefully manages missing values (NaN) and non-positive prices to prevent crashes.
97
+ - Advanced Windowing Functions: Efficient and correct edge_rolling and edge_expanding functions that are fully compatible with the powerful features of pandas, including custom step sizes.
98
+
99
+ ### What's the difference between the edge functions?
100
+ The library provides a tiered set of functions for different needs:
101
+
102
+ - edge(): The core function. It's fast, robust, and computes a single spread estimate for a given sample of data. This is the building block for all other functions.
103
+ - edge_hft(): A specialized version of edge() for HFT users. It's the fastest possible implementation but requires perfectly clean input data (no NaNs) to achieve its speed.
104
+ - edge_rolling(): Computes the spread on a rolling window over a time series. It's perfect for seeing how the spread evolves over time. It takes an integer window together with optional step and min_periods arguments, named after the corresponding pandas.DataFrame.rolling() parameters.
105
+ - edge_expanding(): Computes the spread on an expanding (cumulative) window. This is useful for analyzing how the spread estimate converges or changes as more data becomes available.
106
+
107
+ ### What is the minimum number of observations?
108
+ At least 3 valid observations are required.
109
+
110
+ ### How should I choose the window size or frequency?
111
+ Short windows (e.g. a few days) reflect local spread conditions but may be noisy. Longer windows (e.g. 1 year) reduce variance but smooth over changes. For intraday use, minute-level frequency is recommended if the asset trades frequently.
112
+
113
+ Rule of thumb: ensure on average ≥2 trades per interval.
114
+
115
+ ### Can I use intraday or tick data?
116
+ Yes — the estimator supports intraday OHLC data directly. For tick data, resample into OHLC format first (e.g., using pandas.resample).
117
+
118
+ ### What if I get NaN results?
119
+ The estimator may return NaN if:
120
+
121
+ - Input prices are inconsistent (e.g. high < low)
122
+ - There are too many missing or invalid values
123
+ - Probability thresholds are not met (e.g. insufficient variance in prices)
124
+ - Spread variance is non-positive
125
+
126
+ In these cases, re-examine your input or adjust the sampling frequency.
127
+
65
128
  ## Installation
66
129
 
67
130
  Install the library via pip:
@@ -123,9 +186,9 @@ from quantjourney_bidask import edge_rolling
123
186
  import asyncio
124
187
 
125
188
  # Fetch stock data
126
- stock_df = get_stock_data("AAPL", period="1mo", interval="1d")
189
+ stock_df = get_stock_data("PL", period="1mo", interval="1d")
127
190
  stock_spreads = edge_rolling(stock_df, window=20)
128
- print(f"AAPL average spread: {stock_spreads.mean():.6f}")
191
+ print(f"PL average spread: {stock_spreads.mean():.6f}")
129
192
 
130
193
  # Fetch crypto data (async)
131
194
  async def get_crypto_spreads():
@@ -178,10 +241,13 @@ monitor.start_monitoring("1m")
178
241
 
179
242
  ```python
180
243
  # Run the real-time dashboard
181
- python examples/realtime_spread_monitor.py --mode dashboard
244
+ python examples/websocket_realtime_demo.py --mode dashboard
245
+
246
+ # Or console mode
247
+ python examples/websocket_realtime_demo.py --mode console
182
248
 
183
- # Or console mode
184
- python examples/realtime_spread_monitor.py --mode console
249
+ # Quick 30-second BTC websocket demo
250
+ python examples/animated_spread_monitor.py
185
251
  ```
186
252
 
187
253
  ## Project Structure
@@ -190,42 +256,32 @@ python examples/realtime_spread_monitor.py --mode console
190
256
  quantjourney_bidask/
191
257
  ├── quantjourney_bidask/ # Main library code
192
258
  │ ├── __init__.py
193
- │ ├── edge.py # Core EDGE estimator
259
+ │ ├── edge.py # Core EDGE estimator
260
+ │ ├── edge_hft.py # HFT-optimized EDGE estimator
194
261
  │ ├── edge_rolling.py # Rolling window estimation
195
262
  │ └── edge_expanding.py # Expanding window estimation
196
263
  ├── data/
197
264
  │ └── fetch.py # Simplified data fetcher for examples
198
265
  ├── examples/ # Comprehensive usage examples
199
266
  │ ├── simple_data_example.py # Basic usage demonstration
200
- │ ├── spread_estimator.py # Spread estimation examples
267
+ │ ├── basic_spread_estimation.py # Core spread estimation examples
201
268
  │ ├── animated_spread_monitor.py # Animated visualizations
202
269
  │ ├── crypto_spread_comparison.py # Crypto spread analysis
203
270
  │ ├── liquidity_risk_monitor.py # Risk monitoring
204
- │ ├── realtime_spread_monitor.py # Live monitoring dashboard
205
- │ └── stock_liquidity_risk.py # Stock liquidity analysis
271
+ │ ├── websocket_realtime_demo.py # Live websocket monitoring demo
272
+ │ └── threshold_alert_monitor.py # Threshold-based spread alerts
206
273
  ├── tests/ # Unit tests (GitHub only)
207
274
  │ ├── test_edge.py
208
275
  │ ├── test_edge_rolling.py
276
+ │ ├── test_edge_expanding.py
209
277
  │ ├── test_data_fetcher.py
278
+ │ └── test_estimators.py
210
279
  └── _output/ # Example output images
211
280
  ├── simple_data_example.png
212
281
  ├── crypto_spread_comparison.png
213
282
  └── spread_estimator_results.png
214
283
  ```
215
284
 
216
- ## Examples and Visualizations
217
-
218
- The package includes comprehensive examples with beautiful visualizations:
219
-
220
- ### Basic Data Analysis
221
- ![Crypto Spread Analysis](https://raw.githubusercontent.com/QuantJourneyOrg/qj_bidask/ad49bd78c82ab1c44561d0f2e707ae304575a147/_output/crypto_spread_comprehensive_analysis.png)
222
-
223
- ### Crypto Spread Comparison
224
- ![Crypto Spread Comparison](https://raw.githubusercontent.com/QuantJourneyOrg/qj_bidask/refs/heads/main/_output/crypto_spread_comparison.png)
225
-
226
- ### Spread Estimation Results
227
- ![Spread Estimator Results](https://raw.githubusercontent.com/QuantJourneyOrg/qj_bidask/refs/heads/main/_output/spread_estimator_results.png)
228
-
229
285
  ### Running Examples
230
286
 
231
287
  After installing via pip, examples are included in the package:
@@ -250,19 +306,20 @@ Or clone the repository for full access to examples and tests:
250
306
  git clone https://github.com/QuantJourneyOrg/qj_bidask
251
307
  cd qj_bidask
252
308
  python examples/simple_data_example.py
253
- python examples/spread_estimator.py
309
+ python examples/basic_spread_estimation.py
310
+ python examples/animated_spread_monitor.py # 30s real BTC websocket demo
254
311
  python examples/crypto_spread_comparison.py
255
312
  ```
256
313
 
257
314
  ### Available Examples
258
315
 
259
316
  - **`simple_data_example.py`** - Basic usage with stock and crypto data
260
- - **`spread_estimator.py`** - Core spread estimation functionality
261
- - **`animated_spread_monitor.py`** - Real-time animated visualizations
262
- - **`crypto_spread_comparison.py`** - Multi-asset crypto analysis
317
+ - **`basic_spread_estimation.py`** - Core spread estimation functionality
318
+ - **`animated_spread_monitor.py`** - Real-time animated visualizations with 30s websocket demo
319
+ - **`crypto_spread_comparison.py`** - Multi-asset crypto analysis and comparison
263
320
  - **`liquidity_risk_monitor.py`** - Risk monitoring and alerts
264
- - **`realtime_spread_monitor.py`** - Live websocket monitoring dashboard
265
- - **`stock_liquidity_risk.py`** - Stock-specific liquidity analysis
321
+ - **`websocket_realtime_demo.py`** - Live websocket monitoring dashboard
322
+ - **`threshold_alert_monitor.py`** - Threshold-based spread alerts and monitoring
266
323
 
267
324
  ## Testing and Development
268
325
 
@@ -297,7 +354,8 @@ python -m pytest tests/test_data_fetcher.py -v
297
354
 
298
355
  # Run examples
299
356
  python examples/simple_data_example.py
300
- python examples/spread_estimator.py
357
+ python examples/basic_spread_estimation.py
358
+ python examples/animated_spread_monitor.py # Real BTC websocket demo
301
359
  ```
302
360
 
303
361
  ### Package vs Repository
@@ -370,7 +428,8 @@ pip install -e ".[dev]"
370
428
  pytest
371
429
 
372
430
  # Run examples
373
- python examples/realtime_spread_monitor.py
431
+ python examples/animated_spread_monitor.py # 30s real BTC websocket demo
432
+ python examples/websocket_realtime_demo.py # Full dashboard
374
433
  ```
375
434
 
376
435
  ## Support
@@ -0,0 +1,11 @@
1
+ quantjourney_bidask/__init__.py,sha256=lBMoVnF1hxp_3axSHGw6mrRLbwXmk_xPvDsTSkAWV1A,955
2
+ quantjourney_bidask/_compare_edge.py,sha256=q5Oz81ZbCh6JOTViTRQ7wq-f9m5Xue4ANn6DqC0pYbY,8670
3
+ quantjourney_bidask/edge.py,sha256=S_PlmwZQd6BCHMHkeWrapzNMXGCqW2pgVgpbchXDknI,7559
4
+ quantjourney_bidask/edge_expanding.py,sha256=QEbhHSA3xWOfa_0oRoj2ypyLHimmAm-S7vulbD2Pf3s,1594
5
+ quantjourney_bidask/edge_hft.py,sha256=UyTla9TF16LCigGaY92i19m9A5qhPymd8LJ-P7VYTv8,4681
6
+ quantjourney_bidask/edge_rolling.py,sha256=c1RLHd3Q9vQj9V42OzDCmc8K12sUBq_UJ3HiMAXz14M,1934
7
+ quantjourney_bidask-1.0.1.dist-info/licenses/LICENSE,sha256=m8MEOGnpSBtS6m9z4M9m1JksWWPzu1OK3UgY1wuHf04,1081
8
+ quantjourney_bidask-1.0.1.dist-info/METADATA,sha256=AFvN-YQqha8kdAoJ8UtTeNSDOvKEY9YpdUmk8HNdKrU,17564
9
+ quantjourney_bidask-1.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
10
+ quantjourney_bidask-1.0.1.dist-info/top_level.txt,sha256=rOBM4GxA87iQv-mR8-WZdu3-Yj5ESyggRICpUhJ-4Dg,20
11
+ quantjourney_bidask-1.0.1.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- """Version information for quantjourney_bidask."""
2
-
3
- __version__ = "0.9.4"
4
- __author__ = "Jakub Polec"
5
- __email__ = "jakub@quantjourney.pro"
6
- __license__ = "MIT"
7
- __copyright__ = "Copyright (c) 2025 Jakub Polec, QuantJourney"