liquidator-indicator 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ """Liquidator indicator package - lightweight core API."""
2
+ from .core import Liquidator
3
+ from .indicators import compute_vwap, compute_atr
4
+
5
+ __all__ = ["Liquidator", "compute_vwap", "compute_atr"]
6
+ __version__ = "0.0.2"
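The hunk above defines the package's public surface. A minimal import sketch, assuming the wheel is installed (names taken from `__all__`):

    from liquidator_indicator import Liquidator, compute_vwap, compute_atr

    liq = Liquidator(coin='BTC')  # zone-inference engine defined in core.py further down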
@@ -0,0 +1,11 @@
1
+ """
2
+ Data stream collectors for liquidator_indicator package.
3
+
4
+ These are OPTIONAL utilities that help users collect public data feeds.
5
+ Users can either:
6
+ 1. Use these collectors to automate data gathering
7
+ 2. Implement their own collectors
8
+ 3. Feed data from existing sources (files, APIs, etc.)
9
+
10
+ The indicator package itself is data-source agnostic.
11
+ """
@@ -0,0 +1,202 @@
1
+ """
2
+ FundingRateCollector - Collect live funding rates and open interest from Hyperliquid WebSocket.
3
+
4
+ Based on src/data/funding_ws_connection.py pattern but simplified for package distribution.
5
+
6
+ Usage:
7
+ from liquidator_indicator.collectors.funding import FundingRateCollector
8
+
9
+ collector = FundingRateCollector(symbols=['BTC', 'ETH'])
10
+ collector.start()
11
+
12
+ # Get latest data
13
+ data = collector.get_latest()
14
+ print(data)
15
+ # {'BTC': {'funding_rate': 0.0001, 'open_interest': 12345.67, 'timestamp': '2026-02-02T...'}}
16
+
17
+ # Feed to indicator
18
+ liq.ingest_funding_rates(data)
19
+
20
+ collector.stop()
21
+ """
22
+ import json
23
+ import time
24
+ import threading
25
+ from datetime import datetime, timezone
26
+ from typing import List, Dict, Optional, Callable
27
+ import logging
28
+
29
+ try:
30
+ import websocket
31
+ except ImportError:
32
+ raise ImportError("websocket-client required: pip install websocket-client")
33
+
34
+ logger = logging.getLogger("liquidator_indicator.funding_collector")
35
+
36
+
37
+ class FundingRateCollector:
38
+ """Collect live funding rates and open interest from Hyperliquid WebSocket."""
39
+
40
+ def __init__(
41
+ self,
42
+ symbols: List[str],
43
+ ws_url: str = "wss://api.hyperliquid.xyz/ws",
44
+ callback: Optional[Callable] = None
45
+ ):
46
+ """
47
+ Args:
48
+ symbols: List of coin symbols to track (e.g. ['BTC', 'ETH'])
49
+ ws_url: WebSocket endpoint URL
50
+ callback: Optional function called on each update: callback(symbol, data)
51
+ """
52
+ self.symbols = [s.upper() for s in symbols]
53
+ self.ws_url = ws_url
54
+ self.callback = callback
55
+
56
+ self._data = {} # {symbol: {funding_rate, open_interest, timestamp}}
57
+ self._ws = None
58
+ self._thread = None
59
+ self._running = False
60
+ self._lock = threading.Lock()
61
+
62
+ def start(self):
63
+ """Start WebSocket connection in background thread."""
64
+ if self._running:
65
+ logger.warning("Collector already running")
66
+ return
67
+
68
+ self._running = True
69
+ self._thread = threading.Thread(target=self._run_ws, daemon=True)
70
+ self._thread.start()
71
+ logger.info(f"FundingRateCollector started for {self.symbols}")
72
+
73
+ def stop(self):
74
+ """Stop WebSocket connection."""
75
+ self._running = False
76
+ if self._ws:
77
+ try:
78
+ self._ws.close()
79
+ except Exception:
80
+ pass
81
+ if self._thread:
82
+ self._thread.join(timeout=2)
83
+ logger.info("FundingRateCollector stopped")
84
+
85
+ def get_latest(self) -> Dict[str, Dict]:
86
+ """Get latest funding data for all symbols.
87
+
88
+ Returns:
89
+ {symbol: {funding_rate, open_interest, timestamp}}
90
+ """
91
+ with self._lock:
92
+ return self._data.copy()
93
+
94
+ def get_symbol(self, symbol: str) -> Optional[Dict]:
95
+ """Get latest data for specific symbol."""
96
+ with self._lock:
97
+ return self._data.get(symbol.upper())
98
+
99
+ def _run_ws(self):
100
+ """WebSocket main loop (runs in background thread)."""
101
+ while self._running:
102
+ try:
103
+ self._ws = websocket.WebSocketApp(
104
+ self.ws_url,
105
+ on_message=self._on_message,
106
+ on_error=self._on_error,
107
+ on_close=self._on_close,
108
+ on_open=self._on_open
109
+ )
110
+ self._ws.run_forever()
111
+ except Exception as e:
112
+ logger.error(f"WebSocket error: {e}")
113
+
114
+ if self._running:
115
+ logger.info("Reconnecting in 5s...")
116
+ time.sleep(5)
117
+
118
+ def _on_open(self, ws):
119
+ """Subscribe to activeAssetCtx channel for funding rates."""
120
+ logger.info("WebSocket connected")
121
+ for symbol in self.symbols:
122
+ sub_msg = json.dumps({
123
+ "method": "subscribe",
124
+ "subscription": {
125
+ "type": "activeAssetCtx",
126
+ "coin": symbol
127
+ }
128
+ })
129
+ ws.send(sub_msg)
130
+ logger.info(f"Subscribed to {symbol} funding/OI")
131
+
132
+ def _on_message(self, ws, message):
133
+ """Process funding rate updates."""
134
+ try:
135
+ if message == '{"channel":"pong"}':
136
+ return
137
+
138
+ data = json.loads(message)
139
+ channel = data.get('channel', '')
140
+
141
+ if channel == 'activeAssetCtx':
142
+ asset_data = data.get('data', {})
143
+ coin = asset_data.get('coin', '')
144
+ ctx = asset_data.get('ctx', {})
145
+
146
+ if coin and ctx and coin in self.symbols:
147
+ funding_rate = float(ctx.get('funding', 0))
148
+ open_interest = float(ctx.get('openInterest', 0))
149
+ timestamp = datetime.now(timezone.utc).isoformat()
150
+
151
+ update = {
152
+ 'funding_rate': funding_rate,
153
+ 'open_interest': open_interest,
154
+ 'timestamp': timestamp
155
+ }
156
+
157
+ with self._lock:
158
+ self._data[coin] = update
159
+
160
+ logger.debug(f"{coin}: funding={funding_rate:.6f}, oi={open_interest:.2f}")
161
+
162
+ # Call user callback if provided
163
+ if self.callback:
164
+ try:
165
+ self.callback(coin, update)
166
+ except Exception as e:
167
+ logger.error(f"Callback error: {e}")
168
+
169
+ except Exception as e:
170
+ logger.error(f"Message parse error: {e}")
171
+
172
+ def _on_error(self, ws, error):
173
+ logger.error(f"WebSocket error: {error}")
174
+
175
+ def _on_close(self, ws, close_status_code, close_msg):
176
+ logger.info(f"WebSocket closed: {close_status_code} {close_msg}")
177
+
178
+
179
+ # Example usage
180
+ if __name__ == '__main__':
181
+ logging.basicConfig(level=logging.INFO)
182
+
183
+ def on_update(symbol, data):
184
+ print(f"UPDATE: {symbol} @ {data['timestamp']}")
185
+ print(f" Funding: {data['funding_rate']:.6f}")
186
+ print(f" OI: {data['open_interest']:.2f}")
187
+
188
+ collector = FundingRateCollector(
189
+ symbols=['BTC', 'ETH', 'SOL'],
190
+ callback=on_update
191
+ )
192
+
193
+ collector.start()
194
+
195
+ try:
196
+ while True:
197
+ time.sleep(10)
198
+ latest = collector.get_latest()
199
+ print(f"\nLatest data: {len(latest)} symbols")
200
+ except KeyboardInterrupt:
201
+ print("\nStopping...")
202
+ collector.stop()
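One way to wire this collector into the indicator is the `callback` hook documented above, pushing each funding/OI update into `Liquidator.ingest_funding_rates` as it arrives. A sketch under that assumption (polling `get_latest()` as in the `__main__` block works just as well):

    import time
    from liquidator_indicator import Liquidator
    from liquidator_indicator.collectors.funding import FundingRateCollector

    liq = Liquidator(coin='BTC')

    def push_to_indicator(symbol, update):
        # callback(symbol, data) as documented above; re-wrap into the dict shape
        # ingest_funding_rates expects: {symbol: {funding_rate, open_interest, timestamp}}
        liq.ingest_funding_rates({symbol: update})

    collector = FundingRateCollector(symbols=['BTC'], callback=push_to_indicator)
    collector.start()
    try:
        time.sleep(30)              # let a few activeAssetCtx updates arrive
        print(liq.compute_zones())  # funding/OI weighting kicks in once trades are also ingested
    finally:
        collector.stop()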
@@ -0,0 +1,447 @@
1
+ """Core logic for liquidator indicator: infer liquidation zones from public trade data.
2
+
3
+ Instead of using private liquidation feeds, this analyzes PUBLIC TRADE DATA to detect
4
+ liquidation-like patterns and cluster them into zones. Works with data anyone can collect
5
+ from public websocket feeds.
6
+ """
7
+ from typing import List, Dict, Optional
8
+ import pandas as pd
9
+ import numpy as np
10
+ import math
11
+ from datetime import datetime, timezone
12
+
13
+ # Try to import numba optimizations, fall back to pure Python if not available
14
+ try:
15
+ from . import numba_optimized
16
+ NUMBA_AVAILABLE = True
17
+ except ImportError:
18
+ NUMBA_AVAILABLE = False
19
+
20
+ DEFAULT_PCT_MERGE = 0.003 # 0.3%
21
+ DEFAULT_LIQ_SIZE_THRESHOLD = 0.1 # BTC minimum for liquidation inference
22
+
23
+ class Liquidator:
24
+ """Infer liquidation zones from public trade data.
25
+
26
+ Detects liquidation-like patterns from public trades:
27
+ - Large sudden trades (size threshold)
28
+ - Rapid price moves with volume spikes
29
+ - Clustered trades at price levels
30
+
31
+ Usage:
32
+ L = Liquidator()
33
+ L.ingest_trades(trades_list) # public trade data
34
+ zones = L.compute_zones()
35
+ """
36
+ def __init__(self, coin: str = 'BTC', pct_merge: float = DEFAULT_PCT_MERGE, zone_vol_mult: float = 1.5, window_minutes: int = 30, liq_size_threshold: float = DEFAULT_LIQ_SIZE_THRESHOLD):
37
+ self.coin = coin
38
+ self._trades = pd.DataFrame()
39
+ self._inferred_liqs = pd.DataFrame()
40
+ self._funding_data = pd.DataFrame() # NEW: funding rates + open interest
41
+ self._candles = None
42
+ self._zone_history = [] # track zone width over time for expansion/contraction
43
+ # configuration
44
+ self.pct_merge = float(pct_merge)
45
+ self.zone_vol_mult = float(zone_vol_mult)
46
+ self.window_minutes = int(window_minutes)
47
+ self.liq_size_threshold = float(liq_size_threshold)
48
+
49
+ def ingest_trades(self, data):
50
+ """Ingest public trade data and infer liquidation events.
51
+
52
+ Accepts list[dict] or DataFrame with fields:
53
+ - time/timestamp: trade timestamp (ms or ISO)
54
+ - px/price: trade price
55
+ - sz/size: trade size
56
+ - side: 'A' (ask/sell) or 'B' (bid/buy)
57
+ - coin: asset symbol
58
+ """
59
+ if data is None:
60
+ return
61
+ if isinstance(data, pd.DataFrame):
62
+ df = data.copy()
63
+ else:
64
+ df = pd.DataFrame(data)
65
+ if df.empty:
66
+ return
67
+
68
+ # normalize timestamp
69
+ if 'time' in df.columns:
70
+ try:
71
+ df['timestamp'] = pd.to_datetime(df['time'], unit='ms', errors='coerce', utc=True)
72
+ except Exception:
73
+ df['timestamp'] = pd.to_datetime(df['time'], errors='coerce', utc=True)
74
+ elif 'timestamp' in df.columns:
75
+ df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce', utc=True)
76
+ else:
77
+ df['timestamp'] = pd.to_datetime(df.iloc[:,0], errors='coerce', utc=True)
78
+
79
+ # normalize price/size
80
+ if 'px' in df.columns:
81
+ df['price'] = pd.to_numeric(df['px'], errors='coerce')
82
+ elif 'price' not in df.columns:
83
+ df['price'] = pd.to_numeric(df.iloc[:,2], errors='coerce')
84
+ else:
85
+ df['price'] = pd.to_numeric(df['price'], errors='coerce')
86
+
87
+ if 'sz' in df.columns:
88
+ df['size'] = pd.to_numeric(df['sz'], errors='coerce')
89
+ elif 'size' not in df.columns:
90
+ df['size'] = pd.to_numeric(df.iloc[:,3], errors='coerce')
91
+ else:
92
+ df['size'] = pd.to_numeric(df['size'], errors='coerce')
93
+
94
+ # normalize side
95
+ if 'side' in df.columns:
96
+ df['side'] = df['side'].astype(str).str.upper()
97
+ df['coin'] = df.get('coin', self.coin)
98
+ df['usd_value'] = df['price'] * df['size']
99
+
100
+ df = df[['timestamp','side','coin','price','size','usd_value']]
101
+ df = df.dropna(subset=['timestamp','price','size'])
102
+ df = df.sort_values('timestamp')
103
+
104
+ # store raw trades
105
+ if self._trades.empty:
106
+ self._trades = df
107
+ else:
108
+ self._trades = pd.concat([self._trades, df], ignore_index=True).drop_duplicates().sort_values('timestamp')
109
+
110
+ # filter to keep only recent trades (last 48h by default)
111
+ cutoff_time = pd.Timestamp.now(tz='UTC') - pd.Timedelta(hours=48)
112
+ self._trades = self._trades[self._trades['timestamp'] >= cutoff_time]
113
+
114
+ # infer liquidations from trade patterns
115
+ self._infer_liquidations()
116
+
117
+ def _infer_liquidations(self):
118
+ """Detect liquidation-like events from trade patterns + funding/OI signals."""
119
+ if self._trades.empty:
120
+ return
121
+
122
+ df = self._trades.copy()
123
+
124
+ # Pattern 1: Large trades (likely forced liquidations)
125
+ large_trades = df[df['size'] >= self.liq_size_threshold].copy()
126
+
127
+ # Pattern 2: Rapid price moves with volume spikes (cascade indicator)
128
+ df['price_change'] = df['price'].pct_change().abs()
129
+ df['vol_spike'] = df['size'] > df['size'].rolling(20, min_periods=1).mean() * 2
130
+ cascades = df[(df['price_change'] > 0.001) & df['vol_spike']].copy()
131
+
132
+ # Pattern 3: Funding rate extremes (NEW)
133
+ # Extreme funding (>0.1% or <-0.1%) indicates overleveraged positions
134
+ funding_liqs = pd.DataFrame()
135
+ if not self._funding_data.empty:
136
+ try:
137
+ # Get latest funding for this coin
138
+ coin_funding = self._funding_data[self._funding_data['symbol'] == self.coin]
139
+ if not coin_funding.empty:
140
+ latest_funding = coin_funding.iloc[-1]
141
+ funding_rate = float(latest_funding.get('funding_rate', 0))
142
+
143
+ # Extreme funding threshold
144
+ if abs(funding_rate) > 0.001: # 0.1%
145
+ # Trades during extreme funding = higher liquidation probability
146
+ # Apply 1.5x weight multiplier to these trades
147
+ funding_liqs = df.copy()
148
+ funding_liqs['usd_value'] = funding_liqs['usd_value'] * 1.5
149
+ funding_liqs = funding_liqs.head(int(len(funding_liqs) * 0.3)) # Top 30% by recency
150
+ except Exception:
151
+ pass
152
+
153
+ # Pattern 4: Open interest drops (NEW)
154
+ # Sudden OI drops indicate liquidations happening NOW
155
+ oi_liqs = pd.DataFrame()
156
+ if not self._funding_data.empty and len(self._funding_data) > 1:
157
+ try:
158
+ coin_oi = self._funding_data[self._funding_data['symbol'] == self.coin].sort_values('timestamp')
159
+ if len(coin_oi) >= 2:
160
+ oi_change_pct = (coin_oi['open_interest'].iloc[-1] - coin_oi['open_interest'].iloc[-2]) / coin_oi['open_interest'].iloc[-2]
161
+
162
+ # OI drop >5% = confirmed liquidation event
163
+ if oi_change_pct < -0.05:
164
+ # Recent trades during OI drop = confirmed liquidations
165
+ # Apply 2x weight multiplier
166
+ recent_window = pd.Timestamp.now(tz='UTC') - pd.Timedelta(minutes=5)
167
+ oi_liqs = df[df['timestamp'] > recent_window].copy()
168
+ oi_liqs['usd_value'] = oi_liqs['usd_value'] * 2.0
169
+ except Exception:
170
+ pass
171
+
172
+ # Combine all patterns
173
+ patterns = [large_trades, cascades]
174
+ if not funding_liqs.empty:
175
+ patterns.append(funding_liqs)
176
+ if not oi_liqs.empty:
177
+ patterns.append(oi_liqs)
178
+
179
+ inferred = pd.concat(patterns, ignore_index=True).drop_duplicates(subset=['timestamp','price'])
180
+
181
+ if inferred.empty:
182
+ self._inferred_liqs = pd.DataFrame()
183
+ return
184
+
185
+ # Map side: A (ask/sell) = long liquidation, B (bid/buy) = short liquidation
186
+ inferred['side'] = inferred['side'].map({'A': 'long', 'B': 'short'})
187
+
188
+ self._inferred_liqs = inferred[['timestamp','side','coin','price','usd_value']].sort_values('timestamp')
189
+
190
+ def ingest_funding_rates(self, data):
191
+ """Ingest funding rate and open interest data.
192
+
193
+ Accepts dict {symbol: {funding_rate, open_interest, timestamp}} or DataFrame.
194
+ Used to enhance liquidation detection with funding/OI signals.
195
+
196
+ Example:
197
+ funding = {'BTC': {'funding_rate': 0.0001, 'open_interest': 12345.67, 'timestamp': '...'}}
198
+ liq.ingest_funding_rates(funding)
199
+ """
200
+ if data is None:
201
+ return
202
+
203
+ if isinstance(data, dict):
204
+ # Convert dict to DataFrame
205
+ rows = []
206
+ for symbol, vals in data.items():
207
+ rows.append({
208
+ 'symbol': symbol,
209
+ 'funding_rate': float(vals.get('funding_rate', 0)),
210
+ 'open_interest': float(vals.get('open_interest', 0)),
211
+ 'timestamp': vals.get('timestamp', datetime.now(timezone.utc).isoformat())
212
+ })
213
+ df = pd.DataFrame(rows)
214
+ else:
215
+ df = data.copy() if isinstance(data, pd.DataFrame) else pd.DataFrame(data)
216
+
217
+ if df.empty:
218
+ return
219
+
220
+ # Normalize timestamp
221
+ if 'timestamp' in df.columns:
222
+ df['timestamp'] = pd.to_datetime(df['timestamp'], utc=True, errors='coerce')
223
+
224
+ # Merge with existing data
225
+ if self._funding_data.empty:
226
+ self._funding_data = df
227
+ else:
228
+ # Keep most recent data per symbol
229
+ combined = pd.concat([self._funding_data, df], ignore_index=True)
230
+ self._funding_data = combined.sort_values('timestamp').groupby('symbol').tail(1).reset_index(drop=True)
231
+
232
+ def ingest_liqs(self, data):
233
+ """Legacy method for backward compatibility. Redirects to ingest_trades."""
234
+ self.ingest_trades(data)
235
+
236
+ def update_candles(self, df: pd.DataFrame):
237
+ """Optional candle series used to compute volatility-adjusted bands."""
238
+ self._candles = df.copy()
239
+
240
+ def compute_zones(self, window_minutes: Optional[int] = None, pct_merge: Optional[float] = None, use_atr: bool = True):
241
+ """Cluster inferred liquidations by price into zones.
242
+ Returns DataFrame of zones: price_mean, price_min, price_max, total_usd, count, first_ts, last_ts, strength
243
+ """
244
+ if self._inferred_liqs.empty:
245
+ return pd.DataFrame()
246
+ # determine params
247
+ window_minutes = int(window_minutes) if window_minutes is not None else int(self.window_minutes)
248
+ pct_merge = float(pct_merge) if pct_merge is not None else float(self.pct_merge)
249
+ # limit to recent window
250
+ now = pd.Timestamp.utcnow()
251
+ window_start = now - pd.Timedelta(minutes=window_minutes)
252
+ df = self._inferred_liqs[self._inferred_liqs['timestamp'] >= window_start].copy()
253
+ # If filtering by recent window returns nothing (e.g., test data with static timestamps),
254
+ # fall back to using all available inferred liquidations so the algorithms can still run.
255
+ if df.empty:
256
+ df = self._inferred_liqs.copy()
257
+ # sort by price and iterate to form clusters
258
+ df = df.sort_values('price').reset_index(drop=True)
259
+
260
+ # Use Numba-optimized clustering if available
261
+ if NUMBA_AVAILABLE and len(df) > 100: # Worth it for larger datasets
262
+ # Prepare numpy arrays for numba
263
+ prices = df['price'].to_numpy(dtype=np.float64)
264
+ usd_values = df['usd_value'].fillna(0.0).to_numpy(dtype=np.float64)
265
+ timestamps_seconds = (df['timestamp'].astype(np.int64).to_numpy() / 1e9).astype(np.float64)
266
+
267
+ # Encode sides: 0=unknown, 1=long, 2=short
268
+ side_map = {'long': 1, 'short': 2}
269
+ sides_encoded = df['side'].map(side_map).fillna(0).astype(np.int32).to_numpy()
270
+
271
+ # Run numba clustering
272
+ (cluster_ids, cluster_means, cluster_mins, cluster_maxs, cluster_usds,
273
+ cluster_cnts, cluster_ts_firsts, cluster_ts_lasts, cluster_longs, cluster_shorts) = \
274
+ numba_optimized.cluster_prices_numba(prices, usd_values, timestamps_seconds,
275
+ sides_encoded, pct_merge)
276
+
277
+ # Determine dominant side per cluster
278
+ dominant_sides = []
279
+ for i in range(len(cluster_means)):
280
+ if cluster_longs[i] > cluster_shorts[i]:
281
+ dominant_sides.append('long')
282
+ elif cluster_shorts[i] > cluster_longs[i]:
283
+ dominant_sides.append('short')
284
+ else:
285
+ dominant_sides.append('unknown')
286
+
287
+ # Compute strength using numba
288
+ current_time_sec = pd.Timestamp.utcnow().timestamp()
289
+ strengths = numba_optimized.compute_strength_batch(
290
+ cluster_usds, cluster_cnts, cluster_ts_lasts, current_time_sec
291
+ )
292
+
293
+ # Build output DataFrame
294
+ zones_df = pd.DataFrame({
295
+ 'price_mean': cluster_means,
296
+ 'price_min': cluster_mins,
297
+ 'price_max': cluster_maxs,
298
+ 'total_usd': cluster_usds,
299
+ 'count': cluster_cnts,
300
+ 'first_ts': pd.to_datetime(cluster_ts_firsts, unit='s', utc=True),
301
+ 'last_ts': pd.to_datetime(cluster_ts_lasts, unit='s', utc=True),
302
+ 'dominant_side': dominant_sides,
303
+ 'strength': strengths
304
+ }).sort_values('strength', ascending=False)
305
+ else:
306
+ # Fallback to original Python implementation
307
+ clusters = []
308
+ cur = {'prices': [], 'usd': 0.0, 'count': 0, 'ts_first': None, 'ts_last': None, 'sides': {}}
309
+ for _, row in df.iterrows():
310
+ p = float(row['price'])
311
+ u = float(row.get('usd_value') or 0.0)
312
+ ts = pd.to_datetime(row['timestamp'])
313
+ side_val = str(row['side']) if pd.notna(row['side']) else 'unknown'
314
+ if cur['count'] == 0:
315
+ cur['prices'] = [p]
316
+ cur['usd'] = u
317
+ cur['count'] = 1
318
+ cur['ts_first'] = ts
319
+ cur['ts_last'] = ts
320
+ cur['sides'] = {side_val: 1}
321
+ continue
322
+ pm = np.mean(cur['prices'])
323
+ # pct distance
324
+ if abs(p - pm) / pm <= pct_merge:
325
+ cur['prices'].append(p)
326
+ cur['usd'] += u
327
+ cur['count'] += 1
328
+ cur['ts_last'] = max(cur['ts_last'], ts)
329
+ cur['sides'][side_val] = cur['sides'].get(side_val,0) + 1
330
+ else:
331
+ clusters.append(cur)
332
+ cur = {'prices':[p], 'usd':u, 'count':1, 'ts_first':ts, 'ts_last':ts, 'sides':{side_val:1}}
333
+ if cur['count'] > 0:
334
+ clusters.append(cur)
335
+ # build DataFrame
336
+ out = []
337
+ for c in clusters:
338
+ prices = np.array(c['prices'], dtype=float)
339
+ price_mean = float(prices.mean())
340
+ price_min = float(prices.min())
341
+ price_max = float(prices.max())
342
+ total_usd = float(c['usd'])
343
+ count = int(c['count'])
344
+ first_ts = c['ts_first']
345
+ last_ts = c['ts_last']
346
+ sides = c['sides']
347
+ dominant_side = max(sides.items(), key=lambda x: x[1])[0] if sides else None
348
+ strength = self._compute_strength(total_usd, count, last_ts)
349
+ out_item = {'price_mean':price_mean,'price_min':price_min,'price_max':price_max,'total_usd':total_usd,'count':count,'first_ts':first_ts,'last_ts':last_ts,'dominant_side':dominant_side,'strength':strength}
350
+ out.append(out_item)
351
+ zones_df = pd.DataFrame(out).sort_values('strength', ascending=False)
352
+
353
+ # compute volatility band (ATR) if requested and candles available
354
+ if use_atr and self._candles is not None and not self._candles.empty and 'high' in self._candles.columns and 'low' in self._candles.columns and 'close' in self._candles.columns:
355
+ try:
356
+ if NUMBA_AVAILABLE:
357
+ # Use numba-optimized ATR
358
+ high = self._candles['high'].to_numpy(dtype=np.float64)
359
+ low = self._candles['low'].to_numpy(dtype=np.float64)
360
+ close = self._candles['close'].to_numpy(dtype=np.float64)
361
+ atr_array = numba_optimized.compute_atr_numba(high, low, close, period=14)
362
+ last_atr = float(atr_array[-1]) if len(atr_array) > 0 else 0.0
363
+ else:
364
+ atr = self._compute_atr(self._candles)
365
+ last_atr = float(atr.iloc[-1]) if not atr.empty else 0.0
366
+ except Exception:
367
+ last_atr = 0.0
368
+ else:
369
+ last_atr = 0.0
370
+
371
+ # apply band: band = max(perc-based pad, atr*zone_vol_mult)
372
+ if NUMBA_AVAILABLE and not zones_df.empty:
373
+ # Use numba-optimized band computation
374
+ price_means = zones_df['price_mean'].to_numpy(dtype=np.float64)
375
+ band_widths, entry_lows, entry_highs, band_pcts = \
376
+ numba_optimized.compute_zone_bands(price_means, pct_merge, last_atr, self.zone_vol_mult)
377
+
378
+ zones_df['atr'] = last_atr
379
+ zones_df['band'] = band_widths
380
+ zones_df['band_pct'] = band_pcts
381
+ zones_df['entry_low'] = entry_lows
382
+ zones_df['entry_high'] = entry_highs
383
+ else:
384
+ # Fallback to original implementation
385
+ bands = []
386
+ for _, row in zones_df.iterrows():
387
+ pm = float(row['price_mean'])
388
+ # percent padding fallback (small)
389
+ pct_pad = max(0.001, pct_merge)
390
+ pad_by_pct = pm * pct_pad
391
+ pad_by_atr = last_atr * float(self.zone_vol_mult)
392
+ pad = max(pad_by_pct, pad_by_atr)
393
+ entry_low = pm - pad
394
+ entry_high = pm + pad
395
+ band_pct = pad / pm if pm else 0.0
396
+ bands.append({'atr': last_atr, 'band': pad, 'band_pct': band_pct, 'entry_low': entry_low, 'entry_high': entry_high})
397
+ if bands:
398
+ bands_df = pd.DataFrame(bands)
399
+ zones_df = pd.concat([zones_df.reset_index(drop=True), bands_df.reset_index(drop=True)], axis=1)
400
+
401
+ # Track zone width for regime detection
402
+ if not zones_df.empty and 'band' in zones_df.columns:
403
+ avg_width = float(zones_df['band'].mean())
404
+ self._zone_history.append({'timestamp': pd.Timestamp.utcnow(), 'avg_width': avg_width})
405
+ # Keep last 20 measurements
406
+ if len(self._zone_history) > 20:
407
+ self._zone_history = self._zone_history[-20:]
408
+
409
+ return zones_df
410
+
411
+ def _compute_strength(self, usd_total: float, count: int, last_ts: Optional[pd.Timestamp]):
412
+ """Heuristic scoring: combine usd_total (log), count, and recency (time decay)."""
413
+ a = math.log1p(usd_total)
414
+ b = math.log1p(count)
415
+ recency_weight = 1.0
416
+ try:
417
+ if last_ts is not None:
418
+ age_sec = (pd.Timestamp.utcnow() - pd.to_datetime(last_ts)).total_seconds()
419
+ # recent events score higher — decay with half-life of 1 hour
420
+ recency_weight = 1.0 / (1.0 + (age_sec / 3600.0))
421
+ except Exception:
422
+ recency_weight = 1.0
423
+ score = (a * 0.6 + b * 0.4) * recency_weight
424
+ return float(score)
425
+
426
+ def _compute_atr(self, candles: pd.DataFrame, per: int = 14) -> pd.Series:
427
+ """Compute ATR series (Wilder) from candle DF with high/low/close columns."""
428
+ df = candles.copy()
429
+ df['high'] = pd.to_numeric(df['high'], errors='coerce')
430
+ df['low'] = pd.to_numeric(df['low'], errors='coerce')
431
+ df['close'] = pd.to_numeric(df['close'], errors='coerce')
432
+ tr1 = df['high'] - df['low']
433
+ tr2 = (df['high'] - df['close'].shift(1)).abs()
434
+ tr3 = (df['low'] - df['close'].shift(1)).abs()
435
+ tr = pd.concat([tr1, tr2, tr3], axis=1).max(axis=1).dropna()
436
+ atr = tr.ewm(alpha=1.0/per, adjust=False).mean()
437
+ return atr
438
+
439
+ def get_nearest_zone(self, price: float, zones_df: Optional[pd.DataFrame] = None):
440
+ """Return nearest zone row (as dict) to `price` or None."""
441
+ if zones_df is None:
442
+ zones_df = self.compute_zones()
443
+ if zones_df is None or zones_df.empty:
444
+ return None
445
+ zones_df['dist'] = (zones_df['price_mean'] - price).abs() / zones_df['price_mean']
446
+ r = zones_df.sort_values('dist').iloc[0]
447
+ return r.to_dict()
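Putting core.py together end to end — trades in, optional candles for the ATR band, zones out — a compact sketch with synthetic data (all values are illustrative only):

    import time
    import numpy as np
    import pandas as pd
    from liquidator_indicator import Liquidator

    now_ms = int(time.time() * 1000)
    rng = np.random.default_rng(0)

    # Synthetic public trades clustered around 68,000 within the last few minutes.
    trades = [
        {'time': now_ms - i * 500,
         'px': 68000 + rng.normal(0, 30),
         'sz': float(rng.uniform(0.05, 0.5)),
         'side': 'A' if i % 3 else 'B',
         'coin': 'BTC'}
        for i in range(300)
    ]

    # Synthetic candles so compute_zones can derive a volatility (ATR) band.
    candles = pd.DataFrame({
        'high':  68050 + rng.normal(0, 10, 120),
        'low':   67950 + rng.normal(0, 10, 120),
        'close': 68000 + rng.normal(0, 20, 120),
    })

    liq = Liquidator(coin='BTC', pct_merge=0.003, zone_vol_mult=1.5, liq_size_threshold=0.1)
    liq.ingest_trades(trades)
    liq.update_candles(candles)

    zones = liq.compute_zones(window_minutes=30)
    print(zones[['price_mean', 'entry_low', 'entry_high', 'total_usd', 'strength']].head())

    nearest = liq.get_nearest_zone(price=68010.0, zones_df=zones)
    print(nearest['price_mean'], nearest['strength'])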
@@ -0,0 +1,52 @@
1
+ """Common indicator helpers: VWAP and ATR that are useful for zone banding."""
2
+ from typing import Optional
3
+ import pandas as pd
4
+
5
+
6
+ def compute_vwap(candles: pd.DataFrame, period: Optional[int] = None, price_col: str = 'close', vol_col: str = 'volume') -> pd.Series:
7
+ """Compute VWAP series from candles.
8
+
9
+ If `period` is None returns cumulative VWAP, otherwise a rolling-window VWAP of given length.
10
+ Candles must contain price and volume columns.
11
+ """
12
+ if candles is None or candles.empty:
13
+ return pd.Series(dtype=float)
14
+ df = candles.copy()
15
+ # normalize column names
16
+ if price_col not in df.columns:
17
+ if 'close' in df.columns:
18
+ price_col = 'close'
19
+ elif 'px' in df.columns:
20
+ price_col = 'px'
21
+ if vol_col not in df.columns:
22
+ if 'volume' in df.columns:
23
+ vol_col = 'volume'
24
+ df[price_col] = pd.to_numeric(df[price_col], errors='coerce')
25
+ df[vol_col] = pd.to_numeric(df[vol_col], errors='coerce').fillna(0.0)
26
+ tp_vol = df[price_col] * df[vol_col]
27
+ if period is None:
28
+ cum_tp_vol = tp_vol.cumsum()
29
+ cum_vol = df[vol_col].cumsum()
30
+ vwap = cum_tp_vol / cum_vol.replace({0: pd.NA})
31
+ return vwap.ffill().fillna(0.0)
32
+ else:
33
+ tp_vol_r = tp_vol.rolling(window=period, min_periods=1).sum()
34
+ vol_r = df[vol_col].rolling(window=period, min_periods=1).sum()
35
+ vwap = tp_vol_r / vol_r.replace({0: pd.NA})
36
+ return vwap.ffill().fillna(0.0)
37
+
38
+
39
+ def compute_atr(candles: pd.DataFrame, per: int = 14) -> pd.Series:
40
+ """Public ATR computation (Wilder-style) from candles with high/low/close columns."""
41
+ if candles is None or candles.empty:
42
+ return pd.Series(dtype=float)
43
+ df = candles.copy()
44
+ df['high'] = pd.to_numeric(df['high'], errors='coerce')
45
+ df['low'] = pd.to_numeric(df['low'], errors='coerce')
46
+ df['close'] = pd.to_numeric(df['close'], errors='coerce')
47
+ tr1 = df['high'] - df['low']
48
+ tr2 = (df['high'] - df['close'].shift(1)).abs()
49
+ tr3 = (df['low'] - df['close'].shift(1)).abs()
50
+ tr = pd.concat([tr1, tr2, tr3], axis=1).max(axis=1).dropna()
51
+ atr = tr.ewm(alpha=1.0/per, adjust=False).mean()
52
+ return atr
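A short usage sketch for the two helpers above, assuming the candles carry the `high`/`low`/`close`/`volume` columns the docstrings require:

    import numpy as np
    import pandas as pd
    from liquidator_indicator import compute_vwap, compute_atr

    rng = np.random.default_rng(1)
    candles = pd.DataFrame({
        'high':   100 + rng.random(50),
        'low':     99 - rng.random(50),
        'close':  100 + rng.normal(0, 0.2, 50),
        'volume': rng.uniform(1, 10, 50),
    })

    vwap_cum     = compute_vwap(candles)              # cumulative VWAP over the whole frame
    vwap_rolling = compute_vwap(candles, period=20)   # 20-bar rolling VWAP
    atr          = compute_atr(candles, per=14)       # Wilder-style ATR series

    print(float(vwap_rolling.iloc[-1]), float(atr.iloc[-1]))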
@@ -0,0 +1,351 @@
1
+ """Numba-optimized functions for performance-critical operations.
2
+
3
+ These JIT-compiled functions provide 10-100x speedup for numerical loops:
4
+ - Price clustering algorithm
5
+ - Strength computation with time decay
6
+ - ATR (Average True Range) calculation
7
+ """
8
+ import numpy as np
9
+ from numba import jit
10
+
11
+
12
+ @jit(nopython=True, cache=True)
13
+ def cluster_prices_numba(prices, usd_values, timestamps_seconds, sides_encoded, pct_merge):
14
+ """Fast clustering of prices into zones using numba JIT.
15
+
16
+ Args:
17
+ prices: np.array of float prices (sorted)
18
+ usd_values: np.array of float USD values
19
+ timestamps_seconds: np.array of float timestamps (seconds since epoch)
20
+ sides_encoded: np.array of int (0=unknown, 1=long, 2=short)
21
+ pct_merge: float percentage threshold for merging (e.g., 0.003 for 0.3%)
22
+
23
+ Returns:
24
+ Tuple of arrays defining clusters:
25
+ - cluster_ids: array mapping each price to its cluster ID
26
+ - cluster_price_means: mean price per cluster
27
+ - cluster_price_mins: min price per cluster
28
+ - cluster_price_maxs: max price per cluster
29
+ - cluster_usd_totals: total USD per cluster
30
+ - cluster_counts: count per cluster
31
+ - cluster_ts_first: first timestamp per cluster
32
+ - cluster_ts_last: last timestamp per cluster
33
+ - cluster_side_long: count of longs per cluster
34
+ - cluster_side_short: count of shorts per cluster
35
+ """
36
+ n = len(prices)
37
+ if n == 0:
38
+ return (np.empty(0, dtype=np.int32), np.empty(0), np.empty(0), np.empty(0),
39
+ np.empty(0), np.empty(0, dtype=np.int32), np.empty(0), np.empty(0),
40
+ np.empty(0, dtype=np.int32), np.empty(0, dtype=np.int32))
41
+
42
+ cluster_ids = np.zeros(n, dtype=np.int32)
43
+ current_cluster = 0
44
+
45
+ # Track cluster stats
46
+ cluster_price_sum = prices[0]
47
+ cluster_price_min = prices[0]
48
+ cluster_price_max = prices[0]
49
+ cluster_usd_sum = usd_values[0]
50
+ cluster_count = 1
51
+ cluster_ts_min = timestamps_seconds[0]
52
+ cluster_ts_max = timestamps_seconds[0]
53
+ cluster_long_count = 1 if sides_encoded[0] == 1 else 0
54
+ cluster_short_count = 1 if sides_encoded[0] == 2 else 0
55
+
56
+ cluster_ids[0] = current_cluster
57
+
58
+ # Output lists (will convert to arrays)
59
+ cluster_means = [0.0] * n # Pre-allocate max size
60
+ cluster_mins = [0.0] * n
61
+ cluster_maxs = [0.0] * n
62
+ cluster_usds = [0.0] * n
63
+ cluster_cnts = [0] * n
64
+ cluster_ts_firsts = [0.0] * n
65
+ cluster_ts_lasts = [0.0] * n
66
+ cluster_longs = [0] * n
67
+ cluster_shorts = [0] * n
68
+
69
+ for i in range(1, n):
70
+ p = prices[i]
71
+ cluster_mean = cluster_price_sum / cluster_count
72
+
73
+ # Check if within merge threshold
74
+ if abs(p - cluster_mean) / cluster_mean <= pct_merge:
75
+ # Add to current cluster
76
+ cluster_price_sum += p
77
+ cluster_price_min = min(cluster_price_min, p)
78
+ cluster_price_max = max(cluster_price_max, p)
79
+ cluster_usd_sum += usd_values[i]
80
+ cluster_count += 1
81
+ cluster_ts_min = min(cluster_ts_min, timestamps_seconds[i])
82
+ cluster_ts_max = max(cluster_ts_max, timestamps_seconds[i])
83
+ if sides_encoded[i] == 1:
84
+ cluster_long_count += 1
85
+ elif sides_encoded[i] == 2:
86
+ cluster_short_count += 1
87
+ cluster_ids[i] = current_cluster
88
+ else:
89
+ # Save current cluster stats
90
+ cluster_means[current_cluster] = cluster_mean
91
+ cluster_mins[current_cluster] = cluster_price_min
92
+ cluster_maxs[current_cluster] = cluster_price_max
93
+ cluster_usds[current_cluster] = cluster_usd_sum
94
+ cluster_cnts[current_cluster] = cluster_count
95
+ cluster_ts_firsts[current_cluster] = cluster_ts_min
96
+ cluster_ts_lasts[current_cluster] = cluster_ts_max
97
+ cluster_longs[current_cluster] = cluster_long_count
98
+ cluster_shorts[current_cluster] = cluster_short_count
99
+
100
+ # Start new cluster
101
+ current_cluster += 1
102
+ cluster_price_sum = p
103
+ cluster_price_min = p
104
+ cluster_price_max = p
105
+ cluster_usd_sum = usd_values[i]
106
+ cluster_count = 1
107
+ cluster_ts_min = timestamps_seconds[i]
108
+ cluster_ts_max = timestamps_seconds[i]
109
+ cluster_long_count = 1 if sides_encoded[i] == 1 else 0
110
+ cluster_short_count = 1 if sides_encoded[i] == 2 else 0
111
+ cluster_ids[i] = current_cluster
112
+
113
+ # Save final cluster
114
+ cluster_means[current_cluster] = cluster_price_sum / cluster_count
115
+ cluster_mins[current_cluster] = cluster_price_min
116
+ cluster_maxs[current_cluster] = cluster_price_max
117
+ cluster_usds[current_cluster] = cluster_usd_sum
118
+ cluster_cnts[current_cluster] = cluster_count
119
+ cluster_ts_firsts[current_cluster] = cluster_ts_min
120
+ cluster_ts_lasts[current_cluster] = cluster_ts_max
121
+ cluster_longs[current_cluster] = cluster_long_count
122
+ cluster_shorts[current_cluster] = cluster_short_count
123
+
124
+ # Trim to actual number of clusters
125
+ num_clusters = current_cluster + 1
126
+
127
+ return (
128
+ cluster_ids,
129
+ np.array(cluster_means[:num_clusters]),
130
+ np.array(cluster_mins[:num_clusters]),
131
+ np.array(cluster_maxs[:num_clusters]),
132
+ np.array(cluster_usds[:num_clusters]),
133
+ np.array(cluster_cnts[:num_clusters], dtype=np.int32),
134
+ np.array(cluster_ts_firsts[:num_clusters]),
135
+ np.array(cluster_ts_lasts[:num_clusters]),
136
+ np.array(cluster_longs[:num_clusters], dtype=np.int32),
137
+ np.array(cluster_shorts[:num_clusters], dtype=np.int32)
138
+ )
139
+
140
+
141
+ @jit(nopython=True, cache=True)
142
+ def compute_strength_batch(usd_totals, counts, last_ts_seconds, current_time_seconds):
143
+ """Vectorized strength computation with time decay.
144
+
145
+ Args:
146
+ usd_totals: array of USD totals per zone
147
+ counts: array of trade counts per zone
148
+ last_ts_seconds: array of last timestamp (seconds since epoch) per zone
149
+ current_time_seconds: current time in seconds since epoch
150
+
151
+ Returns:
152
+ Array of strength scores
153
+ """
154
+ n = len(usd_totals)
155
+ strengths = np.zeros(n)
156
+
157
+ for i in range(n):
158
+ # Log-scaled components
159
+ a = np.log1p(usd_totals[i])
160
+ b = np.log1p(counts[i])
161
+
162
+ # Time decay: recent events score higher (half-life = 1 hour = 3600 seconds)
163
+ age_sec = current_time_seconds - last_ts_seconds[i]
164
+ recency_weight = 1.0 / (1.0 + (age_sec / 3600.0))
165
+
166
+ # Weighted combination
167
+ strengths[i] = (a * 0.6 + b * 0.4) * recency_weight
168
+
169
+ return strengths
170
+
171
+
172
+ @jit(nopython=True, cache=True)
173
+ def compute_atr_numba(high, low, close, period=14):
174
+ """Fast ATR (Average True Range) calculation using Wilder's smoothing.
175
+
176
+ Args:
177
+ high: array of high prices
178
+ low: array of low prices
179
+ close: array of close prices
180
+ period: ATR period (default 14)
181
+
182
+ Returns:
183
+ Array of ATR values (same length as input)
184
+ """
185
+ n = len(high)
186
+ if n == 0:
187
+ return np.empty(0)
188
+
189
+ # Compute True Range
190
+ tr = np.zeros(n)
191
+ tr[0] = high[0] - low[0] # First TR is just high-low
192
+
193
+ for i in range(1, n):
194
+ tr1 = high[i] - low[i]
195
+ tr2 = abs(high[i] - close[i-1])
196
+ tr3 = abs(low[i] - close[i-1])
197
+ tr[i] = max(tr1, max(tr2, tr3))
198
+
199
+ # Wilder's smoothing (exponential moving average)
200
+ atr = np.zeros(n)
201
+
202
+ # First ATR is simple average
203
+ if n >= period:
204
+ atr[period-1] = np.mean(tr[:period])
205
+
206
+ # Subsequent values use Wilder's smoothing
207
+ alpha = 1.0 / period
208
+ for i in range(period, n):
209
+ atr[i] = atr[i-1] * (1 - alpha) + tr[i] * alpha
210
+
211
+ # Fill early values with first computed ATR
212
+ for i in range(period-1):
213
+ atr[i] = atr[period-1]
214
+ else:
215
+ # Not enough data for full period, use cumulative mean
216
+ for i in range(n):
217
+ atr[i] = np.mean(tr[:i+1])
218
+
219
+ return atr
220
+
221
+
222
+ @jit(nopython=True, cache=True)
223
+ def compute_zone_bands(price_means, pct_merge, last_atr, zone_vol_mult):
224
+ """Compute entry bands for zones using ATR and percentage thresholds.
225
+
226
+ Args:
227
+ price_means: array of zone mean prices
228
+ pct_merge: percentage merge threshold (fallback)
229
+ last_atr: last ATR value from candles
230
+ zone_vol_mult: multiplier for ATR-based band
231
+
232
+ Returns:
233
+ Tuple of (band_widths, entry_lows, entry_highs, band_pcts)
234
+ """
235
+ n = len(price_means)
236
+ bands = np.zeros(n)
237
+ entry_lows = np.zeros(n)
238
+ entry_highs = np.zeros(n)
239
+ band_pcts = np.zeros(n)
240
+
241
+ for i in range(n):
242
+ pm = price_means[i]
243
+
244
+ # Percent-based padding (fallback)
245
+ pct_pad = max(0.001, pct_merge)
246
+ pad_by_pct = pm * pct_pad
247
+
248
+ # ATR-based padding (preferred for volatility adjustment)
249
+ pad_by_atr = last_atr * zone_vol_mult
250
+
251
+ # Take maximum
252
+ pad = max(pad_by_pct, pad_by_atr)
253
+
254
+ bands[i] = pad
255
+ entry_lows[i] = pm - pad
256
+ entry_highs[i] = pm + pad
257
+ band_pcts[i] = pad / pm if pm > 0 else 0.0
258
+
259
+ return bands, entry_lows, entry_highs, band_pcts
260
+
261
+
262
+ @jit(nopython=True, cache=True)
263
+ def detect_volume_spikes(sizes, threshold_multiplier=2.0, window=20):
264
+ """Detect volume spikes using rolling mean comparison.
265
+
266
+ Args:
267
+ sizes: array of trade sizes
268
+ threshold_multiplier: multiplier for mean (default 2.0)
269
+ window: rolling window size
270
+
271
+ Returns:
272
+ Boolean array indicating spike locations
273
+ """
274
+ n = len(sizes)
275
+ spikes = np.zeros(n, dtype=np.bool_)
276
+
277
+ for i in range(n):
278
+ start_idx = max(0, i - window + 1)
279
+ window_mean = np.mean(sizes[start_idx:i+1])
280
+
281
+ if sizes[i] > window_mean * threshold_multiplier:
282
+ spikes[i] = True
283
+
284
+ return spikes
285
+
286
+
287
+ @jit(nopython=True, cache=True)
288
+ def compute_price_changes(prices):
289
+ """Fast percentage change calculation for prices.
290
+
291
+ Args:
292
+ prices: array of prices
293
+
294
+ Returns:
295
+ Array of absolute percentage changes
296
+ """
297
+ n = len(prices)
298
+ if n < 2:
299
+ return np.zeros(n)
300
+
301
+ changes = np.zeros(n)
302
+ changes[0] = 0.0
303
+
304
+ for i in range(1, n):
305
+ if prices[i-1] != 0:
306
+ changes[i] = abs((prices[i] - prices[i-1]) / prices[i-1])
307
+
308
+ return changes
309
+
310
+
311
+ @jit(nopython=True, cache=True)
312
+ def filter_large_trades(sizes, usd_values, threshold):
313
+ """Filter for large trades exceeding threshold.
314
+
315
+ Args:
316
+ sizes: array of trade sizes
317
+ usd_values: array of USD values
318
+ threshold: minimum size threshold
319
+
320
+ Returns:
321
+ Boolean array of large trades
322
+ """
323
+ n = len(sizes)
324
+ large = np.zeros(n, dtype=np.bool_)
325
+
326
+ for i in range(n):
327
+ if sizes[i] >= threshold:
328
+ large[i] = True
329
+
330
+ return large
331
+
332
+
333
+ @jit(nopython=True, cache=True)
334
+ def rolling_mean(arr, window):
335
+ """Fast rolling mean calculation.
336
+
337
+ Args:
338
+ arr: input array
339
+ window: window size
340
+
341
+ Returns:
342
+ Array of rolling means
343
+ """
344
+ n = len(arr)
345
+ result = np.zeros(n)
346
+
347
+ for i in range(n):
348
+ start_idx = max(0, i - window + 1)
349
+ result[i] = np.mean(arr[start_idx:i+1])
350
+
351
+ return result
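core.py feeds `cluster_prices_numba` plain NumPy arrays sorted by price; the same functions can be exercised directly, for example to benchmark the JIT path against the pure-Python fallback. A sketch, assuming numba is installed and importable:

    import numpy as np
    from liquidator_indicator import numba_optimized

    rng = np.random.default_rng(2)
    prices = np.sort(68000 + rng.normal(0, 40, 1000))             # must be pre-sorted, as in core.py
    usd    = np.full(1000, 5000.0)                                 # flat USD notional per inferred event
    ts     = np.linspace(1_700_000_000.0, 1_700_003_600.0, 1000)   # seconds since epoch
    sides  = rng.integers(1, 3, 1000).astype(np.int32)             # 1=long, 2=short

    (ids, means, mins, maxs, usds, counts,
     ts_first, ts_last, longs, shorts) = numba_optimized.cluster_prices_numba(
        prices, usd, ts, sides, 0.003)

    strengths = numba_optimized.compute_strength_batch(usds, counts, ts_last, float(ts.max()))
    print(len(means), 'clusters; strongest score:', float(strengths.max()))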
@@ -0,0 +1,119 @@
1
+ """Small parser helpers to normalize common liquidation and bbo messages into DataFrame rows."""
2
+ from typing import Any, Dict
3
+ import pandas as pd
4
+ import json
5
+ import os
6
+ import glob
7
+
8
+ def parse_liq_msg(msg: Dict[str, Any]) -> Dict:
9
+ """Attempt to extract fields from a liq message dict into canonical form.
10
+ Canonical keys: timestamp, side, coin, price, usd_value, size
11
+ """
12
+ out = {}
13
+ # timestamp variants
14
+ for k in ('timestamp','time','t'):
15
+ if k in msg:
16
+ out['timestamp'] = msg[k]
17
+ break
18
+ # side
19
+ side = msg.get('side') or msg.get('direction') or msg.get('dir')
20
+ if side is not None:
21
+ out['side'] = str(side).lower()
22
+ # price
23
+ for pk in ('price','px','p'):
24
+ if pk in msg:
25
+ out['price'] = msg[pk]
26
+ break
27
+ # usd value
28
+ out['usd_value'] = msg.get('usd_value') or msg.get('value') or msg.get('usd') or msg.get('usdValue') or 0.0
29
+ out['coin'] = msg.get('coin') or msg.get('market') or 'BTC'
30
+ out['size'] = msg.get('size') or msg.get('qty') or msg.get('quantity')
31
+ return out
32
+
33
+ def parse_bbo_msg(msg: Dict[str, Any]) -> Dict:
34
+ out = {}
35
+ # simple extraction
36
+ out['timestamp'] = msg.get('timestamp') or msg.get('t')
37
+ out['bid'] = msg.get('bid') or msg.get('bidPrice') or None
38
+ out['ask'] = msg.get('ask') or msg.get('askPrice') or None
39
+ return out
40
+
41
+
42
+ def read_liquidations_jsonl(path: str) -> pd.DataFrame:
43
+ """Read a JSONL file of liquidation events and normalize into a DataFrame using `parse_liq_msg`.
44
+
45
+ Returns empty DataFrame on error.
46
+ """
47
+ out = []
48
+ try:
49
+ with open(path, 'r', encoding='utf-8') as f:
50
+ for line in f:
51
+ line = line.strip()
52
+ if not line:
53
+ continue
54
+ try:
55
+ obj = json.loads(line)
56
+ except Exception:
57
+ # some files may contain 'data' wrapper
58
+ try:
59
+ obj = json.loads(line.split('\t')[-1])
60
+ except Exception:
61
+ continue
62
+ row = parse_liq_msg(obj if isinstance(obj, dict) else obj.get('data', {}))
63
+ out.append(row)
64
+ if not out:
65
+ return pd.DataFrame()
66
+ df = pd.DataFrame(out)
67
+ # normalize timestamp
68
+ if 'timestamp' in df.columns:
69
+ df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
70
+ return df
71
+ except Exception:
72
+ return pd.DataFrame()
73
+
74
+
75
+ def read_bbo_jsonl(path: str) -> pd.DataFrame:
76
+ """Read BBO JSONL file into DataFrame using `parse_bbo_msg`.
77
+
78
+ If path is a directory, will glob for *.jsonl and concat.
79
+ """
80
+ rows = []
81
+ try:
82
+ paths = [path]
83
+ if os.path.isdir(path):
84
+ paths = glob.glob(os.path.join(path, '*.jsonl'))
85
+ for p in paths:
86
+ with open(p, 'r', encoding='utf-8') as f:
87
+ for line in f:
88
+ line = line.strip()
89
+ if not line:
90
+ continue
91
+ try:
92
+ obj = json.loads(line)
93
+ except Exception:
94
+ continue
95
+ rows.append(parse_bbo_msg(obj if isinstance(obj, dict) else obj.get('data', {})))
96
+ if not rows:
97
+ return pd.DataFrame()
98
+ df = pd.DataFrame(rows)
99
+ if 'timestamp' in df.columns:
100
+ df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
101
+ return df
102
+ except Exception:
103
+ return pd.DataFrame()
104
+
105
+
106
+ def tail_last_jsonl(path: str) -> Dict:
107
+ """Read last non-empty JSON line from a file and return parsed dict, or {} on error.
108
+
109
+ This implementation reads the file in text mode and returns the last non-empty line parsed as JSON.
110
+ """
111
+ try:
112
+ with open(path, 'r', encoding='utf-8') as f:
113
+ lines = [ln.strip() for ln in f if ln.strip()]
114
+ if not lines:
115
+ return {}
116
+ last = lines[-1]
117
+ return json.loads(last)
118
+ except Exception:
119
+ return {}
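The canonical `timestamp/side/coin/price/usd_value/size` rows produced above line up with the columns `Liquidator.ingest_trades` normalizes, so a recorded JSONL capture can be replayed offline. A sketch, assuming a local file named `liqs.jsonl` (hypothetical path) with one event object per line:

    from liquidator_indicator import Liquidator
    from liquidator_indicator.parsers import read_liquidations_jsonl, tail_last_jsonl

    df = read_liquidations_jsonl('liqs.jsonl')   # hypothetical capture file; empty DataFrame on error
    if not df.empty:
        liq = Liquidator(coin='BTC')
        liq.ingest_trades(df)                    # ingest_trades also accepts a DataFrame
        print(liq.compute_zones().head())

    print(tail_last_jsonl('liqs.jsonl'))         # latest raw event, or {} if the file is missing/empty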
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: liquidator_indicator
3
- Version: 0.0.4
3
+ Version: 0.0.6
4
4
  Summary: Lightweight liquidation zone indicator with multi-signal detection and visual chart integration
5
5
  Home-page: https://github.com/ViWarshawski/liquidator_indicator
6
6
  Author: ViWarshawski
@@ -18,8 +18,9 @@ Classifier: Topic :: Office/Business :: Financial :: Investment
18
18
  Requires-Python: >=3.9
19
19
  Description-Content-Type: text/markdown
20
20
  License-File: LICENSE
21
- Requires-Dist: pandas==2.3.3
22
- Requires-Dist: numpy==1.26.4
21
+ Requires-Dist: pandas>=1.3.0
22
+ Requires-Dist: numpy>=1.20.0
23
+ Requires-Dist: numba>=0.56.0; python_version >= "3.9"
23
24
  Dynamic: license-file
24
25
 
25
26
  # liquidator_indicator
@@ -0,0 +1,12 @@
1
+ liquidator_indicator/__init__.py,sha256=YM8TAML-t-vOzvRmgAT0zgmAmiYQTwVj6Fc5ZvJ5_uY,223
2
+ liquidator_indicator/core.py,sha256=7-lIJWVXV8x5s3RtJQ31ze3FYDCdYrlyHcK4ji6K_pI,21505
3
+ liquidator_indicator/indicators.py,sha256=xLW98VzCNVhhr7tnzQl5k7MlVyGn9o8JDxaHWTvnM0Q,2243
4
+ liquidator_indicator/numba_optimized.py,sha256=F39rELVhcR8ENod-cncL2nyxKjhphFtOnIeHDCceW4I,11799
5
+ liquidator_indicator/parsers.py,sha256=1CBAdp_QOrL_u9SclqofBwoKt02QQcv6q2_axG4l7vg,4197
6
+ liquidator_indicator/collectors/__init__.py,sha256=EQ2Po4fYM7Cfv7xHA8sVobwNxSSFu3XQ-D2KQOJ_v5s,362
7
+ liquidator_indicator/collectors/funding.py,sha256=tEzHfuBUkuHDPOx_7GcvBXUwjwll0dyKs585xyESFb0,6913
8
+ liquidator_indicator-0.0.6.dist-info/licenses/LICENSE,sha256=EMu9ano_EP7IAtdNLPdSshuUaGgxEc7uD1Rm8wTi9kQ,1091
9
+ liquidator_indicator-0.0.6.dist-info/METADATA,sha256=Si46d8PXlAwiOK7tE2UBGBhIt6ePUR87un_dtr9G9Go,12178
10
+ liquidator_indicator-0.0.6.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
11
+ liquidator_indicator-0.0.6.dist-info/top_level.txt,sha256=nu6L0DNbmTvoXWufe9v82_QOq78hUGwwFxXr9IoR6mU,21
12
+ liquidator_indicator-0.0.6.dist-info/RECORD,,
@@ -0,0 +1 @@
1
+ liquidator_indicator
@@ -1,5 +0,0 @@
1
- liquidator_indicator-0.0.4.dist-info/licenses/LICENSE,sha256=EMu9ano_EP7IAtdNLPdSshuUaGgxEc7uD1Rm8wTi9kQ,1091
2
- liquidator_indicator-0.0.4.dist-info/METADATA,sha256=YMxB6SU-ZJIVfN_OGlrieZUl3VzAglsHixczsC6jELE,12123
3
- liquidator_indicator-0.0.4.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
4
- liquidator_indicator-0.0.4.dist-info/top_level.txt,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
5
- liquidator_indicator-0.0.4.dist-info/RECORD,,