rangebar-11.6.1-cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. rangebar/CLAUDE.md +327 -0
  2. rangebar/__init__.py +227 -0
  3. rangebar/__init__.pyi +1089 -0
  4. rangebar/_core.cpython-313-darwin.so +0 -0
  5. rangebar/checkpoint.py +472 -0
  6. rangebar/cli.py +298 -0
  7. rangebar/clickhouse/CLAUDE.md +139 -0
  8. rangebar/clickhouse/__init__.py +100 -0
  9. rangebar/clickhouse/bulk_operations.py +309 -0
  10. rangebar/clickhouse/cache.py +734 -0
  11. rangebar/clickhouse/client.py +121 -0
  12. rangebar/clickhouse/config.py +141 -0
  13. rangebar/clickhouse/mixin.py +120 -0
  14. rangebar/clickhouse/preflight.py +504 -0
  15. rangebar/clickhouse/query_operations.py +345 -0
  16. rangebar/clickhouse/schema.sql +187 -0
  17. rangebar/clickhouse/tunnel.py +222 -0
  18. rangebar/constants.py +288 -0
  19. rangebar/conversion.py +177 -0
  20. rangebar/exceptions.py +207 -0
  21. rangebar/exness.py +364 -0
  22. rangebar/hooks.py +311 -0
  23. rangebar/logging.py +171 -0
  24. rangebar/notify/__init__.py +15 -0
  25. rangebar/notify/pushover.py +155 -0
  26. rangebar/notify/telegram.py +271 -0
  27. rangebar/orchestration/__init__.py +20 -0
  28. rangebar/orchestration/count_bounded.py +797 -0
  29. rangebar/orchestration/helpers.py +412 -0
  30. rangebar/orchestration/models.py +76 -0
  31. rangebar/orchestration/precompute.py +498 -0
  32. rangebar/orchestration/range_bars.py +736 -0
  33. rangebar/orchestration/tick_fetcher.py +226 -0
  34. rangebar/ouroboros.py +454 -0
  35. rangebar/processors/__init__.py +22 -0
  36. rangebar/processors/api.py +383 -0
  37. rangebar/processors/core.py +522 -0
  38. rangebar/resource_guard.py +567 -0
  39. rangebar/storage/__init__.py +22 -0
  40. rangebar/storage/checksum_registry.py +218 -0
  41. rangebar/storage/parquet.py +728 -0
  42. rangebar/streaming.py +300 -0
  43. rangebar/validation/__init__.py +69 -0
  44. rangebar/validation/cache_staleness.py +277 -0
  45. rangebar/validation/continuity.py +664 -0
  46. rangebar/validation/gap_classification.py +294 -0
  47. rangebar/validation/post_storage.py +317 -0
  48. rangebar/validation/tier1.py +175 -0
  49. rangebar/validation/tier2.py +261 -0
  50. rangebar-11.6.1.dist-info/METADATA +308 -0
  51. rangebar-11.6.1.dist-info/RECORD +54 -0
  52. rangebar-11.6.1.dist-info/WHEEL +4 -0
  53. rangebar-11.6.1.dist-info/entry_points.txt +2 -0
  54. rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
rangebar/clickhouse/cache.py
@@ -0,0 +1,734 @@
+ """ClickHouse cache for computed range bars.
+
+ This module provides the RangeBarCache class that caches computed range bars
+ (Tier 2) in ClickHouse. Raw tick data is stored locally using Parquet files
+ via the `rangebar.storage` module.
+
+ Architecture:
+ - Tier 1 (raw ticks): Local Parquet files via `rangebar.storage.TickStorage`
+ - Tier 2 (computed bars): ClickHouse via this module
+
+ The cache uses mise environment variables for configuration and
+ supports multiple GPU workstations via SSH aliases.
+ """
+
+ from __future__ import annotations
+
+ import hashlib
+ import logging
+ from dataclasses import dataclass
+ from importlib import resources
+ from typing import TYPE_CHECKING
+
+ import pandas as pd
+
+ from .._core import __version__
+ from ..constants import (
+     EXCHANGE_SESSION_COLUMNS,
+     MICROSTRUCTURE_COLUMNS,
+ )
+ from ..conversion import normalize_arrow_dtypes
+ from ..exceptions import (
+     CacheReadError,
+     CacheWriteError,
+ )
+ from .bulk_operations import BulkStoreMixin
+ from .client import get_client
+ from .mixin import ClickHouseClientMixin
+ from .preflight import (
+     HostConnection,
+     get_available_clickhouse_host,
+ )
+ from .query_operations import QueryOperationsMixin
+ from .tunnel import SSHTunnel
+
+ if TYPE_CHECKING:
+     import clickhouse_connect
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass(frozen=True)
+ class CacheKey:
+     """Cache key for range bar lookups.
+
+     Uniquely identifies a set of computed range bars based on:
+     - Symbol (e.g., "BTCUSDT")
+     - Threshold in decimal basis points (dbps)
+     - Time range of source data
+     - Ouroboros mode for reproducibility
+
+     Attributes
+     ----------
+     symbol : str
+         Trading symbol
+     threshold_decimal_bps : int
+         Threshold in decimal basis points (1 dbps = 0.001%)
+     start_ts : int
+         Start timestamp in milliseconds
+     end_ts : int
+         End timestamp in milliseconds
+     ouroboros_mode : str
+         Ouroboros reset mode: "year", "month", or "week" (default: "year")
+     """
+
+     symbol: str
+     threshold_decimal_bps: int
+     start_ts: int
+     end_ts: int
+     ouroboros_mode: str = "year"
+
+     @property
+     def hash_key(self) -> str:
+         """Get hash key for cache lookups.
+
+         Returns
+         -------
+         str
+             MD5 hash of cache key components
+         """
+         key_str = (
+             f"{self.symbol}_{self.threshold_decimal_bps}_"
+             f"{self.start_ts}_{self.end_ts}_{self.ouroboros_mode}"
+         )
+         return hashlib.md5(key_str.encode()).hexdigest()
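To make the keying concrete, a small check (example values only) showing that hash_key is simply an MD5 digest of the underscore-joined fields, so identical fields always resolve to the same cache entry:

    import hashlib

    from rangebar.clickhouse.cache import CacheKey

    key = CacheKey("BTCUSDT", 250, 1704067200000, 1706745600000)
    expected = hashlib.md5(
        b"BTCUSDT_250_1704067200000_1706745600000_year"
    ).hexdigest()
    assert key.hash_key == expected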
+
+
+ class RangeBarCache(ClickHouseClientMixin, BulkStoreMixin, QueryOperationsMixin):
+     """ClickHouse cache for computed range bars.
+
+     Caches computed range bars (Tier 2) in ClickHouse. For raw tick data
+     storage (Tier 1), use `rangebar.storage.TickStorage` instead.
+
+     PREFLIGHT runs in __init__ and fails loudly if no ClickHouse is available.
+
+     Parameters
+     ----------
+     client : Client | None
+         External ClickHouse client. If None, creates connection based on
+         environment configuration (mise env vars).
+
+     Raises
+     ------
+     ClickHouseNotConfiguredError
+         If no ClickHouse hosts are available (with guidance for setup)
+
+     Examples
+     --------
+     >>> with RangeBarCache() as cache:
+     ...     # Check cache
+     ...     if cache.has_range_bars(key):
+     ...         bars = cache.get_range_bars(key)
+     ...     else:
+     ...         # Compute bars and store
+     ...         cache.store_range_bars(key, bars)
+
+     See Also
+     --------
+     rangebar.storage.TickStorage : Local Parquet storage for raw tick data
+     """
+
+     def __init__(self, client: clickhouse_connect.driver.Client | None = None) -> None:
+         """Initialize cache with ClickHouse connection.
+
+         Runs preflight check to find available ClickHouse host.
+         Creates schema if it doesn't exist.
+         """
+         self._tunnel: SSHTunnel | None = None
+         self._host_connection: HostConnection | None = None
+
+         if client is None:
+             # PREFLIGHT CHECK - runs before anything else
+             # This function fails loudly if no ClickHouse is available
+             host_conn = get_available_clickhouse_host()
+             self._host_connection = host_conn
+
+             # Connect based on method (local, direct, or SSH tunnel)
+             if host_conn.method == "local":
+                 self._init_client(get_client("localhost", host_conn.port))
+             elif host_conn.method == "direct":
+                 from .preflight import _resolve_ssh_alias_to_ip
+
+                 ip = _resolve_ssh_alias_to_ip(host_conn.host)
+                 if ip is None:
+                     msg = f"Could not resolve SSH alias: {host_conn.host}"
+                     raise RuntimeError(msg)
+                 self._init_client(get_client(ip, host_conn.port))
+             elif host_conn.method == "ssh_tunnel":
+                 self._tunnel = SSHTunnel(host_conn.host, host_conn.port)
+                 local_port = self._tunnel.start()
+                 self._init_client(get_client("localhost", local_port))
+         else:
+             self._init_client(client)
+
+         self._ensure_schema()
+
+     def close(self) -> None:
+         """Close client and tunnel if owned."""
+         super().close()
+         if self._tunnel is not None:
+             self._tunnel.stop()
+             self._tunnel = None
+
+     def _ensure_schema(self) -> None:
+         """Create database and tables if they don't exist."""
+         # Create database
+         self.client.command("CREATE DATABASE IF NOT EXISTS rangebar_cache")
+
+         # Load and execute schema
+         schema_sql = resources.files(__package__).joinpath("schema.sql").read_text()
+
+         # Split by semicolon and execute each statement
+         for statement in schema_sql.split(";"):
+             # Check if statement contains a CREATE TABLE/MATERIALIZED VIEW
+             if "CREATE TABLE" in statement or "CREATE MATERIALIZED" in statement:
+                 # Strip single-line comments from each line, keep the SQL
+                 lines = []
+                 for line in statement.split("\n"):
+                     line = line.strip()
+                     # Skip pure comment lines
+                     if line.startswith("--"):
+                         continue
+                     # Remove trailing comments
+                     if "--" in line:
+                         line = line[: line.index("--")].strip()
+                     if line:
+                         lines.append(line)
+                 clean_sql = " ".join(lines)
+                 if clean_sql:
+                     self.client.command(clean_sql)
+
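As a worked example of the comment stripping above, the table definition below is invented for illustration (the real statements live in rangebar/clickhouse/schema.sql); the per-line cleanup collapses it into a single executable command:

    statement = """
    -- Tier 2: computed range bars
    CREATE TABLE IF NOT EXISTS rangebar_cache.range_bars (
        symbol String,  -- e.g. BTCUSDT
        timestamp_ms Int64
    ) ENGINE = ReplacingMergeTree
    ORDER BY (symbol, timestamp_ms)
    """

    lines = []
    for line in statement.split("\n"):
        line = line.strip()
        if line.startswith("--"):
            continue
        if "--" in line:
            line = line[: line.index("--")].strip()
        if line:
            lines.append(line)
    print(" ".join(lines))
    # CREATE TABLE IF NOT EXISTS rangebar_cache.range_bars ( symbol String, timestamp_ms Int64 ) ENGINE = ReplacingMergeTree ORDER BY (symbol, timestamp_ms)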
+     # =========================================================================
+     # Range Bars Cache (Tier 2)
+     # =========================================================================
+     # Note: Raw tick data (Tier 1) is now stored locally using Parquet files.
+     # See rangebar.storage.TickStorage for tick data caching.
+
+     def store_range_bars(
+         self,
+         key: CacheKey,
+         bars: pd.DataFrame,
+         version: str | None = None,
+     ) -> int:
+         """Store computed range bars in cache.
+
+         Parameters
+         ----------
+         key : CacheKey
+             Cache key identifying these bars
+         bars : pd.DataFrame
+             DataFrame with OHLCV columns (from rangebar processing)
+         version : str | None
+             rangebar-core version for cache invalidation. If None (default),
+             uses current package version for schema evolution tracking.
+
+         Returns
+         -------
+         int
+             Number of rows inserted
+
+         Raises
+         ------
+         CacheWriteError
+             If the insert operation fails.
+         """
+         if bars.empty:
+             logger.debug("Skipping cache write for %s: empty DataFrame", key.symbol)
+             return 0
+
+         logger.debug(
+             "Writing %d bars to cache for %s @ %d dbps",
+             len(bars),
+             key.symbol,
+             key.threshold_decimal_bps,
+         )
+
+         df = bars.copy()
+
+         # Handle DatetimeIndex
+         if isinstance(df.index, pd.DatetimeIndex):
+             df = df.reset_index()
+             if "timestamp" in df.columns:
+                 df["timestamp_ms"] = df["timestamp"].astype("int64") // 10**6
+                 df = df.drop(columns=["timestamp"])
+             elif "index" in df.columns:
+                 df["timestamp_ms"] = df["index"].astype("int64") // 10**6
+                 df = df.drop(columns=["index"])
+
+         # Normalize column names (lowercase)
+         df.columns = df.columns.str.lower()
+
+         # Add cache metadata
+         df["symbol"] = key.symbol
+         df["threshold_decimal_bps"] = key.threshold_decimal_bps
+         df["ouroboros_mode"] = key.ouroboros_mode
+         df["cache_key"] = key.hash_key
+         df["rangebar_version"] = version if version is not None else __version__
+         df["source_start_ts"] = key.start_ts
+         df["source_end_ts"] = key.end_ts
+
+         # Select columns for insertion
+         columns = [
+             "symbol",
+             "threshold_decimal_bps",
+             "ouroboros_mode",
+             "timestamp_ms",
+             "open",
+             "high",
+             "low",
+             "close",
+             "volume",
+             "cache_key",
+             "rangebar_version",
+             "source_start_ts",
+             "source_end_ts",
+         ]
+
+         # Add optional microstructure columns if present (from constants.py SSoT)
+         for col in MICROSTRUCTURE_COLUMNS:
+             if col in df.columns:
+                 columns.append(col)
+
+         # Add optional exchange session columns if present (Issue #8)
+         for col in EXCHANGE_SESSION_COLUMNS:
+             if col in df.columns:
+                 columns.append(col)
+
+         # Filter to existing columns
+         columns = [c for c in columns if c in df.columns]
+
+         try:
+             summary = self.client.insert_df(
+                 "rangebar_cache.range_bars",
+                 df[columns],
+             )
+             written = summary.written_rows
+             logger.info(
+                 "Cached %d bars for %s @ %d dbps",
+                 written,
+                 key.symbol,
+                 key.threshold_decimal_bps,
+             )
+             return written
+         except (OSError, RuntimeError) as e:
+             logger.exception(
+                 "Cache write failed for %s @ %d dbps",
+                 key.symbol,
+                 key.threshold_decimal_bps,
+             )
+             msg = f"Failed to write bars for {key.symbol}: {e}"
+             raise CacheWriteError(
+                 msg,
+                 symbol=key.symbol,
+                 operation="write",
+             ) from e
+
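A short usage sketch for the write path (column values are illustrative, and `cache`/`key` are assumed to come from the earlier example): store_range_bars accepts a DatetimeIndex-ed OHLCV frame, derives timestamp_ms from the index, lowercases the column names, and picks up any optional microstructure or session columns it finds:

    import pandas as pd

    bars = pd.DataFrame(
        {
            "Open": [42000.0, 42105.0],
            "High": [42110.0, 42210.5],
            "Low": [41995.0, 42100.0],
            "Close": [42105.0, 42210.5],
            "Volume": [12.3, 9.8],
        },
        index=pd.to_datetime([1704067200000, 1704067500000], unit="ms", utc=True),
    )

    written = cache.store_range_bars(key, bars)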
+     def get_range_bars(self, key: CacheKey) -> pd.DataFrame | None:
+         """Get cached range bars.
+
+         Parameters
+         ----------
+         key : CacheKey
+             Cache key to lookup
+
+         Returns
+         -------
+         pd.DataFrame | None
+             OHLCV DataFrame if found, None otherwise
+
+         Raises
+         ------
+         CacheReadError
+             If the query fails due to database errors.
+         """
+         query = """
+             SELECT
+                 timestamp_ms,
+                 open as Open,
+                 high as High,
+                 low as Low,
+                 close as Close,
+                 volume as Volume
+             FROM rangebar_cache.range_bars FINAL
+             WHERE symbol = {symbol:String}
+               AND threshold_decimal_bps = {threshold_decimal_bps:UInt32}
+               AND source_start_ts = {start_ts:Int64}
+               AND source_end_ts = {end_ts:Int64}
+             ORDER BY timestamp_ms
+         """
+         try:
+             # Use Arrow-optimized query for 3x faster DataFrame creation
+             df = self.client.query_df_arrow(
+                 query,
+                 parameters={
+                     "symbol": key.symbol,
+                     "threshold_decimal_bps": key.threshold_decimal_bps,
+                     "start_ts": key.start_ts,
+                     "end_ts": key.end_ts,
+                 },
+             )
+         except (OSError, RuntimeError) as e:
+             logger.exception(
+                 "Cache read failed for %s @ %d dbps",
+                 key.symbol,
+                 key.threshold_decimal_bps,
+             )
+             msg = f"Failed to read bars for {key.symbol}: {e}"
+             raise CacheReadError(
+                 msg,
+                 symbol=key.symbol,
+                 operation="read",
+             ) from e
+
+         if df.empty:
+             logger.debug(
+                 "Cache miss for %s @ %d dbps (key: %s)",
+                 key.symbol,
+                 key.threshold_decimal_bps,
+                 key.hash_key[:8],
+             )
+             return None
+
+         logger.debug(
+             "Cache hit: %d bars for %s @ %d dbps",
+             len(df),
+             key.symbol,
+             key.threshold_decimal_bps,
+         )
+
+         # Convert to TZ-aware UTC DatetimeIndex (Issue #20: consistent timestamps)
+         df["timestamp"] = pd.to_datetime(df["timestamp_ms"], unit="ms", utc=True)
+         df = df.set_index("timestamp")
+         df = df.drop(columns=["timestamp_ms"])
+
+         # Convert PyArrow dtypes to numpy for compatibility with fresh computation
+         # query_df_arrow returns double[pyarrow], but process_trades returns float64
+         df = normalize_arrow_dtypes(df)
+
+         return df
+
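On a cache hit the caller gets a frame shaped for direct comparison with freshly computed bars: a tz-aware UTC DatetimeIndex and plain numpy (non-Arrow) OHLCV columns. A minimal sketch of the properties a caller can rely on, again reusing `cache` and `key` from the earlier example:

    import pandas as pd

    bars = cache.get_range_bars(key)
    if bars is not None:
        assert isinstance(bars.index, pd.DatetimeIndex)
        assert bars.index.tz is not None  # UTC
        assert list(bars.columns) == ["Open", "High", "Low", "Close", "Volume"]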
+     def has_range_bars(self, key: CacheKey) -> bool:
+         """Check if range bars exist in cache.
+
+         Parameters
+         ----------
+         key : CacheKey
+             Cache key to check
+
+         Returns
+         -------
+         bool
+             True if bars exist
+         """
+         query = """
+             SELECT count() > 0
+             FROM rangebar_cache.range_bars FINAL
+             WHERE symbol = {symbol:String}
+               AND threshold_decimal_bps = {threshold_decimal_bps:UInt32}
+               AND source_start_ts = {start_ts:Int64}
+               AND source_end_ts = {end_ts:Int64}
+             LIMIT 1
+         """
+         result = self.client.command(
+             query,
+             parameters={
+                 "symbol": key.symbol,
+                 "threshold_decimal_bps": key.threshold_decimal_bps,
+                 "start_ts": key.start_ts,
+                 "end_ts": key.end_ts,
+             },
+         )
+         return bool(result)
+
+     def invalidate_range_bars(self, key: CacheKey) -> int:
+         """Invalidate (delete) cached range bars.
+
+         Parameters
+         ----------
+         key : CacheKey
+             Cache key to invalidate
+
+         Returns
+         -------
+         int
+             Number of rows deleted
+         """
+         # Note: ClickHouse DELETE is async via mutations
+         query = """
+             ALTER TABLE rangebar_cache.range_bars
+             DELETE WHERE symbol = {symbol:String}
+               AND threshold_decimal_bps = {threshold_decimal_bps:UInt32}
+               AND source_start_ts = {start_ts:Int64}
+               AND source_end_ts = {end_ts:Int64}
+         """
+         self.client.command(
+             query,
+             parameters={
+                 "symbol": key.symbol,
+                 "threshold_decimal_bps": key.threshold_decimal_bps,
+                 "start_ts": key.start_ts,
+                 "end_ts": key.end_ts,
+             },
+         )
+         return 0  # ClickHouse DELETE is async, can't return count
+
+     def invalidate_range_bars_by_range(
+         self,
+         symbol: str,
+         threshold_decimal_bps: int,
+         start_timestamp_ms: int,
+         end_timestamp_ms: int,
+     ) -> int:
+         """Invalidate (delete) cached bars within a timestamp range.
+
+         Unlike `invalidate_range_bars()` which requires an exact CacheKey match,
+         this method deletes all bars for a symbol/threshold within a time range.
+         Useful for overlap detection during precomputation.
+
+         Parameters
+         ----------
+         symbol : str
+             Trading symbol (e.g., "BTCUSDT")
+         threshold_decimal_bps : int
+             Threshold in decimal basis points
+         start_timestamp_ms : int
+             Start timestamp in milliseconds (inclusive)
+         end_timestamp_ms : int
+             End timestamp in milliseconds (inclusive)
+
+         Returns
+         -------
+         int
+             Always returns 0 (ClickHouse DELETE is async via mutations)
+
+         Examples
+         --------
+         >>> cache.invalidate_range_bars_by_range(
+         ...     "BTCUSDT", 250,
+         ...     start_timestamp_ms=1704067200000,  # 2024-01-01
+         ...     end_timestamp_ms=1706745600000,  # 2024-01-31
+         ... )
+         """
+         query = """
+             ALTER TABLE rangebar_cache.range_bars
+             DELETE WHERE symbol = {symbol:String}
+               AND threshold_decimal_bps = {threshold:UInt32}
+               AND timestamp_ms >= {start_ts:Int64}
+               AND timestamp_ms <= {end_ts:Int64}
+         """
+         self.client.command(
+             query,
+             parameters={
+                 "symbol": symbol,
+                 "threshold": threshold_decimal_bps,
+                 "start_ts": start_timestamp_ms,
+                 "end_ts": end_timestamp_ms,
+             },
+         )
+         return 0  # ClickHouse DELETE is async, can't return count
+
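Both invalidation paths issue ALTER TABLE ... DELETE, so rows disappear only once ClickHouse applies the mutation in the background. A sketch for checking whether any such mutations are still pending, assuming the underlying connection is reachable as cache.client:

    pending = cache.client.command(
        "SELECT count() FROM system.mutations "
        "WHERE database = 'rangebar_cache' AND table = 'range_bars' AND NOT is_done"
    )
    if int(pending or 0) == 0:
        print("all delete mutations have been applied")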
+     def get_last_bar_before(
+         self,
+         symbol: str,
+         threshold_decimal_bps: int,
+         before_timestamp_ms: int,
+     ) -> dict | None:
+         """Get the last bar before a given timestamp.
+
+         Useful for validating continuity at junction points during
+         incremental precomputation.
+
+         Parameters
+         ----------
+         symbol : str
+             Trading symbol (e.g., "BTCUSDT")
+         threshold_decimal_bps : int
+             Threshold in decimal basis points
+         before_timestamp_ms : int
+             Timestamp boundary in milliseconds
+
+         Returns
+         -------
+         dict | None
+             Last bar as dict with OHLCV keys, or None if no bars found
+
+         Examples
+         --------
+         >>> bar = cache.get_last_bar_before("BTCUSDT", 250, 1704067200000)
+         >>> if bar:
+         ...     print(f"Last close: {bar['Close']}")
+         """
+         query = """
+             SELECT timestamp_ms, open, high, low, close, volume
+             FROM rangebar_cache.range_bars FINAL
+             WHERE symbol = {symbol:String}
+               AND threshold_decimal_bps = {threshold:UInt32}
+               AND timestamp_ms < {before_ts:Int64}
+             ORDER BY timestamp_ms DESC
+             LIMIT 1
+         """
+         result = self.client.query(
+             query,
+             parameters={
+                 "symbol": symbol,
+                 "threshold": threshold_decimal_bps,
+                 "before_ts": before_timestamp_ms,
+             },
+         )
+
+         rows = result.result_rows
+         if not rows:
+             return None
+
+         row = rows[0]
+         return {
+             "timestamp_ms": row[0],
+             "Open": row[1],
+             "High": row[2],
+             "Low": row[3],
+             "Close": row[4],
+             "Volume": row[5],
+         }
+
+     # =========================================================================
+     # Bar-Count-Based API (get_n_range_bars support)
+     # =========================================================================
+     # These methods support retrieving a deterministic number of bars
+     # regardless of time bounds. Uses split query paths to avoid OR conditions.
+
+     def count_bars(
+         self,
+         symbol: str,
+         threshold_decimal_bps: int,
+         before_ts: int | None = None,
+     ) -> int:
+         """Count available bars in cache.
+
+         Uses split query paths to avoid OR conditions for ClickHouse optimization.
+
+         Parameters
+         ----------
+         symbol : str
+             Trading symbol (e.g., "BTCUSDT")
+         threshold_decimal_bps : int
+             Threshold in decimal basis points
+         before_ts : int | None
+             Only count bars with timestamp_ms < before_ts.
+             If None, counts all bars for symbol/threshold.
+
+         Returns
+         -------
+         int
+             Number of bars in cache
+         """
+         if before_ts is not None:
+             # Split path: with end_ts filter
+             query = """
+                 SELECT count()
+                 FROM rangebar_cache.range_bars FINAL
+                 WHERE symbol = {symbol:String}
+                   AND threshold_decimal_bps = {threshold:UInt32}
+                   AND timestamp_ms < {end_ts:Int64}
+             """
+             result = self.client.command(
+                 query,
+                 parameters={
+                     "symbol": symbol,
+                     "threshold": threshold_decimal_bps,
+                     "end_ts": before_ts,
+                 },
+             )
+         else:
+             # Split path: no end_ts filter (most recent)
+             query = """
+                 SELECT count()
+                 FROM rangebar_cache.range_bars FINAL
+                 WHERE symbol = {symbol:String}
+                   AND threshold_decimal_bps = {threshold:UInt32}
+             """
+             result = self.client.command(
+                 query,
+                 parameters={
+                     "symbol": symbol,
+                     "threshold": threshold_decimal_bps,
+                 },
+             )
+         return int(result) if result else 0
+
+
+     def get_oldest_bar_timestamp(
+         self,
+         symbol: str,
+         threshold_decimal_bps: int,
+     ) -> int | None:
+         """Get timestamp of oldest bar in cache.
+
+         Useful for determining how far back we can query.
+
+         Parameters
+         ----------
+         symbol : str
+             Trading symbol (e.g., "BTCUSDT")
+         threshold_decimal_bps : int
+             Threshold in decimal basis points
+
+         Returns
+         -------
+         int | None
+             Oldest bar timestamp in milliseconds, or None if no bars
+         """
+         query = """
+             SELECT min(timestamp_ms)
+             FROM rangebar_cache.range_bars FINAL
+             WHERE symbol = {symbol:String}
+               AND threshold_decimal_bps = {threshold:UInt32}
+         """
+         result = self.client.command(
+             query,
+             parameters={
+                 "symbol": symbol,
+                 "threshold": threshold_decimal_bps,
+             },
+         )
+         # ClickHouse returns 0 for min() on empty result
+         return int(result) if result and result > 0 else None
+
+     def get_newest_bar_timestamp(
+         self,
+         symbol: str,
+         threshold_decimal_bps: int,
+     ) -> int | None:
+         """Get timestamp of newest bar in cache.
+
+         Useful for determining the end of available data.
+
+         Parameters
+         ----------
+         symbol : str
+             Trading symbol (e.g., "BTCUSDT")
+         threshold_decimal_bps : int
+             Threshold in decimal basis points
+
+         Returns
+         -------
+         int | None
+             Newest bar timestamp in milliseconds, or None if no bars
+         """
+         query = """
+             SELECT max(timestamp_ms)
+             FROM rangebar_cache.range_bars FINAL
+             WHERE symbol = {symbol:String}
+               AND threshold_decimal_bps = {threshold:UInt32}
+         """
+         result = self.client.command(
+             query,
+             parameters={
+                 "symbol": symbol,
+                 "threshold": threshold_decimal_bps,
+             },
+         )
+         # ClickHouse returns 0 for max() on empty result
+         return int(result) if result and result > 0 else None
+
+
+
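Finally, a sketch of how the bar-count helpers compose for count-bounded retrieval; the target bar count, cutoff, and fallback branch are illustrative, with the real orchestration presumably living in rangebar/orchestration/count_bounded.py:

    n_bars = 500
    cutoff_ms = 1706745600000  # example cutoff, ms since epoch (UTC)

    have = cache.count_bars("BTCUSDT", 250, before_ts=cutoff_ms)
    if have >= n_bars:
        oldest = cache.get_oldest_bar_timestamp("BTCUSDT", 250)
        newest = cache.get_newest_bar_timestamp("BTCUSDT", 250)
        print(f"cache spans {oldest}..{newest} ms; enough bars available")
    else:
        print(f"only {have} of {n_bars} bars cached; more precomputation needed")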