rangebar-11.6.1-cp313-cp313-macosx_11_0_arm64.whl
- rangebar/CLAUDE.md +327 -0
- rangebar/__init__.py +227 -0
- rangebar/__init__.pyi +1089 -0
- rangebar/_core.cpython-313-darwin.so +0 -0
- rangebar/checkpoint.py +472 -0
- rangebar/cli.py +298 -0
- rangebar/clickhouse/CLAUDE.md +139 -0
- rangebar/clickhouse/__init__.py +100 -0
- rangebar/clickhouse/bulk_operations.py +309 -0
- rangebar/clickhouse/cache.py +734 -0
- rangebar/clickhouse/client.py +121 -0
- rangebar/clickhouse/config.py +141 -0
- rangebar/clickhouse/mixin.py +120 -0
- rangebar/clickhouse/preflight.py +504 -0
- rangebar/clickhouse/query_operations.py +345 -0
- rangebar/clickhouse/schema.sql +187 -0
- rangebar/clickhouse/tunnel.py +222 -0
- rangebar/constants.py +288 -0
- rangebar/conversion.py +177 -0
- rangebar/exceptions.py +207 -0
- rangebar/exness.py +364 -0
- rangebar/hooks.py +311 -0
- rangebar/logging.py +171 -0
- rangebar/notify/__init__.py +15 -0
- rangebar/notify/pushover.py +155 -0
- rangebar/notify/telegram.py +271 -0
- rangebar/orchestration/__init__.py +20 -0
- rangebar/orchestration/count_bounded.py +797 -0
- rangebar/orchestration/helpers.py +412 -0
- rangebar/orchestration/models.py +76 -0
- rangebar/orchestration/precompute.py +498 -0
- rangebar/orchestration/range_bars.py +736 -0
- rangebar/orchestration/tick_fetcher.py +226 -0
- rangebar/ouroboros.py +454 -0
- rangebar/processors/__init__.py +22 -0
- rangebar/processors/api.py +383 -0
- rangebar/processors/core.py +522 -0
- rangebar/resource_guard.py +567 -0
- rangebar/storage/__init__.py +22 -0
- rangebar/storage/checksum_registry.py +218 -0
- rangebar/storage/parquet.py +728 -0
- rangebar/streaming.py +300 -0
- rangebar/validation/__init__.py +69 -0
- rangebar/validation/cache_staleness.py +277 -0
- rangebar/validation/continuity.py +664 -0
- rangebar/validation/gap_classification.py +294 -0
- rangebar/validation/post_storage.py +317 -0
- rangebar/validation/tier1.py +175 -0
- rangebar/validation/tier2.py +261 -0
- rangebar-11.6.1.dist-info/METADATA +308 -0
- rangebar-11.6.1.dist-info/RECORD +54 -0
- rangebar-11.6.1.dist-info/WHEEL +4 -0
- rangebar-11.6.1.dist-info/entry_points.txt +2 -0
- rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
rangebar/clickhouse/cache.py
@@ -0,0 +1,734 @@

"""ClickHouse cache for computed range bars.

This module provides the RangeBarCache class that caches computed range bars
(Tier 2) in ClickHouse. Raw tick data is stored locally using Parquet files
via the `rangebar.storage` module.

Architecture:
- Tier 1 (raw ticks): Local Parquet files via `rangebar.storage.TickStorage`
- Tier 2 (computed bars): ClickHouse via this module

The cache uses mise environment variables for configuration and
supports multiple GPU workstations via SSH aliases.
"""

from __future__ import annotations

import hashlib
import logging
from dataclasses import dataclass
from importlib import resources
from typing import TYPE_CHECKING

import pandas as pd

from .._core import __version__
from ..constants import (
    EXCHANGE_SESSION_COLUMNS,
    MICROSTRUCTURE_COLUMNS,
)
from ..conversion import normalize_arrow_dtypes
from ..exceptions import (
    CacheReadError,
    CacheWriteError,
)
from .bulk_operations import BulkStoreMixin
from .client import get_client
from .mixin import ClickHouseClientMixin
from .preflight import (
    HostConnection,
    get_available_clickhouse_host,
)
from .query_operations import QueryOperationsMixin
from .tunnel import SSHTunnel

if TYPE_CHECKING:
    import clickhouse_connect

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class CacheKey:
    """Cache key for range bar lookups.

    Uniquely identifies a set of computed range bars based on:
    - Symbol (e.g., "BTCUSDT")
    - Threshold in decimal basis points (dbps)
    - Time range of source data
    - Ouroboros mode for reproducibility

    Attributes
    ----------
    symbol : str
        Trading symbol
    threshold_decimal_bps : int
        Threshold in decimal basis points (1 dbps = 0.001%)
    start_ts : int
        Start timestamp in milliseconds
    end_ts : int
        End timestamp in milliseconds
    ouroboros_mode : str
        Ouroboros reset mode: "year", "month", or "week" (default: "year")
    """

    symbol: str
    threshold_decimal_bps: int
    start_ts: int
    end_ts: int
    ouroboros_mode: str = "year"

    @property
    def hash_key(self) -> str:
        """Get hash key for cache lookups.

        Returns
        -------
        str
            MD5 hash of cache key components
        """
        key_str = (
            f"{self.symbol}_{self.threshold_decimal_bps}_"
            f"{self.start_ts}_{self.end_ts}_{self.ouroboros_mode}"
        )
        return hashlib.md5(key_str.encode()).hexdigest()
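
# A minimal usage sketch for CacheKey (editor's illustration; the timestamps
# are arbitrary example values):
#
#     key = CacheKey(
#         symbol="BTCUSDT",
#         threshold_decimal_bps=250,
#         start_ts=1704067200000,  # 2024-01-01 00:00 UTC, in ms
#         end_ts=1706745600000,    # 2024-02-01 00:00 UTC, in ms
#     )
#     key.hash_key  # stable 32-char MD5 hex digest of all five fields
#
# Because the dataclass is frozen, keys are immutable and hashable; two keys
# that differ only in ouroboros_mode map to different cache entries.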


class RangeBarCache(ClickHouseClientMixin, BulkStoreMixin, QueryOperationsMixin):
    """ClickHouse cache for computed range bars.

    Caches computed range bars (Tier 2) in ClickHouse. For raw tick data
    storage (Tier 1), use `rangebar.storage.TickStorage` instead.

    PREFLIGHT runs in __init__ - fails loudly if no ClickHouse available.

    Parameters
    ----------
    client : Client | None
        External ClickHouse client. If None, creates connection based on
        environment configuration (mise env vars).

    Raises
    ------
    ClickHouseNotConfiguredError
        If no ClickHouse hosts available (with guidance for setup)

    Examples
    --------
    >>> with RangeBarCache() as cache:
    ...     # Check cache
    ...     if cache.has_range_bars(key):
    ...         bars = cache.get_range_bars(key)
    ...     else:
    ...         # Compute bars and store
    ...         cache.store_range_bars(key, bars)

    See Also
    --------
    rangebar.storage.TickStorage : Local Parquet storage for raw tick data
    """

    def __init__(self, client: clickhouse_connect.driver.Client | None = None) -> None:
        """Initialize cache with ClickHouse connection.

        Runs preflight check to find available ClickHouse host.
        Creates schema if it doesn't exist.
        """
        self._tunnel: SSHTunnel | None = None
        self._host_connection: HostConnection | None = None

        if client is None:
            # PREFLIGHT CHECK - runs before anything else
            # This function fails loudly if no CH available
            host_conn = get_available_clickhouse_host()
            self._host_connection = host_conn

            # Connect based on method (local, direct, or SSH tunnel)
            if host_conn.method == "local":
                self._init_client(get_client("localhost", host_conn.port))
            elif host_conn.method == "direct":
                from .preflight import _resolve_ssh_alias_to_ip

                ip = _resolve_ssh_alias_to_ip(host_conn.host)
                if ip is None:
                    msg = f"Could not resolve SSH alias: {host_conn.host}"
                    raise RuntimeError(msg)
                self._init_client(get_client(ip, host_conn.port))
            elif host_conn.method == "ssh_tunnel":
                self._tunnel = SSHTunnel(host_conn.host, host_conn.port)
                local_port = self._tunnel.start()
                self._init_client(get_client("localhost", local_port))
        else:
            self._init_client(client)

        self._ensure_schema()

    def close(self) -> None:
        """Close client and tunnel if owned."""
        super().close()
        if self._tunnel is not None:
            self._tunnel.stop()
            self._tunnel = None

    def _ensure_schema(self) -> None:
        """Create database and tables if they don't exist."""
        # Create database
        self.client.command("CREATE DATABASE IF NOT EXISTS rangebar_cache")

        # Load and execute schema
        schema_sql = resources.files(__package__).joinpath("schema.sql").read_text()

        # Split by semicolon and execute each statement
        for statement in schema_sql.split(";"):
            # Check if statement contains a CREATE TABLE/MATERIALIZED VIEW
            if "CREATE TABLE" in statement or "CREATE MATERIALIZED" in statement:
                # Strip single-line comments from each line, keep the SQL
                lines = []
                for line in statement.split("\n"):
                    line = line.strip()
                    # Skip pure comment lines
                    if line.startswith("--"):
                        continue
                    # Remove trailing comments
                    if "--" in line:
                        line = line[: line.index("--")].strip()
                    if line:
                        lines.append(line)
                clean_sql = " ".join(lines)
                if clean_sql:
                    self.client.command(clean_sql)
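
    # Illustration of the comment handling above (editor's sketch; the SQL is
    # a hypothetical schema fragment, not the real contents of schema.sql).
    # A statement such as:
    #
    #     -- computed range bars (Tier 2)
    #     CREATE TABLE IF NOT EXISTS rangebar_cache.range_bars (
    #         symbol String,  -- trading pair
    #         timestamp_ms Int64
    #     ) ENGINE = ReplacingMergeTree
    #
    # is collapsed before execution to:
    #
    #     CREATE TABLE IF NOT EXISTS rangebar_cache.range_bars ( symbol String, timestamp_ms Int64 ) ENGINE = ReplacingMergeTree
    #
    # Pure "--" lines are dropped, trailing "--" comments are cut, and the
    # surviving fragments are joined with single spaces.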

    # =========================================================================
    # Range Bars Cache (Tier 2)
    # =========================================================================
    # Note: Raw tick data (Tier 1) is now stored locally using Parquet files.
    # See rangebar.storage.TickStorage for tick data caching.

    def store_range_bars(
        self,
        key: CacheKey,
        bars: pd.DataFrame,
        version: str | None = None,
    ) -> int:
        """Store computed range bars in cache.

        Parameters
        ----------
        key : CacheKey
            Cache key identifying these bars
        bars : pd.DataFrame
            DataFrame with OHLCV columns (from rangebar processing)
        version : str | None
            rangebar-core version for cache invalidation. If None (default),
            uses current package version for schema evolution tracking.

        Returns
        -------
        int
            Number of rows inserted

        Raises
        ------
        CacheWriteError
            If the insert operation fails.
        """
        if bars.empty:
            logger.debug("Skipping cache write for %s: empty DataFrame", key.symbol)
            return 0

        logger.debug(
            "Writing %d bars to cache for %s @ %d dbps",
            len(bars),
            key.symbol,
            key.threshold_decimal_bps,
        )

        df = bars.copy()

        # Handle DatetimeIndex
        if isinstance(df.index, pd.DatetimeIndex):
            df = df.reset_index()
            if "timestamp" in df.columns:
                df["timestamp_ms"] = df["timestamp"].astype("int64") // 10**6
                df = df.drop(columns=["timestamp"])
            elif "index" in df.columns:
                df["timestamp_ms"] = df["index"].astype("int64") // 10**6
                df = df.drop(columns=["index"])

        # Normalize column names (lowercase)
        df.columns = df.columns.str.lower()

        # Add cache metadata
        df["symbol"] = key.symbol
        df["threshold_decimal_bps"] = key.threshold_decimal_bps
        df["ouroboros_mode"] = key.ouroboros_mode
        df["cache_key"] = key.hash_key
        df["rangebar_version"] = version if version is not None else __version__
        df["source_start_ts"] = key.start_ts
        df["source_end_ts"] = key.end_ts

        # Select columns for insertion
        columns = [
            "symbol",
            "threshold_decimal_bps",
            "ouroboros_mode",
            "timestamp_ms",
            "open",
            "high",
            "low",
            "close",
            "volume",
            "cache_key",
            "rangebar_version",
            "source_start_ts",
            "source_end_ts",
        ]

        # Add optional microstructure columns if present (from constants.py SSoT)
        for col in MICROSTRUCTURE_COLUMNS:
            if col in df.columns:
                columns.append(col)

        # Add optional exchange session columns if present (Issue #8)
        for col in EXCHANGE_SESSION_COLUMNS:
            if col in df.columns:
                columns.append(col)

        # Filter to existing columns
        columns = [c for c in columns if c in df.columns]

        try:
            summary = self.client.insert_df(
                "rangebar_cache.range_bars",
                df[columns],
            )
            written = summary.written_rows
            logger.info(
                "Cached %d bars for %s @ %d dbps",
                written,
                key.symbol,
                key.threshold_decimal_bps,
            )
            return written
        except (OSError, RuntimeError) as e:
            logger.exception(
                "Cache write failed for %s @ %d dbps",
                key.symbol,
                key.threshold_decimal_bps,
            )
            msg = f"Failed to write bars for {key.symbol}: {e}"
            raise CacheWriteError(
                msg,
                symbol=key.symbol,
                operation="write",
            ) from e
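
    # Write-path sketch (editor's illustration; the single-bar frame is an
    # assumption -- any OHLCV DataFrame with a DatetimeIndex follows the same
    # path):
    #
    #     bars = pd.DataFrame(
    #         {"Open": [42000.0], "High": [42105.0], "Low": [41980.0],
    #          "Close": [42100.0], "Volume": [12.5]},
    #         index=pd.DatetimeIndex([pd.Timestamp("2024-01-01", tz="UTC")]),
    #     )
    #     with RangeBarCache() as cache:
    #         written = cache.store_range_bars(key, bars)
    #
    # Column names are lowercased on the way in, the DatetimeIndex becomes
    # integer timestamp_ms, and cache metadata (cache_key, rangebar_version,
    # source_start_ts, source_end_ts) is stamped onto every row.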

    def get_range_bars(self, key: CacheKey) -> pd.DataFrame | None:
        """Get cached range bars.

        Parameters
        ----------
        key : CacheKey
            Cache key to lookup

        Returns
        -------
        pd.DataFrame | None
            OHLCV DataFrame if found, None otherwise

        Raises
        ------
        CacheReadError
            If the query fails due to database errors.
        """
        query = """
            SELECT
                timestamp_ms,
                open as Open,
                high as High,
                low as Low,
                close as Close,
                volume as Volume
            FROM rangebar_cache.range_bars FINAL
            WHERE symbol = {symbol:String}
              AND threshold_decimal_bps = {threshold_decimal_bps:UInt32}
              AND source_start_ts = {start_ts:Int64}
              AND source_end_ts = {end_ts:Int64}
            ORDER BY timestamp_ms
        """
        try:
            # Use Arrow-optimized query for 3x faster DataFrame creation
            df = self.client.query_df_arrow(
                query,
                parameters={
                    "symbol": key.symbol,
                    "threshold_decimal_bps": key.threshold_decimal_bps,
                    "start_ts": key.start_ts,
                    "end_ts": key.end_ts,
                },
            )
        except (OSError, RuntimeError) as e:
            logger.exception(
                "Cache read failed for %s @ %d dbps",
                key.symbol,
                key.threshold_decimal_bps,
            )
            msg = f"Failed to read bars for {key.symbol}: {e}"
            raise CacheReadError(
                msg,
                symbol=key.symbol,
                operation="read",
            ) from e

        if df.empty:
            logger.debug(
                "Cache miss for %s @ %d dbps (key: %s)",
                key.symbol,
                key.threshold_decimal_bps,
                key.hash_key[:8],
            )
            return None

        logger.debug(
            "Cache hit: %d bars for %s @ %d dbps",
            len(df),
            key.symbol,
            key.threshold_decimal_bps,
        )

        # Convert to TZ-aware UTC DatetimeIndex (Issue #20: consistent timestamps)
        df["timestamp"] = pd.to_datetime(df["timestamp_ms"], unit="ms", utc=True)
        df = df.set_index("timestamp")
        df = df.drop(columns=["timestamp_ms"])

        # Convert PyArrow dtypes to numpy for compatibility with fresh computation
        # query_df_arrow returns double[pyarrow], but process_trades returns float64
        df = normalize_arrow_dtypes(df)

        return df
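
    # Read-path sketch (editor's illustration): a cache hit is meant to be
    # interchangeable with freshly computed bars --
    #
    #     with RangeBarCache() as cache:
    #         cached = cache.get_range_bars(key)
    #     if cached is not None:
    #         assert isinstance(cached.index, pd.DatetimeIndex)
    #         assert str(cached.index.tz) == "UTC"  # tz-aware per Issue #20
    #         assert cached["Close"].dtype == "float64"  # numpy, not pyarrow
    #
    # The FINAL modifier in the query deduplicates rows at read time
    # (assuming a ReplacingMergeTree table, as the FINAL keyword suggests),
    # so re-storing the same key does not produce duplicate bars.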

    def has_range_bars(self, key: CacheKey) -> bool:
        """Check if range bars exist in cache.

        Parameters
        ----------
        key : CacheKey
            Cache key to check

        Returns
        -------
        bool
            True if bars exist
        """
        query = """
            SELECT count() > 0
            FROM rangebar_cache.range_bars FINAL
            WHERE symbol = {symbol:String}
              AND threshold_decimal_bps = {threshold_decimal_bps:UInt32}
              AND source_start_ts = {start_ts:Int64}
              AND source_end_ts = {end_ts:Int64}
            LIMIT 1
        """
        result = self.client.command(
            query,
            parameters={
                "symbol": key.symbol,
                "threshold_decimal_bps": key.threshold_decimal_bps,
                "start_ts": key.start_ts,
                "end_ts": key.end_ts,
            },
        )
        return bool(result)

    def invalidate_range_bars(self, key: CacheKey) -> int:
        """Invalidate (delete) cached range bars.

        Parameters
        ----------
        key : CacheKey
            Cache key to invalidate

        Returns
        -------
        int
            Always returns 0 (ClickHouse DELETE is async via mutations)
        """
        # Note: ClickHouse DELETE is async via mutations
        query = """
            ALTER TABLE rangebar_cache.range_bars
            DELETE WHERE symbol = {symbol:String}
              AND threshold_decimal_bps = {threshold_decimal_bps:UInt32}
              AND source_start_ts = {start_ts:Int64}
              AND source_end_ts = {end_ts:Int64}
        """
        self.client.command(
            query,
            parameters={
                "symbol": key.symbol,
                "threshold_decimal_bps": key.threshold_decimal_bps,
                "start_ts": key.start_ts,
                "end_ts": key.end_ts,
            },
        )
        return 0  # ClickHouse DELETE is async, can't return count

    def invalidate_range_bars_by_range(
        self,
        symbol: str,
        threshold_decimal_bps: int,
        start_timestamp_ms: int,
        end_timestamp_ms: int,
    ) -> int:
        """Invalidate (delete) cached bars within a timestamp range.

        Unlike `invalidate_range_bars()` which requires an exact CacheKey match,
        this method deletes all bars for a symbol/threshold within a time range.
        Useful for overlap detection during precomputation.

        Parameters
        ----------
        symbol : str
            Trading symbol (e.g., "BTCUSDT")
        threshold_decimal_bps : int
            Threshold in decimal basis points
        start_timestamp_ms : int
            Start timestamp in milliseconds (inclusive)
        end_timestamp_ms : int
            End timestamp in milliseconds (inclusive)

        Returns
        -------
        int
            Always returns 0 (ClickHouse DELETE is async via mutations)

        Examples
        --------
        >>> cache.invalidate_range_bars_by_range(
        ...     "BTCUSDT", 250,
        ...     start_timestamp_ms=1704067200000,  # 2024-01-01 00:00 UTC
        ...     end_timestamp_ms=1706745600000,  # 2024-02-01 00:00 UTC
        ... )
        """
        query = """
            ALTER TABLE rangebar_cache.range_bars
            DELETE WHERE symbol = {symbol:String}
              AND threshold_decimal_bps = {threshold:UInt32}
              AND timestamp_ms >= {start_ts:Int64}
              AND timestamp_ms <= {end_ts:Int64}
        """
        self.client.command(
            query,
            parameters={
                "symbol": symbol,
                "threshold": threshold_decimal_bps,
                "start_ts": start_timestamp_ms,
                "end_ts": end_timestamp_ms,
            },
        )
        return 0  # ClickHouse DELETE is async, can't return count
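
    # Both invalidation helpers return 0 because ALTER TABLE ... DELETE is an
    # asynchronous mutation in ClickHouse. A caller that must confirm the
    # delete completed can poll system.mutations (editor's sketch, not part
    # of this API):
    #
    #     pending = cache.client.command(
    #         "SELECT count() FROM system.mutations "
    #         "WHERE database = 'rangebar_cache' "
    #         "AND table = 'range_bars' AND is_done = 0"
    #     )
    #     # pending == 0 once all outstanding deletes have been applied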

    def get_last_bar_before(
        self,
        symbol: str,
        threshold_decimal_bps: int,
        before_timestamp_ms: int,
    ) -> dict | None:
        """Get the last bar before a given timestamp.

        Useful for validating continuity at junction points during
        incremental precomputation.

        Parameters
        ----------
        symbol : str
            Trading symbol (e.g., "BTCUSDT")
        threshold_decimal_bps : int
            Threshold in decimal basis points
        before_timestamp_ms : int
            Timestamp boundary in milliseconds

        Returns
        -------
        dict | None
            Last bar as dict with OHLCV keys, or None if no bars found

        Examples
        --------
        >>> bar = cache.get_last_bar_before("BTCUSDT", 250, 1704067200000)
        >>> if bar:
        ...     print(f"Last close: {bar['Close']}")
        """
        query = """
            SELECT timestamp_ms, open, high, low, close, volume
            FROM rangebar_cache.range_bars FINAL
            WHERE symbol = {symbol:String}
              AND threshold_decimal_bps = {threshold:UInt32}
              AND timestamp_ms < {before_ts:Int64}
            ORDER BY timestamp_ms DESC
            LIMIT 1
        """
        result = self.client.query(
            query,
            parameters={
                "symbol": symbol,
                "threshold": threshold_decimal_bps,
                "before_ts": before_timestamp_ms,
            },
        )

        rows = result.result_rows
        if not rows:
            return None

        row = rows[0]
        return {
            "timestamp_ms": row[0],
            "Open": row[1],
            "High": row[2],
            "Low": row[3],
            "Close": row[4],
            "Volume": row[5],
        }

    # =========================================================================
    # Bar-Count-Based API (get_n_range_bars support)
    # =========================================================================
    # These methods support retrieving a deterministic number of bars
    # regardless of time bounds. Uses split query paths to avoid OR conditions.

    def count_bars(
        self,
        symbol: str,
        threshold_decimal_bps: int,
        before_ts: int | None = None,
    ) -> int:
        """Count available bars in cache.

        Uses split query paths to avoid OR conditions for ClickHouse optimization.

        Parameters
        ----------
        symbol : str
            Trading symbol (e.g., "BTCUSDT")
        threshold_decimal_bps : int
            Threshold in decimal basis points
        before_ts : int | None
            Only count bars with timestamp_ms < before_ts.
            If None, counts all bars for symbol/threshold.

        Returns
        -------
        int
            Number of bars in cache
        """
        if before_ts is not None:
            # Split path: with end_ts filter
            query = """
                SELECT count()
                FROM rangebar_cache.range_bars FINAL
                WHERE symbol = {symbol:String}
                  AND threshold_decimal_bps = {threshold:UInt32}
                  AND timestamp_ms < {end_ts:Int64}
            """
            result = self.client.command(
                query,
                parameters={
                    "symbol": symbol,
                    "threshold": threshold_decimal_bps,
                    "end_ts": before_ts,
                },
            )
        else:
            # Split path: no end_ts filter (most recent)
            query = """
                SELECT count()
                FROM rangebar_cache.range_bars FINAL
                WHERE symbol = {symbol:String}
                  AND threshold_decimal_bps = {threshold:UInt32}
            """
            result = self.client.command(
                query,
                parameters={
                    "symbol": symbol,
                    "threshold": threshold_decimal_bps,
                },
            )
        return int(result) if result else 0

    def get_oldest_bar_timestamp(
        self,
        symbol: str,
        threshold_decimal_bps: int,
    ) -> int | None:
        """Get timestamp of oldest bar in cache.

        Useful for determining how far back we can query.

        Parameters
        ----------
        symbol : str
            Trading symbol (e.g., "BTCUSDT")
        threshold_decimal_bps : int
            Threshold in decimal basis points

        Returns
        -------
        int | None
            Oldest bar timestamp in milliseconds, or None if no bars
        """
        query = """
            SELECT min(timestamp_ms)
            FROM rangebar_cache.range_bars FINAL
            WHERE symbol = {symbol:String}
              AND threshold_decimal_bps = {threshold:UInt32}
        """
        result = self.client.command(
            query,
            parameters={
                "symbol": symbol,
                "threshold": threshold_decimal_bps,
            },
        )
        # ClickHouse returns 0 for min() on empty result
        return int(result) if result and result > 0 else None

    def get_newest_bar_timestamp(
        self,
        symbol: str,
        threshold_decimal_bps: int,
    ) -> int | None:
        """Get timestamp of newest bar in cache.

        Useful for determining the end of available data.

        Parameters
        ----------
        symbol : str
            Trading symbol (e.g., "BTCUSDT")
        threshold_decimal_bps : int
            Threshold in decimal basis points

        Returns
        -------
        int | None
            Newest bar timestamp in milliseconds, or None if no bars
        """
        query = """
            SELECT max(timestamp_ms)
            FROM rangebar_cache.range_bars FINAL
            WHERE symbol = {symbol:String}
              AND threshold_decimal_bps = {threshold:UInt32}
        """
        result = self.client.command(
            query,
            parameters={
                "symbol": symbol,
                "threshold": threshold_decimal_bps,
            },
        )
        # ClickHouse returns 0 for max() on empty result
        return int(result) if result and result > 0 else None
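
# End-to-end sketch of the bar-count API above (editor's illustration; symbol
# and threshold are example values):
#
#     with RangeBarCache() as cache:
#         total = cache.count_bars("BTCUSDT", 250)
#         oldest = cache.get_oldest_bar_timestamp("BTCUSDT", 250)
#         newest = cache.get_newest_bar_timestamp("BTCUSDT", 250)
#         if oldest is not None and newest is not None:
#             span_days = (newest - oldest) / 86_400_000  # ms per day
#
# Note the guards on min()/max(): ClickHouse returns 0 on an empty result
# set, so a zero timestamp is reported as None rather than 1970-01-01.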