rangebar-11.6.1-cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rangebar/CLAUDE.md +327 -0
- rangebar/__init__.py +227 -0
- rangebar/__init__.pyi +1089 -0
- rangebar/_core.cpython-313-darwin.so +0 -0
- rangebar/checkpoint.py +472 -0
- rangebar/cli.py +298 -0
- rangebar/clickhouse/CLAUDE.md +139 -0
- rangebar/clickhouse/__init__.py +100 -0
- rangebar/clickhouse/bulk_operations.py +309 -0
- rangebar/clickhouse/cache.py +734 -0
- rangebar/clickhouse/client.py +121 -0
- rangebar/clickhouse/config.py +141 -0
- rangebar/clickhouse/mixin.py +120 -0
- rangebar/clickhouse/preflight.py +504 -0
- rangebar/clickhouse/query_operations.py +345 -0
- rangebar/clickhouse/schema.sql +187 -0
- rangebar/clickhouse/tunnel.py +222 -0
- rangebar/constants.py +288 -0
- rangebar/conversion.py +177 -0
- rangebar/exceptions.py +207 -0
- rangebar/exness.py +364 -0
- rangebar/hooks.py +311 -0
- rangebar/logging.py +171 -0
- rangebar/notify/__init__.py +15 -0
- rangebar/notify/pushover.py +155 -0
- rangebar/notify/telegram.py +271 -0
- rangebar/orchestration/__init__.py +20 -0
- rangebar/orchestration/count_bounded.py +797 -0
- rangebar/orchestration/helpers.py +412 -0
- rangebar/orchestration/models.py +76 -0
- rangebar/orchestration/precompute.py +498 -0
- rangebar/orchestration/range_bars.py +736 -0
- rangebar/orchestration/tick_fetcher.py +226 -0
- rangebar/ouroboros.py +454 -0
- rangebar/processors/__init__.py +22 -0
- rangebar/processors/api.py +383 -0
- rangebar/processors/core.py +522 -0
- rangebar/resource_guard.py +567 -0
- rangebar/storage/__init__.py +22 -0
- rangebar/storage/checksum_registry.py +218 -0
- rangebar/storage/parquet.py +728 -0
- rangebar/streaming.py +300 -0
- rangebar/validation/__init__.py +69 -0
- rangebar/validation/cache_staleness.py +277 -0
- rangebar/validation/continuity.py +664 -0
- rangebar/validation/gap_classification.py +294 -0
- rangebar/validation/post_storage.py +317 -0
- rangebar/validation/tier1.py +175 -0
- rangebar/validation/tier2.py +261 -0
- rangebar-11.6.1.dist-info/METADATA +308 -0
- rangebar-11.6.1.dist-info/RECORD +54 -0
- rangebar-11.6.1.dist-info/WHEEL +4 -0
- rangebar-11.6.1.dist-info/entry_points.txt +2 -0
- rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
rangebar/clickhouse/query_operations.py
@@ -0,0 +1,345 @@
+# polars-exception: backtesting.py requires Pandas DataFrames with DatetimeIndex
+# Issue #46: Modularization M6 - Extract query operations from cache.py
+"""Query operations for ClickHouse range bar cache.
+
+Provides mixin methods for retrieving range bars by count (get_n_bars)
+and by timestamp range (get_bars_by_timestamp_range). Used by RangeBarCache
+via mixin inheritance.
+"""
+
+from __future__ import annotations
+
+import logging
+
+import pandas as pd
+
+from ..constants import (
+    MICROSTRUCTURE_COLUMNS,
+    MIN_VERSION_FOR_MICROSTRUCTURE,
+)
+from ..conversion import normalize_arrow_dtypes
+from ..exceptions import CacheReadError
+
+logger = logging.getLogger(__name__)
+
+
+class QueryOperationsMixin:
+    """Mixin providing query operations for RangeBarCache.
+
+    Requires `self.client` and `self.count_bars()` from parent class.
+    """
+
+    def get_n_bars(
+        self,
+        symbol: str,
+        threshold_decimal_bps: int,
+        n_bars: int,
+        before_ts: int | None = None,
+        include_microstructure: bool = False,
+        min_schema_version: str | None = None,
+    ) -> tuple[pd.DataFrame | None, int]:
+        """Get N bars from cache, ordered chronologically (oldest first).
+
+        Uses ORDER BY timestamp_ms DESC LIMIT N for efficient retrieval,
+        then reverses in Python for chronological order.
+
+        Parameters
+        ----------
+        symbol : str
+            Trading symbol (e.g., "BTCUSDT")
+        threshold_decimal_bps : int
+            Threshold in decimal basis points
+        n_bars : int
+            Maximum number of bars to retrieve
+        before_ts : int | None
+            Only get bars with timestamp_ms < before_ts.
+            If None, gets most recent bars.
+        include_microstructure : bool
+            If True, includes vwap, buy_volume, sell_volume columns
+        min_schema_version : str | None
+            Minimum schema version required for cache hit. If specified,
+            only returns data with rangebar_version >= min_schema_version.
+            When include_microstructure=True and min_schema_version=None,
+            automatically requires version >= 7.0.0.
+
+        Returns
+        -------
+        tuple[pd.DataFrame | None, int]
+            (bars_df, available_count) where:
+            - bars_df is OHLCV DataFrame (or None if no bars)
+            - available_count is total bars available (may be > len(bars_df))
+        """
+        # First get the count (for reporting)
+        available_count = self.count_bars(symbol, threshold_decimal_bps, before_ts)
+
+        if available_count == 0:
+            return None, 0
+
+        # Select columns
+        base_cols = """
+            timestamp_ms,
+            open as Open,
+            high as High,
+            low as Low,
+            close as Close,
+            volume as Volume
+        """
+        if include_microstructure:
+            base_cols += """,
+                vwap,
+                buy_volume,
+                sell_volume,
+                duration_us,
+                ofi,
+                vwap_close_deviation,
+                price_impact,
+                kyle_lambda_proxy,
+                trade_intensity,
+                volume_per_trade,
+                aggression_ratio,
+                aggregation_density,
+                turnover_imbalance
+            """
+
+        # Determine effective min version for schema evolution filtering
+        effective_min_version = min_schema_version
+        if include_microstructure and effective_min_version is None:
+            effective_min_version = MIN_VERSION_FOR_MICROSTRUCTURE
+
+        # Build version filter if specified
+        version_filter = ""
+        if effective_min_version:
+            version_filter = """
+                AND rangebar_version != ''
+                AND rangebar_version >= {min_version:String}"""
+
+        if before_ts is not None:
+            # Split path: with end_ts filter
+            query = f"""
+                SELECT {base_cols}
+                FROM rangebar_cache.range_bars FINAL
+                WHERE symbol = {{symbol:String}}
+                AND threshold_decimal_bps = {{threshold:UInt32}}
+                AND timestamp_ms < {{end_ts:Int64}}
+                {version_filter}
+                ORDER BY timestamp_ms DESC
+                LIMIT {{n_bars:UInt64}}
+            """
+            params: dict[str, str | int] = {
+                "symbol": symbol,
+                "threshold": threshold_decimal_bps,
+                "end_ts": before_ts,
+                "n_bars": n_bars,
+            }
+            if effective_min_version:
+                params["min_version"] = effective_min_version
+            df = self.client.query_df_arrow(query, parameters=params)
+        else:
+            # Split path: no end_ts filter (most recent)
+            query = f"""
+                SELECT {base_cols}
+                FROM rangebar_cache.range_bars FINAL
+                WHERE symbol = {{symbol:String}}
+                AND threshold_decimal_bps = {{threshold:UInt32}}
+                {version_filter}
+                ORDER BY timestamp_ms DESC
+                LIMIT {{n_bars:UInt64}}
+            """
+            params = {
+                "symbol": symbol,
+                "threshold": threshold_decimal_bps,
+                "n_bars": n_bars,
+            }
+            if effective_min_version:
+                params["min_version"] = effective_min_version
+            df = self.client.query_df_arrow(query, parameters=params)
+
+        if df.empty:
+            return None, available_count
+
+        # Reverse to chronological order (oldest first)
+        df = df.iloc[::-1].reset_index(drop=True)
+
+        # Convert to TZ-aware UTC DatetimeIndex (Issue #20: match get_range_bars output)
+        df["timestamp"] = pd.to_datetime(df["timestamp_ms"], unit="ms", utc=True)
+        df = df.set_index("timestamp")
+        df = df.drop(columns=["timestamp_ms"])
+
+        # Convert PyArrow dtypes to numpy for compatibility
+        df = normalize_arrow_dtypes(df)
+
+        # Convert microstructure columns if present
+        if include_microstructure:
+            df = normalize_arrow_dtypes(df, columns=list(MICROSTRUCTURE_COLUMNS))
+
+        return df, available_count
+
+    def get_bars_by_timestamp_range(
+        self,
+        symbol: str,
+        threshold_decimal_bps: int,
+        start_ts: int,
+        end_ts: int,
+        include_microstructure: bool = False,
+        include_exchange_sessions: bool = False,
+        ouroboros_mode: str = "year",
+        min_schema_version: str | None = None,
+    ) -> pd.DataFrame | None:
+        """Get bars within a timestamp range (for get_range_bars cache lookup).
+
+        Unlike get_range_bars() which requires exact CacheKey match,
+        this method queries by timestamp range, returning any cached bars
+        that fall within [start_ts, end_ts].
+
+        Parameters
+        ----------
+        symbol : str
+            Trading symbol (e.g., "BTCUSDT")
+        threshold_decimal_bps : int
+            Threshold in decimal basis points
+        start_ts : int
+            Start timestamp in milliseconds (inclusive)
+        end_ts : int
+            End timestamp in milliseconds (inclusive)
+        include_microstructure : bool
+            If True, includes vwap, buy_volume, sell_volume columns
+        include_exchange_sessions : bool
+            If True, includes exchange_session_* columns (Issue #8)
+        ouroboros_mode : str
+            Ouroboros reset mode: "year", "month", or "week" (default: "year")
+            Plan: sparkling-coalescing-dijkstra.md
+        min_schema_version : str | None
+            Minimum schema version required for cache hit. If specified,
+            only returns data with rangebar_version >= min_schema_version.
+            When include_microstructure=True and min_schema_version=None,
+            automatically requires version >= 7.0.0.
+
+        Returns
+        -------
+        pd.DataFrame | None
+            OHLCV DataFrame with TZ-aware UTC timestamps if found, None otherwise.
+            Returns None if no bars exist in the range or version mismatch.
+
+        Raises
+        ------
+        CacheReadError
+            If the query fails due to database errors.
+        """
+        # Build column list
+        base_cols = """
+            timestamp_ms,
+            open as Open,
+            high as High,
+            low as Low,
+            close as Close,
+            volume as Volume
+        """
+        if include_microstructure:
+            base_cols += """,
+                vwap,
+                buy_volume,
+                sell_volume,
+                duration_us,
+                ofi,
+                vwap_close_deviation,
+                price_impact,
+                kyle_lambda_proxy,
+                trade_intensity,
+                volume_per_trade,
+                aggression_ratio,
+                aggregation_density,
+                turnover_imbalance
+            """
+
+        # Issue #8: Exchange session flags
+        if include_exchange_sessions:
+            base_cols += """,
+                exchange_session_sydney,
+                exchange_session_tokyo,
+                exchange_session_london,
+                exchange_session_newyork
+            """
+
+        # Ouroboros mode filter ensures cache isolation between modes
+        # Plan: sparkling-coalescing-dijkstra.md
+
+        # Determine effective min version for schema evolution filtering
+        effective_min_version = min_schema_version
+        if include_microstructure and effective_min_version is None:
+            # Auto-require v7.0.0+ for microstructure features
+            effective_min_version = MIN_VERSION_FOR_MICROSTRUCTURE
+
+        # Build version filter if specified
+        version_filter = ""
+        if effective_min_version:
+            version_filter = """
+                AND rangebar_version != ''
+                AND rangebar_version >= {min_version:String}"""
+
+        query = f"""
+            SELECT {base_cols}
+            FROM rangebar_cache.range_bars FINAL
+            WHERE symbol = {{symbol:String}}
+            AND threshold_decimal_bps = {{threshold:UInt32}}
+            AND ouroboros_mode = {{ouroboros_mode:String}}
+            AND timestamp_ms >= {{start_ts:Int64}}
+            AND timestamp_ms <= {{end_ts:Int64}}
+            {version_filter}
+            ORDER BY timestamp_ms
+        """
+
+        # Build parameters
+        params: dict[str, str | int] = {
+            "symbol": symbol,
+            "threshold": threshold_decimal_bps,
+            "ouroboros_mode": ouroboros_mode,
+            "start_ts": start_ts,
+            "end_ts": end_ts,
+        }
+        if effective_min_version:
+            params["min_version"] = effective_min_version
+
+        try:
+            df = self.client.query_df_arrow(query, parameters=params)
+        except (OSError, RuntimeError) as e:
+            logger.exception(
+                "Cache read failed for %s @ %d dbps (range query)",
+                symbol,
+                threshold_decimal_bps,
+            )
+            msg = f"Failed to read bars for {symbol}: {e}"
+            raise CacheReadError(
+                msg,
+                symbol=symbol,
+                operation="read_range",
+            ) from e
+
+        if df.empty:
+            logger.debug(
+                "Cache miss for %s @ %d dbps (range: %d-%d)",
+                symbol,
+                threshold_decimal_bps,
+                start_ts,
+                end_ts,
+            )
+            return None
+
+        logger.debug(
+            "Cache hit: %d bars for %s @ %d dbps (range query)",
+            len(df),
+            symbol,
+            threshold_decimal_bps,
+        )
+
+        # Convert to TZ-aware UTC DatetimeIndex (matches get_range_bars output)
+        df["timestamp"] = pd.to_datetime(df["timestamp_ms"], unit="ms", utc=True)
+        df = df.set_index("timestamp")
+        df = df.drop(columns=["timestamp_ms"])
+
+        # Convert PyArrow dtypes to numpy float64 for compatibility
+        df = normalize_arrow_dtypes(df)
+
+        if include_microstructure:
+            df = normalize_arrow_dtypes(df, columns=list(MICROSTRUCTURE_COLUMNS))
+
+        return df
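Editor's note: the hunk above adds rangebar/clickhouse/query_operations.py. The key convention in get_n_bars() is that the cache is queried newest-first (ORDER BY timestamp_ms DESC LIMIT N) and then post-processed into the shape callers expect: rows reversed to chronological order and timestamp_ms converted into a TZ-aware UTC DatetimeIndex. The sketch below reproduces only that post-processing step on a synthetic DataFrame; in the package the frame comes from self.client.query_df_arrow(...), and the values here are illustrative, not taken from the wheel.

import pandas as pd

# Synthetic stand-in for a DESC-ordered ClickHouse result (newest row first).
df = pd.DataFrame(
    {
        "timestamp_ms": [1_700_000_120_000, 1_700_000_060_000, 1_700_000_000_000],
        "Open": [3.0, 2.0, 1.0],
        "High": [3.5, 2.5, 1.5],
        "Low": [2.9, 1.9, 0.9],
        "Close": [3.2, 2.2, 1.2],
        "Volume": [10.0, 20.0, 30.0],
    }
)

# Reverse to chronological order (oldest first), as get_n_bars() does.
df = df.iloc[::-1].reset_index(drop=True)

# Build a TZ-aware UTC DatetimeIndex and drop the raw millisecond column.
df["timestamp"] = pd.to_datetime(df["timestamp_ms"], unit="ms", utc=True)
df = df.set_index("timestamp").drop(columns=["timestamp_ms"])

print(df)  # OHLCV frame indexed by UTC timestamps, oldest bar first

In application code these rows come back as the first element of the (bars_df, available_count) tuple returned by get_n_bars().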
rangebar/clickhouse/schema.sql
@@ -0,0 +1,187 @@
+-- ClickHouse schema for rangebar cache
+-- Stores computed range bars (Tier 2)
+--
+-- Note: Raw tick data (Tier 1) is stored locally using Parquet files.
+-- See rangebar.storage.TickStorage for tick data caching.
+--
+-- Usage:
+--   CREATE DATABASE IF NOT EXISTS rangebar_cache;
+--   Then run this file or use RangeBarCache._ensure_schema()
+
+-- ============================================================================
+-- Migration for v5.0.0 (from v4.x)
+-- ============================================================================
+-- Run this ONCE if upgrading from rangebar-py v4.x with existing cache:
+--
+--   ALTER TABLE rangebar_cache.range_bars
+--   RENAME COLUMN threshold_bps TO threshold_decimal_bps;
+--
+-- Note: New installations do not need this migration.
+
+-- ============================================================================
+-- Migration for v7.2.0 (Issue #32: rename aggregation_efficiency)
+-- ============================================================================
+-- Run this ONCE if upgrading from rangebar-py v7.1.x with existing cache:
+--
+--   ALTER TABLE rangebar_cache.range_bars
+--   RENAME COLUMN aggregation_efficiency TO aggregation_density;
+--
+-- Note: New installations do not need this migration.
+
+-- ============================================================================
+-- Migration for v10.x (Ouroboros: cyclical reset boundaries)
+-- ============================================================================
+-- Run this ONCE if upgrading from rangebar-py v9.x with existing cache:
+--
+--   ALTER TABLE rangebar_cache.range_bars
+--   ADD COLUMN ouroboros_mode LowCardinality(String) DEFAULT 'week';
+--
+-- Note: New installations do not need this migration.
+-- Plan: /Users/terryli/.claude/plans/sparkling-coalescing-dijkstra.md
+
+-- ============================================================================
+-- Migration for v12.x (Issue #8: Exchange sessions integration)
+-- ============================================================================
+-- Run this ONCE if upgrading from rangebar-py v11.x with existing cache:
+--
+--   ALTER TABLE rangebar_cache.range_bars
+--   ADD COLUMN exchange_session_sydney UInt8 DEFAULT 0,
+--   ADD COLUMN exchange_session_tokyo UInt8 DEFAULT 0,
+--   ADD COLUMN exchange_session_london UInt8 DEFAULT 0,
+--   ADD COLUMN exchange_session_newyork UInt8 DEFAULT 0;
+--
+-- Note: New installations do not need this migration.
+
+-- ============================================================================
+-- Migration for v13.x (Issue #59: Inter-bar features from lookback window)
+-- ============================================================================
+-- Run this ONCE if upgrading from rangebar-py v12.x with existing cache:
+--
+--   ALTER TABLE rangebar_cache.range_bars
+--   -- Tier 1: Core features (7 features)
+--   ADD COLUMN lookback_trade_count Nullable(UInt32) DEFAULT NULL,
+--   ADD COLUMN lookback_ofi Nullable(Float64) DEFAULT NULL,
+--   ADD COLUMN lookback_duration_us Nullable(Int64) DEFAULT NULL,
+--   ADD COLUMN lookback_intensity Nullable(Float64) DEFAULT NULL,
+--   ADD COLUMN lookback_vwap_raw Nullable(Float64) DEFAULT NULL,
+--   ADD COLUMN lookback_vwap_position Nullable(Float64) DEFAULT NULL,
+--   ADD COLUMN lookback_count_imbalance Nullable(Float64) DEFAULT NULL,
+--   -- Tier 2: Statistical features (5 features)
+--   ADD COLUMN lookback_kyle_lambda Nullable(Float64) DEFAULT NULL,
+--   ADD COLUMN lookback_burstiness Nullable(Float64) DEFAULT NULL,
+--   ADD COLUMN lookback_volume_skew Nullable(Float64) DEFAULT NULL,
+--   ADD COLUMN lookback_volume_kurt Nullable(Float64) DEFAULT NULL,
+--   ADD COLUMN lookback_price_range Nullable(Float64) DEFAULT NULL,
+--   -- Tier 3: Advanced features (4 features)
+--   ADD COLUMN lookback_kaufman_er Nullable(Float64) DEFAULT NULL,
+--   ADD COLUMN lookback_garman_klass_vol Nullable(Float64) DEFAULT NULL,
+--   ADD COLUMN lookback_hurst Nullable(Float64) DEFAULT NULL,
+--   ADD COLUMN lookback_permutation_entropy Nullable(Float64) DEFAULT NULL;
+--
+-- Note: New installations do not need this migration.
+
+-- ============================================================================
+-- Computed Range Bars Cache (Tier 2)
+-- ============================================================================
+-- Stores computed range bars with all parameters as cache key
+-- Cache hit requires exact match on: symbol, threshold, time range
+
+CREATE TABLE IF NOT EXISTS rangebar_cache.range_bars (
+    -- Cache key components
+    symbol LowCardinality(String),
+    threshold_decimal_bps UInt32,
+
+    -- OHLCV data
+    timestamp_ms Int64,
+    open Float64,
+    high Float64,
+    low Float64,
+    close Float64,
+    volume Float64,
+
+    -- Market microstructure (from rangebar-core)
+    vwap Float64 DEFAULT 0,
+    buy_volume Float64 DEFAULT 0,
+    sell_volume Float64 DEFAULT 0,
+    individual_trade_count UInt32 DEFAULT 0,
+    agg_record_count UInt32 DEFAULT 0,
+
+    -- Microstructure features (Issue #25)
+    duration_us Int64 DEFAULT 0,
+    ofi Float64 DEFAULT 0,
+    vwap_close_deviation Float64 DEFAULT 0,
+    price_impact Float64 DEFAULT 0,
+    kyle_lambda_proxy Float64 DEFAULT 0,
+    trade_intensity Float64 DEFAULT 0,
+    volume_per_trade Float64 DEFAULT 0,
+    aggression_ratio Float64 DEFAULT 0,
+    aggregation_density Float64 DEFAULT 1,
+    turnover_imbalance Float64 DEFAULT 0,
+
+    -- Ouroboros (cyclical reset boundaries, v10.x)
+    ouroboros_mode LowCardinality(String) DEFAULT 'week',
+
+    -- Exchange session flags (Issue #8: indicates active traditional market sessions)
+    exchange_session_sydney UInt8 DEFAULT 0,
+    exchange_session_tokyo UInt8 DEFAULT 0,
+    exchange_session_london UInt8 DEFAULT 0,
+    exchange_session_newyork UInt8 DEFAULT 0,
+
+    -- Inter-bar features (Issue #59: computed from lookback window BEFORE bar opens)
+    -- Tier 1: Core features (7 features)
+    lookback_trade_count Nullable(UInt32) DEFAULT NULL,
+    lookback_ofi Nullable(Float64) DEFAULT NULL,
+    lookback_duration_us Nullable(Int64) DEFAULT NULL,
+    lookback_intensity Nullable(Float64) DEFAULT NULL,
+    lookback_vwap_raw Nullable(Float64) DEFAULT NULL,
+    lookback_vwap_position Nullable(Float64) DEFAULT NULL,
+    lookback_count_imbalance Nullable(Float64) DEFAULT NULL,
+    -- Tier 2: Statistical features (5 features)
+    lookback_kyle_lambda Nullable(Float64) DEFAULT NULL,
+    lookback_burstiness Nullable(Float64) DEFAULT NULL,
+    lookback_volume_skew Nullable(Float64) DEFAULT NULL,
+    lookback_volume_kurt Nullable(Float64) DEFAULT NULL,
+    lookback_price_range Nullable(Float64) DEFAULT NULL,
+    -- Tier 3: Advanced features (4 features)
+    lookback_kaufman_er Nullable(Float64) DEFAULT NULL,
+    lookback_garman_klass_vol Nullable(Float64) DEFAULT NULL,
+    lookback_hurst Nullable(Float64) DEFAULT NULL,
+    lookback_permutation_entropy Nullable(Float64) DEFAULT NULL,
+
+    -- Cache metadata
+    cache_key String,                     -- Hash of full parameters
+    rangebar_version String DEFAULT '',   -- Version for invalidation
+    source_start_ts Int64 DEFAULT 0,      -- Input data time range
+    source_end_ts Int64 DEFAULT 0,
+    computed_at DateTime64(3) DEFAULT now64(3)
+)
+ENGINE = ReplacingMergeTree(computed_at)
+-- Partition by symbol, threshold, and month
+PARTITION BY (symbol, threshold_decimal_bps, toYYYYMM(toDateTime(timestamp_ms / 1000)))
+-- Order for efficient lookups
+ORDER BY (symbol, threshold_decimal_bps, timestamp_ms);
+
+-- ============================================================================
+-- Materialized Views (Optional - for analytics)
+-- ============================================================================
+
+-- View: Daily trade volume by symbol
+-- CREATE MATERIALIZED VIEW IF NOT EXISTS rangebar_cache.daily_trade_volume
+-- ENGINE = SummingMergeTree()
+-- PARTITION BY toYYYYMM(date)
+-- ORDER BY (symbol, date)
+-- AS SELECT
+--     symbol,
+--     toDate(toDateTime(timestamp_ms / 1000)) AS date,
+--     count() AS trade_count,
+--     sum(quantity) AS total_volume
+-- FROM rangebar_cache.raw_trades
+-- GROUP BY symbol, date;
+
+-- ============================================================================
+-- Indexes (ClickHouse creates automatically based on ORDER BY)
+-- ============================================================================
+-- No additional indexes needed - ORDER BY creates primary key index
+-- ClickHouse uses sparse indexing which is efficient for our access patterns:
+--   - Symbol + time range queries
+--   - Symbol + threshold lookups
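Editor's note: the schema file's usage comment says to create the rangebar_cache database and then run the file (or let RangeBarCache._ensure_schema() do it). The sketch below is one way to apply it from Python; it assumes the third-party clickhouse-connect driver and a reachable server, neither of which is confirmed by this diff, and the host/port values are placeholders. The package's own client wrapper (rangebar/clickhouse/client.py) may bootstrap the schema differently.

from importlib.resources import files

import clickhouse_connect  # assumption: driver choice is not shown in this diff

# Placeholder connection details.
client = clickhouse_connect.get_client(host="localhost", port=8123)
client.command("CREATE DATABASE IF NOT EXISTS rangebar_cache")

# schema.sql ships inside the wheel as package data.
sql = (files("rangebar.clickhouse") / "schema.sql").read_text()

# Drop comment-only `--` lines (migration notes, the optional materialized
# view), then execute each remaining statement. Inline trailing comments are
# valid ClickHouse syntax and can stay.
ddl = "\n".join(
    line for line in sql.splitlines() if not line.lstrip().startswith("--")
)
for statement in ddl.split(";"):
    if statement.strip():
        client.command(statement)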