rangebar 11.6.1__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. rangebar/CLAUDE.md +327 -0
  2. rangebar/__init__.py +227 -0
  3. rangebar/__init__.pyi +1089 -0
  4. rangebar/_core.cpython-313-darwin.so +0 -0
  5. rangebar/checkpoint.py +472 -0
  6. rangebar/cli.py +298 -0
  7. rangebar/clickhouse/CLAUDE.md +139 -0
  8. rangebar/clickhouse/__init__.py +100 -0
  9. rangebar/clickhouse/bulk_operations.py +309 -0
  10. rangebar/clickhouse/cache.py +734 -0
  11. rangebar/clickhouse/client.py +121 -0
  12. rangebar/clickhouse/config.py +141 -0
  13. rangebar/clickhouse/mixin.py +120 -0
  14. rangebar/clickhouse/preflight.py +504 -0
  15. rangebar/clickhouse/query_operations.py +345 -0
  16. rangebar/clickhouse/schema.sql +187 -0
  17. rangebar/clickhouse/tunnel.py +222 -0
  18. rangebar/constants.py +288 -0
  19. rangebar/conversion.py +177 -0
  20. rangebar/exceptions.py +207 -0
  21. rangebar/exness.py +364 -0
  22. rangebar/hooks.py +311 -0
  23. rangebar/logging.py +171 -0
  24. rangebar/notify/__init__.py +15 -0
  25. rangebar/notify/pushover.py +155 -0
  26. rangebar/notify/telegram.py +271 -0
  27. rangebar/orchestration/__init__.py +20 -0
  28. rangebar/orchestration/count_bounded.py +797 -0
  29. rangebar/orchestration/helpers.py +412 -0
  30. rangebar/orchestration/models.py +76 -0
  31. rangebar/orchestration/precompute.py +498 -0
  32. rangebar/orchestration/range_bars.py +736 -0
  33. rangebar/orchestration/tick_fetcher.py +226 -0
  34. rangebar/ouroboros.py +454 -0
  35. rangebar/processors/__init__.py +22 -0
  36. rangebar/processors/api.py +383 -0
  37. rangebar/processors/core.py +522 -0
  38. rangebar/resource_guard.py +567 -0
  39. rangebar/storage/__init__.py +22 -0
  40. rangebar/storage/checksum_registry.py +218 -0
  41. rangebar/storage/parquet.py +728 -0
  42. rangebar/streaming.py +300 -0
  43. rangebar/validation/__init__.py +69 -0
  44. rangebar/validation/cache_staleness.py +277 -0
  45. rangebar/validation/continuity.py +664 -0
  46. rangebar/validation/gap_classification.py +294 -0
  47. rangebar/validation/post_storage.py +317 -0
  48. rangebar/validation/tier1.py +175 -0
  49. rangebar/validation/tier2.py +261 -0
  50. rangebar-11.6.1.dist-info/METADATA +308 -0
  51. rangebar-11.6.1.dist-info/RECORD +54 -0
  52. rangebar-11.6.1.dist-info/WHEEL +4 -0
  53. rangebar-11.6.1.dist-info/entry_points.txt +2 -0
  54. rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,345 @@
1
+ # polars-exception: backtesting.py requires Pandas DataFrames with DatetimeIndex
2
+ # Issue #46: Modularization M6 - Extract query operations from cache.py
3
+ """Query operations for ClickHouse range bar cache.
4
+
5
+ Provides mixin methods for retrieving range bars by count (get_n_bars)
6
+ and by timestamp range (get_bars_by_timestamp_range). Used by RangeBarCache
7
+ via mixin inheritance.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import logging
13
+
14
+ import pandas as pd
15
+
16
+ from ..constants import (
17
+ MICROSTRUCTURE_COLUMNS,
18
+ MIN_VERSION_FOR_MICROSTRUCTURE,
19
+ )
20
+ from ..conversion import normalize_arrow_dtypes
21
+ from ..exceptions import CacheReadError
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ class QueryOperationsMixin:
27
+ """Mixin providing query operations for RangeBarCache.
28
+
29
+ Requires `self.client` and `self.count_bars()` from parent class.
30
+ """
31
+
32
+ def get_n_bars(
33
+ self,
34
+ symbol: str,
35
+ threshold_decimal_bps: int,
36
+ n_bars: int,
37
+ before_ts: int | None = None,
38
+ include_microstructure: bool = False,
39
+ min_schema_version: str | None = None,
40
+ ) -> tuple[pd.DataFrame | None, int]:
41
+ """Get N bars from cache, ordered chronologically (oldest first).
42
+
43
+ Uses ORDER BY timestamp_ms DESC LIMIT N for efficient retrieval,
44
+ then reverses in Python for chronological order.
45
+
46
+ Parameters
47
+ ----------
48
+ symbol : str
49
+ Trading symbol (e.g., "BTCUSDT")
50
+ threshold_decimal_bps : int
51
+ Threshold in decimal basis points
52
+ n_bars : int
53
+ Maximum number of bars to retrieve
54
+ before_ts : int | None
55
+ Only get bars with timestamp_ms < before_ts.
56
+ If None, gets most recent bars.
57
+ include_microstructure : bool
58
+ If True, includes vwap, buy_volume, sell_volume columns
59
+ min_schema_version : str | None
60
+ Minimum schema version required for cache hit. If specified,
61
+ only returns data with rangebar_version >= min_schema_version.
62
+ When include_microstructure=True and min_schema_version=None,
63
+ automatically requires version >= 7.0.0.
64
+
65
+ Returns
66
+ -------
67
+ tuple[pd.DataFrame | None, int]
68
+ (bars_df, available_count) where:
69
+ - bars_df is OHLCV DataFrame (or None if no bars)
70
+ - available_count is total bars available (may be > len(bars_df))
71
+ """
72
+ # First get the count (for reporting)
73
+ available_count = self.count_bars(symbol, threshold_decimal_bps, before_ts)
74
+
75
+ if available_count == 0:
76
+ return None, 0
77
+
78
+ # Select columns
79
+ base_cols = """
80
+ timestamp_ms,
81
+ open as Open,
82
+ high as High,
83
+ low as Low,
84
+ close as Close,
85
+ volume as Volume
86
+ """
87
+ if include_microstructure:
88
+ base_cols += """,
89
+ vwap,
90
+ buy_volume,
91
+ sell_volume,
92
+ duration_us,
93
+ ofi,
94
+ vwap_close_deviation,
95
+ price_impact,
96
+ kyle_lambda_proxy,
97
+ trade_intensity,
98
+ volume_per_trade,
99
+ aggression_ratio,
100
+ aggregation_density,
101
+ turnover_imbalance
102
+ """
103
+
104
+ # Determine effective min version for schema evolution filtering
105
+ effective_min_version = min_schema_version
106
+ if include_microstructure and effective_min_version is None:
107
+ effective_min_version = MIN_VERSION_FOR_MICROSTRUCTURE
108
+
109
+ # Build version filter if specified
110
+ version_filter = ""
111
+ if effective_min_version:
112
+ version_filter = """
113
+ AND rangebar_version != ''
114
+ AND rangebar_version >= {min_version:String}"""
115
+
116
+ if before_ts is not None:
117
+ # Split path: with end_ts filter
118
+ query = f"""
119
+ SELECT {base_cols}
120
+ FROM rangebar_cache.range_bars FINAL
121
+ WHERE symbol = {{symbol:String}}
122
+ AND threshold_decimal_bps = {{threshold:UInt32}}
123
+ AND timestamp_ms < {{end_ts:Int64}}
124
+ {version_filter}
125
+ ORDER BY timestamp_ms DESC
126
+ LIMIT {{n_bars:UInt64}}
127
+ """
128
+ params: dict[str, str | int] = {
129
+ "symbol": symbol,
130
+ "threshold": threshold_decimal_bps,
131
+ "end_ts": before_ts,
132
+ "n_bars": n_bars,
133
+ }
134
+ if effective_min_version:
135
+ params["min_version"] = effective_min_version
136
+ df = self.client.query_df_arrow(query, parameters=params)
137
+ else:
138
+ # Split path: no end_ts filter (most recent)
139
+ query = f"""
140
+ SELECT {base_cols}
141
+ FROM rangebar_cache.range_bars FINAL
142
+ WHERE symbol = {{symbol:String}}
143
+ AND threshold_decimal_bps = {{threshold:UInt32}}
144
+ {version_filter}
145
+ ORDER BY timestamp_ms DESC
146
+ LIMIT {{n_bars:UInt64}}
147
+ """
148
+ params = {
149
+ "symbol": symbol,
150
+ "threshold": threshold_decimal_bps,
151
+ "n_bars": n_bars,
152
+ }
153
+ if effective_min_version:
154
+ params["min_version"] = effective_min_version
155
+ df = self.client.query_df_arrow(query, parameters=params)
156
+
157
+ if df.empty:
158
+ return None, available_count
159
+
160
+ # Reverse to chronological order (oldest first)
161
+ df = df.iloc[::-1].reset_index(drop=True)
162
+
163
+ # Convert to TZ-aware UTC DatetimeIndex (Issue #20: match get_range_bars output)
164
+ df["timestamp"] = pd.to_datetime(df["timestamp_ms"], unit="ms", utc=True)
165
+ df = df.set_index("timestamp")
166
+ df = df.drop(columns=["timestamp_ms"])
167
+
168
+ # Convert PyArrow dtypes to numpy for compatibility
169
+ df = normalize_arrow_dtypes(df)
170
+
171
+ # Convert microstructure columns if present
172
+ if include_microstructure:
173
+ df = normalize_arrow_dtypes(df, columns=list(MICROSTRUCTURE_COLUMNS))
174
+
175
+ return df, available_count
176
+
177
+ def get_bars_by_timestamp_range(
178
+ self,
179
+ symbol: str,
180
+ threshold_decimal_bps: int,
181
+ start_ts: int,
182
+ end_ts: int,
183
+ include_microstructure: bool = False,
184
+ include_exchange_sessions: bool = False,
185
+ ouroboros_mode: str = "year",
186
+ min_schema_version: str | None = None,
187
+ ) -> pd.DataFrame | None:
188
+ """Get bars within a timestamp range (for get_range_bars cache lookup).
189
+
190
+ Unlike get_range_bars() which requires exact CacheKey match,
191
+ this method queries by timestamp range, returning any cached bars
192
+ that fall within [start_ts, end_ts].
193
+
194
+ Parameters
195
+ ----------
196
+ symbol : str
197
+ Trading symbol (e.g., "BTCUSDT")
198
+ threshold_decimal_bps : int
199
+ Threshold in decimal basis points
200
+ start_ts : int
201
+ Start timestamp in milliseconds (inclusive)
202
+ end_ts : int
203
+ End timestamp in milliseconds (inclusive)
204
+ include_microstructure : bool
205
+ If True, includes vwap, buy_volume, sell_volume columns
206
+ include_exchange_sessions : bool
207
+ If True, includes exchange_session_* columns (Issue #8)
208
+ ouroboros_mode : str
209
+ Ouroboros reset mode: "year", "month", or "week" (default: "year")
210
+ Plan: sparkling-coalescing-dijkstra.md
211
+ min_schema_version : str | None
212
+ Minimum schema version required for cache hit. If specified,
213
+ only returns data with rangebar_version >= min_schema_version.
214
+ When include_microstructure=True and min_schema_version=None,
215
+ automatically requires version >= 7.0.0.
216
+
217
+ Returns
218
+ -------
219
+ pd.DataFrame | None
220
+ OHLCV DataFrame with TZ-aware UTC timestamps if found, None otherwise.
221
+ Returns None if no bars exist in the range or version mismatch.
222
+
223
+ Raises
224
+ ------
225
+ CacheReadError
226
+ If the query fails due to database errors.
227
+ """
228
+ # Build column list
229
+ base_cols = """
230
+ timestamp_ms,
231
+ open as Open,
232
+ high as High,
233
+ low as Low,
234
+ close as Close,
235
+ volume as Volume
236
+ """
237
+ if include_microstructure:
238
+ base_cols += """,
239
+ vwap,
240
+ buy_volume,
241
+ sell_volume,
242
+ duration_us,
243
+ ofi,
244
+ vwap_close_deviation,
245
+ price_impact,
246
+ kyle_lambda_proxy,
247
+ trade_intensity,
248
+ volume_per_trade,
249
+ aggression_ratio,
250
+ aggregation_density,
251
+ turnover_imbalance
252
+ """
253
+
254
+ # Issue #8: Exchange session flags
255
+ if include_exchange_sessions:
256
+ base_cols += """,
257
+ exchange_session_sydney,
258
+ exchange_session_tokyo,
259
+ exchange_session_london,
260
+ exchange_session_newyork
261
+ """
262
+
263
+ # Ouroboros mode filter ensures cache isolation between modes
264
+ # Plan: sparkling-coalescing-dijkstra.md
265
+
266
+ # Determine effective min version for schema evolution filtering
267
+ effective_min_version = min_schema_version
268
+ if include_microstructure and effective_min_version is None:
269
+ # Auto-require v7.0.0+ for microstructure features
270
+ effective_min_version = MIN_VERSION_FOR_MICROSTRUCTURE
271
+
272
+ # Build version filter if specified
273
+ version_filter = ""
274
+ if effective_min_version:
275
+ version_filter = """
276
+ AND rangebar_version != ''
277
+ AND rangebar_version >= {min_version:String}"""
278
+
279
+ query = f"""
280
+ SELECT {base_cols}
281
+ FROM rangebar_cache.range_bars FINAL
282
+ WHERE symbol = {{symbol:String}}
283
+ AND threshold_decimal_bps = {{threshold:UInt32}}
284
+ AND ouroboros_mode = {{ouroboros_mode:String}}
285
+ AND timestamp_ms >= {{start_ts:Int64}}
286
+ AND timestamp_ms <= {{end_ts:Int64}}
287
+ {version_filter}
288
+ ORDER BY timestamp_ms
289
+ """
290
+
291
+ # Build parameters
292
+ params: dict[str, str | int] = {
293
+ "symbol": symbol,
294
+ "threshold": threshold_decimal_bps,
295
+ "ouroboros_mode": ouroboros_mode,
296
+ "start_ts": start_ts,
297
+ "end_ts": end_ts,
298
+ }
299
+ if effective_min_version:
300
+ params["min_version"] = effective_min_version
301
+
302
+ try:
303
+ df = self.client.query_df_arrow(query, parameters=params)
304
+ except (OSError, RuntimeError) as e:
305
+ logger.exception(
306
+ "Cache read failed for %s @ %d dbps (range query)",
307
+ symbol,
308
+ threshold_decimal_bps,
309
+ )
310
+ msg = f"Failed to read bars for {symbol}: {e}"
311
+ raise CacheReadError(
312
+ msg,
313
+ symbol=symbol,
314
+ operation="read_range",
315
+ ) from e
316
+
317
+ if df.empty:
318
+ logger.debug(
319
+ "Cache miss for %s @ %d dbps (range: %d-%d)",
320
+ symbol,
321
+ threshold_decimal_bps,
322
+ start_ts,
323
+ end_ts,
324
+ )
325
+ return None
326
+
327
+ logger.debug(
328
+ "Cache hit: %d bars for %s @ %d dbps (range query)",
329
+ len(df),
330
+ symbol,
331
+ threshold_decimal_bps,
332
+ )
333
+
334
+ # Convert to TZ-aware UTC DatetimeIndex (matches get_range_bars output)
335
+ df["timestamp"] = pd.to_datetime(df["timestamp_ms"], unit="ms", utc=True)
336
+ df = df.set_index("timestamp")
337
+ df = df.drop(columns=["timestamp_ms"])
338
+
339
+ # Convert PyArrow dtypes to numpy float64 for compatibility
340
+ df = normalize_arrow_dtypes(df)
341
+
342
+ if include_microstructure:
343
+ df = normalize_arrow_dtypes(df, columns=list(MICROSTRUCTURE_COLUMNS))
344
+
345
+ return df
@@ -0,0 +1,187 @@
1
+ -- ClickHouse schema for rangebar cache
2
+ -- Stores computed range bars (Tier 2)
3
+ --
4
+ -- Note: Raw tick data (Tier 1) is stored locally using Parquet files.
5
+ -- See rangebar.storage.TickStorage for tick data caching.
6
+ --
7
+ -- Usage:
8
+ -- CREATE DATABASE IF NOT EXISTS rangebar_cache;
9
+ -- Then run this file or use RangeBarCache._ensure_schema()
10
+
11
+ -- ============================================================================
12
+ -- Migration for v5.0.0 (from v4.x)
13
+ -- ============================================================================
14
+ -- Run this ONCE if upgrading from rangebar-py v4.x with existing cache:
15
+ --
16
+ -- ALTER TABLE rangebar_cache.range_bars
17
+ -- RENAME COLUMN threshold_bps TO threshold_decimal_bps;
18
+ --
19
+ -- Note: New installations do not need this migration.
20
+
21
+ -- ============================================================================
22
+ -- Migration for v7.2.0 (Issue #32: rename aggregation_efficiency)
23
+ -- ============================================================================
24
+ -- Run this ONCE if upgrading from rangebar-py v7.1.x with existing cache:
25
+ --
26
+ -- ALTER TABLE rangebar_cache.range_bars
27
+ -- RENAME COLUMN aggregation_efficiency TO aggregation_density;
28
+ --
29
+ -- Note: New installations do not need this migration.
30
+
31
+ -- ============================================================================
32
+ -- Migration for v10.x (Ouroboros: cyclical reset boundaries)
33
+ -- ============================================================================
34
+ -- Run this ONCE if upgrading from rangebar-py v9.x with existing cache:
35
+ --
36
+ -- ALTER TABLE rangebar_cache.range_bars
37
+ -- ADD COLUMN ouroboros_mode LowCardinality(String) DEFAULT 'week';
38
+ --
39
+ -- Note: New installations do not need this migration.
40
+ -- Plan: /Users/terryli/.claude/plans/sparkling-coalescing-dijkstra.md
41
+
42
+ -- ============================================================================
43
+ -- Migration for v12.x (Issue #8: Exchange sessions integration)
44
+ -- ============================================================================
45
+ -- Run this ONCE if upgrading from rangebar-py v11.x with existing cache:
46
+ --
47
+ -- ALTER TABLE rangebar_cache.range_bars
48
+ -- ADD COLUMN exchange_session_sydney UInt8 DEFAULT 0,
49
+ -- ADD COLUMN exchange_session_tokyo UInt8 DEFAULT 0,
50
+ -- ADD COLUMN exchange_session_london UInt8 DEFAULT 0,
51
+ -- ADD COLUMN exchange_session_newyork UInt8 DEFAULT 0;
52
+ --
53
+ -- Note: New installations do not need this migration.
54
+
55
+ -- ============================================================================
56
+ -- Migration for v13.x (Issue #59: Inter-bar features from lookback window)
57
+ -- ============================================================================
58
+ -- Run this ONCE if upgrading from rangebar-py v12.x with existing cache:
59
+ --
60
+ -- ALTER TABLE rangebar_cache.range_bars
61
+ -- -- Tier 1: Core features (7 features)
62
+ -- ADD COLUMN lookback_trade_count Nullable(UInt32) DEFAULT NULL,
63
+ -- ADD COLUMN lookback_ofi Nullable(Float64) DEFAULT NULL,
64
+ -- ADD COLUMN lookback_duration_us Nullable(Int64) DEFAULT NULL,
65
+ -- ADD COLUMN lookback_intensity Nullable(Float64) DEFAULT NULL,
66
+ -- ADD COLUMN lookback_vwap_raw Nullable(Float64) DEFAULT NULL,
67
+ -- ADD COLUMN lookback_vwap_position Nullable(Float64) DEFAULT NULL,
68
+ -- ADD COLUMN lookback_count_imbalance Nullable(Float64) DEFAULT NULL,
69
+ -- -- Tier 2: Statistical features (5 features)
70
+ -- ADD COLUMN lookback_kyle_lambda Nullable(Float64) DEFAULT NULL,
71
+ -- ADD COLUMN lookback_burstiness Nullable(Float64) DEFAULT NULL,
72
+ -- ADD COLUMN lookback_volume_skew Nullable(Float64) DEFAULT NULL,
73
+ -- ADD COLUMN lookback_volume_kurt Nullable(Float64) DEFAULT NULL,
74
+ -- ADD COLUMN lookback_price_range Nullable(Float64) DEFAULT NULL,
75
+ -- -- Tier 3: Advanced features (4 features)
76
+ -- ADD COLUMN lookback_kaufman_er Nullable(Float64) DEFAULT NULL,
77
+ -- ADD COLUMN lookback_garman_klass_vol Nullable(Float64) DEFAULT NULL,
78
+ -- ADD COLUMN lookback_hurst Nullable(Float64) DEFAULT NULL,
79
+ -- ADD COLUMN lookback_permutation_entropy Nullable(Float64) DEFAULT NULL;
80
+ --
81
+ -- Note: New installations do not need this migration.
82
+
83
+ -- ============================================================================
84
+ -- Computed Range Bars Cache (Tier 2)
85
+ -- ============================================================================
86
+ -- Stores computed range bars with all parameters as cache key
87
+ -- Cache hit requires exact match on: symbol, threshold, time range
88
+
89
-- One row per computed range bar. Rows are identified by
-- (symbol, threshold_decimal_bps, timestamp_ms), which is the sorting key
-- declared at the bottom of this statement.
CREATE TABLE IF NOT EXISTS rangebar_cache.range_bars (
    -- Cache key components
    symbol LowCardinality(String),
    threshold_decimal_bps UInt32,

    -- OHLCV data (timestamp_ms is in milliseconds; see the /1000 in PARTITION BY)
    timestamp_ms Int64,
    open Float64,
    high Float64,
    low Float64,
    close Float64,
    volume Float64,

    -- Market microstructure (from rangebar-core)
    vwap Float64 DEFAULT 0,
    buy_volume Float64 DEFAULT 0,
    sell_volume Float64 DEFAULT 0,
    individual_trade_count UInt32 DEFAULT 0,
    agg_record_count UInt32 DEFAULT 0,

    -- Microstructure features (Issue #25)
    duration_us Int64 DEFAULT 0,
    ofi Float64 DEFAULT 0,
    vwap_close_deviation Float64 DEFAULT 0,
    price_impact Float64 DEFAULT 0,
    kyle_lambda_proxy Float64 DEFAULT 0,
    trade_intensity Float64 DEFAULT 0,
    volume_per_trade Float64 DEFAULT 0,
    aggression_ratio Float64 DEFAULT 0,
    aggregation_density Float64 DEFAULT 1,
    turnover_imbalance Float64 DEFAULT 0,

    -- Ouroboros (cyclical reset boundaries, v10.x)
    -- NOTE(review): the column default is 'week', while the Python reader
    -- get_bars_by_timestamp_range() defaults to ouroboros_mode="year" - confirm
    -- that writers always set this column explicitly.
    ouroboros_mode LowCardinality(String) DEFAULT 'week',

    -- Exchange session flags (Issue #8: indicates active traditional market sessions)
    exchange_session_sydney UInt8 DEFAULT 0,
    exchange_session_tokyo UInt8 DEFAULT 0,
    exchange_session_london UInt8 DEFAULT 0,
    exchange_session_newyork UInt8 DEFAULT 0,

    -- Inter-bar features (Issue #59: computed from lookback window BEFORE bar opens)
    -- All are Nullable with DEFAULT NULL, so rows written without them read as NULL.
    -- Tier 1: Core features (7 features)
    lookback_trade_count Nullable(UInt32) DEFAULT NULL,
    lookback_ofi Nullable(Float64) DEFAULT NULL,
    lookback_duration_us Nullable(Int64) DEFAULT NULL,
    lookback_intensity Nullable(Float64) DEFAULT NULL,
    lookback_vwap_raw Nullable(Float64) DEFAULT NULL,
    lookback_vwap_position Nullable(Float64) DEFAULT NULL,
    lookback_count_imbalance Nullable(Float64) DEFAULT NULL,
    -- Tier 2: Statistical features (5 features)
    lookback_kyle_lambda Nullable(Float64) DEFAULT NULL,
    lookback_burstiness Nullable(Float64) DEFAULT NULL,
    lookback_volume_skew Nullable(Float64) DEFAULT NULL,
    lookback_volume_kurt Nullable(Float64) DEFAULT NULL,
    lookback_price_range Nullable(Float64) DEFAULT NULL,
    -- Tier 3: Advanced features (4 features)
    lookback_kaufman_er Nullable(Float64) DEFAULT NULL,
    lookback_garman_klass_vol Nullable(Float64) DEFAULT NULL,
    lookback_hurst Nullable(Float64) DEFAULT NULL,
    lookback_permutation_entropy Nullable(Float64) DEFAULT NULL,

    -- Cache metadata
    cache_key String, -- Hash of full parameters
    -- Version for invalidation; '' (the default) is excluded by the Python
    -- readers whenever a minimum schema version is requested.
    rangebar_version String DEFAULT '',
    source_start_ts Int64 DEFAULT 0, -- Input data time range
    source_end_ts Int64 DEFAULT 0,
    computed_at DateTime64(3) DEFAULT now64(3)
)
-- ReplacingMergeTree with computed_at as the version column: among rows that
-- share the same ORDER BY key, merges keep the row with the greatest
-- computed_at. The Python readers query with FINAL to see the deduplicated view.
ENGINE = ReplacingMergeTree(computed_at)
-- Partition by symbol, threshold, and month (timestamp_ms / 1000 converts ms to seconds)
PARTITION BY (symbol, threshold_decimal_bps, toYYYYMM(toDateTime(timestamp_ms / 1000)))
-- Order for efficient lookups
-- NOTE(review): ouroboros_mode is not part of the sorting key, so two rows that
-- differ only in ouroboros_mode but share symbol/threshold/timestamp_ms are
-- treated as duplicates by ReplacingMergeTree - confirm this is intended given
-- that readers filter on ouroboros_mode.
ORDER BY (symbol, threshold_decimal_bps, timestamp_ms);
163
+
164
+ -- ============================================================================
165
+ -- Materialized Views (Optional - for analytics)
166
+ -- ============================================================================
167
+
168
+ -- View: Daily trade volume by symbol
169
+ -- CREATE MATERIALIZED VIEW IF NOT EXISTS rangebar_cache.daily_trade_volume
170
+ -- ENGINE = SummingMergeTree()
171
+ -- PARTITION BY toYYYYMM(date)
172
+ -- ORDER BY (symbol, date)
173
+ -- AS SELECT
174
+ -- symbol,
175
+ -- toDate(toDateTime(timestamp_ms / 1000)) AS date,
176
+ -- count() AS trade_count,
177
+ -- sum(quantity) AS total_volume
178
+ -- FROM rangebar_cache.raw_trades
179
+ -- GROUP BY symbol, date;
180
+
181
+ -- ============================================================================
182
+ -- Indexes (ClickHouse creates automatically based on ORDER BY)
183
+ -- ============================================================================
184
+ -- No additional indexes needed - ORDER BY creates primary key index
185
+ -- ClickHouse uses sparse indexing which is efficient for our access patterns:
186
+ -- - Symbol + time range queries
187
+ -- - Symbol + threshold lookups