rangebar-11.6.1-cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rangebar/CLAUDE.md +327 -0
- rangebar/__init__.py +227 -0
- rangebar/__init__.pyi +1089 -0
- rangebar/_core.cpython-313-darwin.so +0 -0
- rangebar/checkpoint.py +472 -0
- rangebar/cli.py +298 -0
- rangebar/clickhouse/CLAUDE.md +139 -0
- rangebar/clickhouse/__init__.py +100 -0
- rangebar/clickhouse/bulk_operations.py +309 -0
- rangebar/clickhouse/cache.py +734 -0
- rangebar/clickhouse/client.py +121 -0
- rangebar/clickhouse/config.py +141 -0
- rangebar/clickhouse/mixin.py +120 -0
- rangebar/clickhouse/preflight.py +504 -0
- rangebar/clickhouse/query_operations.py +345 -0
- rangebar/clickhouse/schema.sql +187 -0
- rangebar/clickhouse/tunnel.py +222 -0
- rangebar/constants.py +288 -0
- rangebar/conversion.py +177 -0
- rangebar/exceptions.py +207 -0
- rangebar/exness.py +364 -0
- rangebar/hooks.py +311 -0
- rangebar/logging.py +171 -0
- rangebar/notify/__init__.py +15 -0
- rangebar/notify/pushover.py +155 -0
- rangebar/notify/telegram.py +271 -0
- rangebar/orchestration/__init__.py +20 -0
- rangebar/orchestration/count_bounded.py +797 -0
- rangebar/orchestration/helpers.py +412 -0
- rangebar/orchestration/models.py +76 -0
- rangebar/orchestration/precompute.py +498 -0
- rangebar/orchestration/range_bars.py +736 -0
- rangebar/orchestration/tick_fetcher.py +226 -0
- rangebar/ouroboros.py +454 -0
- rangebar/processors/__init__.py +22 -0
- rangebar/processors/api.py +383 -0
- rangebar/processors/core.py +522 -0
- rangebar/resource_guard.py +567 -0
- rangebar/storage/__init__.py +22 -0
- rangebar/storage/checksum_registry.py +218 -0
- rangebar/storage/parquet.py +728 -0
- rangebar/streaming.py +300 -0
- rangebar/validation/__init__.py +69 -0
- rangebar/validation/cache_staleness.py +277 -0
- rangebar/validation/continuity.py +664 -0
- rangebar/validation/gap_classification.py +294 -0
- rangebar/validation/post_storage.py +317 -0
- rangebar/validation/tier1.py +175 -0
- rangebar/validation/tier2.py +261 -0
- rangebar-11.6.1.dist-info/METADATA +308 -0
- rangebar-11.6.1.dist-info/RECORD +54 -0
- rangebar-11.6.1.dist-info/WHEEL +4 -0
- rangebar-11.6.1.dist-info/entry_points.txt +2 -0
- rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
# Issue #46: Modularization M5 - Extract bulk operations from cache.py
|
|
2
|
+
"""Bulk store operations for ClickHouse range bar cache.
|
|
3
|
+
|
|
4
|
+
Provides mixin methods for storing range bars in bulk (pandas) and batch
|
|
5
|
+
(Polars/Arrow) modes. Used by RangeBarCache via mixin inheritance.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import hashlib
|
|
11
|
+
import logging
|
|
12
|
+
from typing import TYPE_CHECKING
|
|
13
|
+
|
|
14
|
+
import pandas as pd
|
|
15
|
+
|
|
16
|
+
from .._core import __version__
|
|
17
|
+
from ..constants import EXCHANGE_SESSION_COLUMNS, MICROSTRUCTURE_COLUMNS
|
|
18
|
+
from ..exceptions import CacheWriteError
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
import polars as pl
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class BulkStoreMixin:
    """Mixin providing bulk store operations for RangeBarCache.

    Requires `self.client` from ClickHouseClientMixin.
    """

    # Base insertion columns shared by the pandas (bulk) and Polars (batch)
    # paths. Optional microstructure / exchange-session columns are appended
    # per-call when present in the input frame.
    _BASE_COLUMNS = (
        "symbol",
        "threshold_decimal_bps",
        "ouroboros_mode",
        "timestamp_ms",
        "open",
        "high",
        "low",
        "close",
        "volume",
        "cache_key",
        "rangebar_version",
        "source_start_ts",
        "source_end_ts",
    )

    def store_bars_bulk(
        self,
        symbol: str,
        threshold_decimal_bps: int,
        bars: pd.DataFrame,
        version: str | None = None,
        ouroboros_mode: str = "year",
    ) -> int:
        """Store bars without requiring CacheKey (for bar-count API).

        This method is for storing bars computed during gap-filling
        where we don't have exact date bounds.

        Parameters
        ----------
        symbol : str
            Trading symbol (e.g., "BTCUSDT")
        threshold_decimal_bps : int
            Threshold in decimal basis points
        bars : pd.DataFrame
            DataFrame with OHLCV columns (from rangebar processing)
        version : str | None
            rangebar-core version for cache invalidation. If None (default),
            uses current package version for schema evolution tracking.
        ouroboros_mode : str
            Ouroboros reset mode: "year", "month", or "week" (default: "year")

        Returns
        -------
        int
            Number of rows inserted

        Raises
        ------
        CacheWriteError
            If the insert operation fails.
        """
        if bars.empty:
            logger.debug("Skipping bulk cache write for %s: empty DataFrame", symbol)
            return 0

        logger.debug(
            "Bulk writing %d bars to cache for %s @ %d dbps",
            len(bars),
            symbol,
            threshold_decimal_bps,
        )

        # Work on a copy so the caller's DataFrame is never mutated.
        df = bars.copy()

        # Handle DatetimeIndex: flatten to a column, then convert ns -> ms.
        if isinstance(df.index, pd.DatetimeIndex):
            df = df.reset_index()
            if "timestamp" in df.columns:
                df["timestamp_ms"] = df["timestamp"].astype("int64") // 10**6
                df = df.drop(columns=["timestamp"])
            elif "index" in df.columns:
                df["timestamp_ms"] = df["index"].astype("int64") // 10**6
                df = df.drop(columns=["index"])

        # Normalize column names (lowercase)
        df.columns = df.columns.str.lower()

        # Add cache metadata (Ouroboros: Plan sparkling-coalescing-dijkstra.md)
        df["symbol"] = symbol
        df["threshold_decimal_bps"] = threshold_decimal_bps
        df["ouroboros_mode"] = ouroboros_mode
        df["rangebar_version"] = version if version is not None else __version__

        # For bulk storage without CacheKey, use timestamp range as source bounds
        if "timestamp_ms" in df.columns and len(df) > 0:
            df["source_start_ts"] = df["timestamp_ms"].min()
            df["source_end_ts"] = df["timestamp_ms"].max()
            # Generate cache_key from symbol, threshold, ouroboros, and timestamp range
            start_ts = df["source_start_ts"].iloc[0]
            end_ts = df["source_end_ts"].iloc[0]
            key_str = (
                f"{symbol}_{threshold_decimal_bps}_{start_ts}_{end_ts}_{ouroboros_mode}"
            )
            # md5 is used only as a deterministic cache fingerprint, not for security.
            df["cache_key"] = hashlib.md5(key_str.encode()).hexdigest()
        else:
            # No timestamps available: store sentinel bounds and an empty key.
            df["source_start_ts"] = 0
            df["source_end_ts"] = 0
            df["cache_key"] = ""

        # Select columns for insertion
        columns = list(self._BASE_COLUMNS)

        # Add optional microstructure columns if present (from constants.py SSoT)
        for col in MICROSTRUCTURE_COLUMNS:
            if col in df.columns:
                columns.append(col)

        # Add optional exchange session columns if present (Issue #8)
        # Cast numpy.bool_ to int for ClickHouse Nullable(UInt8) (Issue #50)
        for col in EXCHANGE_SESSION_COLUMNS:
            if col in df.columns:
                df[col] = df[col].astype(int)
                columns.append(col)

        # Filter to existing columns
        columns = [c for c in columns if c in df.columns]

        try:
            summary = self.client.insert_df(
                "rangebar_cache.range_bars",
                df[columns],
            )
            written = summary.written_rows
            logger.info(
                "Bulk cached %d bars for %s @ %d dbps",
                written,
                symbol,
                threshold_decimal_bps,
            )
            return written
        except (OSError, RuntimeError) as e:
            logger.exception(
                "Bulk cache write failed for %s @ %d dbps",
                symbol,
                threshold_decimal_bps,
            )
            msg = f"Failed to bulk write bars for {symbol}: {e}"
            raise CacheWriteError(
                msg,
                symbol=symbol,
                operation="bulk_write",
            ) from e

    def store_bars_batch(
        self,
        symbol: str,
        threshold_decimal_bps: int,
        bars: pl.DataFrame,
        version: str | None = None,
        ouroboros_mode: str = "year",
    ) -> int:
        """Store a batch of bars using Arrow for efficient streaming writes.

        This method is optimized for incremental streaming cache writes
        (Phase 4.3). It uses Arrow for zero-copy data transfer to ClickHouse.

        Parameters
        ----------
        symbol : str
            Trading symbol (e.g., "BTCUSDT")
        threshold_decimal_bps : int
            Threshold in decimal basis points
        bars : pl.DataFrame
            Polars DataFrame with OHLCV columns (from streaming processing)
        version : str | None
            rangebar-core version for cache invalidation. If None (default),
            uses current package version for schema evolution tracking.
        ouroboros_mode : str
            Ouroboros reset mode: "year", "month", or "week" (default: "year").
            Previously hard-coded to "year"; the default preserves that
            behavior for existing callers.

        Returns
        -------
        int
            Number of rows inserted

        Raises
        ------
        CacheWriteError
            If the insert operation fails.

        Examples
        --------
        >>> from rangebar.clickhouse import RangeBarCache
        >>> with RangeBarCache() as cache:
        ...     # Stream bars and write incrementally
        ...     for batch in stream_range_bars("BTCUSDT", "2024-01-01", "2024-01-07"):
        ...         written = cache.store_bars_batch(
        ...             "BTCUSDT", 250, batch, version="7.1.3"
        ...         )
        ...         print(f"Wrote {written} bars")
        """
        # Deferred import: polars is an optional dependency at runtime
        # (module-level import is TYPE_CHECKING-only).
        import polars as pl

        if bars.is_empty():
            logger.debug("Skipping batch cache write for %s: empty DataFrame", symbol)
            return 0

        # Normalize column names (lowercase)
        df = bars.rename({c: c.lower() for c in bars.columns if c != c.lower()})

        # Handle timestamp conversion from datetime to milliseconds
        if "timestamp" in df.columns:
            # Check if it's already datetime or string
            if df["timestamp"].dtype == pl.Datetime:
                df = df.with_columns(
                    (pl.col("timestamp").dt.epoch(time_unit="ms"))
                    .cast(pl.Int64)
                    .alias("timestamp_ms")
                ).drop("timestamp")
            elif df["timestamp"].dtype == pl.Utf8:
                df = df.with_columns(
                    pl.col("timestamp")
                    .str.to_datetime(format="%Y-%m-%dT%H:%M:%S%.f%:z")
                    .dt.epoch(time_unit="ms")
                    .cast(pl.Int64)
                    .alias("timestamp_ms")
                ).drop("timestamp")

        # Add cache metadata.
        # Schema evolution: use __version__ if version not specified
        effective_version = version if version is not None else __version__
        df = df.with_columns(
            pl.lit(symbol).alias("symbol"),
            pl.lit(threshold_decimal_bps).alias("threshold_decimal_bps"),
            pl.lit(ouroboros_mode).alias("ouroboros_mode"),
            pl.lit(effective_version).alias("rangebar_version"),
        )

        # Add source bounds and cache_key
        if "timestamp_ms" in df.columns and len(df) > 0:
            start_ts = df["timestamp_ms"].min()
            end_ts = df["timestamp_ms"].max()
            key_str = (
                f"{symbol}_{threshold_decimal_bps}_{start_ts}_{end_ts}_{ouroboros_mode}"
            )
            # md5 is used only as a deterministic cache fingerprint, not for security.
            cache_key = hashlib.md5(key_str.encode()).hexdigest()

            df = df.with_columns(
                pl.lit(start_ts).alias("source_start_ts"),
                pl.lit(end_ts).alias("source_end_ts"),
                pl.lit(cache_key).alias("cache_key"),
            )
        else:
            # No timestamps available: store sentinel bounds and an empty key.
            df = df.with_columns(
                pl.lit(0).alias("source_start_ts"),
                pl.lit(0).alias("source_end_ts"),
                pl.lit("").alias("cache_key"),
            )

        # Define columns for insertion
        columns = list(self._BASE_COLUMNS)

        # Add optional microstructure columns if present (from constants.py SSoT)
        for col in MICROSTRUCTURE_COLUMNS:
            if col in df.columns:
                columns.append(col)

        # Add optional exchange session columns if present (Issue #8)
        # Cast bool to UInt8 for ClickHouse Nullable(UInt8) (Issue #50)
        for col in EXCHANGE_SESSION_COLUMNS:
            if col in df.columns:
                df = df.with_columns(pl.col(col).cast(pl.UInt8))
                columns.append(col)

        # Filter to existing columns
        columns = [c for c in columns if c in df.columns]

        # Use Arrow for efficient insert (zero-copy). Failures are wrapped in
        # CacheWriteError for consistency with store_bars_bulk.
        arrow_table = df.select(columns).to_arrow()
        try:
            summary = self.client.insert_arrow(
                "rangebar_cache.range_bars",
                arrow_table,
            )
        except (OSError, RuntimeError) as e:
            logger.exception(
                "Batch cache write failed for %s @ %d dbps",
                symbol,
                threshold_decimal_bps,
            )
            msg = f"Failed to batch write bars for {symbol}: {e}"
            raise CacheWriteError(
                msg,
                symbol=symbol,
                operation="batch_write",
            ) from e

        return summary.written_rows