rangebar-11.6.1-cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. rangebar/CLAUDE.md +327 -0
  2. rangebar/__init__.py +227 -0
  3. rangebar/__init__.pyi +1089 -0
  4. rangebar/_core.cpython-313-darwin.so +0 -0
  5. rangebar/checkpoint.py +472 -0
  6. rangebar/cli.py +298 -0
  7. rangebar/clickhouse/CLAUDE.md +139 -0
  8. rangebar/clickhouse/__init__.py +100 -0
  9. rangebar/clickhouse/bulk_operations.py +309 -0
  10. rangebar/clickhouse/cache.py +734 -0
  11. rangebar/clickhouse/client.py +121 -0
  12. rangebar/clickhouse/config.py +141 -0
  13. rangebar/clickhouse/mixin.py +120 -0
  14. rangebar/clickhouse/preflight.py +504 -0
  15. rangebar/clickhouse/query_operations.py +345 -0
  16. rangebar/clickhouse/schema.sql +187 -0
  17. rangebar/clickhouse/tunnel.py +222 -0
  18. rangebar/constants.py +288 -0
  19. rangebar/conversion.py +177 -0
  20. rangebar/exceptions.py +207 -0
  21. rangebar/exness.py +364 -0
  22. rangebar/hooks.py +311 -0
  23. rangebar/logging.py +171 -0
  24. rangebar/notify/__init__.py +15 -0
  25. rangebar/notify/pushover.py +155 -0
  26. rangebar/notify/telegram.py +271 -0
  27. rangebar/orchestration/__init__.py +20 -0
  28. rangebar/orchestration/count_bounded.py +797 -0
  29. rangebar/orchestration/helpers.py +412 -0
  30. rangebar/orchestration/models.py +76 -0
  31. rangebar/orchestration/precompute.py +498 -0
  32. rangebar/orchestration/range_bars.py +736 -0
  33. rangebar/orchestration/tick_fetcher.py +226 -0
  34. rangebar/ouroboros.py +454 -0
  35. rangebar/processors/__init__.py +22 -0
  36. rangebar/processors/api.py +383 -0
  37. rangebar/processors/core.py +522 -0
  38. rangebar/resource_guard.py +567 -0
  39. rangebar/storage/__init__.py +22 -0
  40. rangebar/storage/checksum_registry.py +218 -0
  41. rangebar/storage/parquet.py +728 -0
  42. rangebar/streaming.py +300 -0
  43. rangebar/validation/__init__.py +69 -0
  44. rangebar/validation/cache_staleness.py +277 -0
  45. rangebar/validation/continuity.py +664 -0
  46. rangebar/validation/gap_classification.py +294 -0
  47. rangebar/validation/post_storage.py +317 -0
  48. rangebar/validation/tier1.py +175 -0
  49. rangebar/validation/tier2.py +261 -0
  50. rangebar-11.6.1.dist-info/METADATA +308 -0
  51. rangebar-11.6.1.dist-info/RECORD +54 -0
  52. rangebar-11.6.1.dist-info/WHEEL +4 -0
  53. rangebar-11.6.1.dist-info/entry_points.txt +2 -0
  54. rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,309 @@
1
+ # Issue #46: Modularization M5 - Extract bulk operations from cache.py
2
+ """Bulk store operations for ClickHouse range bar cache.
3
+
4
+ Provides mixin methods for storing range bars in bulk (pandas) and batch
5
+ (Polars/Arrow) modes. Used by RangeBarCache via mixin inheritance.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import hashlib
11
+ import logging
12
+ from typing import TYPE_CHECKING
13
+
14
+ import pandas as pd
15
+
16
+ from .._core import __version__
17
+ from ..constants import EXCHANGE_SESSION_COLUMNS, MICROSTRUCTURE_COLUMNS
18
+ from ..exceptions import CacheWriteError
19
+
20
+ if TYPE_CHECKING:
21
+ import polars as pl
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
class BulkStoreMixin:
    """Mixin providing bulk store operations for RangeBarCache.

    Requires `self.client` from ClickHouseClientMixin.

    Both methods write rows into the ``rangebar_cache.range_bars`` table,
    attaching cache metadata (symbol, threshold, ouroboros mode, package
    version, source timestamp bounds, and a derived ``cache_key``).
    """

    def store_bars_bulk(
        self,
        symbol: str,
        threshold_decimal_bps: int,
        bars: pd.DataFrame,
        version: str | None = None,
        ouroboros_mode: str = "year",
    ) -> int:
        """Store bars without requiring CacheKey (for bar-count API).

        This method is for storing bars computed during gap-filling
        where we don't have exact date bounds.

        Parameters
        ----------
        symbol : str
            Trading symbol (e.g., "BTCUSDT")
        threshold_decimal_bps : int
            Threshold in decimal basis points
        bars : pd.DataFrame
            DataFrame with OHLCV columns (from rangebar processing)
        version : str | None
            rangebar-core version for cache invalidation. If None (default),
            uses current package version for schema evolution tracking.
        ouroboros_mode : str
            Ouroboros reset mode: "year", "month", or "week" (default: "year")

        Returns
        -------
        int
            Number of rows inserted

        Raises
        ------
        CacheWriteError
            If the insert operation fails.
        """
        # Fast path: nothing to write for an empty frame.
        if bars.empty:
            logger.debug("Skipping bulk cache write for %s: empty DataFrame", symbol)
            return 0

        logger.debug(
            "Bulk writing %d bars to cache for %s @ %d dbps",
            len(bars),
            symbol,
            threshold_decimal_bps,
        )

        # Copy so the caller's DataFrame is never mutated in place.
        df = bars.copy()

        # Handle DatetimeIndex: promote the index to an epoch-millisecond
        # "timestamp_ms" column. NOTE(review): if reset_index() produces a
        # column named neither "timestamp" nor "index" (i.e. a custom-named
        # index), no timestamp_ms is created and the else-branch below
        # silently stores zero source bounds and an empty cache_key — confirm
        # this is the intended fall-through.
        if isinstance(df.index, pd.DatetimeIndex):
            df = df.reset_index()
            if "timestamp" in df.columns:
                # datetime64[ns] -> int64 nanoseconds, then ns -> ms.
                df["timestamp_ms"] = df["timestamp"].astype("int64") // 10**6
                df = df.drop(columns=["timestamp"])
            elif "index" in df.columns:
                df["timestamp_ms"] = df["index"].astype("int64") // 10**6
                df = df.drop(columns=["index"])

        # Normalize column names (lowercase)
        df.columns = df.columns.str.lower()

        # Add cache metadata (Ouroboros: Plan sparkling-coalescing-dijkstra.md)
        df["symbol"] = symbol
        df["threshold_decimal_bps"] = threshold_decimal_bps
        df["ouroboros_mode"] = ouroboros_mode
        df["rangebar_version"] = version if version is not None else __version__

        # For bulk storage without CacheKey, use timestamp range as source bounds
        if "timestamp_ms" in df.columns and len(df) > 0:
            df["source_start_ts"] = df["timestamp_ms"].min()
            df["source_end_ts"] = df["timestamp_ms"].max()
            # Generate cache_key from symbol, threshold, ouroboros, and timestamp range
            start_ts = df["source_start_ts"].iloc[0]
            end_ts = df["source_end_ts"].iloc[0]
            key_str = (
                f"{symbol}_{threshold_decimal_bps}_{start_ts}_{end_ts}_{ouroboros_mode}"
            )
            # MD5 is used here only as a compact deterministic key, not for
            # any security purpose.
            df["cache_key"] = hashlib.md5(key_str.encode()).hexdigest()
        else:
            # No usable timestamps: sentinel bounds and empty key.
            df["source_start_ts"] = 0
            df["source_end_ts"] = 0
            df["cache_key"] = ""

        # Select columns for insertion (base schema; optional columns appended
        # below, then the list is filtered to what actually exists).
        columns = [
            "symbol",
            "threshold_decimal_bps",
            "ouroboros_mode",
            "timestamp_ms",
            "open",
            "high",
            "low",
            "close",
            "volume",
            "cache_key",
            "rangebar_version",
            "source_start_ts",
            "source_end_ts",
        ]

        # Add optional microstructure columns if present (from constants.py SSoT)
        for col in MICROSTRUCTURE_COLUMNS:
            if col in df.columns:
                columns.append(col)

        # Add optional exchange session columns if present (Issue #8)
        # Cast numpy.bool_ to int for ClickHouse Nullable(UInt8) (Issue #50)
        # NOTE(review): astype(int) raises if the column contains NaN/None;
        # presumably these columns are fully populated when present — verify.
        for col in EXCHANGE_SESSION_COLUMNS:
            if col in df.columns:
                df[col] = df[col].astype(int)
                columns.append(col)

        # Filter to existing columns
        columns = [c for c in columns if c in df.columns]

        try:
            summary = self.client.insert_df(
                "rangebar_cache.range_bars",
                df[columns],
            )
            written = summary.written_rows
            logger.info(
                "Bulk cached %d bars for %s @ %d dbps",
                written,
                symbol,
                threshold_decimal_bps,
            )
            return written
        # NOTE(review): only OSError/RuntimeError are wrapped; driver-specific
        # exceptions that subclass neither would propagate unwrapped — confirm
        # against the ClickHouse client's exception hierarchy.
        except (OSError, RuntimeError) as e:
            logger.exception(
                "Bulk cache write failed for %s @ %d dbps",
                symbol,
                threshold_decimal_bps,
            )
            msg = f"Failed to bulk write bars for {symbol}: {e}"
            raise CacheWriteError(
                msg,
                symbol=symbol,
                operation="bulk_write",
            ) from e

    def store_bars_batch(
        self,
        symbol: str,
        threshold_decimal_bps: int,
        bars: pl.DataFrame,
        version: str | None = None,
    ) -> int:
        """Store a batch of bars using Arrow for efficient streaming writes.

        This method is optimized for incremental streaming cache writes
        (Phase 4.3). It uses Arrow for zero-copy data transfer to ClickHouse.

        Unlike :meth:`store_bars_bulk`, the ouroboros mode is not a
        parameter here; batch storage always records ``"year"``.

        Parameters
        ----------
        symbol : str
            Trading symbol (e.g., "BTCUSDT")
        threshold_decimal_bps : int
            Threshold in decimal basis points
        bars : pl.DataFrame
            Polars DataFrame with OHLCV columns (from streaming processing)
        version : str | None
            rangebar-core version for cache invalidation. If None (default),
            uses current package version for schema evolution tracking.

        Returns
        -------
        int
            Number of rows inserted

        Examples
        --------
        >>> from rangebar.clickhouse import RangeBarCache
        >>> with RangeBarCache() as cache:
        ...     # Stream bars and write incrementally
        ...     for batch in stream_range_bars("BTCUSDT", "2024-01-01", "2024-01-07"):
        ...         written = cache.store_bars_batch(
        ...             "BTCUSDT", 250, batch, version="7.1.3"
        ...         )
        ...         print(f"Wrote {written} bars")
        """
        # Runtime import: the module-level polars import is TYPE_CHECKING-only,
        # keeping polars an optional dependency for non-batch users.
        import polars as pl

        if bars.is_empty():
            return 0

        # Normalize column names (lowercase)
        df = bars.rename({c: c.lower() for c in bars.columns if c != c.lower()})

        # Handle timestamp conversion from datetime to milliseconds
        if "timestamp" in df.columns:
            # Check if it's already datetime or string
            if df["timestamp"].dtype == pl.Datetime:
                df = df.with_columns(
                    (pl.col("timestamp").dt.epoch(time_unit="ms"))
                    .cast(pl.Int64)
                    .alias("timestamp_ms")
                ).drop("timestamp")
            elif df["timestamp"].dtype == pl.Utf8:
                # NOTE(review): parse format assumes ISO-8601 with fractional
                # seconds and a UTC-offset suffix — other string layouts will
                # raise inside polars; confirm upstream producers.
                df = df.with_columns(
                    pl.col("timestamp")
                    .str.to_datetime(format="%Y-%m-%dT%H:%M:%S%.f%:z")
                    .dt.epoch(time_unit="ms")
                    .cast(pl.Int64)
                    .alias("timestamp_ms")
                ).drop("timestamp")

        # Add cache metadata (ouroboros_mode defaults to "year" for batch storage)
        # Schema evolution: use __version__ if version not specified
        effective_version = version if version is not None else __version__
        df = df.with_columns(
            pl.lit(symbol).alias("symbol"),
            pl.lit(threshold_decimal_bps).alias("threshold_decimal_bps"),
            pl.lit("year").alias("ouroboros_mode"),  # Default for batch storage
            pl.lit(effective_version).alias("rangebar_version"),
        )

        # Add source bounds and cache_key (same derivation as store_bars_bulk,
        # with the ouroboros mode fixed to "year").
        if "timestamp_ms" in df.columns and len(df) > 0:
            start_ts = df["timestamp_ms"].min()
            end_ts = df["timestamp_ms"].max()
            key_str = f"{symbol}_{threshold_decimal_bps}_{start_ts}_{end_ts}_year"
            # MD5 as a compact deterministic key, not for security.
            cache_key = hashlib.md5(key_str.encode()).hexdigest()

            df = df.with_columns(
                pl.lit(start_ts).alias("source_start_ts"),
                pl.lit(end_ts).alias("source_end_ts"),
                pl.lit(cache_key).alias("cache_key"),
            )
        else:
            # No usable timestamps: sentinel bounds and empty key.
            df = df.with_columns(
                pl.lit(0).alias("source_start_ts"),
                pl.lit(0).alias("source_end_ts"),
                pl.lit("").alias("cache_key"),
            )

        # Define columns for insertion (base schema; optional columns appended
        # below, then filtered to what actually exists).
        columns = [
            "symbol",
            "threshold_decimal_bps",
            "ouroboros_mode",
            "timestamp_ms",
            "open",
            "high",
            "low",
            "close",
            "volume",
            "cache_key",
            "rangebar_version",
            "source_start_ts",
            "source_end_ts",
        ]

        # Add optional microstructure columns if present (from constants.py SSoT)
        for col in MICROSTRUCTURE_COLUMNS:
            if col in df.columns:
                columns.append(col)

        # Add optional exchange session columns if present (Issue #8)
        # Cast bool to UInt8 for ClickHouse Nullable(UInt8) (Issue #50)
        for col in EXCHANGE_SESSION_COLUMNS:
            if col in df.columns:
                df = df.with_columns(pl.col(col).cast(pl.UInt8))
                columns.append(col)

        # Filter to existing columns
        columns = [c for c in columns if c in df.columns]

        # Use Arrow for efficient insert (zero-copy)
        # NOTE(review): unlike store_bars_bulk, failures here are not wrapped
        # in CacheWriteError — driver exceptions propagate to the caller.
        arrow_table = df.select(columns).to_arrow()
        summary = self.client.insert_arrow(
            "rangebar_cache.range_bars",
            arrow_table,
        )

        return summary.written_rows