rangebar 11.6.1__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. rangebar/CLAUDE.md +327 -0
  2. rangebar/__init__.py +227 -0
  3. rangebar/__init__.pyi +1089 -0
  4. rangebar/_core.cpython-313-darwin.so +0 -0
  5. rangebar/checkpoint.py +472 -0
  6. rangebar/cli.py +298 -0
  7. rangebar/clickhouse/CLAUDE.md +139 -0
  8. rangebar/clickhouse/__init__.py +100 -0
  9. rangebar/clickhouse/bulk_operations.py +309 -0
  10. rangebar/clickhouse/cache.py +734 -0
  11. rangebar/clickhouse/client.py +121 -0
  12. rangebar/clickhouse/config.py +141 -0
  13. rangebar/clickhouse/mixin.py +120 -0
  14. rangebar/clickhouse/preflight.py +504 -0
  15. rangebar/clickhouse/query_operations.py +345 -0
  16. rangebar/clickhouse/schema.sql +187 -0
  17. rangebar/clickhouse/tunnel.py +222 -0
  18. rangebar/constants.py +288 -0
  19. rangebar/conversion.py +177 -0
  20. rangebar/exceptions.py +207 -0
  21. rangebar/exness.py +364 -0
  22. rangebar/hooks.py +311 -0
  23. rangebar/logging.py +171 -0
  24. rangebar/notify/__init__.py +15 -0
  25. rangebar/notify/pushover.py +155 -0
  26. rangebar/notify/telegram.py +271 -0
  27. rangebar/orchestration/__init__.py +20 -0
  28. rangebar/orchestration/count_bounded.py +797 -0
  29. rangebar/orchestration/helpers.py +412 -0
  30. rangebar/orchestration/models.py +76 -0
  31. rangebar/orchestration/precompute.py +498 -0
  32. rangebar/orchestration/range_bars.py +736 -0
  33. rangebar/orchestration/tick_fetcher.py +226 -0
  34. rangebar/ouroboros.py +454 -0
  35. rangebar/processors/__init__.py +22 -0
  36. rangebar/processors/api.py +383 -0
  37. rangebar/processors/core.py +522 -0
  38. rangebar/resource_guard.py +567 -0
  39. rangebar/storage/__init__.py +22 -0
  40. rangebar/storage/checksum_registry.py +218 -0
  41. rangebar/storage/parquet.py +728 -0
  42. rangebar/streaming.py +300 -0
  43. rangebar/validation/__init__.py +69 -0
  44. rangebar/validation/cache_staleness.py +277 -0
  45. rangebar/validation/continuity.py +664 -0
  46. rangebar/validation/gap_classification.py +294 -0
  47. rangebar/validation/post_storage.py +317 -0
  48. rangebar/validation/tier1.py +175 -0
  49. rangebar/validation/tier2.py +261 -0
  50. rangebar-11.6.1.dist-info/METADATA +308 -0
  51. rangebar-11.6.1.dist-info/RECORD +54 -0
  52. rangebar-11.6.1.dist-info/WHEEL +4 -0
  53. rangebar-11.6.1.dist-info/entry_points.txt +2 -0
  54. rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
rangebar/cli.py ADDED
@@ -0,0 +1,298 @@
1
+ """Command-line interface for rangebar-py cache management.
2
+
3
+ This module provides CLI commands for managing the ClickHouse cache,
4
+ including status checks, population, and clearing operations.
5
+
6
+ Usage
7
+ -----
8
+ After installation, the CLI is available as `rangebar`:
9
+
10
+ $ rangebar status BTCUSDT
11
+ $ rangebar populate BTCUSDT --start 2024-01-01 --end 2024-06-30
12
+ $ rangebar clear BTCUSDT --confirm
13
+
14
+ Or run as a module:
15
+
16
+ $ python -m rangebar.cli status BTCUSDT
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import logging
22
+ import sys
23
+ from datetime import UTC, datetime
24
+
25
+ import click
26
+
27
# Root-logger setup for CLI runs: INFO-level records with a timestamp and the
# level name. This executes when the module is imported.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
34
+
35
+
36
@click.group()
@click.option("-v", "--verbose", is_flag=True, help="Enable verbose output")
@click.version_option(package_name="rangebar")
def cli(verbose: bool) -> None:
    """rangebar-py cache management CLI.

    Manage ClickHouse cache for computed range bars.
    """
    # The docstring above doubles as the group's --help text, so
    # implementation notes are kept in comments.
    if not verbose:
        return

    # -v/--verbose: lower the root logger threshold to DEBUG.
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)
46
+
47
+
48
@cli.command()
@click.argument("symbol")
@click.option(
    "--threshold",
    "-t",
    type=int,
    default=250,
    help="Threshold in dbps (default: 250)",
)
def status(symbol: str, threshold: int) -> None:
    """Show cache status for a symbol.

    Example: rangebar status BTCUSDT
    """
    # The docstring above is the command's --help text; implementation notes
    # live in comments.
    #
    # symbol:    positional SYMBOL argument, passed through to the cache.
    # threshold: value of -t/--threshold (defaults to 250).
    # Exits with status 1 if the ClickHouse package cannot be imported or a
    # ConnectionError is raised while talking to the cache.

    # Imported lazily so an import failure is reported as a CLI error rather
    # than preventing the CLI module from loading.
    try:
        from .clickhouse import RangeBarCache
    except ImportError as e:
        click.echo(f"Error: ClickHouse support not available: {e}", err=True)
        sys.exit(1)

    try:
        with RangeBarCache() as cache:
            # Get bar count
            count = cache.count_bars(symbol, threshold)

            # Get timestamp range
            oldest = cache.get_oldest_bar_timestamp(symbol, threshold)
            newest = cache.get_newest_bar_timestamp(symbol, threshold)

            click.echo(f"Symbol: {symbol}")
            click.echo(f"Threshold: {threshold} dbps")
            click.echo(f"Cached bars: {count:,}")

            if oldest and newest:
                # The values are divided by 1000, i.e. treated as epoch
                # milliseconds. Convert with an explicit UTC zone so the
                # printed dates do not depend on the machine's local timezone
                # (a naive conversion can shift the date by a day).
                oldest_dt = datetime.fromtimestamp(oldest / 1000, tz=UTC)
                newest_dt = datetime.fromtimestamp(newest / 1000, tz=UTC)
                click.echo(f"Date range: {oldest_dt.date()} to {newest_dt.date()}")
            else:
                click.echo("Date range: N/A (no cached bars)")

    except ConnectionError as e:
        click.echo(f"Error: Cannot connect to ClickHouse: {e}", err=True)
        sys.exit(1)
91
+
92
+
93
@cli.command()
@click.argument("symbol")
@click.option("--start", "-s", required=True, help="Start date (YYYY-MM-DD)")
@click.option("--end", "-e", required=True, help="End date (YYYY-MM-DD)")
@click.option(
    "--threshold",
    "-t",
    type=int,
    default=250,
    help="Threshold in dbps (default: 250)",
)
@click.option(
    "--notify/--no-notify",
    default=True,
    help="Send Telegram notifications (default: on)",
)
def populate(
    symbol: str,
    start: str,
    end: str,
    threshold: int,
    notify: bool,
) -> None:
    """Populate cache for a date range.

    Fetches tick data and computes range bars, storing results in ClickHouse.

    Example: rangebar populate BTCUSDT --start 2024-01-01 --end 2024-06-30
    """
    # The docstring above is the command's --help text; implementation notes
    # live in comments.
    #
    # symbol:     positional SYMBOL argument.
    # start/end:  date strings in YYYY-MM-DD form, passed on unchanged.
    # threshold:  value of -t/--threshold (defaults to 250).
    # notify:     --notify/--no-notify toggle for Telegram notifications.
    # Exits with status 1 on malformed dates, a reversed date range, or a
    # ValueError/RuntimeError raised by get_range_bars.

    # Validate dates
    try:
        start_dt = datetime.strptime(start, "%Y-%m-%d")  # noqa: DTZ007
        end_dt = datetime.strptime(end, "%Y-%m-%d")  # noqa: DTZ007
    except ValueError:
        click.echo("Error: Invalid date format. Use YYYY-MM-DD", err=True)
        sys.exit(1)

    # Fail fast on a reversed range, before notifications are enabled or any
    # data is requested. A single-day range (start == end) is allowed.
    if start_dt > end_dt:
        click.echo("Error: Start date must not be after end date", err=True)
        sys.exit(1)

    # Enable Telegram notifications if requested
    if notify:
        try:
            from .notify.telegram import enable_telegram_notifications

            enable_telegram_notifications()
        except ImportError:
            # Notifications are optional: warn and carry on without them.
            click.echo("Warning: Telegram notifications not available", err=True)

    click.echo(f"Populating cache for {symbol} from {start} to {end}...")
    click.echo(f"Threshold: {threshold} dbps")

    try:
        from . import get_range_bars

        df = get_range_bars(
            symbol,
            start,
            end,
            threshold_decimal_bps=threshold,
            use_cache=True,
            fetch_if_missing=True,
        )

        click.echo(f"Computed {len(df):,} bars")
        click.echo("Bars should now be cached in ClickHouse")

    except (ValueError, RuntimeError) as e:
        click.echo(f"Error: {e}", err=True)
        sys.exit(1)
160
+
161
+
162
@cli.command()
@click.argument("symbol")
@click.option(
    "--threshold",
    "-t",
    type=int,
    default=None,
    help="Threshold in dbps (required; clearing all thresholds is not yet implemented)",
)
@click.option("--confirm", is_flag=True, help="Confirm deletion (required)")
def clear(symbol: str, threshold: int | None, confirm: bool) -> None:
    """Clear cache for a symbol.

    Requires --confirm flag to prevent accidental deletion.

    Example: rangebar clear BTCUSDT --threshold 250 --confirm
    """
    # The docstring above is the command's --help text; implementation notes
    # live in comments.
    #
    # symbol:    positional SYMBOL argument.
    # threshold: value of -t/--threshold, or None when the option is omitted.
    # confirm:   True only when --confirm was passed.
    # Exits with status 1 when --confirm is missing, when no threshold is
    # given, when the ClickHouse package cannot be imported, or on a
    # ConnectionError.
    if not confirm:
        click.echo("Error: Add --confirm flag to delete cache data", err=True)
        click.echo(f" rangebar clear {symbol} --confirm")
        sys.exit(1)

    # Clearing every threshold is not implemented. Report that as a failure
    # (stderr, non-zero exit) instead of announcing a clear and exiting 0
    # with nothing deleted. `is None` is used so an explicit `--threshold 0`
    # is not mistaken for "option omitted".
    if threshold is None:
        click.echo(
            "Error: Clearing all thresholds not yet implemented. "
            "Specify --threshold to clear a specific threshold.",
            err=True,
        )
        sys.exit(1)

    try:
        from .clickhouse import RangeBarCache
    except ImportError as e:
        click.echo(f"Error: ClickHouse support not available: {e}", err=True)
        sys.exit(1)

    try:
        with RangeBarCache() as cache:
            click.echo(f"Clearing cache for {symbol} @ {threshold} dbps...")

            # Get count before clearing so the summary can report how many
            # bars were targeted.
            count = cache.count_bars(symbol, threshold)
            if count == 0:
                click.echo("No cached bars found")
                return

            # Delete using timestamp range (all time): from epoch 0 up to the
            # current UTC time in milliseconds.
            cache.invalidate_range_bars_by_range(
                symbol=symbol,
                threshold_decimal_bps=threshold,
                start_timestamp_ms=0,
                end_timestamp_ms=int(datetime.now(tz=UTC).timestamp() * 1000),
            )
            click.echo(f"Cleared {count:,} bars (deletion is async)")

    except ConnectionError as e:
        click.echo(f"Error: Cannot connect to ClickHouse: {e}", err=True)
        sys.exit(1)
222
+
223
+
224
@cli.command()
def list_symbols() -> None:
    """List all cached symbols and their thresholds."""
    # The docstring above is the command's --help text; implementation notes
    # live in comments.
    try:
        from .clickhouse import RangeBarCache
    except ImportError as e:
        click.echo(f"Error: ClickHouse support not available: {e}", err=True)
        sys.exit(1)

    try:
        with RangeBarCache() as cache:
            # One row per (symbol, threshold) pair with its bar count.
            query = """
                SELECT symbol, threshold_decimal_bps, count(*) as bar_count
                FROM rangebar_cache.range_bars FINAL
                GROUP BY symbol, threshold_decimal_bps
                ORDER BY symbol, threshold_decimal_bps
            """
            result = cache.client.query(query)

            if not result.result_rows:
                click.echo("No cached data found")
                return

            # Table header framed by two horizontal rules.
            rule = "-" * 50
            click.echo("Cached symbols:")
            click.echo(rule)
            click.echo(f"{'Symbol':<12} {'Threshold':<12} {'Bars':>12}")
            click.echo(rule)

            for symbol, threshold, count in result.result_rows:
                click.echo(f"{symbol:<12} {threshold:<12} {count:>12,}")

    except ConnectionError as e:
        click.echo(f"Error: Cannot connect to ClickHouse: {e}", err=True)
        sys.exit(1)
260
+
261
+
262
@cli.command()
def test_telegram() -> None:
    """Send a test Telegram notification."""
    # The docstring above is the command's --help text; implementation notes
    # live in comments.
    try:
        from .notify.telegram import is_configured, send_telegram
    except ImportError:
        click.echo("Error: Telegram module not available", err=True)
        sys.exit(1)

    if not is_configured():
        click.echo(
            "Error: Telegram not configured. "
            "Set RANGEBAR_TELEGRAM_TOKEN environment variable.",
            err=True,
        )
        sys.exit(1)

    message = (
        "<b>rangebar-py CLI Test</b>\n\n"
        "This is a test notification from the rangebar CLI.\n"
        "If you see this, Telegram notifications are working correctly."
    )

    # A falsy result from send_telegram is reported as a failure (exit 1).
    if not send_telegram(message):
        click.echo("Failed to send test notification", err=True)
        sys.exit(1)

    click.echo("Test notification sent successfully")
290
+
291
+
292
def main() -> None:
    """Entry point for the CLI: invoke the ``cli`` command group."""
    cli()


# Allows `python -m rangebar.cli ...` in addition to the installed command.
if __name__ == "__main__":
    main()
@@ -0,0 +1,139 @@
1
+ # ClickHouse Cache Layer
2
+
3
+ **Parent**: [/python/rangebar/CLAUDE.md](/python/rangebar/CLAUDE.md) | **Schema**: [schema.sql](./schema.sql)
4
+
5
+ ---
6
+
7
+ ## Cache Population
8
+
9
+ ### Native Method (Recommended)
10
+
11
+ Use `get_range_bars()` with `fetch_if_missing=True` to populate the cache:
12
+
13
+ ```python
14
+ from rangebar import get_range_bars
15
+
16
+ # Populate threshold 100 data for BTCUSDT
17
+ df = get_range_bars(
18
+ "BTCUSDT",
19
+ start_date="2023-06-01",
20
+ end_date="2025-12-01",
21
+ threshold_decimal_bps=100,
22
+ use_cache=True,
23
+ fetch_if_missing=True,
24
+ include_microstructure=False,
25
+ )
26
+ print(f"Fetched {len(df):,} bars")
27
+ ```
28
+
29
+ **How it works**:
30
+
31
+ 1. Checks ClickHouse cache for existing data
32
+ 2. If missing, fetches raw tick data from Binance (day-by-day to prevent OOM)
33
+ 3. Computes range bars via Rust backend
34
+ 4. Stores results in `rangebar_cache.range_bars` table
35
+ 5. Returns pandas DataFrame
36
+
37
+ ### Populating Remote Hosts
38
+
39
+ Run on the target machine:
40
+
41
+ ```bash
42
+ # SSH to host and run in tmux (long-running)
43
+ ssh <host> "tmux new-session -d -s rangebar-fetch 'cd ~/alpha-forge-research && source .venv/bin/activate && python3 << \"PYEOF\" 2>&1 | tee ~/fetch_rangebar.log
44
+ from rangebar import get_range_bars
45
+ import time
46
+
47
+ print(\"Fetching BTCUSDT threshold 100 data...\")
48
+ start = time.time()
49
+ df = get_range_bars(
50
+ \"BTCUSDT\",
51
+ start_date=\"2023-06-01\",
52
+ end_date=\"2025-12-01\",
53
+ threshold_decimal_bps=100,
54
+ use_cache=True,
55
+ fetch_if_missing=True,
56
+ )
57
+ elapsed = time.time() - start
58
+ print(f\"Fetched {len(df):,} bars in {elapsed:.1f}s\")
59
+ PYEOF
60
+ '"
61
+
62
+ # Monitor progress
63
+ ssh <host> "tail -f ~/fetch_rangebar.log"
64
+ ```
65
+
66
+ ---
67
+
68
+ ## Checking Cache Status
69
+
70
+ ### Using cache_status.py script
71
+
72
+ ```bash
73
+ python scripts/cache_status.py
74
+ ```
75
+
76
+ ### Direct ClickHouse query
77
+
78
+ ```python
79
+ import clickhouse_connect
80
+ client = clickhouse_connect.get_client(host='localhost')
81
+ result = client.query('''
82
+ SELECT symbol, threshold_decimal_bps, count(*) as bars,
83
+ min(timestamp_ms) as earliest, max(timestamp_ms) as latest
84
+ FROM rangebar_cache.range_bars
85
+ GROUP BY symbol, threshold_decimal_bps
86
+ ''')
87
+ for row in result.result_rows:
88
+ print(f"{row[0]} @ {row[1]} dbps: {row[2]:,} bars")
89
+ ```
90
+
91
+ ---
92
+
93
+ ## Threshold Presets
94
+
95
+ | Preset | Value | Use Case |
96
+ | ------------ | ----- | ---------------- |
97
+ | `"micro"` | 10 | Scalping |
98
+ | `"tight"` | 50 | Day trading |
99
+ | `"standard"` | 100 | Swing trading |
100
+ | `"medium"` | 250 | Default |
101
+ | `"wide"` | 500 | Position trading |
102
+ | `"macro"` | 1000 | Long-term |
103
+
104
+ ---
105
+
106
+ ## Host-Specific Cache Status
107
+
108
+ | Host | Symbols | Thresholds Cached | Notes |
109
+ | ----------- | ------------------------------------------- | --------------------- | ----------------- |
110
+ | bigblack | BTCUSDT, ETHUSDT, SOLUSDT, BNBUSDT (crypto) | 25, 50, 100, 200 dbps | Primary GPU host |
111
+ | bigblack | EURUSD (forex) | 50, 100, 200 dbps | Exness Raw_Spread |
112
+ | littleblack | 700 | 700 dbps | Secondary host |
113
+ | local | varies | varies | Development |
114
+
115
+ **Total cached**: 260M+ bars (crypto) + 130K bars (forex)
116
+
117
+ To add a threshold to a host, run the population script above on that host.
118
+
119
+ ---
120
+
121
+ ## Files
122
+
123
+ | File | Purpose |
124
+ | --------------------- | -------------------------------------------------------------- |
125
+ | `cache.py` | RangeBarCache class, core cache operations |
126
+ | `bulk_operations.py` | BulkStoreMixin (store_bars_bulk, store_bars_batch) |
127
+ | `query_operations.py` | QueryOperationsMixin (get_n_bars, get_bars_by_timestamp_range) |
128
+ | `schema.sql` | ClickHouse table schema (v7.0: 10 microstructure cols) |
129
+ | `config.py` | Connection configuration |
130
+ | `preflight.py` | Installation checks |
131
+ | `tunnel.py` | SSH tunnel support |
132
+
133
+ ---
134
+
135
+ ## Related
136
+
137
+ - [/CLAUDE.md](/CLAUDE.md) - Project hub
138
+ - [/python/rangebar/CLAUDE.md](/python/rangebar/CLAUDE.md) - Python API
139
+ - [/scripts/cache_status.py](/scripts/cache_status.py) - Status script
@@ -0,0 +1,100 @@
1
+ """ClickHouse cache layer for computed range bars.
2
+
3
+ This module provides caching for computed range bars (Tier 2) using ClickHouse.
4
+ Raw tick data (Tier 1) is stored locally via `rangebar.storage.TickStorage`.
5
+
6
+ Configuration
7
+ -------------
8
+ Set environment variables via mise (recommended) or directly:
9
+
10
+ # Connection mode (RANGEBAR_MODE)
11
+ export RANGEBAR_MODE=local # Force localhost:8123 only
12
+ export RANGEBAR_MODE=cloud # Require CLICKHOUSE_HOST env var
13
+ export RANGEBAR_MODE=auto # Auto-detect (default)
14
+
15
+ # Host configuration (for AUTO/CLOUD modes)
16
+ export RANGEBAR_CH_HOSTS="host1,host2" # SSH aliases from ~/.ssh/config
17
+ export RANGEBAR_CH_PRIMARY="host1" # Default host
18
+ export CLICKHOUSE_HOST="localhost" # Direct host (CLOUD mode)
19
+
20
+ If no env vars set, falls back to localhost:8123 in AUTO mode.
21
+
22
+ Example
23
+ -------
24
+ >>> from rangebar.clickhouse import RangeBarCache, get_available_clickhouse_host
25
+ >>> from rangebar import process_trades_to_dataframe_cached
26
+ >>>
27
+ >>> # Check ClickHouse availability
28
+ >>> host = get_available_clickhouse_host()
29
+ >>> print(f"Using ClickHouse at {host.host} via {host.method}")
30
+ >>>
31
+ >>> # Use cached processing
32
+ >>> df = process_trades_to_dataframe_cached(trades, symbol="BTCUSDT")
33
+
34
+ See Also
35
+ --------
36
+ rangebar.storage.TickStorage : Local Parquet storage for raw tick data
37
+ """
38
+
39
+ from __future__ import annotations
40
+
41
+ import os
42
+ import warnings
43
+
44
+ from .cache import CacheKey, RangeBarCache
45
+ from .client import (
46
+ ClickHouseQueryError,
47
+ ClickHouseUnavailableError,
48
+ get_client,
49
+ )
50
+ from .config import ClickHouseConfig, ConnectionMode, get_connection_mode
51
+ from .mixin import ClickHouseClientMixin
52
+ from .preflight import (
53
+ ClickHouseNotConfiguredError,
54
+ HostConnection,
55
+ InstallationLevel,
56
+ PreflightResult,
57
+ detect_clickhouse_state,
58
+ get_available_clickhouse_host,
59
+ )
60
+ from .tunnel import SSHTunnel
61
+
62
+ __all__ = [
63
+ # Sorted for ruff RUF022
64
+ "CacheKey",
65
+ "ClickHouseClientMixin",
66
+ "ClickHouseConfig",
67
+ "ClickHouseNotConfiguredError",
68
+ "ClickHouseQueryError",
69
+ "ClickHouseUnavailableError",
70
+ "ConnectionMode",
71
+ "HostConnection",
72
+ "InstallationLevel",
73
+ "PreflightResult",
74
+ "RangeBarCache",
75
+ "SSHTunnel",
76
+ "detect_clickhouse_state",
77
+ "get_available_clickhouse_host",
78
+ "get_client",
79
+ "get_connection_mode",
80
+ ]
81
+
82
+
83
def _emit_import_warning() -> None:
    """Emit a warning at import time if ClickHouse is not ready.

    Calls ``detect_clickhouse_state()`` and issues a ``UserWarning`` when the
    reported level is below ``InstallationLevel.RUNNING_NO_SCHEMA``.

    This is deliberately best-effort: any exception raised by the check is
    suppressed so that importing the package never fails because of it. The
    suppressed exception is logged at DEBUG level so it can still be
    diagnosed.
    """
    try:
        state = detect_clickhouse_state()
        if state.level < InstallationLevel.RUNNING_NO_SCHEMA:
            warnings.warn(
                f"ClickHouse cache not available: {state.message}. "
                f"Cached functions will fail. {state.action_required or ''}",
                UserWarning,
                stacklevel=3,
            )
    except Exception:
        # Don't fail import on preflight errors, but leave a trace of what
        # went wrong instead of discarding it silently. The import is local
        # because this path is rarely taken.
        import logging

        logging.getLogger(__name__).debug(
            "ClickHouse preflight check failed during import; continuing",
            exc_info=True,
        )


# Optional: emit warning at import time. Setting RANGEBAR_SKIP_IMPORT_CHECK to
# any non-empty value disables the check.
if not os.getenv("RANGEBAR_SKIP_IMPORT_CHECK"):
    _emit_import_warning()