rangebar 11.6.1__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. rangebar/CLAUDE.md +327 -0
  2. rangebar/__init__.py +227 -0
  3. rangebar/__init__.pyi +1089 -0
  4. rangebar/_core.cpython-313-darwin.so +0 -0
  5. rangebar/checkpoint.py +472 -0
  6. rangebar/cli.py +298 -0
  7. rangebar/clickhouse/CLAUDE.md +139 -0
  8. rangebar/clickhouse/__init__.py +100 -0
  9. rangebar/clickhouse/bulk_operations.py +309 -0
  10. rangebar/clickhouse/cache.py +734 -0
  11. rangebar/clickhouse/client.py +121 -0
  12. rangebar/clickhouse/config.py +141 -0
  13. rangebar/clickhouse/mixin.py +120 -0
  14. rangebar/clickhouse/preflight.py +504 -0
  15. rangebar/clickhouse/query_operations.py +345 -0
  16. rangebar/clickhouse/schema.sql +187 -0
  17. rangebar/clickhouse/tunnel.py +222 -0
  18. rangebar/constants.py +288 -0
  19. rangebar/conversion.py +177 -0
  20. rangebar/exceptions.py +207 -0
  21. rangebar/exness.py +364 -0
  22. rangebar/hooks.py +311 -0
  23. rangebar/logging.py +171 -0
  24. rangebar/notify/__init__.py +15 -0
  25. rangebar/notify/pushover.py +155 -0
  26. rangebar/notify/telegram.py +271 -0
  27. rangebar/orchestration/__init__.py +20 -0
  28. rangebar/orchestration/count_bounded.py +797 -0
  29. rangebar/orchestration/helpers.py +412 -0
  30. rangebar/orchestration/models.py +76 -0
  31. rangebar/orchestration/precompute.py +498 -0
  32. rangebar/orchestration/range_bars.py +736 -0
  33. rangebar/orchestration/tick_fetcher.py +226 -0
  34. rangebar/ouroboros.py +454 -0
  35. rangebar/processors/__init__.py +22 -0
  36. rangebar/processors/api.py +383 -0
  37. rangebar/processors/core.py +522 -0
  38. rangebar/resource_guard.py +567 -0
  39. rangebar/storage/__init__.py +22 -0
  40. rangebar/storage/checksum_registry.py +218 -0
  41. rangebar/storage/parquet.py +728 -0
  42. rangebar/streaming.py +300 -0
  43. rangebar/validation/__init__.py +69 -0
  44. rangebar/validation/cache_staleness.py +277 -0
  45. rangebar/validation/continuity.py +664 -0
  46. rangebar/validation/gap_classification.py +294 -0
  47. rangebar/validation/post_storage.py +317 -0
  48. rangebar/validation/tier1.py +175 -0
  49. rangebar/validation/tier2.py +261 -0
  50. rangebar-11.6.1.dist-info/METADATA +308 -0
  51. rangebar-11.6.1.dist-info/RECORD +54 -0
  52. rangebar-11.6.1.dist-info/WHEEL +4 -0
  53. rangebar-11.6.1.dist-info/entry_points.txt +2 -0
  54. rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,504 @@
1
+ """Preflight detection for ClickHouse availability.
2
+
3
+ This module provides comprehensive detection of ClickHouse installation state
4
+ and host selection logic for multi-GPU workstation environments.
5
+
6
+ The preflight system detects:
7
+ - Level -1: ENV not configured (no RANGEBAR_CH_HOSTS and localhost unavailable)
8
+ - Level 0: ClickHouse not installed
9
+ - Level 1: ClickHouse installed but not running
10
+ - Level 2: ClickHouse running but schema missing
11
+ - Level 3: Fully configured and ready
12
+
13
+ Host selection priority:
14
+ 1. localhost:8123 (prefer local GPU workstation)
15
+ 2. RANGEBAR_CH_HOSTS via direct network (if exposed)
16
+ 3. RANGEBAR_CH_HOSTS via SSH tunnel (if firewalled)
17
+ 4. FAIL LOUDLY with Claude Code CLI guidance
18
+ """
19
+
20
from __future__ import annotations

import http.client
import os
import shutil
import socket
import subprocess
import urllib.error
import urllib.request
from dataclasses import dataclass
from enum import IntEnum
from pathlib import Path
from typing import Literal

from .config import ConnectionMode, get_connection_mode
33
+
34
# Boxed banner shown when no ClickHouse endpoint can be found. It is the
# default message of ClickHouseNotConfiguredError and walks Claude Code CLI
# users through setting RANGEBAR_CH_HOSTS / RANGEBAR_CH_PRIMARY via mise, or
# installing ClickHouse locally instead.
ENV_NOT_CONFIGURED_WARNING = """
╔══════════════════════════════════════════════════════════════════════════════╗
║ RANGEBAR CLICKHOUSE CACHE - CONFIGURATION REQUIRED ║
╠══════════════════════════════════════════════════════════════════════════════╣
║ ║
║ No ClickHouse host configured and localhost:8123 unavailable. ║
║ ║
║ FOR CLAUDE CODE CLI USERS: ║
║ ─────────────────────────────────────────────────────────────────────────── ║
║ 1. Check your ~/.ssh/config for GPU workstations with ClickHouse ║
║ (Look for hosts used for ML training that have ClickHouse installed) ║
║ ║
║ 2. Configure mise environment variables: ║
║ ║
║ # Add to ~/.config/mise/config.toml or project mise.toml: ║
║ [env] ║
║ RANGEBAR_CH_HOSTS = "host1,host2,host3" # SSH aliases from your config ║
║ RANGEBAR_CH_PRIMARY = "host1" # Default host ║
║ ║
║ 3. Reload mise: mise trust && mise install ║
║ ║
║ 4. Re-run your command ║
║ ║
║ ALTERNATIVE - Local ClickHouse: ║
║ ─────────────────────────────────────────────────────────────────────────── ║
║ macOS: brew install --cask clickhouse ║
║ Linux: curl https://clickhouse.com/ | sh && ./clickhouse install ║
║ ║
╚══════════════════════════════════════════════════════════════════════════════╝
"""
65
+
66
+
67
class ClickHouseNotConfiguredError(RuntimeError):
    """Signal that no usable ClickHouse endpoint could be located.

    When constructed without a message (or with an empty one), the exception
    text falls back to ``ENV_NOT_CONFIGURED_WARNING``, the setup banner aimed
    at Claude Code CLI users.
    """

    def __init__(self, message: str | None = None) -> None:
        """Store ``message``, or the default banner when it is falsy."""
        if message:
            text = message
        else:
            text = ENV_NOT_CONFIGURED_WARNING
        super().__init__(text)
77
+
78
+
79
class InstallationLevel(IntEnum):
    """Ordered readiness levels for a ClickHouse installation.

    Because this is an ``IntEnum``, levels compare numerically, so callers
    can write checks such as ``level >= InstallationLevel.RUNNING_NO_SCHEMA``.
    """

    # No RANGEBAR_CH_HOSTS configured and localhost unavailable.
    ENV_NOT_CONFIGURED = -1
    # No ClickHouse binary found.
    NOT_INSTALLED = 0
    # Binary found, but the server is not running.
    INSTALLED_NOT_RUNNING = 1
    # Server is running, but the rangebar schema is missing.
    RUNNING_NO_SCHEMA = 2
    # Server is running and the schema is present.
    FULLY_CONFIGURED = 3
87
+
88
+
89
@dataclass
class PreflightResult:
    """Outcome of a single preflight probe.

    Attributes
    ----------
    level : InstallationLevel
        Installation state that was detected
    version : str | None
        Version reported by the ClickHouse server, when one was obtained
    binary_path : str | None
        Location of the ClickHouse binary, when one was found
    message : str
        Short human-readable summary of the state
    action_required : str | None
        Suggested next step for resolving a problem, if any
    """

    level: InstallationLevel
    version: str | None = None
    binary_path: str | None = None
    message: str = ""
    action_required: str | None = None
112
+
113
+
114
@dataclass
class HostConnection:
    """How to reach a selected ClickHouse host.

    Attributes
    ----------
    host : str
        Name of the host: an SSH alias, a network address, or 'localhost'
    method : Literal["local", "direct", "ssh_tunnel"]
        Transport chosen for reaching the host
    port : int
        Port to connect to (8123 unless overridden)
    """

    host: str
    method: Literal["local", "direct", "ssh_tunnel"]
    port: int = 8123
131
+
132
+
133
def detect_clickhouse_state() -> PreflightResult:
    """Detect ClickHouse installation state on localhost.

    Probes, in order: whether a ClickHouse binary can be found, whether
    ``localhost:8123`` accepts TCP connections, and whether the
    ``rangebar_cache`` database exists. The first probe that fails determines
    the reported level.

    Returns
    -------
    PreflightResult
        Detection result with level, version, and guidance

    Examples
    --------
    >>> result = detect_clickhouse_state()
    >>> if result.level >= InstallationLevel.RUNNING_NO_SCHEMA:
    ...     print("ClickHouse is running")
    """
    # Level 0: Check for binary
    binary = _find_clickhouse_binary()
    if binary is None:
        return PreflightResult(
            level=InstallationLevel.NOT_INSTALLED,
            message="ClickHouse not installed",
            action_required=(
                "Install ClickHouse:\n"
                "  macOS: brew install --cask clickhouse\n"
                "  Linux: curl https://clickhouse.com/ | sh"
            ),
        )

    # Level 1: Check if server is running
    if not _is_port_open("localhost", 8123):
        return PreflightResult(
            level=InstallationLevel.INSTALLED_NOT_RUNNING,
            binary_path=binary,
            message=f"ClickHouse binary at {binary}, but server not running",
            action_required=(
                f"Start ClickHouse server:\n"
                f"  {binary} server --daemon\n"
                f"  OR: systemctl start clickhouse-server"
            ),
        )

    # Level 2: Check for schema
    version = _get_clickhouse_version()
    # The version probe is best-effort and may return None; use a readable
    # placeholder so the messages never say "ClickHouse None ...".
    version_label = version if version else "(version unknown)"
    if not _has_rangebar_schema():
        return PreflightResult(
            level=InstallationLevel.RUNNING_NO_SCHEMA,
            binary_path=binary,
            version=version,
            message=(
                f"ClickHouse {version_label} running, "
                "but rangebar_cache schema missing"
            ),
            action_required="Schema will be auto-created on first use",
        )

    # Level 3: Fully configured
    return PreflightResult(
        level=InstallationLevel.FULLY_CONFIGURED,
        binary_path=binary,
        version=version,
        message=f"ClickHouse {version_label} ready with rangebar_cache schema",
    )
191
+
192
+
193
def get_available_clickhouse_host() -> HostConnection:
    """Find best available ClickHouse host based on connection mode.

    Connection Mode (via RANGEBAR_MODE env var):
    - LOCAL: Force localhost:8123 only
    - CLOUD: Require CLICKHOUSE_HOST env var (remote only)
    - AUTO: Try localhost first, then SSH aliases (default)

    Priority Order (AUTO mode):
    1. LOCAL: localhost:8123 (if running on a GPU workstation with ClickHouse)
    2. DIRECT: Remote host:8123 over network (if exposed)
    3. SSH_TUNNEL: SSH to remote host, access localhost:8123 there

    This prefers local execution - if you're running on a GPU workstation
    that has ClickHouse, use it directly without network overhead.

    Returns
    -------
    HostConnection
        Connection details for available host

    Raises
    ------
    ClickHouseNotConfiguredError
        If no hosts available (FAIL LOUDLY with guidance), including when
        CLICKHOUSE_PORT is set to something that is not an integer

    Examples
    --------
    >>> # Default AUTO mode
    >>> host = get_available_clickhouse_host()
    >>> print(f"Using {host.host} via {host.method}")

    >>> # Force local mode
    >>> import os
    >>> os.environ["RANGEBAR_MODE"] = "local"
    >>> host = get_available_clickhouse_host()  # Only checks localhost
    """
    mode = get_connection_mode()

    # LOCAL mode: Force localhost only
    if mode == ConnectionMode.LOCAL:
        if _is_port_open("localhost", 8123):
            return HostConnection(host="localhost", method="local")
        msg = (
            "RANGEBAR_MODE=local but ClickHouse not running on localhost:8123.\n"
            "Start ClickHouse locally or change mode to 'auto' or 'cloud'."
        )
        raise ClickHouseNotConfiguredError(msg)

    # CLOUD mode: Require remote host only
    if mode == ConnectionMode.CLOUD:
        # Whitespace-only values are treated the same as an unset variable.
        cloud_host = (os.getenv("CLICKHOUSE_HOST") or "").strip()
        if not cloud_host:
            msg = (
                "RANGEBAR_MODE=cloud but CLICKHOUSE_HOST not set.\n"
                "Set CLICKHOUSE_HOST to your ClickHouse server address."
            )
            raise ClickHouseNotConfiguredError(msg)

        # A malformed port is a configuration problem, so report it through
        # the documented exception instead of leaking a bare ValueError.
        raw_port = os.getenv("CLICKHOUSE_PORT", "8123")
        try:
            cloud_port = int(raw_port)
        except ValueError as err:
            msg = (
                f"RANGEBAR_MODE=cloud but CLICKHOUSE_PORT={raw_port!r} "
                "is not a valid port.\n"
                "Set CLICKHOUSE_PORT to an integer (default: 8123)."
            )
            raise ClickHouseNotConfiguredError(msg) from err

        if _is_port_open(cloud_host, cloud_port):
            return HostConnection(host=cloud_host, method="direct", port=cloud_port)
        msg = (
            f"RANGEBAR_MODE=cloud but cannot connect to {cloud_host}:{cloud_port}.\n"
            "Verify CLICKHOUSE_HOST and CLICKHOUSE_PORT are correct."
        )
        raise ClickHouseNotConfiguredError(msg)

    # AUTO mode: Try localhost first, then remote hosts
    # PRIORITY 1: Always check localhost first (prefer local execution)
    # NOTE: Use _verify_clickhouse() not _is_port_open() to verify
    # it's actually ClickHouse, not another service (e.g., Chrome extension)
    if _verify_clickhouse("localhost", 8123):
        return HostConnection(host="localhost", method="local")

    # PRIORITY 2-3: Try configured remote hosts
    hosts_csv = os.getenv("RANGEBAR_CH_HOSTS", "")
    primary = os.getenv("RANGEBAR_CH_PRIMARY", "")

    # Primary first, then the CSV entries. Every name is stripped (the primary
    # too, so " host1" and "host1" are the same host), blanks are dropped, and
    # dict.fromkeys removes duplicates while preserving order.
    stripped = (name.strip() for name in (primary, *hosts_csv.split(",")))
    hosts: list[str] = list(dict.fromkeys(name for name in stripped if name))

    for host in hosts:
        # Try direct connection first (faster if ClickHouse is exposed)
        if _is_direct_available(host):
            return HostConnection(host=host, method="direct")

        # Fall back to SSH tunnel (works with firewalled hosts)
        if _is_ssh_available(host):
            return HostConnection(host=host, method="ssh_tunnel")

    # FAIL LOUDLY - no hosts available
    raise ClickHouseNotConfiguredError()
291
+
292
+
293
def _find_clickhouse_binary() -> str | None:
    """Search for ClickHouse binary in common locations.

    ``PATH`` is consulted first (``clickhouse-server``, then ``clickhouse``),
    followed by a short list of well-known install locations.

    Returns
    -------
    str | None
        Path to binary, or None if not found
    """
    # Check PATH
    for name in ("clickhouse-server", "clickhouse"):
        if found := shutil.which(name):
            return found

    # Check common locations
    common_paths = (
        Path("/usr/bin/clickhouse"),
        Path("/usr/local/bin/clickhouse"),
        Path.home() / ".local/bin/clickhouse",
        Path("/opt/homebrew/bin/clickhouse"),
    )

    for candidate in common_paths:
        # Require an executable regular file, matching what shutil.which
        # accepts; a bare exists() would also match directories and
        # non-executable files.
        if candidate.is_file() and os.access(candidate, os.X_OK):
            return str(candidate)

    return None
321
+
322
+
323
def _is_port_open(host: str, port: int, timeout: float = 1.0) -> bool:
    """Check if TCP port is open.

    Uses ``socket.create_connection`` so that both IPv4 and IPv6 addresses
    (and host names that resolve to either family) are tried, instead of
    being limited to IPv4.

    Parameters
    ----------
    host : str
        Host to check
    port : int
        Port to check
    timeout : float
        Connection timeout in seconds

    Returns
    -------
    bool
        True if port is open
    """
    try:
        # The context manager closes the probe connection immediately.
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        # Covers refused connections, timeouts and name-resolution failures.
        return False
346
+
347
+
348
def _is_direct_available(host: str, port: int = 8123) -> bool:
    """Report whether ClickHouse can be reached on ``host`` over the network.

    The alias is resolved to an address, the TCP port is probed with a
    2-second timeout, and only then is the endpoint queried to confirm that
    ClickHouse is answering.

    Parameters
    ----------
    host : str
        SSH alias to resolve
    port : int
        Port to check

    Returns
    -------
    bool
        True if ClickHouse is accessible
    """
    try:
        address = _resolve_ssh_alias_to_ip(host)
        if not address:
            return False
        if not _is_port_open(address, port, timeout=2.0):
            return False
        # Port is open; confirm it is ClickHouse that is responding.
        return _verify_clickhouse(address, port)
    except (OSError, TimeoutError, urllib.error.URLError):
        # Network/connection errors are expected during preflight probing
        return False
373
+
374
+
375
def _is_ssh_available(ssh_alias: str) -> bool:
    """Check if ClickHouse is available via SSH tunnel.

    Runs ``curl`` on the remote host over a non-interactive SSH session and
    sends ``SELECT 1`` to the ClickHouse HTTP port on that host's localhost.

    Parameters
    ----------
    ssh_alias : str
        SSH alias from ~/.ssh/config

    Returns
    -------
    bool
        True if ClickHouse is accessible via SSH
    """
    try:
        result = subprocess.run(
            [
                "ssh",
                "-o",
                "ConnectTimeout=3",
                "-o",
                "BatchMode=yes",  # never prompt for a password/passphrase
                ssh_alias,
                "curl -s http://localhost:8123/ -d 'SELECT 1'",
            ],
            capture_output=True,
            timeout=8,
            check=False,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        return False

    # Require the exact answer to SELECT 1, as _verify_clickhouse does. A
    # substring test would accept any output that merely contains the digit 1
    # (for example an error message carrying a numeric code).
    return result.returncode == 0 and result.stdout.strip() == b"1"
406
+
407
+
408
def _resolve_ssh_alias_to_ip(ssh_alias: str) -> str | None:
    """Look up the host name that SSH would connect to for ``ssh_alias``.

    Runs ``ssh -G <alias>`` and returns the value of the first output line
    that begins with ``hostname ``.

    Parameters
    ----------
    ssh_alias : str
        SSH alias to resolve

    Returns
    -------
    str | None
        The reported hostname/IP, or None if ssh could not be run, timed
        out, or printed no ``hostname`` line
    """
    try:
        completed = subprocess.run(
            ["ssh", "-G", ssh_alias],
            capture_output=True,
            text=True,
            timeout=2,
            check=False,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        return None

    for entry in completed.stdout.splitlines():
        if entry.startswith("hostname "):
            return entry.split()[1]
    return None
435
+
436
+
437
def _verify_clickhouse(host: str, port: int) -> bool:
    """Verify ClickHouse is responding at host:port.

    Sends ``SELECT 1`` over HTTP and checks that the response body is
    exactly ``1``. Any failure to obtain that answer yields False.

    Parameters
    ----------
    host : str
        Host to verify
    port : int
        Port to verify

    Returns
    -------
    bool
        True if ClickHouse responds correctly
    """
    # A bare IPv6 literal must be bracketed to form a valid URL authority.
    url_host = f"[{host}]" if ":" in host and not host.startswith("[") else host
    try:
        req = urllib.request.Request(
            f"http://{url_host}:{port}/",
            data=b"SELECT 1",
            method="POST",
        )
        with urllib.request.urlopen(req, timeout=2) as resp:
            return resp.read().strip() == b"1"
    except (OSError, http.client.HTTPException):
        # OSError covers URLError, HTTPError and timeouts. HTTPException is
        # raised when whatever owns the port does not speak HTTP (e.g.
        # BadStatusLine); it is not an OSError and would otherwise propagate.
        return False
463
+
464
+
465
def _get_clickhouse_version() -> str | None:
    """Get ClickHouse version from running server.

    Sends ``SELECT version()`` to ``http://localhost:8123/``.

    Returns
    -------
    str | None
        Version string, or None if unavailable (request failed, response
        could not be decoded, or the body was empty)
    """
    try:
        req = urllib.request.Request(
            "http://localhost:8123/",
            data=b"SELECT version()",
            method="POST",
        )
        with urllib.request.urlopen(req, timeout=2) as resp:
            version = resp.read().decode().strip()
    except (OSError, http.client.HTTPException, UnicodeDecodeError):
        # OSError covers URLError/HTTPError/timeouts. HTTPException and
        # UnicodeDecodeError occur when the responder is not a well-behaved
        # ClickHouse HTTP endpoint; neither is an OSError.
        return None
    # An empty body carries no version information.
    return version or None
484
+
485
+
486
def _has_rangebar_schema() -> bool:
    """Check if rangebar_cache database exists.

    Sends ``SHOW DATABASES LIKE 'rangebar_cache'`` to
    ``http://localhost:8123/`` and looks for the exact database name in the
    response.

    Returns
    -------
    bool
        True if database exists
    """
    try:
        req = urllib.request.Request(
            "http://localhost:8123/",
            data=b"SHOW DATABASES LIKE 'rangebar_cache'",
            method="POST",
        )
        with urllib.request.urlopen(req, timeout=2) as resp:
            # "_" is a single-character wildcard in LIKE, and a non-ClickHouse
            # responder may return an arbitrary body, so require the exact
            # name rather than any non-empty response.
            return b"rangebar_cache" in resp.read().split()
    except (OSError, http.client.HTTPException):
        # OSError covers URLError/HTTPError/timeouts. HTTPException is raised
        # for malformed HTTP replies and is not an OSError.
        return False