rangebar 11.6.1__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rangebar/CLAUDE.md +327 -0
- rangebar/__init__.py +227 -0
- rangebar/__init__.pyi +1089 -0
- rangebar/_core.cpython-313-darwin.so +0 -0
- rangebar/checkpoint.py +472 -0
- rangebar/cli.py +298 -0
- rangebar/clickhouse/CLAUDE.md +139 -0
- rangebar/clickhouse/__init__.py +100 -0
- rangebar/clickhouse/bulk_operations.py +309 -0
- rangebar/clickhouse/cache.py +734 -0
- rangebar/clickhouse/client.py +121 -0
- rangebar/clickhouse/config.py +141 -0
- rangebar/clickhouse/mixin.py +120 -0
- rangebar/clickhouse/preflight.py +504 -0
- rangebar/clickhouse/query_operations.py +345 -0
- rangebar/clickhouse/schema.sql +187 -0
- rangebar/clickhouse/tunnel.py +222 -0
- rangebar/constants.py +288 -0
- rangebar/conversion.py +177 -0
- rangebar/exceptions.py +207 -0
- rangebar/exness.py +364 -0
- rangebar/hooks.py +311 -0
- rangebar/logging.py +171 -0
- rangebar/notify/__init__.py +15 -0
- rangebar/notify/pushover.py +155 -0
- rangebar/notify/telegram.py +271 -0
- rangebar/orchestration/__init__.py +20 -0
- rangebar/orchestration/count_bounded.py +797 -0
- rangebar/orchestration/helpers.py +412 -0
- rangebar/orchestration/models.py +76 -0
- rangebar/orchestration/precompute.py +498 -0
- rangebar/orchestration/range_bars.py +736 -0
- rangebar/orchestration/tick_fetcher.py +226 -0
- rangebar/ouroboros.py +454 -0
- rangebar/processors/__init__.py +22 -0
- rangebar/processors/api.py +383 -0
- rangebar/processors/core.py +522 -0
- rangebar/resource_guard.py +567 -0
- rangebar/storage/__init__.py +22 -0
- rangebar/storage/checksum_registry.py +218 -0
- rangebar/storage/parquet.py +728 -0
- rangebar/streaming.py +300 -0
- rangebar/validation/__init__.py +69 -0
- rangebar/validation/cache_staleness.py +277 -0
- rangebar/validation/continuity.py +664 -0
- rangebar/validation/gap_classification.py +294 -0
- rangebar/validation/post_storage.py +317 -0
- rangebar/validation/tier1.py +175 -0
- rangebar/validation/tier2.py +261 -0
- rangebar-11.6.1.dist-info/METADATA +308 -0
- rangebar-11.6.1.dist-info/RECORD +54 -0
- rangebar-11.6.1.dist-info/WHEEL +4 -0
- rangebar-11.6.1.dist-info/entry_points.txt +2 -0
- rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
"""Checksum registry for Tier 1 Parquet cache.
|
|
2
|
+
|
|
3
|
+
Implements GitHub Issue #43: Track which cached Parquet files have verified checksums.
|
|
4
|
+
|
|
5
|
+
This module maintains a JSONL registry of checksum verifications, allowing:
|
|
6
|
+
- Audit of which cached files have been checksum-verified
|
|
7
|
+
- Detection of unverified dates that should be re-downloaded
|
|
8
|
+
- Correlation between raw downloads and cached data
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import json
from collections.abc import Iterator
from dataclasses import asdict, dataclass
from datetime import UTC, datetime, timedelta
from datetime import date as dt_date
from pathlib import Path
from typing import Any
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
|
|
21
|
+
class ChecksumRecord:
|
|
22
|
+
"""Record of checksum verification for a cached file."""
|
|
23
|
+
|
|
24
|
+
symbol: str
|
|
25
|
+
date: str
|
|
26
|
+
file_path: str
|
|
27
|
+
expected_hash: str
|
|
28
|
+
actual_hash: str
|
|
29
|
+
verified_at: str # ISO8601 UTC
|
|
30
|
+
data_source: str # "binance", "exness", etc.
|
|
31
|
+
|
|
32
|
+
@classmethod
|
|
33
|
+
def create(
|
|
34
|
+
cls,
|
|
35
|
+
symbol: str,
|
|
36
|
+
date: str,
|
|
37
|
+
file_path: str,
|
|
38
|
+
expected_hash: str,
|
|
39
|
+
actual_hash: str,
|
|
40
|
+
data_source: str = "binance",
|
|
41
|
+
) -> ChecksumRecord:
|
|
42
|
+
"""Create a new checksum record with current timestamp."""
|
|
43
|
+
return cls(
|
|
44
|
+
symbol=symbol,
|
|
45
|
+
date=date,
|
|
46
|
+
file_path=file_path,
|
|
47
|
+
expected_hash=expected_hash,
|
|
48
|
+
actual_hash=actual_hash,
|
|
49
|
+
verified_at=datetime.now(UTC).isoformat(),
|
|
50
|
+
data_source=data_source,
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class ChecksumRegistry:
    """Registry tracking which Tier 1 cache files have verified checksums.

    The registry is stored as NDJSON (one JSON object per line) for:
    - Append-only writes
    - Easy parsing with standard tools (jq, grep)
    - Streaming reads for large registries

    Reads are best-effort: a line that is not valid JSON, or that does not
    decode to an object carrying the requested ``symbol`` and a string
    ``date``, is skipped instead of raising.
    """

    def __init__(self, registry_path: Path | None = None) -> None:
        """Initialize the registry and make sure its directory exists.

        Args:
            registry_path: Path to the registry file. If None, the file
                ``logs/checksum_registry.jsonl`` located four directory
                levels above this module is used.
        """
        if registry_path is None:
            # NOTE(review): four ``parent`` hops assumes a specific source
            # checkout layout; when the package is installed this may resolve
            # outside any project directory -- confirm the intended default.
            project_root = Path(__file__).parent.parent.parent.parent
            registry_path = project_root / "logs" / "checksum_registry.jsonl"
        self.registry_path = registry_path
        self.registry_path.parent.mkdir(parents=True, exist_ok=True)

    def _iter_records(self, symbol: str) -> Iterator[dict[str, Any]]:
        """Yield well-formed registry records for ``symbol`` in file order.

        A missing registry file yields nothing. Lines that fail JSON decoding,
        decode to something other than an object, belong to another symbol,
        or lack a string ``date`` are skipped.
        """
        try:
            handle = self.registry_path.open(encoding="utf-8")
        except FileNotFoundError:
            return
        with handle:
            for line in handle:
                try:
                    record = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # Valid JSON is not necessarily an object (``null``, ``[..]``).
                if not isinstance(record, dict):
                    continue
                if record.get("symbol") != symbol:
                    continue
                if not isinstance(record.get("date"), str):
                    continue
                yield record

    def record_verification(self, record: ChecksumRecord) -> None:
        """Append verification record to registry.

        Args:
            record: ChecksumRecord to append; serialized with
                ``dataclasses.asdict`` as one JSON line.
        """
        with self.registry_path.open("a", encoding="utf-8") as f:
            f.write(json.dumps(asdict(record)) + "\n")

    def is_verified(self, symbol: str, date: str) -> bool:
        """Check if a specific date's data has a verification record.

        The check is on the presence of a record only; the stored hashes are
        not compared here.

        Args:
            symbol: Trading symbol (e.g., "BTCUSDT")
            date: Date string (YYYY-MM-DD)

        Returns:
            True if the date has a verification record, False otherwise
        """
        return any(
            record["date"] == date for record in self._iter_records(symbol)
        )

    def get_verification(self, symbol: str, date: str) -> ChecksumRecord | None:
        """Get the verification record for a specific date.

        The first matching record in file order is returned. A matching
        record whose keys do not fit ``ChecksumRecord`` is skipped.

        Args:
            symbol: Trading symbol (e.g., "BTCUSDT")
            date: Date string (YYYY-MM-DD)

        Returns:
            ChecksumRecord if found, None otherwise
        """
        for record in self._iter_records(symbol):
            if record["date"] != date:
                continue
            try:
                return ChecksumRecord(**record)
            except TypeError:
                # Missing or unexpected keys: treat like any other
                # malformed line and keep looking.
                continue
        return None

    def get_unverified_dates(
        self,
        symbol: str,
        start_date: str,
        end_date: str,
    ) -> list[str]:
        """Find dates in range that lack checksum verification.

        Args:
            symbol: Trading symbol (e.g., "BTCUSDT")
            start_date: Start date (YYYY-MM-DD, inclusive)
            end_date: End date (YYYY-MM-DD, inclusive)

        Returns:
            List of unverified date strings in YYYY-MM-DD format, in
            ascending order. Empty when ``start_date`` is after ``end_date``.

        Raises:
            ValueError: If either bound is not a valid ISO date string.
        """
        verified = {record["date"] for record in self._iter_records(symbol)}

        start = dt_date.fromisoformat(start_date)
        end = dt_date.fromisoformat(end_date)
        # A negative span makes ``range`` empty, so a reversed range yields [].
        span_days = (end - start).days + 1
        candidates = (
            (start + timedelta(days=offset)).isoformat()
            for offset in range(span_days)
        )
        return [day for day in candidates if day not in verified]

    def get_verified_count(self, symbol: str) -> int:
        """Get the count of verified dates for a symbol.

        Dates are counted once even if the append-only file holds several
        records for the same date.

        Args:
            symbol: Trading symbol (e.g., "BTCUSDT")

        Returns:
            Number of distinct verified dates
        """
        return len({record["date"] for record in self._iter_records(symbol)})

    def audit_and_alert(
        self,
        symbol: str,
        start_date: str,
        end_date: str,
    ) -> None:
        """Audit cache and send Pushover alert if unverified files found.

        Nothing is sent when every date in the range has a record.

        Args:
            symbol: Trading symbol (e.g., "BTCUSDT")
            start_date: Start date (YYYY-MM-DD)
            end_date: End date (YYYY-MM-DD)
        """
        unverified = self.get_unverified_dates(symbol, start_date, end_date)
        if not unverified:
            return

        # Imported lazily so the notifier is only loaded when an alert is due.
        from ..notify.pushover import alert_tier1_cache_unverified

        start = dt_date.fromisoformat(start_date)
        end = dt_date.fromisoformat(end_date)
        total_count = (end - start).days + 1

        alert_tier1_cache_unverified(
            symbol=symbol,
            date_range=f"{start_date} to {end_date}",
            unverified_count=len(unverified),
            total_count=total_count,
        )
|