rangebar 11.6.1__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54):
  1. rangebar/CLAUDE.md +327 -0
  2. rangebar/__init__.py +227 -0
  3. rangebar/__init__.pyi +1089 -0
  4. rangebar/_core.cpython-313-darwin.so +0 -0
  5. rangebar/checkpoint.py +472 -0
  6. rangebar/cli.py +298 -0
  7. rangebar/clickhouse/CLAUDE.md +139 -0
  8. rangebar/clickhouse/__init__.py +100 -0
  9. rangebar/clickhouse/bulk_operations.py +309 -0
  10. rangebar/clickhouse/cache.py +734 -0
  11. rangebar/clickhouse/client.py +121 -0
  12. rangebar/clickhouse/config.py +141 -0
  13. rangebar/clickhouse/mixin.py +120 -0
  14. rangebar/clickhouse/preflight.py +504 -0
  15. rangebar/clickhouse/query_operations.py +345 -0
  16. rangebar/clickhouse/schema.sql +187 -0
  17. rangebar/clickhouse/tunnel.py +222 -0
  18. rangebar/constants.py +288 -0
  19. rangebar/conversion.py +177 -0
  20. rangebar/exceptions.py +207 -0
  21. rangebar/exness.py +364 -0
  22. rangebar/hooks.py +311 -0
  23. rangebar/logging.py +171 -0
  24. rangebar/notify/__init__.py +15 -0
  25. rangebar/notify/pushover.py +155 -0
  26. rangebar/notify/telegram.py +271 -0
  27. rangebar/orchestration/__init__.py +20 -0
  28. rangebar/orchestration/count_bounded.py +797 -0
  29. rangebar/orchestration/helpers.py +412 -0
  30. rangebar/orchestration/models.py +76 -0
  31. rangebar/orchestration/precompute.py +498 -0
  32. rangebar/orchestration/range_bars.py +736 -0
  33. rangebar/orchestration/tick_fetcher.py +226 -0
  34. rangebar/ouroboros.py +454 -0
  35. rangebar/processors/__init__.py +22 -0
  36. rangebar/processors/api.py +383 -0
  37. rangebar/processors/core.py +522 -0
  38. rangebar/resource_guard.py +567 -0
  39. rangebar/storage/__init__.py +22 -0
  40. rangebar/storage/checksum_registry.py +218 -0
  41. rangebar/storage/parquet.py +728 -0
  42. rangebar/streaming.py +300 -0
  43. rangebar/validation/__init__.py +69 -0
  44. rangebar/validation/cache_staleness.py +277 -0
  45. rangebar/validation/continuity.py +664 -0
  46. rangebar/validation/gap_classification.py +294 -0
  47. rangebar/validation/post_storage.py +317 -0
  48. rangebar/validation/tier1.py +175 -0
  49. rangebar/validation/tier2.py +261 -0
  50. rangebar-11.6.1.dist-info/METADATA +308 -0
  51. rangebar-11.6.1.dist-info/RECORD +54 -0
  52. rangebar-11.6.1.dist-info/WHEEL +4 -0
  53. rangebar-11.6.1.dist-info/entry_points.txt +2 -0
  54. rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,218 @@
1
+ """Checksum registry for Tier 1 Parquet cache.
2
+
3
+ Implements GitHub Issue #43: Track which cached Parquet files have verified checksums.
4
+
5
+ This module maintains a JSONL registry of checksum verifications, allowing:
6
+ - Audit of which cached files have been checksum-verified
7
+ - Detection of unverified dates that should be re-downloaded
8
+ - Correlation between raw downloads and cached data
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ from dataclasses import asdict, dataclass
15
+ from datetime import UTC, datetime, timedelta
16
+ from datetime import date as dt_date
17
+ from pathlib import Path
18
+
19
+
20
+ @dataclass
21
+ class ChecksumRecord:
22
+ """Record of checksum verification for a cached file."""
23
+
24
+ symbol: str
25
+ date: str
26
+ file_path: str
27
+ expected_hash: str
28
+ actual_hash: str
29
+ verified_at: str # ISO8601 UTC
30
+ data_source: str # "binance", "exness", etc.
31
+
32
+ @classmethod
33
+ def create(
34
+ cls,
35
+ symbol: str,
36
+ date: str,
37
+ file_path: str,
38
+ expected_hash: str,
39
+ actual_hash: str,
40
+ data_source: str = "binance",
41
+ ) -> ChecksumRecord:
42
+ """Create a new checksum record with current timestamp."""
43
+ return cls(
44
+ symbol=symbol,
45
+ date=date,
46
+ file_path=file_path,
47
+ expected_hash=expected_hash,
48
+ actual_hash=actual_hash,
49
+ verified_at=datetime.now(UTC).isoformat(),
50
+ data_source=data_source,
51
+ )
52
+
53
+
54
class ChecksumRegistry:
    """Registry tracking which Tier 1 cache files have verified checksums.

    The registry is stored as NDJSON (one JSON object per line) for:
    - Append-only writes
    - Easy parsing with standard tools (jq, grep)
    - Streaming reads for large registries

    Every read method skips lines that are not valid JSON, are not JSON
    objects, or lack a ``symbol`` key or a string ``date`` key, so one
    damaged line does not break lookups.
    """

    def __init__(self, registry_path: Path | None = None) -> None:
        """Initialize the registry and create its parent directory if needed.

        Args:
            registry_path: Path to the registry file. If None, uses
                ``logs/checksum_registry.jsonl`` under the directory four
                levels above this module file. A plain string is accepted
                and converted to a ``Path``.
        """
        if registry_path is None:
            project_root = Path(__file__).parent.parent.parent.parent
            registry_path = project_root / "logs" / "checksum_registry.jsonl"
        elif not isinstance(registry_path, Path):
            # Later code relies on Path methods (.parent, .open, .exists).
            registry_path = Path(registry_path)
        self.registry_path = registry_path
        self.registry_path.parent.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _parse_entry(line: str, symbol: str) -> dict | None:
        """Parse one registry line into a usable entry for *symbol*.

        Args:
            line: Raw line read from the registry file.
            symbol: Symbol the entry must belong to.

        Returns:
            The decoded JSON object when it carries a matching ``symbol`` and
            a string ``date``; None for anything else (invalid JSON, non-object
            JSON, missing keys, or a different symbol).
        """
        try:
            entry = json.loads(line)
        except json.JSONDecodeError:
            return None
        if not isinstance(entry, dict):
            return None
        if "symbol" not in entry or entry["symbol"] != symbol:
            return None
        if not isinstance(entry.get("date"), str):
            return None
        return entry

    def _verified_dates(self, symbol: str) -> set[str]:
        """Return the distinct dates that have at least one entry for *symbol*."""
        if not self.registry_path.exists():
            return set()
        with self.registry_path.open(encoding="utf-8") as f:
            entries = (self._parse_entry(line, symbol) for line in f)
            return {entry["date"] for entry in entries if entry is not None}

    def record_verification(self, record: ChecksumRecord) -> None:
        """Append a verification record to the registry file.

        Args:
            record: ChecksumRecord to append; it is serialized as one JSON
                object on its own line.
        """
        with self.registry_path.open("a", encoding="utf-8") as f:
            f.write(json.dumps(asdict(record)) + "\n")

    def is_verified(self, symbol: str, date: str) -> bool:
        """Check if a specific date's data has been checksum-verified.

        Args:
            symbol: Trading symbol (e.g., "BTCUSDT")
            date: Date string (YYYY-MM-DD)

        Returns:
            True if the registry holds a record for this symbol and date,
            False otherwise (including when the registry file does not exist).
        """
        if not self.registry_path.exists():
            return False

        with self.registry_path.open(encoding="utf-8") as f:
            for line in f:
                entry = self._parse_entry(line, symbol)
                if entry is not None and entry["date"] == date:
                    return True
        return False

    def get_verification(self, symbol: str, date: str) -> ChecksumRecord | None:
        """Get the verification record for a specific date.

        The file is scanned from the top, so when several records match, the
        one written first is returned.

        Args:
            symbol: Trading symbol (e.g., "BTCUSDT")
            date: Date string (YYYY-MM-DD)

        Returns:
            ChecksumRecord if found, None otherwise
        """
        if not self.registry_path.exists():
            return None

        with self.registry_path.open(encoding="utf-8") as f:
            for line in f:
                entry = self._parse_entry(line, symbol)
                if entry is None or entry["date"] != date:
                    continue
                try:
                    return ChecksumRecord(**entry)
                except TypeError:
                    # Missing or unexpected fields: not a usable record,
                    # so keep looking for a well-formed one.
                    continue
        return None

    def get_unverified_dates(
        self,
        symbol: str,
        start_date: str,
        end_date: str,
    ) -> list[str]:
        """Find dates in range that lack checksum verification.

        Args:
            symbol: Trading symbol (e.g., "BTCUSDT")
            start_date: Start date (YYYY-MM-DD, inclusive)
            end_date: End date (YYYY-MM-DD, inclusive)

        Returns:
            List of unverified date strings in YYYY-MM-DD format, in
            ascending order. Empty when ``end_date`` precedes ``start_date``.

        Raises:
            ValueError: If either date string cannot be parsed by
                ``date.fromisoformat``.
        """
        verified = self._verified_dates(symbol)

        start = dt_date.fromisoformat(start_date)
        end = dt_date.fromisoformat(end_date)
        # A negative span yields an empty range, i.e. no dates at all.
        day_count = (end - start).days + 1
        candidates = (
            (start + timedelta(days=offset)).isoformat()
            for offset in range(day_count)
        )
        return [day for day in candidates if day not in verified]

    def get_verified_count(self, symbol: str) -> int:
        """Get the count of verified dates for a symbol.

        A date that appears in several records (for example after being
        re-verified) is counted once.

        Args:
            symbol: Trading symbol (e.g., "BTCUSDT")

        Returns:
            Number of distinct verified dates
        """
        return len(self._verified_dates(symbol))

    def audit_and_alert(
        self,
        symbol: str,
        start_date: str,
        end_date: str,
    ) -> None:
        """Audit cache and send Pushover alert if unverified files found.

        No alert is sent when every date in the range has a record.

        Args:
            symbol: Trading symbol (e.g., "BTCUSDT")
            start_date: Start date (YYYY-MM-DD)
            end_date: End date (YYYY-MM-DD)
        """
        unverified = self.get_unverified_dates(symbol, start_date, end_date)
        if not unverified:
            return

        # Imported only when an alert actually has to be sent.
        from ..notify.pushover import alert_tier1_cache_unverified

        start = dt_date.fromisoformat(start_date)
        end = dt_date.fromisoformat(end_date)
        total_count = (end - start).days + 1

        alert_tier1_cache_unverified(
            symbol=symbol,
            date_range=f"{start_date} to {end_date}",
            unverified_count=len(unverified),
            total_count=total_count,
        )