rangebar-11.6.1-cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. rangebar/CLAUDE.md +327 -0
  2. rangebar/__init__.py +227 -0
  3. rangebar/__init__.pyi +1089 -0
  4. rangebar/_core.cpython-313-darwin.so +0 -0
  5. rangebar/checkpoint.py +472 -0
  6. rangebar/cli.py +298 -0
  7. rangebar/clickhouse/CLAUDE.md +139 -0
  8. rangebar/clickhouse/__init__.py +100 -0
  9. rangebar/clickhouse/bulk_operations.py +309 -0
  10. rangebar/clickhouse/cache.py +734 -0
  11. rangebar/clickhouse/client.py +121 -0
  12. rangebar/clickhouse/config.py +141 -0
  13. rangebar/clickhouse/mixin.py +120 -0
  14. rangebar/clickhouse/preflight.py +504 -0
  15. rangebar/clickhouse/query_operations.py +345 -0
  16. rangebar/clickhouse/schema.sql +187 -0
  17. rangebar/clickhouse/tunnel.py +222 -0
  18. rangebar/constants.py +288 -0
  19. rangebar/conversion.py +177 -0
  20. rangebar/exceptions.py +207 -0
  21. rangebar/exness.py +364 -0
  22. rangebar/hooks.py +311 -0
  23. rangebar/logging.py +171 -0
  24. rangebar/notify/__init__.py +15 -0
  25. rangebar/notify/pushover.py +155 -0
  26. rangebar/notify/telegram.py +271 -0
  27. rangebar/orchestration/__init__.py +20 -0
  28. rangebar/orchestration/count_bounded.py +797 -0
  29. rangebar/orchestration/helpers.py +412 -0
  30. rangebar/orchestration/models.py +76 -0
  31. rangebar/orchestration/precompute.py +498 -0
  32. rangebar/orchestration/range_bars.py +736 -0
  33. rangebar/orchestration/tick_fetcher.py +226 -0
  34. rangebar/ouroboros.py +454 -0
  35. rangebar/processors/__init__.py +22 -0
  36. rangebar/processors/api.py +383 -0
  37. rangebar/processors/core.py +522 -0
  38. rangebar/resource_guard.py +567 -0
  39. rangebar/storage/__init__.py +22 -0
  40. rangebar/storage/checksum_registry.py +218 -0
  41. rangebar/storage/parquet.py +728 -0
  42. rangebar/streaming.py +300 -0
  43. rangebar/validation/__init__.py +69 -0
  44. rangebar/validation/cache_staleness.py +277 -0
  45. rangebar/validation/continuity.py +664 -0
  46. rangebar/validation/gap_classification.py +294 -0
  47. rangebar/validation/post_storage.py +317 -0
  48. rangebar/validation/tier1.py +175 -0
  49. rangebar/validation/tier2.py +261 -0
  50. rangebar-11.6.1.dist-info/METADATA +308 -0
  51. rangebar-11.6.1.dist-info/RECORD +54 -0
  52. rangebar-11.6.1.dist-info/WHEEL +4 -0
  53. rangebar-11.6.1.dist-info/entry_points.txt +2 -0
  54. rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
Binary file
rangebar/checkpoint.py ADDED
@@ -0,0 +1,472 @@
1
+ """Checkpoint system for resumable cache population (Issue #40).
2
+
3
+ Enables long-running cache population jobs to be resumed after interruption.
4
+ Uses atomic file writes and daily granularity for reliable state persistence.
5
+
6
+ Usage
7
+ -----
8
+ >>> from rangebar.checkpoint import populate_cache_resumable
9
+ >>> bars = populate_cache_resumable(
10
+ ... symbol="BTCUSDT",
11
+ ... start_date="2024-01-01",
12
+ ... end_date="2024-06-30",
13
+ ... )
14
+ >>> print(f"Populated {bars} bars")
15
+
16
+ If interrupted, simply run again - it will resume from the last completed day.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import json
22
+ import logging
23
+ import os
24
+ import tempfile
25
+ from dataclasses import asdict, dataclass, field
26
+ from datetime import UTC, datetime
27
+ from pathlib import Path
28
+ from typing import TYPE_CHECKING, Any, Iterator
29
+
30
+ from platformdirs import user_cache_dir
31
+
32
+ if TYPE_CHECKING:
33
+ pass
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+ # Default checkpoint directory
38
+ _CHECKPOINT_DIR = Path(user_cache_dir("rangebar", "terrylica")) / "checkpoints"
39
+
40
+
41
+ @dataclass
42
+ class PopulationCheckpoint:
43
+ """Checkpoint for resumable cache population.
44
+
45
+ Tracks progress of a multi-day cache population job, allowing
46
+ resumption after interruption.
47
+
48
+ Attributes
49
+ ----------
50
+ symbol : str
51
+ Trading symbol (e.g., "BTCUSDT").
52
+ threshold_bps : int
53
+ Threshold in decimal basis points.
54
+ start_date : str
55
+ Original start date (YYYY-MM-DD).
56
+ end_date : str
57
+ Target end date (YYYY-MM-DD).
58
+ last_completed_date : str
59
+ Most recent successfully completed date.
60
+ bars_written : int
61
+ Total bars written so far.
62
+ created_at : str
63
+ ISO timestamp of checkpoint creation.
64
+ updated_at : str
65
+ ISO timestamp of last update.
66
+ """
67
+
68
+ symbol: str
69
+ threshold_bps: int
70
+ start_date: str
71
+ end_date: str
72
+ last_completed_date: str
73
+ bars_written: int = 0
74
+ created_at: str = field(default_factory=lambda: datetime.now(UTC).isoformat())
75
+ updated_at: str = field(default_factory=lambda: datetime.now(UTC).isoformat())
76
+
77
+ def to_dict(self) -> dict[str, Any]:
78
+ """Convert to dictionary for JSON serialization."""
79
+ return asdict(self)
80
+
81
+ @classmethod
82
+ def from_dict(cls, data: dict[str, Any]) -> PopulationCheckpoint:
83
+ """Create from dictionary."""
84
+ return cls(**data)
85
+
86
+ def save(self, path: Path) -> None:
87
+ """Save checkpoint to file with atomic write.
88
+
89
+ Uses tempfile + fsync + rename pattern for crash safety.
90
+
91
+ Parameters
92
+ ----------
93
+ path : Path
94
+ Path to save checkpoint file.
95
+ """
96
+ self.updated_at = datetime.now(UTC).isoformat()
97
+ data = json.dumps(self.to_dict(), indent=2)
98
+
99
+ # Ensure parent directory exists
100
+ path.parent.mkdir(parents=True, exist_ok=True)
101
+
102
+ # Atomic write: write to temp file, fsync, rename
103
+ fd, temp_path = tempfile.mkstemp(
104
+ dir=path.parent,
105
+ prefix=".checkpoint_",
106
+ suffix=".tmp",
107
+ )
108
+ try:
109
+ with os.fdopen(fd, "w") as f:
110
+ f.write(data)
111
+ f.flush()
112
+ os.fsync(f.fileno())
113
+
114
+ # Atomic rename (POSIX guarantees this is atomic)
115
+ os.replace(temp_path, path)
116
+ logger.debug("Saved checkpoint to %s", path)
117
+ except (OSError, RuntimeError):
118
+ # Clean up temp file on failure
119
+ try:
120
+ os.unlink(temp_path)
121
+ except OSError:
122
+ pass
123
+ raise
124
+
125
+ @classmethod
126
+ def load(cls, path: Path) -> PopulationCheckpoint | None:
127
+ """Load checkpoint from file.
128
+
129
+ Parameters
130
+ ----------
131
+ path : Path
132
+ Path to checkpoint file.
133
+
134
+ Returns
135
+ -------
136
+ PopulationCheckpoint | None
137
+ Loaded checkpoint, or None if file doesn't exist.
138
+ """
139
+ if not path.exists():
140
+ return None
141
+
142
+ try:
143
+ data = json.loads(path.read_text())
144
+ return cls.from_dict(data)
145
+ except (json.JSONDecodeError, KeyError, TypeError) as e:
146
+ logger.warning("Failed to load checkpoint from %s: %s", path, e)
147
+ return None
148
+
149
+
150
+ def _get_checkpoint_path(
151
+ symbol: str,
152
+ start_date: str,
153
+ end_date: str,
154
+ checkpoint_dir: Path | None = None,
155
+ ) -> Path:
156
+ """Get the checkpoint file path for a population job.
157
+
158
+ Parameters
159
+ ----------
160
+ symbol : str
161
+ Trading symbol.
162
+ start_date : str
163
+ Start date (YYYY-MM-DD).
164
+ end_date : str
165
+ End date (YYYY-MM-DD).
166
+ checkpoint_dir : Path | None
167
+ Custom checkpoint directory. Uses default if None.
168
+
169
+ Returns
170
+ -------
171
+ Path
172
+ Path to the checkpoint file.
173
+ """
174
+ if checkpoint_dir is None:
175
+ checkpoint_dir = _CHECKPOINT_DIR
176
+
177
+ # Create unique filename from job parameters
178
+ filename = f"{symbol}_{start_date}_{end_date}.json"
179
+ return checkpoint_dir / filename
180
+
181
+
182
+ def _date_range(start_date: str, end_date: str) -> Iterator[str]:
183
+ """Generate dates from start to end (inclusive).
184
+
185
+ Parameters
186
+ ----------
187
+ start_date : str
188
+ Start date (YYYY-MM-DD).
189
+ end_date : str
190
+ End date (YYYY-MM-DD).
191
+
192
+ Yields
193
+ ------
194
+ str
195
+ Dates in YYYY-MM-DD format.
196
+ """
197
+ from datetime import timedelta
198
+
199
+ start = datetime.strptime(start_date, "%Y-%m-%d") # noqa: DTZ007
200
+ end = datetime.strptime(end_date, "%Y-%m-%d") # noqa: DTZ007
201
+
202
+ current = start
203
+ while current <= end:
204
+ yield current.strftime("%Y-%m-%d")
205
+ current += timedelta(days=1)
206
+
207
+
208
def populate_cache_resumable(
    symbol: str,
    start_date: str,
    end_date: str,
    *,
    threshold_decimal_bps: int = 250,
    checkpoint_dir: Path | None = None,
    notify: bool = True,
) -> int:
    """Populate cache for a date range with automatic checkpointing.

    This function fetches tick data and computes range bars day-by-day,
    saving progress after each day. If interrupted, simply run again
    with the same parameters to resume from the last completed day.

    Parameters
    ----------
    symbol : str
        Trading symbol (e.g., "BTCUSDT").
    start_date : str
        Start date (YYYY-MM-DD).
    end_date : str
        End date (YYYY-MM-DD).
    threshold_decimal_bps : int
        Threshold in decimal basis points (default: 250 = 0.25%).
    checkpoint_dir : Path | None
        Custom checkpoint directory. Uses default if None.
    notify : bool
        Whether to emit hook events for progress tracking.

    Returns
    -------
    int
        Total number of bars written.

    Raises
    ------
    ValueError, RuntimeError, OSError
        Re-raised from the per-day fetch/compute. The checkpoint for the
        last fully completed day has already been saved, so a rerun
        resumes at the failed day.

    Examples
    --------
    >>> bars = populate_cache_resumable("BTCUSDT", "2024-01-01", "2024-06-30")
    >>> print(f"Populated {bars} bars")
    14523

    >>> # If interrupted, just run again:
    >>> bars = populate_cache_resumable("BTCUSDT", "2024-01-01", "2024-06-30")
    >>> # Will resume from last completed day
    """
    # NOTE(review): imports are deferred to call time — presumably to avoid
    # an import cycle with the package root; confirm before moving them.
    from rangebar import get_range_bars
    from rangebar.hooks import HookEvent, emit_hook

    checkpoint_path = _get_checkpoint_path(symbol, start_date, end_date, checkpoint_dir)

    # Check for existing checkpoint
    checkpoint = PopulationCheckpoint.load(checkpoint_path)
    resume_date = start_date
    total_bars = 0

    if checkpoint:
        # Validate checkpoint matches our parameters — a checkpoint written
        # for a different job must not silently skip days of this one.
        if (
            checkpoint.symbol == symbol
            and checkpoint.threshold_bps == threshold_decimal_bps
            and checkpoint.start_date == start_date
            and checkpoint.end_date == end_date
        ):
            # Resume from day after last completed
            from datetime import timedelta

            last_completed = datetime.strptime(  # noqa: DTZ007
                checkpoint.last_completed_date, "%Y-%m-%d"
            )
            resume_date = (last_completed + timedelta(days=1)).strftime("%Y-%m-%d")
            total_bars = checkpoint.bars_written

            logger.info(
                "Resuming %s population from %s (%d bars already written)",
                symbol,
                resume_date,
                total_bars,
            )

            if notify:
                emit_hook(
                    HookEvent.CHECKPOINT_SAVED,
                    symbol=symbol,
                    action="resumed",
                    last_completed_date=checkpoint.last_completed_date,
                    bars_written=total_bars,
                )
        else:
            logger.warning("Checkpoint parameters don't match, starting fresh")
            checkpoint = None

    # Process day by day; `dates` is empty (loop skipped) when resume_date
    # is already past end_date.
    dates = list(_date_range(resume_date, end_date))
    total_days = len(dates)

    for i, date in enumerate(dates, 1):
        logger.info(
            "Processing %s [%d/%d]: %s",
            symbol,
            i,
            total_days,
            date,
        )

        try:
            # Fetch and compute range bars for this day (single-day window:
            # start and end are both `date`).
            df = get_range_bars(
                symbol,
                date,
                date,
                threshold_decimal_bps=threshold_decimal_bps,
                use_cache=True,
                fetch_if_missing=True,
            )

            bars_today = len(df) if df is not None else 0
            total_bars += bars_today

            logger.debug(
                "%s %s: %d bars (total: %d)",
                symbol,
                date,
                bars_today,
                total_bars,
            )

            # Save checkpoint after each successful day
            checkpoint = PopulationCheckpoint(
                symbol=symbol,
                threshold_bps=threshold_decimal_bps,
                start_date=start_date,
                end_date=end_date,
                last_completed_date=date,
                bars_written=total_bars,
                # Preserve the original creation timestamp across resumes;
                # only a brand-new job stamps "now".
                created_at=(
                    checkpoint.created_at
                    if checkpoint
                    else datetime.now(UTC).isoformat()
                ),
            )
            checkpoint.save(checkpoint_path)

            if notify:
                emit_hook(
                    HookEvent.CHECKPOINT_SAVED,
                    symbol=symbol,
                    date=date,
                    bars_today=bars_today,
                    total_bars=total_bars,
                    progress_pct=round(i / total_days * 100, 1),
                )

        except (ValueError, RuntimeError, OSError) as exc:
            logger.exception(
                "Failed to process %s for %s",
                symbol,
                date,
            )
            if notify:
                emit_hook(
                    HookEvent.POPULATION_FAILED,
                    symbol=symbol,
                    date=date,
                    error=str(exc),
                    total_bars=total_bars,
                )
            # Re-raise so the caller sees the failure; the checkpoint saved
            # for the previous day lets a rerun resume at the failed day.
            raise

    # Clean up checkpoint on success (best-effort: a leftover file would
    # only cause a no-op resume on the next identical run).
    try:
        checkpoint_path.unlink()
        logger.debug("Removed checkpoint file after successful completion")
    except OSError:
        pass

    logger.info(
        "Population complete for %s: %d bars from %s to %s",
        symbol,
        total_bars,
        start_date,
        end_date,
    )

    if notify:
        emit_hook(
            HookEvent.POPULATION_COMPLETE,
            symbol=symbol,
            start_date=start_date,
            end_date=end_date,
            total_bars=total_bars,
        )

    return total_bars
401
+
402
+
403
def list_checkpoints(
    checkpoint_dir: Path | None = None,
) -> list[PopulationCheckpoint]:
    """Return every readable checkpoint in the checkpoint directory.

    Parameters
    ----------
    checkpoint_dir : Path | None
        Custom checkpoint directory. Uses default if None.

    Returns
    -------
    list[PopulationCheckpoint]
        List of checkpoints found. An empty list when the directory does
        not exist; unreadable files are skipped.
    """
    base = _CHECKPOINT_DIR if checkpoint_dir is None else checkpoint_dir

    if not base.exists():
        return []

    # load() returns None for corrupt/unreadable files; keep only real ones.
    candidates = (PopulationCheckpoint.load(p) for p in base.glob("*.json"))
    return [cp for cp in candidates if cp]
431
+
432
+
433
def clear_checkpoint(
    symbol: str,
    start_date: str,
    end_date: str,
    checkpoint_dir: Path | None = None,
) -> bool:
    """Remove the checkpoint for one specific population job, if present.

    Parameters
    ----------
    symbol : str
        Trading symbol.
    start_date : str
        Start date (YYYY-MM-DD).
    end_date : str
        End date (YYYY-MM-DD).
    checkpoint_dir : Path | None
        Custom checkpoint directory. Uses default if None.

    Returns
    -------
    bool
        True if checkpoint was found and removed.
    """
    target = _get_checkpoint_path(symbol, start_date, end_date, checkpoint_dir)

    # Guard clause: nothing to do when no checkpoint was ever written.
    if not target.exists():
        return False

    target.unlink()
    logger.info("Cleared checkpoint: %s", target)
    return True
465
+
466
+
467
# Public API of the checkpoint module (kept alphabetized).
__all__ = [
    "PopulationCheckpoint",
    "clear_checkpoint",
    "list_checkpoints",
    "populate_cache_resumable",
]