rangebar-11.6.1-cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. rangebar/CLAUDE.md +327 -0
  2. rangebar/__init__.py +227 -0
  3. rangebar/__init__.pyi +1089 -0
  4. rangebar/_core.cpython-313-darwin.so +0 -0
  5. rangebar/checkpoint.py +472 -0
  6. rangebar/cli.py +298 -0
  7. rangebar/clickhouse/CLAUDE.md +139 -0
  8. rangebar/clickhouse/__init__.py +100 -0
  9. rangebar/clickhouse/bulk_operations.py +309 -0
  10. rangebar/clickhouse/cache.py +734 -0
  11. rangebar/clickhouse/client.py +121 -0
  12. rangebar/clickhouse/config.py +141 -0
  13. rangebar/clickhouse/mixin.py +120 -0
  14. rangebar/clickhouse/preflight.py +504 -0
  15. rangebar/clickhouse/query_operations.py +345 -0
  16. rangebar/clickhouse/schema.sql +187 -0
  17. rangebar/clickhouse/tunnel.py +222 -0
  18. rangebar/constants.py +288 -0
  19. rangebar/conversion.py +177 -0
  20. rangebar/exceptions.py +207 -0
  21. rangebar/exness.py +364 -0
  22. rangebar/hooks.py +311 -0
  23. rangebar/logging.py +171 -0
  24. rangebar/notify/__init__.py +15 -0
  25. rangebar/notify/pushover.py +155 -0
  26. rangebar/notify/telegram.py +271 -0
  27. rangebar/orchestration/__init__.py +20 -0
  28. rangebar/orchestration/count_bounded.py +797 -0
  29. rangebar/orchestration/helpers.py +412 -0
  30. rangebar/orchestration/models.py +76 -0
  31. rangebar/orchestration/precompute.py +498 -0
  32. rangebar/orchestration/range_bars.py +736 -0
  33. rangebar/orchestration/tick_fetcher.py +226 -0
  34. rangebar/ouroboros.py +454 -0
  35. rangebar/processors/__init__.py +22 -0
  36. rangebar/processors/api.py +383 -0
  37. rangebar/processors/core.py +522 -0
  38. rangebar/resource_guard.py +567 -0
  39. rangebar/storage/__init__.py +22 -0
  40. rangebar/storage/checksum_registry.py +218 -0
  41. rangebar/storage/parquet.py +728 -0
  42. rangebar/streaming.py +300 -0
  43. rangebar/validation/__init__.py +69 -0
  44. rangebar/validation/cache_staleness.py +277 -0
  45. rangebar/validation/continuity.py +664 -0
  46. rangebar/validation/gap_classification.py +294 -0
  47. rangebar/validation/post_storage.py +317 -0
  48. rangebar/validation/tier1.py +175 -0
  49. rangebar/validation/tier2.py +261 -0
  50. rangebar-11.6.1.dist-info/METADATA +308 -0
  51. rangebar-11.6.1.dist-info/RECORD +54 -0
  52. rangebar-11.6.1.dist-info/WHEEL +4 -0
  53. rangebar-11.6.1.dist-info/entry_points.txt +2 -0
  54. rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
rangebar/__init__.pyi ADDED
@@ -0,0 +1,1089 @@
1
+ """Type stubs for rangebar package.
2
+
3
+ Public API
4
+ ----------
5
+ get_range_bars : Get range bars with automatic data fetching and caching (date-bounded).
6
+ get_n_range_bars : Get exactly N range bars (count-bounded, deterministic).
7
+ precompute_range_bars : Pre-compute continuous range bars for a date range (single-pass).
8
+ validate_continuity_tiered : Validate range bar continuity with tiered gap classification.
9
+ ContinuityError : Exception raised when range bar continuity is violated.
10
+ ContinuityWarning : Warning issued when range bar discontinuities are detected.
11
+ PrecomputeProgress : Progress update for precomputation.
12
+ PrecomputeResult : Result of precomputation.
13
+ GapTier : Gap severity classification enum.
14
+ AssetClass : Asset class enum for tolerance calibration.
15
+ TierThresholds : Configurable boundaries between gap tiers.
16
+ ValidationPreset : Immutable validation configuration preset.
17
+ GapInfo : Details of a single gap between consecutive bars.
18
+ TierSummary : Per-tier statistics for gap analysis.
19
+ TieredValidationResult : Comprehensive validation result with tier breakdown.
20
+ TIER1_SYMBOLS : High-liquidity symbols available on all Binance markets.
21
+ THRESHOLD_PRESETS : Named threshold presets (micro, tight, standard, etc.).
22
+ VALIDATION_PRESETS : Named validation presets (research, strict, crypto, etc.).
23
+ THRESHOLD_DECIMAL_MIN : Minimum valid threshold (1 = 0.1bps).
24
+ THRESHOLD_DECIMAL_MAX : Maximum valid threshold (100,000 = 10,000bps).
25
+ __version__ : Package version string.
26
+ """
27
+
28
+ from collections.abc import Callable, Iterator
29
+ from dataclasses import dataclass
30
+ from enum import Enum, IntEnum
31
+ from typing import Any, Literal, overload
32
+
33
+ import pandas as pd
34
+ import polars as pl
35
+
36
+ # ============================================================================
37
+ # Exceptions and Warnings
38
+ # ============================================================================
39
+
40
class ContinuityError(Exception):
    """Raised when range bar continuity is violated.

    The bar[i+1].open == bar[i].close invariant is broken, indicating
    discontinuities in the range bar sequence.
    """

    discontinuities: list[dict]
    """List of discontinuity details (bar_index, prev_close, next_open, gap_pct)."""

    def __init__(
        self, message: str, discontinuities: list[dict] | None = None
    ) -> None: ...
53
+
54
class ContinuityWarning(UserWarning):
    """Warning issued when range bar discontinuities are detected but not fatal."""
56
+
57
+ # ============================================================================
58
+ # Data Classes
59
+ # ============================================================================
60
+
61
@dataclass
class PrecomputeProgress:
    """Progress update for precomputation.

    Passed to progress_callback during precompute_range_bars() execution.
    """

    phase: Literal["fetching", "processing", "caching"]
    """Current processing phase."""
    current_month: str
    """Current month being processed (YYYY-MM format)."""
    months_completed: int
    """Number of months already processed."""
    months_total: int
    """Total number of months to process."""
    bars_generated: int
    """Cumulative bars generated so far."""
    ticks_processed: int
    """Cumulative ticks processed so far."""
    elapsed_seconds: float
    """Seconds elapsed since precomputation started."""
82
+
83
@dataclass
class PrecomputeResult:
    """Result of precomputation.

    Returned by precompute_range_bars() after successful execution.
    """

    symbol: str
    """Trading symbol that was precomputed."""
    threshold_decimal_bps: int
    """Threshold used for bar construction."""
    start_date: str
    """Start date of precomputed range (YYYY-MM-DD)."""
    end_date: str
    """End date of precomputed range (YYYY-MM-DD)."""
    total_bars: int
    """Total number of bars generated."""
    total_ticks: int
    """Total number of ticks processed."""
    elapsed_seconds: float
    """Total time taken for precomputation."""
    continuity_valid: bool | None
    """Whether all bars pass continuity validation. None if validation was skipped."""
    cache_key: str
    """Cache key for the stored bars."""
108
+
109
# Package version string (declared for type checkers; value set at runtime).
__version__: str
110
+
111
+ # ============================================================================
112
+ # Ouroboros: Cyclical Reset Boundaries for Reproducibility
113
+ # Plan: /Users/terryli/.claude/plans/sparkling-coalescing-dijkstra.md
114
+ # ============================================================================
115
+
116
+ from datetime import date, datetime
117
+
118
class OuroborosMode(str, Enum):
    """Ouroboros granularity modes for reset boundaries.

    Ouroboros (Greek: ouroboros) represents cyclical reset boundaries
    that enable reproducible range bar construction.
    """

    YEAR = "year"
    """Reset at January 1 00:00:00 UTC each year."""
    MONTH = "month"
    """Reset at 1st of each month 00:00:00 UTC."""
    WEEK = "week"
    """Reset at Sunday 00:00:00 UTC each week (crypto) or first tick after market open (forex)."""
131
+
132
@dataclass(frozen=True)
class OuroborosBoundary:
    """A single ouroboros reset boundary.

    Represents a specific timestamp where the range bar processor should reset
    its state to enable reproducible bar construction across segments.
    """

    timestamp: datetime
    """UTC datetime of the boundary."""
    mode: OuroborosMode
    """Which granularity created this boundary."""
    reason: str
    """Human-readable reason (e.g., 'year_boundary', 'month_boundary')."""

    @property
    def timestamp_ms(self) -> int:
        """Timestamp in milliseconds (for comparison with trade data)."""

    @property
    def timestamp_us(self) -> int:
        """Timestamp in microseconds."""
154
+
155
@dataclass
class OrphanedBarMetadata:
    """Metadata for orphaned bars at ouroboros boundaries.

    Orphaned bars are incomplete bars that existed when the processor
    was reset at an ouroboros boundary. They can be included or excluded
    from results based on the `include_orphaned_bars` parameter.
    """

    is_orphan: bool = True
    """Always True for orphaned bars."""
    ouroboros_boundary: datetime | None = None
    """Which boundary caused the orphan."""
    reason: str | None = None
    """Reason string: 'year_boundary', 'month_boundary', 'week_boundary'."""
    expected_duration_us: int | None = None
    """Expected duration if bar had completed normally."""
172
+
173
def get_ouroboros_boundaries(
    start: date,
    end: date,
    mode: Literal["year", "month", "week"],
) -> list[OuroborosBoundary]:
    """Return all ouroboros reset points within the date range.

    Parameters
    ----------
    start : date
        Start date (inclusive)
    end : date
        End date (inclusive)
    mode : {"year", "month", "week"}
        Ouroboros granularity

    Returns
    -------
    list[OuroborosBoundary]
        Sorted list of boundaries within the date range

    Examples
    --------
    >>> from datetime import date
    >>> from rangebar import get_ouroboros_boundaries
    >>> boundaries = get_ouroboros_boundaries(date(2024, 1, 1), date(2024, 3, 31), "month")
    >>> len(boundaries)
    3
    >>> boundaries[0].reason
    'month_boundary'
    """
204
+
205
+ # ============================================================================
206
+ # Configuration Constants
207
+ # ============================================================================
208
+
209
TIER1_SYMBOLS: tuple[str, ...]
"""18 high-liquidity symbols available on ALL Binance markets.

AAVE, ADA, AVAX, BCH, BNB, BTC, DOGE, ETH, FIL,
LINK, LTC, NEAR, SOL, SUI, UNI, WIF, WLD, XRP
"""

THRESHOLD_DECIMAL_MIN: int
"""Minimum valid threshold: 1 (0.1bps = 0.001%)"""

THRESHOLD_DECIMAL_MAX: int
"""Maximum valid threshold: 100,000 (10,000bps = 100%)"""

THRESHOLD_PRESETS: dict[str, int]
"""Named threshold presets (in 0.1bps units).

- "micro": 10 (1bps = 0.01%) - scalping
- "tight": 50 (5bps = 0.05%) - day trading
- "standard": 100 (10bps = 0.1%) - swing trading
- "medium": 250 (25bps = 0.25%) - default
- "wide": 500 (50bps = 0.5%) - position trading
- "macro": 1000 (100bps = 1%) - long-term
"""

# Issue #59: Inter-bar microstructure features
INTER_BAR_FEATURE_COLUMNS: tuple[str, ...]
"""16 inter-bar microstructure feature column names (Issue #59).

Tier 1 - Core (7 features):
- lookback_trade_count: Trade count in lookback window
- lookback_ofi: Order flow imbalance [-1, 1]
- lookback_duration_us: Lookback window duration (microseconds)
- lookback_intensity: Trade intensity (trades/second)
- lookback_vwap_raw: Volume-weighted average price (raw i64)
- lookback_vwap_position: VWAP position in price range [0, 1]
- lookback_count_imbalance: Trade count imbalance [-1, 1]

Tier 2 - Statistical (5 features):
- lookback_kyle_lambda: Kyle's lambda (price impact)
- lookback_burstiness: Goh-Barabasi burstiness [-1, 1]
- lookback_volume_skew: Volume distribution skewness
- lookback_volume_kurt: Volume distribution kurtosis
- lookback_price_range: Price range / first price [0, +inf)

Tier 3 - Advanced (4 features):
- lookback_kaufman_er: Kaufman efficiency ratio [0, 1]
- lookback_garman_klass_vol: Garman-Klass volatility [0, 1)
- lookback_hurst: Hurst exponent [0, 1]
- lookback_permutation_entropy: Permutation entropy [0, 1]

All inter-bar features are Optional - None when no lookback data available.
"""
261
+
262
+ # ============================================================================
263
+ # Tiered Validation System (Issue #19 - v6.2.0+)
264
+ # ============================================================================
265
+
266
class GapTier(IntEnum):
    """Gap severity classification for range bar continuity validation.

    Tiers are based on empirical analysis of 30-month BTC data which
    identified 49 legitimate market microstructure events.
    """

    PRECISION = 1
    """< 0.001% - Floating-point artifacts (always ignored)"""
    NOISE = 2
    """0.001% - 0.01% - Tick-level noise (logged, not flagged)"""
    MARKET_MOVE = 3
    """0.01% - 0.1% - Normal market movement (configurable)"""
    MICROSTRUCTURE = 4
    """> 0.1% - Flash crashes, liquidations (warning/error)"""
    SESSION_BOUNDARY = 5
    """> threshold*2 - Definite session break (always error)"""
283
+
284
class AssetClass(Enum):
    """Asset class for tolerance calibration.

    Different asset classes have different typical gap magnitudes.
    """

    CRYPTO = "crypto"
    """24/7 markets, flash crashes possible"""
    FOREX = "forex"
    """Session-based, weekend gaps"""
    EQUITIES = "equities"
    """Overnight gaps, circuit breakers"""
    UNKNOWN = "unknown"
    """Fallback to crypto defaults"""
298
+
299
ASSET_CLASS_MULTIPLIERS: dict[AssetClass, float]
"""Tolerance multipliers by asset class (relative to baseline)."""
301
+
302
def detect_asset_class(symbol: str) -> AssetClass:
    """Auto-detect asset class from symbol pattern.

    Detection Rules:
    - Crypto: Contains common crypto bases (BTC, ETH, etc.) or ends with USDT/BUSD
    - Forex: Standard 6-char pairs (EURUSD) or commodities (XAU, XAG)
    - Unknown: Fallback for unrecognized patterns

    Parameters
    ----------
    symbol : str
        Trading symbol (case-insensitive)

    Returns
    -------
    AssetClass
        Detected asset class

    Examples
    --------
    >>> detect_asset_class("BTCUSDT")
    <AssetClass.CRYPTO: 'crypto'>
    >>> detect_asset_class("EURUSD")
    <AssetClass.FOREX: 'forex'>
    """
327
+
328
@dataclass(frozen=True)
class TierThresholds:
    """Configurable boundaries between gap tiers (in percentage).

    These thresholds define the boundaries for classifying gaps into tiers.
    Values are percentages (e.g., 0.00001 = 0.001%).
    """

    precision: float = ...
    """Tier 1/2 boundary (default: 0.00001 = 0.001%)"""
    noise: float = ...
    """Tier 2/3 boundary (default: 0.0001 = 0.01%)"""
    market_move: float = ...
    """Tier 3/4 boundary (default: 0.001 = 0.1%)"""
    session_factor: float = ...
    """Tier 5 multiplier (default: 2.0)"""
344
+
345
@dataclass(frozen=True)
class ValidationPreset:
    """Immutable validation configuration preset.

    Presets bundle tolerance, behavior mode, and tier thresholds into
    named configurations for common use cases.
    """

    tolerance_pct: float
    """Maximum gap percentage before flagging (e.g., 0.01 = 1%)"""
    mode: Literal["error", "warn", "skip"]
    """Behavior on validation failure"""
    tier_thresholds: TierThresholds = ...
    """Boundaries for gap tier classification"""
    asset_class: AssetClass | None = ...
    """Override auto-detection if set"""
    description: str = ...
    """Human-readable description of the preset"""
363
+
364
VALIDATION_PRESETS: dict[str, ValidationPreset]
"""Named validation presets for common scenarios.

General-purpose:
- "permissive": 5% tolerance, warn mode
- "research": 2% tolerance, warn mode (exploratory analysis)
- "standard": 1% tolerance, warn mode (production backtesting)
- "strict": 0.5% tolerance, error mode (ML training data)
- "paranoid": 0.1% tolerance, error mode (original v6.1.0 behavior)

Asset-class specific:
- "crypto": 2% tolerance, crypto asset class
- "forex": 1% tolerance, forex asset class
- "equities": 3% tolerance, equities asset class

Special:
- "skip": Disable validation entirely
- "audit": 0.2% tolerance, error mode (data quality audit)
"""
383
+
384
@dataclass
class GapInfo:
    """Details of a single gap between consecutive bars."""

    bar_index: int
    """Index of the bar with the gap (0-based)"""
    prev_close: float
    """Close price of the previous bar"""
    curr_open: float
    """Open price of the current bar"""
    gap_pct: float
    """Gap magnitude as percentage (e.g., 0.01 = 1%)"""
    tier: GapTier
    """Severity classification of this gap"""
    timestamp: pd.Timestamp | None = ...
    """Timestamp of the bar (if available from DataFrame index)"""
400
+
401
@dataclass
class TierSummary:
    """Per-tier statistics for gap analysis."""

    count: int = ...
    """Number of gaps in this tier"""
    max_gap_pct: float = ...
    """Maximum gap percentage in this tier"""
    avg_gap_pct: float = ...
    """Average gap percentage in this tier (0 if count == 0)"""
411
+
412
@dataclass
class TieredValidationResult:
    """Comprehensive validation result with tier breakdown.

    This result provides detailed gap analysis categorized by severity tier,
    enabling nuanced handling of different gap magnitudes.
    """

    is_valid: bool
    """True if no SESSION_BOUNDARY gaps (tier 5) detected"""
    bar_count: int
    """Total number of bars validated"""
    gaps_by_tier: dict[GapTier, TierSummary]
    """Per-tier statistics"""
    all_gaps: list[GapInfo]
    """All gaps above PRECISION tier (detailed list)"""
    threshold_used_pct: float
    """Range bar threshold used for validation (as percentage)"""
    asset_class_detected: AssetClass
    """Auto-detected or overridden asset class"""
    preset_used: str | None
    """Name of preset used, or None for custom config"""

    @property
    def has_session_breaks(self) -> bool:
        """True if any SESSION_BOUNDARY gaps detected."""

    @property
    def has_microstructure_events(self) -> bool:
        """True if any MICROSTRUCTURE gaps detected."""

    def summary_dict(self) -> dict[str, int]:
        """Return gap counts by tier name for logging.

        Returns
        -------
        dict[str, int]
            Mapping of tier name to gap count
        """
451
+
452
def validate_continuity_tiered(
    df: pd.DataFrame,
    threshold_decimal_bps: int = 250,
    *,
    validation: str | dict | ValidationPreset = "standard",
    symbol: str | None = None,
) -> TieredValidationResult:
    """Validate range bar continuity with tiered gap classification.

    This function categorizes gaps by severity tier, enabling nuanced
    handling of different gap magnitudes. It's the opt-in v6.2.0 API
    that will become the default in v7.0.

    Parameters
    ----------
    df : pd.DataFrame
        Range bar DataFrame with OHLCV columns
    threshold_decimal_bps : int, default=250
        Range bar threshold (250 = 0.25% = 25 basis points)
    validation : str, dict, or ValidationPreset, default="standard"
        Validation configuration:
        - "auto": Auto-detect asset class from symbol
        - str: Preset name ("research", "strict", "crypto", etc.)
        - dict: Custom config {"tolerance_pct": 0.01, "mode": "warn"}
        - ValidationPreset: Direct preset instance
    symbol : str, optional
        Symbol for asset class auto-detection

    Returns
    -------
    TieredValidationResult
        Comprehensive result with per-tier statistics

    Raises
    ------
    ContinuityError
        If validation mode is "error" and tolerance exceeded
    ContinuityWarning
        If validation mode is "warn" and tolerance exceeded (via warnings module)

    Examples
    --------
    >>> result = validate_continuity_tiered(df, validation="research")
    >>> print(f"Valid: {result.is_valid}")
    Valid: True
    """
498
+
499
+ # ============================================================================
500
+ # Main API
501
+ # ============================================================================
502
+
503
@overload
def get_range_bars(
    symbol: str,
    start_date: str,
    end_date: str,
    threshold_decimal_bps: (
        int | Literal["micro", "tight", "standard", "medium", "wide", "macro"]
    ) = 250,
    *,
    ouroboros: Literal["year", "month", "week"] = ...,
    include_orphaned_bars: bool = ...,
    materialize: Literal[True] = ...,
    batch_size: int = ...,
    source: Literal["binance", "exness"] = ...,
    market: Literal["spot", "futures-um", "futures-cm", "um", "cm"] = ...,
    validation: Literal["permissive", "strict", "paranoid"] = ...,
    include_incomplete: bool = ...,
    include_microstructure: bool = ...,
    include_exchange_sessions: bool = ...,  # Issue #8
    prevent_same_timestamp_close: bool = ...,
    verify_checksum: bool = ...,
    use_cache: bool = ...,
    fetch_if_missing: bool = ...,
    cache_dir: str | None = ...,
    max_memory_mb: int | None = ...,  # Issue #49
    inter_bar_lookback_count: int | None = ...,  # Issue #59
) -> pd.DataFrame: ...
@overload
def get_range_bars(
    symbol: str,
    start_date: str,
    end_date: str,
    threshold_decimal_bps: (
        int | Literal["micro", "tight", "standard", "medium", "wide", "macro"]
    ) = 250,
    *,
    ouroboros: Literal["year", "month", "week"] = ...,
    include_orphaned_bars: bool = ...,
    materialize: Literal[False],
    batch_size: int = ...,
    source: Literal["binance", "exness"] = ...,
    market: Literal["spot", "futures-um", "futures-cm", "um", "cm"] = ...,
    validation: Literal["permissive", "strict", "paranoid"] = ...,
    include_incomplete: bool = ...,
    include_microstructure: bool = ...,
    include_exchange_sessions: bool = ...,  # Issue #8
    prevent_same_timestamp_close: bool = ...,
    verify_checksum: bool = ...,
    use_cache: bool = ...,
    fetch_if_missing: bool = ...,
    cache_dir: str | None = ...,
    max_memory_mb: int | None = ...,  # Issue #49
    inter_bar_lookback_count: int | None = ...,  # Issue #59
) -> Iterator[pl.DataFrame]: ...
def get_range_bars(
    symbol: str,
    start_date: str,
    end_date: str,
    threshold_decimal_bps: (
        int | Literal["micro", "tight", "standard", "medium", "wide", "macro"]
    ) = 250,
    *,
    # Ouroboros: Cyclical reset boundaries (v11.0+)
    ouroboros: Literal["year", "month", "week"] = "year",
    include_orphaned_bars: bool = False,
    # Streaming options (v8.0+)
    materialize: bool = True,
    batch_size: int = 10_000,
    # Data source configuration
    source: Literal["binance", "exness"] = "binance",
    market: Literal["spot", "futures-um", "futures-cm", "um", "cm"] = "spot",
    # Exness-specific options
    validation: Literal["permissive", "strict", "paranoid"] = "strict",
    # Processing options
    include_incomplete: bool = False,
    include_microstructure: bool = False,
    include_exchange_sessions: bool = False,  # Issue #8: Exchange session flags
    prevent_same_timestamp_close: bool = True,
    # Data integrity (Issue #43)
    verify_checksum: bool = True,
    # Caching options
    use_cache: bool = True,
    fetch_if_missing: bool = True,
    cache_dir: str | None = None,
    # Memory guards (Issue #49)
    max_memory_mb: int | None = None,
    # Inter-bar features (Issue #59)
    inter_bar_lookback_count: int | None = None,
) -> pd.DataFrame | Iterator[pl.DataFrame]:
    """Get range bars for a symbol with automatic data fetching and caching.

    This is the single entry point for all range bar generation. It supports
    multiple data sources (Binance crypto, Exness forex), all market types,
    and exposes the full configurability of the underlying Rust engine.

    Parameters
    ----------
    symbol : str
        Trading symbol (uppercase).
        - Binance: "BTCUSDT", "ETHUSDT", etc.
        - Exness: "EURUSD", "GBPUSD", "XAUUSD", etc.
    start_date : str
        Start date in YYYY-MM-DD format.
    end_date : str
        End date in YYYY-MM-DD format.
    threshold_decimal_bps : int or str, default=250
        Threshold in 0.1bps units. Can be:
        - Integer: Direct value (250 = 25bps = 0.25%)
        - String preset: "micro" (1bps), "tight" (5bps), "standard" (10bps),
          "medium" (25bps), "wide" (50bps), "macro" (100bps)
        Valid range: 1-100,000 (0.001% to 100%)
    ouroboros : str, default="year"
        Cyclical reset granularity ("year", "month", or "week"); see
        OuroborosMode for boundary semantics (v11.0+).
    include_orphaned_bars : bool, default=False
        Include incomplete bars cut off at ouroboros reset boundaries
        (see OrphanedBarMetadata).
    materialize : bool, default=True
        If True, return a single pd.DataFrame (legacy behavior).
        If False, return an Iterator[pl.DataFrame] that yields batches
        of bars for memory-efficient streaming (v8.0+).
    batch_size : int, default=10_000
        Number of bars per batch when materialize=False.
        Each batch is ~500 KB. Only used in streaming mode.
    source : str, default="binance"
        Data source: "binance" or "exness"
    market : str, default="spot"
        Market type (Binance only):
        - "spot": Spot market
        - "futures-um" or "um": USD-M perpetual futures
        - "futures-cm" or "cm": COIN-M perpetual futures
    validation : str, default="strict"
        Validation strictness (Exness only):
        - "permissive": Basic checks (bid > 0, ask > 0, bid < ask)
        - "strict": + Spread < 10% (catches obvious errors)
        - "paranoid": + Spread < 1% (flags suspicious data)
    include_incomplete : bool, default=False
        Include the final incomplete bar (useful for analysis).
        If False (default), only completed bars are returned.
    include_microstructure : bool, default=False
        Include market microstructure columns:
        - buy_volume, sell_volume: Volume by aggressor side
        - vwap: Volume-weighted average price
        - trade_count: Number of trades in bar
        - (Exness) spread_min, spread_max, spread_avg: Spread statistics
        - (Issue #25) duration_us: Bar duration in microseconds
        - (Issue #25) ofi: Order Flow Imbalance [-1, 1]
        - (Issue #25) vwap_close_deviation: (close - vwap) / (high - low)
        - (Issue #25) price_impact: Amihud-style illiquidity
        - (Issue #25) kyle_lambda_proxy: Market depth proxy
        - (Issue #25) trade_intensity: Trades per second
        - (Issue #25) volume_per_trade: Average trade size
        - (Issue #25) aggression_ratio: Buy/sell trade count ratio
        - (Issue #25) aggregation_density: Trade fragmentation proxy
        - (Issue #25) turnover_imbalance: Dollar-weighted OFI [-1, 1]
    prevent_same_timestamp_close : bool, default=True
        Prevent consecutive bars from having identical timestamps.
    verify_checksum : bool, default=True
        Verify SHA-256 checksum of downloaded data (Issue #43).
        Enabled by default for data integrity. Set to False for
        faster downloads when data integrity is verified elsewhere.
    use_cache : bool, default=True
        Cache tick data locally in Parquet format.
    fetch_if_missing : bool, default=True
        Fetch tick data from the source when not found in the cache
        (presumably raises/fails when False and data is absent — confirm
        against the implementation).
    cache_dir : str or None, default=None
        Custom cache directory. If None, uses platform default:
        - macOS: ~/Library/Caches/rangebar/
        - Linux: ~/.cache/rangebar/
        - Windows: %LOCALAPPDATA%/terrylica/rangebar/Cache/
    max_memory_mb : int or None, default=None
        Memory budget in MB for tick data loading (Issue #49).
        If estimated in-memory size exceeds this limit, raises MemoryError.
        If None, uses automatic detection (80% of available RAM).
        Set to 0 to disable all memory guards.
    inter_bar_lookback_count : int or None, default=None
        Number of trades to keep in lookback buffer for inter-bar feature
        computation (Issue #59). If set, enables 16 inter-bar features
        computed from trades BEFORE each bar opens. Recommended: 100-500.
        If None (default), inter-bar features are disabled.

    Returns
    -------
    pd.DataFrame or Iterator[pl.DataFrame]
        If materialize=True (default): Single pd.DataFrame ready for
        backtesting.py, with DatetimeIndex and OHLCV columns.

        If materialize=False: Iterator yielding pl.DataFrame batches
        (batch_size bars each) for memory-efficient streaming.

        Columns: Open, High, Low, Close, Volume
        (if include_microstructure) Additional columns

    Raises
    ------
    ValueError
        - Invalid threshold (outside 1-100,000 range)
        - Invalid dates or date format
        - Unknown source, market, or validation level
        - Unknown threshold preset name
    RuntimeError
        - Data fetching failed
        - No data available for date range
        - Feature not enabled (e.g., Exness without exness feature)

    Examples
    --------
    Basic usage - Binance spot:

    >>> from rangebar import get_range_bars
    >>> df = get_range_bars("BTCUSDT", "2024-01-01", "2024-06-30")

    Using threshold presets:

    >>> df = get_range_bars("BTCUSDT", "2024-01-01", "2024-03-31", threshold_decimal_bps="tight")

    Binance USD-M Futures:

    >>> df = get_range_bars("BTCUSDT", "2024-01-01", "2024-03-31", market="futures-um")

    Exness forex with spread monitoring:

    >>> df = get_range_bars(
    ...     "EURUSD", "2024-01-01", "2024-01-31",
    ...     source="exness",
    ...     threshold_decimal_bps="standard",
    ...     include_microstructure=True,
    ... )

    Use with backtesting.py:

    >>> from backtesting import Backtest, Strategy
    >>> df = get_range_bars("BTCUSDT", "2024-01-01", "2024-12-31")
    >>> bt = Backtest(df, MyStrategy, cash=10000, commission=0.0002)
    >>> stats = bt.run()

    Notes
    -----
    Threshold units (0.1bps):
        The threshold is specified in tenths of basis points for precision.
        Common conversions:
        - 10 = 1bps = 0.01%
        - 100 = 10bps = 0.1%
        - 250 = 25bps = 0.25%
        - 1000 = 100bps = 1%

    Tier-1 symbols:
        18 high-liquidity symbols available on ALL Binance markets:
        AAVE, ADA, AVAX, BCH, BNB, BTC, DOGE, ETH, FIL,
        LINK, LTC, NEAR, SOL, SUI, UNI, WIF, WLD, XRP

    Non-lookahead guarantee:
        - Threshold computed from bar OPEN price only
        - Breaching trade included in closing bar
        - No future information used in bar construction
    """
752
+
753
def get_range_bars_pandas(
    symbol: str,
    start_date: str,
    end_date: str,
    threshold_decimal_bps: (
        int | Literal["micro", "tight", "standard", "medium", "wide", "macro"]
    ) = 250,
    **kwargs: Any,  # noqa: ANN401
) -> pd.DataFrame:
    """Get range bars as pandas DataFrame (deprecated compatibility shim).

    .. deprecated:: 8.0
        Use ``get_range_bars(materialize=True)`` directly instead.
        This function will be removed in v9.0.

    Parameters
    ----------
    symbol : str
        Trading symbol (e.g., "BTCUSDT")
    start_date : str
        Start date in YYYY-MM-DD format
    end_date : str
        End date in YYYY-MM-DD format
    threshold_decimal_bps : int or str, default=250
        Threshold in decimal basis points
    **kwargs
        Additional arguments passed to ``get_range_bars()``

    Returns
    -------
    pd.DataFrame
        OHLCV DataFrame ready for backtesting.py
    """
786
+
787
+ def get_n_range_bars(
788
+ symbol: str,
789
+ n_bars: int,
790
+ threshold_decimal_bps: (
791
+ int | Literal["micro", "tight", "standard", "medium", "wide", "macro"]
792
+ ) = 250,
793
+ *,
794
+ end_date: str | None = None,
795
+ source: Literal["binance", "exness"] = "binance",
796
+ market: Literal["spot", "futures-um", "futures-cm", "um", "cm"] = "spot",
797
+ include_microstructure: bool = False,
798
+ use_cache: bool = True,
799
+ fetch_if_missing: bool = True,
800
+ max_lookback_days: int = 90,
801
+ warn_if_fewer: bool = True,
802
+ validate_on_return: bool = False,
803
+ continuity_action: Literal["warn", "raise", "log"] = "warn",
804
+ chunk_size: int = 100_000,
805
+ cache_dir: str | None = None,
806
+ ) -> pd.DataFrame:
807
+ """Get exactly N range bars ending at or before a given date.
808
+
809
+ Unlike `get_range_bars()` which uses date bounds (producing variable bar counts),
810
+ this function returns a deterministic number of bars. This is useful for:
811
+ - ML training (exactly 10,000 samples)
812
+ - Walk-forward optimization (fixed window sizes)
813
+ - Consistent backtest comparisons
814
+
815
+ Parameters
816
+ ----------
817
+ symbol : str
818
+ Trading symbol (e.g., "BTCUSDT")
819
+ n_bars : int
820
+ Number of bars to retrieve. Must be > 0.
821
+ threshold_decimal_bps : int or str, default=250
822
+ Threshold in decimal basis points. Can be:
823
+ - Integer: Direct value (250 = 25bps = 0.25%)
824
+ - String preset: "micro", "tight", "standard", "medium", "wide", "macro"
825
+ end_date : str or None, default=None
826
+ End date in YYYY-MM-DD format. If None, uses most recent available data.
827
+ source : str, default="binance"
828
+ Data source: "binance" or "exness"
829
+ market : str, default="spot"
830
+ Market type (Binance only): "spot", "futures-um", or "futures-cm"
831
+ include_microstructure : bool, default=False
832
+ Include microstructure columns (vwap, buy_volume, sell_volume,
833
+ plus Issue #25 features: ofi, duration_us, price_impact, etc.)
834
+ use_cache : bool, default=True
835
+ Use ClickHouse cache for bar retrieval/storage
836
+ fetch_if_missing : bool, default=True
837
+ Fetch and process new data if cache doesn't have enough bars
838
+ max_lookback_days : int, default=90
839
+ Safety limit: maximum days to look back when fetching missing data.
840
+ Prevents runaway fetches on empty caches.
841
+ warn_if_fewer : bool, default=True
842
+ Emit UserWarning if returning fewer bars than requested.
843
+ validate_on_return : bool, default=False
844
+ If True, validate bar continuity before returning.
845
+ Uses continuity_action to determine behavior on failure.
846
+ continuity_action : str, default="warn"
847
+ Action when discontinuity found during validation:
848
+ - "warn": Log warning but return data
849
+ - "raise": Raise ContinuityError
850
+ - "log": Silent logging only
851
+ chunk_size : int, default=100_000
852
+ Number of ticks per processing chunk for memory efficiency.
853
+ Larger values = faster processing, more memory.
854
+ Default 100K = ~15MB memory overhead.
855
+ cache_dir : str or None, default=None
856
+ Custom cache directory for tick data (Tier 1).
857
+
858
+ Returns
859
+ -------
860
+ pd.DataFrame
861
+ OHLCV DataFrame with exactly n_bars rows (or fewer if not enough data),
862
+ sorted chronologically (oldest first). Columns:
863
+ - Open, High, Low, Close, Volume
864
+ - (if include_microstructure) vwap, buy_volume, sell_volume
865
+
866
+ Raises
867
+ ------
868
+ ValueError
869
+ - n_bars <= 0
870
+ - Invalid threshold
871
+ - Invalid date format
872
+ RuntimeError
873
+ - ClickHouse not available when use_cache=True
874
+ - Data fetching failed
875
+
876
+ Examples
877
+ --------
878
+ Get last 10,000 bars for ML training:
879
+
880
+ >>> from rangebar import get_n_range_bars
881
+ >>> df = get_n_range_bars("BTCUSDT", n_bars=10000)
882
+ >>> assert len(df) == 10000
883
+
884
+ Get 5,000 bars ending at specific date for walk-forward:
885
+
886
+ >>> df = get_n_range_bars("BTCUSDT", n_bars=5000, end_date="2024-06-01")
887
+
888
+ With safety limit (won't fetch more than 30 days of data):
889
+
890
+ >>> df = get_n_range_bars("BTCUSDT", n_bars=1000, max_lookback_days=30)
891
+
892
+ Notes
893
+ -----
894
+ Cache behavior:
895
+ - Fast path: If cache has >= n_bars, returns immediately (~50ms)
896
+ - Slow path: If cache has < n_bars and fetch_if_missing=True,
897
+ fetches additional data, computes bars, stores in cache, returns
898
+
899
+ Gap-filling algorithm:
900
+ Uses adaptive exponential backoff to estimate how many ticks to fetch.
901
+ Learns compression ratio (ticks/bar) for each (symbol, threshold) pair.
902
+
903
+ See Also
904
+ --------
905
+ get_range_bars : Date-bounded bar retrieval (variable bar count)
906
+ precompute_range_bars : Pre-compute continuous bars for WFO workflows
907
+ THRESHOLD_PRESETS : Named threshold values
908
+ """
909
+
910
def precompute_range_bars(
    symbol: str,
    start_date: str,
    end_date: str,
    threshold_decimal_bps: (
        int | Literal["micro", "tight", "standard", "medium", "wide", "macro"]
    ) = 250,
    *,
    source: Literal["binance", "exness"] = "binance",
    market: Literal["spot", "futures-um", "futures-cm", "um", "cm"] = "spot",
    chunk_size: int = 100_000,
    invalidate_existing: Literal["overlap", "full", "none", "smart"] = "smart",
    progress_callback: Callable[[PrecomputeProgress], None] | None = None,
    include_microstructure: bool = False,
    validate_on_complete: Literal["error", "warn", "skip"] = "error",
    continuity_tolerance_pct: float = 0.001,
    cache_dir: str | None = None,
) -> PrecomputeResult:
    """Precompute continuous range bars for a date range (single-pass, guaranteed continuity).

    Designed for ML workflows requiring continuous bar sequences for training/validation.
    Uses single-pass processing to guarantee the bar[i+1].open == bar[i].close invariant.

    Parameters
    ----------
    symbol : str
        Trading pair (e.g., "BTCUSDT")
    start_date : str
        Start date (inclusive) "YYYY-MM-DD"
    end_date : str
        End date (inclusive) "YYYY-MM-DD"
    threshold_decimal_bps : int or str, default=250
        Range bar threshold. Can be integer (250 = 0.25%) or preset name.
    source : str, default="binance"
        Data source: "binance" or "exness"
    market : str, default="spot"
        Market type for Binance: "spot", "futures-um"/"um", or "futures-cm"/"cm"
    chunk_size : int, default=100_000
        Ticks per processing chunk (~15MB memory per 100K ticks)
    invalidate_existing : str, default="smart"
        Cache invalidation strategy:
        - "overlap": Invalidate only bars in date range
        - "full": Invalidate ALL bars for symbol/threshold
        - "none": Skip if any cached bars exist in range
        - "smart": Invalidate overlapping + validate junction continuity
    progress_callback : callable, optional
        Callback for progress updates. Receives PrecomputeProgress dataclass.
    include_microstructure : bool, default=False
        Include order flow metrics (vwap, buy_volume, sell_volume)
    validate_on_complete : str, default="error"
        Continuity validation mode after precomputation:
        - "error": Raise ContinuityError if discontinuities found
        - "warn": Log warning but continue (sets continuity_valid=False)
        - "skip": Skip validation entirely (continuity_valid=None)
    continuity_tolerance_pct : float, default=0.001
        Maximum allowed price gap percentage for continuity validation.
        Default 0.1% (0.001) accommodates market microstructure events.
        The total allowed gap is threshold_pct + continuity_tolerance_pct.
    cache_dir : str or None, optional
        Custom cache directory for tick data.

    Returns
    -------
    PrecomputeResult
        Dataclass with statistics: total_bars, total_ticks, elapsed_seconds,
        continuity_valid, cache_key

    Raises
    ------
    ValueError
        Invalid parameters (dates, threshold, symbol)
    RuntimeError
        Fetch or processing failure
    ContinuityError
        If validate_on_complete="error" and discontinuities found

    Examples
    --------
    Basic precomputation:

    >>> from rangebar import precompute_range_bars
    >>> result = precompute_range_bars("BTCUSDT", "2024-01-01", "2024-03-31")
    >>> print(f"Generated {result.total_bars} bars in {result.elapsed_seconds:.1f}s")

    With progress callback:

    >>> def on_progress(p):
    ...     print(f"{p.phase}: {p.months_completed}/{p.months_total} months")
    >>> precompute_range_bars("BTCUSDT", "2024-01-01", "2024-06-30",
    ...     progress_callback=on_progress)

    See Also
    --------
    get_n_range_bars : Count-bounded bar retrieval (uses precomputed cache)
    get_range_bars : Date-bounded bar retrieval
    """
1006
+
1007
def process_trades_polars(
    trades: pl.DataFrame | pl.LazyFrame,
    threshold_decimal_bps: int = 250,
) -> pd.DataFrame:
    """Build range bars directly from a Polars frame (optimized pipeline).

    Preferred entry point for Polars users: accepts lazy frames so
    predicates are pushed down to the I/O layer, and converts only the
    columns the bar builder needs.

    Parameters
    ----------
    trades : polars.DataFrame or polars.LazyFrame
        Trade data exposing the columns:
        - timestamp: int64 (milliseconds since epoch)
        - price: float
        - quantity (or volume): float
    threshold_decimal_bps : int, default=250
        Threshold in decimal basis points (250 = 25bps = 0.25%)

    Returns
    -------
    pd.DataFrame
        backtesting.py-ready OHLCV frame featuring:
        - DatetimeIndex (timestamp)
        - Capitalized columns: Open, High, Low, Close, Volume

    Examples
    --------
    Lazy frame (predicate pushdown):

    >>> import polars as pl
    >>> from rangebar import process_trades_polars
    >>> lazy_df = pl.scan_parquet("trades.parquet")
    >>> lazy_filtered = lazy_df.filter(pl.col("timestamp") >= 1704067200000)
    >>> df = process_trades_polars(lazy_filtered, threshold_decimal_bps=250)

    Eager frame:

    >>> df = pl.read_parquet("trades.parquet")
    >>> bars = process_trades_polars(df)

    Notes
    -----
    Why this path is fast:
    - Only timestamp, price, and quantity are extracted
    - Lazy evaluation pushes filters down to the I/O layer
    - Roughly 2-3x faster than process_trades_to_dataframe() on Polars input

    See Also
    --------
    process_trades_to_dataframe : Process trades from pandas DataFrame or dict list
    get_range_bars : Full pipeline with data fetching and caching
    """
1060
+
1061
+ def process_trades_to_dataframe(
1062
+ trades: list[dict] | pd.DataFrame,
1063
+ threshold_decimal_bps: int = 250,
1064
+ include_microstructure: bool = False,
1065
+ ) -> pd.DataFrame:
1066
+ """Process trades into range bars from pandas DataFrame or dict list.
1067
+
1068
+ Parameters
1069
+ ----------
1070
+ trades : list[dict] or pd.DataFrame
1071
+ Trade data. If list[dict], each dict needs:
1072
+ - timestamp: int (milliseconds since epoch)
1073
+ - price: float
1074
+ - quantity: float
1075
+ threshold_decimal_bps : int, default=250
1076
+ Threshold in decimal basis points (250 = 25bps = 0.25%)
1077
+ include_microstructure : bool, default=False
1078
+ Include microstructure columns (vwap, buy_volume, sell_volume)
1079
+
1080
+ Returns
1081
+ -------
1082
+ pd.DataFrame
1083
+ OHLCV DataFrame ready for backtesting.py
1084
+
1085
+ See Also
1086
+ --------
1087
+ process_trades_polars : Faster alternative for Polars inputs
1088
+ get_range_bars : Full pipeline with data fetching and caching
1089
+ """