rangebar 11.6.1__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. rangebar/CLAUDE.md +327 -0
  2. rangebar/__init__.py +227 -0
  3. rangebar/__init__.pyi +1089 -0
  4. rangebar/_core.cpython-313-darwin.so +0 -0
  5. rangebar/checkpoint.py +472 -0
  6. rangebar/cli.py +298 -0
  7. rangebar/clickhouse/CLAUDE.md +139 -0
  8. rangebar/clickhouse/__init__.py +100 -0
  9. rangebar/clickhouse/bulk_operations.py +309 -0
  10. rangebar/clickhouse/cache.py +734 -0
  11. rangebar/clickhouse/client.py +121 -0
  12. rangebar/clickhouse/config.py +141 -0
  13. rangebar/clickhouse/mixin.py +120 -0
  14. rangebar/clickhouse/preflight.py +504 -0
  15. rangebar/clickhouse/query_operations.py +345 -0
  16. rangebar/clickhouse/schema.sql +187 -0
  17. rangebar/clickhouse/tunnel.py +222 -0
  18. rangebar/constants.py +288 -0
  19. rangebar/conversion.py +177 -0
  20. rangebar/exceptions.py +207 -0
  21. rangebar/exness.py +364 -0
  22. rangebar/hooks.py +311 -0
  23. rangebar/logging.py +171 -0
  24. rangebar/notify/__init__.py +15 -0
  25. rangebar/notify/pushover.py +155 -0
  26. rangebar/notify/telegram.py +271 -0
  27. rangebar/orchestration/__init__.py +20 -0
  28. rangebar/orchestration/count_bounded.py +797 -0
  29. rangebar/orchestration/helpers.py +412 -0
  30. rangebar/orchestration/models.py +76 -0
  31. rangebar/orchestration/precompute.py +498 -0
  32. rangebar/orchestration/range_bars.py +736 -0
  33. rangebar/orchestration/tick_fetcher.py +226 -0
  34. rangebar/ouroboros.py +454 -0
  35. rangebar/processors/__init__.py +22 -0
  36. rangebar/processors/api.py +383 -0
  37. rangebar/processors/core.py +522 -0
  38. rangebar/resource_guard.py +567 -0
  39. rangebar/storage/__init__.py +22 -0
  40. rangebar/storage/checksum_registry.py +218 -0
  41. rangebar/storage/parquet.py +728 -0
  42. rangebar/streaming.py +300 -0
  43. rangebar/validation/__init__.py +69 -0
  44. rangebar/validation/cache_staleness.py +277 -0
  45. rangebar/validation/continuity.py +664 -0
  46. rangebar/validation/gap_classification.py +294 -0
  47. rangebar/validation/post_storage.py +317 -0
  48. rangebar/validation/tier1.py +175 -0
  49. rangebar/validation/tier2.py +261 -0
  50. rangebar-11.6.1.dist-info/METADATA +308 -0
  51. rangebar-11.6.1.dist-info/RECORD +54 -0
  52. rangebar-11.6.1.dist-info/WHEEL +4 -0
  53. rangebar-11.6.1.dist-info/entry_points.txt +2 -0
  54. rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,226 @@
1
+ # polars-exception: backtesting.py requires Pandas DataFrames with DatetimeIndex
2
+ # Issue #46: Modularization - Extract tick fetching loop from count_bounded.py
3
+ """Tick fetching orchestration with storage caching and deduplication.
4
+
5
+ This module provides the unified tick fetching loop used by both
6
+ _fill_gap_and_cache() and _fetch_and_compute_bars() in count_bounded.py.
7
+
8
+ The key insight: For 24/7 crypto markets, ALL ticks must be processed with
9
+ a SINGLE processor to maintain the bar[i+1].open == bar[i].close invariant.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import logging
15
+ from dataclasses import dataclass
16
+ from datetime import UTC, datetime, timedelta
17
+ from pathlib import Path
18
+ from typing import TYPE_CHECKING
19
+
20
+ if TYPE_CHECKING:
21
+ import polars as pl
22
+
23
+ from rangebar.storage.parquet import TickStorage
24
+
25
+ logger = logging.getLogger("rangebar")
26
+
27
+
28
@dataclass
class FetchResult:
    """Outcome of a single tick-fetching run.

    Bundles the merged tick frame together with summary statistics so
    callers can check sufficiency without re-scanning the data.
    """

    # Merged and deduplicated tick data, sorted chronologically
    # (None when nothing was fetched).
    ticks: pl.DataFrame | None

    # Oldest timestamp present in the fetched data, in milliseconds.
    oldest_timestamp_ms: int | None

    # Total number of ticks fetched.
    total_ticks: int
40
+
41
+
42
def fetch_ticks_with_backoff(
    *,
    symbol: str,
    source: str,
    market: str,
    target_ticks: int,
    end_dt: datetime,
    oldest_ts: int | None,
    max_lookback_days: int,
    storage: TickStorage,
    cache_dir: Path | None = None,
    max_attempts: int = 5,
    initial_multiplier: float = 2.0,
) -> FetchResult:
    """Fetch tick data with adaptive exponential backoff.

    This function implements the common tick fetching loop used by both
    cache-aware and compute-only code paths. It handles:
    - Adaptive backoff to estimate required tick volume
    - Local storage caching (read existing, write new)
    - Chronological merging and deduplication
    - Lookback safety limits

    Parameters
    ----------
    symbol : str
        Trading symbol (e.g., "BTCUSDT", "EURUSD")
    source : str
        Data source: "binance" or "exness"
    market : str
        Normalized market type: "spot", "um", or "cm"
    target_ticks : int
        Target number of ticks to fetch (with buffer)
    end_dt : datetime
        End datetime for fetching (timezone-aware UTC)
    oldest_ts : int | None
        Oldest known timestamp (milliseconds) to fetch before, or None
    max_lookback_days : int
        Safety limit: maximum days to look back
    storage : TickStorage
        Tick storage instance for caching
    cache_dir : Path | None, default=None
        Custom cache directory (passed to storage)
    max_attempts : int, default=5
        Maximum number of fetch attempts with backoff
    initial_multiplier : float, default=2.0
        Initial backoff multiplier

    Returns
    -------
    FetchResult
        Contains merged tick data, oldest timestamp, and total tick count
    """
    # Deferred imports: polars is heavy, and helpers would create an
    # import cycle at module load time.
    import polars as pl

    from .helpers import _fetch_binance, _fetch_exness

    # NOTE(review): cache_dir is accepted but never referenced in this body —
    # presumably the storage instance is already configured with it; confirm.
    cache_symbol = f"{source}_{market}_{symbol}".upper()
    all_tick_data: list[pl.DataFrame] = []
    total_ticks = 0
    # NOTE(review): multiplier is doubled each attempt but never feeds into
    # the days_to_fetch computation below, so the "exponential backoff" has
    # no effect on fetch-window sizing — confirm whether days_to_fetch was
    # meant to scale by it.
    multiplier = initial_multiplier
    current_oldest_ts = oldest_ts

    for _attempt in range(max_attempts):
        # Calculate fetch range: walk backwards from the oldest data seen
        # so far (or from end_dt on the first pass).
        if current_oldest_ts is not None:
            fetch_end_dt = datetime.fromtimestamp(current_oldest_ts / 1000, tz=UTC)
        else:
            fetch_end_dt = end_dt

        # Estimate days to fetch based on remaining ticks needed
        # (assumes roughly 1M ticks/day — TODO confirm this heuristic).
        remaining_ticks = target_ticks - total_ticks
        days_to_fetch = max(1, remaining_ticks // 1_000_000)
        days_to_fetch = min(days_to_fetch, max_lookback_days)

        fetch_start_dt = fetch_end_dt - timedelta(days=days_to_fetch)

        # Check lookback limit: stop rather than fetch past the safety window.
        if (end_dt - fetch_start_dt).days > max_lookback_days:
            break

        start_date = fetch_start_dt.strftime("%Y-%m-%d")
        end_date_str = fetch_end_dt.strftime("%Y-%m-%d")
        start_ts_fetch = int(fetch_start_dt.timestamp() * 1000)
        end_ts_fetch = int(fetch_end_dt.timestamp() * 1000)

        # Fetch tick data (from local storage cache when available,
        # otherwise from the remote source).
        tick_data: pl.DataFrame
        if storage.has_ticks(cache_symbol, start_ts_fetch, end_ts_fetch):
            tick_data = storage.read_ticks(cache_symbol, start_ts_fetch, end_ts_fetch)
        else:
            if source == "binance":
                tick_data = _fetch_binance(symbol, start_date, end_date_str, market)
            else:  # exness
                tick_data = _fetch_exness(symbol, start_date, end_date_str, "strict")

            # Cache freshly fetched data for subsequent runs.
            if not tick_data.is_empty():
                storage.write_ticks(cache_symbol, tick_data)

        if tick_data.is_empty():
            break

        # Prepend (older data first) so the accumulated list stays in
        # chronological order as we walk backwards in time.
        all_tick_data.insert(0, tick_data)
        total_ticks += len(tick_data)

        # Update oldest timestamp for next iteration
        if "timestamp" in tick_data.columns:
            current_oldest_ts = int(tick_data["timestamp"].min())

        # Check if we have enough ticks
        if total_ticks >= target_ticks:
            break

        multiplier *= 2

    if not all_tick_data:
        return FetchResult(ticks=None, oldest_timestamp_ms=None, total_ticks=0)

    # Merge all ticks chronologically and drop boundary duplicates.
    merged_ticks = pl.concat(all_tick_data)
    merged_ticks = _sort_and_deduplicate(merged_ticks)

    # Get oldest timestamp from merged data
    final_oldest_ts: int | None = None
    if "timestamp" in merged_ticks.columns:
        final_oldest_ts = int(merged_ticks["timestamp"].min())

    return FetchResult(
        ticks=merged_ticks,
        oldest_timestamp_ms=final_oldest_ts,
        total_ticks=len(merged_ticks),
    )
175
+
176
+
177
def _sort_and_deduplicate(ticks: pl.DataFrame) -> pl.DataFrame:
    """Return *ticks* sorted chronologically with duplicate trades removed.

    The sort key is (timestamp, trade id) — the order the Rust crate
    requires. Duplicates, which can appear at fetch-window boundaries,
    are dropped by their unique trade identifier (``agg_trade_id`` for
    Binance aggregate trades, otherwise ``trade_id``).

    Parameters
    ----------
    ticks : pl.DataFrame
        Raw tick data (potentially with duplicates and unsorted)

    Returns
    -------
    pl.DataFrame
        Sorted and deduplicated tick data
    """
    columns = ticks.columns
    if "agg_trade_id" in columns:
        id_column = "agg_trade_id"
    elif "trade_id" in columns:
        id_column = "trade_id"
    else:
        # No trade identifier available: sort only, nothing to dedupe on.
        return ticks.sort("timestamp")

    ordered = ticks.sort(["timestamp", id_column])
    return ordered.unique(subset=[id_column], maintain_order=True)
205
+
206
+
207
def estimate_ticks_per_bar(threshold_decimal_bps: int, base_ticks: int = 2500) -> int:
    """Estimate how many ticks one range bar consumes at a given threshold.

    The relationship is inverse: a tighter threshold closes bars sooner,
    so each bar consumes fewer ticks. The calibration anchor is the medium
    threshold (250 dbps) at *base_ticks* ticks per bar.

    Parameters
    ----------
    threshold_decimal_bps : int
        Threshold in decimal basis points (clamped to >= 1 internally to
        avoid division by zero)
    base_ticks : int, default=2500
        Ticks per bar at the 250 dbps anchor

    Returns
    -------
    int
        Estimated ticks per bar for the given threshold
    """
    anchor_dbps = 250
    scale = anchor_dbps / max(threshold_decimal_bps, 1)
    return int(base_ticks * scale)
rangebar/ouroboros.py ADDED
@@ -0,0 +1,454 @@
1
+ """Ouroboros: Cyclical reset boundaries for reproducible range bar construction.
2
+
3
+ Named after the Greek serpent eating its tail (οὐροβόρος), representing the
4
+ cyclical nature of year/month/week reset boundaries.
5
+
6
+ This module provides:
7
+ - Boundary calculation for year/month/week granularities
8
+ - Orphaned bar metadata for ML filtering
9
+ - Exchange session detection (Sydney/Tokyo/London/New York)
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from dataclasses import dataclass
15
+ from datetime import UTC, date, datetime, timedelta
16
+ from enum import Enum
17
+ from typing import TYPE_CHECKING, Literal
18
+
19
+ if TYPE_CHECKING:
20
+ from collections.abc import Iterator
21
+
22
+ # ============================================================================
23
+ # Types
24
+ # ============================================================================
25
+
26
+
27
class OuroborosMode(str, Enum):
    """Granularity of ouroboros reset boundaries.

    Subclasses ``str`` so members compare equal to their plain string
    values (e.g. ``OuroborosMode.YEAR == "year"``).
    """

    YEAR = "year"
    MONTH = "month"
    WEEK = "week"
33
+
34
+
35
+ @dataclass(frozen=True)
36
+ class OuroborosBoundary:
37
+ """A single ouroboros reset boundary."""
38
+
39
+ timestamp: datetime
40
+ """UTC datetime of the boundary."""
41
+
42
+ mode: OuroborosMode
43
+ """Which granularity created this boundary."""
44
+
45
+ reason: str
46
+ """Human-readable reason (e.g., 'year_boundary', 'month_boundary')."""
47
+
48
+ @property
49
+ def timestamp_ms(self) -> int:
50
+ """Timestamp in milliseconds (for comparison with trade data)."""
51
+ return int(self.timestamp.timestamp() * 1000)
52
+
53
+ @property
54
+ def timestamp_us(self) -> int:
55
+ """Timestamp in microseconds."""
56
+ return int(self.timestamp.timestamp() * 1_000_000)
57
+
58
+
59
@dataclass
class OrphanedBarMetadata:
    """Metadata attached to bars left incomplete at an ouroboros boundary.

    These flags let downstream ML consumers filter bars that were
    force-closed by a cyclical reset rather than by price movement.
    """

    # Always True for orphaned bars.
    is_orphan: bool = True
    # The boundary that caused the orphan, if known.
    ouroboros_boundary: datetime | None = None
    # One of 'year_boundary', 'month_boundary', 'week_boundary'.
    reason: str | None = None
    # Duration the bar would have had if completed normally (microseconds).
    expected_duration_us: int | None = None
74
+
75
+
76
+ # ============================================================================
77
+ # Boundary Calculation
78
+ # ============================================================================
79
+
80
+
81
def get_ouroboros_boundaries(
    start: date,
    end: date,
    mode: Literal["year", "month", "week"],
) -> list[OuroborosBoundary]:
    """Return all ouroboros reset points within the date range.

    Parameters
    ----------
    start : date
        Start date (inclusive)
    end : date
        End date (inclusive)
    mode : {"year", "month", "week"}
        Ouroboros granularity

    Returns
    -------
    list[OuroborosBoundary]
        Sorted list of boundaries within the date range

    Examples
    --------
    >>> from datetime import date
    >>> boundaries = get_ouroboros_boundaries(
    ...     date(2024, 1, 1), date(2024, 3, 31), "month"
    ... )
    >>> len(boundaries)
    3
    >>> boundaries[0].reason
    'month_boundary'
    """
    boundaries: list[OuroborosBoundary] = []

    if mode == "year":
        # Candidate boundaries are Jan 1 of each year; the candidate range
        # is intentionally generous, the `start <= d <= end` filter drops
        # anything outside the requested window.
        for year in range(start.year, end.year + 2):
            boundary_date = date(year, 1, 1)
            if start <= boundary_date <= end:
                boundaries.append(
                    OuroborosBoundary(
                        timestamp=datetime(year, 1, 1, 0, 0, 0, tzinfo=UTC),
                        mode=OuroborosMode.YEAR,
                        reason="year_boundary",
                    )
                )

    elif mode == "month":
        # Walk the first day of each month starting from start's month.
        current = date(start.year, start.month, 1)
        while current <= end:
            if current >= start:
                boundaries.append(
                    OuroborosBoundary(
                        timestamp=datetime(
                            current.year,
                            current.month,
                            1,
                            0,
                            0,
                            0,
                            tzinfo=UTC,
                        ),
                        mode=OuroborosMode.MONTH,
                        reason="month_boundary",
                    )
                )
            # Advance to the first day of the next month.
            if current.month == 12:
                current = date(current.year + 1, 1, 1)
            else:
                current = date(current.year, current.month + 1, 1)

    elif mode == "week":
        # Weekly boundaries are Sunday 00:00:00 UTC. Python's
        # date.weekday() has Monday == 0 ... Sunday == 6, so
        # (6 - weekday) % 7 is 0 when `start` is already a Sunday and
        # otherwise advances to the next Sunday. (The previous version
        # carried an unreachable special-case branch for "days == 0 but
        # not Sunday", which is impossible; it has been removed with
        # identical behavior.)
        current = start + timedelta(days=(6 - start.weekday()) % 7)

        while current <= end:
            boundaries.append(
                OuroborosBoundary(
                    timestamp=datetime(
                        current.year,
                        current.month,
                        current.day,
                        0,
                        0,
                        0,
                        tzinfo=UTC,
                    ),
                    mode=OuroborosMode.WEEK,
                    reason="week_boundary",
                )
            )
            current += timedelta(days=7)

    return boundaries
184
+
185
+
186
def iter_ouroboros_segments(
    start: date,
    end: date,
    mode: Literal["year", "month", "week"],
) -> Iterator[tuple[datetime, datetime, OuroborosBoundary | None]]:
    """Iterate over date segments between ouroboros boundaries.

    Yields (segment_start, segment_end, boundary) tuples where boundary
    is the ouroboros boundary at segment_start (None for first segment
    if it doesn't start on a boundary).

    Parameters
    ----------
    start : date
        Start date
    end : date
        End date
    mode : {"year", "month", "week"}
        Ouroboros granularity

    Yields
    ------
    tuple[datetime, datetime, OuroborosBoundary | None]
        (segment_start, segment_end, boundary_at_start)
    """
    boundaries = get_ouroboros_boundaries(start, end, mode)

    # Expand the inclusive date range into full-day datetime bounds:
    # midnight on the start date through the last microsecond of the
    # end date.
    start_dt = datetime(start.year, start.month, start.day, 0, 0, 0, tzinfo=UTC)
    end_dt = datetime(end.year, end.month, end.day, 23, 59, 59, 999999, tzinfo=UTC)

    if not boundaries:
        # No boundaries in range - the whole window is a single segment.
        yield (start_dt, end_dt, None)
        return

    # First segment: start to first boundary (if start is before first boundary).
    # Segment ends are one microsecond before the next boundary so segments
    # never overlap.
    if start_dt < boundaries[0].timestamp:
        yield (start_dt, boundaries[0].timestamp - timedelta(microseconds=1), None)

    # Remaining segments: each boundary up to the instant before the next
    # boundary (or end_dt for the last one).
    for i, boundary in enumerate(boundaries):
        if i + 1 < len(boundaries):
            segment_end = boundaries[i + 1].timestamp - timedelta(microseconds=1)
        else:
            segment_end = end_dt

        # Guard: skip boundaries past the end of the window.
        # NOTE(review): get_ouroboros_boundaries already filters boundaries
        # to [start, end], so this condition is believed to always hold;
        # confirm before simplifying.
        if boundary.timestamp <= end_dt:
            yield (boundary.timestamp, segment_end, boundary)
236
+
237
+
238
+ # ============================================================================
239
+ # Exchange Market Sessions
240
+ # ============================================================================
241
+
242
# Market session hours in local time (aligned with actual exchange hours)
# Note: These are approximate for crypto; traditional markets have pre/post sessions
# Issue #8: Exchange sessions integration (corrected per exchange schedules)
# Maps session name -> {"tz": IANA timezone name, "start": opening hour,
# "end": closing hour}. Hours are in the exchange's local time; the end
# hour is exclusive (see the `start <= hour < end` check in
# get_active_exchange_sessions).
EXCHANGE_SESSION_HOURS: dict[str, dict[str, str | int]] = {
    "sydney": {"tz": "Australia/Sydney", "start": 10, "end": 16},  # ASX
    "tokyo": {"tz": "Asia/Tokyo", "start": 9, "end": 15},  # TSE
    "london": {"tz": "Europe/London", "start": 8, "end": 17},  # LSE
    "newyork": {"tz": "America/New_York", "start": 10, "end": 16},  # NYSE
}
251
+
252
+
253
@dataclass(frozen=True)
class ExchangeSessionFlags:
    """Which exchange market sessions are open at a given instant."""

    sydney: bool
    tokyo: bool
    london: bool
    newyork: bool

    def to_dict(self) -> dict[str, bool]:
        """Return the flags keyed by their output column names."""
        pairs = (
            ("exchange_session_sydney", self.sydney),
            ("exchange_session_tokyo", self.tokyo),
            ("exchange_session_london", self.london),
            ("exchange_session_newyork", self.newyork),
        )
        return dict(pairs)
270
+
271
+
272
def get_active_exchange_sessions(timestamp_utc: datetime) -> ExchangeSessionFlags:
    """Determine which exchange market sessions are active at a UTC instant.

    Parameters
    ----------
    timestamp_utc : datetime
        UTC datetime to check (must be timezone-aware)

    Returns
    -------
    ExchangeSessionFlags
        Boolean flags for each session

    Notes
    -----
    Simplified fixed-hours implementation. For production use with DST
    accuracy, consider using nautilus_trader's ForexSession implementation.
    """
    import zoneinfo

    def _active(session_name: str) -> bool:
        session = EXCHANGE_SESSION_HOURS[session_name]
        local = timestamp_utc.astimezone(zoneinfo.ZoneInfo(session["tz"]))
        # Weekends are always closed.
        if local.weekday() >= 5:
            return False
        # End hour is exclusive.
        return session["start"] <= local.hour < session["end"]

    return ExchangeSessionFlags(
        sydney=_active("sydney"),
        tokyo=_active("tokyo"),
        london=_active("london"),
        newyork=_active("newyork"),
    )
309
+
310
+
311
+ # ============================================================================
312
+ # Dynamic Ouroboros for Forex
313
+ # ============================================================================
314
+
315
# Weekend gap threshold: 40 hours in milliseconds
# Forex markets close Friday ~21:00 UTC, reopen Sunday ~17:00 UTC (~44 hours)
# Using 40 hours as threshold to account for slight variations
WEEKEND_GAP_THRESHOLD_MS: int = 40 * 60 * 60 * 1000  # 40 hours
319
+
320
+
321
def detect_forex_weekend_boundaries(
    timestamps_ms: list[int],
) -> list[OuroborosBoundary]:
    """Detect weekend boundaries from tick timestamps.

    For Forex markets, the ouroboros point is the first tick after a
    weekend gap. Working off actual data gaps (rather than the calendar)
    makes DST handling automatic.

    Parameters
    ----------
    timestamps_ms : list[int]
        Sorted list of tick timestamps in milliseconds

    Returns
    -------
    list[OuroborosBoundary]
        List of weekend boundaries (first tick after each weekend gap)

    Examples
    --------
    >>> timestamps = [1705057200000, 1705060800000, ...]  # Friday ticks
    >>> # ... weekend gap ...
    >>> timestamps.extend([1705233600000, ...])  # Sunday ticks
    >>> boundaries = detect_forex_weekend_boundaries(timestamps)
    >>> len(boundaries)  # One boundary at Sunday open
    1
    """
    if len(timestamps_ms) < 2:
        return []

    found: list[OuroborosBoundary] = []

    # Inspect each consecutive pair for a weekend-sized gap.
    for prev_ms, curr_ms in zip(timestamps_ms, timestamps_ms[1:]):
        if curr_ms - prev_ms < WEEKEND_GAP_THRESHOLD_MS:
            continue
        # Weekend gap detected - the current tick is the ouroboros point.
        found.append(
            OuroborosBoundary(
                timestamp=datetime.fromtimestamp(curr_ms / 1000, tz=UTC),
                mode=OuroborosMode.WEEK,
                reason="forex_weekend_boundary",
            )
        )

    return found
368
+
369
+
370
def iter_forex_ouroboros_segments(
    timestamps_ms: list[int],
    _start_date: date,  # Reserved for future filtering
    _end_date: date,  # Reserved for future filtering
) -> Iterator[tuple[int, int, OuroborosBoundary | None]]:
    """Iterate over segments between Forex weekend boundaries.

    Yields (start_idx, end_idx, boundary) tuples where:
    - start_idx is the first tick index in the segment
    - end_idx is the last tick index in the segment (inclusive)
    - boundary is the OuroborosBoundary at start_idx (None for first segment)

    Parameters
    ----------
    timestamps_ms : list[int]
        Sorted list of tick timestamps in milliseconds
    _start_date : date
        Reserved for future date filtering (currently unused)
    _end_date : date
        Reserved for future date filtering (currently unused)

    Yields
    ------
    tuple[int, int, OuroborosBoundary | None]
        (start_idx, end_idx, boundary_at_start)
    """
    if not timestamps_ms:
        return

    boundaries = detect_forex_weekend_boundaries(timestamps_ms)

    if not boundaries:
        # No weekend gaps - the whole series is a single segment.
        yield (0, len(timestamps_ms) - 1, None)
        return

    # O(1) boundary lookup by timestamp (replaces a linear `next(...)` scan
    # per boundary hit). Built in reverse so the FIRST boundary wins when
    # timestamps collide, matching the previous behavior.
    boundary_by_ts = {b.timestamp_ms: b for b in reversed(boundaries)}

    current_start = 0
    current_boundary: OuroborosBoundary | None = None

    for i, ts_ms in enumerate(timestamps_ms):
        hit = boundary_by_ts.get(ts_ms)
        if hit is not None:
            # Close the preceding segment (if non-empty) and start a new
            # one at this boundary tick.
            if i > current_start:
                yield (current_start, i - 1, current_boundary)
            current_start = i
            current_boundary = hit

    # Final segment. current_start is always a valid index (it was assigned
    # from an enumerate index or is 0), so this yield is unconditional.
    yield (current_start, len(timestamps_ms) - 1, current_boundary)
425
+
426
+
427
+ # ============================================================================
428
+ # Validation
429
+ # ============================================================================
430
+
431
+
432
def validate_ouroboros_mode(mode: str) -> Literal["year", "month", "week"]:
    """Validate ouroboros mode string.

    Parameters
    ----------
    mode : str
        Mode to validate

    Returns
    -------
    Literal["year", "month", "week"]
        The same string, narrowed to the valid literal type

    Raises
    ------
    ValueError
        If mode is not valid
    """
    valid_modes = {"year", "month", "week"}
    if mode in valid_modes:
        return mode  # type: ignore[return-value]
    msg = f"Invalid ouroboros mode: {mode!r}. Must be one of: {valid_modes}"
    raise ValueError(msg)
@@ -0,0 +1,22 @@
1
+ # Modularization M2/M3: Extract RangeBarProcessor and process_trades_* from __init__.py
2
+ # Issue #46: Reduce __init__.py from 4,276 to ~500 lines
3
+ """Processor subpackage for range bar construction.
4
+
5
+ Provides the RangeBarProcessor class and related processing functions.
6
+ """
7
+
8
+ from .api import (
9
+ process_trades_chunked,
10
+ process_trades_polars,
11
+ process_trades_to_dataframe,
12
+ process_trades_to_dataframe_cached,
13
+ )
14
+ from .core import RangeBarProcessor
15
+
16
+ __all__ = [
17
+ "RangeBarProcessor",
18
+ "process_trades_chunked",
19
+ "process_trades_polars",
20
+ "process_trades_to_dataframe",
21
+ "process_trades_to_dataframe_cached",
22
+ ]