rangebar-11.6.1-cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rangebar/CLAUDE.md +327 -0
- rangebar/__init__.py +227 -0
- rangebar/__init__.pyi +1089 -0
- rangebar/_core.cpython-313-darwin.so +0 -0
- rangebar/checkpoint.py +472 -0
- rangebar/cli.py +298 -0
- rangebar/clickhouse/CLAUDE.md +139 -0
- rangebar/clickhouse/__init__.py +100 -0
- rangebar/clickhouse/bulk_operations.py +309 -0
- rangebar/clickhouse/cache.py +734 -0
- rangebar/clickhouse/client.py +121 -0
- rangebar/clickhouse/config.py +141 -0
- rangebar/clickhouse/mixin.py +120 -0
- rangebar/clickhouse/preflight.py +504 -0
- rangebar/clickhouse/query_operations.py +345 -0
- rangebar/clickhouse/schema.sql +187 -0
- rangebar/clickhouse/tunnel.py +222 -0
- rangebar/constants.py +288 -0
- rangebar/conversion.py +177 -0
- rangebar/exceptions.py +207 -0
- rangebar/exness.py +364 -0
- rangebar/hooks.py +311 -0
- rangebar/logging.py +171 -0
- rangebar/notify/__init__.py +15 -0
- rangebar/notify/pushover.py +155 -0
- rangebar/notify/telegram.py +271 -0
- rangebar/orchestration/__init__.py +20 -0
- rangebar/orchestration/count_bounded.py +797 -0
- rangebar/orchestration/helpers.py +412 -0
- rangebar/orchestration/models.py +76 -0
- rangebar/orchestration/precompute.py +498 -0
- rangebar/orchestration/range_bars.py +736 -0
- rangebar/orchestration/tick_fetcher.py +226 -0
- rangebar/ouroboros.py +454 -0
- rangebar/processors/__init__.py +22 -0
- rangebar/processors/api.py +383 -0
- rangebar/processors/core.py +522 -0
- rangebar/resource_guard.py +567 -0
- rangebar/storage/__init__.py +22 -0
- rangebar/storage/checksum_registry.py +218 -0
- rangebar/storage/parquet.py +728 -0
- rangebar/streaming.py +300 -0
- rangebar/validation/__init__.py +69 -0
- rangebar/validation/cache_staleness.py +277 -0
- rangebar/validation/continuity.py +664 -0
- rangebar/validation/gap_classification.py +294 -0
- rangebar/validation/post_storage.py +317 -0
- rangebar/validation/tier1.py +175 -0
- rangebar/validation/tier2.py +261 -0
- rangebar-11.6.1.dist-info/METADATA +308 -0
- rangebar-11.6.1.dist-info/RECORD +54 -0
- rangebar-11.6.1.dist-info/WHEEL +4 -0
- rangebar-11.6.1.dist-info/entry_points.txt +2 -0
- rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
rangebar/orchestration/tick_fetcher.py
ADDED
@@ -0,0 +1,226 @@
# polars-exception: backtesting.py requires Pandas DataFrames with DatetimeIndex
# Issue #46: Modularization - Extract tick fetching loop from count_bounded.py
"""Tick fetching orchestration with storage caching and deduplication.

This module provides the unified tick fetching loop used by both
_fill_gap_and_cache() and _fetch_and_compute_bars() in count_bounded.py.

The key insight: For 24/7 crypto markets, ALL ticks must be processed with
a SINGLE processor to maintain the bar[i+1].open == bar[i].close invariant.
"""

from __future__ import annotations

import logging
from dataclasses import dataclass
from datetime import UTC, datetime, timedelta
from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    import polars as pl

    from rangebar.storage.parquet import TickStorage

logger = logging.getLogger("rangebar")


@dataclass
class FetchResult:
    """Result of a tick fetching operation."""

    ticks: pl.DataFrame | None
    """Merged and deduplicated tick data, sorted chronologically."""

    oldest_timestamp_ms: int | None
    """Oldest timestamp in the fetched data (milliseconds)."""

    total_ticks: int
    """Total number of ticks fetched."""


def fetch_ticks_with_backoff(
    *,
    symbol: str,
    source: str,
    market: str,
    target_ticks: int,
    end_dt: datetime,
    oldest_ts: int | None,
    max_lookback_days: int,
    storage: TickStorage,
    cache_dir: Path | None = None,
    max_attempts: int = 5,
    initial_multiplier: float = 2.0,
) -> FetchResult:
    """Fetch tick data with adaptive exponential backoff.

    This function implements the common tick fetching loop used by both
    cache-aware and compute-only code paths. It handles:
    - Adaptive backoff to estimate required tick volume
    - Local storage caching (read existing, write new)
    - Chronological merging and deduplication
    - Lookback safety limits

    Parameters
    ----------
    symbol : str
        Trading symbol (e.g., "BTCUSDT", "EURUSD")
    source : str
        Data source: "binance" or "exness"
    market : str
        Normalized market type: "spot", "um", or "cm"
    target_ticks : int
        Target number of ticks to fetch (with buffer)
    end_dt : datetime
        End datetime for fetching (timezone-aware UTC)
    oldest_ts : int | None
        Oldest known timestamp (milliseconds) to fetch before, or None
    max_lookback_days : int
        Safety limit: maximum days to look back
    storage : TickStorage
        Tick storage instance for caching
    cache_dir : Path | None, default=None
        Custom cache directory (passed to storage)
    max_attempts : int, default=5
        Maximum number of fetch attempts with backoff
    initial_multiplier : float, default=2.0
        Initial backoff multiplier

    Returns
    -------
    FetchResult
        Contains merged tick data, oldest timestamp, and total tick count
    """
    import polars as pl

    from .helpers import _fetch_binance, _fetch_exness

    cache_symbol = f"{source}_{market}_{symbol}".upper()
    all_tick_data: list[pl.DataFrame] = []
    total_ticks = 0
    multiplier = initial_multiplier
    current_oldest_ts = oldest_ts

    for _attempt in range(max_attempts):
        # Calculate fetch range
        if current_oldest_ts is not None:
            fetch_end_dt = datetime.fromtimestamp(current_oldest_ts / 1000, tz=UTC)
        else:
            fetch_end_dt = end_dt

        # Estimate days to fetch based on remaining ticks needed
        remaining_ticks = target_ticks - total_ticks
        days_to_fetch = max(1, remaining_ticks // 1_000_000)
        days_to_fetch = min(days_to_fetch, max_lookback_days)

        fetch_start_dt = fetch_end_dt - timedelta(days=days_to_fetch)

        # Check lookback limit
        if (end_dt - fetch_start_dt).days > max_lookback_days:
            break

        start_date = fetch_start_dt.strftime("%Y-%m-%d")
        end_date_str = fetch_end_dt.strftime("%Y-%m-%d")
        start_ts_fetch = int(fetch_start_dt.timestamp() * 1000)
        end_ts_fetch = int(fetch_end_dt.timestamp() * 1000)

        # Fetch tick data (from storage or source)
        tick_data: pl.DataFrame
        if storage.has_ticks(cache_symbol, start_ts_fetch, end_ts_fetch):
            tick_data = storage.read_ticks(cache_symbol, start_ts_fetch, end_ts_fetch)
        else:
            if source == "binance":
                tick_data = _fetch_binance(symbol, start_date, end_date_str, market)
            else:  # exness
                tick_data = _fetch_exness(symbol, start_date, end_date_str, "strict")

            if not tick_data.is_empty():
                storage.write_ticks(cache_symbol, tick_data)

        if tick_data.is_empty():
            break

        # Prepend (older data first)
        all_tick_data.insert(0, tick_data)
        total_ticks += len(tick_data)

        # Update oldest timestamp for next iteration
        if "timestamp" in tick_data.columns:
            current_oldest_ts = int(tick_data["timestamp"].min())

        # Check if we have enough ticks
        if total_ticks >= target_ticks:
            break

        multiplier *= 2

    if not all_tick_data:
        return FetchResult(ticks=None, oldest_timestamp_ms=None, total_ticks=0)

    # Merge all ticks chronologically
    merged_ticks = pl.concat(all_tick_data)
    merged_ticks = _sort_and_deduplicate(merged_ticks)

    # Get oldest timestamp from merged data
    final_oldest_ts: int | None = None
    if "timestamp" in merged_ticks.columns:
        final_oldest_ts = int(merged_ticks["timestamp"].min())

    return FetchResult(
        ticks=merged_ticks,
        oldest_timestamp_ms=final_oldest_ts,
        total_ticks=len(merged_ticks),
    )
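A minimal usage sketch of fetch_ticks_with_backoff (annotation, not part of the wheel; fetch_recent_ticks is a hypothetical helper, and storage is assumed to be a TickStorage configured elsewhere):

    from datetime import UTC, datetime

    from rangebar.orchestration.tick_fetcher import fetch_ticks_with_backoff

    def fetch_recent_ticks(storage, n_ticks: int):
        # Walk backwards from "now" until roughly n_ticks accumulate or the
        # 30-day lookback cap is hit; result.ticks is None on a total miss.
        result = fetch_ticks_with_backoff(
            symbol="BTCUSDT",
            source="binance",
            market="um",
            target_ticks=n_ticks,
            end_dt=datetime.now(tz=UTC),
            oldest_ts=None,  # no prior data: anchor the backward walk at end_dt
            max_lookback_days=30,
            storage=storage,
        )
        if result.ticks is None:
            raise RuntimeError("no ticks within the lookback window")
        return result.ticks

The module's remaining helpers follow.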
def _sort_and_deduplicate(ticks: pl.DataFrame) -> pl.DataFrame:
    """Sort tick data chronologically and remove duplicates.

    Sorting order follows Rust crate requirements: (timestamp, trade_id).
    Deduplication uses trade_id or agg_trade_id to handle boundary overlaps.

    Parameters
    ----------
    ticks : pl.DataFrame
        Raw tick data (potentially with duplicates and unsorted)

    Returns
    -------
    pl.DataFrame
        Sorted and deduplicated tick data
    """
    # Sort by (timestamp, trade_id) - Rust crate requires this order
    if "agg_trade_id" in ticks.columns:
        ticks = ticks.sort(["timestamp", "agg_trade_id"])
        # Deduplicate by agg_trade_id (Binance data may have duplicates at boundaries)
        ticks = ticks.unique(subset=["agg_trade_id"], maintain_order=True)
    elif "trade_id" in ticks.columns:
        ticks = ticks.sort(["timestamp", "trade_id"])
        ticks = ticks.unique(subset=["trade_id"], maintain_order=True)
    else:
        ticks = ticks.sort("timestamp")

    return ticks


def estimate_ticks_per_bar(threshold_decimal_bps: int, base_ticks: int = 2500) -> int:
    """Estimate ticks needed per bar based on threshold.

    Uses inverse relationship: smaller threshold = more bars = fewer ticks per bar.
    Calibrated for medium threshold (250 dbps) = 2500 ticks per bar.

    Parameters
    ----------
    threshold_decimal_bps : int
        Threshold in decimal basis points
    base_ticks : int, default=2500
        Base ticks per bar at 250 dbps

    Returns
    -------
    int
        Estimated ticks per bar for the given threshold
    """
    threshold_ratio = 250 / max(threshold_decimal_bps, 1)
    return int(base_ticks * threshold_ratio)
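Worked arithmetic for estimate_ticks_per_bar, following directly from the formula above (ratio = 250 / max(threshold_decimal_bps, 1); the max() guard turns a zero threshold into a 250x ratio instead of a division error):

    >>> estimate_ticks_per_bar(250)   # int(2500 * 250 / 250)
    2500
    >>> estimate_ticks_per_bar(125)   # int(2500 * 250 / 125)
    5000
    >>> estimate_ticks_per_bar(1000)  # int(2500 * 250 / 1000)
    625

One plausible (hypothetical) way to size the target_ticks argument of fetch_ticks_with_backoff is expected_bars * estimate_ticks_per_bar(threshold_decimal_bps).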
rangebar/ouroboros.py
ADDED
@@ -0,0 +1,454 @@
"""Ouroboros: Cyclical reset boundaries for reproducible range bar construction.

Named after the Greek serpent eating its tail (οὐροβόρος), representing the
cyclical nature of year/month/week reset boundaries.

This module provides:
- Boundary calculation for year/month/week granularities
- Orphaned bar metadata for ML filtering
- Exchange session detection (Sydney/Tokyo/London/New York)
"""

from __future__ import annotations

from dataclasses import dataclass
from datetime import UTC, date, datetime, timedelta
from enum import Enum
from typing import TYPE_CHECKING, Literal

if TYPE_CHECKING:
    from collections.abc import Iterator

# ============================================================================
# Types
# ============================================================================


class OuroborosMode(str, Enum):
    """Ouroboros granularity modes."""

    YEAR = "year"
    MONTH = "month"
    WEEK = "week"


@dataclass(frozen=True)
class OuroborosBoundary:
    """A single ouroboros reset boundary."""

    timestamp: datetime
    """UTC datetime of the boundary."""

    mode: OuroborosMode
    """Which granularity created this boundary."""

    reason: str
    """Human-readable reason (e.g., 'year_boundary', 'month_boundary')."""

    @property
    def timestamp_ms(self) -> int:
        """Timestamp in milliseconds (for comparison with trade data)."""
        return int(self.timestamp.timestamp() * 1000)

    @property
    def timestamp_us(self) -> int:
        """Timestamp in microseconds."""
        return int(self.timestamp.timestamp() * 1_000_000)


@dataclass
class OrphanedBarMetadata:
    """Metadata for orphaned bars at ouroboros boundaries."""

    is_orphan: bool = True
    """Always True for orphaned bars."""

    ouroboros_boundary: datetime | None = None
    """Which boundary caused the orphan."""

    reason: str | None = None
    """Reason string: 'year_boundary', 'month_boundary', 'week_boundary'."""

    expected_duration_us: int | None = None
    """Expected duration if bar had completed normally."""


# ============================================================================
# Boundary Calculation
# ============================================================================


def get_ouroboros_boundaries(
    start: date,
    end: date,
    mode: Literal["year", "month", "week"],
) -> list[OuroborosBoundary]:
    """Return all ouroboros reset points within the date range.

    Parameters
    ----------
    start : date
        Start date (inclusive)
    end : date
        End date (inclusive)
    mode : {"year", "month", "week"}
        Ouroboros granularity

    Returns
    -------
    list[OuroborosBoundary]
        Sorted list of boundaries within the date range

    Examples
    --------
    >>> from datetime import date
    >>> boundaries = get_ouroboros_boundaries(
    ...     date(2024, 1, 1), date(2024, 3, 31), "month"
    ... )
    >>> len(boundaries)
    3
    >>> boundaries[0].reason
    'month_boundary'
    """
    boundaries: list[OuroborosBoundary] = []

    if mode == "year":
        for year in range(start.year, end.year + 2):
            boundary_date = date(year, 1, 1)
            if start <= boundary_date <= end:
                boundaries.append(
                    OuroborosBoundary(
                        timestamp=datetime(year, 1, 1, 0, 0, 0, tzinfo=UTC),
                        mode=OuroborosMode.YEAR,
                        reason="year_boundary",
                    )
                )

    elif mode == "month":
        current = date(start.year, start.month, 1)
        while current <= end:
            if current >= start:
                boundaries.append(
                    OuroborosBoundary(
                        timestamp=datetime(
                            current.year,
                            current.month,
                            1,
                            0,
                            0,
                            0,
                            tzinfo=UTC,
                        ),
                        mode=OuroborosMode.MONTH,
                        reason="month_boundary",
                    )
                )
            # Next month
            if current.month == 12:
                current = date(current.year + 1, 1, 1)
            else:
                current = date(current.year, current.month + 1, 1)

    elif mode == "week":
        # Sunday 00:00:00 UTC boundaries
        # Sunday = 6 in Python's weekday()
        days_until_sunday = (6 - start.weekday()) % 7
        if days_until_sunday == 0 and start.weekday() != 6:
            # start is not Sunday, find next Sunday
            days_until_sunday = 7
        current = start + timedelta(days=days_until_sunday)

        # Also include start date if it's a Sunday
        if start.weekday() == 6:
            current = start

        while current <= end:
            boundaries.append(
                OuroborosBoundary(
                    timestamp=datetime(
                        current.year,
                        current.month,
                        current.day,
                        0,
                        0,
                        0,
                        tzinfo=UTC,
                    ),
                    mode=OuroborosMode.WEEK,
                    reason="week_boundary",
                )
            )
            current += timedelta(days=7)

    return boundaries


def iter_ouroboros_segments(
    start: date,
    end: date,
    mode: Literal["year", "month", "week"],
) -> Iterator[tuple[datetime, datetime, OuroborosBoundary | None]]:
    """Iterate over date segments between ouroboros boundaries.

    Yields (segment_start, segment_end, boundary) tuples where boundary
    is the ouroboros boundary at segment_start (None for first segment
    if it doesn't start on a boundary).

    Parameters
    ----------
    start : date
        Start date
    end : date
        End date
    mode : {"year", "month", "week"}
        Ouroboros granularity

    Yields
    ------
    tuple[datetime, datetime, OuroborosBoundary | None]
        (segment_start, segment_end, boundary_at_start)
    """
    boundaries = get_ouroboros_boundaries(start, end, mode)

    # Convert dates to datetimes
    start_dt = datetime(start.year, start.month, start.day, 0, 0, 0, tzinfo=UTC)
    end_dt = datetime(end.year, end.month, end.day, 23, 59, 59, 999999, tzinfo=UTC)

    if not boundaries:
        # No boundaries in range - single segment
        yield (start_dt, end_dt, None)
        return

    # First segment: start to first boundary (if start is before first boundary)
    if start_dt < boundaries[0].timestamp:
        yield (start_dt, boundaries[0].timestamp - timedelta(microseconds=1), None)

    # Middle segments: between consecutive boundaries
    for i, boundary in enumerate(boundaries):
        if i + 1 < len(boundaries):
            segment_end = boundaries[i + 1].timestamp - timedelta(microseconds=1)
        else:
            segment_end = end_dt

        # Only yield if segment start is before segment end
        if boundary.timestamp <= end_dt:
            yield (boundary.timestamp, segment_end, boundary)
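Continuing the doctest above, a sketch of how iter_ouroboros_segments slices the same Q1 2024 range in "month" mode; each segment ends one microsecond before the next boundary, and no leading segment is emitted because the start date falls exactly on the first boundary:

    >>> from datetime import date
    >>> segs = list(iter_ouroboros_segments(date(2024, 1, 1), date(2024, 3, 31), "month"))
    >>> len(segs)  # January, February, March
    3
    >>> segs[0][0].isoformat(), segs[0][1].isoformat()
    ('2024-01-01T00:00:00+00:00', '2024-01-31T23:59:59.999999+00:00')
    >>> segs[0][2].reason
    'month_boundary'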
# ============================================================================
# Exchange Market Sessions
# ============================================================================

# Market session hours in local time (aligned with actual exchange hours)
# Note: These are approximate for crypto; traditional markets have pre/post sessions
# Issue #8: Exchange sessions integration (corrected per exchange schedules)
EXCHANGE_SESSION_HOURS = {
    "sydney": {"tz": "Australia/Sydney", "start": 10, "end": 16},  # ASX
    "tokyo": {"tz": "Asia/Tokyo", "start": 9, "end": 15},  # TSE
    "london": {"tz": "Europe/London", "start": 8, "end": 17},  # LSE
    "newyork": {"tz": "America/New_York", "start": 10, "end": 16},  # NYSE
}


@dataclass(frozen=True)
class ExchangeSessionFlags:
    """Boolean flags for active exchange market sessions."""

    sydney: bool
    tokyo: bool
    london: bool
    newyork: bool

    def to_dict(self) -> dict[str, bool]:
        """Convert to dict with column names."""
        return {
            "exchange_session_sydney": self.sydney,
            "exchange_session_tokyo": self.tokyo,
            "exchange_session_london": self.london,
            "exchange_session_newyork": self.newyork,
        }


def get_active_exchange_sessions(timestamp_utc: datetime) -> ExchangeSessionFlags:
    """Determine which exchange market sessions are active at a given UTC time.

    Parameters
    ----------
    timestamp_utc : datetime
        UTC datetime to check (must be timezone-aware)

    Returns
    -------
    ExchangeSessionFlags
        Boolean flags for each session

    Notes
    -----
    This is a simplified implementation that uses fixed hours.
    For production use with DST accuracy, consider using nautilus_trader's
    ForexSession implementation.
    """
    import zoneinfo

    def is_in_session(session_name: str) -> bool:
        info = EXCHANGE_SESSION_HOURS[session_name]
        tz = zoneinfo.ZoneInfo(info["tz"])
        local_time = timestamp_utc.astimezone(tz)
        hour = local_time.hour
        # Skip weekends
        if local_time.weekday() >= 5:
            return False
        return info["start"] <= hour < info["end"]

    return ExchangeSessionFlags(
        sydney=is_in_session("sydney"),
        tokyo=is_in_session("tokyo"),
        london=is_in_session("london"),
        newyork=is_in_session("newyork"),
    )
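A spot check of get_active_exchange_sessions against the hours table above, at 14:00 UTC on Wednesday 2024-01-10 (winter offsets: 14:00 GMT in London, 09:00 EST in New York, 23:00 JST in Tokyo, 01:00 AEDT the next day in Sydney). Only London is inside its 8-17 window; New York is still an hour before its 10:00 open:

    >>> from datetime import UTC, datetime
    >>> flags = get_active_exchange_sessions(datetime(2024, 1, 10, 14, 0, tzinfo=UTC))
    >>> flags.london, flags.newyork, flags.tokyo, flags.sydney
    (True, False, False, False)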
# ============================================================================
# Dynamic Ouroboros for Forex
# ============================================================================

# Weekend gap threshold: 40 hours in milliseconds
# Forex markets close Friday ~21:00 UTC, reopen Sunday ~17:00 UTC (~44 hours)
# Using 40 hours as threshold to account for slight variations
WEEKEND_GAP_THRESHOLD_MS = 40 * 60 * 60 * 1000  # 40 hours


def detect_forex_weekend_boundaries(
    timestamps_ms: list[int],
) -> list[OuroborosBoundary]:
    """Detect weekend boundaries from tick timestamps.

    For Forex markets, the ouroboros point is the first tick after a weekend gap.
    This handles DST automatically since we use actual data gaps, not calendar.

    Parameters
    ----------
    timestamps_ms : list[int]
        Sorted list of tick timestamps in milliseconds

    Returns
    -------
    list[OuroborosBoundary]
        List of weekend boundaries (first tick after each weekend gap)

    Examples
    --------
    >>> timestamps = [1705057200000, 1705060800000, ...]  # Friday ticks
    >>> # ... weekend gap ...
    >>> timestamps.extend([1705233600000, ...])  # Sunday ticks
    >>> boundaries = detect_forex_weekend_boundaries(timestamps)
    >>> len(boundaries)  # One boundary at Sunday open
    1
    """
    if len(timestamps_ms) < 2:
        return []

    boundaries: list[OuroborosBoundary] = []

    for i in range(1, len(timestamps_ms)):
        gap_ms = timestamps_ms[i] - timestamps_ms[i - 1]

        if gap_ms >= WEEKEND_GAP_THRESHOLD_MS:
            # This is a weekend gap - the current tick is the ouroboros point
            boundary_dt = datetime.fromtimestamp(timestamps_ms[i] / 1000, tz=UTC)
            boundaries.append(
                OuroborosBoundary(
                    timestamp=boundary_dt,
                    mode=OuroborosMode.WEEK,
                    reason="forex_weekend_boundary",
                )
            )

    return boundaries


def iter_forex_ouroboros_segments(
    timestamps_ms: list[int],
    _start_date: date,  # Reserved for future filtering
    _end_date: date,  # Reserved for future filtering
) -> Iterator[tuple[int, int, OuroborosBoundary | None]]:
    """Iterate over segments between Forex weekend boundaries.

    Yields (start_idx, end_idx, boundary) tuples where:
    - start_idx is the first tick index in the segment
    - end_idx is the last tick index in the segment (inclusive)
    - boundary is the OuroborosBoundary at start_idx (None for first segment)

    Parameters
    ----------
    timestamps_ms : list[int]
        Sorted list of tick timestamps in milliseconds
    start_date : date
        Start date (for filtering)
    end_date : date
        End date (for filtering)

    Yields
    ------
    tuple[int, int, OuroborosBoundary | None]
        (start_idx, end_idx, boundary_at_start)
    """
    if not timestamps_ms:
        return

    boundaries = detect_forex_weekend_boundaries(timestamps_ms)

    if not boundaries:
        # No weekend gaps - single segment
        yield (0, len(timestamps_ms) - 1, None)
        return

    # Build boundary index map
    boundary_timestamps = {b.timestamp_ms for b in boundaries}

    current_start = 0
    current_boundary: OuroborosBoundary | None = None

    for i, ts_ms in enumerate(timestamps_ms):
        if ts_ms in boundary_timestamps:
            # End previous segment (if any)
            if i > current_start:
                yield (current_start, i - 1, current_boundary)

            # Start new segment at this boundary
            current_start = i
            current_boundary = next(b for b in boundaries if b.timestamp_ms == ts_ms)

    # Yield final segment
    if current_start < len(timestamps_ms):
        yield (current_start, len(timestamps_ms) - 1, current_boundary)
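The threshold arithmetic above: 40 h = 40 * 60 * 60 * 1000 = 144,000,000 ms. A typical Friday 21:00 UTC close followed by a Sunday 17:00 UTC reopen is a 44 h (158,400,000 ms) gap, so it trips the detector:

    >>> friday_close = 1_705_093_200_000  # 2024-01-12 21:00:00 UTC
    >>> sunday_open = 1_705_251_600_000   # 2024-01-14 17:00:00 UTC
    >>> sunday_open - friday_close >= WEEKEND_GAP_THRESHOLD_MS  # 158_400_000 ms
    True
    >>> detect_forex_weekend_boundaries([friday_close, sunday_open])[0].reason
    'forex_weekend_boundary'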
# ============================================================================
# Validation
# ============================================================================


def validate_ouroboros_mode(mode: str) -> Literal["year", "month", "week"]:
    """Validate ouroboros mode string.

    Parameters
    ----------
    mode : str
        Mode to validate

    Returns
    -------
    Literal["year", "month", "week"]
        Validated mode

    Raises
    ------
    ValueError
        If mode is not valid
    """
    valid_modes = {"year", "month", "week"}
    if mode not in valid_modes:
        msg = f"Invalid ouroboros mode: {mode!r}. Must be one of: {valid_modes}"
        raise ValueError(msg)
    return mode  # type: ignore[return-value]
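Round trip of the validator (any other string raises ValueError naming the valid set):

    >>> validate_ouroboros_mode("week")
    'week'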
rangebar/processors/__init__.py
ADDED
@@ -0,0 +1,22 @@
# Modularization M2/M3: Extract RangeBarProcessor and process_trades_* from __init__.py
# Issue #46: Reduce __init__.py from 4,276 to ~500 lines
"""Processor subpackage for range bar construction.

Provides the RangeBarProcessor class and related processing functions.
"""

from .api import (
    process_trades_chunked,
    process_trades_polars,
    process_trades_to_dataframe,
    process_trades_to_dataframe_cached,
)
from .core import RangeBarProcessor

__all__ = [
    "RangeBarProcessor",
    "process_trades_chunked",
    "process_trades_polars",
    "process_trades_to_dataframe",
    "process_trades_to_dataframe_cached",
]
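Given the re-exports above, the subpackage's public surface is importable directly, e.g.:

    from rangebar.processors import RangeBarProcessor, process_trades_polars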