rangebar 11.6.1__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rangebar/CLAUDE.md +327 -0
- rangebar/__init__.py +227 -0
- rangebar/__init__.pyi +1089 -0
- rangebar/_core.cpython-313-darwin.so +0 -0
- rangebar/checkpoint.py +472 -0
- rangebar/cli.py +298 -0
- rangebar/clickhouse/CLAUDE.md +139 -0
- rangebar/clickhouse/__init__.py +100 -0
- rangebar/clickhouse/bulk_operations.py +309 -0
- rangebar/clickhouse/cache.py +734 -0
- rangebar/clickhouse/client.py +121 -0
- rangebar/clickhouse/config.py +141 -0
- rangebar/clickhouse/mixin.py +120 -0
- rangebar/clickhouse/preflight.py +504 -0
- rangebar/clickhouse/query_operations.py +345 -0
- rangebar/clickhouse/schema.sql +187 -0
- rangebar/clickhouse/tunnel.py +222 -0
- rangebar/constants.py +288 -0
- rangebar/conversion.py +177 -0
- rangebar/exceptions.py +207 -0
- rangebar/exness.py +364 -0
- rangebar/hooks.py +311 -0
- rangebar/logging.py +171 -0
- rangebar/notify/__init__.py +15 -0
- rangebar/notify/pushover.py +155 -0
- rangebar/notify/telegram.py +271 -0
- rangebar/orchestration/__init__.py +20 -0
- rangebar/orchestration/count_bounded.py +797 -0
- rangebar/orchestration/helpers.py +412 -0
- rangebar/orchestration/models.py +76 -0
- rangebar/orchestration/precompute.py +498 -0
- rangebar/orchestration/range_bars.py +736 -0
- rangebar/orchestration/tick_fetcher.py +226 -0
- rangebar/ouroboros.py +454 -0
- rangebar/processors/__init__.py +22 -0
- rangebar/processors/api.py +383 -0
- rangebar/processors/core.py +522 -0
- rangebar/resource_guard.py +567 -0
- rangebar/storage/__init__.py +22 -0
- rangebar/storage/checksum_registry.py +218 -0
- rangebar/storage/parquet.py +728 -0
- rangebar/streaming.py +300 -0
- rangebar/validation/__init__.py +69 -0
- rangebar/validation/cache_staleness.py +277 -0
- rangebar/validation/continuity.py +664 -0
- rangebar/validation/gap_classification.py +294 -0
- rangebar/validation/post_storage.py +317 -0
- rangebar/validation/tier1.py +175 -0
- rangebar/validation/tier2.py +261 -0
- rangebar-11.6.1.dist-info/METADATA +308 -0
- rangebar-11.6.1.dist-info/RECORD +54 -0
- rangebar-11.6.1.dist-info/WHEEL +4 -0
- rangebar-11.6.1.dist-info/entry_points.txt +2 -0
- rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,567 @@
|
|
|
1
|
+
"""Resource guards for memory-safe range bar processing (Issue #49).
|
|
2
|
+
|
|
3
|
+
Provides cross-platform memory monitoring and process-level caps
|
|
4
|
+
using only stdlib modules (no psutil dependency).
|
|
5
|
+
|
|
6
|
+
MEM-009: Process-level memory cap via RLIMIT_DATA (Linux) or RLIMIT_RSS (macOS), aiming for MemoryError instead of an OOM kill
|
|
7
|
+
MEM-010: Pre-flight memory estimation before tick loading
|
|
8
|
+
MEM-011: Environment variable default (RANGEBAR_MAX_MEMORY_GB)
|
|
9
|
+
MEM-012: Multiprocessing-safe memory guard (Issue #61)
|
|
10
|
+
|
|
11
|
+
Environment Variables
|
|
12
|
+
---------------------
|
|
13
|
+
RANGEBAR_MAX_MEMORY_GB : float
|
|
14
|
+
Default memory limit in GB. When set, all calls to
|
|
15
|
+
ensure_memory_limit() will use this as the default cap.
|
|
16
|
+
Example: RANGEBAR_MAX_MEMORY_GB=45 (for 45 GB limit)
|
|
17
|
+
|
|
18
|
+
RANGEBAR_MAX_MEMORY_PCT : float
|
|
19
|
+
Default memory limit as fraction of total RAM (0.0-1.0).
|
|
20
|
+
Example: RANGEBAR_MAX_MEMORY_PCT=0.7 (for 70% of RAM)
|
|
21
|
+
|
|
22
|
+
RANGEBAR_NO_MEMORY_GUARD : str
|
|
23
|
+
Set to "1" to disable automatic memory guard entirely.
|
|
24
|
+
Useful for multiprocessing workers or when RLIMIT_AS conflicts
|
|
25
|
+
with Polars/Rust memory allocators.
|
|
26
|
+
|
|
27
|
+
If both GB and PCT are set, the smaller limit is used.
|
|
28
|
+
|
|
29
|
+
Multiprocessing Safety (Issue #61)
|
|
30
|
+
----------------------------------
|
|
31
|
+
On Linux, RLIMIT_AS (virtual address space) conflicts with mmap-based
|
|
32
|
+
allocators used by Polars and Rust. This can cause "Cannot allocate memory"
|
|
33
|
+
errors even when physical RAM is available.
|
|
34
|
+
|
|
35
|
+
The auto_memory_guard() function:
|
|
36
|
+
1. Skips if RANGEBAR_NO_MEMORY_GUARD=1
|
|
37
|
+
2. Skips if running in a multiprocessing child (detected via _RANGEBAR_GUARD_PID)
|
|
38
|
+
3. On Linux, uses RLIMIT_DATA instead of RLIMIT_AS to avoid mmap conflicts
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
from __future__ import annotations
|
|
42
|
+
|
|
43
|
+
import os
|
|
44
|
+
import resource
|
|
45
|
+
import subprocess
|
|
46
|
+
import sys
|
|
47
|
+
from dataclasses import dataclass
|
|
48
|
+
from pathlib import Path
|
|
49
|
+
from typing import TYPE_CHECKING
|
|
50
|
+
|
|
51
|
+
if TYPE_CHECKING:
|
|
52
|
+
from rangebar.storage.parquet import TickStorage
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass(frozen=True)
class MemoryInfo:
    """Immutable snapshot of process and system memory, in whole megabytes."""

    # Resident set size of the current process, in MB.
    process_rss_mb: int

    # Total physical RAM, in MB.
    system_total_mb: int

    # Available RAM, in MB (approximate on macOS).
    system_available_mb: int

    @property
    def usage_pct(self) -> float:
        """Return process RSS as a fraction of total system RAM.

        Returns
        -------
        float
            ``process_rss_mb / system_total_mb``, or 0.0 when the total is
            zero so the division can never fail.
        """
        total_mb = self.system_total_mb
        return self.process_rss_mb / total_mb if total_mb != 0 else 0.0
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def get_memory_info() -> MemoryInfo:
    """Capture a snapshot of process and system memory.

    Delegates to the platform helpers in this module, which read
    ``/proc`` on Linux and use ``resource.getrusage()`` / system tools on
    macOS. No third-party dependencies are involved.

    Returns
    -------
    MemoryInfo
        Process RSS, total RAM and available RAM, all in MB.
    """
    # Keyword arguments are evaluated in order: RSS, then total, then available.
    return MemoryInfo(
        process_rss_mb=_get_process_rss_mb(),
        system_total_mb=_get_system_total_mb(),
        system_available_mb=_get_system_available_mb(),
    )
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def set_memory_limit(
|
|
99
|
+
*,
|
|
100
|
+
max_gb: float | None = None,
|
|
101
|
+
max_pct: float | None = None,
|
|
102
|
+
) -> int:
|
|
103
|
+
"""Set process virtual memory limit (MEM-009).
|
|
104
|
+
|
|
105
|
+
Causes MemoryError instead of OOM kill when limit exceeded.
|
|
106
|
+
|
|
107
|
+
Parameters
|
|
108
|
+
----------
|
|
109
|
+
max_gb : float | None
|
|
110
|
+
Hard limit in gigabytes.
|
|
111
|
+
max_pct : float | None
|
|
112
|
+
Fraction of total system RAM (e.g., 0.8 for 80%).
|
|
113
|
+
If both max_gb and max_pct are given, uses the smaller limit.
|
|
114
|
+
|
|
115
|
+
Returns
|
|
116
|
+
-------
|
|
117
|
+
int
|
|
118
|
+
The limit set in bytes, or -1 if no limit was set.
|
|
119
|
+
|
|
120
|
+
Notes
|
|
121
|
+
-----
|
|
122
|
+
On macOS, RLIMIT_RSS is used (advisory, not enforced by kernel).
|
|
123
|
+
On Linux, RLIMIT_AS is used (hard cap on virtual address space).
|
|
124
|
+
"""
|
|
125
|
+
if max_gb is None and max_pct is None:
|
|
126
|
+
return -1
|
|
127
|
+
|
|
128
|
+
limits: list[int] = []
|
|
129
|
+
|
|
130
|
+
if max_gb is not None:
|
|
131
|
+
limits.append(int(max_gb * 1024 * 1024 * 1024))
|
|
132
|
+
|
|
133
|
+
if max_pct is not None:
|
|
134
|
+
total_bytes = _get_system_total_mb() * 1024 * 1024
|
|
135
|
+
limits.append(int(total_bytes * max_pct))
|
|
136
|
+
|
|
137
|
+
limit_bytes = min(limits)
|
|
138
|
+
|
|
139
|
+
# MEM-009: Set process-level cap
|
|
140
|
+
# MEM-012: Use RLIMIT_DATA on Linux to avoid mmap conflicts (Issue #61)
|
|
141
|
+
try:
|
|
142
|
+
if sys.platform == "darwin":
|
|
143
|
+
# macOS: RLIMIT_RSS is advisory (kernel doesn't enforce)
|
|
144
|
+
# but Python's allocator may still raise MemoryError
|
|
145
|
+
# Query current hard limit and cap to that if needed
|
|
146
|
+
_, hard_limit = resource.getrlimit(resource.RLIMIT_RSS)
|
|
147
|
+
if hard_limit != resource.RLIM_INFINITY and limit_bytes > hard_limit:
|
|
148
|
+
limit_bytes = hard_limit
|
|
149
|
+
resource.setrlimit(resource.RLIMIT_RSS, (limit_bytes, limit_bytes))
|
|
150
|
+
else:
|
|
151
|
+
# Linux: Use RLIMIT_DATA instead of RLIMIT_AS (Issue #61)
|
|
152
|
+
# RLIMIT_AS limits virtual address space including mmap regions,
|
|
153
|
+
# which conflicts with Polars/Rust allocators that use mmap().
|
|
154
|
+
# RLIMIT_DATA limits the data segment (heap) which is more
|
|
155
|
+
# appropriate for controlling actual memory allocation.
|
|
156
|
+
#
|
|
157
|
+
# Note: RLIMIT_DATA is less strict than RLIMIT_AS - it won't
|
|
158
|
+
# prevent mmap allocations, but it will limit malloc/brk usage.
|
|
159
|
+
# For full protection, users should rely on cgroups or systemd.
|
|
160
|
+
_, hard_limit = resource.getrlimit(resource.RLIMIT_DATA)
|
|
161
|
+
if hard_limit != resource.RLIM_INFINITY and limit_bytes > hard_limit:
|
|
162
|
+
limit_bytes = hard_limit
|
|
163
|
+
resource.setrlimit(resource.RLIMIT_DATA, (limit_bytes, limit_bytes))
|
|
164
|
+
except (ValueError, OSError):
|
|
165
|
+
# Can't set limit (e.g., unprivileged user, macOS restrictions)
|
|
166
|
+
return -1
|
|
167
|
+
|
|
168
|
+
return limit_bytes
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
# ---------------------------------------------------------------------------
# MEM-011: Default memory limit from environment
# ---------------------------------------------------------------------------

# MEM-012: name of the environment variable in which auto_memory_guard()
# records the PID of the process that applied the limit. Environment
# variables are inherited by child processes, so a child can tell that a
# parent already applied the guard.
_GUARD_PID_KEY = "_RANGEBAR_GUARD_PID"

# Per-process "limit already applied" flag. Kept in a dict so functions can
# flip it by item assignment instead of needing a ``global`` statement.
_memory_limit_state: dict[str, bool] = {"applied": False}
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _parse_env_float(name: str) -> float | None:
|
|
184
|
+
"""Parse environment variable as float, returning None on failure."""
|
|
185
|
+
value = os.environ.get(name)
|
|
186
|
+
if not value:
|
|
187
|
+
return None
|
|
188
|
+
try:
|
|
189
|
+
return float(value)
|
|
190
|
+
except ValueError:
|
|
191
|
+
return None
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def get_default_memory_limit() -> tuple[float | None, float | None]:
    """Get the default memory limit from environment variables.

    Returns
    -------
    tuple[float | None, float | None]
        ``(max_gb, max_pct)`` read from RANGEBAR_MAX_MEMORY_GB and
        RANGEBAR_MAX_MEMORY_PCT. An entry is None when its variable is
        unset, unparseable, or out of range: GB must be a positive finite
        number, PCT must satisfy ``0.0 < pct <= 1.0``.
    """
    max_gb = _parse_env_float("RANGEBAR_MAX_MEMORY_GB")
    # A zero, negative or infinite GB value cannot describe a usable cap;
    # ignore it the same way an out-of-range percentage is ignored below.
    # (NaN also fails this chained comparison.)
    if max_gb is not None and not 0.0 < max_gb < float("inf"):
        max_gb = None

    max_pct = _parse_env_float("RANGEBAR_MAX_MEMORY_PCT")
    if max_pct is not None and not 0.0 < max_pct <= 1.0:
        max_pct = None

    return max_gb, max_pct
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def ensure_memory_limit(
    *,
    max_gb: float | None = None,
    max_pct: float | None = None,
    fallback_pct: float = 0.7,
) -> int:
    """Apply a process memory limit at most once per process (MEM-011).

    The first call picks a limit source, applies it through
    ``set_memory_limit`` and records that the guard has been handled.
    Every later call is a no-op.

    Limit sources, in priority order:

    1. Explicit ``max_gb`` / ``max_pct`` arguments.
    2. RANGEBAR_MAX_MEMORY_GB / RANGEBAR_MAX_MEMORY_PCT environment vars.
    3. ``fallback_pct`` of total system RAM.

    Parameters
    ----------
    max_gb : float | None
        Explicit limit in GB (takes precedence over the environment).
    max_pct : float | None
        Explicit limit as a fraction of RAM (takes precedence over the
        environment).
    fallback_pct : float
        Fraction of RAM used when neither arguments nor environment
        variables supply a limit. Default 0.7 (70% of system RAM).

    Returns
    -------
    int
        Whatever ``set_memory_limit`` returned on the first call (the limit
        in bytes, or -1 if it could not set one); -1 on later calls.

    Examples
    --------
    >>> # Set default 70% limit
    >>> ensure_memory_limit()

    >>> # Override with explicit limit
    >>> ensure_memory_limit(max_gb=45)

    >>> # Respect environment variable
    >>> import os
    >>> os.environ["RANGEBAR_MAX_MEMORY_GB"] = "50"
    >>> ensure_memory_limit()  # Uses 50 GB from env
    """
    if _memory_limit_state["applied"]:
        return -1

    if max_gb is not None or max_pct is not None:
        # Priority 1: caller-supplied values.
        chosen = {"max_gb": max_gb, "max_pct": max_pct}
    else:
        env_gb, env_pct = get_default_memory_limit()
        if env_gb is not None or env_pct is not None:
            # Priority 2: environment variables.
            chosen = {"max_gb": env_gb, "max_pct": env_pct}
        else:
            # Priority 3: fixed fraction of total RAM.
            chosen = {"max_pct": fallback_pct}

    applied_bytes = set_memory_limit(**chosen)
    # Marked as applied even when set_memory_limit returned -1, so the
    # attempt is not repeated on subsequent calls.
    _memory_limit_state["applied"] = True
    return applied_bytes
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def _is_multiprocessing_child() -> bool:
    """Tell whether a parent process already applied the guard (MEM-012).

    Reads the PID stored in the ``_RANGEBAR_GUARD_PID`` environment
    variable and compares it with the current PID.

    Returns
    -------
    bool
        True only when the variable holds an integer PID different from
        ``os.getpid()``. False when the variable is unset, empty, not an
        integer, or equal to the current PID.
    """
    recorded = os.environ.get(_GUARD_PID_KEY)
    if recorded:
        try:
            # A differing PID means the value was inherited from the process
            # that applied the limit, so there is no need to re-apply it.
            return int(recorded) != os.getpid()
        except ValueError:
            pass
    return False
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def auto_memory_guard() -> int:
    """Enable the memory guard automatically (MEM-011, MEM-012).

    NOTE(review): the original documentation says this runs when
    ``import rangebar`` executes; the call site is not in this module.

    Behaviour, in order:

    - Does nothing when RANGEBAR_NO_MEMORY_GUARD is exactly "1".
    - Does nothing in a child of a process that already applied the guard
      (detected via ``_RANGEBAR_GUARD_PID``); the guard is only marked as
      handled so later ``ensure_memory_limit`` calls are no-ops.
    - Otherwise calls ``ensure_memory_limit()`` with its defaults, which
      honours RANGEBAR_MAX_MEMORY_GB / RANGEBAR_MAX_MEMORY_PCT.

    Returns
    -------
    int
        The limit set in bytes, or -1 if disabled, skipped, already
        applied, or not settable.
    """
    guard_disabled = os.environ.get("RANGEBAR_NO_MEMORY_GUARD") == "1"
    if guard_disabled:
        return -1

    # MEM-012 (Issue #61): skip children of a process that set the guard.
    if _is_multiprocessing_child():
        _memory_limit_state["applied"] = True  # Mark as handled
        return -1

    applied_bytes = ensure_memory_limit()

    # MEM-012: publish our PID so child processes can detect inheritance.
    # Only done when a limit was really applied (positive byte count).
    if applied_bytes > 0:
        os.environ[_GUARD_PID_KEY] = str(os.getpid())

    return applied_bytes
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
@dataclass(frozen=True)
|
|
349
|
+
class MemoryEstimate:
|
|
350
|
+
"""Pre-flight memory estimate for tick data loading (MEM-010)."""
|
|
351
|
+
|
|
352
|
+
parquet_bytes: int
|
|
353
|
+
"""Total on-disk compressed size of Parquet files."""
|
|
354
|
+
|
|
355
|
+
estimated_memory_mb: int
|
|
356
|
+
"""Estimated in-memory size after decompression."""
|
|
357
|
+
|
|
358
|
+
file_count: int
|
|
359
|
+
"""Number of Parquet files that would be loaded."""
|
|
360
|
+
|
|
361
|
+
system_available_mb: int
|
|
362
|
+
"""Available system RAM at estimation time."""
|
|
363
|
+
|
|
364
|
+
# Thresholds for memory recommendation (fraction of available RAM)
|
|
365
|
+
_SAFE_THRESHOLD: float = 0.5
|
|
366
|
+
_STREAMING_THRESHOLD: float = 0.8
|
|
367
|
+
|
|
368
|
+
@property
|
|
369
|
+
def can_fit(self) -> bool:
|
|
370
|
+
"""True if estimated size fits within 80% of available RAM."""
|
|
371
|
+
return self.estimated_memory_mb < (
|
|
372
|
+
self.system_available_mb * self._STREAMING_THRESHOLD
|
|
373
|
+
)
|
|
374
|
+
|
|
375
|
+
@property
|
|
376
|
+
def recommendation(self) -> str:
|
|
377
|
+
"""One of 'safe', 'streaming_recommended', 'will_oom'."""
|
|
378
|
+
if self.estimated_memory_mb == 0:
|
|
379
|
+
return "safe"
|
|
380
|
+
ratio = self.estimated_memory_mb / max(self.system_available_mb, 1)
|
|
381
|
+
if ratio < self._SAFE_THRESHOLD:
|
|
382
|
+
return "safe"
|
|
383
|
+
if ratio < self._STREAMING_THRESHOLD:
|
|
384
|
+
return "streaming_recommended"
|
|
385
|
+
return "will_oom"
|
|
386
|
+
|
|
387
|
+
def check_or_raise(self, max_mb: int | None = None) -> None:
|
|
388
|
+
"""Raise MemoryError if estimate exceeds budget.
|
|
389
|
+
|
|
390
|
+
Parameters
|
|
391
|
+
----------
|
|
392
|
+
max_mb : int | None
|
|
393
|
+
Explicit budget in MB. If None, uses 80% of available RAM.
|
|
394
|
+
"""
|
|
395
|
+
budget = max_mb if max_mb is not None else int(
|
|
396
|
+
self.system_available_mb * 0.8
|
|
397
|
+
)
|
|
398
|
+
if self.estimated_memory_mb > budget:
|
|
399
|
+
msg = (
|
|
400
|
+
f"Estimated {self.estimated_memory_mb} MB for "
|
|
401
|
+
f"{self.file_count} Parquet files, exceeds budget "
|
|
402
|
+
f"{budget} MB (available: {self.system_available_mb} MB). "
|
|
403
|
+
f"Use precompute_range_bars() or read_ticks_streaming() "
|
|
404
|
+
f"for memory-safe processing."
|
|
405
|
+
)
|
|
406
|
+
raise MemoryError(msg)
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
def estimate_tick_memory(
    storage: TickStorage,
    symbol: str,
    start_ts: int,
    end_ts: int,
    *,
    compression_ratio: float = 4.0,
) -> MemoryEstimate:
    """Estimate the memory needed to load tick data (MEM-010).

    Only file sizes on disk are inspected; no Parquet data is read.

    Parameters
    ----------
    storage : TickStorage
        Tick storage instance.
    symbol : str
        Cache symbol (e.g., "BINANCE_SPOT_BTCUSDT").
    start_ts : int
        Start timestamp in milliseconds.
    end_ts : int
        End timestamp in milliseconds.
    compression_ratio : float
        Multiplier applied to the on-disk size to approximate the
        in-memory size. Default 4.0 (described by the original author as
        empirically measured for Binance aggTrades Parquet files).

    Returns
    -------
    MemoryEstimate
        Sizes, file count and available RAM. All-zero sizes are returned
        when the symbol directory is missing or no file falls in range.
    """
    symbol_dir = storage._get_symbol_dir(symbol)

    selected = []
    if symbol_dir.exists():
        candidates = sorted(symbol_dir.glob("*.parquet"))
        # Keep files whose stem sorts between the start and end month keys.
        # NOTE(review): presumably stems are year-month strings that order
        # lexicographically; confirm against TickStorage.
        first_month = storage._timestamp_to_year_month(start_ts)
        last_month = storage._timestamp_to_year_month(end_ts)
        selected = [
            path for path in candidates if first_month <= path.stem <= last_month
        ]

    if not selected:
        # Missing directory and "no matching files" share one empty result.
        return MemoryEstimate(
            parquet_bytes=0,
            estimated_memory_mb=0,
            file_count=0,
            system_available_mb=_get_system_available_mb(),
        )

    size_on_disk = sum(path.stat().st_size for path in selected)
    return MemoryEstimate(
        parquet_bytes=size_on_disk,
        estimated_memory_mb=int(size_on_disk * compression_ratio / (1024 * 1024)),
        file_count=len(selected),
        system_available_mb=_get_system_available_mb(),
    )
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
# ---------------------------------------------------------------------------
|
|
479
|
+
# Platform-specific helpers
|
|
480
|
+
# ---------------------------------------------------------------------------
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
def _get_process_rss_mb() -> int:
    """Get the process resident set size in MB.

    On Linux the current RSS is read from ``VmRSS`` in
    ``/proc/self/status``. On macOS, and as a fallback when that file cannot
    be used, ``ru_maxrss`` from ``resource.getrusage`` is reported; that
    field is the maximum (peak) RSS, not the current one.

    Returns
    -------
    int
        RSS in whole megabytes.
    """
    if sys.platform == "darwin":
        # macOS: getrusage reports ru_maxrss in bytes.
        usage = resource.getrusage(resource.RUSAGE_SELF)
        return int(usage.ru_maxrss // (1024 * 1024))

    # Linux: /proc/self/status has VmRSS in kB.
    try:
        for line in Path("/proc/self/status").read_text().splitlines():
            if line.startswith("VmRSS:"):
                return int(line.split()[1]) // 1024
    except (OSError, ValueError, IndexError):
        # OSError covers a missing file as well as permission or I/O errors
        # (e.g. restricted /proc in a sandbox); fall through to getrusage.
        pass

    # Fallback: ru_maxrss is in kB on Linux.
    usage = resource.getrusage(resource.RUSAGE_SELF)
    return int(usage.ru_maxrss // 1024)
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
def _get_system_total_mb() -> int:
    """Get total system RAM in MB.

    Tries ``sysctl -n hw.memsize`` on macOS or ``MemTotal`` in
    ``/proc/meminfo`` elsewhere, then falls back to ``os.sysconf``.

    Returns
    -------
    int
        Total RAM in whole megabytes, or 0 if it cannot be determined.
    """
    try:
        if sys.platform == "darwin":
            # macOS: sysctl hw.memsize prints the size in bytes.
            result = subprocess.run(
                ["sysctl", "-n", "hw.memsize"],
                capture_output=True,
                text=True,
                check=True,
            )
            return int(result.stdout.strip()) // (1024 * 1024)
        # Linux: /proc/meminfo reports MemTotal in kB.
        for line in Path("/proc/meminfo").read_text().splitlines():
            if line.startswith("MemTotal:"):
                return int(line.split()[1]) // 1024
    except (OSError, ValueError, IndexError, subprocess.SubprocessError):
        # OSError covers a missing binary/file and permission errors;
        # IndexError covers a malformed MemTotal line. Try sysconf next.
        pass

    # Fallback: os.sysconf
    try:
        pages = os.sysconf("SC_PHYS_PAGES")
        page_size = os.sysconf("SC_PAGE_SIZE")
    except (ValueError, OSError):
        return 0
    # sysconf can report -1 for an indeterminate value; never turn that
    # into a negative amount of RAM.
    if pages <= 0 or page_size <= 0:
        return 0
    return (pages * page_size) // (1024 * 1024)
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
def _parse_vm_stat_available_mb(vm_stat_output: str) -> int:
    """Convert ``vm_stat`` text into available MB (free + inactive pages)."""
    page_size = 16384  # Default Apple Silicon
    free_pages = 0
    inactive_pages = 0
    for line in vm_stat_output.splitlines():
        if "page size of" in line:
            # Header looks like "... (page size of 16384 bytes)": take only
            # the number that follows the phrase, not the trailing "bytes".
            page_size = int(line.split("page size of")[1].split()[0])
        elif "Pages free:" in line:
            free_pages = int(line.split(":")[1].strip().rstrip("."))
        elif "Pages inactive:" in line:
            inactive_pages = int(line.split(":")[1].strip().rstrip("."))
    return (free_pages + inactive_pages) * page_size // (1024 * 1024)


def _get_system_available_mb() -> int:
    """Get available system RAM in MB (approximate).

    Uses free + inactive pages from ``vm_stat`` on macOS, or
    ``MemAvailable`` in ``/proc/meminfo`` elsewhere. If neither can be
    read or parsed, assumes half of total RAM is available.

    Returns
    -------
    int
        Available RAM in whole megabytes.
    """
    try:
        if sys.platform == "darwin":
            # macOS: vm_stat gives free + inactive pages
            result = subprocess.run(
                ["vm_stat"],
                capture_output=True,
                text=True,
                check=True,
            )
            return _parse_vm_stat_available_mb(result.stdout)
        # Linux: /proc/meminfo reports MemAvailable in kB.
        for line in Path("/proc/meminfo").read_text().splitlines():
            if line.startswith("MemAvailable:"):
                return int(line.split()[1]) // 1024
    except (OSError, ValueError, IndexError, subprocess.SubprocessError):
        # OSError covers a missing binary/file and permission errors;
        # ValueError/IndexError cover unexpected output formats.
        pass
    # Fallback: assume 50% of total is available
    return _get_system_total_mb() // 2
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Local Parquet storage for tick data (Tier 1 cache).
|
|
2
|
+
|
|
3
|
+
This module provides cross-platform tick data storage using Parquet files
|
|
4
|
+
with ZSTD compression (level 3) for optimal balance of size and speed.
|
|
5
|
+
|
|
6
|
+
Storage Location (via platformdirs):
|
|
7
|
+
- macOS: ~/Library/Caches/rangebar/ticks/
|
|
8
|
+
- Linux: ~/.cache/rangebar/ticks/ (respects XDG_CACHE_HOME)
|
|
9
|
+
- Windows: %USERPROFILE%\\AppData\\Local\\terrylica\\rangebar\\Cache\\ticks\\
|
|
10
|
+
|
|
11
|
+
Examples
|
|
12
|
+
--------
|
|
13
|
+
>>> from rangebar.storage import TickStorage
|
|
14
|
+
>>>
|
|
15
|
+
>>> storage = TickStorage()
|
|
16
|
+
>>> storage.write_ticks("BTCUSDT", trades_df)
|
|
17
|
+
>>> df = storage.read_ticks("BTCUSDT", start_ts, end_ts)
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from .parquet import TickStorage, get_cache_dir
|
|
21
|
+
|
|
22
|
+
__all__ = ["TickStorage", "get_cache_dir"]
|