quantmllibrary 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quantml/__init__.py +74 -0
- quantml/autograd.py +154 -0
- quantml/cli/__init__.py +10 -0
- quantml/cli/run_experiment.py +385 -0
- quantml/config/__init__.py +28 -0
- quantml/config/config.py +259 -0
- quantml/data/__init__.py +33 -0
- quantml/data/cache.py +149 -0
- quantml/data/feature_store.py +234 -0
- quantml/data/futures.py +254 -0
- quantml/data/loaders.py +236 -0
- quantml/data/memory_optimizer.py +234 -0
- quantml/data/validators.py +390 -0
- quantml/experiments/__init__.py +23 -0
- quantml/experiments/logger.py +208 -0
- quantml/experiments/results.py +158 -0
- quantml/experiments/tracker.py +223 -0
- quantml/features/__init__.py +25 -0
- quantml/features/base.py +104 -0
- quantml/features/gap_features.py +124 -0
- quantml/features/registry.py +138 -0
- quantml/features/volatility_features.py +140 -0
- quantml/features/volume_features.py +142 -0
- quantml/functional.py +37 -0
- quantml/models/__init__.py +27 -0
- quantml/models/attention.py +258 -0
- quantml/models/dropout.py +130 -0
- quantml/models/gru.py +319 -0
- quantml/models/linear.py +112 -0
- quantml/models/lstm.py +353 -0
- quantml/models/mlp.py +286 -0
- quantml/models/normalization.py +289 -0
- quantml/models/rnn.py +154 -0
- quantml/models/tcn.py +238 -0
- quantml/online.py +209 -0
- quantml/ops.py +1707 -0
- quantml/optim/__init__.py +42 -0
- quantml/optim/adafactor.py +206 -0
- quantml/optim/adagrad.py +157 -0
- quantml/optim/adam.py +267 -0
- quantml/optim/lookahead.py +97 -0
- quantml/optim/quant_optimizer.py +228 -0
- quantml/optim/radam.py +192 -0
- quantml/optim/rmsprop.py +203 -0
- quantml/optim/schedulers.py +286 -0
- quantml/optim/sgd.py +181 -0
- quantml/py.typed +0 -0
- quantml/streaming.py +175 -0
- quantml/tensor.py +462 -0
- quantml/time_series.py +447 -0
- quantml/training/__init__.py +135 -0
- quantml/training/alpha_eval.py +203 -0
- quantml/training/backtest.py +280 -0
- quantml/training/backtest_analysis.py +168 -0
- quantml/training/cv.py +106 -0
- quantml/training/data_loader.py +177 -0
- quantml/training/ensemble.py +84 -0
- quantml/training/feature_importance.py +135 -0
- quantml/training/features.py +364 -0
- quantml/training/futures_backtest.py +266 -0
- quantml/training/gradient_clipping.py +206 -0
- quantml/training/losses.py +248 -0
- quantml/training/lr_finder.py +127 -0
- quantml/training/metrics.py +376 -0
- quantml/training/regularization.py +89 -0
- quantml/training/trainer.py +239 -0
- quantml/training/walk_forward.py +190 -0
- quantml/utils/__init__.py +51 -0
- quantml/utils/gradient_check.py +274 -0
- quantml/utils/logging.py +181 -0
- quantml/utils/ops_cpu.py +231 -0
- quantml/utils/profiling.py +364 -0
- quantml/utils/reproducibility.py +220 -0
- quantml/utils/serialization.py +335 -0
- quantmllibrary-0.1.0.dist-info/METADATA +536 -0
- quantmllibrary-0.1.0.dist-info/RECORD +79 -0
- quantmllibrary-0.1.0.dist-info/WHEEL +5 -0
- quantmllibrary-0.1.0.dist-info/licenses/LICENSE +22 -0
- quantmllibrary-0.1.0.dist-info/top_level.txt +1 -0
quantml/data/futures.py
ADDED
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Futures-specific data handling.
|
|
3
|
+
|
|
4
|
+
Handles contract rolls, holidays, gaps, and session-based data.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import List, Dict, Optional, Tuple, Any
|
|
8
|
+
from datetime import datetime, timedelta
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class FuturesDataHandler:
    """
    Handler for futures-specific data operations.

    Features:
    - Contract roll detection and handling
    - Holiday and partial session handling
    - Overnight gap detection
    - Session-based filtering (RTH vs ETH)
    """

    # Contract multipliers keyed by upper-case instrument symbol.
    _CONTRACT_SIZES = {
        'ES': 50.0,
        'MES': 5.0,
        'NQ': 20.0,
        'MNQ': 2.0,
        'YM': 5.0,
        'MYM': 0.5
    }
    # Multiplier returned for symbols that are not in _CONTRACT_SIZES.
    _DEFAULT_CONTRACT_SIZE = 50.0
    # A bar is flagged as a roll when its volume is below this fraction of
    # the previous bar's volume (i.e. volume dropped by more than 70%).
    _ROLL_VOLUME_RATIO = 0.3
    # Regular trading hours expressed as minutes since midnight: [09:30, 16:00).
    _RTH_OPEN_MINUTE = 9 * 60 + 30
    _RTH_CLOSE_MINUTE = 16 * 60

    def __init__(
        self,
        instrument: str,
        roll_method: str = "volume"  # "volume", "open_interest", "date"
    ):
        """
        Initialize futures data handler.

        Args:
            instrument: Instrument symbol (ES, MES, NQ, MNQ)
            roll_method: Method for detecting rolls ("volume", "open_interest", "date")
        """
        self.instrument = instrument
        self.roll_method = roll_method
        self.contract_size = self._get_contract_size(instrument)

    def _get_contract_size(self, instrument: str) -> float:
        """
        Get contract multiplier for instrument.

        The lookup is case-insensitive; unknown symbols fall back to 50.0.
        """
        return self._CONTRACT_SIZES.get(instrument.upper(), self._DEFAULT_CONTRACT_SIZE)

    def detect_contract_rolls(
        self,
        prices: List[float],
        volumes: Optional[List[float]] = None,
        dates: Optional[List[datetime]] = None
    ) -> List[int]:
        """
        Detect contract roll dates.

        Args:
            prices: Price data (accepted for interface symmetry; not used by
                any of the detection methods below)
            volumes: Volume data (for volume-based detection)
            dates: Date data (for date-based detection)

        Returns:
            List of indices where rolls occur. Empty for any roll_method other
            than "volume" or "date" (including "open_interest", which has no
            detection logic here).

        Raises:
            ValueError: If the data required by the configured roll_method
                was not supplied.
        """
        if self.roll_method == "volume":
            if volumes is None:
                raise ValueError("Volume data required for volume-based roll detection")

            # Detect roll when volume drops significantly versus the prior bar.
            return [
                i for i in range(1, len(volumes))
                if volumes[i] < volumes[i - 1] * self._ROLL_VOLUME_RATIO
            ]

        if self.roll_method == "date":
            if dates is None:
                raise ValueError("Date data required for date-based roll detection")

            # The third Friday of a month is the Friday (weekday 4) whose
            # day-of-month falls in 15..21.
            return [
                i for i, date in enumerate(dates)
                if date.weekday() == 4 and 15 <= date.day <= 21
            ]

        return []

    def detect_overnight_gaps(
        self,
        closes: List[float],
        opens: List[float],
        dates: Optional[List[datetime]] = None
    ) -> List[Dict[str, Any]]:
        """
        Detect overnight gaps.

        For each bar i >= 1 the gap is (opens[i] - closes[i-1]) / closes[i-1].
        Bars whose previous close is not positive are skipped.

        Args:
            closes: Closing prices
            opens: Opening prices
            dates: Date data (to identify holiday gaps)

        Returns:
            List of gap dictionaries with keys 'index', 'gap', 'gap_size',
            'is_holiday', 'prev_close' and 'current_open'. 'is_holiday' is
            True whenever more than one whole day separates the two dates,
            so ordinary weekends are flagged as well.
        """
        gaps = []

        # Bar i needs closes[i-1]; stop early if closes is the shorter series
        # instead of raising IndexError.
        last_index = min(len(opens), len(closes) + 1)

        for i in range(1, last_index):
            prev_close = closes[i - 1]
            if prev_close <= 0:
                continue

            gap = (opens[i] - prev_close) / prev_close

            # Check if holiday gap (more than 1 day between dates)
            is_holiday = False
            if dates and i < len(dates):
                is_holiday = (dates[i] - dates[i - 1]).days > 1

            gaps.append({
                'index': i,
                'gap': gap,
                'gap_size': abs(gap),
                'is_holiday': is_holiday,
                'prev_close': prev_close,
                'current_open': opens[i]
            })

        return gaps

    def filter_session(
        self,
        data: Dict[str, List[Any]],
        session_type: str = "RTH",
        timestamps: Optional[List[datetime]] = None
    ) -> Dict[str, List[Any]]:
        """
        Filter data by trading session.

        Args:
            data: Data dictionary; every list is indexed in step with timestamps
            session_type: "RTH" (9:30-16:00 ET) or "ETH" (all hours)
            timestamps: Timestamp data

        Returns:
            Filtered data dictionary. The input dictionary itself is returned
            unchanged for "ETH" or when no timestamps are given.
        """
        if session_type == "ETH":
            return data  # No filtering

        if timestamps is None:
            # Assume all data is RTH if no timestamps
            return data

        # RTH: 9:30 AM (inclusive) - 4:00 PM (exclusive).
        # No timezone conversion is performed: the hour/minute fields are
        # compared as-is, so timestamps must already be expressed in ET.
        # In production, use proper timezone handling.
        filtered_indices = [
            i for i, ts in enumerate(timestamps)
            if self._RTH_OPEN_MINUTE <= ts.hour * 60 + ts.minute < self._RTH_CLOSE_MINUTE
        ]

        return {
            key: [values[i] for i in filtered_indices]
            for key, values in data.items()
        }

    @staticmethod
    def _is_missing(val: Any) -> bool:
        """Return True for None, NaN, or a float equal to zero."""
        if val is None:
            return True
        # val != val is only true for NaN. Integer zeros are deliberately
        # not treated as missing; only float zeros are.
        return isinstance(val, float) and (val != val or val == 0.0)

    @staticmethod
    def _fill_from_neighbour(cleaned: List[Any], missing: List[bool], order: Any) -> None:
        """Overwrite missing slots with the most recent valid value seen while walking `order`."""
        last_valid = None
        for i in order:
            if not missing[i]:
                last_valid = cleaned[i]
            elif last_valid is not None:
                cleaned[i] = last_valid

    @staticmethod
    def _interpolate_gaps(cleaned: List[Any], missing: List[bool]) -> None:
        """Linearly interpolate missing slots in place; edge gaps take the nearest valid value."""
        valid_idx = [i for i, gap in enumerate(missing) if not gap]
        if not valid_idx:
            return  # Nothing to anchor on; the 0.0 placeholders stay.

        first, last = valid_idx[0], valid_idx[-1]
        for i in range(first):
            cleaned[i] = cleaned[first]
        for i in range(last + 1, len(cleaned)):
            cleaned[i] = cleaned[last]

        for left, right in zip(valid_idx, valid_idx[1:]):
            span = right - left
            if span > 1:
                step = (cleaned[right] - cleaned[left]) / span
                for offset in range(1, span):
                    cleaned[left + offset] = cleaned[left] + step * offset

    def handle_missing_data(
        self,
        data: Dict[str, List[float]],
        method: str = "forward_fill"
    ) -> Dict[str, List[float]]:
        """
        Handle missing data (NaN, None, zeros).

        A value is missing when it is None, NaN, or a float equal to 0.0.

        Args:
            data: Data dictionary
            method: "forward_fill", "backward_fill", "interpolate", "drop"
                - forward_fill: repeat the previous valid value
                - backward_fill: repeat the next valid value
                - interpolate: linear between the surrounding valid values;
                  leading/trailing gaps take the nearest valid value
                - drop: remove missing values from each list independently,
                  so lists may end up with different lengths
                Any other value leaves 0.0 in place of missing entries.

        Returns:
            New data dictionary with missing values handled. Gaps that have
            no valid value to fill from are set to 0.0.
        """
        cleaned_data = {}

        for key, values in data.items():
            if method == "drop":
                cleaned_data[key] = [val for val in values if not self._is_missing(val)]
                continue

            # Track which slots were missing so fills never touch valid
            # values that merely happen to compare equal to zero.
            missing = [self._is_missing(val) for val in values]
            cleaned = [0.0 if gap else val for val, gap in zip(values, missing)]

            if method == "forward_fill":
                self._fill_from_neighbour(cleaned, missing, range(len(cleaned)))
            elif method == "backward_fill":
                self._fill_from_neighbour(cleaned, missing, range(len(cleaned) - 1, -1, -1))
            elif method == "interpolate":
                self._interpolate_gaps(cleaned, missing)

            cleaned_data[key] = cleaned

        return cleaned_data

    def align_contract_data(
        self,
        front_month: Dict[str, List[float]],
        back_month: Dict[str, List[float]],
        roll_indices: List[int]
    ) -> Dict[str, List[float]]:
        """
        Align data across contract rolls.

        Only keys present in both dictionaries are kept. The output follows
        the front-month series, except that at each index listed in
        roll_indices the back-month value is substituted when the back-month
        list is long enough. Indices after a roll revert to front-month data.

        Args:
            front_month: Front month contract data
            back_month: Back month contract data
            roll_indices: Indices where rolls occur

        Returns:
            Aligned data dictionary
        """
        # Set membership keeps the per-row check O(1).
        roll_set = set(roll_indices)
        aligned = {}

        for key, front_data in front_month.items():
            if key not in back_month:
                continue

            back_data = back_month[key]
            aligned[key] = [
                back_data[i] if i in roll_set and i < len(back_data) else front_value
                for i, front_value in enumerate(front_data)
            ]

        return aligned
|
|
254
|
+
|
quantml/data/loaders.py
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Standardized data loaders for market data.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import List, Optional, Dict, Any, Callable
|
|
6
|
+
import os
|
|
7
|
+
|
|
8
|
+
# Try to import pandas
|
|
9
|
+
try:
|
|
10
|
+
import pandas as pd
|
|
11
|
+
HAS_PANDAS = True
|
|
12
|
+
except ImportError:
|
|
13
|
+
HAS_PANDAS = False
|
|
14
|
+
pd = None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def load_csv_data(
    filepath: str,
    price_column: str = "close",
    volume_column: str = "volume",
    timestamp_column: Optional[str] = "timestamp",
    date_format: Optional[str] = None
) -> Dict[str, List]:
    """
    Read a market-data CSV into plain Python lists.

    Args:
        filepath: Location of the CSV file on disk
        price_column: Column holding prices
        volume_column: Column holding volumes
        timestamp_column: Column holding timestamps; skipped when falsy or
            when the CSV has no such column
        date_format: Explicit format passed to pandas when parsing
            timestamps; when omitted pandas infers the format

    Returns:
        Dictionary with 'prices', 'volumes', and optionally 'timestamps'

    Raises:
        ImportError: pandas is not installed
        FileNotFoundError: filepath does not exist
        ValueError: the price or volume column is absent from the CSV
    """
    # Guard clauses first: dependency, then file presence.
    if not HAS_PANDAS:
        raise ImportError("pandas required for CSV loading. Install with: pip install pandas")
    if not os.path.exists(filepath):
        raise FileNotFoundError(f"Data file not found: {filepath}")

    frame = pd.read_csv(filepath)
    available = frame.columns

    # Both mandatory columns must be present before anything is extracted.
    if price_column not in available:
        raise ValueError(f"Price column '{price_column}' not found in CSV")
    if volume_column not in available:
        raise ValueError(f"Volume column '{volume_column}' not found in CSV")

    loaded = {
        'prices': frame[price_column].tolist(),
        'volumes': frame[volume_column].tolist()
    }

    # Timestamps are optional: a missing column is not an error.
    if not (timestamp_column and timestamp_column in available):
        return loaded

    raw_stamps = frame[timestamp_column]
    if date_format:
        parsed = pd.to_datetime(raw_stamps, format=date_format)
    else:
        parsed = pd.to_datetime(raw_stamps)
    loaded['timestamps'] = parsed.tolist()

    return loaded
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def load_dataframe(
    df: Any,
    price_column: str = "close",
    volume_column: str = "volume",
    timestamp_column: Optional[str] = "timestamp"
) -> Dict[str, List]:
    """
    Pull price, volume and timestamp lists out of a pandas DataFrame.

    Args:
        df: Pandas DataFrame
        price_column: Column holding prices
        volume_column: Column holding volumes
        timestamp_column: Column holding timestamps; skipped when falsy or
            when the frame has no such column

    Returns:
        Dictionary with 'prices', 'volumes', and optionally 'timestamps'

    Raises:
        ImportError: pandas is not installed
        TypeError: df is not a pandas DataFrame
        ValueError: the price or volume column is absent from the frame
    """
    if not HAS_PANDAS:
        raise ImportError("pandas required. Install with: pip install pandas")
    if not isinstance(df, pd.DataFrame):
        raise TypeError("df must be a pandas DataFrame")

    available = df.columns

    # Both mandatory columns must be present before anything is extracted.
    if price_column not in available:
        raise ValueError(f"Price column '{price_column}' not found")
    if volume_column not in available:
        raise ValueError(f"Volume column '{volume_column}' not found")

    loaded = {
        'prices': df[price_column].tolist(),
        'volumes': df[volume_column].tolist()
    }

    # Timestamps are optional: a missing column is not an error.
    wants_timestamps = bool(timestamp_column) and timestamp_column in available
    if wants_timestamps:
        loaded['timestamps'] = pd.to_datetime(df[timestamp_column]).tolist()

    return loaded
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class DataLoader:
    """Generic data loader with validation."""

    def __init__(
        self,
        load_fn: Callable,
        validate: bool = True,
        handle_missing: str = "forward_fill"
    ):
        """
        Initialize data loader.

        Args:
            load_fn: Function to load data; must return a dictionary
            validate: Whether to validate loaded data
            handle_missing: How to handle missing values (forward_fill, drop,
                interpolate). Any other value leaves the data untouched.
        """
        self.load_fn = load_fn
        self.validate = validate
        self.handle_missing = handle_missing

    def load(
        self,
        *args,
        **kwargs
    ) -> Dict[str, List]:
        """
        Load data using the configured function.

        Validation, when enabled, runs on the data as loaded, before missing
        values are handled.

        Args:
            *args: Arguments for load function
            **kwargs: Keyword arguments for load function

        Returns:
            Dictionary with loaded data

        Raises:
            ValueError: If validation is enabled and the data fails it
        """
        data = self.load_fn(*args, **kwargs)

        if self.validate:
            # Imported lazily so the validators module is only needed when
            # validation is actually requested.
            from quantml.data.validators import validate_price_data
            is_valid, errors = validate_price_data(
                data.get('prices', []),
                data.get('volumes'),
                data.get('timestamps')
            )
            if not is_valid:
                raise ValueError(f"Data validation failed: {errors}")

        # Handle missing values
        return self._handle_missing(data)

    def _handle_missing(self, data: Dict[str, List]) -> Dict[str, List]:
        """Dispatch to the handler selected by `handle_missing`; unknown modes return data as-is."""
        if self.handle_missing == "forward_fill":
            return self._forward_fill(data)
        elif self.handle_missing == "interpolate":
            return self._interpolate(data)
        elif self.handle_missing == "drop":
            return self._drop_missing(data)
        else:
            return data

    @staticmethod
    def _is_missing(value: Any) -> bool:
        """Return True when value is None or a float NaN."""
        import math

        return value is None or (isinstance(value, float) and math.isnan(value))

    def _fill_forward_in_place(self, values: List) -> None:
        """Replace each missing entry with the last valid entry before it, mutating `values`."""
        last_valid = None
        for i, value in enumerate(values):
            if not self._is_missing(value):
                last_valid = value
            elif last_valid is not None:
                # Leading gaps have nothing to copy from and are left alone.
                values[i] = last_valid

    def _forward_fill(self, data: Dict[str, List]) -> Dict[str, List]:
        """
        Forward fill missing values in 'prices' and 'volumes'.

        The lists are filled in place. Missing values at the start of a
        list are left unchanged because there is no earlier value to copy.
        """
        prices = data.get('prices', [])
        self._fill_forward_in_place(prices)
        data['prices'] = prices

        volumes = data.get('volumes', [])
        if volumes:
            self._fill_forward_in_place(volumes)
            data['volumes'] = volumes

        return data

    def _drop_missing(self, data: Dict[str, List]) -> Dict[str, List]:
        """
        Drop every row whose price or volume is missing.

        The same rows are removed from every list in `data` that has the
        same length as 'prices', so the columns stay aligned.
        """
        prices = data.get('prices', [])
        volumes = data.get('volumes') or []
        row_count = len(prices)

        keep = [
            i for i, price in enumerate(prices)
            if not self._is_missing(price)
            and not (i < len(volumes) and self._is_missing(volumes[i]))
        ]
        if len(keep) == row_count:
            return data  # Nothing to drop.

        for key in list(data):
            column = data[key]
            if isinstance(column, list) and len(column) == row_count:
                data[key] = [column[i] for i in keep]

        return data

    def _interpolate(self, data: Dict[str, List]) -> Dict[str, List]:
        """
        Linearly interpolate missing values in 'prices' and 'volumes'.

        Falls back to forward fill when pandas is unavailable.
        """
        if not HAS_PANDAS:
            # Fallback to forward fill
            return self._forward_fill(data)

        prices = data.get('prices', [])

        # Convert to Series and interpolate
        prices_series = pd.Series(prices)
        prices_series = prices_series.interpolate(method='linear')
        data['prices'] = prices_series.tolist()

        if 'volumes' in data:
            volumes_series = pd.Series(data['volumes'])
            volumes_series = volumes_series.interpolate(method='linear')
            data['volumes'] = volumes_series.tolist()

        return data
|
|
236
|
+
|