quantmllibrary 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. quantml/__init__.py +74 -0
  2. quantml/autograd.py +154 -0
  3. quantml/cli/__init__.py +10 -0
  4. quantml/cli/run_experiment.py +385 -0
  5. quantml/config/__init__.py +28 -0
  6. quantml/config/config.py +259 -0
  7. quantml/data/__init__.py +33 -0
  8. quantml/data/cache.py +149 -0
  9. quantml/data/feature_store.py +234 -0
  10. quantml/data/futures.py +254 -0
  11. quantml/data/loaders.py +236 -0
  12. quantml/data/memory_optimizer.py +234 -0
  13. quantml/data/validators.py +390 -0
  14. quantml/experiments/__init__.py +23 -0
  15. quantml/experiments/logger.py +208 -0
  16. quantml/experiments/results.py +158 -0
  17. quantml/experiments/tracker.py +223 -0
  18. quantml/features/__init__.py +25 -0
  19. quantml/features/base.py +104 -0
  20. quantml/features/gap_features.py +124 -0
  21. quantml/features/registry.py +138 -0
  22. quantml/features/volatility_features.py +140 -0
  23. quantml/features/volume_features.py +142 -0
  24. quantml/functional.py +37 -0
  25. quantml/models/__init__.py +27 -0
  26. quantml/models/attention.py +258 -0
  27. quantml/models/dropout.py +130 -0
  28. quantml/models/gru.py +319 -0
  29. quantml/models/linear.py +112 -0
  30. quantml/models/lstm.py +353 -0
  31. quantml/models/mlp.py +286 -0
  32. quantml/models/normalization.py +289 -0
  33. quantml/models/rnn.py +154 -0
  34. quantml/models/tcn.py +238 -0
  35. quantml/online.py +209 -0
  36. quantml/ops.py +1707 -0
  37. quantml/optim/__init__.py +42 -0
  38. quantml/optim/adafactor.py +206 -0
  39. quantml/optim/adagrad.py +157 -0
  40. quantml/optim/adam.py +267 -0
  41. quantml/optim/lookahead.py +97 -0
  42. quantml/optim/quant_optimizer.py +228 -0
  43. quantml/optim/radam.py +192 -0
  44. quantml/optim/rmsprop.py +203 -0
  45. quantml/optim/schedulers.py +286 -0
  46. quantml/optim/sgd.py +181 -0
  47. quantml/py.typed +0 -0
  48. quantml/streaming.py +175 -0
  49. quantml/tensor.py +462 -0
  50. quantml/time_series.py +447 -0
  51. quantml/training/__init__.py +135 -0
  52. quantml/training/alpha_eval.py +203 -0
  53. quantml/training/backtest.py +280 -0
  54. quantml/training/backtest_analysis.py +168 -0
  55. quantml/training/cv.py +106 -0
  56. quantml/training/data_loader.py +177 -0
  57. quantml/training/ensemble.py +84 -0
  58. quantml/training/feature_importance.py +135 -0
  59. quantml/training/features.py +364 -0
  60. quantml/training/futures_backtest.py +266 -0
  61. quantml/training/gradient_clipping.py +206 -0
  62. quantml/training/losses.py +248 -0
  63. quantml/training/lr_finder.py +127 -0
  64. quantml/training/metrics.py +376 -0
  65. quantml/training/regularization.py +89 -0
  66. quantml/training/trainer.py +239 -0
  67. quantml/training/walk_forward.py +190 -0
  68. quantml/utils/__init__.py +51 -0
  69. quantml/utils/gradient_check.py +274 -0
  70. quantml/utils/logging.py +181 -0
  71. quantml/utils/ops_cpu.py +231 -0
  72. quantml/utils/profiling.py +364 -0
  73. quantml/utils/reproducibility.py +220 -0
  74. quantml/utils/serialization.py +335 -0
  75. quantmllibrary-0.1.0.dist-info/METADATA +536 -0
  76. quantmllibrary-0.1.0.dist-info/RECORD +79 -0
  77. quantmllibrary-0.1.0.dist-info/WHEEL +5 -0
  78. quantmllibrary-0.1.0.dist-info/licenses/LICENSE +22 -0
  79. quantmllibrary-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,254 @@
1
+ """
2
+ Futures-specific data handling.
3
+
4
+ Handles contract rolls, holidays, gaps, and session-based data.
5
+ """
6
+
7
+ from typing import List, Dict, Optional, Tuple, Any
8
+ from datetime import datetime, timedelta
9
+
10
+
11
class FuturesDataHandler:
    """
    Handler for futures-specific data operations.

    Features:
    - Contract roll detection and handling
    - Holiday and partial session handling
    - Overnight gap detection
    - Session-based filtering (RTH vs ETH)
    """

    # Contract multipliers keyed by upper-case instrument symbol.
    _CONTRACT_SIZES = {
        'ES': 50.0,
        'MES': 5.0,
        'NQ': 20.0,
        'MNQ': 2.0,
        'YM': 5.0,
        'MYM': 0.5,
    }
    # Multiplier used when the instrument symbol is not in the table above.
    _DEFAULT_CONTRACT_SIZE = 50.0

    # Regular trading hours expressed as minutes since midnight: [09:30, 16:00).
    _RTH_OPEN_MINUTE = 9 * 60 + 30
    _RTH_CLOSE_MINUTE = 16 * 60

    def __init__(
        self,
        instrument: str,
        roll_method: str = "volume"  # "volume", "open_interest", "date"
    ):
        """
        Initialize futures data handler.

        Args:
            instrument: Instrument symbol (ES, MES, NQ, MNQ)
            roll_method: Method for detecting rolls ("volume", "open_interest", "date")
        """
        self.instrument = instrument
        self.roll_method = roll_method
        self.contract_size = self._get_contract_size(instrument)

    def _get_contract_size(self, instrument: str) -> float:
        """
        Get contract multiplier for instrument.

        The lookup is case-insensitive; unknown symbols fall back to 50.0.
        """
        return self._CONTRACT_SIZES.get(instrument.upper(), self._DEFAULT_CONTRACT_SIZE)

    def detect_contract_rolls(
        self,
        prices: List[float],
        volumes: Optional[List[float]] = None,
        dates: Optional[List[datetime]] = None
    ) -> List[int]:
        """
        Detect contract roll dates.

        Args:
            prices: Price data (not used by the current detection methods)
            volumes: Volume data (for volume-based detection)
            dates: Date data (for date-based detection)

        Returns:
            List of indices where rolls occur. Any roll method other than
            "volume" or "date" (including "open_interest") yields an empty list.

        Raises:
            ValueError: If the data required by the configured roll method
                is not supplied.
        """
        if self.roll_method == "volume":
            if volumes is None:
                raise ValueError("Volume data required for volume-based roll detection")

            # A roll is flagged when volume falls below 30% of the previous bar
            # (i.e. drops by more than 70%).
            return [
                i for i in range(1, len(volumes))
                if volumes[i] < volumes[i - 1] * 0.3
            ]

        if self.roll_method == "date":
            if dates is None:
                raise ValueError("Date data required for date-based roll detection")

            # Third Friday of the month: a Friday (weekday 4) falling on day 15-21.
            return [
                i for i, date in enumerate(dates)
                if date.weekday() == 4 and 15 <= date.day <= 21
            ]

        # NOTE(review): "open_interest" is advertised but has no detection
        # logic here, so it returns no rolls.
        return []

    def detect_overnight_gaps(
        self,
        closes: List[float],
        opens: List[float],
        dates: Optional[List[datetime]] = None
    ) -> List[Dict[str, Any]]:
        """
        Detect overnight gaps.

        For each bar i >= 1 the gap is (opens[i] - closes[i-1]) / closes[i-1].
        Bars whose previous close is not positive are skipped.

        Args:
            closes: Closing prices
            opens: Opening prices
            dates: Date data (to identify holiday gaps)

        Returns:
            List of gap dictionaries with keys 'index', 'gap', 'gap_size',
            'is_holiday', 'prev_close' and 'current_open'
        """
        gaps = []

        for i in range(1, len(opens)):
            prev_close = closes[i - 1]
            if prev_close <= 0:
                # Relative gap is undefined without a positive previous close.
                continue

            gap = (opens[i] - prev_close) / prev_close

            # Flag as holiday gap when more than 1 day separates the two bars.
            # NOTE(review): this also flags ordinary weekends - confirm intended.
            is_holiday = False
            if dates and i < len(dates):
                is_holiday = (dates[i] - dates[i - 1]).days > 1

            gaps.append({
                'index': i,
                'gap': gap,
                'gap_size': abs(gap),
                'is_holiday': is_holiday,
                'prev_close': prev_close,
                'current_open': opens[i]
            })

        return gaps

    def filter_session(
        self,
        data: Dict[str, List[Any]],
        session_type: str = "RTH",
        timestamps: Optional[List[datetime]] = None
    ) -> Dict[str, List[Any]]:
        """
        Filter data by trading session.

        Args:
            data: Data dictionary; every list is indexed in step with timestamps
            session_type: "RTH" (9:30-16:00 ET) or "ETH" (all hours)
            timestamps: Timestamp data. The hour/minute of each timestamp is
                compared as-is (no timezone conversion is performed), so the
                values must already be expressed in ET.

        Returns:
            Filtered data dictionary. The input dictionary itself is returned
            unchanged for "ETH" or when no timestamps are given.
        """
        if session_type == "ETH":
            return data  # No filtering

        if timestamps is None:
            # Assume all data is RTH if no timestamps
            return data

        # RTH: 9:30 AM (inclusive) - 4:00 PM (exclusive).
        # Comparing minutes since midnight keeps 09:00-09:29 out of the session;
        # an hour-only test such as `9 <= hour < 16` would let those bars in.
        # In production, use proper timezone handling.
        kept = [
            i for i, ts in enumerate(timestamps)
            if self._RTH_OPEN_MINUTE <= ts.hour * 60 + ts.minute < self._RTH_CLOSE_MINUTE
        ]

        return {key: [values[i] for i in kept] for key, values in data.items()}

    def handle_missing_data(
        self,
        data: Dict[str, List[float]],
        method: str = "forward_fill"
    ) -> Dict[str, List[float]]:
        """
        Handle missing data (NaN, None, zeros).

        A value counts as missing when it is None, or a float that is NaN or
        exactly 0.0 (an integer 0 is kept as a valid value).

        Args:
            data: Data dictionary
            method: "forward_fill", "backward_fill", "interpolate", "drop"

        Returns:
            New data dictionary with missing values handled:
            - "forward_fill": replaced by the last valid value, or 0.0 when
              none has been seen yet
            - "backward_fill": replaced by the next non-zero value, 0.0 if none
            - "drop": removed (each column is processed independently, so
              columns may end up with different lengths)
            - anything else: replaced by 0.0
        """
        cleaned_data = {}

        for key, values in data.items():
            cleaned = []
            last_valid = None

            for val in values:
                # `val != val` is only true for NaN.
                is_missing = val is None or (
                    isinstance(val, float) and (val != val or val == 0.0)
                )

                if not is_missing:
                    cleaned.append(val)
                    last_valid = val
                elif method == "forward_fill" and last_valid is not None:
                    cleaned.append(last_valid)
                elif method == "drop":
                    continue  # Skip
                else:
                    # NOTE(review): "interpolate" is advertised but lands here
                    # and produces the 0.0 default.
                    cleaned.append(0.0)

            if method == "backward_fill":
                # Missing entries were stored as 0.0 above; walk right-to-left
                # so each one picks up the nearest following non-zero value.
                for i in range(len(cleaned) - 2, -1, -1):
                    if cleaned[i] == 0.0 and cleaned[i + 1] != 0.0:
                        cleaned[i] = cleaned[i + 1]

            cleaned_data[key] = cleaned

        return cleaned_data

    def align_contract_data(
        self,
        front_month: Dict[str, List[float]],
        back_month: Dict[str, List[float]],
        roll_indices: List[int]
    ) -> Dict[str, List[float]]:
        """
        Align data across contract rolls.

        Only keys present in both contracts are aligned; the output has the
        same length as the front-month series for each key.

        Args:
            front_month: Front month contract data
            back_month: Back month contract data
            roll_indices: Indices where rolls occur

        Returns:
            Aligned data dictionary
        """
        # Set gives O(1) membership tests inside the per-bar loop.
        roll_set = set(roll_indices)
        aligned = {}

        for key, front_data in front_month.items():
            if key not in back_month:
                continue

            back_data = back_month[key]

            # NOTE(review): back-month data is substituted only AT each roll
            # index (when the back series is long enough); every other bar,
            # including those after a roll, keeps the front-month value.
            # Confirm whether "after roll" should switch permanently.
            aligned[key] = [
                back_data[i] if (i in roll_set and i < len(back_data)) else front_data[i]
                for i in range(len(front_data))
            ]

        return aligned
254
+
@@ -0,0 +1,236 @@
1
+ """
2
+ Standardized data loaders for market data.
3
+ """
4
+
5
+ from typing import List, Optional, Dict, Any, Callable
6
+ import os
7
+
8
+ # Try to import pandas
9
+ try:
10
+ import pandas as pd
11
+ HAS_PANDAS = True
12
+ except ImportError:
13
+ HAS_PANDAS = False
14
+ pd = None
15
+
16
+
17
def load_csv_data(
    filepath: str,
    price_column: str = "close",
    volume_column: str = "volume",
    timestamp_column: Optional[str] = "timestamp",
    date_format: Optional[str] = None
) -> Dict[str, List]:
    """
    Read a CSV of market data and return its columns as plain lists.

    Args:
        filepath: Path to CSV file
        price_column: Name of price column
        volume_column: Name of volume column
        timestamp_column: Name of timestamp column (optional); silently
            skipped when the column is absent from the file
        date_format: Date format string (optional) passed to
            ``pd.to_datetime`` when parsing timestamps

    Returns:
        Dictionary with 'prices', 'volumes', and optionally 'timestamps'

    Raises:
        ImportError: If pandas is not installed
        FileNotFoundError: If ``filepath`` does not exist
        ValueError: If the price or volume column is missing
    """
    if not HAS_PANDAS:
        raise ImportError("pandas required for CSV loading. Install with: pip install pandas")
    if not os.path.exists(filepath):
        raise FileNotFoundError(f"Data file not found: {filepath}")

    frame = pd.read_csv(filepath)
    available = frame.columns

    # Both mandatory columns must be present before anything is extracted.
    if price_column not in available:
        raise ValueError(f"Price column '{price_column}' not found in CSV")
    if volume_column not in available:
        raise ValueError(f"Volume column '{volume_column}' not found in CSV")

    loaded = {
        'prices': frame[price_column].tolist(),
        'volumes': frame[volume_column].tolist()
    }

    # Timestamps are optional: no column name, or column not in file -> done.
    if not timestamp_column or timestamp_column not in available:
        return loaded

    raw_timestamps = frame[timestamp_column]
    if date_format:
        parsed = pd.to_datetime(raw_timestamps, format=date_format)
    else:
        parsed = pd.to_datetime(raw_timestamps)
    loaded['timestamps'] = parsed.tolist()

    return loaded
70
+
71
+
72
def load_dataframe(
    df: Any,
    price_column: str = "close",
    volume_column: str = "volume",
    timestamp_column: Optional[str] = "timestamp"
) -> Dict[str, List]:
    """
    Extract market data columns from a pandas DataFrame as plain lists.

    Args:
        df: Pandas DataFrame
        price_column: Name of price column
        volume_column: Name of volume column
        timestamp_column: Name of timestamp column (optional); silently
            skipped when the column is absent from the frame

    Returns:
        Dictionary with 'prices', 'volumes', and optionally 'timestamps'

    Raises:
        ImportError: If pandas is not installed
        TypeError: If ``df`` is not a pandas DataFrame
        ValueError: If the price or volume column is missing
    """
    if not HAS_PANDAS:
        raise ImportError("pandas required. Install with: pip install pandas")
    if not isinstance(df, pd.DataFrame):
        raise TypeError("df must be a pandas DataFrame")

    available = df.columns

    # Both mandatory columns must be present before anything is extracted.
    if price_column not in available:
        raise ValueError(f"Price column '{price_column}' not found")
    if volume_column not in available:
        raise ValueError(f"Volume column '{volume_column}' not found")

    loaded = {
        'prices': df[price_column].tolist(),
        'volumes': df[volume_column].tolist()
    }

    # Timestamps are optional: no column name, or column not in frame -> done.
    if not timestamp_column or timestamp_column not in available:
        return loaded

    loaded['timestamps'] = pd.to_datetime(df[timestamp_column]).tolist()
    return loaded
118
+
119
+
120
class DataLoader:
    """Generic data loader with validation."""

    def __init__(
        self,
        load_fn: Callable,
        validate: bool = True,
        handle_missing: str = "forward_fill"
    ):
        """
        Initialize data loader.

        Args:
            load_fn: Function to load data; expected to return a dictionary
                that may contain 'prices', 'volumes' and 'timestamps'
            validate: Whether to validate loaded data
            handle_missing: How to handle missing values (forward_fill, drop,
                interpolate). Note that "drop" - like any unrecognised value -
                currently leaves the loaded data unchanged.
        """
        self.load_fn = load_fn
        self.validate = validate
        self.handle_missing = handle_missing

    def load(
        self,
        *args,
        **kwargs
    ) -> Dict[str, List]:
        """
        Load data using the configured function.

        Validation (when enabled) runs on the raw data, before any
        missing-value handling is applied.

        Args:
            *args: Arguments for load function
            **kwargs: Keyword arguments for load function

        Returns:
            Dictionary with loaded data

        Raises:
            ValueError: If validation is enabled and the data fails it
        """
        data = self.load_fn(*args, **kwargs)

        if self.validate:
            # Imported lazily so the validators module is only needed when
            # validation is actually requested.
            from quantml.data.validators import validate_price_data
            is_valid, errors = validate_price_data(
                data.get('prices', []),
                data.get('volumes'),
                data.get('timestamps')
            )
            if not is_valid:
                raise ValueError(f"Data validation failed: {errors}")

        # Handle missing values ("drop" is passed through untouched)
        if self.handle_missing != "drop":
            data = self._handle_missing(data)

        return data

    def _handle_missing(self, data: Dict[str, List]) -> Dict[str, List]:
        """Dispatch to the configured missing-value strategy; unknown strategies return data as-is."""
        if self.handle_missing == "forward_fill":
            return self._forward_fill(data)
        if self.handle_missing == "interpolate":
            return self._interpolate(data)
        return data

    @staticmethod
    def _fill_forward_in_place(values: List) -> None:
        """Replace each None/NaN entry of ``values`` with the last valid entry before it.

        Leading missing entries (with no valid predecessor) are left untouched.
        The list is modified in place.
        """
        import math

        last_valid = None
        for i, value in enumerate(values):
            if value is None or (isinstance(value, float) and math.isnan(value)):
                if last_valid is not None:
                    values[i] = last_valid
            else:
                last_valid = value

    def _forward_fill(self, data: Dict[str, List]) -> Dict[str, List]:
        """Forward fill missing values in 'prices' and 'volumes'.

        The lists held by ``data`` are filled in place and the same dictionary
        is returned. A 'prices' key is always present in the result; 'volumes'
        is only touched when it is a non-empty list.
        """
        prices = data.get('prices', [])
        volumes = data.get('volumes', [])

        self._fill_forward_in_place(prices)
        data['prices'] = prices

        if volumes:
            self._fill_forward_in_place(volumes)
            data['volumes'] = volumes

        return data

    def _interpolate(self, data: Dict[str, List]) -> Dict[str, List]:
        """Linearly interpolate missing values in 'prices' and 'volumes'.

        Falls back to forward fill when pandas is unavailable.
        """
        if not HAS_PANDAS:
            # Fallback to forward fill
            return self._forward_fill(data)

        prices = data.get('prices', [])
        data['prices'] = pd.Series(prices).interpolate(method='linear').tolist()

        # Skip a volumes entry that is None so it is not silently turned
        # into an empty list by the Series round-trip.
        if data.get('volumes') is not None:
            data['volumes'] = pd.Series(data['volumes']).interpolate(method='linear').tolist()

        return data
236
+