quantmllibrary-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. quantml/__init__.py +74 -0
  2. quantml/autograd.py +154 -0
  3. quantml/cli/__init__.py +10 -0
  4. quantml/cli/run_experiment.py +385 -0
  5. quantml/config/__init__.py +28 -0
  6. quantml/config/config.py +259 -0
  7. quantml/data/__init__.py +33 -0
  8. quantml/data/cache.py +149 -0
  9. quantml/data/feature_store.py +234 -0
  10. quantml/data/futures.py +254 -0
  11. quantml/data/loaders.py +236 -0
  12. quantml/data/memory_optimizer.py +234 -0
  13. quantml/data/validators.py +390 -0
  14. quantml/experiments/__init__.py +23 -0
  15. quantml/experiments/logger.py +208 -0
  16. quantml/experiments/results.py +158 -0
  17. quantml/experiments/tracker.py +223 -0
  18. quantml/features/__init__.py +25 -0
  19. quantml/features/base.py +104 -0
  20. quantml/features/gap_features.py +124 -0
  21. quantml/features/registry.py +138 -0
  22. quantml/features/volatility_features.py +140 -0
  23. quantml/features/volume_features.py +142 -0
  24. quantml/functional.py +37 -0
  25. quantml/models/__init__.py +27 -0
  26. quantml/models/attention.py +258 -0
  27. quantml/models/dropout.py +130 -0
  28. quantml/models/gru.py +319 -0
  29. quantml/models/linear.py +112 -0
  30. quantml/models/lstm.py +353 -0
  31. quantml/models/mlp.py +286 -0
  32. quantml/models/normalization.py +289 -0
  33. quantml/models/rnn.py +154 -0
  34. quantml/models/tcn.py +238 -0
  35. quantml/online.py +209 -0
  36. quantml/ops.py +1707 -0
  37. quantml/optim/__init__.py +42 -0
  38. quantml/optim/adafactor.py +206 -0
  39. quantml/optim/adagrad.py +157 -0
  40. quantml/optim/adam.py +267 -0
  41. quantml/optim/lookahead.py +97 -0
  42. quantml/optim/quant_optimizer.py +228 -0
  43. quantml/optim/radam.py +192 -0
  44. quantml/optim/rmsprop.py +203 -0
  45. quantml/optim/schedulers.py +286 -0
  46. quantml/optim/sgd.py +181 -0
  47. quantml/py.typed +0 -0
  48. quantml/streaming.py +175 -0
  49. quantml/tensor.py +462 -0
  50. quantml/time_series.py +447 -0
  51. quantml/training/__init__.py +135 -0
  52. quantml/training/alpha_eval.py +203 -0
  53. quantml/training/backtest.py +280 -0
  54. quantml/training/backtest_analysis.py +168 -0
  55. quantml/training/cv.py +106 -0
  56. quantml/training/data_loader.py +177 -0
  57. quantml/training/ensemble.py +84 -0
  58. quantml/training/feature_importance.py +135 -0
  59. quantml/training/features.py +364 -0
  60. quantml/training/futures_backtest.py +266 -0
  61. quantml/training/gradient_clipping.py +206 -0
  62. quantml/training/losses.py +248 -0
  63. quantml/training/lr_finder.py +127 -0
  64. quantml/training/metrics.py +376 -0
  65. quantml/training/regularization.py +89 -0
  66. quantml/training/trainer.py +239 -0
  67. quantml/training/walk_forward.py +190 -0
  68. quantml/utils/__init__.py +51 -0
  69. quantml/utils/gradient_check.py +274 -0
  70. quantml/utils/logging.py +181 -0
  71. quantml/utils/ops_cpu.py +231 -0
  72. quantml/utils/profiling.py +364 -0
  73. quantml/utils/reproducibility.py +220 -0
  74. quantml/utils/serialization.py +335 -0
  75. quantmllibrary-0.1.0.dist-info/METADATA +536 -0
  76. quantmllibrary-0.1.0.dist-info/RECORD +79 -0
  77. quantmllibrary-0.1.0.dist-info/WHEEL +5 -0
  78. quantmllibrary-0.1.0.dist-info/licenses/LICENSE +22 -0
  79. quantmllibrary-0.1.0.dist-info/top_level.txt +1 -0
quantml/config/config.py ADDED
@@ -0,0 +1,259 @@
+ """
+ Configuration management for QuantML experiments.
+
+ Supports YAML/JSON config files and command-line argument integration.
+ """
+
+ import json
+ import os
+ from typing import Dict, Any, Optional, List
+ from dataclasses import dataclass, field, asdict
+ import argparse
+
+ # Try to import YAML
+ try:
+     import yaml
+     HAS_YAML = True
+ except ImportError:
+     HAS_YAML = False
+
+
+ @dataclass
+ class DataConfig:
+     """Data loading and preprocessing configuration."""
+     instrument: str = "ES"  # MES, ES, MNQ, NQ, etc.
+     start_date: str = "2015-01-01"
+     end_date: str = "2024-12-31"
+     data_source: str = "csv"  # csv, database, api
+     data_path: Optional[str] = None
+     frequency: str = "1min"  # 1min, 5min, daily
+     validate_data: bool = True
+     handle_missing: str = "forward_fill"  # forward_fill, drop, interpolate
+     cache_features: bool = True
+     feature_cache_path: str = "./cache/features"
+
+
+ @dataclass
+ class FeatureConfig:
+     """Feature engineering configuration."""
+     enabled_features: List[str] = field(default_factory=lambda: [
+         "lagged_price",
+         "rolling_mean",
+         "rolling_std",
+         "returns",
+         "volatility"
+     ])
+     lag_periods: List[int] = field(default_factory=lambda: [1, 5, 10, 20])
+     rolling_windows: List[int] = field(default_factory=lambda: [20, 50])
+     normalize: bool = True
+     normalization_method: str = "zscore"  # zscore, minmax, robust
+     alpha_factors: Dict[str, Any] = field(default_factory=lambda: {
+         "momentum": {"enabled": True, "lookback": 20},
+         "mean_reversion": {"enabled": True, "window": 20},
+         "volatility": {"enabled": True, "window": 20}
+     })
+
+
+ @dataclass
+ class ModelConfig:
+     """Model architecture and hyperparameters."""
+     model_type: str = "Linear"  # Linear, SimpleRNN, TCN
+     in_features: int = 10
+     out_features: int = 1
+     hidden_size: Optional[int] = None
+     bias: bool = True
+     dropout: float = 0.0
+     activation: Optional[str] = None
+
+
+ @dataclass
+ class TrainingConfig:
+     """Training configuration."""
+     optimizer: str = "Adam"  # SGD, Adam, RMSProp, etc.
+     learning_rate: float = 0.001
+     batch_size: int = 32
+     epochs: int = 100
+     loss_function: str = "mse_loss"  # mse_loss, sharpe_loss, etc.
+     early_stopping: Dict[str, Any] = field(default_factory=lambda: {
+         "enabled": True,
+         "patience": 10,
+         "min_delta": 0.0001
+     })
+     gradient_clipping: Dict[str, Any] = field(default_factory=lambda: {
+         "enabled": False,
+         "max_norm": 1.0
+     })
+     scheduler: Optional[Dict[str, Any]] = None
+     walk_forward: Dict[str, Any] = field(default_factory=lambda: {
+         "enabled": True,
+         "train_size": 500,
+         "test_size": 100,
+         "window_type": "expanding"  # expanding, rolling
+     })
+
+
+ @dataclass
+ class ExperimentConfig:
+     """Complete experiment configuration."""
+     name: str = "default_experiment"
+     description: str = ""
+     random_seed: int = 42
+     data: DataConfig = field(default_factory=DataConfig)
+     features: FeatureConfig = field(default_factory=FeatureConfig)
+     model: ModelConfig = field(default_factory=ModelConfig)
+     training: TrainingConfig = field(default_factory=TrainingConfig)
+     output_dir: str = "./experiments"
+     log_level: str = "INFO"
+
+     def to_dict(self) -> Dict[str, Any]:
+         """Convert config to dictionary."""
+         return asdict(self)
+
+     @classmethod
+     def from_dict(cls, data: Dict[str, Any]) -> 'ExperimentConfig':
+         """Create config from dictionary."""
+         # Handle nested configs
+         if 'data' in data and isinstance(data['data'], dict):
+             data['data'] = DataConfig(**data['data'])
+         if 'features' in data and isinstance(data['features'], dict):
+             data['features'] = FeatureConfig(**data['features'])
+         if 'model' in data and isinstance(data['model'], dict):
+             data['model'] = ModelConfig(**data['model'])
+         if 'training' in data and isinstance(data['training'], dict):
+             data['training'] = TrainingConfig(**data['training'])
+         return cls(**data)
+
+     def validate(self) -> List[str]:
+         """Validate configuration and return list of errors."""
+         errors = []
+
+         # Validate dates
+         try:
+             from datetime import datetime
+             datetime.strptime(self.data.start_date, "%Y-%m-%d")
+             datetime.strptime(self.data.end_date, "%Y-%m-%d")
+         except ValueError:
+             errors.append("Invalid date format. Use YYYY-MM-DD")
+
+         # Validate learning rate
+         if self.training.learning_rate <= 0:
+             errors.append("Learning rate must be positive")
+
+         # Validate batch size
+         if self.training.batch_size <= 0:
+             errors.append("Batch size must be positive")
+
+         # Validate model features
+         if self.model.in_features <= 0:
+             errors.append("Model input features must be positive")
+
+         return errors
+
+
+ class Config:
+     """Main configuration class."""
+
+     @staticmethod
+     def load_yaml(filepath: str) -> ExperimentConfig:
+         """Load configuration from YAML file."""
+         if not HAS_YAML:
+             raise ImportError("PyYAML not installed. Install with: pip install pyyaml")
+
+         with open(filepath, 'r') as f:
+             data = yaml.safe_load(f)
+
+         return ExperimentConfig.from_dict(data)
+
+     @staticmethod
+     def load_json(filepath: str) -> ExperimentConfig:
+         """Load configuration from JSON file."""
+         with open(filepath, 'r') as f:
+             data = json.load(f)
+
+         return ExperimentConfig.from_dict(data)
+
+     @staticmethod
+     def save_yaml(config: ExperimentConfig, filepath: str):
+         """Save configuration to YAML file."""
+         if not HAS_YAML:
+             raise ImportError("PyYAML not installed. Install with: pip install pyyaml")
+
+         os.makedirs(os.path.dirname(filepath) or '.', exist_ok=True)
+
+         with open(filepath, 'w') as f:
+             yaml.dump(config.to_dict(), f, default_flow_style=False, sort_keys=False)
+
+     @staticmethod
+     def save_json(config: ExperimentConfig, filepath: str):
+         """Save configuration to JSON file."""
+         os.makedirs(os.path.dirname(filepath) or '.', exist_ok=True)
+
+         with open(filepath, 'w') as f:
+             json.dump(config.to_dict(), f, indent=2)
+
+
+ def load_config(filepath: str) -> ExperimentConfig:
+     """Load configuration from file (auto-detect YAML/JSON)."""
+     ext = os.path.splitext(filepath)[1].lower()
+
+     if ext in ['.yaml', '.yml']:
+         return Config.load_yaml(filepath)
+     elif ext == '.json':
+         return Config.load_json(filepath)
+     else:
+         raise ValueError(f"Unsupported config file format: {ext}. Use .yaml, .yml, or .json")
+
+
+ def save_config(config: ExperimentConfig, filepath: str):
+     """Save configuration to file (auto-detect YAML/JSON)."""
+     ext = os.path.splitext(filepath)[1].lower()
+
+     if ext in ['.yaml', '.yml']:
+         Config.save_yaml(config, filepath)
+     elif ext == '.json':
+         Config.save_json(config, filepath)
+     else:
+         raise ValueError(f"Unsupported config file format: {ext}. Use .yaml, .yml, or .json")
+
+
+ def create_argparser() -> argparse.ArgumentParser:
+     """Create argument parser with common config options."""
+     parser = argparse.ArgumentParser(description='QuantML Experiment Runner')
+
+     parser.add_argument('--config', type=str, help='Path to config file (YAML/JSON)')
+     parser.add_argument('--instrument', type=str, help='Trading instrument (ES, MES, NQ, MNQ)')
+     parser.add_argument('--start-date', type=str, help='Start date (YYYY-MM-DD)')
+     parser.add_argument('--end-date', type=str, help='End date (YYYY-MM-DD)')
+     parser.add_argument('--model-type', type=str, help='Model type (Linear, SimpleRNN, TCN)')
+     parser.add_argument('--learning-rate', type=float, help='Learning rate')
+     parser.add_argument('--epochs', type=int, help='Number of epochs')
+     parser.add_argument('--batch-size', type=int, help='Batch size')
+     parser.add_argument('--random-seed', type=int, help='Random seed')
+     parser.add_argument('--output-dir', type=str, help='Output directory')
+
+     return parser
+
+
+ def merge_config_with_args(config: ExperimentConfig, args: argparse.Namespace) -> ExperimentConfig:
+     """Merge command-line arguments into config; flags left unset are ignored."""
+     if args.instrument:
+         config.data.instrument = args.instrument
+     if args.start_date:
+         config.data.start_date = args.start_date
+     if args.end_date:
+         config.data.end_date = args.end_date
+     if args.model_type:
+         config.model.model_type = args.model_type
+     if args.learning_rate is not None:
+         config.training.learning_rate = args.learning_rate
+     if args.epochs is not None:
+         config.training.epochs = args.epochs
+     if args.batch_size is not None:
+         config.training.batch_size = args.batch_size
+     if args.random_seed is not None:
+         config.random_seed = args.random_seed
+     if args.output_dir:
+         config.output_dir = args.output_dir
+
+     return config
+
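Taken together, config.py supports a load → CLI-override → validate flow. The sketch below is a minimal, hedged illustration based only on the code in this hunk: the import path quantml.config.config is the file shown here (whatever quantml/config/__init__.py re-exports is not visible in this diff), the experiment name is made up, and save_config with a .yaml path requires PyYAML.

    from quantml.config.config import (
        ExperimentConfig, create_argparser, load_config,
        merge_config_with_args, save_config,
    )

    # Build a default config and persist it (.yaml path requires PyYAML)
    config = ExperimentConfig(name="es_linear_baseline")
    save_config(config, "./experiments/es_linear_baseline.yaml")

    # Reload it and let CLI flags override individual fields
    args = create_argparser().parse_args(
        ["--config", "./experiments/es_linear_baseline.yaml", "--epochs", "50"]
    )
    config = merge_config_with_args(load_config(args.config), args)

    # validate() returns a list of error strings rather than raising
    errors = config.validate()
    if errors:
        raise SystemExit("; ".join(errors))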
quantml/data/__init__.py ADDED
@@ -0,0 +1,33 @@
+ """
+ QuantML Data Management
+
+ This module provides data loading, validation, and caching utilities.
+ """
+
+ from quantml.data.validators import (
+     validate_price_data,
+     validate_timestamps,
+     check_duplicates,
+     check_missing_values,
+     generate_data_quality_report,
+     DataQualityReport
+ )
+
+ from quantml.data.loaders import (
+     load_csv_data,
+     load_dataframe,
+     DataLoader
+ )
+
+ __all__ = [
+     'validate_price_data',
+     'validate_timestamps',
+     'check_duplicates',
+     'check_missing_values',
+     'generate_data_quality_report',
+     'DataQualityReport',
+     'load_csv_data',
+     'load_dataframe',
+     'DataLoader'
+ ]
+
quantml/data/cache.py ADDED
@@ -0,0 +1,149 @@
+ """
+ Cache management utilities.
+ """
+
+ from typing import Optional, Callable, Any
+ from functools import wraps
+ import hashlib
+ import json
+ import os
+
+ from quantml.data.feature_store import FeatureStore
+
+
+ def cached_features(
+     cache_dir: str = "./cache/features",
+     use_cache: bool = True
+ ):
+     """
+     Decorator to cache feature computation results.
+
+     Args:
+         cache_dir: Cache directory
+         use_cache: Whether to use cache
+
+     Example:
+         @cached_features(cache_dir="./cache")
+         def compute_features(data):
+             # Expensive computation
+             return features
+     """
+     def decorator(func: Callable) -> Callable:
+         store = FeatureStore(cache_dir=cache_dir)
+
+         @wraps(func)
+         def wrapper(*args, **kwargs):
+             if not use_cache:
+                 return func(*args, **kwargs)
+
+             # Derive the cache key from the kwargs metadata exactly as
+             # FeatureStore does, so that this lookup matches the key
+             # that save_features() generates below
+             instrument = kwargs.get('instrument', 'unknown')
+             start_date = kwargs.get('start_date', 'unknown')
+             end_date = kwargs.get('end_date', 'unknown')
+             feature_config = kwargs.get('feature_config', {})
+             cache_key = store._generate_cache_key(instrument, start_date, end_date, feature_config)
+
+             # Check cache
+             if store.cache_exists(cache_key):
+                 features, _ = store.load_features(cache_key)
+                 return features
+
+             # Compute features
+             features = func(*args, **kwargs)
+
+             # Save to cache. save_features() regenerates the same key
+             # from this metadata, so the next call with identical
+             # kwargs is served from the cache. Positional arguments
+             # (e.g. the raw data) are not part of the key, so callers
+             # must keep the metadata kwargs in sync with the data
+             # they pass in.
+
+             store.save_features(
+                 features,
+                 instrument,
+                 start_date,
+                 end_date,
+                 feature_config
+             )
+
+             return features
+
+         return wrapper
+     return decorator
+
+
+ class CacheManager:
+     """Cache manager for feature computation."""
+
+     def __init__(self, cache_dir: str = "./cache/features"):
+         """
+         Initialize cache manager.
+
+         Args:
+             cache_dir: Cache directory
+         """
+         self.store = FeatureStore(cache_dir=cache_dir)
+         self.cache_dir = cache_dir
+
+     def get_or_compute(
+         self,
+         cache_key: str,
+         compute_fn: Callable,
+         *args,
+         **kwargs
+     ) -> Any:
+         """
+         Get from cache or compute.
+
+         Args:
+             cache_key: Cache key (as returned by FeatureStore.save_features)
+             compute_fn: Function to compute if not cached
+             *args: Arguments for compute function
+             **kwargs: Keyword arguments for compute function
+
+         Returns:
+             Cached or computed result
+         """
+         if self.store.cache_exists(cache_key):
+             features, _ = self.store.load_features(cache_key)
+             return features
+
+         # Compute
+         result = compute_fn(*args, **kwargs)
+
+         # Save to cache if result is features
+         if isinstance(result, list) and len(result) > 0:
+             if isinstance(result[0], list):  # List of feature vectors
+                 instrument = kwargs.get('instrument', 'unknown')
+                 start_date = kwargs.get('start_date', 'unknown')
+                 end_date = kwargs.get('end_date', 'unknown')
+                 feature_config = kwargs.get('feature_config', {})
+
+                 self.store.save_features(
+                     result,
+                     instrument,
+                     start_date,
+                     end_date,
+                     feature_config
+                 )
+
+         return result
+
+     def clear_cache(self, pattern: Optional[str] = None):
+         """
+         Clear cache.
+
+         Args:
+             pattern: Optional pattern to match (e.g., instrument name)
+         """
+         if pattern:
+             self.store.invalidate_cache(instrument=pattern)
+         else:
+             # Clear all
+             for filename in os.listdir(self.cache_dir):
+                 filepath = os.path.join(self.cache_dir, filename)
+                 if os.path.isfile(filepath):
+                     os.remove(filepath)
+
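Because the decorator keys the cache on the instrument/start_date/end_date/feature_config kwargs rather than on the positional data, callers must pass that metadata explicitly. A minimal sketch, assuming pandas (and optionally pyarrow) is installed; compute_features and the bar data here are hypothetical stand-ins, not part of the package:

    from quantml.data.cache import cached_features

    @cached_features(cache_dir="./cache/features")
    def compute_features(data, instrument="unknown", start_date="unknown",
                         end_date="unknown", feature_config=None):
        # Hypothetical stand-in for an expensive feature computation:
        # one [close, close-minus-open] vector per bar
        return [[close, close - open_] for open_, close in data]

    bars = [(4495.0, 4500.0), (4500.0, 4510.0)]
    feats = compute_features(
        bars,
        instrument="ES",
        start_date="2024-01-02",
        end_date="2024-01-03",
        feature_config={"spread": True},
    )
    # A second call with the same metadata kwargs is served from disk;
    # note that changing `bars` alone would NOT invalidate the entry.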
quantml/data/feature_store.py ADDED
@@ -0,0 +1,234 @@
+ """
+ Feature store for caching computed features.
+
+ Uses Parquet format for efficient storage and loading of large feature datasets.
+ """
+
+ import os
+ import hashlib
+ import json
+ from typing import List, Dict, Any, Optional
+ from datetime import datetime
+
+ # Try to import pandas and pyarrow
+ try:
+     import pandas as pd
+     HAS_PANDAS = True
+ except ImportError:
+     HAS_PANDAS = False
+     pd = None
+
+ try:
+     import pyarrow.parquet as pq
+     HAS_PARQUET = True
+ except ImportError:
+     HAS_PARQUET = False
+     pq = None
+
+
+ class FeatureStore:
+     """Feature caching system using Parquet format."""
+
+     def __init__(
+         self,
+         cache_dir: str = "./cache/features",
+         use_parquet: bool = True
+     ):
+         """
+         Initialize feature store.
+
+         Args:
+             cache_dir: Directory for cached features
+             use_parquet: Whether to use Parquet format (requires pyarrow)
+         """
+         self.cache_dir = cache_dir
+         self.use_parquet = use_parquet and HAS_PARQUET
+
+         if not HAS_PANDAS:  # pandas is needed for both the Parquet and CSV paths
+             raise ImportError(
+                 "pandas is required for the feature store. "
+                 "Install with: pip install pandas pyarrow"
+             )
+
+         os.makedirs(cache_dir, exist_ok=True)
+
+     def _generate_cache_key(
+         self,
+         instrument: str,
+         start_date: str,
+         end_date: str,
+         feature_config: Dict[str, Any]
+     ) -> str:
+         """Generate cache key from configuration."""
+         key_data = {
+             'instrument': instrument,
+             'start_date': start_date,
+             'end_date': end_date,
+             'features': feature_config
+         }
+         key_str = json.dumps(key_data, sort_keys=True)
+         return hashlib.md5(key_str.encode()).hexdigest()
+
+     def _get_cache_path(self, cache_key: str) -> str:
+         """Get cache file path."""
+         if self.use_parquet:
+             return os.path.join(self.cache_dir, f"{cache_key}.parquet")
+         else:
+             return os.path.join(self.cache_dir, f"{cache_key}.csv")
+
+     def _get_metadata_path(self, cache_key: str) -> str:
+         """Get metadata file path."""
+         return os.path.join(self.cache_dir, f"{cache_key}_metadata.json")
+
+     def save_features(
+         self,
+         features: List[List[float]],
+         instrument: str,
+         start_date: str,
+         end_date: str,
+         feature_config: Dict[str, Any],
+         metadata: Optional[Dict[str, Any]] = None
+     ) -> str:
+         """
+         Save features to cache.
+
+         Args:
+             features: List of feature vectors
+             instrument: Trading instrument
+             start_date: Start date
+             end_date: End date
+             feature_config: Feature configuration
+             metadata: Optional metadata
+
+         Returns:
+             Cache key
+         """
+         cache_key = self._generate_cache_key(
+             instrument, start_date, end_date, feature_config
+         )
+
+         cache_path = self._get_cache_path(cache_key)
+         metadata_path = self._get_metadata_path(cache_key)
+
+         # Convert to DataFrame
+         if not HAS_PANDAS:
+             raise ImportError("pandas required for feature storage")
+
+         df = pd.DataFrame(features)
+
+         # Save features
+         if self.use_parquet:
+             df.to_parquet(cache_path, compression='snappy', index=False)
+         else:
+             df.to_csv(cache_path, index=False)
+
+         # Save metadata
+         meta = {
+             'cache_key': cache_key,
+             'instrument': instrument,
+             'start_date': start_date,
+             'end_date': end_date,
+             'feature_config': feature_config,
+             'num_features': len(features[0]) if features else 0,
+             'num_samples': len(features),
+             'created_at': datetime.now().isoformat(),
+             'metadata': metadata or {}
+         }
+
+         with open(metadata_path, 'w') as f:
+             json.dump(meta, f, indent=2)
+
+         return cache_key
+
+     def load_features(self, cache_key: str) -> tuple:
+         """
+         Load features from cache.
+
+         Args:
+             cache_key: Cache key
+
+         Returns:
+             Tuple of (features, metadata)
+         """
+         cache_path = self._get_cache_path(cache_key)
+         metadata_path = self._get_metadata_path(cache_key)
+
+         if not os.path.exists(cache_path):
+             raise FileNotFoundError(f"Cache not found: {cache_key}")
+
+         # Load features
+         if not HAS_PANDAS:
+             raise ImportError("pandas required for feature loading")
+
+         if self.use_parquet:
+             df = pd.read_parquet(cache_path)
+         else:
+             df = pd.read_csv(cache_path)
+
+         features = df.values.tolist()
+
+         # Load metadata
+         metadata = {}
+         if os.path.exists(metadata_path):
+             with open(metadata_path, 'r') as f:
+                 metadata = json.load(f)
+
+         return features, metadata
+
+     def cache_exists(self, cache_key: str) -> bool:
+         """Check if cache exists."""
+         cache_path = self._get_cache_path(cache_key)
+         return os.path.exists(cache_path)
+
+     def invalidate_cache(
+         self,
+         instrument: Optional[str] = None,
+         cache_key: Optional[str] = None
+     ):
+         """
+         Invalidate cache entries.
+
+         Args:
+             instrument: Invalidate all caches for this instrument
+             cache_key: Invalidate specific cache key
+         """
+         if cache_key:
+             cache_path = self._get_cache_path(cache_key)
+             metadata_path = self._get_metadata_path(cache_key)
+
+             if os.path.exists(cache_path):
+                 os.remove(cache_path)
+             if os.path.exists(metadata_path):
+                 os.remove(metadata_path)
+
+         elif instrument:
+             # Remove all caches for instrument
+             for filename in os.listdir(self.cache_dir):
+                 if filename.endswith('_metadata.json'):
+                     metadata_path = os.path.join(self.cache_dir, filename)
+                     try:
+                         with open(metadata_path, 'r') as f:
+                             meta = json.load(f)
+                         if meta.get('instrument') == instrument:
+                             cache_key = meta.get('cache_key')
+                             if cache_key:
+                                 self.invalidate_cache(cache_key=cache_key)
+                     except Exception:
+                         pass
+
+     def list_caches(self) -> List[Dict[str, Any]]:
+         """List all cached features."""
+         caches = []
+
+         for filename in os.listdir(self.cache_dir):
+             if filename.endswith('_metadata.json'):
+                 metadata_path = os.path.join(self.cache_dir, filename)
+                 try:
+                     with open(metadata_path, 'r') as f:
+                         meta = json.load(f)
+                     caches.append(meta)
+                 except Exception:
+                     pass
+
+         return caches
+
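A minimal round-trip through the store's public methods, assuming pandas is installed (pyarrow enables the Parquet path; without it the store falls back to CSV). The feature values and dates are illustrative only:

    from quantml.data.feature_store import FeatureStore

    store = FeatureStore(cache_dir="./cache/features")

    key = store.save_features(
        features=[[1.0, 2.0], [3.0, 4.0]],  # two samples, two features
        instrument="ES",
        start_date="2024-01-01",
        end_date="2024-01-31",
        feature_config={"rolling_windows": [20, 50]},
    )

    assert store.cache_exists(key)
    features, meta = store.load_features(key)
    print(meta["num_samples"], meta["num_features"])  # -> 2 2

    store.invalidate_cache(cache_key=key)  # remove just this entry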