quantmllibrary 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quantml/__init__.py +74 -0
- quantml/autograd.py +154 -0
- quantml/cli/__init__.py +10 -0
- quantml/cli/run_experiment.py +385 -0
- quantml/config/__init__.py +28 -0
- quantml/config/config.py +259 -0
- quantml/data/__init__.py +33 -0
- quantml/data/cache.py +149 -0
- quantml/data/feature_store.py +234 -0
- quantml/data/futures.py +254 -0
- quantml/data/loaders.py +236 -0
- quantml/data/memory_optimizer.py +234 -0
- quantml/data/validators.py +390 -0
- quantml/experiments/__init__.py +23 -0
- quantml/experiments/logger.py +208 -0
- quantml/experiments/results.py +158 -0
- quantml/experiments/tracker.py +223 -0
- quantml/features/__init__.py +25 -0
- quantml/features/base.py +104 -0
- quantml/features/gap_features.py +124 -0
- quantml/features/registry.py +138 -0
- quantml/features/volatility_features.py +140 -0
- quantml/features/volume_features.py +142 -0
- quantml/functional.py +37 -0
- quantml/models/__init__.py +27 -0
- quantml/models/attention.py +258 -0
- quantml/models/dropout.py +130 -0
- quantml/models/gru.py +319 -0
- quantml/models/linear.py +112 -0
- quantml/models/lstm.py +353 -0
- quantml/models/mlp.py +286 -0
- quantml/models/normalization.py +289 -0
- quantml/models/rnn.py +154 -0
- quantml/models/tcn.py +238 -0
- quantml/online.py +209 -0
- quantml/ops.py +1707 -0
- quantml/optim/__init__.py +42 -0
- quantml/optim/adafactor.py +206 -0
- quantml/optim/adagrad.py +157 -0
- quantml/optim/adam.py +267 -0
- quantml/optim/lookahead.py +97 -0
- quantml/optim/quant_optimizer.py +228 -0
- quantml/optim/radam.py +192 -0
- quantml/optim/rmsprop.py +203 -0
- quantml/optim/schedulers.py +286 -0
- quantml/optim/sgd.py +181 -0
- quantml/py.typed +0 -0
- quantml/streaming.py +175 -0
- quantml/tensor.py +462 -0
- quantml/time_series.py +447 -0
- quantml/training/__init__.py +135 -0
- quantml/training/alpha_eval.py +203 -0
- quantml/training/backtest.py +280 -0
- quantml/training/backtest_analysis.py +168 -0
- quantml/training/cv.py +106 -0
- quantml/training/data_loader.py +177 -0
- quantml/training/ensemble.py +84 -0
- quantml/training/feature_importance.py +135 -0
- quantml/training/features.py +364 -0
- quantml/training/futures_backtest.py +266 -0
- quantml/training/gradient_clipping.py +206 -0
- quantml/training/losses.py +248 -0
- quantml/training/lr_finder.py +127 -0
- quantml/training/metrics.py +376 -0
- quantml/training/regularization.py +89 -0
- quantml/training/trainer.py +239 -0
- quantml/training/walk_forward.py +190 -0
- quantml/utils/__init__.py +51 -0
- quantml/utils/gradient_check.py +274 -0
- quantml/utils/logging.py +181 -0
- quantml/utils/ops_cpu.py +231 -0
- quantml/utils/profiling.py +364 -0
- quantml/utils/reproducibility.py +220 -0
- quantml/utils/serialization.py +335 -0
- quantmllibrary-0.1.0.dist-info/METADATA +536 -0
- quantmllibrary-0.1.0.dist-info/RECORD +79 -0
- quantmllibrary-0.1.0.dist-info/WHEEL +5 -0
- quantmllibrary-0.1.0.dist-info/licenses/LICENSE +22 -0
- quantmllibrary-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CSV/JSON logger for experiment tracking.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import csv
|
|
6
|
+
import json
|
|
7
|
+
from typing import Dict, Any, List, Optional
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class CSVExperimentLogger:
    """
    CSV-based experiment logger.

    Logs experiments to a CSV file for easy comparison and analysis.
    """

    def __init__(self, log_file: str = "experiments_log.csv"):
        """
        Initialize CSV logger.

        Args:
            log_file: Path to CSV log file
        """
        self.log_file = Path(log_file)
        # Fixed column order so rows from different runs stay comparable.
        self.fieldnames = [
            'timestamp',
            'experiment_id',
            'model_type',
            'optimizer',
            'learning_rate',
            'batch_size',
            'epochs',
            'dataset_version',
            'feature_set',
            'random_seed',
            'ic',
            'rank_ic',
            'sharpe_ratio',
            'total_return',
            'max_drawdown',
            'win_rate',
            'n_trades',
            'notes'
        ]

    def log_experiment(
        self,
        experiment_id: str,
        config: Dict[str, Any],
        results: Dict[str, Any],
        notes: Optional[str] = None
    ) -> None:
        """
        Log an experiment to CSV.

        Args:
            experiment_id: Unique experiment identifier
            config: Experiment configuration (nested dict; missing keys
                become empty cells)
            results: Experiment results (nested dict; missing keys become
                empty cells)
            notes: Optional notes
        """
        file_exists = self.log_file.exists()

        # Hoist the nested lookups once instead of repeating .get chains.
        training = config.get('training', {})
        alpha = results.get('alpha_metrics', {})
        backtest = results.get('backtest_results', {})

        row = {
            'timestamp': datetime.now().isoformat(),
            'experiment_id': experiment_id,
            'model_type': config.get('model', {}).get('model_type', ''),
            'optimizer': training.get('optimizer', ''),
            'learning_rate': training.get('learning_rate', ''),
            'batch_size': training.get('batch_size', ''),
            'epochs': training.get('epochs', ''),
            'dataset_version': config.get('data', {}).get('dataset_version', ''),
            'feature_set': ','.join(config.get('features', {}).get('enabled_features', [])),
            'random_seed': config.get('random_seed', ''),
            'ic': alpha.get('ic', ''),
            'rank_ic': alpha.get('rank_ic', ''),
            'sharpe_ratio': backtest.get('sharpe_ratio', ''),
            'total_return': backtest.get('total_return', ''),
            'max_drawdown': backtest.get('max_drawdown', ''),
            'win_rate': backtest.get('win_rate', ''),
            'n_trades': backtest.get('n_trades', ''),
            'notes': notes or ''
        }

        # newline='' is required by the csv module for correct handling of
        # embedded newlines/quoting on all platforms.
        with open(self.log_file, 'a', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=self.fieldnames)
            if not file_exists:
                writer.writeheader()
            writer.writerow(row)

    def load_experiments(self) -> List[Dict[str, Any]]:
        """
        Load all experiments from CSV.

        Returns:
            List of experiment dictionaries (all values are strings, as
            read by csv.DictReader); empty list if the log does not exist.
        """
        if not self.log_file.exists():
            return []

        # BUG FIX: csv reads also need newline='' per the csv module docs.
        with open(self.log_file, 'r', newline='') as f:
            return list(csv.DictReader(f))

    def get_best_experiment(self, metric: str = 'sharpe_ratio') -> Optional[Dict[str, Any]]:
        """
        Get best experiment by metric.

        Args:
            metric: Metric to optimize (default: 'sharpe_ratio'); higher is
                treated as better.

        Returns:
            Best experiment dictionary or None if no row has a usable value
        """
        valid_exps = []
        for exp in self.load_experiments():
            try:
                value = float(exp.get(metric, 0))
            except (ValueError, TypeError):
                # Blank cells / non-numeric values are skipped.
                continue
            # NOTE(review): zero is treated as "missing" and skipped — a
            # genuinely zero metric is therefore ignored; kept for
            # backward compatibility.
            if value != 0:
                valid_exps.append((exp, value))

        if not valid_exps:
            return None

        # max() is O(n); no need to sort the whole list for the top entry.
        return max(valid_exps, key=lambda pair: pair[1])[0]
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class JSONExperimentLogger:
    """
    JSON-based experiment logger.

    Logs experiments to JSON files for detailed tracking, one file per
    experiment named ``<experiment_id>.json``.
    """

    def __init__(self, log_dir: str = "experiments"):
        """
        Initialize JSON logger.

        Args:
            log_dir: Directory to store JSON logs (created if missing)
        """
        self.log_dir = Path(log_dir)
        self.log_dir.mkdir(parents=True, exist_ok=True)

    def log_experiment(
        self,
        experiment_id: str,
        config: Dict[str, Any],
        results: Dict[str, Any],
        metadata: Optional[Dict[str, Any]] = None
    ):
        """
        Log experiment to JSON.

        Args:
            experiment_id: Unique experiment identifier
            config: Experiment configuration
            results: Experiment results
            metadata: Optional metadata (stored as {} when omitted)
        """
        payload = {
            'experiment_id': experiment_id,
            'timestamp': datetime.now().isoformat(),
            'config': config,
            'results': results,
            'metadata': metadata or {}
        }

        target = self.log_dir / f"{experiment_id}.json"
        # default=str keeps non-JSON-native values (dates, paths) loggable.
        with open(target, 'w') as fh:
            json.dump(payload, fh, indent=2, default=str)

    def load_experiment(self, experiment_id: str) -> Optional[Dict[str, Any]]:
        """
        Load experiment from JSON.

        Args:
            experiment_id: Experiment identifier

        Returns:
            Experiment data or None when no log file exists for the id
        """
        target = self.log_dir / f"{experiment_id}.json"
        if not target.exists():
            return None

        with open(target, 'r') as fh:
            return json.load(fh)
|
|
208
|
+
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Experiment results comparison and analysis.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Dict, Any, List, Optional
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def compare_experiments(experiment_dirs: List[str]) -> Dict[str, Any]:
    """
    Compare multiple experiment runs.

    Args:
        experiment_dirs: List of experiment directory paths, each expected
            to contain ``results.json`` and/or ``metadata.json``

    Returns:
        Comparison dictionary with one summary entry per experiment
    """
    def _read_json(path: Path) -> Dict[str, Any]:
        # Missing files are treated as empty dicts rather than errors.
        if not path.exists():
            return {}
        with open(path, 'r') as fh:
            return json.load(fh)

    comparison: Dict[str, Any] = {
        'n_experiments': len(experiment_dirs),
        'experiments': []
    }

    for exp_dir in experiment_dirs:
        base = Path(exp_dir)
        results = _read_json(base / 'results.json')
        metadata = _read_json(base / 'metadata.json')

        alpha_metrics = results.get('alpha_metrics', {})
        backtest_results = results.get('backtest_results', {})

        comparison['experiments'].append({
            'experiment_id': base.name,
            'model_type': metadata.get('model_type', ''),
            'optimizer': metadata.get('optimizer', ''),
            'learning_rate': metadata.get('learning_rate', ''),
            'ic': alpha_metrics.get('ic', 0.0),
            'rank_ic': alpha_metrics.get('rank_ic', 0.0),
            'sharpe_ratio': backtest_results.get('sharpe_ratio', 0.0),
            'total_return': backtest_results.get('total_return', 0.0),
            'max_drawdown': backtest_results.get('max_drawdown', 0.0),
            'win_rate': backtest_results.get('win_rate', 0.0)
        })

    return comparison
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def generate_summary_table(comparison: Dict[str, Any]) -> str:
    """
    Generate summary table for paper.

    Args:
        comparison: Comparison dictionary (as built by compare_experiments)

    Returns:
        Formatted table string, one row per experiment
    """
    rule = "=" * 100
    header = f"{'Experiment':<20} {'Model':<10} {'IC':<8} {'Sharpe':<8} {'Return':<10} {'MaxDD':<8}"

    # One formatted row per experiment; return is rendered as a percentage.
    rows = [
        f"{entry['experiment_id']:<20} "
        f"{entry['model_type']:<10} "
        f"{entry['ic']:>7.4f} "
        f"{entry['sharpe_ratio']:>7.4f} "
        f"{entry['total_return'] * 100:>9.2f}% "
        f"{entry['max_drawdown']:>7.4f}"
        for entry in comparison['experiments']
    ]

    parts = ["Experiment Comparison", rule, header, "-" * 100, *rows, rule]
    return "\n".join(parts)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def export_for_paper(comparison: Dict[str, Any], output_path: str, format: str = 'json'):
    """
    Export results for paper.

    Args:
        comparison: Comparison dictionary (as built by compare_experiments)
        output_path: Output file path
        format: Export format ('json', 'csv', 'latex')

    Raises:
        ValueError: If ``format`` is not one of the supported formats.
    """
    if format == 'json':
        # default=str keeps non-JSON-native values (dates, paths) exportable.
        with open(output_path, 'w') as f:
            json.dump(comparison, f, indent=2, default=str)

    elif format == 'csv':
        import csv
        fields = [
            'experiment_id', 'model_type', 'optimizer', 'ic', 'sharpe_ratio',
            'total_return', 'max_drawdown'
        ]
        with open(output_path, 'w', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=fields)
            writer.writeheader()
            for exp in comparison['experiments']:
                # Only the summary columns are exported; extra keys dropped.
                writer.writerow({key: exp[key] for key in fields})

    elif format == 'latex':
        # Generate LaTeX table
        lines = []
        lines.append("\\begin{table}[h]")
        lines.append("\\centering")
        lines.append("\\begin{tabular}{lcccc}")
        lines.append("\\hline")
        lines.append("Experiment & Model & IC & Sharpe & Return \\\\")
        lines.append("\\hline")

        for exp in comparison['experiments']:
            lines.append(
                f"{exp['experiment_id']} & "
                f"{exp['model_type']} & "
                f"{exp['ic']:.4f} & "
                f"{exp['sharpe_ratio']:.4f} & "
                f"{exp['total_return']*100:.2f}\\% \\\\"
            )

        lines.append("\\hline")
        lines.append("\\end{tabular}")
        lines.append("\\caption{Experiment Results}")
        lines.append("\\end{table}")

        with open(output_path, 'w') as f:
            f.write("\n".join(lines))

    else:
        # BUG FIX: previously an unknown format silently wrote nothing;
        # fail loudly so a typo doesn't produce a missing export.
        raise ValueError(f"Unsupported export format: {format!r}")
|
|
158
|
+
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Enhanced experiment tracker with dataset versioning and results comparison.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import json
|
|
7
|
+
import csv
|
|
8
|
+
from typing import Dict, Any, Optional, List
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ExperimentTracker:
    """
    Enhanced experiment tracker.

    Tracks:
    - Dataset version
    - Feature set version
    - Hyperparameters
    - Random seed
    - Model architecture
    - Run date/time with git hash
    - Results
    """

    def __init__(self, experiment_dir: str):
        """
        Initialize experiment tracker.

        Args:
            experiment_dir: Directory to store experiment data (created if
                missing)
        """
        self.experiment_dir = Path(experiment_dir)
        self.experiment_dir.mkdir(parents=True, exist_ok=True)

        # metadata holds everything written to metadata.json on save();
        # results mirrors the last log_results() payload.
        self.metadata: Dict[str, Any] = {}
        self.results: Dict[str, Any] = {}
        self.start_time = datetime.now()

    def log_config(self, config: Dict[str, Any]):
        """Log experiment configuration and lift key hyperparameters to top level."""
        self.metadata['config'] = config
        self.metadata['random_seed'] = config.get('random_seed')
        self.metadata['model_type'] = config.get('model', {}).get('model_type')
        self.metadata['optimizer'] = config.get('training', {}).get('optimizer')
        self.metadata['learning_rate'] = config.get('training', {}).get('learning_rate')

    def log_dataset_version(self, dataset_path: str, version: Optional[str] = None):
        """
        Log dataset version.

        Args:
            dataset_path: Path to dataset
            version: Optional version string (if None, uses file modification
                time; "unknown" if the file does not exist)
        """
        if version is None:
            # Use file modification time as version
            if os.path.exists(dataset_path):
                mtime = os.path.getmtime(dataset_path)
                version = datetime.fromtimestamp(mtime).isoformat()
            else:
                version = "unknown"

        self.metadata['dataset_path'] = dataset_path
        self.metadata['dataset_version'] = version

    def log_feature_set(self, features: List[str], version: Optional[str] = None):
        """
        Log feature set.

        Args:
            features: List of feature names
            version: Optional version string; defaults to an 8-char md5 of
                the sorted feature list, so order does not affect the hash
        """
        if version is None:
            # Create hash from feature list
            import hashlib
            feature_str = ','.join(sorted(features))
            version = hashlib.md5(feature_str.encode()).hexdigest()[:8]

        self.metadata['features'] = features
        self.metadata['feature_set_version'] = version

    def log_model_architecture(self, model_config: Dict[str, Any]):
        """Log model architecture details."""
        self.metadata['model_architecture'] = model_config

    def log_git_hash(self):
        """Log current git commit hash; records "unknown" when unavailable."""
        try:
            import subprocess
            result = subprocess.run(
                ['git', 'rev-parse', 'HEAD'],
                capture_output=True,
                text=True,
                cwd=Path(__file__).parent.parent.parent
            )
            if result.returncode == 0:
                self.metadata['git_hash'] = result.stdout.strip()
            else:
                # BUG FIX: a failing git call (non-zero exit, e.g. not a
                # repo) previously left git_hash unset; record "unknown"
                # consistently with the exception path below.
                self.metadata['git_hash'] = "unknown"
        except Exception:
            self.metadata['git_hash'] = "unknown"

    def log_results(self, results: Dict[str, Any]):
        """Log experiment results (also mirrored into metadata)."""
        self.results = results
        self.metadata['results'] = results

    def save(self):
        """Save experiment data to disk and append a row to the CSV log."""
        self.metadata['start_time'] = self.start_time.isoformat()
        self.metadata['end_time'] = datetime.now().isoformat()

        # Save metadata
        metadata_path = self.experiment_dir / 'metadata.json'
        with open(metadata_path, 'w') as f:
            json.dump(self.metadata, f, indent=2, default=str)

        # Save results (only when something was logged)
        if self.results:
            results_path = self.experiment_dir / 'results.json'
            with open(results_path, 'w') as f:
                json.dump(self.results, f, indent=2, default=str)

        # Append to the shared CSV log next to the experiment directory
        self._append_to_csv_log()

    def _append_to_csv_log(self):
        """Append experiment to CSV log for comparison across runs."""
        # The log lives in the parent directory so sibling experiments share it.
        csv_path = self.experiment_dir.parent / 'experiments_log.csv'

        file_exists = csv_path.exists()

        with open(csv_path, 'a', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=[
                'experiment_id',
                'start_time',
                'model_type',
                'optimizer',
                'learning_rate',
                'dataset_version',
                'feature_set_version',
                'git_hash',
                'ic',
                'sharpe_ratio',
                'total_return',
                'experiment_dir'
            ])

            if not file_exists:
                writer.writeheader()

            row = {
                'experiment_id': self.experiment_dir.name,
                'start_time': self.metadata.get('start_time', ''),
                'model_type': self.metadata.get('model_type', ''),
                'optimizer': self.metadata.get('optimizer', ''),
                'learning_rate': self.metadata.get('learning_rate', ''),
                'dataset_version': self.metadata.get('dataset_version', ''),
                'feature_set_version': self.metadata.get('feature_set_version', ''),
                'git_hash': self.metadata.get('git_hash', ''),
                'ic': self.results.get('alpha_metrics', {}).get('ic', ''),
                'sharpe_ratio': self.results.get('backtest_results', {}).get('sharpe_ratio', ''),
                'total_return': self.results.get('backtest_results', {}).get('total_return', ''),
                'experiment_dir': str(self.experiment_dir)
            }

            writer.writerow(row)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def compare_experiments(experiment_dirs: List[str]) -> Dict[str, Any]:
    """
    Compare multiple experiments.

    Args:
        experiment_dirs: List of experiment directory paths, each expected
            to contain ``metadata.json`` and/or ``results.json``

    Returns:
        Comparison dictionary with one summary entry per experiment
    """
    loaded = []
    for exp_dir in experiment_dirs:
        base = Path(exp_dir)

        # Absent files degrade to empty dicts instead of raising.
        metadata: Dict[str, Any] = {}
        meta_file = base / 'metadata.json'
        if meta_file.exists():
            with open(meta_file, 'r') as fh:
                metadata = json.load(fh)

        results: Dict[str, Any] = {}
        results_file = base / 'results.json'
        if results_file.exists():
            with open(results_file, 'r') as fh:
                results = json.load(fh)

        loaded.append((exp_dir, metadata, results))

    # Reduce each experiment to the handful of headline metrics.
    summaries = [
        {
            'dir': exp_dir,
            'model_type': metadata.get('model_type'),
            'optimizer': metadata.get('optimizer'),
            'ic': results.get('alpha_metrics', {}).get('ic'),
            'sharpe': results.get('backtest_results', {}).get('sharpe_ratio'),
            'return': results.get('backtest_results', {}).get('total_return')
        }
        for exp_dir, metadata, results in loaded
    ]

    return {
        'n_experiments': len(loaded),
        'experiments': summaries
    }
|
|
223
|
+
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""
|
|
2
|
+
QuantML Feature Registry
|
|
3
|
+
|
|
4
|
+
Plugin-based feature system for easy feature addition and configuration.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from quantml.features.base import BaseFeature, Feature
|
|
8
|
+
from quantml.features.registry import FeatureRegistry, register_feature
|
|
9
|
+
from quantml.features.gap_features import OvernightGapFeature, GapSizeFeature
|
|
10
|
+
from quantml.features.volume_features import VolumeRegimeFeature, VolumeShockFeature
|
|
11
|
+
from quantml.features.volatility_features import VolatilityRegimeFeature, RealizedVolatilityFeature
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
'BaseFeature',
|
|
15
|
+
'Feature',
|
|
16
|
+
'FeatureRegistry',
|
|
17
|
+
'register_feature',
|
|
18
|
+
'OvernightGapFeature',
|
|
19
|
+
'GapSizeFeature',
|
|
20
|
+
'VolumeRegimeFeature',
|
|
21
|
+
'VolumeShockFeature',
|
|
22
|
+
'VolatilityRegimeFeature',
|
|
23
|
+
'RealizedVolatilityFeature'
|
|
24
|
+
]
|
|
25
|
+
|