ivolatility-backtesting 1.6.0__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ivolatility-backtesting might be problematic.
- ivolatility_backtesting/__init__.py +12 -4
- ivolatility_backtesting/ivolatility_backtesting.py +1260 -86
- {ivolatility_backtesting-1.6.0.dist-info → ivolatility_backtesting-1.7.0.dist-info}/METADATA +1 -1
- ivolatility_backtesting-1.7.0.dist-info/RECORD +7 -0
- ivolatility_backtesting-1.6.0.dist-info/RECORD +0 -7
- {ivolatility_backtesting-1.6.0.dist-info → ivolatility_backtesting-1.7.0.dist-info}/WHEEL +0 -0
- {ivolatility_backtesting-1.6.0.dist-info → ivolatility_backtesting-1.7.0.dist-info}/licenses/LICENSE +0 -0
- {ivolatility_backtesting-1.6.0.dist-info → ivolatility_backtesting-1.7.0.dist-info}/top_level.txt +0 -0
@@ -11,13 +11,15 @@ import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 import seaborn as sns
-from datetime import datetime
+from datetime import datetime, timedelta
 import ivolatility as ivol
 import os
 import time
 import psutil
 import warnings
 from itertools import product
+import sys
+from typing import Dict, List, Optional, Tuple, Union, Any
 warnings.filterwarnings('ignore', category=pd.errors.SettingWithCopyWarning)
 warnings.filterwarnings('ignore', message='.*SettingWithCopyWarning.*')
 warnings.filterwarnings('ignore', category=FutureWarning)
@@ -362,9 +364,90 @@ def init_api(api_key=None):
     APIManager.initialize(api_key)


-def api_call(endpoint, debug=False, **kwargs):
-    """
+def api_call(endpoint, cache_config=None, debug=False, **kwargs):
+    """
+    Make API call with automatic response normalization and caching
+
+    Args:
+        endpoint: API endpoint path
+        cache_config: Cache configuration dict (optional, enables caching if provided)
+        debug: Debug mode flag
+        **kwargs: API parameters
+
+    Returns:
+        Normalized API response or None
+    """
     try:
+        # Check if caching is enabled
+        use_cache = cache_config is not None and (
+            cache_config.get('disk_enabled', False) or
+            cache_config.get('memory_enabled', False)
+        )
+
+        cache_manager = None
+        cache_key = None
+        data_type = None
+
+        if use_cache:
+            # Initialize cache manager
+            cache_manager = UniversalCacheManager(cache_config)
+
+            # Create cache key from endpoint and params (human-readable)
+            # Determine data type based on endpoint (supports EOD + INTRADAY for both STOCK + OPTIONS)
+            is_intraday = 'intraday' in endpoint
+            is_options = 'options' in endpoint
+            is_stock = 'stock' in endpoint
+
+            if is_intraday and is_options:
+                # Intraday options data: /equities/intraday/options-rawiv
+                data_type = 'options_intraday'
+                symbol = kwargs.get('symbol', 'UNKNOWN')
+                date = kwargs.get('date', 'UNKNOWN')
+                cache_key = f"{symbol}_{date}"
+            elif is_intraday and is_stock:
+                # Intraday stock data: /equities/intraday/stock-prices
+                data_type = 'stock_intraday'
+                symbol = kwargs.get('symbol', 'UNKNOWN')
+                date = kwargs.get('date', 'UNKNOWN')
+                cache_key = f"{symbol}_{date}"
+            elif is_options:
+                # EOD options data: /equities/eod/options-rawiv
+                data_type = 'options_eod'
+                symbol = kwargs.get('symbol', 'UNKNOWN')
+                from_date = kwargs.get('from_', kwargs.get('date', 'UNKNOWN'))
+                to_date = kwargs.get('to', from_date)
+                if from_date != to_date:
+                    cache_key = f"{symbol}_{from_date}_{to_date}"
+                else:
+                    cache_key = f"{symbol}_{from_date}"
+            elif is_stock:
+                # EOD stock data: /equities/eod/stock-prices
+                data_type = 'stock_eod'
+                symbol = kwargs.get('symbol', 'UNKNOWN')
+                from_date = kwargs.get('from_', kwargs.get('date', 'UNKNOWN'))
+                to_date = kwargs.get('to', from_date)
+                if from_date != to_date:
+                    cache_key = f"{symbol}_{from_date}_{to_date}"
+                else:
+                    cache_key = f"{symbol}_{from_date}"
+            else:
+                # Fallback for other endpoints
+                sorted_params = sorted([(k, v) for k, v in kwargs.items()])
+                param_hash = abs(hash(str(sorted_params)))
+                cache_key = f"{endpoint.replace('/', '_')}_{param_hash}"
+                data_type = 'default'
+
+            # Try to get from cache
+            cached_data = cache_manager.get(cache_key, data_type)
+            if cached_data is not None:
+                if debug or cache_config.get('debug', False):
+                    print(f"[CACHE] ✓ Cache hit: {endpoint} ({len(cached_data) if hasattr(cached_data, '__len__') else '?'} records)")
+                # Return in same format as API (dict with 'data' key)
+                if isinstance(cached_data, pd.DataFrame):
+                    return {'data': cached_data.to_dict('records'), 'status': 'success'}
+                return cached_data
+
+        # Cache miss or caching disabled - make API call
         if debug and APIManager._api_key:
             base_url = "https://restapi.ivolatility.com"
             url_params = {}
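For orientation, here is a minimal usage sketch of the new cache-aware api_call (not part of the diff; the cache_config keys mirror get_cache_config() introduced later in this release, and the symbol/date values are placeholders):

    cache_config = {'disk_enabled': True, 'memory_enabled': True, 'debug': True}
    response = api_call('/equities/eod/options-rawiv', cache_config,
                        symbol='SPY', from_='2024-01-02', to='2024-03-28')
    if response is not None and 'data' in response:
        options_df = pd.DataFrame(response['data'])  # same shape whether served from cache or from the API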
@@ -387,6 +470,23 @@ def api_call(endpoint, debug=False, **kwargs):
             print(f"[api_call] Endpoint: {endpoint}")
             print(f"[api_call] Params: {kwargs}")

+        # Save to cache if enabled and data is valid
+        if use_cache and normalized is not None and cache_manager is not None:
+            # Convert dict response to DataFrame for caching
+            if isinstance(normalized, dict) and 'data' in normalized:
+                try:
+                    cache_data = pd.DataFrame(normalized['data'])
+                    if len(cache_data) > 0:  # Only cache non-empty data
+                        cache_manager.set(cache_key, cache_data, data_type)
+                        if debug or cache_config.get('debug', False):
+                            print(f"[CACHE] 💾 Saved to cache: {endpoint} ({len(cache_data)} records)")
+                    else:
+                        if debug or cache_config.get('debug', False):
+                            print(f"[CACHE] ⚠️ Skipped caching empty data: {endpoint}")
+                except Exception as e:
+                    if debug or cache_config.get('debug', False):
+                        print(f"[CACHE] ❌ Error converting to cache format: {e}")
+
         return normalized

     except Exception as e:
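Based on the key-building logic above and the UniversalCacheManager directory map added later in this diff, a cached request would land on disk roughly as follows (illustrative paths, assuming the default cache_dir='cache'):

    cache/OPTIONS_EOD/SPY_2024-01-02_2024-03-28.parquet   # options-rawiv with from_ != to
    cache/OPTIONS_EOD/SPY_2024-01-02.parquet              # single-date options request
    cache/STOCK_EOD/SPY_2024-01-02_2024-12-31.parquet     # stock-prices endpoint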
@@ -760,9 +860,17 @@ class PositionManager:
         self.config = config
         self.debug = debug

-
+        # Stop-loss enable logic:
+        # 1) Respect explicit flag if provided
+        # 2) Otherwise infer from stop_loss_config.enabled for convenience
+        explicit_flag = config.get('stop_loss_enabled')
+        sl_cfg = config.get('stop_loss_config', {})
+        inferred_flag = bool(sl_cfg.get('enabled', False))
+
+        self.sl_enabled = explicit_flag if explicit_flag is not None else inferred_flag
+
         if self.sl_enabled:
-            self.sl_config =
+            self.sl_config = sl_cfg
             self.sl_manager = StopLossManager()
         else:
             self.sl_config = None
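A sketch of the two equivalent ways the new enable logic can be driven from config (hypothetical values; only keys that appear in this diff are used):

    # Option 1: explicit flag takes precedence
    config = {'stop_loss_enabled': True, 'stop_loss_config': {'value': 0.20}}

    # Option 2: flag omitted, enablement inferred from stop_loss_config
    config = {'stop_loss_config': {'enabled': True, 'value': 0.20}}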
@@ -1097,7 +1205,7 @@ class BacktestAnalyzer:
         self.metrics['best_trade'] = trades_df['pnl'].max()
         self.metrics['worst_trade'] = trades_df['pnl'].min()

-        if len(winning) > 0 and len(losing) > 0:
+        if len(winning) > 0 and len(losing) > 0 and self.metrics['avg_loss'] != 0:
             self.metrics['avg_win_loss_ratio'] = abs(self.metrics['avg_win'] / self.metrics['avg_loss'])
         else:
             self.metrics['avg_win_loss_ratio'] = 0
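The extra avg_loss check guards against a division by zero when the average loss works out to exactly 0.0 even though losing trades exist; a minimal illustration of the new behaviour (hypothetical numbers):

    avg_win, avg_loss = 125.0, 0.0
    ratio = abs(avg_win / avg_loss) if avg_loss != 0 else 0   # old code raised ZeroDivisionError, new code yields 0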
@@ -1600,7 +1708,7 @@ class ChartGenerator:
         if show_plots:
             plt.show()
         else:
-            plt.close() #
+            plt.close() # Close without displaying

         print(f"Chart saved: {filename}")

@@ -2218,13 +2326,13 @@ def create_stoploss_comparison_chart(results, filename='stoploss_comparison.png'
 # ============================================================
 def preload_options_data(config, progress_widgets=None):
     """
-
-
+    Preload options data for optimization.
+    Loads data ONCE and returns cache.

     Returns:
         tuple: (lean_df, options_cache)
-        - lean_df: DataFrame
-        - options_cache: dict {date: DataFrame}
+        - lean_df: DataFrame with IV lean history
+        - options_cache: dict {date: DataFrame} with options data
     """
     if progress_widgets:
         progress_bar, status_label, monitor, start_time = progress_widgets
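A hedged usage sketch for the preloader as documented above (the config keys are placeholders; note that elsewhere in this release the second return value is passed around as a single combined options DataFrame rather than a per-date dict):

    lean_df, options_cache = preload_options_data(config, progress_widgets=None)
    config['_preloaded_lean_df'] = lean_df
    config['_preloaded_options_cache'] = options_cache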
@@ -2259,14 +2367,14 @@ def preload_options_data(config, progress_widgets=None):

     # Store lean calculations
     lean_history = []
-
+    all_options_data = []  # List to collect all options DataFrames

     # Track time for ETA
     preload_start_time = time.time()

     try:
-
-
+        # Use api_call with caching instead of direct ivol API
+        cache_config = config.get('cache_config')

         # Process each chunk
         for chunk_idx, (chunk_start, chunk_end) in enumerate(date_chunks):
@@ -2280,16 +2388,24 @@ def preload_options_data(config, progress_widgets=None):
                 message=f"🔄 Loading chunk {chunk_idx+1}/{len(date_chunks)}"
             )

-
+            # Use api_call with caching (supports disk + memory cache)
+            raw_data = api_call(
+                '/equities/eod/options-rawiv',
+                cache_config,
                 symbol=symbol,
                 from_=chunk_start.strftime('%Y-%m-%d'),
-                to=chunk_end.strftime('%Y-%m-%d')
+                to=chunk_end.strftime('%Y-%m-%d'),
+                debug=cache_config.get('debug', False) if cache_config else False
             )

             if raw_data is None:
                 continue

-
+            # api_call returns dict with 'data' key
+            if isinstance(raw_data, dict) and 'data' in raw_data:
+                df = pd.DataFrame(raw_data['data'])
+            else:
+                df = pd.DataFrame(raw_data)

             if df.empty:
                 continue
@@ -2334,10 +2450,8 @@ def preload_options_data(config, progress_widgets=None):
                 gc.collect()
                 continue

-            #
-
-            if date_val not in options_cache:
-                options_cache[date_val] = df[df['date'] == date_val].copy()
+            # Collect all options data
+            all_options_data.append(df.copy())

             # Calculate lean for this chunk
             trading_dates = sorted(df['date'].unique())
@@ -2387,29 +2501,419 @@ def preload_options_data(config, progress_widgets=None):
|
|
|
2387
2501
|
lean_df['stock_price'] = lean_df['stock_price'].astype('float32')
|
|
2388
2502
|
lean_df['iv_lean'] = lean_df['iv_lean'].astype('float32')
|
|
2389
2503
|
|
|
2390
|
-
|
|
2504
|
+
# Combine all options data into single DataFrame
|
|
2505
|
+
if all_options_data:
|
|
2506
|
+
options_df = pd.concat(all_options_data, ignore_index=True)
|
|
2507
|
+
# Ensure date column is properly formatted
|
|
2508
|
+
options_df['date'] = pd.to_datetime(options_df['date']).dt.date
|
|
2509
|
+
options_df['expiration'] = pd.to_datetime(options_df['expiration']).dt.date
|
|
2510
|
+
else:
|
|
2511
|
+
options_df = pd.DataFrame()
|
|
2512
|
+
|
|
2513
|
+
del lean_history, all_options_data
|
|
2391
2514
|
gc.collect()
|
|
2392
2515
|
|
|
2393
2516
|
if progress_widgets:
|
|
2394
|
-
status_label.value = f"<b style='color:#00cc00'>✓ Data preloaded: {len(lean_df)} days, {len(
|
|
2517
|
+
status_label.value = f"<b style='color:#00cc00'>✓ Data preloaded: {len(lean_df)} days, {len(options_df)} options records</b>"
|
|
2395
2518
|
progress_bar.value = 35
|
|
2396
2519
|
|
|
2397
|
-
print(f"✓ Data preloaded: {len(lean_df)} days, {len(
|
|
2520
|
+
print(f"✓ Data preloaded: {len(lean_df)} days, {len(options_df)} options records")
|
|
2398
2521
|
|
|
2399
|
-
return lean_df,
|
|
2522
|
+
return lean_df, options_df
|
|
2400
2523
|
|
|
2401
2524
|
except Exception as e:
|
|
2402
2525
|
print(f"Error preloading data: {e}")
|
|
2403
2526
|
return pd.DataFrame(), {}
|
|
2404
2527
|
|
|
2405
2528
|
|
|
2529
|
+
# ============================================================
|
|
2530
|
+
# UNIVERSAL DATA PRELOADER V2 (NEW!)
|
|
2531
|
+
# ============================================================
|
|
2532
|
+
def preload_data_universal(config, data_requests=None):
|
|
2533
|
+
"""
|
|
2534
|
+
🚀 TRULY UNIVERSAL DATA PRELOADER - Works with ANY API endpoint!
|
|
2535
|
+
|
|
2536
|
+
Supports:
|
|
2537
|
+
- EOD data: options-rawiv, stock-prices, ivs-by-delta, ivx, etc.
|
|
2538
|
+
- Intraday data: OPTIONS_INTRADAY, stock intraday, etc.
|
|
2539
|
+
- Any custom endpoint with any parameters
|
|
2540
|
+
- Automatic chunking for date ranges
|
|
2541
|
+
- Manual single-date requests
|
|
2542
|
+
|
|
2543
|
+
Args:
|
|
2544
|
+
config: Strategy configuration (start_date, end_date, symbol)
|
|
2545
|
+
data_requests: List of data requests to load. If None, tries auto-detection.
|
|
2546
|
+
|
|
2547
|
+
Format:
|
|
2548
|
+
[
|
|
2549
|
+
{
|
|
2550
|
+
'name': 'options_data', # Your name for this dataset
|
|
2551
|
+
'endpoint': '/equities/eod/options-rawiv',
|
|
2552
|
+
'params': {...}, # Base params (symbol, etc.)
|
|
2553
|
+
'chunking': { # Optional: for date-range data
|
|
2554
|
+
'enabled': True,
|
|
2555
|
+
'date_param': 'from_', # Param name for start date
|
|
2556
|
+
'date_param_to': 'to', # Param name for end date
|
|
2557
|
+
'chunk_days': 90 # Chunk size in days
|
|
2558
|
+
},
|
|
2559
|
+
'post_process': lambda df: df, # Optional: process DataFrame
|
|
2560
|
+
},
|
|
2561
|
+
{
|
|
2562
|
+
'name': 'ivx_data',
|
|
2563
|
+
'endpoint': '/equities/eod/ivx',
|
|
2564
|
+
'params': {
|
|
2565
|
+
'symbol': config['symbol'],
|
|
2566
|
+
'from_': config['start_date'],
|
|
2567
|
+
'to': config['end_date']
|
|
2568
|
+
},
|
|
2569
|
+
'chunking': {'enabled': False} # Single request
|
|
2570
|
+
},
|
|
2571
|
+
{
|
|
2572
|
+
'name': 'options_intraday',
|
|
2573
|
+
'endpoint': '/equities/intraday/options-rawiv',
|
|
2574
|
+
'params': {
|
|
2575
|
+
'symbol': config['symbol']
|
|
2576
|
+
},
|
|
2577
|
+
'date_list': True, # Load for each date separately
|
|
2578
|
+
'date_param': 'date'
|
|
2579
|
+
}
|
|
2580
|
+
]
|
|
2581
|
+
|
|
2582
|
+
Returns:
|
|
2583
|
+
dict: Preloaded data with keys like:
|
|
2584
|
+
{
|
|
2585
|
+
'_preloaded_options_data': DataFrame,
|
|
2586
|
+
'_preloaded_ivx_data': DataFrame,
|
|
2587
|
+
'_preloaded_options_intraday': DataFrame,
|
|
2588
|
+
'_stats': {...}
|
|
2589
|
+
}
|
|
2590
|
+
|
|
2591
|
+
Usage in strategy:
|
|
2592
|
+
# Check for ANY preloaded data
|
|
2593
|
+
if any(k.startswith('_preloaded_') for k in config):
|
|
2594
|
+
options_df = config.get('_preloaded_options_data', pd.DataFrame()).copy()
|
|
2595
|
+
ivx_df = config.get('_preloaded_ivx_data', pd.DataFrame()).copy()
|
|
2596
|
+
else:
|
|
2597
|
+
# Load fresh
|
|
2598
|
+
...
|
|
2599
|
+
"""
|
|
2600
|
+
|
|
2601
|
+
print("\n" + "="*80)
|
|
2602
|
+
print("🚀 UNIVERSAL PRELOADER V2 - Supports ANY endpoint (EOD/Intraday/IVX/etc.)")
|
|
2603
|
+
print("="*80)
|
|
2604
|
+
start_time = time.time()
|
|
2605
|
+
|
|
2606
|
+
# Extract common config
|
|
2607
|
+
start_date = datetime.strptime(config['start_date'], '%Y-%m-%d').date()
|
|
2608
|
+
end_date = datetime.strptime(config['end_date'], '%Y-%m-%d').date()
|
|
2609
|
+
symbol = config['symbol']
|
|
2610
|
+
cache_config = config.get('cache_config', get_cache_config())
|
|
2611
|
+
|
|
2612
|
+
# Auto-detection if not specified
|
|
2613
|
+
if data_requests is None:
|
|
2614
|
+
data_requests = _auto_detect_requests(config)
|
|
2615
|
+
print(f"\n🔍 Auto-detected {len(data_requests)} data requests from config")
|
|
2616
|
+
|
|
2617
|
+
preloaded = {}
|
|
2618
|
+
total_rows = 0
|
|
2619
|
+
|
|
2620
|
+
# Process each data request
|
|
2621
|
+
for req_idx, request in enumerate(data_requests, 1):
|
|
2622
|
+
req_name = request['name']
|
|
2623
|
+
endpoint = request['endpoint']
|
|
2624
|
+
base_params = request.get('params', {})
|
|
2625
|
+
chunking = request.get('chunking', {'enabled': False})
|
|
2626
|
+
post_process = request.get('post_process', None)
|
|
2627
|
+
date_list = request.get('date_list', False)
|
|
2628
|
+
|
|
2629
|
+
print(f"\n[{req_idx}/{len(data_requests)}] 📊 Loading: {req_name}")
|
|
2630
|
+
print(f" Endpoint: {endpoint}")
|
|
2631
|
+
|
|
2632
|
+
all_data = []
|
|
2633
|
+
|
|
2634
|
+
# ========================================================
|
|
2635
|
+
# MODE 1: DATE LIST (one request per date, e.g., intraday)
|
|
2636
|
+
# ========================================================
|
|
2637
|
+
if date_list:
|
|
2638
|
+
date_param = request.get('date_param', 'date')
|
|
2639
|
+
trading_days = pd.bdate_range(start_date, end_date).date
|
|
2640
|
+
|
|
2641
|
+
print(f" Mode: Date list ({len(trading_days)} dates)")
|
|
2642
|
+
|
|
2643
|
+
for day_idx, date in enumerate(trading_days):
|
|
2644
|
+
params = base_params.copy()
|
|
2645
|
+
params[date_param] = date.strftime('%Y-%m-%d')
|
|
2646
|
+
|
|
2647
|
+
if day_idx % max(1, len(trading_days) // 10) == 0:
|
|
2648
|
+
print(f" Progress: {day_idx}/{len(trading_days)} dates...")
|
|
2649
|
+
|
|
2650
|
+
response = api_call(endpoint, cache_config, **params)
|
|
2651
|
+
if response and 'data' in response:
|
|
2652
|
+
df = pd.DataFrame(response['data'])
|
|
2653
|
+
if len(df) > 0:
|
|
2654
|
+
all_data.append(df)
|
|
2655
|
+
|
|
2656
|
+
# ========================================================
|
|
2657
|
+
# MODE 2: CHUNKED LOADING (date ranges in chunks)
|
|
2658
|
+
# ========================================================
|
|
2659
|
+
elif chunking.get('enabled', False):
|
|
2660
|
+
date_param_from = chunking.get('date_param', 'from_')
|
|
2661
|
+
date_param_to = chunking.get('date_param_to', 'to')
|
|
2662
|
+
chunk_days = chunking.get('chunk_days', 90)
|
|
2663
|
+
chunk_size = timedelta(days=chunk_days)
|
|
2664
|
+
|
|
2665
|
+
current = start_date
|
|
2666
|
+
chunks = []
|
|
2667
|
+
while current <= end_date:
|
|
2668
|
+
chunk_end = min(current + chunk_size, end_date)
|
|
2669
|
+
chunks.append((current, chunk_end))
|
|
2670
|
+
current = chunk_end + timedelta(days=1)
|
|
2671
|
+
|
|
2672
|
+
print(f" Mode: Chunked ({len(chunks)} chunks of {chunk_days} days)")
|
|
2673
|
+
|
|
2674
|
+
for chunk_idx, (chunk_start, chunk_end) in enumerate(chunks):
|
|
2675
|
+
params = base_params.copy()
|
|
2676
|
+
params[date_param_from] = chunk_start.strftime('%Y-%m-%d')
|
|
2677
|
+
params[date_param_to] = chunk_end.strftime('%Y-%m-%d')
|
|
2678
|
+
|
|
2679
|
+
if chunk_idx % max(1, len(chunks) // 5) == 0:
|
|
2680
|
+
print(f" Progress: {chunk_idx+1}/{len(chunks)} chunks...")
|
|
2681
|
+
|
|
2682
|
+
response = api_call(endpoint, cache_config, **params)
|
|
2683
|
+
if response and 'data' in response:
|
|
2684
|
+
df = pd.DataFrame(response['data'])
|
|
2685
|
+
if len(df) > 0:
|
|
2686
|
+
all_data.append(df)
|
|
2687
|
+
|
|
2688
|
+
# ========================================================
|
|
2689
|
+
# MODE 3: SINGLE REQUEST (no chunking/date list)
|
|
2690
|
+
# ========================================================
|
|
2691
|
+
else:
|
|
2692
|
+
print(f" Mode: Single request")
|
|
2693
|
+
|
|
2694
|
+
params = base_params.copy()
|
|
2695
|
+
response = api_call(endpoint, cache_config, **params)
|
|
2696
|
+
if response and 'data' in response:
|
|
2697
|
+
df = pd.DataFrame(response['data'])
|
|
2698
|
+
if len(df) > 0:
|
|
2699
|
+
all_data.append(df)
|
|
2700
|
+
|
|
2701
|
+
# ========================================================
|
|
2702
|
+
# COMBINE AND STORE
|
|
2703
|
+
# ========================================================
|
|
2704
|
+
if len(all_data) > 0:
|
|
2705
|
+
combined_df = pd.concat(all_data, ignore_index=True)
|
|
2706
|
+
|
|
2707
|
+
# Apply post-processing if provided
|
|
2708
|
+
if post_process is not None:
|
|
2709
|
+
try:
|
|
2710
|
+
combined_df = post_process(combined_df)
|
|
2711
|
+
except Exception as e:
|
|
2712
|
+
print(f" ⚠️ Post-processing failed: {e}")
|
|
2713
|
+
|
|
2714
|
+
# Auto-process common date columns
|
|
2715
|
+
combined_df = _auto_process_dates(combined_df)
|
|
2716
|
+
|
|
2717
|
+
# Store with standardized key
|
|
2718
|
+
key = f"_preloaded_{req_name}"
|
|
2719
|
+
preloaded[key] = combined_df
|
|
2720
|
+
total_rows += len(combined_df)
|
|
2721
|
+
|
|
2722
|
+
print(f" ✓ Loaded: {len(combined_df):,} rows → {key}")
|
|
2723
|
+
else:
|
|
2724
|
+
print(f" ⚠️ No data returned")
|
|
2725
|
+
|
|
2726
|
+
# ========================================================
|
|
2727
|
+
# SUMMARY
|
|
2728
|
+
# ========================================================
|
|
2729
|
+
elapsed = time.time() - start_time
|
|
2730
|
+
|
|
2731
|
+
# Collect detailed stats for each dataset
|
|
2732
|
+
dataset_details = {}
|
|
2733
|
+
for k in preloaded.keys():
|
|
2734
|
+
if k.startswith('_preloaded_'):
|
|
2735
|
+
dataset_name = k.replace('_preloaded_', '')
|
|
2736
|
+
df = preloaded[k]
|
|
2737
|
+
dataset_details[dataset_name] = {
|
|
2738
|
+
'rows': len(df),
|
|
2739
|
+
'endpoint': None
|
|
2740
|
+
}
|
|
2741
|
+
|
|
2742
|
+
# Map dataset names to endpoints from data_requests
|
|
2743
|
+
if data_requests:
|
|
2744
|
+
for req in data_requests:
|
|
2745
|
+
req_name = req.get('name', 'unknown')
|
|
2746
|
+
if req_name in dataset_details:
|
|
2747
|
+
dataset_details[req_name]['endpoint'] = req.get('endpoint', 'unknown')
|
|
2748
|
+
|
|
2749
|
+
preloaded['_stats'] = {
|
|
2750
|
+
'load_time_seconds': int(elapsed),
|
|
2751
|
+
'total_rows': total_rows,
|
|
2752
|
+
'data_count': len([k for k in preloaded.keys() if k.startswith('_preloaded_')]),
|
|
2753
|
+
'datasets': [k.replace('_preloaded_', '') for k in preloaded.keys() if k.startswith('_preloaded_')],
|
|
2754
|
+
'dataset_details': dataset_details
|
|
2755
|
+
}
|
|
2756
|
+
|
|
2757
|
+
print(f"\n{'='*80}")
|
|
2758
|
+
print(f"✅ PRELOAD COMPLETE:")
|
|
2759
|
+
print(f" • Time: {int(elapsed)}s")
|
|
2760
|
+
print(f" • Total rows: {total_rows:,}")
|
|
2761
|
+
print(f" • Datasets: {preloaded['_stats']['data_count']}")
|
|
2762
|
+
for ds in preloaded['_stats']['datasets']:
|
|
2763
|
+
print(f" - {ds}")
|
|
2764
|
+
print(f" • Cached in RAM for 4-5x speedup! 🚀")
|
|
2765
|
+
print(f"{'='*80}\n")
|
|
2766
|
+
|
|
2767
|
+
return preloaded
|
|
2768
|
+
|
|
2769
|
+
|
|
2770
|
+
def _auto_detect_requests(config):
|
|
2771
|
+
"""Auto-detect what data to load based on config keys"""
|
|
2772
|
+
requests = []
|
|
2773
|
+
|
|
2774
|
+
# Always load options data for options strategies
|
|
2775
|
+
requests.append({
|
|
2776
|
+
'name': 'options',
|
|
2777
|
+
'endpoint': '/equities/eod/options-rawiv',
|
|
2778
|
+
'params': {
|
|
2779
|
+
'symbol': config['symbol']
|
|
2780
|
+
},
|
|
2781
|
+
'chunking': {
|
|
2782
|
+
'enabled': True,
|
|
2783
|
+
'date_param': 'from_',
|
|
2784
|
+
'date_param_to': 'to',
|
|
2785
|
+
'chunk_days': 90
|
|
2786
|
+
},
|
|
2787
|
+
'post_process': lambda df: _process_options_df(df)
|
|
2788
|
+
})
|
|
2789
|
+
|
|
2790
|
+
# Load IV surface if strategy uses term structure
|
|
2791
|
+
if any(k in config for k in ['short_tenor', 'long_tenor', 'delta_target']):
|
|
2792
|
+
requests.append({
|
|
2793
|
+
'name': 'ivs_surface',
|
|
2794
|
+
'endpoint': '/equities/eod/ivs-by-delta',
|
|
2795
|
+
'params': {
|
|
2796
|
+
'symbol': config['symbol'],
|
|
2797
|
+
'deltaFrom': config.get('delta_target', 0.5) - 0.05,
|
|
2798
|
+
'deltaTo': config.get('delta_target', 0.5) + 0.05,
|
|
2799
|
+
'periodFrom': config.get('short_tenor', 30) - 7,
|
|
2800
|
+
'periodTo': config.get('long_tenor', 90) + 7
|
|
2801
|
+
},
|
|
2802
|
+
'chunking': {
|
|
2803
|
+
'enabled': True,
|
|
2804
|
+
'date_param': 'from_',
|
|
2805
|
+
'date_param_to': 'to',
|
|
2806
|
+
'chunk_days': 90
|
|
2807
|
+
}
|
|
2808
|
+
})
|
|
2809
|
+
|
|
2810
|
+
# Load stock prices
|
|
2811
|
+
requests.append({
|
|
2812
|
+
'name': 'stock',
|
|
2813
|
+
'endpoint': '/equities/eod/stock-prices',
|
|
2814
|
+
'params': {
|
|
2815
|
+
'symbol': config['symbol']
|
|
2816
|
+
},
|
|
2817
|
+
'chunking': {
|
|
2818
|
+
'enabled': True,
|
|
2819
|
+
'date_param': 'from_',
|
|
2820
|
+
'date_param_to': 'to',
|
|
2821
|
+
'chunk_days': 365 # Stock data is lightweight
|
|
2822
|
+
}
|
|
2823
|
+
})
|
|
2824
|
+
|
|
2825
|
+
return requests
|
|
2826
|
+
|
|
2827
|
+
|
|
2828
|
+
def _process_options_df(df):
|
|
2829
|
+
"""Process options DataFrame: dates + DTE + OPTIMIZATIONS (5-10x faster!)"""
|
|
2830
|
+
# Basic date processing
|
|
2831
|
+
if 'date' in df.columns:
|
|
2832
|
+
df['date'] = pd.to_datetime(df['date']).dt.date
|
|
2833
|
+
if 'expiration' in df.columns:
|
|
2834
|
+
df['expiration'] = pd.to_datetime(df['expiration']).dt.date
|
|
2835
|
+
|
|
2836
|
+
if 'date' in df.columns and 'expiration' in df.columns:
|
|
2837
|
+
df = df.copy()
|
|
2838
|
+
df['dte'] = (pd.to_datetime(df['expiration']) -
|
|
2839
|
+
pd.to_datetime(df['date'])).dt.days
|
|
2840
|
+
|
|
2841
|
+
# ========================================================
|
|
2842
|
+
# CRITICAL: SORT BY DATE FIRST! (Required for time-series)
|
|
2843
|
+
# ========================================================
|
|
2844
|
+
if 'date' in df.columns:
|
|
2845
|
+
# Check if already sorted (skip if yes, fast!)
|
|
2846
|
+
if not df['date'].is_monotonic_increasing:
|
|
2847
|
+
df = df.sort_values('date') # ✅ Sort only if needed
|
|
2848
|
+
|
|
2849
|
+
# ========================================================
|
|
2850
|
+
# AUTOMATIC OPTIMIZATIONS (applied by library)
|
|
2851
|
+
# ========================================================
|
|
2852
|
+
|
|
2853
|
+
# These optimizations are SAFE to apply automatically:
|
|
2854
|
+
# - Categorical types for low-cardinality columns
|
|
2855
|
+
# - Optimized numeric types (float32/int16 instead of float64/int64)
|
|
2856
|
+
#
|
|
2857
|
+
# NOTE: We do NOT set index on 'date' in library functions because:
|
|
2858
|
+
# - It breaks existing code that uses .loc with non-date indices
|
|
2859
|
+
# - Requires all strategies to handle Series vs scalar results
|
|
2860
|
+
|
|
2861
|
+
# Convert Call/Put to categorical (60% less RAM, 2x faster filtering)
|
|
2862
|
+
if 'Call/Put' in df.columns:
|
|
2863
|
+
df['Call/Put'] = df['Call/Put'].astype('category')
|
|
2864
|
+
|
|
2865
|
+
# Optimize data types (50% less RAM)
|
|
2866
|
+
# float32 for prices (4 bytes instead of 8, enough precision)
|
|
2867
|
+
float32_cols = ['strike', 'bid', 'ask', 'iv', 'price', 'mid', 'delta', 'gamma', 'vega', 'theta']
|
|
2868
|
+
for col in float32_cols:
|
|
2869
|
+
if col in df.columns:
|
|
2870
|
+
df[col] = pd.to_numeric(df[col], errors='coerce').astype('float32')
|
|
2871
|
+
|
|
2872
|
+
# int16 for DTE (2 bytes instead of 8, max 32767 days)
|
|
2873
|
+
if 'dte' in df.columns:
|
|
2874
|
+
df['dte'] = df['dte'].astype('int16')
|
|
2875
|
+
|
|
2876
|
+
return df
|
|
2877
|
+
|
|
2878
|
+
|
|
2879
|
+
def _auto_process_dates(df):
|
|
2880
|
+
"""Auto-process common date columns + SORT BY DATE"""
|
|
2881
|
+
date_columns = ['date', 'expiration', 'trade_date', 'time']
|
|
2882
|
+
|
|
2883
|
+
for col in date_columns:
|
|
2884
|
+
if col in df.columns:
|
|
2885
|
+
try:
|
|
2886
|
+
if col == 'time':
|
|
2887
|
+
# Keep time as string or datetime
|
|
2888
|
+
pass
|
|
2889
|
+
else:
|
|
2890
|
+
df[col] = pd.to_datetime(df[col]).dt.date
|
|
2891
|
+
except:
|
|
2892
|
+
pass # Already in correct format or not a date
|
|
2893
|
+
|
|
2894
|
+
# ========================================================
|
|
2895
|
+
# CRITICAL: SORT BY DATE! (Required for time-series)
|
|
2896
|
+
# ========================================================
|
|
2897
|
+
if 'date' in df.columns:
|
|
2898
|
+
# Check if already sorted (O(1) check vs O(N log N) sort)
|
|
2899
|
+
if not df['date'].is_monotonic_increasing:
|
|
2900
|
+
df = df.sort_values('date') # ✅ Sort only if needed
|
|
2901
|
+
elif 'trade_date' in df.columns:
|
|
2902
|
+
if not df['trade_date'].is_monotonic_increasing:
|
|
2903
|
+
df = df.sort_values('trade_date') # Alternative date column
|
|
2904
|
+
|
|
2905
|
+
return df
|
|
2906
|
+
|
|
2907
|
+
|
|
2406
2908
|
# ============================================================
|
|
2407
2909
|
# NEW: OPTIMIZATION FRAMEWORK
|
|
2408
2910
|
# ============================================================
|
|
2409
2911
|
def optimize_parameters(base_config, param_grid, strategy_function,
|
|
2410
2912
|
optimization_metric='sharpe', min_trades=5,
|
|
2411
2913
|
max_drawdown_limit=None, parallel=False,
|
|
2412
|
-
export_each_combo=True # ←
|
|
2914
|
+
export_each_combo=True, # ← NEW PARAMETER
|
|
2915
|
+
optimization_config=None, # ← NEW PARAMETER FOR PRESETS
|
|
2916
|
+
results_folder=None # ← NEW: Use existing folder or create new
|
|
2413
2917
|
):
|
|
2414
2918
|
"""
|
|
2415
2919
|
Optimize strategy parameters across multiple combinations
|
|
@@ -2419,7 +2923,7 @@ def optimize_parameters(base_config, param_grid, strategy_function,
         param_grid: Dict of parameters to optimize
                     Example: {'z_score_entry': [1.0, 1.5, 2.0], 'z_score_exit': [0.1, 0.3, 0.5]}
         strategy_function: Strategy function to run
-        optimization_metric: Metric to optimize ('sharpe', 'total_return', 'profit_factor', 'calmar')
+        optimization_metric: Metric to optimize ('sharpe', 'total_return', 'total_pnl', 'profit_factor', 'calmar')
         min_trades: Minimum number of trades required
         max_drawdown_limit: Maximum acceptable drawdown (e.g., 0.10 for 10%)
         parallel: Use parallel processing (not implemented yet)
@@ -2429,10 +2933,35 @@ def optimize_parameters(base_config, param_grid, strategy_function,
         tuple: (results_df, best_params, results_folder)
     """

-    #
-
-
-
+    # Check if optimization_config has preset and apply it automatically
+    if optimization_config and isinstance(optimization_config, dict) and 'preset' in optimization_config:
+        preset = optimization_config['preset']
+        print(f"🔄 Auto-applying preset: {preset}")
+        apply_optimization_preset(optimization_config, preset)
+        print_preset_info(optimization_config)
+
+        # Use preset parameters for grid and validation criteria
+        param_grid = optimization_config['param_grid']
+        min_trades = optimization_config['min_trades']
+        max_drawdown_limit = optimization_config['max_drawdown_limit']
+
+        # Use optimization_config for optimization_metric if available
+        if 'optimization_metric' in optimization_config:
+            optimization_metric = optimization_config['optimization_metric']
+
+        # Use optimization_config for execution settings if available
+        if 'parallel' in optimization_config:
+            parallel = optimization_config['parallel']
+        if 'export_each_combo' in optimization_config:
+            export_each_combo = optimization_config['export_each_combo']
+
+    # ═══ ADD AT THE BEGINNING OF FUNCTION ═══
+    # Create results folder (or use provided one)
+    if results_folder is None:
+        results_folder = create_optimization_folder()
+        print(f"📊 Results will be saved to: {results_folder}\n")
+    else:
+        print(f"📊 Using existing results folder: {results_folder}\n")

     # Record start time
     optimization_start_time = datetime.now()
@@ -2496,7 +3025,7 @@ def optimize_parameters(base_config, param_grid, strategy_function,
|
|
|
2496
3025
|
print("📥 PRELOADING OPTIONS DATA (loads ONCE, reused for all combinations)")
|
|
2497
3026
|
print("="*80)
|
|
2498
3027
|
|
|
2499
|
-
preloaded_lean_df,
|
|
3028
|
+
preloaded_lean_df, preloaded_options_df = preload_options_data(
|
|
2500
3029
|
base_config,
|
|
2501
3030
|
progress_widgets=shared_progress['progress_widgets'] if shared_progress else None
|
|
2502
3031
|
)
|
|
@@ -2534,7 +3063,7 @@ def optimize_parameters(base_config, param_grid, strategy_function,
|
|
|
2534
3063
|
|
|
2535
3064
|
# ═══ ADD PRELOADED DATA TO CONFIG ═══
|
|
2536
3065
|
test_config['_preloaded_lean_df'] = preloaded_lean_df
|
|
2537
|
-
test_config['_preloaded_options_cache'] =
|
|
3066
|
+
test_config['_preloaded_options_cache'] = preloaded_options_df
|
|
2538
3067
|
|
|
2539
3068
|
# Update progress
|
|
2540
3069
|
if has_widgets:
|
|
@@ -2550,7 +3079,7 @@ def optimize_parameters(base_config, param_grid, strategy_function,
|
|
|
2550
3079
|
if idx % max(1, total_combinations // 10) == 0:
|
|
2551
3080
|
print(f"[{idx}/{total_combinations}] {param_str}")
|
|
2552
3081
|
|
|
2553
|
-
# ═══
|
|
3082
|
+
# ═══ MODIFY run_backtest CALL (lines ~2240-2248) ═══
|
|
2554
3083
|
try:
|
|
2555
3084
|
# Create compact parameter string (e.g., Z1.0_E0.1_PT20)
|
|
2556
3085
|
param_parts = []
|
|
@@ -2585,11 +3114,11 @@ def optimize_parameters(base_config, param_grid, strategy_function,
|
|
|
2585
3114
|
strategy_function,
|
|
2586
3115
|
test_config,
|
|
2587
3116
|
print_report=False,
|
|
2588
|
-
create_charts=export_each_combo, # ←
|
|
2589
|
-
export_results=export_each_combo, # ←
|
|
3117
|
+
create_charts=export_each_combo, # ← CREATE CHARTS (saved but not displayed)
|
|
3118
|
+
export_results=export_each_combo, # ← MODIFIED
|
|
2590
3119
|
progress_context=shared_progress,
|
|
2591
|
-
chart_filename=os.path.join(combo_folder, 'equity_curve.png') if export_each_combo else None, # ←
|
|
2592
|
-
export_prefix=os.path.join(combo_folder, combo_prefix) if export_each_combo else None # ←
|
|
3120
|
+
chart_filename=os.path.join(combo_folder, 'equity_curve.png') if export_each_combo else None, # ← CHARTS SAVED
|
|
3121
|
+
export_prefix=os.path.join(combo_folder, combo_prefix) if export_each_combo else None # ← ADDED
|
|
2593
3122
|
)
|
|
2594
3123
|
|
|
2595
3124
|
# Check validity
|
|
@@ -2670,6 +3199,8 @@ def optimize_parameters(base_config, param_grid, strategy_function,
|
|
|
2670
3199
|
valid_so_far.sort(key=lambda x: x['sharpe'], reverse=True)
|
|
2671
3200
|
elif optimization_metric == 'total_return':
|
|
2672
3201
|
valid_so_far.sort(key=lambda x: x['total_return'], reverse=True)
|
|
3202
|
+
elif optimization_metric == 'total_pnl':
|
|
3203
|
+
valid_so_far.sort(key=lambda x: x['total_pnl'], reverse=True)
|
|
2673
3204
|
elif optimization_metric == 'profit_factor':
|
|
2674
3205
|
valid_so_far.sort(key=lambda x: x['profit_factor'], reverse=True)
|
|
2675
3206
|
elif optimization_metric == 'calmar':
|
|
@@ -2693,7 +3224,10 @@ def optimize_parameters(base_config, param_grid, strategy_function,
|
|
|
2693
3224
|
except Exception as e:
|
|
2694
3225
|
print(f"\n[{idx}/{total_combinations}] {param_str}")
|
|
2695
3226
|
print("-" * 80)
|
|
2696
|
-
print(f" ✗ ERROR: {str(e)
|
|
3227
|
+
print(f" ✗ ERROR: {str(e)}")
|
|
3228
|
+
import traceback
|
|
3229
|
+
print(" Full traceback:")
|
|
3230
|
+
traceback.print_exc()
|
|
2697
3231
|
|
|
2698
3232
|
result = {
|
|
2699
3233
|
'combination_id': idx,
|
|
@@ -2721,7 +3255,7 @@ def optimize_parameters(base_config, param_grid, strategy_function,
|
|
|
2721
3255
|
for col in numeric_columns:
|
|
2722
3256
|
results_df[col] = results_df[col].round(5)
|
|
2723
3257
|
|
|
2724
|
-
# ═══
|
|
3258
|
+
# ═══ ADD SUMMARY SAVE TO FOLDER ═══
|
|
2725
3259
|
summary_path = os.path.join(results_folder, 'optimization_summary.csv')
|
|
2726
3260
|
results_df.to_csv(summary_path, index=False)
|
|
2727
3261
|
print(f"\n✓ Summary saved: {summary_path}")
|
|
@@ -2741,6 +3275,8 @@ def optimize_parameters(base_config, param_grid, strategy_function,
|
|
|
2741
3275
|
best_idx = valid_results['sharpe'].idxmax()
|
|
2742
3276
|
elif optimization_metric == 'total_return':
|
|
2743
3277
|
best_idx = valid_results['total_return'].idxmax()
|
|
3278
|
+
elif optimization_metric == 'total_pnl':
|
|
3279
|
+
best_idx = valid_results['total_pnl'].idxmax()
|
|
2744
3280
|
elif optimization_metric == 'profit_factor':
|
|
2745
3281
|
best_idx = valid_results['profit_factor'].idxmax()
|
|
2746
3282
|
elif optimization_metric == 'calmar':
|
|
@@ -2753,6 +3289,12 @@ def optimize_parameters(base_config, param_grid, strategy_function,
|
|
|
2753
3289
|
# Extract best parameters
|
|
2754
3290
|
best_params = {name: best_result[name] for name in param_names}
|
|
2755
3291
|
|
|
3292
|
+
# Add stop_loss_pct if it exists in config (it's handled separately in notebook)
|
|
3293
|
+
if 'stop_loss_config' in base_config and base_config['stop_loss_config']:
|
|
3294
|
+
stop_loss_value = base_config['stop_loss_config'].get('value')
|
|
3295
|
+
if stop_loss_value is not None:
|
|
3296
|
+
best_params['stop_loss_pct'] = stop_loss_value
|
|
3297
|
+
|
|
2756
3298
|
# Calculate total time
|
|
2757
3299
|
optimization_end_time = datetime.now()
|
|
2758
3300
|
total_duration = optimization_end_time - optimization_start_time
|
|
@@ -2760,39 +3302,81 @@ def optimize_parameters(base_config, param_grid, strategy_function,
|
|
|
2760
3302
|
duration_str = format_time(total_duration.total_seconds())
|
|
2761
3303
|
|
|
2762
3304
|
# Print summary
|
|
2763
|
-
print("="*
|
|
2764
|
-
print(" "*
|
|
2765
|
-
print("
|
|
2766
|
-
print(f"
|
|
2767
|
-
print(f"
|
|
2768
|
-
print(f"
|
|
2769
|
-
print(f"
|
|
2770
|
-
print(f"
|
|
2771
|
-
print(f"
|
|
2772
|
-
|
|
2773
|
-
|
|
2774
|
-
print("
|
|
2775
|
-
print("
|
|
2776
|
-
|
|
2777
|
-
|
|
3305
|
+
print("\n" + "="*120)
|
|
3306
|
+
print(" "*31 + "🏆 OPTIMIZATION COMPLETE 🏆")
|
|
3307
|
+
print(" "*31 + "=========================")
|
|
3308
|
+
print(f" • Started : {start_time_str}")
|
|
3309
|
+
print(f" • Finished : {end_time_str}")
|
|
3310
|
+
print(f" • Total Duration : {duration_str} ({int(total_duration.total_seconds())} seconds)")
|
|
3311
|
+
print(f" • Average per run : {total_duration.total_seconds() / total_combinations:.1f} seconds")
|
|
3312
|
+
print(f" • Total combinations : {total_combinations}")
|
|
3313
|
+
print(f" • Valid combinations : {len(valid_results)}")
|
|
3314
|
+
print(f" • Invalid combinations : {len(results_df) - len(valid_results)}")
|
|
3315
|
+
|
|
3316
|
+
print(f"\n📈 OPTIMIZATION METRIC:")
|
|
3317
|
+
print(f" • Metric optimized : {optimization_metric.upper()}")
|
|
3318
|
+
|
|
3319
|
+
# Format best parameters in one line (with special formatting for stop_loss_pct)
|
|
3320
|
+
param_parts = []
|
|
3321
|
+
for name, value in best_params.items():
|
|
3322
|
+
if name == 'stop_loss_pct':
|
|
3323
|
+
param_parts.append(f"stop_loss={value*100:.0f}%")
|
|
3324
|
+
else:
|
|
3325
|
+
param_parts.append(f"{name}={value}")
|
|
3326
|
+
param_str = ", ".join(param_parts)
|
|
3327
|
+
print(f" • Best parameters : {param_str}")
|
|
3328
|
+
|
|
3329
|
+
# Add intraday stop-loss info if enabled
|
|
3330
|
+
intraday_stops = base_config.get('intraday_stops', {})
|
|
3331
|
+
if intraday_stops.get('enabled', False):
|
|
3332
|
+
intraday_pct = intraday_stops.get('stop_pct', 0.03) * 100
|
|
3333
|
+
intraday_days = intraday_stops.get('min_days_before_intraday', 3)
|
|
3334
|
+
print(f" • Intraday stop-loss : Enabled ({intraday_pct:.0f}% after {intraday_days} days)")
|
|
3335
|
+
|
|
3336
|
+
print(f"\n🏆 BEST PERFORMANCE:")
|
|
3337
|
+
print(f" • Total Return : {best_result['total_return']:>10.2f}%")
|
|
3338
|
+
print(f" • Sharpe Ratio : {best_result['sharpe']:>10.2f}")
|
|
3339
|
+
print(f" • Max Drawdown : {best_result['max_drawdown']:>10.2f}%")
|
|
3340
|
+
print(f" • Win Rate : {best_result['win_rate']:>10.1f}%")
|
|
3341
|
+
print(f" • Profit Factor : {best_result['profit_factor']:>10.2f}")
|
|
3342
|
+
print(f" • Total Trades : {best_result['total_trades']:>10.0f}")
|
|
3343
|
+
|
|
3344
|
+
print(f"\n🔌 API ENDPOINTS:")
|
|
3345
|
+
# Extract real endpoints from preloaded data stats
|
|
3346
|
+
endpoints_info = []
|
|
3347
|
+
|
|
3348
|
+
if '_stats' in base_config and 'dataset_details' in base_config['_stats']:
|
|
3349
|
+
dataset_details = base_config['_stats']['dataset_details']
|
|
3350
|
+
for dataset_name, info in dataset_details.items():
|
|
3351
|
+
endpoint = info.get('endpoint')
|
|
3352
|
+
rows = info.get('rows', 0)
|
|
3353
|
+
if endpoint:
|
|
3354
|
+
endpoints_info.append((endpoint, rows))
|
|
3355
|
+
|
|
3356
|
+
# Check if intraday stops are enabled
|
|
3357
|
+
intraday_stops = base_config.get('intraday_stops', {})
|
|
3358
|
+
if intraday_stops.get('enabled', False):
|
|
3359
|
+
intraday_endpoint = "/equities/intraday/stock-prices"
|
|
3360
|
+
if not any(ep[0] == intraday_endpoint for ep in endpoints_info):
|
|
3361
|
+
endpoints_info.append((intraday_endpoint, "on-demand"))
|
|
3362
|
+
|
|
3363
|
+
if endpoints_info:
|
|
3364
|
+
for idx, (endpoint, rows) in enumerate(endpoints_info, 1):
|
|
3365
|
+
if isinstance(rows, int):
|
|
3366
|
+
print(f" {idx}. {endpoint:<45} ({rows:>10,} rows)")
|
|
3367
|
+
else:
|
|
3368
|
+
print(f" {idx}. {endpoint:<45} ({rows})")
|
|
3369
|
+
else:
|
|
3370
|
+
# Fallback to static list if no stats available
|
|
3371
|
+
print(f" 1. /equities/eod/options-rawiv")
|
|
3372
|
+
print(f" 2. /equities/eod/stock-prices")
|
|
3373
|
+
if intraday_stops.get('enabled', False):
|
|
3374
|
+
print(f" 3. /equities/intraday/stock-prices")
|
|
2778
3375
|
|
|
2779
|
-
print(
|
|
2780
|
-
print(" "*20 + "BEST PERFORMANCE")
|
|
2781
|
-
print("="*80)
|
|
2782
|
-
print(f"Total Return: {best_result['total_return']:>10.2f}%")
|
|
2783
|
-
print(f"Sharpe Ratio: {best_result['sharpe']:>10.2f}")
|
|
2784
|
-
print(f"Sortino Ratio: {best_result['sortino']:>10.2f}")
|
|
2785
|
-
print(f"Calmar Ratio: {best_result['calmar']:>10.2f}")
|
|
2786
|
-
print(f"Max Drawdown: {best_result['max_drawdown']:>10.2f}%")
|
|
2787
|
-
print(f"Win Rate: {best_result['win_rate']:>10.1f}%")
|
|
2788
|
-
print(f"Profit Factor: {best_result['profit_factor']:>10.2f}")
|
|
2789
|
-
print(f"Total Trades: {best_result['total_trades']:>10.0f}")
|
|
2790
|
-
print(f"Avg Win: ${best_result['avg_win']:>10.2f}")
|
|
2791
|
-
print(f"Avg Loss: ${best_result['avg_loss']:>10.2f}")
|
|
2792
|
-
print("="*80)
|
|
3376
|
+
print("="*120)
|
|
2793
3377
|
|
|
2794
3378
|
# ═══════════════════════════════════════════════════════════════════════════
|
|
2795
|
-
#
|
|
3379
|
+
# NEW! FULL BACKTEST OF BEST COMBINATION WITH ALL CHARTS
|
|
2796
3380
|
# ═══════════════════════════════════════════════════════════════════════════
|
|
2797
3381
|
print("\n" + "="*80)
|
|
2798
3382
|
print(" "*15 + "RUNNING FULL BACKTEST FOR BEST COMBINATION")
|
|
@@ -2804,7 +3388,7 @@ def optimize_parameters(base_config, param_grid, strategy_function,
|
|
|
2804
3388
|
best_config = base_config.copy()
|
|
2805
3389
|
best_config.update(best_params)
|
|
2806
3390
|
best_config['_preloaded_lean_df'] = preloaded_lean_df
|
|
2807
|
-
best_config['_preloaded_options_cache'] =
|
|
3391
|
+
best_config['_preloaded_options_cache'] = preloaded_options_df
|
|
2808
3392
|
|
|
2809
3393
|
# Create folder for best combination
|
|
2810
3394
|
best_combo_folder = os.path.join(results_folder, 'best_combination')
|
|
@@ -2816,10 +3400,10 @@ def optimize_parameters(base_config, param_grid, strategy_function,
|
|
|
2816
3400
|
best_analyzer = run_backtest(
|
|
2817
3401
|
strategy_function,
|
|
2818
3402
|
best_config,
|
|
2819
|
-
print_report=True, # ←
|
|
2820
|
-
create_charts=True, # ←
|
|
2821
|
-
export_results=True, # ←
|
|
2822
|
-
progress_context=None, # ←
|
|
3403
|
+
print_report=True, # ← SHOW FULL REPORT
|
|
3404
|
+
create_charts=True, # ← CREATE ALL CHARTS
|
|
3405
|
+
export_results=True, # ← EXPORT ALL FILES
|
|
3406
|
+
progress_context=None, # ← Normal mode
|
|
2823
3407
|
chart_filename=os.path.join(best_combo_folder, 'equity_curve.png'),
|
|
2824
3408
|
export_prefix=os.path.join(best_combo_folder, 'best')
|
|
2825
3409
|
)
|
|
@@ -2837,7 +3421,7 @@ def optimize_parameters(base_config, param_grid, strategy_function,
|
|
|
2837
3421
|
print(f"✓ Best combination results saved to: {best_combo_folder}/")
|
|
2838
3422
|
|
|
2839
3423
|
# ═══════════════════════════════════════════════════════════════════════════
|
|
2840
|
-
#
|
|
3424
|
+
# DISPLAY CHARTS FOR BEST COMBINATION IN NOTEBOOK
|
|
2841
3425
|
# ═══════════════════════════════════════════════════════════════════════════
|
|
2842
3426
|
try:
|
|
2843
3427
|
# Charts are displayed in the notebook, not here
|
|
@@ -2847,12 +3431,34 @@ def optimize_parameters(base_config, param_grid, strategy_function,
|
|
|
2847
3431
|
except Exception as e:
|
|
2848
3432
|
print(f"\n⚠ Could not display charts (saved to {best_combo_folder}/): {e}")
|
|
2849
3433
|
|
|
3434
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
3435
|
+
# CREATE OPTIMIZATION COMPARISON CHARTS (save only, display in notebook manually)
|
|
3436
|
+
# ═══════════════════════════════════════════════════════════════════════════
|
|
3437
|
+
print("\n" + "="*80)
|
|
3438
|
+
print(" "*15 + "CREATING OPTIMIZATION COMPARISON CHARTS")
|
|
3439
|
+
print("="*80)
|
|
3440
|
+
try:
|
|
3441
|
+
optimization_chart_path = os.path.join(results_folder, 'optimization_results.png')
|
|
3442
|
+
# Save chart but don't display (show_plot=False) - display will be done in notebook for combined results
|
|
3443
|
+
plot_optimization_results(
|
|
3444
|
+
results_df,
|
|
3445
|
+
param_names,
|
|
3446
|
+
filename=optimization_chart_path,
|
|
3447
|
+
show_plot=False # Don't display here - will be shown in notebook for combined results
|
|
3448
|
+
)
|
|
3449
|
+
print(f"✓ Optimization comparison charts saved to: {optimization_chart_path}")
|
|
3450
|
+
print(" (Chart will be displayed in notebook for combined results)")
|
|
3451
|
+
except Exception as e:
|
|
3452
|
+
print(f"⚠ Could not create optimization charts: {e}")
|
|
3453
|
+
import traceback
|
|
3454
|
+
traceback.print_exc()
|
|
3455
|
+
|
|
2850
3456
|
print("="*80 + "\n")
|
|
2851
3457
|
|
|
2852
3458
|
return results_df, best_params, results_folder
|
|
2853
3459
|
|
|
2854
3460
|
|
|
2855
|
-
def plot_optimization_results(results_df, param_names, filename='optimization_results.png'):
|
|
3461
|
+
def plot_optimization_results(results_df, param_names, filename='optimization_results.png', show_plot=True):
|
|
2856
3462
|
"""
|
|
2857
3463
|
Create visualization of optimization results
|
|
2858
3464
|
|
|
@@ -2860,10 +3466,16 @@ def plot_optimization_results(results_df, param_names, filename='optimization_re
|
|
|
2860
3466
|
results_df: Results DataFrame from optimize_parameters()
|
|
2861
3467
|
param_names: List of parameter names
|
|
2862
3468
|
filename: Output filename
|
|
3469
|
+
show_plot: If True, display plot in Jupyter notebook (default: True)
|
|
2863
3470
|
"""
|
|
2864
3471
|
import matplotlib.pyplot as plt
|
|
2865
3472
|
import seaborn as sns
|
|
2866
3473
|
|
|
3474
|
+
# Handle missing is_valid column (for combined results from multiple optimizations)
|
|
3475
|
+
if 'is_valid' not in results_df.columns:
|
|
3476
|
+
results_df = results_df.copy()
|
|
3477
|
+
results_df['is_valid'] = True
|
|
3478
|
+
|
|
2867
3479
|
valid_results = results_df[results_df['is_valid'] == True].copy()
|
|
2868
3480
|
|
|
2869
3481
|
if valid_results.empty:
|
|
@@ -2944,18 +3556,393 @@ def plot_optimization_results(results_df, param_names, filename='optimization_re
|
|
|
2944
3556
|
|
|
2945
3557
|
# 6. Top 10 combinations
|
|
2946
3558
|
ax6 = plt.subplot(2, 3, 6)
|
|
2947
|
-
|
|
2948
|
-
|
|
2949
|
-
|
|
2950
|
-
|
|
2951
|
-
|
|
2952
|
-
|
|
3559
|
+
if 'combination_id' in valid_results.columns:
|
|
3560
|
+
top_10 = valid_results.nlargest(10, 'sharpe')[['combination_id', 'sharpe']].sort_values('sharpe')
|
|
3561
|
+
ax6.barh(range(len(top_10)), top_10['sharpe'], color='green', alpha=0.7)
|
|
3562
|
+
ax6.set_yticks(range(len(top_10)))
|
|
3563
|
+
ax6.set_yticklabels([f"#{int(x)}" for x in top_10['combination_id']])
|
|
3564
|
+
ax6.set_xlabel('Sharpe Ratio', fontsize=10)
|
|
3565
|
+
ax6.set_title('Top 10 Combinations by Sharpe', fontsize=11, fontweight='bold')
|
|
3566
|
+
else:
|
|
3567
|
+
# Fallback: use index as combination ID
|
|
3568
|
+
top_10 = valid_results.nlargest(10, 'sharpe')['sharpe'].sort_values()
|
|
3569
|
+
ax6.barh(range(len(top_10)), top_10.values, color='green', alpha=0.7)
|
|
3570
|
+
ax6.set_yticks(range(len(top_10)))
|
|
3571
|
+
ax6.set_yticklabels([f"#{i+1}" for i in range(len(top_10))])
|
|
3572
|
+
ax6.set_xlabel('Sharpe Ratio', fontsize=10)
|
|
3573
|
+
ax6.set_title('Top 10 Combinations by Sharpe', fontsize=11, fontweight='bold')
|
|
2953
3574
|
ax6.grid(True, alpha=0.3, axis='x')
|
|
2954
3575
|
|
|
2955
3576
|
plt.tight_layout()
|
|
2956
3577
|
plt.savefig(filename, dpi=150, bbox_inches='tight')
|
|
2957
3578
|
print(f"\nVisualization saved: {filename}")
|
|
2958
|
-
|
|
3579
|
+
|
|
3580
|
+
# Display plot if requested
|
|
3581
|
+
if show_plot:
|
|
3582
|
+
try:
|
|
3583
|
+
# First try to use IPython.display.Image (most reliable in Jupyter)
|
|
3584
|
+
from IPython.display import display, Image
|
|
3585
|
+
import os
|
|
3586
|
+
if os.path.exists(filename):
|
|
3587
|
+
display(Image(filename))
|
|
3588
|
+
else:
|
|
3589
|
+
# If file doesn't exist yet, try plt.show()
|
|
3590
|
+
plt.show()
|
|
3591
|
+
except (ImportError, NameError):
|
|
3592
|
+
# Not in Jupyter or IPython not available - try plt.show()
|
|
3593
|
+
try:
|
|
3594
|
+
plt.show()
|
|
3595
|
+
except:
|
|
3596
|
+
plt.close()
|
|
3597
|
+
except Exception:
|
|
3598
|
+
# Any other error - try plt.show() as fallback
|
|
3599
|
+
try:
|
|
3600
|
+
plt.show()
|
|
3601
|
+
except:
|
|
3602
|
+
plt.close()
|
|
3603
|
+
else:
|
|
3604
|
+
plt.close() # Close without displaying
|
|
3605
|
+
|
|
3606
|
+
|
|
3607
|
+
# ============================================================
|
|
3608
|
+
# CACHE CONFIGURATION (integrated from universal_backend_system.py)
|
|
3609
|
+
# ============================================================
|
|
3610
|
+
def get_cache_config(disk_enabled: bool = True, memory_enabled: bool = True,
|
|
3611
|
+
memory_percent: int = 10, max_age_days: int = 7,
|
|
3612
|
+
debug: bool = False, cache_dir: str = 'cache',
|
|
3613
|
+
compression: bool = True, auto_cleanup: bool = True) -> Dict[str, Any]:
|
|
3614
|
+
"""
|
|
3615
|
+
Get cache configuration
|
|
3616
|
+
|
|
3617
|
+
Args:
|
|
3618
|
+
disk_enabled: Enable disk cache
|
|
3619
|
+
memory_enabled: Enable memory cache
|
|
3620
|
+
memory_percent: RAM percentage for cache (default 10%)
|
|
3621
|
+
max_age_days: Maximum cache age in days
|
|
3622
|
+
debug: Debug mode
|
|
3623
|
+
cache_dir: Cache directory
|
|
3624
|
+
compression: Use compression (Parquet + Snappy)
|
|
3625
|
+
auto_cleanup: Automatic cleanup of old cache
|
|
3626
|
+
|
|
3627
|
+
Returns:
|
|
3628
|
+
Dict with cache configuration
|
|
3629
|
+
"""
|
|
3630
|
+
return {
|
|
3631
|
+
'disk_enabled': disk_enabled,
|
|
3632
|
+
'memory_enabled': memory_enabled,
|
|
3633
|
+
'memory_percent': memory_percent,
|
|
3634
|
+
'max_age_days': max_age_days,
|
|
3635
|
+
'debug': debug,
|
|
3636
|
+
'cache_dir': cache_dir,
|
|
3637
|
+
'compression': compression,
|
|
3638
|
+
'auto_cleanup': auto_cleanup
|
|
3639
|
+
}
|
|
3640
|
+
|
|
3641
|
+
|
|
3642
|
+
# ============================================================
|
|
3643
|
+
# UNIVERSAL CACHE MANAGER (integrated from universal_backend_system.py)
|
|
3644
|
+
# ============================================================
|
|
3645
|
+
class UniversalCacheManager:
|
|
3646
|
+
"""Universal cache manager for any data types"""
|
|
3647
|
+
|
|
3648
|
+
# Mapping data types to cache directories
|
|
3649
|
+
DATA_TYPE_MAP = {
|
|
3650
|
+
'stock_eod': 'STOCK_EOD',
|
|
3651
|
+
'stock_intraday': 'STOCK_INTRADAY',
|
|
3652
|
+
'options_eod': 'OPTIONS_EOD',
|
|
3653
|
+
'options_intraday': 'OPTIONS_INTRADAY',
|
|
3654
|
+
# Backward compatibility (old naming):
|
|
3655
|
+
'stock': 'STOCK_EOD',
|
|
3656
|
+
'options': 'OPTIONS_EOD',
|
|
3657
|
+
'intraday': 'OPTIONS_INTRADAY', # Default intraday = options
|
|
3658
|
+
}
|
|
3659
|
+
|
|
3660
|
+
def __init__(self, cache_config: Dict[str, Any]):
|
|
3661
|
+
self.cache_config = cache_config
|
|
3662
|
+
self.disk_enabled = cache_config.get('disk_enabled', True)
|
|
3663
|
+
self.memory_enabled = cache_config.get('memory_enabled', True)
|
|
3664
|
+
self.memory_percent = cache_config.get('memory_percent', 10)
|
|
3665
|
+
self.max_age_days = cache_config.get('max_age_days', 7)
|
|
3666
|
+
self.debug = cache_config.get('debug', False)
|
|
3667
|
+
self.cache_dir = cache_config.get('cache_dir', 'cache')
|
|
3668
|
+
self.compression = cache_config.get('compression', True)
|
|
3669
|
+
self.auto_cleanup = cache_config.get('auto_cleanup', True)
|
|
3670
|
+
|
|
3671
|
+
# Calculate cache size in RAM
|
|
3672
|
+
if self.memory_enabled:
|
|
3673
|
+
total_memory = psutil.virtual_memory().total
|
|
3674
|
+
self.max_memory_bytes = int(total_memory * self.memory_percent / 100)
|
|
3675
|
+
self.memory_cache = {}
|
|
3676
|
+
self.cache_order = []
|
|
3677
|
+
else:
|
|
3678
|
+
self.max_memory_bytes = 0
|
|
3679
|
+
self.memory_cache = {}
|
|
3680
|
+
self.cache_order = []
|
|
3681
|
+
|
|
3682
|
+
# Create cache directories
|
|
3683
|
+
if self.disk_enabled:
|
|
3684
|
+
os.makedirs(self.cache_dir, exist_ok=True)
|
|
3685
|
+
|
|
3686
|
+
+    def get(self, key: str, data_type: str = 'default') -> Optional[Any]:
+        """Get data from cache"""
+        try:
+            # Check memory
+            if self.memory_enabled and key in self.memory_cache:
+                if self.debug:
+                    print(f"[CACHE] 🧠 Memory hit: {key}")
+                return self.memory_cache[key]
+
+            # Check disk
+            if self.disk_enabled:
+                # Map data_type to proper directory structure using DATA_TYPE_MAP
+                dir_name = self.DATA_TYPE_MAP.get(data_type, data_type.upper())
+                data_dir = f"{self.cache_dir}/{dir_name}"
+
+                cache_file = os.path.join(data_dir, f"{key}.parquet")
+                if os.path.exists(cache_file):
+                    if self._is_cache_valid(cache_file):
+                        data = self._load_from_disk(cache_file)
+                        if data is not None:
+                            # Save to memory
+                            if self.memory_enabled:
+                                self._save_to_memory(key, data)
+                            if self.debug:
+                                print(f"[CACHE] 💾 Disk hit: {key}")
+                            return data
+
+                # NEW: If exact match not found, search for overlapping cache
+                # Only for date-range based cache types
+                if data_type in ['stock_eod', 'options_eod', 'stock_intraday', 'options_intraday']:
+                    overlapping_data = self._find_overlapping_cache(key, data_type, data_dir)
+                    if overlapping_data is not None:
+                        # Save to memory for fast access
+                        if self.memory_enabled:
+                            self._save_to_memory(key, overlapping_data)
+                        return overlapping_data
+
+            if self.debug:
+                print(f"[CACHE] ❌ Cache miss: {key}")
+            return None
+
+        except Exception as e:
+            if self.debug:
+                print(f"[CACHE] ❌ Error getting {key}: {e}")
+            return None
+
+    def set(self, key: str, data: Any, data_type: str = 'default') -> bool:
+        """Save data to cache"""
+        try:
+            # Save to memory
+            if self.memory_enabled:
+                self._save_to_memory(key, data)
+
+            # Save to disk
+            if self.disk_enabled:
+                # Map data_type to proper directory structure using DATA_TYPE_MAP
+                dir_name = self.DATA_TYPE_MAP.get(data_type, data_type.upper())
+                data_dir = f"{self.cache_dir}/{dir_name}"
+
+                # Create directory if it doesn't exist
+                os.makedirs(data_dir, exist_ok=True)
+
+                cache_file = os.path.join(data_dir, f"{key}.parquet")
+                self._save_to_disk(cache_file, data)
+
+            if self.debug:
+                # Count records for reporting
+                record_count = len(data) if hasattr(data, '__len__') else '?'
+                print(f"[CACHE] 💾 Saved: {key}")
+                print(f"[CACHE] 💾 Saved to cache: {data_type.upper()} ({record_count} records)")
+            return True
+
+        except Exception as e:
+            if self.debug:
+                print(f"[CACHE] ❌ Error saving {key}: {e}")
+            return False
+
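A minimal round trip through set() and get(), assuming the cache instance from the earlier sketch; the key follows the SYMBOL_START_END convention that _find_overlapping_cache parses, and the price values are placeholders:

    import pandas as pd

    df = pd.DataFrame({
        'date': pd.to_datetime(['2024-07-01', '2024-07-02']).date,  # stored as datetime.date
        'close': [545.2, 548.9],                                     # placeholder prices
    })
    cache.set('SPY_2024-07-01_2024-12-31', df, data_type='stock_eod')

    hit = cache.get('SPY_2024-07-01_2024-12-31', data_type='stock_eod')
    if hit is None:
        pass  # cache miss: fetch from the API instead, then cache.set(...) the result
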
+    def _save_to_memory(self, key: str, data: Any):
+        """Save to memory with LRU logic"""
+        if key in self.memory_cache:
+            self.cache_order.remove(key)
+        else:
+            # Check cache size
+            while len(self.memory_cache) > 0 and self._get_memory_usage() > self.max_memory_bytes:
+                oldest_key = self.cache_order.pop(0)
+                del self.memory_cache[oldest_key]
+
+        self.memory_cache[key] = data
+        self.cache_order.append(key)
+
+    def _save_to_disk(self, file_path: str, data: Any):
+        """Save to disk"""
+        try:
+            # Ensure directory exists
+            file_dir = os.path.dirname(file_path)
+            if file_dir and not os.path.exists(file_dir):
+                os.makedirs(file_dir, exist_ok=True)
+
+            if isinstance(data, pd.DataFrame):
+                if self.compression:
+                    data.to_parquet(file_path, compression='snappy')
+                else:
+                    data.to_parquet(file_path)
+            elif isinstance(data, dict):
+                # Convert dict to DataFrame
+                df = pd.DataFrame([data])
+                if self.compression:
+                    df.to_parquet(file_path, compression='snappy')
+                else:
+                    df.to_parquet(file_path)
+            else:
+                # Try to convert to DataFrame
+                df = pd.DataFrame(data)
+                if self.compression:
+                    df.to_parquet(file_path, compression='snappy')
+                else:
+                    df.to_parquet(file_path)
+        except Exception as e:
+            if self.debug:
+                print(f"[CACHE] ❌ Error saving to disk: {e}")
+
+    def _load_from_disk(self, file_path: str) -> Optional[Any]:
+        """Load from disk"""
+        try:
+            return pd.read_parquet(file_path)
+        except Exception as e:
+            if self.debug:
+                print(f"[CACHE] ❌ Error loading from disk: {e}")
+            return None
+
+    def _is_cache_valid(self, file_path: str) -> bool:
+        """Check cache validity"""
+        if not os.path.exists(file_path):
+            return False
+
+        file_age = time.time() - os.path.getmtime(file_path)
+        max_age_seconds = self.max_age_days * 24 * 3600
+
+        return file_age < max_age_seconds
+
+    def _get_memory_usage(self) -> int:
+        """Get memory usage"""
+        total_size = 0
+        for key, value in self.memory_cache.items():
+            try:
+                if hasattr(value, 'memory_usage'):
+                    total_size += value.memory_usage(deep=True).sum()
+                else:
+                    total_size += sys.getsizeof(value)
+            except:
+                total_size += sys.getsizeof(value)
+        return total_size
+
+
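Taken together, these helpers imply a simple on-disk layout: one parquet file per key, grouped into the directory chosen by DATA_TYPE_MAP, compressed with snappy while compression is True, and ignored once older than max_age_days. With the illustrative config above, the entry written in the previous sketch would land at roughly:

    cache/STOCK_EOD/SPY_2024-07-01_2024-12-31.parquet
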
+    def _find_overlapping_cache(self, key: str, data_type: str, data_dir: str) -> Optional[Any]:
+        """
+        Find cache files with overlapping date ranges
+
+        Args:
+            key: Cache key (format: SYMBOL_START_END or SYMBOL_DATE)
+            data_type: Data type (stock_eod, options_eod, etc.)
+            data_dir: Cache directory
+
+        Returns:
+            Filtered data if overlapping cache found, None otherwise
+        """
+        try:
+            import re
+            import glob
+            from datetime import datetime
+
+            # Parse symbol and dates from key
+            # Format: "SPY_2024-07-01_2025-10-29" or "SPY_2024-07-01"
+            match = re.search(r'^([A-Z]+)_(\d{4}-\d{2}-\d{2})(?:_(\d{4}-\d{2}-\d{2}))?$', key)
+            if not match:
+                return None
+
+            symbol = match.group(1)
+            start_date_str = match.group(2)
+            end_date_str = match.group(3) if match.group(3) else start_date_str
+
+            # Parse dates
+            start_date = datetime.strptime(start_date_str, '%Y-%m-%d').date()
+            end_date = datetime.strptime(end_date_str, '%Y-%m-%d').date()
+
+            # Find all cache files for this symbol
+            if not os.path.exists(data_dir):
+                return None
+
+            pattern = os.path.join(data_dir, f"{symbol}_*.parquet")
+            cache_files = glob.glob(pattern)
+
+            if not cache_files:
+                return None
+
+            # Search for best overlapping cache
+            best_match = None
+            best_size = float('inf')  # Prefer smallest file that covers range
+
+            for cache_file in cache_files:
+                # Skip if cache is not valid
+                if not self._is_cache_valid(cache_file):
+                    continue
+
+                # Parse dates from filename
+                filename = os.path.basename(cache_file)
+                file_match = re.search(r'(\d{4}-\d{2}-\d{2})(?:_(\d{4}-\d{2}-\d{2}))?', filename)
+
+                if not file_match:
+                    continue
+
+                cached_start_str = file_match.group(1)
+                cached_end_str = file_match.group(2) if file_match.group(2) else cached_start_str
+
+                cached_start = datetime.strptime(cached_start_str, '%Y-%m-%d').date()
+                cached_end = datetime.strptime(cached_end_str, '%Y-%m-%d').date()
+
+                # Check if cached range CONTAINS requested range
+                if cached_start <= start_date and cached_end >= end_date:
+                    # Calculate file size (prefer smaller files)
+                    file_size = os.path.getsize(cache_file)
+
+                    if file_size < best_size:
+                        best_match = cache_file
+                        best_size = file_size
+
+            if best_match:
+                if self.debug:
+                    print(f"[CACHE] 🔍 Found overlapping cache: {os.path.basename(best_match)}")
+                    print(f"[CACHE] Requested: {start_date_str} → {end_date_str}")
+                    print(f"[CACHE] Filtering and loading...")
+
+                # Load and filter data
+                df = pd.read_parquet(best_match)
+
+                # Ensure date column is in correct format
+                if 'date' in df.columns:
+                    if df['date'].dtype == 'object':
+                        df['date'] = pd.to_datetime(df['date']).dt.date
+                    elif pd.api.types.is_datetime64_any_dtype(df['date']):
+                        df['date'] = df['date'].dt.date
+
+                    # Filter by date range
+                    filtered = df[(df['date'] >= start_date) & (df['date'] <= end_date)].copy()
+
+                    if self.debug:
+                        print(f"[CACHE] ✓ Overlapping cache hit: {len(filtered)} records (filtered from {len(df)})")
+
+                    return filtered
+                else:
+                    # No date column to filter - return as is
+                    if self.debug:
+                        print(f"[CACHE] ✓ Overlapping cache hit: {len(df)} records (no date filtering)")
+                    return df
+
+            return None
+
+        except Exception as e:
+            if self.debug:
+                print(f"[CACHE] ⚠️ Error searching for overlapping cache: {e}")
+            return None
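To illustrate the containment test above (a sketch with hypothetical file names): a request that misses as an exact key can still be served from a wider cached range, because only cached_start <= start_date and cached_end >= end_date is required, after which the rows are filtered back down to the requested window:

    # Suppose cache/STOCK_EOD/ already holds SPY_2024-01-01_2025-12-31.parquet.
    # This narrower request then hits that file and returns only the matching rows:
    subset = cache.get('SPY_2024-07-01_2025-10-29', data_type='stock_eod')
    # subset contains rows with 2024-07-01 <= date <= 2025-10-29
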
 
 
 # Export all
@@ -2969,5 +3956,192 @@ __all__ = [
     'create_stoploss_comparison_chart',
     'optimize_parameters', 'plot_optimization_results',
     'create_optimization_folder',
-    'preload_options_data'
-]
+    'preload_options_data',
+    'preload_data_universal',  # NEW: Universal preloader V2
+    # New caching functions
+    # Optimization preset functions
+    'apply_optimization_preset', 'list_optimization_presets',
+    'calculate_combinations_count', 'print_preset_info',
+    'get_cache_config', 'UniversalCacheManager'
+]
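Since these names are now listed in __all__, they should be importable from the module directly; a minimal sketch (the import path is assumed from the file layout shown at the top of this diff):

    from ivolatility_backtesting.ivolatility_backtesting import (
        UniversalCacheManager, get_cache_config,
        apply_optimization_preset, list_optimization_presets,
        calculate_combinations_count, print_preset_info,
        preload_data_universal,
    )
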
+
+
+# ============================================================
+# OPTIMIZATION PRESET FUNCTIONS
+# ============================================================
+
+def apply_optimization_preset(config, preset='default'):
+    """
+    Apply built-in optimization preset to config
+
+    Args:
+        config: Configuration dictionary (will be updated)
+        preset: Preset name ('default', 'quick_test', 'aggressive', 'conservative')
+
+    Returns:
+        dict: Updated configuration
+    """
+    presets = {
+        'default': {
+            'param_grid': {
+                'z_score_entry': [0.8, 1.0, 1.2, 1.5],
+                'z_score_exit': [0.05, 0.1, 0.15],
+                'lookback_period': [45, 60, 90],
+                'dte_target': [30, 45, 60]
+            },
+            'optimization_metric': 'sharpe',
+            'min_trades': 5,
+            'max_drawdown_limit': 0.50,
+            'parallel': False,
+            # 'export_each_combo': True,  # ← Removed; the value from the main config is used instead
+            'results_folder_prefix': 'optimization',
+            'chart_filename': 'optimization_analysis.png',
+            'show_progress': True,
+            'verbose': True
+        },
+        'quick_test': {
+            'param_grid': {
+                'z_score_entry': [1.0, 1.5],
+                'z_score_exit': [0.1],
+                'lookback_period': [60],
+                'dte_target': [45]
+            },
+            'optimization_metric': 'sharpe',
+            'min_trades': 3,
+            'max_drawdown_limit': 0.40,
+            'parallel': False,
+            # 'export_each_combo': False,  # ← Removed; the value from the main config is used instead
+            'results_folder_prefix': 'quick_test',
+            'chart_filename': 'quick_test_analysis.png',
+            'show_progress': True,
+            'verbose': False
+        },
+        'aggressive': {
+            'param_grid': {
+                'z_score_entry': [1.5, 2.0, 2.5],
+                'z_score_exit': [0.05, 0.1],
+                'lookback_period': [30, 45, 60],
+                'dte_target': [30, 45]
+            },
+            'optimization_metric': 'total_return',
+            'min_trades': 10,
+            'max_drawdown_limit': 0.60,
+            'parallel': False,
+            # 'export_each_combo': True,  # ← Removed; the value from the main config is used instead
+            'results_folder_prefix': 'aggressive',
+            'chart_filename': 'aggressive_analysis.png',
+            'show_progress': True,
+            'verbose': True
+        },
+        'conservative': {
+            'param_grid': {
+                'z_score_entry': [0.8, 1.0],
+                'z_score_exit': [0.1, 0.15, 0.2],
+                'lookback_period': [60, 90, 120],
+                'dte_target': [45, 60, 90]
+            },
+            'optimization_metric': 'calmar',
+            'min_trades': 8,
+            'max_drawdown_limit': 0.25,
+            'parallel': False,
+            # 'export_each_combo': True,  # ← Removed; the value from the main config is used instead
+            'results_folder_prefix': 'conservative',
+            'chart_filename': 'conservative_analysis.png',
+            'show_progress': True,
+            'verbose': True
+        }
+    }
+
+    if preset not in presets:
+        available = list(presets.keys())
+        raise ValueError(f"Preset '{preset}' not found. Available: {available}")
+
+    # Update only specific fields from preset
+    preset_data = presets[preset]
+
+    # Save user-defined param_grid if it exists (user override has priority)
+    user_param_grid = config.get('param_grid')
+
+    fields_to_update = [
+        'param_grid', 'min_trades', 'max_drawdown_limit',
+        'optimization_metric', 'parallel', 'export_each_combo',
+        'results_folder_prefix', 'chart_filename',
+        'show_progress', 'verbose'
+    ]
+
+    for field in fields_to_update:
+        if field in preset_data:
+            # Special handling for param_grid: preserve user's param_grid if provided
+            if field == 'param_grid' and user_param_grid is not None:
+                # User defined param_grid - don't override with preset
+                continue
+            config[field] = preset_data[field]
+
+    # Restore user's param_grid if it was saved (preserve user override)
+    if user_param_grid is not None:
+        config['param_grid'] = user_param_grid
+
+    print(f"✓ Applied preset: {preset}")
+    if user_param_grid is not None:
+        print(f" (Preserved user-defined param_grid)")
+
+    return config
+
+
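A usage sketch for the preset helper (illustrative values): a user-supplied param_grid survives the preset, while the remaining optimization fields are filled in from it:

    config = {
        'param_grid': {'z_score_entry': [1.0, 1.25]},   # user override, kept as-is
    }
    config = apply_optimization_preset(config, preset='conservative')
    # min_trades, max_drawdown_limit, optimization_metric, etc. now come from the
    # 'conservative' preset, but config['param_grid'] still holds the user's grid.
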
+def calculate_combinations_count(param_grid):
+    """
+    Calculate total number of parameter combinations
+
+    Args:
+        param_grid: Dictionary with parameter lists
+
+    Returns:
+        int: Total number of combinations
+    """
+    import math
+    return math.prod(len(values) for values in param_grid.values())
+
+
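For instance, the 'default' preset grid defined above multiplies out to 4 × 3 × 3 × 3 = 108 combinations:

    default_grid = {
        'z_score_entry': [0.8, 1.0, 1.2, 1.5],
        'z_score_exit': [0.05, 0.1, 0.15],
        'lookback_period': [45, 60, 90],
        'dte_target': [30, 45, 60],
    }
    assert calculate_combinations_count(default_grid) == 108  # 4 * 3 * 3 * 3
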
+def print_preset_info(config):
+    """
+    Print preset information and combination count
+
+    Args:
+        config: Configuration dictionary with preset applied
+    """
+    preset = config.get('preset', 'unknown')
+    combinations = calculate_combinations_count(config['param_grid'])
+
+    print(f"\n{'='*60}")
+    print(f"OPTIMIZATION PRESET: {preset.upper()}")
+    print(f"{'='*60}")
+    print(f"Total combinations: {combinations}")
+    print(f"Optimization metric: {config.get('optimization_metric', 'sharpe')}")
+    print(f"Min trades required: {config.get('min_trades', 10)}")
+    print(f"Max drawdown limit: {config.get('max_drawdown_limit', 0.50)}")
+    print(f"Parallel execution: {config.get('parallel', True)}")
+    print(f"Export each combo: {config.get('export_each_combo', False)}")
+    print(f"{'='*60}\n")
+
+
+def list_optimization_presets():
+    """Show available built-in presets"""
+    presets = {
+        'default': 'Standard configuration (4×3×3×3 = 108 combinations)',
+        'quick_test': 'Quick test (2×1×1×1 = 2 combinations)',
+        'aggressive': 'Aggressive strategy (3×2×3×2 = 36 combinations)',
+        'conservative': 'Conservative strategy (2×3×3×3 = 54 combinations)'
+    }
+
+    print("\n📋 AVAILABLE OPTIMIZATION PRESETS:")
+    print("-" * 60)
+    for name, desc in presets.items():
+        print(f" {name:<12} | {desc}")
+    print("-" * 60)
+
+
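Putting the preset helpers together (a sketch, not package code): print_preset_info reads config['preset'], which apply_optimization_preset does not set, so the caller records the name explicitly:

    list_optimization_presets()                        # prints the four built-in grids

    config = {'preset': 'quick_test'}                  # recorded for the report header
    config = apply_optimization_preset(config, preset='quick_test')
    print_preset_info(config)                          # 2 combinations, 'sharpe' metric, ...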