sigma-terminal 2.0.1__py3-none-any.whl → 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sigma/strategy.py ADDED
@@ -0,0 +1,753 @@
1
+ """Strategy discovery - Hypothesis generation and rule conversion."""
2
+
3
+ import re
4
+ from datetime import date, timedelta
5
+ from typing import Any, Dict, List, Optional, Tuple, Union
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ from pydantic import BaseModel, Field
10
+
11
+ from .analytics import PerformanceAnalytics
12
+
13
+
14
+ # ============================================================================
15
+ # DATA MODELS
16
+ # ============================================================================
17
+
18
class TradingRule(BaseModel):
    """A trading rule specification.

    Captures entry/exit conditions as human-readable expressions plus the
    risk parameters and data inputs needed to evaluate the rule.
    """
    # Short machine-friendly identifier (e.g. "ma_crossover", "rsi_signal").
    name: str
    # Human-readable summary of the rule.
    description: str
    # Expression that triggers entry (e.g. "SMA(10) > SMA(50)").
    entry_condition: str
    # Expression that triggers exit; None means no explicit exit rule
    # (consumers treat it as "reverse signal").
    exit_condition: Optional[str] = None
    position_sizing: str = "fixed"  # fixed, volatility_scaled, kelly
    # Stop-loss level; units (fraction vs. price) are not constrained here — TODO confirm with consumers.
    stop_loss: Optional[float] = None
    # Take-profit level; same unit caveat as stop_loss.
    take_profit: Optional[float] = None
    max_holding_period: Optional[int] = None  # days
    # Data series the rule needs (e.g. "close", "high", "fundamentals").
    required_data: List[str] = Field(default_factory=list)
    # Free-form tuning parameters (periods, thresholds, ...).
    parameters: Dict[str, Any] = Field(default_factory=dict)
30
+
31
+
32
class HypothesisResult(BaseModel):
    """Result of hypothesis testing.

    Produced by the ``HypothesisTester.test_*`` methods; bundles the verdict
    with the evidence and caveats surfaced to the user.
    """
    # The hypothesis statement that was tested.
    hypothesis: str
    # Whether the evidence supports the hypothesis.
    supported: bool
    # Confidence in the verdict (the testers use values in [0, 1], e.g. 1 - p_value).
    confidence: float
    # Human-readable findings backing the verdict.
    evidence: List[str]
    # Named statistics (t-stats, p-values, spreads, ...).
    metrics: Dict[str, float]
    # Number of observations used in the test.
    sample_size: int
    # Known limitations of the test methodology.
    caveats: List[str] = Field(default_factory=list)
41
+
42
+
43
class StrategyIdea(BaseModel):
    """A strategy idea with rationale.

    Pairs an investment thesis with the concrete trading rules that express
    it, plus the expected edge, risks, and data needed to run it.
    """
    # Strategy display name (e.g. "Trend Following").
    name: str
    # Why the strategy is expected to work.
    thesis: str
    # Concrete rules implementing the thesis.
    rules: List[TradingRule]
    # Qualitative statement of the expected edge (free text, not a number).
    expected_edge: str
    # Conditions under which the strategy is expected to underperform.
    risk_factors: List[str]
    # Data sets required to run the strategy.
    data_requirements: List[str]
51
+
52
+
53
+ # ============================================================================
54
+ # HYPOTHESIS GENERATOR
55
+ # ============================================================================
56
+
57
class HypothesisGenerator:
    """Generate testable hypotheses from observations or queries."""

    # Canned hypothesis statements, grouped by research theme.
    HYPOTHESIS_TEMPLATES = {
        "momentum": [
            "Assets with positive {period}-period momentum outperform",
            "Strong recent performance predicts continued strength",
            "Winners keep winning over {period} periods",
        ],
        "mean_reversion": [
            "Extreme {direction} moves tend to reverse",
            "Oversold conditions predict bounces",
            "Overbought conditions predict pullbacks",
        ],
        "seasonality": [
            "{month} shows consistent {direction} bias",
            "Day-of-week effects exist in this asset",
            "Year-end rally effect is statistically significant",
        ],
        "volatility": [
            "Low volatility periods precede high volatility",
            "Volatility clustering is exploitable",
            "Implied volatility overestimates realized volatility",
        ],
        "correlation": [
            "Correlation breaks down during crises",
            "Cross-asset momentum signals are predictive",
            "Sector rotation patterns are persistent",
        ],
        "fundamental": [
            "Value outperforms over long horizons",
            "Quality metrics predict outperformance",
            "Earnings surprises have momentum",
        ],
    }

    # Keyword triggers for auto-detecting themes from free text.  Order
    # matters: matched template groups are appended in this sequence.
    _CATEGORY_TRIGGERS = (
        ("momentum", ("momentum", "trend", "winning")),
        ("mean_reversion", ("revert", "bounce", "oversold", "overbought")),
        ("seasonality", ("january", "month", "day", "seasonal")),
        ("volatility", ("volatility", "vol", "vix")),
        ("correlation", ("correlation", "hedge", "diversif")),
        ("fundamental", ("value", "quality", "earnings", "fundamental")),
    )

    def generate_hypotheses(
        self,
        context: str,
        category: Optional[str] = None,
    ) -> List[str]:
        """Return hypothesis statements relevant to ``context``.

        If ``category`` names a known template group, a copy of that group is
        returned directly; otherwise keyword matching against ``context``
        selects one or more groups, falling back to the momentum templates
        when nothing matches.
        """
        if category and category in self.HYPOTHESIS_TEMPLATES:
            return list(self.HYPOTHESIS_TEMPLATES[category])

        text = context.lower()
        collected: List[str] = []
        for theme, triggers in self._CATEGORY_TRIGGERS:
            if any(word in text for word in triggers):
                collected.extend(self.HYPOTHESIS_TEMPLATES[theme])

        # Default to momentum ideas when no theme was detected.
        return collected or list(self.HYPOTHESIS_TEMPLATES["momentum"])

    def parse_hypothesis_from_query(self, query: str) -> str:
        """Extract a testable hypothesis from a natural language query.

        Tries a few question-to-statement rewrite rules in order; if none
        applies, wraps the raw query in a generic testable statement.
        """
        rewrite_rules = [
            (r"(does|do)\s+(.+)\s+(outperform|beat|predict)", r"\2 predicts outperformance"),
            (r"(is|are)\s+(.+)\s+(better|worse)", r"\2 is a significant factor"),
            (r"(can|could)\s+(.+)\s+(work|predict)", r"\2 has predictive power"),
            (r"what if\s+(.+)", r"\1 is exploitable"),
        ]

        for pat, repl in rewrite_rules:
            if re.search(pat, query, re.IGNORECASE):
                return re.sub(pat, repl, query, flags=re.IGNORECASE)

        # No rewrite rule applied: fall back to a generic testable statement.
        return f"The pattern described in '{query}' is statistically significant"
147
+
148
+
149
+ # ============================================================================
150
+ # HYPOTHESIS TESTER
151
+ # ============================================================================
152
+
153
class HypothesisTester:
    """Test hypotheses with statistical rigor.

    Each ``test_*`` method runs a self-contained statistical study on a
    series of per-period returns and packages the outcome as a
    :class:`HypothesisResult` (verdict, confidence, evidence strings, and
    caveats).

    NOTE(review): the seasonality test indexes ``returns.index.month`` /
    ``.dayofweek``, so it assumes a DatetimeIndex — confirm with callers.
    """

    def __init__(self):
        # Analytics helper from .analytics; not referenced by the test
        # methods below but kept on the instance for callers.
        self.performance = PerformanceAnalytics()

    def test_momentum_hypothesis(
        self,
        returns: pd.Series,
        lookback: int = 252,
        holding: int = 21,
    ) -> HypothesisResult:
        """Test if momentum is predictive.

        Sorts observations into quintiles by trailing ``lookback``-period
        compound return and compares the subsequent ``holding``-period
        return of the top vs. bottom quintile with a two-sample t-test.

        Args:
            returns: Per-period simple returns.
            lookback: Momentum formation window, in periods.
            holding: Forward-return measurement window, in periods.
        """

        # Trailing compound return over the lookback window (rolling momentum).
        momentum = returns.rolling(lookback).apply(lambda x: (1 + x).prod() - 1)

        # Forward compound return over the holding window, shifted so each
        # row holds the return realized *after* that date.
        forward_returns = returns.rolling(holding).apply(lambda x: (1 + x).prod() - 1).shift(-holding)

        # Align both series and drop rows where either window is incomplete.
        df = pd.concat([momentum, forward_returns], axis=1).dropna()
        df.columns = ["momentum", "forward_return"]

        # Too few observations for a meaningful quintile study.
        if len(df) < 100:
            return HypothesisResult(
                hypothesis=f"{lookback}-period momentum predicts {holding}-period forward returns",
                supported=False,
                confidence=0.0,
                evidence=["Insufficient data"],
                metrics={},
                sample_size=len(df),
                caveats=["Need at least 100 observations"],
            )

        # Rank momentum into quintiles (1 = weakest, 5 = strongest).
        df["quintile"] = pd.qcut(df["momentum"], 5, labels=[1, 2, 3, 4, 5])

        # Average forward return within each quintile.
        quintile_returns = df.groupby("quintile")["forward_return"].mean()

        # Long-short spread: top-quintile minus bottom-quintile average.
        long_short = quintile_returns.iloc[-1] - quintile_returns.iloc[0]

        # Two-sample t-test: top-quintile vs. bottom-quintile forward returns.
        q5_returns = df[df["quintile"] == 5]["forward_return"]
        q1_returns = df[df["quintile"] == 1]["forward_return"]

        from scipy import stats
        t_stat, p_value = stats.ttest_ind(q5_returns, q1_returns)

        # Supported only if significant at 5% AND the spread favors winners.
        supported = p_value < 0.05 and long_short > 0
        confidence = 1 - p_value

        return HypothesisResult(
            hypothesis=f"{lookback}-period momentum predicts {holding}-period forward returns",
            supported=supported,
            confidence=confidence,
            evidence=[
                f"Long-short spread: {long_short:.2%} per period",
                f"t-statistic: {t_stat:.2f}",
                f"p-value: {p_value:.4f}",
                f"Top quintile avg: {quintile_returns.iloc[-1]:.2%}",
                f"Bottom quintile avg: {quintile_returns.iloc[0]:.2%}",
            ],
            metrics={
                "long_short_spread": long_short,
                "t_statistic": t_stat,
                "p_value": p_value,
            },
            sample_size=len(df),
            caveats=[
                "Past performance may not predict future results",
                "Transaction costs not included",
                "May be period-specific",
            ],
        )

    def test_mean_reversion_hypothesis(
        self,
        returns: pd.Series,
        threshold: float = 2.0,  # Standard deviations
        holding: int = 5,
    ) -> HypothesisResult:
        """Test if extreme moves tend to reverse.

        Classifies each period as an extreme up/down move by z-score, then
        inspects the average compound return over the next ``holding`` days.

        Args:
            returns: Per-period simple returns.
            threshold: Z-score cutoff (in standard deviations) for "extreme".
            holding: Forward window, in periods, over which reversion is measured.
        """

        # Z-score each return against the full-sample mean and std
        # (full-sample stats introduce mild look-ahead; acceptable for a study).
        mean_return = returns.mean()
        std_return = returns.std()
        z_scores = (returns - mean_return) / std_return

        # Boolean flags for extreme down / up moves.
        extreme_down = z_scores < -threshold
        extreme_up = z_scores > threshold

        # Forward compound return realized after each date.
        forward_returns = returns.rolling(holding).apply(lambda x: (1 + x).prod() - 1).shift(-holding)

        df = pd.concat([z_scores, forward_returns, extreme_down, extreme_up], axis=1).dropna()
        df.columns = ["z_score", "forward_return", "extreme_down", "extreme_up"]

        # Average forward return conditional on an extreme move, plus counts.
        # NOTE(review): with zero extreme observations these means are NaN,
        # which makes ``supported`` False below — intentional fail-safe.
        after_down = df[df["extreme_down"]]["forward_return"].mean()
        after_up = df[df["extreme_up"]]["forward_return"].mean()
        n_down = df["extreme_down"].sum()
        n_up = df["extreme_up"].sum()

        # Reversion detected if:
        # - Extreme down is followed by positive returns
        # - Extreme up is followed by negative returns
        supported = after_down > 0 and after_up < 0

        # Confidence is a coarse function of how many extreme events we saw.
        if n_down >= 30 and n_up >= 30:
            confidence = 0.8
        elif n_down >= 10 and n_up >= 10:
            confidence = 0.5
        else:
            confidence = 0.2

        return HypothesisResult(
            hypothesis=f"Extreme moves ({threshold}σ) tend to reverse over {holding} days",
            supported=supported,
            confidence=confidence,
            evidence=[
                f"Return after extreme down: {after_down:.2%} (n={n_down:.0f})",
                f"Return after extreme up: {after_up:.2%} (n={n_up:.0f})",
            ],
            metrics={
                "return_after_extreme_down": after_down,
                "return_after_extreme_up": after_up,
                "n_extreme_down": float(n_down),
                "n_extreme_up": float(n_up),
            },
            sample_size=len(df),
            caveats=[
                "Threshold choice affects results",
                "May not account for regime changes",
                "Sample size may be small for extreme events",
            ],
        )

    def test_seasonality_hypothesis(
        self,
        returns: pd.Series,
        period: str = "month",  # month, dayofweek
    ) -> HypothesisResult:
        """Test if seasonality is statistically significant.

        Groups returns by calendar month (or weekday) and runs a one-way
        ANOVA across the groups; significance means the group means differ.

        Args:
            returns: Per-period returns with a DatetimeIndex.
            period: "month" for monthly grouping, anything else for weekday.
        """

        if period == "month":
            groups = returns.groupby(returns.index.month)
            labels = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                      "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
        else:
            # Weekday labels include Sat/Sun; non-trading days simply
            # produce no group and are never looked up.
            groups = returns.groupby(returns.index.dayofweek)
            labels = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]

        # Per-period summary statistics.
        # NOTE(review): computed but unused below — kept for possible reporting.
        period_stats = groups.agg(["mean", "std", "count"])

        # One-way ANOVA across the per-period return samples.
        from scipy import stats
        group_data = [group.values for name, group in groups]
        f_stat, p_value = stats.f_oneway(*group_data)

        supported = p_value < 0.05
        confidence = 1 - p_value

        # Best/worst periods by mean return; idxmax/idxmin return the group
        # key (month 1-12 or weekday 0-6), used as a label-based lookup below.
        means = groups.mean()
        best_idx = means.idxmax()
        worst_idx = means.idxmin()

        if period == "month":
            # Months are 1-based, the labels list is 0-based.
            best_label = labels[int(best_idx) - 1]
            worst_label = labels[int(worst_idx) - 1]
        else:
            best_label = labels[int(best_idx)]
            worst_label = labels[int(worst_idx)]

        return HypothesisResult(
            hypothesis=f"There are significant {period}ly patterns in returns",
            supported=supported,
            confidence=confidence,
            evidence=[
                f"F-statistic: {f_stat:.2f}",
                f"p-value: {p_value:.4f}",
                f"Best {period}: {best_label} ({means[best_idx]:.4%})",
                f"Worst {period}: {worst_label} ({means[worst_idx]:.4%})",
            ],
            metrics={
                "f_statistic": f_stat,
                "p_value": p_value,
                "best_period_return": means[best_idx],
                "worst_period_return": means[worst_idx],
            },
            sample_size=len(returns),
            caveats=[
                "Seasonality may change over time",
                "Sample period affects results",
                "May be coincidental",
            ],
        )
356
+
357
+
358
+ # ============================================================================
359
+ # RULE CONVERTER
360
+ # ============================================================================
361
+
362
class RuleConverter:
    """Convert natural language rules to algorithmic specifications.

    ``parse_rule`` maps free text onto one of the known ``SIGNAL_PATTERNS``;
    ``rule_to_python`` renders a recognized rule as runnable
    signal-generation code.
    """

    # Recognized signal families: regex patterns that detect each family in
    # lower-cased free text, plus a human-readable template.
    SIGNAL_PATTERNS = {
        "ma_cross": {
            "patterns": [r"(\d+)\s*(?:day|d)\s*(?:ma|moving average)\s*cross(?:es|ing)?\s*(?:above|below)?\s*(\d+)\s*(?:day|d)"],
            "template": "MA({fast}) crosses MA({slow})",
        },
        "rsi": {
            "patterns": [r"rsi\s*(?:below|under|<)\s*(\d+)", r"rsi\s*(?:above|over|>)\s*(\d+)"],
            "template": "RSI {condition} {threshold}",
        },
        "price_breakout": {
            "patterns": [r"(?:price|close)\s*(?:breaks?|crosses?)\s*(?:above|below)\s*(\d+)\s*(?:day|d)\s*(?:high|low)"],
            "template": "Price breaks {period}-day {level}",
        },
        "volatility": {
            "patterns": [r"(?:vol|volatility)\s*(?:below|under|<)\s*(\d+)%?", r"low\s*(?:vol|volatility)"],
            "template": "Volatility condition",
        },
    }

    # Annotations are quoted forward references so this class has no
    # import-time dependency on TradingRule's definition order.
    def parse_rule(self, text: str) -> Optional["TradingRule"]:
        """Parse a natural language rule into a TradingRule.

        Returns a structured rule for the first matching signal pattern,
        or a generic pass-through rule when no known pattern matches.
        """
        text_lower = text.lower()

        # First matching pattern wins; dict order fixes the precedence.
        for signal_type, config in self.SIGNAL_PATTERNS.items():
            for pattern in config["patterns"]:
                match = re.search(pattern, text_lower)
                if match:
                    return self._create_rule(signal_type, match, text)

        # No specific pattern matched: keep the raw text as the condition.
        return TradingRule(
            name="custom_rule",
            description=text,
            entry_condition=text,
            required_data=["price"],
            parameters={},
        )

    def _create_rule(
        self,
        signal_type: str,
        match: re.Match,
        original_text: str,
    ) -> "TradingRule":
        """Create a TradingRule from a matched pattern.

        Args:
            signal_type: Key into ``SIGNAL_PATTERNS`` that matched.
            match: The regex match (numeric groups hold periods/thresholds).
            original_text: The user's original rule text.
        """

        if signal_type == "ma_cross":
            fast = int(match.group(1))
            slow = int(match.group(2))
            return TradingRule(
                name="ma_crossover",
                description=f"{fast}/{slow} Moving Average Crossover",
                entry_condition=f"SMA({fast}) > SMA({slow})",
                exit_condition=f"SMA({fast}) < SMA({slow})",
                required_data=["close"],
                parameters={"fast_period": fast, "slow_period": slow},
            )

        elif signal_type == "rsi":
            threshold = int(match.group(1))
            # BUG FIX: the oversold regex accepts "below", "under", and "<",
            # but only "below"/"<" were checked here, so "rsi under 30" was
            # misclassified as an overbought signal.
            lowered = original_text.lower()
            is_oversold = any(tok in lowered for tok in ("below", "under", "<"))
            # NOTE(review): for the overbought case the exit threshold reuses
            # the entry threshold (asymmetric with the oversold case, which
            # exits at 100 - threshold) — confirm this is intended.
            return TradingRule(
                name="rsi_signal",
                description=f"RSI {'Oversold' if is_oversold else 'Overbought'} Signal",
                entry_condition=f"RSI < {threshold}" if is_oversold else f"RSI > {threshold}",
                exit_condition=f"RSI > {100 - threshold}" if is_oversold else f"RSI < {threshold}",
                required_data=["close"],
                parameters={"threshold": threshold, "period": 14},
            )

        elif signal_type == "price_breakout":
            period = int(match.group(1))
            is_high = "high" in original_text.lower()
            return TradingRule(
                name="price_breakout",
                description=f"{period}-Day {'High' if is_high else 'Low'} Breakout",
                entry_condition=f"Close > {period}-day high" if is_high else f"Close < {period}-day low",
                required_data=["close", "high" if is_high else "low"],
                parameters={"period": period, "breakout_type": "high" if is_high else "low"},
            )

        # Fallback for matched-but-unhandled signal types (e.g. "volatility"):
        # keep the raw text as the condition.
        return TradingRule(
            name=signal_type,
            description=original_text,
            entry_condition=original_text,
            required_data=["close"],
            parameters={},
        )

    def rule_to_python(self, rule: "TradingRule") -> str:
        """Convert a TradingRule to Python source for a signal function.

        The generated ``generate_signals`` returns a Series of {-1, 0, 1}
        position signals. Unknown rule names get a TODO stub.
        """

        if rule.name == "ma_crossover":
            fast = rule.parameters.get("fast_period", 10)
            slow = rule.parameters.get("slow_period", 50)
            return f'''
def generate_signals(prices: pd.Series) -> pd.Series:
    """MA Crossover: {rule.description}"""
    fast_ma = prices.rolling({fast}).mean()
    slow_ma = prices.rolling({slow}).mean()

    signal = pd.Series(0, index=prices.index)
    signal[fast_ma > slow_ma] = 1   # Long
    signal[fast_ma < slow_ma] = -1  # Short or flat

    return signal
'''

        elif rule.name == "rsi_signal":
            threshold = rule.parameters.get("threshold", 30)
            period = rule.parameters.get("period", 14)
            return f'''
def generate_signals(prices: pd.Series) -> pd.Series:
    """RSI Signal: {rule.description}"""
    delta = prices.diff()
    gain = delta.where(delta > 0, 0).rolling({period}).mean()
    loss = (-delta.where(delta < 0, 0)).rolling({period}).mean()

    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))

    signal = pd.Series(0, index=prices.index)
    signal[rsi < {threshold}] = 1         # Buy on oversold
    signal[rsi > {100 - threshold}] = -1  # Sell on overbought

    return signal
'''

        elif rule.name == "price_breakout":
            period = rule.parameters.get("period", 20)
            breakout_type = rule.parameters.get("breakout_type", "high")
            # Pre-build the direction-specific lines rather than embedding
            # conditionals inside the template.
            if breakout_type == "high":
                level_line = f'high_n = prices["high"].rolling({period}).max()'
                signal_line = "signal[close > high_n.shift(1)] = 1"
            else:
                level_line = f'low_n = prices["low"].rolling({period}).min()'
                signal_line = "signal[close < low_n.shift(1)] = -1"
            return f'''
def generate_signals(prices: pd.DataFrame) -> pd.Series:
    """Breakout Signal: {rule.description}"""
    close = prices['close']

    {level_line}

    signal = pd.Series(0, index=close.index)
    {signal_line}

    return signal
'''

        # Generic template for rules without a code generator.
        return f'''
def generate_signals(prices: pd.Series) -> pd.Series:
    """Custom Signal: {rule.description}

    Entry: {rule.entry_condition}
    Exit: {rule.exit_condition or "Reverse signal"}
    """
    # TODO: Implement custom logic
    signal = pd.Series(0, index=prices.index)
    return signal
'''
525
+
526
+
527
+ # ============================================================================
528
+ # FAILURE MODE DETECTOR
529
+ # ============================================================================
530
+
531
class FailureModeDetector:
    """Detect potential failure modes in strategies."""

    # Catalog of known failure modes and the diagnostic checks tied to each.
    FAILURE_MODES = {
        "overfitting": {
            "description": "Strategy may be overfitted to historical data",
            "checks": ["parameter_sensitivity", "out_of_sample"],
        },
        "regime_dependency": {
            "description": "Strategy may only work in specific market regimes",
            "checks": ["regime_breakdown"],
        },
        "capacity_limit": {
            "description": "Strategy may have limited capacity",
            "checks": ["market_impact", "liquidity"],
        },
        "crowding": {
            "description": "Strategy may be crowded by similar traders",
            "checks": ["signal_correlation"],
        },
        "data_mining": {
            "description": "Results may be due to data mining bias",
            "checks": ["multiple_testing"],
        },
    }

    def detect_failure_modes(
        self,
        strategy_results: Dict[str, Any],
        returns: pd.Series,
    ) -> List[Dict[str, Any]]:
        """Run each diagnostic and return a report for every mode that fires."""
        detected: List[Dict[str, Any]] = []

        # Overfitting: implausible performance or too few trades per parameter.
        if self._check_overfitting(strategy_results):
            detected.append(dict(
                mode="overfitting",
                severity="high",
                description=self.FAILURE_MODES["overfitting"]["description"],
                evidence=self._get_overfitting_evidence(strategy_results),
                mitigation="Use walk-forward optimization, reduce parameters",
            ))

        # Regime dependency: performance differs between calm and stressed markets.
        if self._check_regime_dependency(strategy_results, returns):
            detected.append(dict(
                mode="regime_dependency",
                severity="medium",
                description=self.FAILURE_MODES["regime_dependency"]["description"],
                evidence=["Performance varies significantly across market regimes"],
                mitigation="Add regime filters or diversify strategies",
            ))

        # Data mining: too many tunable knobs.
        if self._check_data_mining(strategy_results):
            detected.append(dict(
                mode="data_mining",
                severity="medium",
                description=self.FAILURE_MODES["data_mining"]["description"],
                evidence=["Multiple parameters tested without correction"],
                mitigation="Apply multiple testing correction, use holdout data",
            ))

        return detected

    def _check_overfitting(self, results: Dict[str, Any]) -> bool:
        """Heuristic overfitting check on summary statistics."""
        # A Sharpe above 3 is treated as too good to be true.
        if results.get("sharpe_ratio", 0) > 3:
            return True

        # Fewer than 20 trades per free parameter is a weak statistical base.
        n_params = results.get("num_parameters", 0)
        n_trades = results.get("num_trades", 100)
        return n_params > 0 and n_trades / n_params < 20

    def _check_regime_dependency(
        self,
        results: Dict[str, Any],
        returns: pd.Series,
    ) -> bool:
        """Flag strategies whose mean return shifts materially between
        high- and low-volatility regimes."""
        # Annualized ~3-month rolling volatility; top quartile = "high vol".
        rolling_vol = returns.rolling(63).std() * np.sqrt(252)
        vol_cutoff = rolling_vol.quantile(0.75)

        strat = results.get("strategy_returns", returns)
        if not isinstance(strat, pd.Series):
            return False

        stressed_mean = strat[rolling_vol > vol_cutoff].mean()
        calm_mean = strat[rolling_vol <= vol_cutoff].mean()

        # A gap above 10% of the overall mean suggests regime dependency.
        # bool() also maps NaN comparisons (empty regimes) to False.
        return bool(abs(stressed_mean - calm_mean) > 0.1 * abs(strat.mean()))

    def _check_data_mining(self, results: Dict[str, Any]) -> bool:
        """Heuristic data-mining check: many parameters imply a big search space."""
        return results.get("num_parameters", 0) > 5

    def _get_overfitting_evidence(self, results: Dict[str, Any]) -> List[str]:
        """Collect human-readable evidence strings for the overfitting check."""
        notes: List[str] = []

        sharpe = results.get("sharpe_ratio", 0)
        if sharpe > 3:
            notes.append(f"Unusually high Sharpe ratio: {sharpe:.2f}")

        n_params = results.get("num_parameters", 0)
        n_trades = results.get("num_trades", 100)
        if n_params > 0 and n_trades / n_params < 20:
            notes.append(f"Low trade/parameter ratio: {n_trades/n_params:.1f}")

        return notes
667
+
668
+
669
+ # ============================================================================
670
+ # STRATEGY GENERATOR
671
+ # ============================================================================
672
+
673
class StrategyGenerator:
    """Generate strategy ideas from hypotheses and rules."""

    # Pre-built strategy templates keyed by theme.
    STRATEGY_TEMPLATES = {
        "momentum": StrategyIdea(
            name="Trend Following",
            thesis="Trends persist due to behavioral biases and institutional flows",
            rules=[
                TradingRule(
                    name="price_above_ma",
                    description="Price above 200-day MA",
                    entry_condition="Close > SMA(200)",
                    exit_condition="Close < SMA(200)",
                    required_data=["close"],
                    parameters={"period": 200},
                ),
            ],
            expected_edge="2-4% annualized alpha in trending markets",
            risk_factors=["Choppy markets", "Regime changes", "Crowding"],
            data_requirements=["Daily prices", "Volume"],
        ),
        "mean_reversion": StrategyIdea(
            name="Mean Reversion",
            thesis="Prices revert to fair value after overreaction",
            rules=[
                TradingRule(
                    name="oversold_bounce",
                    description="Buy when RSI oversold",
                    entry_condition="RSI(14) < 30",
                    exit_condition="RSI(14) > 70",
                    required_data=["close"],
                    parameters={"rsi_period": 14, "oversold": 30, "overbought": 70},
                ),
            ],
            expected_edge="1-3% annualized alpha",
            risk_factors=["Trending markets", "Extended drawdowns", "Value traps"],
            data_requirements=["Daily prices"],
        ),
        "quality": StrategyIdea(
            name="Quality Factor",
            thesis="High-quality companies outperform over long horizons",
            rules=[
                TradingRule(
                    name="quality_screen",
                    description="Screen for quality metrics",
                    entry_condition="ROE > 15% AND Debt/Equity < 0.5 AND Margin Trend > 0",
                    exit_condition="Quality score deteriorates",
                    required_data=["fundamentals"],
                    parameters={"roe_threshold": 0.15, "de_threshold": 0.5},
                ),
            ],
            expected_edge="2-5% annualized alpha over market cycles",
            risk_factors=["Valuation multiples", "Factor crowding", "Sector concentration"],
            data_requirements=["Quarterly fundamentals", "Daily prices"],
        ),
    }

    # Keyword triggers mapping hypothesis wording to a template; checked in
    # order, first match wins (mirrors the original if/elif precedence).
    _TEMPLATE_TRIGGERS = (
        (("momentum", "trend"), "momentum"),
        (("revert", "extreme"), "mean_reversion"),
        (("quality", "fundamental"), "quality"),
    )

    def suggest_strategies(
        self,
        hypothesis_results: List[HypothesisResult],
        market_context: Optional[Dict[str, Any]] = None,
    ) -> List[StrategyIdea]:
        """Suggest strategy templates backed by well-supported hypotheses.

        Only hypotheses that are supported with confidence above 0.8 are
        mapped; each contributes at most one template. ``market_context``
        is accepted for interface stability but not currently used.
        """
        picked: List[StrategyIdea] = []

        for outcome in hypothesis_results:
            # Skip weak or unsupported hypotheses outright.
            if not (outcome.supported and outcome.confidence > 0.8):
                continue

            wording = outcome.hypothesis.lower()
            for keywords, template_key in self._TEMPLATE_TRIGGERS:
                if any(word in wording for word in keywords):
                    picked.append(self.STRATEGY_TEMPLATES[template_key])
                    break

        return picked