sigma-terminal 2.0.1__py3-none-any.whl → 3.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sigma/__init__.py +182 -6
- sigma/__main__.py +2 -2
- sigma/analytics/__init__.py +636 -0
- sigma/app.py +563 -898
- sigma/backtest.py +372 -0
- sigma/charts.py +407 -0
- sigma/cli.py +434 -0
- sigma/comparison.py +611 -0
- sigma/config.py +195 -0
- sigma/core/__init__.py +4 -17
- sigma/core/engine.py +493 -0
- sigma/core/intent.py +595 -0
- sigma/core/models.py +516 -125
- sigma/data/__init__.py +681 -0
- sigma/data/models.py +130 -0
- sigma/llm.py +401 -0
- sigma/monitoring.py +666 -0
- sigma/portfolio.py +697 -0
- sigma/reporting.py +658 -0
- sigma/robustness.py +675 -0
- sigma/setup.py +305 -402
- sigma/strategy.py +753 -0
- sigma/tools/backtest.py +23 -5
- sigma/tools.py +617 -0
- sigma/visualization.py +766 -0
- sigma_terminal-3.2.0.dist-info/METADATA +298 -0
- sigma_terminal-3.2.0.dist-info/RECORD +30 -0
- sigma_terminal-3.2.0.dist-info/entry_points.txt +6 -0
- sigma_terminal-3.2.0.dist-info/licenses/LICENSE +25 -0
- sigma/core/agent.py +0 -205
- sigma/core/config.py +0 -119
- sigma/core/llm.py +0 -794
- sigma/tools/__init__.py +0 -5
- sigma/tools/charts.py +0 -400
- sigma/tools/financial.py +0 -1457
- sigma/ui/__init__.py +0 -1
- sigma_terminal-2.0.1.dist-info/METADATA +0 -222
- sigma_terminal-2.0.1.dist-info/RECORD +0 -19
- sigma_terminal-2.0.1.dist-info/entry_points.txt +0 -2
- sigma_terminal-2.0.1.dist-info/licenses/LICENSE +0 -42
- {sigma_terminal-2.0.1.dist-info → sigma_terminal-3.2.0.dist-info}/WHEEL +0 -0
sigma/strategy.py
ADDED
@@ -0,0 +1,753 @@
+"""Strategy discovery - Hypothesis generation and rule conversion."""
+
+import re
+from datetime import date, timedelta
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+import numpy as np
+import pandas as pd
+from pydantic import BaseModel, Field
+
+from .analytics import PerformanceAnalytics
+
+
+# ============================================================================
+# DATA MODELS
+# ============================================================================
+
+class TradingRule(BaseModel):
+    """A trading rule specification."""
+    name: str
+    description: str
+    entry_condition: str
+    exit_condition: Optional[str] = None
+    position_sizing: str = "fixed"  # fixed, volatility_scaled, kelly
+    stop_loss: Optional[float] = None
+    take_profit: Optional[float] = None
+    max_holding_period: Optional[int] = None  # days
+    required_data: List[str] = Field(default_factory=list)
+    parameters: Dict[str, Any] = Field(default_factory=dict)
+
+
+class HypothesisResult(BaseModel):
+    """Result of hypothesis testing."""
+    hypothesis: str
+    supported: bool
+    confidence: float
+    evidence: List[str]
+    metrics: Dict[str, float]
+    sample_size: int
+    caveats: List[str] = Field(default_factory=list)
+
+
+class StrategyIdea(BaseModel):
+    """A strategy idea with rationale."""
+    name: str
+    thesis: str
+    rules: List[TradingRule]
+    expected_edge: str
+    risk_factors: List[str]
+    data_requirements: List[str]
+
+
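The three models above are plain pydantic containers. As a hand-written illustration (not code from the package), the rule that RuleConverter further down derives from "10 day MA crosses above 50 day MA" would be populated like this:

from sigma.strategy import TradingRule

# Hand-built equivalent of RuleConverter.parse_rule()'s output for
# "10 day MA crosses above 50 day MA".
rule = TradingRule(
    name="ma_crossover",
    description="10/50 Moving Average Crossover",
    entry_condition="SMA(10) > SMA(50)",
    exit_condition="SMA(10) < SMA(50)",
    required_data=["close"],
    parameters={"fast_period": 10, "slow_period": 50},
)
print(rule.model_dump())  # pydantic v2; use rule.dict() if the package pins v1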
+# ============================================================================
+# HYPOTHESIS GENERATOR
+# ============================================================================
+
+class HypothesisGenerator:
+    """Generate testable hypotheses from observations or queries."""
+
+    # Common hypothesis templates
+    HYPOTHESIS_TEMPLATES = {
+        "momentum": [
+            "Assets with positive {period}-period momentum outperform",
+            "Strong recent performance predicts continued strength",
+            "Winners keep winning over {period} periods",
+        ],
+        "mean_reversion": [
+            "Extreme {direction} moves tend to reverse",
+            "Oversold conditions predict bounces",
+            "Overbought conditions predict pullbacks",
+        ],
+        "seasonality": [
+            "{month} shows consistent {direction} bias",
+            "Day-of-week effects exist in this asset",
+            "Year-end rally effect is statistically significant",
+        ],
+        "volatility": [
+            "Low volatility periods precede high volatility",
+            "Volatility clustering is exploitable",
+            "Implied volatility overestimates realized volatility",
+        ],
+        "correlation": [
+            "Correlation breaks down during crises",
+            "Cross-asset momentum signals are predictive",
+            "Sector rotation patterns are persistent",
+        ],
+        "fundamental": [
+            "Value outperforms over long horizons",
+            "Quality metrics predict outperformance",
+            "Earnings surprises have momentum",
+        ],
+    }
+
+    def generate_hypotheses(
+        self,
+        context: str,
+        category: Optional[str] = None,
+    ) -> List[str]:
+        """Generate relevant hypotheses based on context."""
+
+        hypotheses = []
+
+        if category and category in self.HYPOTHESIS_TEMPLATES:
+            hypotheses.extend(self.HYPOTHESIS_TEMPLATES[category])
+        else:
+            # Auto-detect relevant categories from context
+            context_lower = context.lower()
+
+            if any(w in context_lower for w in ["momentum", "trend", "winning"]):
+                hypotheses.extend(self.HYPOTHESIS_TEMPLATES["momentum"])
+
+            if any(w in context_lower for w in ["revert", "bounce", "oversold", "overbought"]):
+                hypotheses.extend(self.HYPOTHESIS_TEMPLATES["mean_reversion"])
+
+            if any(w in context_lower for w in ["january", "month", "day", "seasonal"]):
+                hypotheses.extend(self.HYPOTHESIS_TEMPLATES["seasonality"])
+
+            if any(w in context_lower for w in ["volatility", "vol", "vix"]):
+                hypotheses.extend(self.HYPOTHESIS_TEMPLATES["volatility"])
+
+            if any(w in context_lower for w in ["correlation", "hedge", "diversif"]):
+                hypotheses.extend(self.HYPOTHESIS_TEMPLATES["correlation"])
+
+            if any(w in context_lower for w in ["value", "quality", "earnings", "fundamental"]):
+                hypotheses.extend(self.HYPOTHESIS_TEMPLATES["fundamental"])
+
+        return hypotheses if hypotheses else list(self.HYPOTHESIS_TEMPLATES["momentum"])
+
+    def parse_hypothesis_from_query(self, query: str) -> str:
+        """Extract a testable hypothesis from a natural language query."""
+
+        # Common patterns
+        patterns = [
+            (r"(does|do)\s+(.+)\s+(outperform|beat|predict)", r"\2 predicts outperformance"),
+            (r"(is|are)\s+(.+)\s+(better|worse)", r"\2 is a significant factor"),
+            (r"(can|could)\s+(.+)\s+(work|predict)", r"\2 has predictive power"),
+            (r"what if\s+(.+)", r"\1 is exploitable"),
+        ]
+
+        for pattern, replacement in patterns:
+            match = re.search(pattern, query, re.IGNORECASE)
+            if match:
+                return re.sub(pattern, replacement, query, flags=re.IGNORECASE)
+
+        # Default: convert to testable statement
+        return f"The pattern described in '{query}' is statistically significant"
+
+
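A quick sketch of the generator in use (illustrative, not from the package):

from sigma.strategy import HypothesisGenerator

gen = HypothesisGenerator()

# "vix" hits the volatility bucket in the keyword scan above.
for h in gen.generate_hypotheses("is the vix spike exploitable?"):
    print(h)

# The regex table rewrites only the matched span and leaves the rest
# of the query intact.
print(gen.parse_hypothesis_from_query("does 12-month momentum predict returns?"))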
+# ============================================================================
+# HYPOTHESIS TESTER
+# ============================================================================
+
+class HypothesisTester:
+    """Test hypotheses with statistical rigor."""
+
+    def __init__(self):
+        self.performance = PerformanceAnalytics()
+
+    def test_momentum_hypothesis(
+        self,
+        returns: pd.Series,
+        lookback: int = 252,
+        holding: int = 21,
+    ) -> HypothesisResult:
+        """Test if momentum is predictive."""
+
+        # Calculate rolling momentum
+        momentum = returns.rolling(lookback).apply(lambda x: (1 + x).prod() - 1)
+
+        # Create forward returns
+        forward_returns = returns.rolling(holding).apply(lambda x: (1 + x).prod() - 1).shift(-holding)
+
+        # Align and clean
+        df = pd.concat([momentum, forward_returns], axis=1).dropna()
+        df.columns = ["momentum", "forward_return"]
+
+        if len(df) < 100:
+            return HypothesisResult(
+                hypothesis=f"{lookback}-period momentum predicts {holding}-period forward returns",
+                supported=False,
+                confidence=0.0,
+                evidence=["Insufficient data"],
+                metrics={},
+                sample_size=len(df),
+                caveats=["Need at least 100 observations"],
+            )
+
+        # Split into quintiles
+        df["quintile"] = pd.qcut(df["momentum"], 5, labels=[1, 2, 3, 4, 5])
+
+        # Calculate returns by quintile
+        quintile_returns = df.groupby("quintile")["forward_return"].mean()
+
+        # Long-short spread
+        long_short = quintile_returns.iloc[-1] - quintile_returns.iloc[0]
+
+        # Calculate t-statistic
+        q5_returns = df[df["quintile"] == 5]["forward_return"]
+        q1_returns = df[df["quintile"] == 1]["forward_return"]
+
+        from scipy import stats
+        t_stat, p_value = stats.ttest_ind(q5_returns, q1_returns)
+
+        supported = p_value < 0.05 and long_short > 0
+        confidence = 1 - p_value
+
+        return HypothesisResult(
+            hypothesis=f"{lookback}-period momentum predicts {holding}-period forward returns",
+            supported=supported,
+            confidence=confidence,
+            evidence=[
+                f"Long-short spread: {long_short:.2%} per period",
+                f"t-statistic: {t_stat:.2f}",
+                f"p-value: {p_value:.4f}",
+                f"Top quintile avg: {quintile_returns.iloc[-1]:.2%}",
+                f"Bottom quintile avg: {quintile_returns.iloc[0]:.2%}",
+            ],
+            metrics={
+                "long_short_spread": long_short,
+                "t_statistic": t_stat,
+                "p_value": p_value,
+            },
+            sample_size=len(df),
+            caveats=[
+                "Past performance may not predict future results",
+                "Transaction costs not included",
+                "May be period-specific",
+            ],
+        )
+
+    def test_mean_reversion_hypothesis(
+        self,
+        returns: pd.Series,
+        threshold: float = 2.0,  # Standard deviations
+        holding: int = 5,
+    ) -> HypothesisResult:
+        """Test if extreme moves tend to reverse."""
+
+        # Calculate z-scores of returns
+        mean_return = returns.mean()
+        std_return = returns.std()
+        z_scores = (returns - mean_return) / std_return
+
+        # Identify extreme moves
+        extreme_down = z_scores < -threshold
+        extreme_up = z_scores > threshold
+
+        # Forward returns after extremes
+        forward_returns = returns.rolling(holding).apply(lambda x: (1 + x).prod() - 1).shift(-holding)
+
+        df = pd.concat([z_scores, forward_returns, extreme_down, extreme_up], axis=1).dropna()
+        df.columns = ["z_score", "forward_return", "extreme_down", "extreme_up"]
+
+        # Calculate results
+        after_down = df[df["extreme_down"]]["forward_return"].mean()
+        after_up = df[df["extreme_up"]]["forward_return"].mean()
+        n_down = df["extreme_down"].sum()
+        n_up = df["extreme_up"].sum()
+
+        # Reversion detected if:
+        # - Extreme down is followed by positive returns
+        # - Extreme up is followed by negative returns
+        supported = after_down > 0 and after_up < 0
+
+        # Confidence based on sample size and magnitude
+        if n_down >= 30 and n_up >= 30:
+            confidence = 0.8
+        elif n_down >= 10 and n_up >= 10:
+            confidence = 0.5
+        else:
+            confidence = 0.2
+
+        return HypothesisResult(
+            hypothesis=f"Extreme moves ({threshold}σ) tend to reverse over {holding} days",
+            supported=supported,
+            confidence=confidence,
+            evidence=[
+                f"Return after extreme down: {after_down:.2%} (n={n_down:.0f})",
+                f"Return after extreme up: {after_up:.2%} (n={n_up:.0f})",
+            ],
+            metrics={
+                "return_after_extreme_down": after_down,
+                "return_after_extreme_up": after_up,
+                "n_extreme_down": float(n_down),
+                "n_extreme_up": float(n_up),
+            },
+            sample_size=len(df),
+            caveats=[
+                "Threshold choice affects results",
+                "May not account for regime changes",
+                "Sample size may be small for extreme events",
+            ],
+        )
+
+    def test_seasonality_hypothesis(
+        self,
+        returns: pd.Series,
+        period: str = "month",  # month, dayofweek
+    ) -> HypothesisResult:
+        """Test if seasonality is statistically significant."""
+
+        if period == "month":
+            groups = returns.groupby(returns.index.month)
+            labels = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
+                      "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
+        else:
+            groups = returns.groupby(returns.index.dayofweek)
+            labels = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
+
+        # Calculate mean and std for each period
+        period_stats = groups.agg(["mean", "std", "count"])
+
+        # ANOVA test
+        from scipy import stats
+        group_data = [group.values for name, group in groups]
+        f_stat, p_value = stats.f_oneway(*group_data)
+
+        supported = p_value < 0.05
+        confidence = 1 - p_value
+
+        # Find best and worst periods
+        means = groups.mean()
+        best_idx = means.idxmax()
+        worst_idx = means.idxmin()
+
+        if period == "month":
+            best_label = labels[int(best_idx) - 1]
+            worst_label = labels[int(worst_idx) - 1]
+        else:
+            best_label = labels[int(best_idx)]
+            worst_label = labels[int(worst_idx)]
+
+        return HypothesisResult(
+            hypothesis=f"There are significant {period}ly patterns in returns",
+            supported=supported,
+            confidence=confidence,
+            evidence=[
+                f"F-statistic: {f_stat:.2f}",
+                f"p-value: {p_value:.4f}",
+                f"Best {period}: {best_label} ({means[best_idx]:.4%})",
+                f"Worst {period}: {worst_label} ({means[worst_idx]:.4%})",
+            ],
+            metrics={
+                "f_statistic": f_stat,
+                "p_value": p_value,
+                "best_period_return": means[best_idx],
+                "worst_period_return": means[worst_idx],
+            },
+            sample_size=len(returns),
+            caveats=[
+                "Seasonality may change over time",
+                "Sample period affects results",
+                "May be coincidental",
+            ],
+        )
+
+
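All three tests take a returns Series; the seasonality test additionally requires a DatetimeIndex, and the momentum and seasonality tests import scipy at call time. A synthetic smoke test (illustrative data, not from the package):

import numpy as np
import pandas as pd
from sigma.strategy import HypothesisTester

rng = np.random.default_rng(0)
idx = pd.bdate_range("2015-01-01", periods=2000)
returns = pd.Series(rng.normal(0.0003, 0.01, size=len(idx)), index=idx)

tester = HypothesisTester()
result = tester.test_momentum_hypothesis(returns, lookback=126, holding=21)
print(result.supported, round(result.confidence, 2))  # on pure noise, usually False
for line in result.evidence:
    print(" -", line)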
+# ============================================================================
+# RULE CONVERTER
+# ============================================================================
+
+class RuleConverter:
+    """Convert natural language rules to algorithmic specifications."""
+
+    # Signal definitions
+    SIGNAL_PATTERNS = {
+        "ma_cross": {
+            "patterns": [r"(\d+)\s*(?:day|d)\s*(?:ma|moving average)\s*cross(?:es|ing)?\s*(?:above|below)?\s*(\d+)\s*(?:day|d)"],
+            "template": "MA({fast}) crosses MA({slow})",
+        },
+        "rsi": {
+            "patterns": [r"rsi\s*(?:below|under|<)\s*(\d+)", r"rsi\s*(?:above|over|>)\s*(\d+)"],
+            "template": "RSI {condition} {threshold}",
+        },
+        "price_breakout": {
+            "patterns": [r"(?:price|close)\s*(?:breaks?|crosses?)\s*(?:above|below)\s*(\d+)\s*(?:day|d)\s*(?:high|low)"],
+            "template": "Price breaks {period}-day {level}",
+        },
+        "volatility": {
+            "patterns": [r"(?:vol|volatility)\s*(?:below|under|<)\s*(\d+)%?", r"low\s*(?:vol|volatility)"],
+            "template": "Volatility condition",
+        },
+    }
+
+    def parse_rule(self, text: str) -> Optional[TradingRule]:
+        """Parse a natural language rule into a TradingRule."""
+
+        text_lower = text.lower()
+
+        # Detect signal type
+        for signal_type, config in self.SIGNAL_PATTERNS.items():
+            for pattern in config["patterns"]:
+                match = re.search(pattern, text_lower)
+                if match:
+                    return self._create_rule(signal_type, match, text)
+
+        # Generic rule if no specific pattern matched
+        return TradingRule(
+            name="custom_rule",
+            description=text,
+            entry_condition=text,
+            required_data=["price"],
+            parameters={},
+        )
+
+    def _create_rule(
+        self,
+        signal_type: str,
+        match: re.Match,
+        original_text: str,
+    ) -> TradingRule:
+        """Create a TradingRule from a matched pattern."""
+
+        if signal_type == "ma_cross":
+            fast = int(match.group(1))
+            slow = int(match.group(2))
+            return TradingRule(
+                name="ma_crossover",
+                description=f"{fast}/{slow} Moving Average Crossover",
+                entry_condition=f"SMA({fast}) > SMA({slow})",
+                exit_condition=f"SMA({fast}) < SMA({slow})",
+                required_data=["close"],
+                parameters={"fast_period": fast, "slow_period": slow},
+            )
+
+        elif signal_type == "rsi":
+            threshold = int(match.group(1))
+            is_oversold = "below" in original_text.lower() or "<" in original_text
+            return TradingRule(
+                name="rsi_signal",
+                description=f"RSI {'Oversold' if is_oversold else 'Overbought'} Signal",
+                entry_condition=f"RSI < {threshold}" if is_oversold else f"RSI > {threshold}",
+                exit_condition=f"RSI > {100 - threshold}" if is_oversold else f"RSI < {threshold}",
+                required_data=["close"],
+                parameters={"threshold": threshold, "period": 14},
+            )
+
+        elif signal_type == "price_breakout":
+            period = int(match.group(1))
+            is_high = "high" in original_text.lower()
+            return TradingRule(
+                name="price_breakout",
+                description=f"{period}-Day {'High' if is_high else 'Low'} Breakout",
+                entry_condition=f"Close > {period}-day high" if is_high else f"Close < {period}-day low",
+                required_data=["close", "high" if is_high else "low"],
+                parameters={"period": period, "breakout_type": "high" if is_high else "low"},
+            )
+
+        # Default
+        return TradingRule(
+            name=signal_type,
+            description=original_text,
+            entry_condition=original_text,
+            required_data=["close"],
+            parameters={},
+        )
+
+    def rule_to_python(self, rule: TradingRule) -> str:
+        """Convert a TradingRule to Python code."""
+
+        if rule.name == "ma_crossover":
+            fast = rule.parameters.get("fast_period", 10)
+            slow = rule.parameters.get("slow_period", 50)
+            return f'''
+def generate_signals(prices: pd.Series) -> pd.Series:
+    """MA Crossover: {rule.description}"""
+    fast_ma = prices.rolling({fast}).mean()
+    slow_ma = prices.rolling({slow}).mean()
+
+    signal = pd.Series(0, index=prices.index)
+    signal[fast_ma > slow_ma] = 1   # Long
+    signal[fast_ma < slow_ma] = -1  # Short or flat
+
+    return signal
+'''
+
+        elif rule.name == "rsi_signal":
+            threshold = rule.parameters.get("threshold", 30)
+            period = rule.parameters.get("period", 14)
+            return f'''
+def generate_signals(prices: pd.Series) -> pd.Series:
+    """RSI Signal: {rule.description}"""
+    delta = prices.diff()
+    gain = delta.where(delta > 0, 0).rolling({period}).mean()
+    loss = (-delta.where(delta < 0, 0)).rolling({period}).mean()
+
+    rs = gain / loss
+    rsi = 100 - (100 / (1 + rs))
+
+    signal = pd.Series(0, index=prices.index)
+    signal[rsi < {threshold}] = 1   # Buy on oversold
+    signal[rsi > {100 - threshold}] = -1  # Sell on overbought
+
+    return signal
+'''
+
+        elif rule.name == "price_breakout":
+            period = rule.parameters.get("period", 20)
+            breakout_type = rule.parameters.get("breakout_type", "high")
+            return f'''
+def generate_signals(prices: pd.DataFrame) -> pd.Series:
+    """Breakout Signal: {rule.description}"""
+    close = prices['close']
+
+    {'high_n = prices["high"].rolling('+str(period)+').max()' if breakout_type == 'high' else 'low_n = prices["low"].rolling('+str(period)+').min()'}
+
+    signal = pd.Series(0, index=close.index)
+    {'signal[close > high_n.shift(1)] = 1' if breakout_type == 'high' else 'signal[close < low_n.shift(1)] = -1'}
+
+    return signal
+'''
+
+        # Generic template
+        return f'''
+def generate_signals(prices: pd.Series) -> pd.Series:
+    """Custom Signal: {rule.description}
+
+    Entry: {rule.entry_condition}
+    Exit: {rule.exit_condition or "Reverse signal"}
+    """
+    # TODO: Implement custom logic
+    signal = pd.Series(0, index=prices.index)
+    return signal
+'''
+
+
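Round-tripping a sentence through the converter, as a sketch of the intended flow: parse_rule() lowercases the text before matching, and rule_to_python() returns the generate_signals() source as a string; nothing in this module executes it.

from sigma.strategy import RuleConverter

conv = RuleConverter()

rule = conv.parse_rule("enter when 10 day MA crosses above 50 day MA")
print(rule.name)        # ma_crossover
print(rule.parameters)  # {'fast_period': 10, 'slow_period': 50}

# The emitted code assumes pandas is imported as pd wherever it is run.
print(conv.rule_to_python(rule))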
+# ============================================================================
+# FAILURE MODE DETECTOR
+# ============================================================================
+
+class FailureModeDetector:
+    """Detect potential failure modes in strategies."""
+
+    FAILURE_MODES = {
+        "overfitting": {
+            "description": "Strategy may be overfitted to historical data",
+            "checks": ["parameter_sensitivity", "out_of_sample"],
+        },
+        "regime_dependency": {
+            "description": "Strategy may only work in specific market regimes",
+            "checks": ["regime_breakdown"],
+        },
+        "capacity_limit": {
+            "description": "Strategy may have limited capacity",
+            "checks": ["market_impact", "liquidity"],
+        },
+        "crowding": {
+            "description": "Strategy may be crowded by similar traders",
+            "checks": ["signal_correlation"],
+        },
+        "data_mining": {
+            "description": "Results may be due to data mining bias",
+            "checks": ["multiple_testing"],
+        },
+    }
+
+    def detect_failure_modes(
+        self,
+        strategy_results: Dict[str, Any],
+        returns: pd.Series,
+    ) -> List[Dict[str, Any]]:
+        """Detect potential failure modes in a strategy."""
+
+        failures = []
+
+        # Check for overfitting signals
+        if self._check_overfitting(strategy_results):
+            failures.append({
+                "mode": "overfitting",
+                "severity": "high",
+                "description": self.FAILURE_MODES["overfitting"]["description"],
+                "evidence": self._get_overfitting_evidence(strategy_results),
+                "mitigation": "Use walk-forward optimization, reduce parameters",
+            })
+
+        # Check for regime dependency
+        if self._check_regime_dependency(strategy_results, returns):
+            failures.append({
+                "mode": "regime_dependency",
+                "severity": "medium",
+                "description": self.FAILURE_MODES["regime_dependency"]["description"],
+                "evidence": ["Performance varies significantly across market regimes"],
+                "mitigation": "Add regime filters or diversify strategies",
+            })
+
+        # Check for data mining
+        if self._check_data_mining(strategy_results):
+            failures.append({
+                "mode": "data_mining",
+                "severity": "medium",
+                "description": self.FAILURE_MODES["data_mining"]["description"],
+                "evidence": ["Multiple parameters tested without correction"],
+                "mitigation": "Apply multiple testing correction, use holdout data",
+            })
+
+        return failures
+
+    def _check_overfitting(self, results: Dict[str, Any]) -> bool:
+        """Check for signs of overfitting."""
+
+        # High in-sample performance but not robust
+        sharpe = results.get("sharpe_ratio", 0)
+        n_params = results.get("num_parameters", 0)
+        n_trades = results.get("num_trades", 100)
+
+        # Too good to be true Sharpe
+        if sharpe > 3:
+            return True
+
+        # Too many parameters relative to trades
+        if n_params > 0 and n_trades / n_params < 20:
+            return True
+
+        return False
+
+    def _check_regime_dependency(
+        self,
+        results: Dict[str, Any],
+        returns: pd.Series,
+    ) -> bool:
+        """Check if strategy is regime-dependent."""
+
+        # Calculate volatility regimes
+        vol = returns.rolling(63).std() * np.sqrt(252)
+        high_vol_threshold = vol.quantile(0.75)
+
+        # Check if performance differs significantly by regime
+        strategy_returns = results.get("strategy_returns", returns)
+
+        if isinstance(strategy_returns, pd.Series):
+            high_vol_perf = strategy_returns[vol > high_vol_threshold].mean()
+            low_vol_perf = strategy_returns[vol <= high_vol_threshold].mean()
+
+            # Significant difference suggests regime dependency
+            if abs(high_vol_perf - low_vol_perf) > 0.1 * abs(strategy_returns.mean()):
+                return True
+
+        return False
+
+    def _check_data_mining(self, results: Dict[str, Any]) -> bool:
+        """Check for data mining bias."""
+
+        # Simple heuristics
+        n_params = results.get("num_parameters", 0)
+
+        # Many parameters suggest potential data mining
+        if n_params > 5:
+            return True
+
+        return False
+
+    def _get_overfitting_evidence(self, results: Dict[str, Any]) -> List[str]:
+        """Get evidence of overfitting."""
+
+        evidence = []
+
+        sharpe = results.get("sharpe_ratio", 0)
+        if sharpe > 3:
+            evidence.append(f"Unusually high Sharpe ratio: {sharpe:.2f}")
+
+        n_params = results.get("num_parameters", 0)
+        n_trades = results.get("num_trades", 100)
+        if n_params > 0 and n_trades / n_params < 20:
+            evidence.append(f"Low trade/parameter ratio: {n_trades/n_params:.1f}")
+
+        return evidence
+
+
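The detector reads only sharpe_ratio, num_parameters, num_trades, and optionally strategy_returns from the results dict. A hypothetical invocation, with values chosen to trip the deterministic checks:

import numpy as np
import pandas as pd
from sigma.strategy import FailureModeDetector

rng = np.random.default_rng(1)
idx = pd.bdate_range("2018-01-01", periods=1500)
returns = pd.Series(rng.normal(0.0004, 0.012, size=len(idx)), index=idx)

flags = FailureModeDetector().detect_failure_modes(
    {"sharpe_ratio": 3.4, "num_parameters": 6, "num_trades": 80},
    returns,
)
for f in flags:
    print(f["mode"], "/", f["severity"], "->", f["mitigation"])
# sharpe 3.4 > 3 and 80/6 < 20 both trip overfitting; 6 > 5 trips data_mining.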
+# ============================================================================
+# STRATEGY GENERATOR
+# ============================================================================
+
+class StrategyGenerator:
+    """Generate strategy ideas from hypotheses and rules."""
+
+    STRATEGY_TEMPLATES = {
+        "momentum": StrategyIdea(
+            name="Trend Following",
+            thesis="Trends persist due to behavioral biases and institutional flows",
+            rules=[
+                TradingRule(
+                    name="price_above_ma",
+                    description="Price above 200-day MA",
+                    entry_condition="Close > SMA(200)",
+                    exit_condition="Close < SMA(200)",
+                    required_data=["close"],
+                    parameters={"period": 200},
+                ),
+            ],
+            expected_edge="2-4% annualized alpha in trending markets",
+            risk_factors=["Choppy markets", "Regime changes", "Crowding"],
+            data_requirements=["Daily prices", "Volume"],
+        ),
+        "mean_reversion": StrategyIdea(
+            name="Mean Reversion",
+            thesis="Prices revert to fair value after overreaction",
+            rules=[
+                TradingRule(
+                    name="oversold_bounce",
+                    description="Buy when RSI oversold",
+                    entry_condition="RSI(14) < 30",
+                    exit_condition="RSI(14) > 70",
+                    required_data=["close"],
+                    parameters={"rsi_period": 14, "oversold": 30, "overbought": 70},
+                ),
+            ],
+            expected_edge="1-3% annualized alpha",
+            risk_factors=["Trending markets", "Extended drawdowns", "Value traps"],
+            data_requirements=["Daily prices"],
+        ),
+        "quality": StrategyIdea(
+            name="Quality Factor",
+            thesis="High-quality companies outperform over long horizons",
+            rules=[
+                TradingRule(
+                    name="quality_screen",
+                    description="Screen for quality metrics",
+                    entry_condition="ROE > 15% AND Debt/Equity < 0.5 AND Margin Trend > 0",
+                    exit_condition="Quality score deteriorates",
+                    required_data=["fundamentals"],
+                    parameters={"roe_threshold": 0.15, "de_threshold": 0.5},
+                ),
+            ],
+            expected_edge="2-5% annualized alpha over market cycles",
+            risk_factors=["Valuation multiples", "Factor crowding", "Sector concentration"],
+            data_requirements=["Quarterly fundamentals", "Daily prices"],
+        ),
+    }
+
+    def suggest_strategies(
+        self,
+        hypothesis_results: List[HypothesisResult],
+        market_context: Optional[Dict[str, Any]] = None,
+    ) -> List[StrategyIdea]:
+        """Suggest strategies based on hypothesis testing results."""
+
+        suggestions = []
+
+        for result in hypothesis_results:
+            if result.supported and result.confidence > 0.8:
+                # Map hypothesis to strategy template
+                hypothesis_lower = result.hypothesis.lower()
+
+                if "momentum" in hypothesis_lower or "trend" in hypothesis_lower:
+                    suggestions.append(self.STRATEGY_TEMPLATES["momentum"])
+
+                elif "revert" in hypothesis_lower or "extreme" in hypothesis_lower:
+                    suggestions.append(self.STRATEGY_TEMPLATES["mean_reversion"])
+
+                elif "quality" in hypothesis_lower or "fundamental" in hypothesis_lower:
+                    suggestions.append(self.STRATEGY_TEMPLATES["quality"])
+
+        return suggestions
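End to end, the intended flow appears to be: test first, then map supported results to templates. A sketch on synthetic data (on pure noise the suggestion list is usually empty, since only supported results with confidence above 0.8 qualify):

import numpy as np
import pandas as pd
from sigma.strategy import HypothesisTester, StrategyGenerator

rng = np.random.default_rng(2)
idx = pd.bdate_range("2012-01-01", periods=2500)
returns = pd.Series(rng.normal(0.0003, 0.01, size=len(idx)), index=idx)

tester = HypothesisTester()
results = [
    tester.test_momentum_hypothesis(returns),
    tester.test_mean_reversion_hypothesis(returns),
]

for idea in StrategyGenerator().suggest_strategies(results):
    print(f"{idea.name}: {idea.thesis}")

One quirk worth noting: test_mean_reversion_hypothesis caps confidence at 0.8, so under the strict confidence > 0.8 gate its results can never produce a suggestion, and the seasonality hypothesis text ("There are significant ...ly patterns in returns") matches none of the keyword branches; as written, only the momentum test can feed suggest_strategies.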