quantcli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,163 @@
1
+ """因子定义基础类"""
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Optional, List, Dict, Any
5
+ from enum import Enum
6
+
7
+
8
class FactorType(str, Enum):
    """Category a factor belongs to.

    Members also subclass ``str``, so each member compares equal to its
    plain string value (``FactorType.TECHNICAL == "technical"``).
    """

    FUNDAMENTAL = "fundamental"
    TECHNICAL = "technical"
    INTRADAY = "intraday"
    COMPOSITE = "composite"
14
+
15
+
16
class FactorDirection(str, Enum):
    """Direction in which a factor value is considered favourable.

    Members also subclass ``str``, so each member compares equal to its
    plain string value.
    """

    # Higher values are better.
    POSITIVE = "positive"
    # Lower values are better.
    NEGATIVE = "negative"
    # No directional preference.
    NEUTRAL = "neutral"
21
+
22
+
23
@dataclass
class FactorDefinition:
    """Declarative description of a single factor.

    Attributes:
        name: Factor name.
        type: Factor type; must equal one of the ``FactorType`` values.
        expr: Factor expression.
        direction: Factor direction; must equal one of the
            ``FactorDirection`` values. Defaults to ``"neutral"``.
        description: Free-text description of the factor.
        params: Optional extra parameters.
    """

    name: str
    type: str
    expr: str
    direction: str = "neutral"
    description: str = ""
    params: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Validate ``type`` and ``direction`` right after construction.

        Raises:
            ValueError: If ``type`` is not a ``FactorType`` value or
                ``direction`` is not a ``FactorDirection`` value.
        """
        allowed_types = [member.value for member in FactorType]
        allowed_directions = [member.value for member in FactorDirection]

        # The type is checked first, so an invalid type is reported even
        # when the direction is invalid as well.
        if self.type not in allowed_types:
            raise ValueError(f"Invalid factor type: {self.type}")
        if self.direction not in allowed_directions:
            raise ValueError(f"Invalid factor direction: {self.direction}")
48
+
49
+
50
@dataclass
class ScreeningStage:
    """Configuration of the screening stage.

    Attributes:
        conditions: Simplified condition list (kept for backward
            compatibility).
        fundamental_conditions: Conditions that need fundamental data.
        daily_conditions: Conditions that need daily-bar data.
        limit: Maximum number of candidates.
    """

    conditions: List[str] = field(default_factory=list)
    fundamental_conditions: List[str] = field(default_factory=list)
    daily_conditions: List[str] = field(default_factory=list)
    limit: int = 200

    @staticmethod
    def _as_condition_list(value: Any) -> List[str]:
        """Turn a raw config value into a fresh list of conditions."""
        if value is None:
            return []
        if isinstance(value, str):
            # A bare string is one condition, not a sequence of characters.
            return [value]
        # Copy so the stage never aliases the caller's container.
        return list(value)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ScreeningStage":
        """Build a screening stage from parsed configuration.

        Args:
            data: Mapping with the optional keys ``conditions``,
                ``fundamental_conditions``, ``daily_conditions`` and
                ``limit``. For backward compatibility a plain list (or
                tuple) of conditions is accepted as well, and ``None``
                yields the default stage.

        Returns:
            The populated ``ScreeningStage``. Keys that are missing or
            explicitly set to ``None`` fall back to the field defaults.
        """
        if data is None:
            return cls()
        if isinstance(data, (list, tuple)):
            # Legacy format: the value is the condition list itself.
            return cls(conditions=cls._as_condition_list(data))

        limit = data.get("limit")
        return cls(
            conditions=cls._as_condition_list(data.get("conditions")),
            fundamental_conditions=cls._as_condition_list(
                data.get("fundamental_conditions")
            ),
            daily_conditions=cls._as_condition_list(data.get("daily_conditions")),
            # Only None means "unset"; an explicit 0 is preserved.
            limit=200 if limit is None else limit,
        )
77
+
78
+
79
@dataclass
class IntradayStage:
    """Configuration of the intraday (minute-level) stage.

    Attributes:
        weights: Factor weight configuration.
        normalize: Normalization method.
    """

    weights: Dict[str, float] = field(default_factory=dict)
    normalize: str = "zscore"

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "IntradayStage":
        """Build an intraday stage from parsed configuration.

        Args:
            data: Mapping with the optional keys ``weights`` and
                ``normalize``. ``None`` or an empty mapping yields the
                default stage.

        Returns:
            The populated ``IntradayStage``. Keys that are missing or
            explicitly set to ``None`` fall back to the field defaults.
        """
        if not data:
            return cls()

        weights = data.get("weights")
        normalize = data.get("normalize")
        return cls(
            # Copy so later edits to the source mapping do not leak in.
            weights=dict(weights) if weights else {},
            normalize="zscore" if normalize is None else normalize,
        )
97
+
98
+
99
@dataclass
class StrategyConfig:
    """Top-level strategy configuration.

    Attributes:
        name: Strategy name.
        version: Version string; defaults to ``"1.0.0"``.
        screening: Screening configuration mapping.
        ranking: Ranking (weight) configuration mapping.
        intraday: Intraday configuration mapping (optional).
        output: Output configuration mapping.
    """

    name: str
    version: str = "1.0.0"
    # Each mapping gets its own fresh dict per instance.
    screening: Dict[str, Any] = field(default_factory=dict)
    ranking: Dict[str, Any] = field(default_factory=dict)
    intraday: Dict[str, Any] = field(default_factory=dict)
    output: Dict[str, Any] = field(default_factory=dict)
117
+
118
+
119
@dataclass
class ScreeningCondition:
    """A single screening condition.

    Attributes:
        expression: Expression string, e.g. ``"pe < 50"``.
        column: Name of the column the expression starts with, or ``""``
            when the expression does not begin with ``<name> <operator>``.
    """

    expression: str
    column: str

    @classmethod
    def from_string(cls, expr: str) -> "ScreeningCondition":
        """Parse a condition string.

        The column is the identifier at the start of the expression that
        is directly followed by a comparison operator. Leading whitespace
        is ignored when looking for it; ``expression`` itself is stored
        unchanged.

        Args:
            expr: Condition expression such as ``"pe < 50"``.

        Returns:
            A ``ScreeningCondition`` holding the expression and the
            extracted column name (``""`` if none was found).
        """
        import re

        # re.match already anchors at the start; the leading \s* keeps an
        # indented expression from losing its column.
        match = re.match(r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*[<>=!]+', expr)
        column = match.group(1) if match else ""
        return cls(expression=expr, column=column)
141
+
142
+
143
@dataclass
class BonusCondition:
    """A bonus (extra-score) condition.

    Attributes:
        condition: Condition expression string, e.g.
            ``"volume_ratio < 0.8"``.
        weight: Bonus weight.
        description: Description of the condition.
    """

    condition: str
    weight: float
    description: str = ""

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "BonusCondition":
        """Build a bonus condition from a mapping.

        Args:
            data: Mapping with the required keys ``condition`` and
                ``weight`` and the optional key ``description``.

        Returns:
            The populated ``BonusCondition``; ``weight`` is converted
            with ``float()``.

        Raises:
            KeyError: If ``condition`` or ``weight`` is missing.
        """
        # Look up the required keys in declaration order.
        expression = data["condition"]
        bonus_weight = float(data["weight"])
        note = data.get("description", "")
        return cls(condition=expression, weight=bonus_weight, description=note)
@@ -0,0 +1,281 @@
1
+ """因子计算器
2
+
3
+ 职责:
4
+ - 为筛选条件计算因子值
5
+ - 为排名阶段计算所有因子值
6
+ - 支持日线和分钟因子混合计算
7
+ """
8
+
9
+ import pandas as pd
10
+ import numpy as np
11
+ from typing import Dict, List, Optional, Set
12
+
13
+ from ..utils import get_logger
14
+ from .base import FactorDefinition
15
+ from ..parser import Formula
16
+ from ..parser.constants import BUILTIN_FUNCTIONS
17
+
18
+ logger = get_logger(__name__)
19
+
20
+
21
+ class FactorComputer:
22
+ """因子计算器"""
23
+
24
+ def __init__(self, builtin_functions: Optional[Set[str]] = None):
25
+ """初始化
26
+
27
+ Args:
28
+ builtin_functions: 内置函数集合,用于区分因子和函数
29
+ """
30
+ self.builtin_functions = builtin_functions or BUILTIN_FUNCTIONS
31
+
32
+ def find_factor_by_name(
33
+ self,
34
+ factor_name: str,
35
+ factors: Dict[str, FactorDefinition]
36
+ ) -> Optional[FactorDefinition]:
37
+ """根据因子名查找因子定义
38
+
39
+ Args:
40
+ factor_name: 因子名
41
+ factors: 因子定义字典
42
+
43
+ Returns:
44
+ FactorDefinition 或 None
45
+ """
46
+ for ref, factor in factors.items():
47
+ # 通过文件名匹配(如 "ma10_deviation" 在路径中)
48
+ if factor_name in ref.lower():
49
+ return factor
50
+ return None
51
+
52
+ def get_factor_names_from_conditions(
53
+ self,
54
+ conditions: List,
55
+ data_columns: Optional[Set[str]] = None
56
+ ) -> Set[str]:
57
+ """从条件表达式中提取因子名
58
+
59
+ Args:
60
+ conditions: 条件列表
61
+ data_columns: 数据中已有的列名
62
+
63
+ Returns:
64
+ 需要计算的因子名集合
65
+ """
66
+ if data_columns is None:
67
+ data_columns = {'close', 'open', 'high', 'low', 'volume'}
68
+
69
+ factor_names = set()
70
+ import re
71
+
72
+ for condition in conditions:
73
+ expr_str = condition.expression if hasattr(condition, 'expression') else condition
74
+ matches = re.findall(r'\b([a-zA-Z_][a-zA-Z0-9_]*)\b', expr_str)
75
+
76
+ for m in matches:
77
+ if m not in self.builtin_functions and m not in data_columns:
78
+ factor_names.add(m)
79
+
80
+ return factor_names
81
+
82
+ def compute_factors_for_screening(
83
+ self,
84
+ factor_names: List[str],
85
+ factors: Dict[str, FactorDefinition],
86
+ price_data: Dict[str, pd.DataFrame],
87
+ candidates: List[str]
88
+ ) -> Dict[str, Dict[str, float]]:
89
+ """为筛选条件计算因子值
90
+
91
+ Args:
92
+ factor_names: 需要计算的因子名列表
93
+ factors: 因子定义字典
94
+ price_data: 价格数据字典
95
+ candidates: 候选股票列表
96
+
97
+ Returns:
98
+ {symbol: {factor_name: value}} 字典
99
+ """
100
+ results = {}
101
+
102
+ for symbol in candidates:
103
+ if symbol not in price_data:
104
+ continue
105
+
106
+ symbol_data = price_data[symbol]
107
+ if symbol_data.empty:
108
+ continue
109
+
110
+ symbol_results = {}
111
+ for factor_name in factor_names:
112
+ factor = self.find_factor_by_name(factor_name, factors)
113
+ if not factor:
114
+ continue
115
+
116
+ try:
117
+ formula = Formula(factor.expr, name=factor_name)
118
+ result = formula.compute(symbol_data)
119
+ if not result.empty:
120
+ symbol_results[factor_name] = float(result.iloc[-1])
121
+ except Exception as e:
122
+ logger.warning(f"Failed to compute factor {factor_name} for {symbol}: {e}")
123
+
124
+ if symbol_results:
125
+ results[symbol] = symbol_results
126
+
127
+ return results
128
+
129
+ def compute_all_factors(
130
+ self,
131
+ factors: Dict[str, FactorDefinition],
132
+ price_data: Dict[str, pd.DataFrame],
133
+ intraday_data: Optional[Dict[str, pd.DataFrame]] = None,
134
+ candidates: Optional[List[str]] = None
135
+ ) -> pd.DataFrame:
136
+ """计算所有因子值(日线和分钟混合)
137
+
138
+ Args:
139
+ factors: 因子定义字典
140
+ price_data: 日线价格数据
141
+ intraday_data: 分钟数据(可选)
142
+ candidates: 候选股票列表
143
+
144
+ Returns:
145
+ 包含因子值的 DataFrame
146
+ """
147
+ if candidates is None:
148
+ candidates = list(price_data.keys())
149
+
150
+ rows = []
151
+
152
+ for symbol in candidates:
153
+ row = {"symbol": symbol}
154
+
155
+ # 日线因子
156
+ if symbol in price_data:
157
+ daily_df = price_data[symbol]
158
+ for ref, factor in factors.items():
159
+ if factor.type == "intraday":
160
+ continue
161
+
162
+ try:
163
+ formula = Formula(factor.expr, name=factor.name)
164
+ result = formula.compute(daily_df)
165
+ if not result.empty:
166
+ row[factor.name] = float(result.iloc[-1])
167
+ except Exception as e:
168
+ logger.warning(f"Failed to compute daily factor {factor.name}: {e}")
169
+
170
+ # 分钟因子
171
+ if intraday_data and symbol in intraday_data:
172
+ intraday_df = intraday_data[symbol]
173
+ if not intraday_df.empty:
174
+ for ref, factor in factors.items():
175
+ if factor.type != "intraday":
176
+ continue
177
+
178
+ try:
179
+ formula = Formula(factor.expr, name=factor.name)
180
+ result = formula.compute(intraday_df)
181
+ if not result.empty:
182
+ row[factor.name] = float(result.iloc[-1])
183
+ except Exception as e:
184
+ logger.warning(f"Failed to compute intraday factor {factor.name}: {e}")
185
+
186
+ rows.append(row)
187
+
188
+ if rows:
189
+ return pd.DataFrame(rows)
190
+ return pd.DataFrame(columns=["symbol"])
191
+
192
+ def merge_factor_results(
193
+ self,
194
+ factor_data: List[Dict],
195
+ screening_factors: Optional[Dict[str, Dict[str, float]]] = None
196
+ ) -> pd.DataFrame:
197
+ """合并因子计算结果
198
+
199
+ Args:
200
+ factor_data: 排名阶段的因子计算结果
201
+ screening_factors: 筛选阶段计算的因子值
202
+
203
+ Returns:
204
+ 合并后的 DataFrame
205
+ """
206
+ if not factor_data:
207
+ return pd.DataFrame()
208
+
209
+ df = pd.DataFrame(factor_data)
210
+
211
+ # 合并筛选阶段计算的因子值
212
+ if screening_factors:
213
+ for symbol, factor_values in screening_factors.items():
214
+ mask = df["symbol"] == symbol
215
+ if mask.any():
216
+ for name, value in factor_values.items():
217
+ if name not in df.columns:
218
+ df.loc[mask, name] = value
219
+
220
+ return df
221
+
222
+ def compute_inline_factors(
223
+ self,
224
+ inline_factors: List[FactorDefinition],
225
+ price_data: Dict[str, pd.DataFrame],
226
+ intraday_data: Optional[Dict[str, pd.DataFrame]] = None,
227
+ candidates: Optional[List[str]] = None
228
+ ) -> Dict[str, FactorDefinition]:
229
+ """计算内联因子定义
230
+
231
+ Args:
232
+ inline_factors: 内联因子定义列表
233
+ price_data: 日线价格数据
234
+ intraday_data: 分钟数据(可选)
235
+ candidates: 候选股票列表
236
+
237
+ Returns:
238
+ 因子定义字典(key 为因子名)
239
+ """
240
+ if not inline_factors:
241
+ return {}
242
+
243
+ if candidates is None:
244
+ candidates = list(price_data.keys())
245
+
246
+ # 转换为字典格式,key 为因子名
247
+ result = {}
248
+
249
+ for symbol in candidates:
250
+ if symbol not in price_data:
251
+ continue
252
+
253
+ daily_df = price_data[symbol]
254
+ if daily_df.empty:
255
+ continue
256
+
257
+ for factor in inline_factors:
258
+ # 避免重复计算
259
+ if factor.name in result:
260
+ continue
261
+
262
+ try:
263
+ # 检查因子类型
264
+ if factor.type == "intraday" and intraday_data and symbol in intraday_data:
265
+ df = intraday_data[symbol]
266
+ else:
267
+ df = daily_df
268
+
269
+ if df.empty:
270
+ continue
271
+
272
+ formula = Formula(factor.expr, name=factor.name)
273
+ result_df = formula.compute(df)
274
+ if not result_df.empty:
275
+ # 验证因子值有效(非全 NaN)
276
+ if not result_df.isna().all():
277
+ result[factor.name] = factor
278
+ except Exception as e:
279
+ logger.warning(f"Failed to compute inline factor {factor.name}: {e}")
280
+
281
+ return result