alpha101-pipeline 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alpha101_pipeline-0.1.0/.gitignore +46 -0
- alpha101_pipeline-0.1.0/.omc/sessions/1a6eb433-b024-4110-831b-3696d302b3d4.json +8 -0
- alpha101_pipeline-0.1.0/LICENSE +21 -0
- alpha101_pipeline-0.1.0/PKG-INFO +200 -0
- alpha101_pipeline-0.1.0/README.md +172 -0
- alpha101_pipeline-0.1.0/pyproject.toml +44 -0
- alpha101_pipeline-0.1.0/src/alpha101_pipeline/__init__.py +42 -0
- alpha101_pipeline-0.1.0/src/alpha101_pipeline/backtest.py +1471 -0
- alpha101_pipeline-0.1.0/src/alpha101_pipeline/cli.py +170 -0
- alpha101_pipeline-0.1.0/src/alpha101_pipeline/engine.py +1648 -0
- alpha101_pipeline-0.1.0/src/alpha101_pipeline/plot.py +314 -0
- alpha101_pipeline-0.1.0/src/alpha101_pipeline/reorder.py +412 -0
- alpha101_pipeline-0.1.0/src/alpha101_pipeline/store.py +545 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Rust build artifacts
|
|
2
|
+
/target/
|
|
3
|
+
factor_analysis_cli/target/
|
|
4
|
+
**/*.rs.bk
|
|
5
|
+
*.pdb
|
|
6
|
+
|
|
7
|
+
# Cargo
|
|
8
|
+
Cargo.lock
|
|
9
|
+
|
|
10
|
+
# IDE and editor files
|
|
11
|
+
.idea/
|
|
12
|
+
.vscode/
|
|
13
|
+
*.swp
|
|
14
|
+
*.swo
|
|
15
|
+
*~
|
|
16
|
+
.DS_Store
|
|
17
|
+
|
|
18
|
+
# Debug and profiling
|
|
19
|
+
perf.data*
|
|
20
|
+
*.profraw
|
|
21
|
+
*.profdata
|
|
22
|
+
|
|
23
|
+
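# Temporary files: ignores every *.txt (the !*.md lines below have no effect here, since *.txt never matches a .md file)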
|
|
24
|
+
*.txt
|
|
25
|
+
!USAGE.md
|
|
26
|
+
!README.md
|
|
27
|
+
!PROJECT_SUMMARY.md
|
|
28
|
+
!CLAUDE.md
|
|
29
|
+
|
|
30
|
+
# Test and demo command files (redundant: already ignored by *.txt above)
|
|
31
|
+
demo_commands.txt
|
|
32
|
+
test_commands.txt
|
|
33
|
+
|
|
34
|
+
# Compiled binaries (optional - uncomment if you don't want to commit any binaries)
|
|
35
|
+
# /target/release/alpha101_polars
|
|
36
|
+
|
|
37
|
+
# Python package build artifacts
|
|
38
|
+
alpha101_pipeline/dist/
|
|
39
|
+
alpha101_pipeline/.omc/
|
|
40
|
+
|
|
41
|
+
# OMC state
|
|
42
|
+
.omc/
|
|
43
|
+
|
|
44
|
+
# Data and output (large files)
|
|
45
|
+
data/
|
|
46
|
+
output/
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 goldenquant
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: alpha101-pipeline
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: DuckDB-powered factor research pipeline: formula engine, factor store, layered backtest
|
|
5
|
+
Project-URL: Repository, https://github.com/goldenquant/alpha101
|
|
6
|
+
Author: goldenquant
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Keywords: alpha101,backtest,duckdb,factor,parquet,quant
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Financial and Insurance Industry
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Office/Business :: Financial :: Investment
|
|
19
|
+
Requires-Python: >=3.9
|
|
20
|
+
Requires-Dist: duckdb>=1.0
|
|
21
|
+
Requires-Dist: pyarrow>=14.0
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: build; extra == 'dev'
|
|
24
|
+
Requires-Dist: twine; extra == 'dev'
|
|
25
|
+
Provides-Extra: plot
|
|
26
|
+
Requires-Dist: matplotlib>=3.7; extra == 'plot'
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# alpha101-pipeline
|
|
30
|
+
|
|
31
|
+
基于 DuckDB 的 A 股日内因子研究全流程工具链:公式引擎 → 因子存储 → 分层回测 → 可视化。
|
|
32
|
+
|
|
33
|
+
## 安装
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install alpha101-pipeline
|
|
37
|
+
# 带绘图功能:
|
|
38
|
+
pip install "alpha101-pipeline[plot]"
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## 快速上手
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
# 1. 重排面板数据(按 datetime, code 排序,TIMESTAMP 类型,多行组)
|
|
45
|
+
alpha101-reorder --input raw.parquet --output panel_sorted.parquet
|
|
46
|
+
|
|
47
|
+
# 2. 计算因子存入 store
|
|
48
|
+
alpha101-store add \
|
|
49
|
+
--source panel_sorted.parquet \
|
|
50
|
+
--store data/factors \
|
|
51
|
+
--formula mom_12='ts_mean(delta(close,1),12)' \
|
|
52
|
+
--formula rev_12='ts_mean(delta(close,1),12) * -1'
|
|
53
|
+
|
|
54
|
+
# 3. 回测全部因子
|
|
55
|
+
alpha101-backtest \
|
|
56
|
+
--store data/factors \
|
|
57
|
+
--out-dir output/backtest \
|
|
58
|
+
--forward 12 --groups 10
|
|
59
|
+
|
|
60
|
+
# 4. 绘制分层图
|
|
61
|
+
alpha101-plot batch \
|
|
62
|
+
--returns-root output/backtest/series \
|
|
63
|
+
--reports-dir output/backtest/reports \
|
|
64
|
+
--out-dir output/backtest/plots
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Python API
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from alpha101_pipeline import FactorStore, run_intraday_multi
|
|
71
|
+
from pathlib import Path
|
|
72
|
+
|
|
73
|
+
# 计算因子
|
|
74
|
+
store = FactorStore(Path("data/factors"))
|
|
75
|
+
store.add_factors(
|
|
76
|
+
[("mom_12", "ts_mean(delta(close,1),12)")],
|
|
77
|
+
source=Path("panel_sorted.parquet"),
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
# 回测
|
|
81
|
+
reports = run_intraday_multi(
|
|
82
|
+
store.store_dir,
|
|
83
|
+
["mom_12"],
|
|
84
|
+
source_panel=Path("panel_sorted.parquet"),
|
|
85
|
+
factor_files={"mom_12": store.factor_path("mom_12")},
|
|
86
|
+
)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## 支持的函数
|
|
90
|
+
|
|
91
|
+
### 时序窗口函数(PARTITION BY 股票 ORDER BY 日期)
|
|
92
|
+
|
|
93
|
+
| 函数 | 说明 |
|
|
94
|
+
|------|------|
|
|
95
|
+
| `delay(x, d)` | 取 d 期前的值 |
|
|
96
|
+
| `delta(x, d)` | 与 d 期前的差值 |
|
|
97
|
+
| `ts_sum(x, d)` / `sum(x, d)` | 滚动求和 |
|
|
98
|
+
| `ts_mean(x, d)` / `mean(x, d)` / `sma(x, d)` | 滚动均值 |
|
|
99
|
+
| `ts_min(x, d)` / `min(x, d)` | 滚动最小值 |
|
|
100
|
+
| `ts_max(x, d)` / `max(x, d)` | 滚动最大值 |
|
|
101
|
+
| `ts_stddev(x, d)` / `stddev(x, d)` | 滚动标准差 |
|
|
102
|
+
| `ts_variance(x, d)` / `variance(x, d)` | 滚动方差 |
|
|
103
|
+
| `ts_count(x, d)` | 滚动非空计数 |
|
|
104
|
+
| `ts_count_not_nan(x, d)` | 滚动非 NaN 计数 |
|
|
105
|
+
| `ts_zscore(x, d)` | 滚动 Z-Score |
|
|
106
|
+
| `ts_pct_change(x, d)` | 滚动百分比变化 |
|
|
107
|
+
| `product(x, d)` | 滚动乘积 |
|
|
108
|
+
| `decay_linear(x, d)` | 线性衰减加权和 |
|
|
109
|
+
| `ts_corr(x, y, d)` / `correlation(x, y, d)` | 滚动皮尔逊相关 |
|
|
110
|
+
| `ts_covariance(x, y, d)` / `covariance(x, y, d)` | 滚动协方差 |
|
|
111
|
+
| `bollinger_upper(x, d)` | 布林带上轨 |
|
|
112
|
+
| `bollinger_lower(x, d)` | 布林带下轨 |
|
|
113
|
+
| `ts_median(x, d)` / `median(x, d)` | 滚动中位数 |
|
|
114
|
+
| `ts_quantile(x, d, q)` / `quantile(x, d, q)` | 滚动分位数 |
|
|
115
|
+
| `wma(x, d)` | 加权移动平均 |
|
|
116
|
+
| `ts_skew(x, d)` / `skew(x, d)` | 滚动偏度 |
|
|
117
|
+
| `ts_kurt(x, d)` / `kurt(x, d)` | 滚动峰度 |
|
|
118
|
+
| `ts_mad(x, d)` / `mad(x, d)` | 滚动平均绝对偏差 |
|
|
119
|
+
| `ts_rank(x, d)` | 滚动时序排名 |
|
|
120
|
+
| `slope(x, y, d)` / `regr_slope(x, y, d)` | 滚动回归斜率 |
|
|
121
|
+
| `rsquare(x, y, d)` / `regr_r2(x, y, d)` | 滚动回归 R² |
|
|
122
|
+
| `resi(x, y, d)` / `regr_resid(x, y, d)` | 滚动回归残差 |
|
|
123
|
+
| `idxmax(x, d)` / `ts_argmax(x, d)` | 滚动窗口最大值位置 |
|
|
124
|
+
| `idxmin(x, d)` / `ts_argmin(x, d)` | 滚动窗口最小值位置 |
|
|
125
|
+
|
|
126
|
+
### 截面函数(PARTITION BY 日期)
|
|
127
|
+
|
|
128
|
+
| 函数 | 说明 |
|
|
129
|
+
|------|------|
|
|
130
|
+
| `rank(x)` | 截面排名(百分位) |
|
|
131
|
+
| `scale(x)` | 截面标准化到 [0, 1] |
|
|
132
|
+
| `zscore(x)` | 截面 Z-Score |
|
|
133
|
+
| `demean(x)` | 截面去均值 |
|
|
134
|
+
|
|
135
|
+
### 分组函数(PARTITION BY 日期 + 分组列)
|
|
136
|
+
|
|
137
|
+
| 函数 | 说明 |
|
|
138
|
+
|------|------|
|
|
139
|
+
| `group_mean(x, group)` | 分组均值 |
|
|
140
|
+
| `group_rank(x, group)` | 组内排名 |
|
|
141
|
+
| `group_neutralize(x, group)` / `indneutralize(x, group)` | 分组中性化 |
|
|
142
|
+
| `group_zscore(x, group)` | 组内 Z-Score |
|
|
143
|
+
|
|
144
|
+
### 数学函数(标量)
|
|
145
|
+
|
|
146
|
+
| 函数 | 说明 |
|
|
147
|
+
|------|------|
|
|
148
|
+
| `abs(x)` | 绝对值 |
|
|
149
|
+
| `log(x)` | 自然对数 |
|
|
150
|
+
| `sqrt(x)` | 平方根 |
|
|
151
|
+
| `sign(x)` | 符号函数 |
|
|
152
|
+
| `exp(x)` | 指数 |
|
|
153
|
+
| `round(x)` | 四舍五入 |
|
|
154
|
+
| `floor(x)` | 向下取整 |
|
|
155
|
+
| `ceil(x)` | 向上取整 |
|
|
156
|
+
| `sin(x)` | 正弦 |
|
|
157
|
+
| `cos(x)` | 余弦 |
|
|
158
|
+
| `tan(x)` | 正切 |
|
|
159
|
+
| `signed_power(x, n)` / `power(x, n)` / `pow(x, n)` | 幂运算(保留符号) |
|
|
160
|
+
| `min(x, y)` | 两值取小(与时序别名 `min(x, d)` 同名;需要滚动最小值时建议显式写 `ts_min(x, d)`) |
|
|
161
|
+
| `max(x, y)` | 两值取大(与时序别名 `max(x, d)` 同名;需要滚动最大值时建议显式写 `ts_max(x, d)`) |
|
|
162
|
+
|
|
163
|
+
### 工具函数
|
|
164
|
+
|
|
165
|
+
| 函数 | 说明 |
|
|
166
|
+
|------|------|
|
|
167
|
+
| `if(cond, then, else)` | 条件选择 |
|
|
168
|
+
| `fillna(x, val)` | 空值填充 |
|
|
169
|
+
| `clip(x, lo, hi)` | 截断到 [lo, hi] |
|
|
170
|
+
| `is_finite(x)` | 是否有限值 |
|
|
171
|
+
|
|
172
|
+
### 不支持的函数(递归/状态型,无法用纯 SQL 表达)
|
|
173
|
+
|
|
174
|
+
`ema`, `rsi`, `macd`, `atr`, `roc`, `obv`, `cci`, `mfi`
|
|
175
|
+
|
|
176
|
+
## 运算符
|
|
177
|
+
|
|
178
|
+
| 优先级 | 运算符 | 说明 |
|
|
179
|
+
|--------|--------|------|
|
|
180
|
+
| 1(最高) | `()` | 括号 |
|
|
181
|
+
| 2 | `^` | 幂运算(右结合:`2^3^2 = 2^9 = 512`) |
|
|
182
|
+
| 3 | `-x` | 一元负号 |
|
|
183
|
+
| 4 | `*` `/` | 乘除 |
|
|
184
|
+
| 5 | `+` `-` | 加减 |
|
|
185
|
+
| 6 | `>` `<` `>=` `<=` `==` `!=` | 比较(返回 1.0 / 0.0) |
|
|
186
|
+
| 7(最低) | `? :` | 三元条件(`close > 100 ? 1 : 0`) |
|
|
187
|
+
|
|
188
|
+
## 回测输出指标
|
|
189
|
+
|
|
190
|
+
| 指标 | 说明 |
|
|
191
|
+
|------|------|
|
|
192
|
+
| IC 均值 / ICIR | 每日 Spearman 秩相关 IC 的均值和信息比率 |
|
|
193
|
+
| MS(单调性得分) | 相邻组收益方向一致比例(0~1,1.0 = 完美单调) |
|
|
194
|
+
| Spearman | 组号 vs 年化收益的秩相关(-1~+1) |
|
|
195
|
+
| 多空夏普 / 年化 | 最高组减最低组的多空组合绩效 |
|
|
196
|
+
| 分组年化收益 | 每个分层组的年化收益率 |
|
|
197
|
+
|
|
198
|
+
## 许可证
|
|
199
|
+
|
|
200
|
+
MIT
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# alpha101-pipeline
|
|
2
|
+
|
|
3
|
+
基于 DuckDB 的 A 股日内因子研究全流程工具链:公式引擎 → 因子存储 → 分层回测 → 可视化。
|
|
4
|
+
|
|
5
|
+
## 安装
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install alpha101-pipeline
|
|
9
|
+
# 带绘图功能:
|
|
10
|
+
pip install "alpha101-pipeline[plot]"
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## 快速上手
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
# 1. 重排面板数据(按 datetime, code 排序,TIMESTAMP 类型,多行组)
|
|
17
|
+
alpha101-reorder --input raw.parquet --output panel_sorted.parquet
|
|
18
|
+
|
|
19
|
+
# 2. 计算因子存入 store
|
|
20
|
+
alpha101-store add \
|
|
21
|
+
--source panel_sorted.parquet \
|
|
22
|
+
--store data/factors \
|
|
23
|
+
--formula mom_12='ts_mean(delta(close,1),12)' \
|
|
24
|
+
--formula rev_12='ts_mean(delta(close,1),12) * -1'
|
|
25
|
+
|
|
26
|
+
# 3. 回测全部因子
|
|
27
|
+
alpha101-backtest \
|
|
28
|
+
--store data/factors \
|
|
29
|
+
--out-dir output/backtest \
|
|
30
|
+
--forward 12 --groups 10
|
|
31
|
+
|
|
32
|
+
# 4. 绘制分层图
|
|
33
|
+
alpha101-plot batch \
|
|
34
|
+
--returns-root output/backtest/series \
|
|
35
|
+
--reports-dir output/backtest/reports \
|
|
36
|
+
--out-dir output/backtest/plots
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Python API
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
from alpha101_pipeline import FactorStore, run_intraday_multi
|
|
43
|
+
from pathlib import Path
|
|
44
|
+
|
|
45
|
+
# 计算因子
|
|
46
|
+
store = FactorStore(Path("data/factors"))
|
|
47
|
+
store.add_factors(
|
|
48
|
+
[("mom_12", "ts_mean(delta(close,1),12)")],
|
|
49
|
+
source=Path("panel_sorted.parquet"),
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
# 回测
|
|
53
|
+
reports = run_intraday_multi(
|
|
54
|
+
store.store_dir,
|
|
55
|
+
["mom_12"],
|
|
56
|
+
source_panel=Path("panel_sorted.parquet"),
|
|
57
|
+
factor_files={"mom_12": store.factor_path("mom_12")},
|
|
58
|
+
)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## 支持的函数
|
|
62
|
+
|
|
63
|
+
### 时序窗口函数(PARTITION BY 股票 ORDER BY 日期)
|
|
64
|
+
|
|
65
|
+
| 函数 | 说明 |
|
|
66
|
+
|------|------|
|
|
67
|
+
| `delay(x, d)` | 取 d 期前的值 |
|
|
68
|
+
| `delta(x, d)` | 与 d 期前的差值 |
|
|
69
|
+
| `ts_sum(x, d)` / `sum(x, d)` | 滚动求和 |
|
|
70
|
+
| `ts_mean(x, d)` / `mean(x, d)` / `sma(x, d)` | 滚动均值 |
|
|
71
|
+
| `ts_min(x, d)` / `min(x, d)` | 滚动最小值 |
|
|
72
|
+
| `ts_max(x, d)` / `max(x, d)` | 滚动最大值 |
|
|
73
|
+
| `ts_stddev(x, d)` / `stddev(x, d)` | 滚动标准差 |
|
|
74
|
+
| `ts_variance(x, d)` / `variance(x, d)` | 滚动方差 |
|
|
75
|
+
| `ts_count(x, d)` | 滚动非空计数 |
|
|
76
|
+
| `ts_count_not_nan(x, d)` | 滚动非 NaN 计数 |
|
|
77
|
+
| `ts_zscore(x, d)` | 滚动 Z-Score |
|
|
78
|
+
| `ts_pct_change(x, d)` | 滚动百分比变化 |
|
|
79
|
+
| `product(x, d)` | 滚动乘积 |
|
|
80
|
+
| `decay_linear(x, d)` | 线性衰减加权和 |
|
|
81
|
+
| `ts_corr(x, y, d)` / `correlation(x, y, d)` | 滚动皮尔逊相关 |
|
|
82
|
+
| `ts_covariance(x, y, d)` / `covariance(x, y, d)` | 滚动协方差 |
|
|
83
|
+
| `bollinger_upper(x, d)` | 布林带上轨 |
|
|
84
|
+
| `bollinger_lower(x, d)` | 布林带下轨 |
|
|
85
|
+
| `ts_median(x, d)` / `median(x, d)` | 滚动中位数 |
|
|
86
|
+
| `ts_quantile(x, d, q)` / `quantile(x, d, q)` | 滚动分位数 |
|
|
87
|
+
| `wma(x, d)` | 加权移动平均 |
|
|
88
|
+
| `ts_skew(x, d)` / `skew(x, d)` | 滚动偏度 |
|
|
89
|
+
| `ts_kurt(x, d)` / `kurt(x, d)` | 滚动峰度 |
|
|
90
|
+
| `ts_mad(x, d)` / `mad(x, d)` | 滚动平均绝对偏差 |
|
|
91
|
+
| `ts_rank(x, d)` | 滚动时序排名 |
|
|
92
|
+
| `slope(x, y, d)` / `regr_slope(x, y, d)` | 滚动回归斜率 |
|
|
93
|
+
| `rsquare(x, y, d)` / `regr_r2(x, y, d)` | 滚动回归 R² |
|
|
94
|
+
| `resi(x, y, d)` / `regr_resid(x, y, d)` | 滚动回归残差 |
|
|
95
|
+
| `idxmax(x, d)` / `ts_argmax(x, d)` | 滚动窗口最大值位置 |
|
|
96
|
+
| `idxmin(x, d)` / `ts_argmin(x, d)` | 滚动窗口最小值位置 |
|
|
97
|
+
|
|
98
|
+
### 截面函数(PARTITION BY 日期)
|
|
99
|
+
|
|
100
|
+
| 函数 | 说明 |
|
|
101
|
+
|------|------|
|
|
102
|
+
| `rank(x)` | 截面排名(百分位) |
|
|
103
|
+
| `scale(x)` | 截面标准化到 [0, 1] |
|
|
104
|
+
| `zscore(x)` | 截面 Z-Score |
|
|
105
|
+
| `demean(x)` | 截面去均值 |
|
|
106
|
+
|
|
107
|
+
### 分组函数(PARTITION BY 日期 + 分组列)
|
|
108
|
+
|
|
109
|
+
| 函数 | 说明 |
|
|
110
|
+
|------|------|
|
|
111
|
+
| `group_mean(x, group)` | 分组均值 |
|
|
112
|
+
| `group_rank(x, group)` | 组内排名 |
|
|
113
|
+
| `group_neutralize(x, group)` / `indneutralize(x, group)` | 分组中性化 |
|
|
114
|
+
| `group_zscore(x, group)` | 组内 Z-Score |
|
|
115
|
+
|
|
116
|
+
### 数学函数(标量)
|
|
117
|
+
|
|
118
|
+
| 函数 | 说明 |
|
|
119
|
+
|------|------|
|
|
120
|
+
| `abs(x)` | 绝对值 |
|
|
121
|
+
| `log(x)` | 自然对数 |
|
|
122
|
+
| `sqrt(x)` | 平方根 |
|
|
123
|
+
| `sign(x)` | 符号函数 |
|
|
124
|
+
| `exp(x)` | 指数 |
|
|
125
|
+
| `round(x)` | 四舍五入 |
|
|
126
|
+
| `floor(x)` | 向下取整 |
|
|
127
|
+
| `ceil(x)` | 向上取整 |
|
|
128
|
+
| `sin(x)` | 正弦 |
|
|
129
|
+
| `cos(x)` | 余弦 |
|
|
130
|
+
| `tan(x)` | 正切 |
|
|
131
|
+
| `signed_power(x, n)` / `power(x, n)` / `pow(x, n)` | 幂运算(保留符号) |
|
|
132
|
+
| `min(x, y)` | 两值取小(与时序别名 `min(x, d)` 同名;需要滚动最小值时建议显式写 `ts_min(x, d)`) |
|
|
133
|
+
| `max(x, y)` | 两值取大(与时序别名 `max(x, d)` 同名;需要滚动最大值时建议显式写 `ts_max(x, d)`) |
|
|
134
|
+
|
|
135
|
+
### 工具函数
|
|
136
|
+
|
|
137
|
+
| 函数 | 说明 |
|
|
138
|
+
|------|------|
|
|
139
|
+
| `if(cond, then, else)` | 条件选择 |
|
|
140
|
+
| `fillna(x, val)` | 空值填充 |
|
|
141
|
+
| `clip(x, lo, hi)` | 截断到 [lo, hi] |
|
|
142
|
+
| `is_finite(x)` | 是否有限值 |
|
|
143
|
+
|
|
144
|
+
### 不支持的函数(递归/状态型,无法用纯 SQL 表达)
|
|
145
|
+
|
|
146
|
+
`ema`, `rsi`, `macd`, `atr`, `roc`, `obv`, `cci`, `mfi`
|
|
147
|
+
|
|
148
|
+
## 运算符
|
|
149
|
+
|
|
150
|
+
| 优先级 | 运算符 | 说明 |
|
|
151
|
+
|--------|--------|------|
|
|
152
|
+
| 1(最高) | `()` | 括号 |
|
|
153
|
+
| 2 | `^` | 幂运算(右结合:`2^3^2 = 2^9 = 512`) |
|
|
154
|
+
| 3 | `-x` | 一元负号 |
|
|
155
|
+
| 4 | `*` `/` | 乘除 |
|
|
156
|
+
| 5 | `+` `-` | 加减 |
|
|
157
|
+
| 6 | `>` `<` `>=` `<=` `==` `!=` | 比较(返回 1.0 / 0.0) |
|
|
158
|
+
| 7(最低) | `? :` | 三元条件(`close > 100 ? 1 : 0`) |
|
|
159
|
+
|
|
160
|
+
## 回测输出指标
|
|
161
|
+
|
|
162
|
+
| 指标 | 说明 |
|
|
163
|
+
|------|------|
|
|
164
|
+
| IC 均值 / ICIR | 每日 Spearman 秩相关 IC 的均值和信息比率 |
|
|
165
|
+
| MS(单调性得分) | 相邻组收益方向一致比例(0~1,1.0 = 完美单调) |
|
|
166
|
+
| Spearman | 组号 vs 年化收益的秩相关(-1~+1) |
|
|
167
|
+
| 多空夏普 / 年化 | 最高组减最低组的多空组合绩效 |
|
|
168
|
+
| 分组年化收益 | 每个分层组的年化收益率 |
|
|
169
|
+
|
|
170
|
+
## 许可证
|
|
171
|
+
|
|
172
|
+
MIT
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "alpha101-pipeline"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "DuckDB-powered factor research pipeline: formula engine, factor store, layered backtest"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
authors = [{ name = "goldenquant" }]
|
|
13
|
+
keywords = ["quant", "factor", "backtest", "duckdb", "alpha101", "parquet"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Financial and Insurance Industry",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.9",
|
|
20
|
+
"Programming Language :: Python :: 3.10",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Topic :: Office/Business :: Financial :: Investment",
|
|
24
|
+
]
|
|
25
|
+
dependencies = [
|
|
26
|
+
"duckdb>=1.0",
|
|
27
|
+
"pyarrow>=14.0",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.optional-dependencies]
|
|
31
|
+
plot = ["matplotlib>=3.7"]
|
|
32
|
+
dev = ["build", "twine"]
|
|
33
|
+
|
|
34
|
+
[project.scripts]
|
|
35
|
+
alpha101-backtest = "alpha101_pipeline.cli:main_backtest"
|
|
36
|
+
alpha101-store = "alpha101_pipeline.store:main"
|
|
37
|
+
alpha101-reorder = "alpha101_pipeline.reorder:main"
|
|
38
|
+
alpha101-plot = "alpha101_pipeline.plot:main"
|
|
39
|
+
|
|
40
|
+
[project.urls]
|
|
41
|
+
Repository = "https://github.com/goldenquant/alpha101"
|
|
42
|
+
|
|
43
|
+
[tool.hatch.build.targets.wheel]
|
|
44
|
+
packages = ["src/alpha101_pipeline"]
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""alpha101-pipeline: DuckDB-powered factor research pipeline.
|
|
2
|
+
|
|
3
|
+
Modules:
|
|
4
|
+
engine — Alpha101 formula parser + DuckDB SQL translator
|
|
5
|
+
store — Persistent factor store (split-file Parquet architecture)
|
|
6
|
+
backtest — Layered backtest engine with IC, monotonicity metrics
|
|
7
|
+
reorder — Panel data reordering for columnar efficiency
|
|
8
|
+
plot — Offline layered backtest charting
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from alpha101_pipeline.engine import (
|
|
12
|
+
parse_formula,
|
|
13
|
+
DuckDbSqlTranslator,
|
|
14
|
+
configure_duckdb,
|
|
15
|
+
quote_ident,
|
|
16
|
+
sql_literal,
|
|
17
|
+
run_alpha101_duckdb,
|
|
18
|
+
)
|
|
19
|
+
from alpha101_pipeline.store import FactorStore
|
|
20
|
+
from alpha101_pipeline.backtest import (
|
|
21
|
+
run_factor_backtest,
|
|
22
|
+
run_intraday_multi,
|
|
23
|
+
compute_metrics,
|
|
24
|
+
compute_ic_metrics,
|
|
25
|
+
compute_monotonicity,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
__version__ = "0.1.0"
|
|
29
|
+
__all__ = [
|
|
30
|
+
"parse_formula",
|
|
31
|
+
"DuckDbSqlTranslator",
|
|
32
|
+
"configure_duckdb",
|
|
33
|
+
"quote_ident",
|
|
34
|
+
"sql_literal",
|
|
35
|
+
"run_alpha101_duckdb",
|
|
36
|
+
"FactorStore",
|
|
37
|
+
"run_factor_backtest",
|
|
38
|
+
"run_intraday_multi",
|
|
39
|
+
"compute_metrics",
|
|
40
|
+
"compute_ic_metrics",
|
|
41
|
+
"compute_monotonicity",
|
|
42
|
+
]
|