py-alpha-lib 0.1.0__cp314-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,239 @@
1
+ import sys
2
+ from typing import Callable
3
+ from .parser import Lark_StandAlone, Transformer, v_args
4
+ import numpy as np
5
+ import re
6
+ import math
7
+ import io
8
+
9
+ parser = Lark_StandAlone()
10
+
11
+
12
class ExecContext:
    """Callable data provider that generated alpha code refers to as ``ctx``.

    Generated expressions fetch each input series with ``ctx('NAME')``.
    This base class is only a stub: calling it does nothing and yields
    ``None``.
    """

    def __call__(self, name: str) -> np.ndarray:
        """Return the data registered under ``name`` (stub: returns ``None``)."""
        return None
15
+
16
+
17
def _strip_ctx(text):
    """Return the identifier inside ``ctx('...')``, or ``text`` unchanged."""
    if text.startswith("ctx('") and text.endswith("')"):
        return text[5:-2]
    return text


def _fold_infix(first, rest):
    """Chain ``first`` with the alternating ``operator, operand`` items of ``rest``."""
    result = first
    pairs = iter(rest)
    # Zipping an iterator with itself yields consecutive (operator, operand) pairs.
    for op, operand in zip(pairs, pairs):
        result = f"{result} {op} {operand}"
    return result


@v_args(inline=True)
class AlphaTransformer(Transformer):
    """Translate a parsed alpha expression into a Python/NumPy expression string.

    Each callback returns source text. Identifiers are rendered as
    ``ctx('NAME')`` lookups and function calls as ``ctx.NAME(...)``.

    Attributes:
        name_convertor: Optional callable applied to every identifier before
            it is emitted.
        variables: Set of (converted) identifiers seen by ``NAME`` and
            ``dotted_name``. Because ``NAME`` runs for every identifier token,
            this may also contain function names and the individual
            components of dotted names.
    """

    def __init__(
        self,
        name_convertor: Callable[[str], str] | None = None,
    ):
        super().__init__()
        self.name_convertor = name_convertor
        self.variables = set()

    def _lookup(self, name):
        """Convert ``name``, record it in ``variables`` and render the lookup."""
        key = self.name_convertor(name) if self.name_convertor else name
        self.variables.add(key)
        return f"ctx('{key}')"

    def start(self, expr):
        return expr

    def ternary_expr(self, cond, true_case, false_case):
        return f"np.where({cond}, {true_case}, {false_case})"

    def logical_or_expr(self, left, *rights):
        result = left
        for right in rights:
            result = f"np.bitwise_or({result}, {right})"
        return result

    def logical_and_expr(self, left, *rights):
        result = left
        for right in rights:
            result = f"np.bitwise_and({result}, {right})"
        return result

    def eq(self, left, right):
        return f"{left} == {right}"

    def ne(self, left, right):
        return f"{left} != {right}"

    def lt(self, left, right):
        return f"{left} < {right}"

    def gt(self, left, right):
        return f"{left} > {right}"

    def le(self, left, right):
        return f"{left} <= {right}"

    def ge(self, left, right):
        return f"{left} >= {right}"

    def sum(self, first, *rest):
        return _fold_infix(first, rest)

    def product(self, first, *rest):
        return _fold_infix(first, rest)

    def power(self, base, *rest):
        result = base
        # ``rest`` alternates operator, exponent; the operator token itself is
        # not needed because every step is rendered as np.power (folded
        # left to right).
        for exponent in rest[1::2]:
            result = f"np.power({result}, {exponent})"
        return result

    def neg(self, minus, item):
        return f"-{item}"

    def func_call(self, name, args=""):
        # Function names arrive already wrapped as ctx('...') by NAME /
        # dotted_name; they must be emitted bare as attributes of ctx.
        return f"ctx.{_strip_ctx(name)}({args})"

    def arguments(self, *args):
        return ", ".join(args)

    def NAME(self, name):
        return self._lookup(str(name))

    def NUMBER(self, name):
        return str(name)

    def dotted_name(self, *names):
        # Components were already rendered by NAME; unwrap them and treat the
        # joined dotted path as a single data field provided by ctx.
        # NOTE(review): name_convertor is therefore applied to each component
        # and then again to the joined name -- confirm this is intended for
        # non-idempotent convertors.
        full_name = ".".join(_strip_ctx(n) for n in names)
        return self._lookup(full_name)

    def add_op(self, op):
        return str(op)

    def mul_op(self, op):
        return str(op)
131
+
132
+
133
+ def to_python(
134
+ name: str,
135
+ code: str,
136
+ /,
137
+ indent: int = 0,
138
+ indent_by: str = " ",
139
+ as_function: bool = False,
140
+ name_convertor: Callable[[str], str] | None = None,
141
+ optimize: bool = False,
142
+ ) -> str:
143
+ """
144
+ Convert a parse tree to Python code.
145
+
146
+ There are two modes:
147
+ 1. Function mode: Convert the code as a function.
148
+ - All function arguments are (ctx: ExecContext)
149
+ - In generated function, convert each variable name to ctx('VARIABLE_NAME') to get the data.
150
+ - Return the result of the code.
151
+ 2. Variable mode: Convert the code as a variable.
152
+ - assume there is a global ExecContext variable named 'ctx'
153
+ - In generated variable, convert each variable name to ctx('VARIABLE_NAME') to get the data.
154
+
155
+ Args:
156
+ name: The name of the target function or variable.
157
+ code: The code to convert.
158
+ indent: The init number of spaces to indent the code.
159
+ indent_by: The string to use for indentation.
160
+ as_function: Whether to convert the code as a function or a variable.
161
+ name_convertor: A optional function to convert the identifier name in the code. For example, 'to_lower_case' or 'to_snake_case'.
162
+ optimize: In function mode, optimize the code by declare variables when multiple times used.
163
+
164
+ Returns:
165
+ The converted code.
166
+ """
167
+ if not code.strip():
168
+ return ""
169
+
170
+ try:
171
+ tree = parser.parse(code)
172
+ except Exception as e:
173
+ raise ValueError(f"Failed to parse code: {code}") from e
174
+
175
+ transformer = AlphaTransformer(name_convertor=name_convertor)
176
+ converted_expr = transformer.transform(tree)
177
+
178
+ indent_str = indent_by * indent
179
+
180
+ if as_function:
181
+ lines = []
182
+ lines.append(f"{indent_str}def {name}(ctx):")
183
+
184
+ body_indent = indent_str + indent_by
185
+
186
+ if optimize:
187
+ # Count occurrences
188
+ var_usage = {}
189
+ for var in transformer.variables:
190
+ pattern = re.escape(f"ctx('{var}')")
191
+ count = len(re.findall(pattern, converted_expr))
192
+ var_usage[var] = count
193
+
194
+ # Sort variables to ensure consistent output
195
+ sorted_vars = sorted([v for v, c in var_usage.items() if c > 1])
196
+
197
+ for var in sorted_vars:
198
+ safe_var_name = "_" + var.replace(".", "_") # simple safe name
199
+ lines.append(f"{body_indent}{safe_var_name} = ctx('{var}')")
200
+ # Replace in expression
201
+ converted_expr = converted_expr.replace(f"ctx('{var}')", safe_var_name)
202
+
203
+ lines.append(f"{body_indent}return {converted_expr}")
204
+ return "\n".join(lines)
205
+ else:
206
+ return f"{indent_str}{name} = {converted_expr}"
207
+
208
+
209
+ def to_python_file(
210
+ codes: list[str],
211
+ names: list[str] | str = "alpha_",
212
+ /,
213
+ fp: io.StringIO | None = None,
214
+ imports: list[str] | None = [],
215
+ name_convertor: Callable[[str], str] | None = None,
216
+ ):
217
+ if isinstance(names, str):
218
+ n = len(codes)
219
+ w = math.ceil(math.log10(n))
220
+ names = [f"{names}{i + 1:0{w}d}" for i in range(n)]
221
+
222
+ assert len(names) == len(codes)
223
+
224
+ if fp is None:
225
+ fp = sys.stdout
226
+
227
+ for i in imports:
228
+ print(f"{i}", file=fp)
229
+
230
+ if "import numpy as np" not in imports:
231
+ print("import numpy as np", file=fp)
232
+
233
+ for name, code in zip(names, codes):
234
+ print(f"# {code}", file=fp)
235
+ py_code = to_python(
236
+ name, code, as_function=True, optimize=True, name_convertor=name_convertor
237
+ )
238
+ print(py_code, file=fp)
239
+ print("\n\n", file=fp)
@@ -0,0 +1,188 @@
1
+ Metadata-Version: 2.4
2
+ Name: py-alpha-lib
3
+ Version: 0.1.0
4
+ Classifier: Programming Language :: Rust
5
+ Classifier: Programming Language :: Python :: Implementation :: CPython
6
+ Classifier: Programming Language :: Python :: Implementation :: PyPy
7
+ Requires-Dist: numpy>=2
8
+ License-File: LICENSE
9
+ Summary: Alpha Library: A high-performance rolling window calculation library implemented in Rust with Python bindings. Used for financial data analysis and factor research.
10
+ Author: LiJia
11
+ Requires-Python: >=3.11
12
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
13
+
14
+ # Introduction
15
+
16
+ `alpha-lib` is a Python library that implements various algorithms and functions commonly used in quantitative finance and algorithmic trading.
17
+
18
+ For financial data analysis, many algorithms require a rolling-window calculation. This library provides efficient implementations of these algorithms.
19
+
20
+ ## Algorithms
21
+
22
+ | Name | Description | Ref Link |
23
+ | ---------- | ------------------------------------------------------------ | ----------------------------------------------------------------------- |
24
+ | BARSLAST | Bars since last condition true | https://www.amibroker.com/guide/afl/barslast.html |
25
+ | BARSSINCE | Bars since first condition true | https://www.amibroker.com/guide/afl/barssince.html |
26
+ | COUNT | Count periods where condition is true | https://www.amibroker.com/guide/afl/count.html |
27
+ | CROSS | CROSS(A, B): Previous A < B, Current A >= B | https://www.amibroker.com/guide/afl/cross.html |
28
+ | DMA | Exponential Moving Average | https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average |
29
+ | HHV | Highest High Value | https://www.amibroker.com/guide/afl/hhv.html |
30
+ | HHVBARS | Bars since Highest High Value | https://www.amibroker.com/guide/afl/hhvbars.html |
31
+ | LLV | Lowest Low Value | https://www.amibroker.com/guide/afl/llv.html |
32
+ | LLVBARS | Bars since Lowest Low Value | https://www.amibroker.com/guide/afl/llvbars.html |
33
+ | LONGCROSS | LONGCROSS(A,B,N): Previous N A < B, Current A >= B | |
34
+ | MA | Moving Average | https://en.wikipedia.org/wiki/Moving_average#Simple_moving_average |
35
+ | RANK | rank by group dim | |
36
+ | RCROSS | RCROSS(A, B): Previous A > B, Current A <= B | |
37
+ | REF | Reference to value N periods ago | https://www.amibroker.com/guide/afl/ref.html |
38
+ | RLONGCROSS | RLONGCROSS(A,B,N): Previous N A > B, Current A <= B | |
39
+ | SMA | Exponential Moving Average (variant of EMA) | https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average |
40
+ | SUM | Sum of values over the last N periods | https://www.amibroker.com/guide/afl/sum.html |
41
+ | SUMBARS | Sums X backwards until the sum is greater than or equal to A | https://www.amibroker.com/guide/afl/sumbars.html |
42
+ | TS_RANK | rank by ts dim | |
43
+
44
+ # Usage
45
+
46
+ ## Installation
47
+
48
+ You can install the library using pip:
49
+
50
+ ```bash
51
+ pip install py-alpha-lib
52
+ ```
53
+
54
+ ## Simple Example
55
+
56
+ ```python
57
+ import alpha as al
58
+ import numpy as np
59
+
60
+ data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=np.float64)
61
+
62
+ # Calculate 3-period moving average, note that first 2 values are average of available values
63
+ result = al.MA(data, 3)
64
+ print(result)
65
+ # Output: [1. 1.5 2. 3. 4. 5. 6. 7. 8. 9. ]
66
+
67
+ # Calculate 3-period exponential moving average, first 2 values are NaN
68
+ al.set_ctx(flags=al.FLAG_STRICTLY_CYCLE)
69
+ result = al.EMA(data, 3)
70
+ print(result)
71
+ # Output: [ nan nan 2. 3. 4. 5. 6. 7. 8. 9. ]
72
+
73
+ # Calculate 3-period moving average, skipping NaN values
74
+ al.set_ctx(flags=al.FLAG_SKIP_NAN)
75
+ data_with_nan = [1, 2, None, 4, 5, 6, 7, 8, 9, 10]
76
+ result = al.MA(data_with_nan, 3)
77
+ print(result)
78
+ # Output: [1. 1.5 2.5 3.5 4.5 5.5 6.5 7.5 8.5 9.5]
79
+ ```
80
+
81
+ ## Environment Context
82
+
83
+ You may notice that some functions have different behaviors based on the context settings. You can set the context using `al.set_ctx()` function. The context includes:
84
+
85
+ - `groups`: Number of groups to divide the data into for group-wise operations. `groups` is used to process multiple series (for example, multiple stocks) in a single array.
86
+ - Each group is assumed to be of equal size and contiguous in the input array.
87
+ - Each group is processed independently and in parallel, which is why the performance is very good.
88
+ - For the `rank` function, `groups` must be set to a value greater than 1, because rank is a group-wise operation.
89
+ - `start`: The starting index for calculations.
90
+ - In some cases, this can avoid unnecessary computation.
91
+ - Default is 0.
92
+ - `flags`: Additional flags to modify function behaviors.
93
+ - `FLAG_SKIP_NAN`: When this flag is set, functions will skip NaN values during computations.
94
+ - `FLAG_STRICTLY_CYCLE`: When this flag is set, functions will strictly cycle over the data, meaning that initial periods that do not have enough data will be filled with NaN.
95
+ - You can combine multiple flags using bitwise OR operation, e.g., `flags=FLAG_SKIP_NAN | FLAG_STRICTLY_CYCLE`.
96
+
97
+ ## Factor expression to Python code
98
+
99
+ You can convert factor expressions to Python code using the `lang` module. For example:
100
+
101
+ ```bash
102
+ python -m alpha.lang examples/wq101/alpha101.txt
103
+ ```
104
+
105
+ This will read the factor expressions from [`examples/wq101/alpha101.txt`](examples/wq101/alpha101.txt) and generate corresponding Python code using `alpha-lib` functions.
106
+
107
+ After generating the code, you may need to adjust it:
108
+
109
+ - Fix type conversions between `float` and `bool`.
110
+ - Add context settings if needed.
111
+
112
+ # Full Example
113
+
114
+ ## WorldQuant 101 famous alpha 101
115
+
116
+ [The WorldQuant 101 alpha factors](https://arxiv.org/pdf/1601.00991.pdf) are a set of quantitative trading signals developed by WorldQuant. There are several existing implementations of these alpha factors, for example:
117
+ the [DolphinDB implementation](https://github.com/dolphindb/DolphinDBModules/blob/master/wq101alpha/README.md), which provides the 101 alpha factors implemented in the DolphinDB language along with a comparative `pandas`-based Python implementation. It is a good starting point for comparing with our `alpha-lib`.
118
+
119
+ The full implementation of these 101 alpha factors using `alpha-lib` can be found in the [wq101](examples/wq101) folder of this repository. This implementation leverages the efficient algorithms provided by `alpha-lib` to compute the alpha factors.
120
+
121
+ - `al`: is the factor implemented using `alpha-lib`.
122
+ - `pd_`: is the factor implemented using `pandas` for comparison.
123
+ - Because we cannot set up a full-featured DolphinDB environment here, we simply use its published results.
124
+
125
+ ### Run the example
126
+
127
+ Show help message:
128
+
129
+ ```
130
+ $ examples/wq101/main.py --help
131
+ usage: main.py [-h] [-s START] [-e END] [-v] [-d DATA] [-o OUTPUT] [--with-pd] [--with-al] [no ...]
132
+
133
+ positional arguments:
134
+ no alpha numbers to run, e.g., 1 2 3
135
+
136
+ options:
137
+ -h, --help show this help message and exit
138
+ -s, --start START start alpha number
139
+ -e, --end END end alpha number
140
+ -v, --verbose enable verbose logging
141
+ -d, --data DATA data file path
142
+ -o, --output OUTPUT save output to file
143
+ --with-pd run pandas implementation
144
+ --with-al run alpha-lib implementation
145
+ ```
146
+
147
+ ```bash
148
+ # Run specific alpha factors both pandas and alpha-lib implementations
149
+ examples/wq101/main.py --with-pd --with-al 1 2 3 4
150
+
151
+ # Run a range of alpha factors using alpha-lib implementation
152
+ examples/wq101/main.py --with-al -s 1 -e 102
153
+
154
+ ```
155
+
156
+ Because the `pandas` implementation is too slow for some factors, below is a run-time comparison for factors 1~14 (`examples/wq101/main.py --with-al -s 1 -e 15`) on a sample dataset with 4000 stocks and 261 trading days, i.e. 1,044,000 values to compute for each factor.
157
+
158
+ The _pandas/DolphinDB_ is copied from the [DolphinDB implementation result](https://github.com/dolphindb/DolphinDBModules/blob/master/wq101alpha/README.md#31-dolphindb-vs-python-pandas)
159
+
160
+ The `Value` columns are used to verify the correctness of the implementations, they should be the same or very close.
161
+
162
+ The hardware/software environment is:
163
+
164
+ - CPU: Intel 13th Gen Core i7-13700K (16 cores, 24 threads)
165
+ - RAM: 32GB
166
+ - OS: Ubuntu 22.04 LTS
167
+ - Python: 3.14 without free-threading
168
+ - pandas: 3.0
169
+ - numpy: 2.4
170
+
171
+ | no | pandasTime(ms) | alphaLibTime(ms) | SpeedUp<br/>(pandas/alphaLib) | SpeedUp<br/>(pandas/DolphinDB) | pandasValue | alphaLibValue |
172
+ | ---- | -------------- | ---------------- | ----------------------------- | ------------------------------ | ----------- | ------------- |
173
+ | data | 11396 | 718 | 15 | | | |
174
+ | #001 | 14231 | 7 | 2033 | 800 | 0.182125 | 0.182125 |
175
+ | #002 | 465 | 14 | 33 | 9 | -0.64422 | -0.326332 |
176
+ | #003 | 430 | 8 | 53 | 14 | 0.236184 | 0.236184 |
177
+ | #004 | 55107 | 6 | 9184 | 1193 | -8 | -8 |
178
+ | #005 | 105 | 9 | 11 | 5 | -0.331333 | -0.331333 |
179
+ | #006 | 351 | 2 | 175 | 84 | 0.234518 | 0.234518 |
180
+ | #007 | 43816 | 17 | 2577 | 486 | -1 | -1 |
181
+ | #008 | 222 | 9 | 24 | 14 | -0.6435 | -0.6435 |
182
+ | #009 | 97 | 9 | 10 | 14 | 17.012321 | 17.012321 |
183
+ | #010 | 145 | 11 | 13 | 6 | 0.662 | 0.662 |
184
+ | #011 | 158 | 10 | 15 | 6 | 0.785196 | 0.892723 |
185
+ | #012 | 4 | 4 | 1 | 0.7 | -17.012321 | -17.012321 |
186
+ | #013 | 446 | 9 | 49 | 8 | -0.58 | -0.58 |
187
+ | #014 | 398 | 8 | 49 | 18 | 0.095449 | 0.095449 |
188
+
@@ -0,0 +1,16 @@
1
+ alpha\__init__.py,sha256=rZE0P-d0AT7YG624RuE_EPdwIGKOHz2RMPn6Z-nTu9o,171
2
+ alpha\algo\__init__.py,sha256=F8t1XUjla37_0x-fjGL4HXTZASmSYLotWQbfdOhdfh0,76
3
+ alpha\algo\_algo.pyd,sha256=MK1Nk-4xQngkixcZcQiLLzmKLC5NPE3ZTmDXa3XF4LU,2076672
4
+ alpha\algo\algo.md,sha256=KZjYakkAkTLx4iPDj7hcVZEVv7Fc7Ye9h-zdz7JB7rI,1929
5
+ alpha\algo\algo.py,sha256=Y0R13cZ7uigtsobYd4wo9vtMlPJRwL0ktwPp6GPl0xc,602
6
+ alpha\algo\algo_gen.py,sha256=hSc8c8yJrGcNnZohc56TgJj_7V6Bcxnf_7K04mOiSYc,12269
7
+ alpha\algo.md,sha256=Av1UP0Tj7RHaksHD-0kM4uvppXyvDMlyEQZ6CG7PNYY,3521
8
+ alpha\lang\__init__.py,sha256=JdMw9vZv-4CQgChVfMxIYwpb5pQsYc9d_GleocYfFg4,50
9
+ alpha\lang\__main__.py,sha256=aemLHkuKarwc2x8aejCWBpFsYY49NzQOUhHcUFf19BE,490
10
+ alpha\lang\alpha.lark,sha256=qqMXJ--F3ErYikNdRs1vwHGKoJe_aRUeNmpo87NaTXs,1046
11
+ alpha\lang\parser.py,sha256=WBpwEUYzGJ1nM7iUh_mEimGLhHzVZXS8BAzF58ZRagU,156846
12
+ alpha\lang\to_python.py,sha256=G-bcISR1ItW6AxfccuEZo-xwukl-TtVLgmo3n2nnrPE,6579
13
+ py_alpha_lib-0.1.0.dist-info\METADATA,sha256=clwi8ljMxqv3YKDgmCaufgZ6-jPOpwnndEKSY8jI7vw,11880
14
+ py_alpha_lib-0.1.0.dist-info\WHEEL,sha256=dsg5IA1tFdLNsJihRQJCX59s7GKfDG9N_2EOwwysnks,96
15
+ py_alpha_lib-0.1.0.dist-info\licenses\LICENSE,sha256=3D2Y67XRAnMSiByf02r4DaFv92CdDLw5U6xHyUVkdI4,1289
16
+ py_alpha_lib-0.1.0.dist-info\RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: maturin (1.11.5)
3
+ Root-Is-Purelib: false
4
+ Tag: cp314-abi3-win_amd64
@@ -0,0 +1,22 @@
1
+ # BSD 2-Clause License
2
+
3
+ Redistribution and use in source and binary forms, with or without
4
+ modification, are permitted provided that the following conditions are met:
5
+
6
+ 1. Redistributions of source code must retain the above copyright notice,
7
+ this list of conditions and the following disclaimer.
8
+
9
+ 2. Redistributions in binary form must reproduce the above copyright notice,
10
+ this list of conditions and the following disclaimer in the documentation
11
+ and/or other materials provided with the distribution.
12
+
13
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
17
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
19
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
20
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
21
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.