silars 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- silars-0.1.0/PKG-INFO +11 -0
- silars-0.1.0/README.md +0 -0
- silars-0.1.0/pyproject.toml +22 -0
- silars-0.1.0/setup.cfg +4 -0
- silars-0.1.0/silars/__init__.py +4 -0
- silars-0.1.0/silars/transformer/__init__.py +29 -0
- silars-0.1.0/silars/transformer/factory.py +354 -0
- silars-0.1.0/silars/transformer/partial.py +77 -0
- silars-0.1.0/silars.egg-info/PKG-INFO +11 -0
- silars-0.1.0/silars.egg-info/SOURCES.txt +11 -0
- silars-0.1.0/silars.egg-info/dependency_links.txt +1 -0
- silars-0.1.0/silars.egg-info/requires.txt +5 -0
- silars-0.1.0/silars.egg-info/top_level.txt +1 -0
silars-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: silars
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
License-Expression: MIT
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: numpy>=2.3.1
|
|
8
|
+
Requires-Dist: pandas>=2.3.1
|
|
9
|
+
Requires-Dist: polars>=1.31.0
|
|
10
|
+
Requires-Dist: scikit-learn>=1.7.0
|
|
11
|
+
Requires-Dist: ygo>=1.1.6
|
silars-0.1.0/README.md
ADDED
|
File without changes
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "silars"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = ""
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.12"
|
|
7
|
+
license = "MIT"
|
|
8
|
+
dependencies = [
|
|
9
|
+
"numpy>=2.3.1",
|
|
10
|
+
"pandas>=2.3.1",
|
|
11
|
+
"polars>=1.31.0",
|
|
12
|
+
"scikit-learn>=1.7.0",
|
|
13
|
+
"ygo>=1.1.6",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[build-system]
|
|
17
|
+
requires = ["setuptools>=42", "wheel"]
|
|
18
|
+
build-backend = "setuptools.build_meta"
|
|
19
|
+
|
|
20
|
+
[tool.setuptools.packages.find]
|
|
21
|
+
where = ["."]
|
|
22
|
+
include = ["silars", "silars.*"]
|
silars-0.1.0/setup.cfg
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Copyright (c) ZhangYundi.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
# Created on 2025/7/17 10:53
|
|
4
|
+
# Description:
|
|
5
|
+
|
|
6
|
+
from .factory import (
|
|
7
|
+
Function,
|
|
8
|
+
Cast,
|
|
9
|
+
Imputer,
|
|
10
|
+
Replace,
|
|
11
|
+
Target,
|
|
12
|
+
DropNull,
|
|
13
|
+
TargetFromDifferentTag,
|
|
14
|
+
Reindex,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
from .partial import StandardScaler
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"Function",
|
|
21
|
+
"Cast",
|
|
22
|
+
"Imputer",
|
|
23
|
+
"Replace",
|
|
24
|
+
"Target",
|
|
25
|
+
"DropNull",
|
|
26
|
+
"StandardScaler",
|
|
27
|
+
"TargetFromDifferentTag",
|
|
28
|
+
"Reindex",
|
|
29
|
+
]
|
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
# Copyright (c) ZhangYundi.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
# Created on 2025/7/17 10:53
|
|
4
|
+
# Description:
|
|
5
|
+
|
|
6
|
+
import inspect
|
|
7
|
+
import math
|
|
8
|
+
from collections.abc import Sequence, Iterable
|
|
9
|
+
from typing import Literal
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
import polars as pl
|
|
13
|
+
import ygo
|
|
14
|
+
from sklearn.base import BaseEstimator, TransformerMixin
|
|
15
|
+
|
|
16
|
+
SPECIAL_VALUES = {
|
|
17
|
+
"null": None,
|
|
18
|
+
"none": None,
|
|
19
|
+
"nan": None,
|
|
20
|
+
"inf": np.inf,
|
|
21
|
+
"-inf": -np.inf,
|
|
22
|
+
"int": int,
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class Function(BaseEstimator, TransformerMixin):
    """
    Wrap an arbitrary callable as a BaseEstimator that supports ``set_params``.

    Parameters
    ----------
    fn: callable
        The function object to wrap
    kwargs: dict
        Keyword arguments for ``fn``

    Examples
    --------
    >>> import polars as pl
    >>> func = Function(lambda date, ds_path: pl.scan_parquet("/path/to/your/data.parquet"))
    >>> func.set_params(ds_path="mc/stock_kline_day")
    """

    def __init__(self, fn: callable, **kwargs):
        delayed = ygo.delay(fn)(**kwargs)
        self.fn = delayed
        # Every signature parameter starts out as "empty"; values that are
        # already stored on the delayed function are overlaid afterwards.
        self.params = dict.fromkeys(ygo.fn_signature_params(delayed), inspect.Parameter.empty)
        self.params.update(delayed.stored_kwargs)

    def set_params(self, **params):
        """Re-wrap the stored function with ``params`` and refresh ``self.params``."""
        rebound = ygo.delay(self.fn)(**params)
        self.fn = rebound
        self.params.update(rebound.stored_kwargs)
        return self

    def get_params(self, deep=True):
        """Return the parameter dictionary tracked by this wrapper (``deep`` is ignored)."""
        return self.params

    def fit(self, X, y=None):
        """No-op; present for the estimator interface."""
        return self

    def transform(self, X):
        """Call the wrapped function with ``X`` plus every stored, non-empty keyword argument."""
        if self.fn is None:
            raise ValueError("Function not provided.")
        empty = inspect.Parameter.empty
        bound = {}
        for name, value in self.fn.stored_kwargs.items():
            if value is not empty:
                bound[name] = value
        return self.fn(X, **bound)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class DropNull(BaseEstimator, TransformerMixin):
    """
    Drop rows that contain null values in the selected columns.

    Parameters
    ----------
    subset: Sequence[str] | str | None
        Column(s) inspected for nulls. The value is passed through
        ``pl.sql_expr`` before being handed to ``drop_nulls``.
        When it is falsy (``None`` or empty) the input is returned unchanged.

    """

    def __init__(self, subset: Sequence[str] | str | None = None):
        self.subset = subset

    def fit(self, X=None, y=None):
        """No-op; present for the estimator interface."""
        return self

    def transform(self, X: pl.LazyFrame):
        """Return ``X`` without the rows that have nulls in ``subset``."""
        # Nothing selected -> nothing to drop.
        if not self.subset:
            return X
        null_check_exprs = pl.sql_expr(self.subset)
        return X.drop_nulls(null_check_exprs)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class Cast(BaseEstimator, TransformerMixin):
    """
    Type conversion.

    Parameters
    ----------
    old: str | polars.DataType | polars.Expr
        The original type, or the column(s) to convert
    new: polars.DataType
        The target type of the conversion

    Examples
    --------
    >>> Cast("asset", pl.UInt16)

    """

    def __init__(self, old: pl.Expr | str | pl.DataType = None, new=None):
        self.old = old
        self.new = new
        self.old_expr: pl.Expr = None

    def fit(self, X, y=None):
        """Resolve ``old`` into the expression that ``transform`` will cast."""
        # String shortcuts listed in SPECIAL_VALUES are swapped for their mapped value.
        if isinstance(self.old, str) and self.old in SPECIAL_VALUES:
            self.old = SPECIAL_VALUES[self.old]
        # A ready-made expression is used as is; anything else goes through pl.col.
        if isinstance(self.old, pl.Expr):
            self.old_expr = self.old
        else:
            self.old_expr = pl.col(self.old)
        return self

    def transform(self, X):
        """Return ``X`` with the selected column(s) cast to ``new``."""
        converted = self.old_expr.cast(self.new)
        return X.with_columns(converted)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class Imputer(BaseEstimator, TransformerMixin):
    """
    Null-value imputer.

    Parameters
    ----------
    strategy
        Fill strategy, forwarded to ``Expr.fill_null(strategy=...)``
    over_spec
        Keyword arguments forwarded to ``Expr.over``; when given, the fill is
        evaluated per window instead of over the whole column
    columns
        Columns to process. A single string is treated as one column name.
        When empty or ``None``, every column of the input is processed.
    """

    def __init__(self,
                 strategy: Literal[
                               "forward", "backward", "mean", "zero", "max", "min", "one"] | None = "forward",
                 over_spec: dict | None = None,
                 columns: Sequence[str] | None = None):
        self.strategy = strategy
        self.over_spec = over_spec
        self.columns = [columns, ] if isinstance(columns, str) else columns
        self._exprs: list[pl.Expr] = list()

    def fit(self, X=None, y=None):
        """No-op; present for the estimator interface."""
        return self

    def transform(self, X: pl.LazyFrame):
        """Return ``X`` with nulls in the target columns filled using ``strategy``."""
        # Fall back to all columns of the input when none were configured.
        cols = self.columns if self.columns else X.collect_schema().names()
        exprs = [pl.col(col).fill_null(strategy=self.strategy) for col in cols]
        if self.over_spec:
            # Use the resolved `cols` here as well, so that `columns=None`
            # combined with an `over_spec` no longer iterates over None.
            exprs = [expr.over(**self.over_spec) for expr in exprs]
        self._exprs = exprs
        return X.with_columns(self._exprs)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class Replace(BaseEstimator, TransformerMixin):
    """
    Value replacer.

    Parameters
    ----------
    columns: list
        Target columns in which values are replaced
    old
        Old value; may be a list, e.g. [np.inf, -np.inf]
    new
        New value
    """

    def __init__(self, columns: list | None = None, old=None, new=None):
        self.columns = columns
        self.old = old
        self.new = new

    def fit(self, X, y=None):
        """Translate SPECIAL_VALUES shortcuts in ``old`` and default ``columns`` to an empty list."""
        current = self.old
        if isinstance(current, str):
            # A single string is only substituted when it is a known shortcut.
            if current in SPECIAL_VALUES:
                self.old = SPECIAL_VALUES[current]
        elif isinstance(current, Iterable):
            # Element-wise substitution; unknown entries are kept unchanged.
            self.old = [SPECIAL_VALUES.get(item, item) for item in current]
        if self.columns is None:
            self.columns = []
        return self

    def transform(self, X):
        """Return ``X`` with ``old`` replaced by ``new`` in every configured column."""
        replacements = [pl.col(name).replace(self.old, self.new) for name in self.columns]
        return X.with_columns(replacements)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
class Target(BaseEstimator, TransformerMixin):
    """
    Add a target-return column.

    Parameters
    ----------
    price_tag
        Price column; SQL syntax is supported, e.g. `if(ask_p1 > 0, bid_p1 > 0, (ask_p1+bid_p1)/2, null) as price`
    frequency
        Sampling frequency of the dataset, default `3s`. It is truncated to
        whole seconds, so sub-second frequencies are not supported.
    target
        Return horizon, default `5min`. For horizons of one day or more the
        exit price is the last row of the partition.
    gap
        Extra delay, default `3s`. When positive, each row takes the target
        value found ``ceil(gap / frequency)`` rows later in its partition.
    partition_by
        Partition column(s)
    order_by
        Ordering column(s)
    alias
        Name of the output column; defaults to ``target`` when not given
    """

    def __init__(self,
                 price_tag: str = "price",
                 frequency: str = "3s",
                 target: str = "5min",
                 gap: str = "3s",
                 partition_by: Sequence[str] | str = "asset",
                 order_by: Sequence[str] | str | None = "time",
                 alias: str = None):
        self.price_tag = price_tag
        self.frequency = frequency
        self.target = target
        self.gap = gap
        self.partition_by = partition_by
        self.order_by = order_by
        self.alias = alias
        self._exprs = list()

    def fit(self, X=None, y=None):
        """Build the expressions that ``transform`` applies; no data is read."""
        from pandas import Timedelta
        self.alias = self.alias if self.alias else self.target
        over_spec = {"partition_by": self.partition_by}
        if self.order_by:
            over_spec["order_by"] = self.order_by
        # total_seconds() spans the whole duration. `.seconds` is only the
        # sub-day component (0 for "1d"), which made daily frequencies divide
        # by zero and silently dropped gaps of whole days.
        freq_secs = int(Timedelta(self.frequency).total_seconds())
        target_timedelta = Timedelta(self.target)
        price = pl.sql_expr(self.price_tag)
        if target_timedelta.days > 0:
            # Day-level horizon: exit at the last row of the partition.
            exit_price = price.last()
        else:
            # days == 0 here, so `.seconds` is the full horizon in seconds.
            exit_price = price.shift(-math.ceil(target_timedelta.seconds / freq_secs))
        expr_target = (exit_price.over(**over_spec) / price - 1).alias(self.alias)
        # Infinite returns (e.g. from a zero entry price) are replaced by null.
        expr_inf = pl.col(self.alias).cast(pl.Float32).replace([float("inf"), float("-inf")], None)

        self._exprs = [expr_target, expr_inf]
        gap_secs = int(Timedelta(self.gap).total_seconds())
        if gap_secs > 0:
            expr_gap = (
                pl.col(self.alias)
                .shift(-math.ceil(gap_secs / freq_secs))
                .over(**over_spec)
            )
            # The gap shift runs after the target is computed and before the inf clean-up.
            self._exprs.insert(1, expr_gap)
        return self

    def transform(self, X: pl.LazyFrame):
        """Apply the expressions prepared by ``fit`` one after another."""
        # Sequential with_columns calls: later expressions read the column
        # produced by the earlier ones.
        for expr in self._exprs:
            X = X.with_columns(expr)
        return X
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
class TargetFromDifferentTag(BaseEstimator, TransformerMixin):
    """
    Add a target-return column, using different price columns for entry and exit.

    Parameters
    ----------
    enter_price_tag: str
        Entry price column; SQL syntax is supported, e.g. `if(ask_p1 > 0, bid_p1 > 0, (ask_p1+bid_p1)/2, null) as price`
    exit_price_tag: str
        Exit price column
    frequency
        Sampling frequency of the dataset, default `3s`. It is truncated to
        whole seconds, so sub-second frequencies are not supported.
    target
        Return horizon, default `5min`
    gap
        Extra delay, default `3s`. When positive, each row takes the target
        value found ``ceil(gap / frequency)`` rows later in its partition.
    partition_by
        Partition column(s)
    order_by
        Ordering column(s)
    alias: str
        Name of the output column; defaults to ``target`` when not given
    """

    def __init__(self,
                 enter_price_tag: str,
                 exit_price_tag: str,
                 frequency: str = "3s",
                 target: str = "5min",
                 gap: str = "3s",
                 partition_by: Sequence[str] | str = "asset",
                 order_by: Sequence[str] | str | None = "time",
                 alias: str = None):
        self.enter_price_tag = enter_price_tag
        self.exit_price_tag = exit_price_tag
        self.frequency = frequency
        self.target = target
        self.gap = gap
        self.partition_by = partition_by
        self.order_by = order_by
        self.alias = alias
        self._exprs = list()

    def fit(self, X=None, y=None):
        """Build the expressions that ``transform`` applies; no data is read."""
        from pandas import Timedelta
        self.alias = self.alias if self.alias else self.target
        over_spec = {"partition_by": self.partition_by}
        if self.order_by:
            over_spec["order_by"] = self.order_by
        # total_seconds() spans the whole duration. `.seconds` is only the
        # sub-day component, so a target such as "1d" used to become a shift
        # of 0 rows and a daily frequency divided by zero.
        freq_secs = int(Timedelta(self.frequency).total_seconds())
        target_secs = int(Timedelta(self.target).total_seconds())
        expr_target = (
            (
                    pl.sql_expr(self.exit_price_tag)
                    .shift(-math.ceil(target_secs / freq_secs))
                    .over(**over_spec) / pl.sql_expr(self.enter_price_tag) - 1
            ).alias(self.alias)
        )
        # Infinite returns (e.g. from a zero entry price) are replaced by null.
        expr_inf = pl.col(self.alias).cast(pl.Float32).replace([float("inf"), float("-inf")], None)

        self._exprs = [expr_target, expr_inf]
        gap_secs = int(Timedelta(self.gap).total_seconds())
        if gap_secs > 0:
            expr_gap = (
                pl.col(self.alias)
                .shift(-math.ceil(gap_secs / freq_secs))
                .over(**over_spec)
            )
            # The gap shift runs after the target is computed and before the inf clean-up.
            self._exprs.insert(1, expr_gap)
        return self

    def transform(self, X: pl.LazyFrame):
        """Apply the expressions prepared by ``fit`` one after another."""
        # Sequential with_columns calls: later expressions read the column
        # produced by the earlier ones.
        for expr in self._exprs:
            X = X.with_columns(expr)
        return X
|
|
343
|
+
|
|
344
|
+
class Reindex(BaseEstimator, TransformerMixin):
    """
    Align the input to a given index frame with a left join.

    Parameters
    ----------
    new_index: pl.LazyFrame | pl.DataFrame
        Frame whose columns act as the join keys. Every one of its rows is
        kept in the output (it is the left side of the join).
    """

    def __init__(self, new_index: pl.LazyFrame | pl.DataFrame):
        self.new_index = new_index

    def fit(self, X, y=None):
        """Convert ``new_index`` to its lazy form."""
        lazy_index = self.new_index.lazy()
        self.new_index = lazy_index
        return self

    def transform(self, X: pl.LazyFrame):
        """Left-join ``X`` onto ``new_index`` using all columns of ``new_index`` as keys."""
        index = self.new_index
        key_columns = index.collect_schema().names()
        return index.join(X, on=key_columns, how="left")
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# Copyright (c) ZhangYundi.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
# Created on 2025/7/17 10:53
|
|
4
|
+
# Description:
|
|
5
|
+
|
|
6
|
+
import polars as pl
|
|
7
|
+
from sklearn.base import BaseEstimator, TransformerMixin
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class StandardScaler(BaseEstimator, TransformerMixin):
    """
    Standardize columns with running statistics that can be updated batch by batch.

    The per-column count, mean and sum of squared deviations (M2) are kept as
    one-row DataFrames and merged with each new batch in ``partial_fit``.

    Parameters
    ----------
    subset: list[str]
        Columns to standardize
    ddof: int
        Degrees of freedom (0 for the population standard deviation, 1 for the sample standard deviation)
    """

    def __init__(self, subset: list[str], ddof=1):
        self.subset = subset
        self._count = {col: 0 for col in subset}
        self._mean = {col: 0 for col in subset}
        self._M2 = {col: 0 for col in subset}
        self.count = None
        self.mean = None
        self.M2 = None
        self.ddof = ddof
        self.reset()

    def reset(self):
        """Reset all running statistics to zero."""
        self._count = {col: 0 for col in self.subset}
        self._mean = {col: 0 for col in self.subset}
        self._M2 = {col: 0 for col in self.subset}
        self.count = pl.DataFrame(self._count)
        self.mean = pl.DataFrame(self._mean)
        self.M2 = pl.DataFrame(self._M2)

    def fit(self, X: pl.DataFrame, y=None):
        """Discard previous statistics and fit on ``X`` only."""
        self.reset()
        self.partial_fit(X, y)
        return self

    def partial_fit(self, X: pl.DataFrame, y=None):
        """Merge the statistics of batch ``X`` into the running statistics."""
        data = X.select(self.subset)
        mu_new = data.mean()
        # DataFrame.count() already returns the number of non-null values per
        # column; subtracting null_count() again would undercount the batch.
        n_new = data.count()
        m2_new = data.select(((pl.col(c) - mu_new[c]) ** 2).sum() for c in self.subset)
        # Difference between the batch mean and the running mean
        delta = mu_new - self.mean
        # Combined count and mean
        new_count = self.count + n_new
        new_mean = (self.mean * self.count + mu_new * n_new) / new_count
        # Combined M2: both partial M2 values plus a correction for the shift in mean
        correction_term = delta.select(pl.col(c) ** 2 * (self.count[c] * n_new[c]) / new_count[c] for c in self.subset)

        new_M2 = self.M2 + m2_new + correction_term
        # Update state
        self.count = new_count
        self.mean = new_mean
        self.M2 = new_M2
        return self

    def get_std(self, col: str):
        """Return the running standard deviation of ``col`` as a one-element Series."""
        return (self.M2[col] / (self.count[col] - self.ddof)) ** 0.5

    def transform(self, X: pl.DataFrame):
        """Return ``X`` with the ``subset`` columns centered and scaled by the running statistics."""
        if not self.subset:
            return X
        return (X
                .with_columns((pl.col(c) - self.mean[c]) / self.get_std(c) for c in self.subset)
                # Infinite results (e.g. from a zero standard deviation) are replaced by null.
                .with_columns(pl.col(c).cast(pl.Float32).replace([float("inf"), float("-inf")], None) for c in self.subset))

    def partial_fit_transform(self, X: pl.DataFrame, y=None):
        """Update the running statistics with ``X`` and return the transformed ``X``."""
        if not self.subset:
            return X
        return self.partial_fit(X).transform(X)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: silars
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
License-Expression: MIT
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: numpy>=2.3.1
|
|
8
|
+
Requires-Dist: pandas>=2.3.1
|
|
9
|
+
Requires-Dist: polars>=1.31.0
|
|
10
|
+
Requires-Dist: scikit-learn>=1.7.0
|
|
11
|
+
Requires-Dist: ygo>=1.1.6
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
silars/__init__.py
|
|
4
|
+
silars.egg-info/PKG-INFO
|
|
5
|
+
silars.egg-info/SOURCES.txt
|
|
6
|
+
silars.egg-info/dependency_links.txt
|
|
7
|
+
silars.egg-info/requires.txt
|
|
8
|
+
silars.egg-info/top_level.txt
|
|
9
|
+
silars/transformer/__init__.py
|
|
10
|
+
silars/transformer/factory.py
|
|
11
|
+
silars/transformer/partial.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
silars
|