rust-pyfunc 0.43.0__tar.gz → 0.44.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rust-pyfunc might be problematic. Click here for more details.
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/Cargo.lock +9 -1
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/Cargo.toml +3 -1
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/PKG-INFO +1 -1
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/statistical_analysis.pyi +106 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/backup_reader.rs +23 -15
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/factor_neutralization_io_optimized.rs +1 -3
- rust_pyfunc-0.44.1/src/frontier_dist.rs +576 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/gp_correlation_dimension.rs +255 -169
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/lib.rs +20 -6
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/lz_complexity.rs +38 -31
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/simple_parallel.rs +39 -13
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/.github/workflows/CI.yml +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/.github/workflows/deploy.yml +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/.gitignore +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/CLAUDE.md +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/CRUSH.md +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/FACTOR_NEUTRALIZATION_REQUIREMENTS.md +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/README.md +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/alter.sh +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/PriceTree.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/PriceTreeViz.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/RollingFutureAccessor.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/TRADE_PEAK_ANALYSIS_README.md +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/brachistochrone_curve.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/calculate_shannon_entropy_change.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/calculate_shannon_entropy_change_at_low.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/compute_max_eigenvalue.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/dtw_distance.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/find_follow_volume_sum_same_price.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/find_follow_volume_sum_same_price_and_flag.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/find_half_energy_time.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/find_local_peaks_within_window.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/find_max_range_product.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/identify_segments.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/index.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/jaccard_similarity.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/mark_follow_groups.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/mark_follow_groups_with_flag.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/max_range_loop.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/min_range_loop.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/min_word_edit_distance.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/ols.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/ols_predict.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/ols_residuals.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/parallel_computing_system.md +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/rolling_cv.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/rolling_qcv.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/rolling_volatility.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/rolling_window_stat.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/search_data.json +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/static/search.js +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/static/style.css +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/sum_as_string.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/transfer_entropy.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/trend.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/trend_fast.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/vectorize_sentences.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/vectorize_sentences_list.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/使用说明.md +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/完成总结.md +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs_generator.py +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/hmm_visualizer.py +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/pyproject.toml +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/__init__.py +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/__init__.pyi +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/core_functions.pyi +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/pandas_correlation.py +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/pandas_correlation.pyi +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/pandas_corrwith.py +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/pandas_extensions.pyi +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/pandas_merge.py +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/pandas_rank.py +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/parallel_computing.pyi +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/rolling_future.py +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/rolling_past.py +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/text_analysis.pyi +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/time_series.pyi +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/trading_analysis.pyi +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/tree_structures.pyi +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/treevisual.py +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/web_manager.py +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/web_manager.pyi +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/abnormal_asks_analyzer.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/column_correlation.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/difference_matrix.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/entropy_analysis.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/error/mod.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/grouping.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/lagged_regression.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/lagged_regression_incremental.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/lagged_regression_optimized.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/lagged_regression_simd.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/market_correlation.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/order_contamination.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/order_neighborhood.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/order_records_ultra_sorted.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/pandas_ext/mod.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/parallel_computing.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/permutation_analysis_v0816_fixed.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/price_breakthrough_stats.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/price_cycle_b_segments_enhanced.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/safe_eigenvalue.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/sequence/mod.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/series_rank.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/eigenvalue_analysis.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/eigenvalue_analysis_modified.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/fast_correlation.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/fast_correlation_v2.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/hmm_trend_prediction.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/local_correlation.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/mod.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/rolling_correlation_mean.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/rolling_window_core_feature.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/rolling_window_core_feature_optimized.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/rolling_window_core_feature_simd.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/rolling_window_core_feature_ultra.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/text/mod.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/text/string_proximity.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/time_series/fast_extreme.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/time_series/lyapunov.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/time_series/mod.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/time_series/retreat_advance.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/time_series/retreat_advance_v2.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/time_series/super_extreme.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/trade_analysis_ultra_turbo.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/trade_peak_analysis.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/trade_records_ultra_sorted.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/tree/mod.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/vector_similarity.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/vector_similarity_optimized.rs +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/templates/base.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/templates/function.html +0 -0
- {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/templates/index.html +0 -0
@@ -2016,9 +2016,15 @@ dependencies = [
|
|
2016
2016
|
"serde",
|
2017
2017
|
]
|
2018
2018
|
|
2019
|
+
[[package]]
|
2020
|
+
name = "roots"
|
2021
|
+
version = "0.0.8"
|
2022
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2023
|
+
checksum = "082f11ffa03bbef6c2c6ea6bea1acafaade2fd9050ae0234ab44a2153742b058"
|
2024
|
+
|
2019
2025
|
[[package]]
|
2020
2026
|
name = "rust_pyfunc"
|
2021
|
-
version = "0.43.0"
|
2027
|
+
version = "0.44.1"
|
2022
2028
|
dependencies = [
|
2023
2029
|
"arrow",
|
2024
2030
|
"base64 0.21.7",
|
@@ -2029,6 +2035,7 @@ dependencies = [
|
|
2029
2035
|
"faer",
|
2030
2036
|
"lapack",
|
2031
2037
|
"libc",
|
2038
|
+
"log",
|
2032
2039
|
"memmap2",
|
2033
2040
|
"nalgebra",
|
2034
2041
|
"ndarray",
|
@@ -2044,6 +2051,7 @@ dependencies = [
|
|
2044
2051
|
"rand",
|
2045
2052
|
"rayon",
|
2046
2053
|
"rmp-serde",
|
2054
|
+
"roots",
|
2047
2055
|
"rustc-hash",
|
2048
2056
|
"serde",
|
2049
2057
|
"serde_json",
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "rust_pyfunc"
|
3
|
-
version = "0.43.0"
|
3
|
+
version = "0.44.1"
|
4
4
|
edition = "2021"
|
5
5
|
description = "A collection of high-performance Python functions implemented in Rust"
|
6
6
|
readme = "README.md"
|
@@ -41,6 +41,8 @@ arrow = "51.0"
|
|
41
41
|
parquet = "51.0"
|
42
42
|
rustc-hash = "1.1"
|
43
43
|
thiserror = "1.0"
|
44
|
+
roots = "0.0.8"
|
45
|
+
log = "0.4"
|
44
46
|
# Unix-specific dependencies for fork mode
|
45
47
|
[target.'cfg(unix)'.dependencies]
|
46
48
|
nix = { version = "0.27", features = ["process", "signal", "fs"] }
|
@@ -1000,4 +1000,110 @@ def hmm_trend_prediction(
|
|
1000
1000
|
... columns=['更新后下跌概率', '更新后震荡概率', '更新后上涨概率']
|
1001
1001
|
... )
|
1002
1002
|
"""
|
1003
|
+
...
|
1004
|
+
|
1005
|
+
def distances_to_frontier(
|
1006
|
+
r: NDArray[np.float64],
|
1007
|
+
group_size: int,
|
1008
|
+
drop_last: bool = True,
|
1009
|
+
ddof: int = 1,
|
1010
|
+
ridge: float = 1e-6
|
1011
|
+
) -> NDArray[np.float64]:
|
1012
|
+
"""计算收益序列中每个聚合块到马科维茨有效前沿的距离。
|
1013
|
+
|
1014
|
+
基于马科维茨投资组合理论的有效前沿距离计算功能。给定单日3秒频率收益序列,
|
1015
|
+
按指定块大小聚合后计算每个资产点到有效前沿的最短距离。
|
1016
|
+
|
1017
|
+
算法步骤:
|
1018
|
+
1. 数据分块聚合:将收益序列按指定大小分块,计算每块均值
|
1019
|
+
2. 协方差矩阵计算:计算块间样本协方差矩阵(带岭化保证正定性)
|
1020
|
+
3. 有效前沿构造:使用马科维茨无约束闭式解构造有效前沿
|
1021
|
+
4. 距离计算:使用KKT-λ四次方程法计算每个资产点到前沿的最短欧氏距离
|
1022
|
+
|
1023
|
+
参数说明:
|
1024
|
+
----------
|
1025
|
+
r : NDArray[np.float64]
|
1026
|
+
1D float64数组,单日3秒频率收益序列
|
1027
|
+
group_size : int
|
1028
|
+
每多少行聚合成一块(x),必须大于0
|
1029
|
+
drop_last : bool, default=True
|
1030
|
+
尾部不足group_size行时是否丢弃,True丢弃,False则报错
|
1031
|
+
ddof : int, default=1
|
1032
|
+
协方差/方差的自由度调整,0或1,默认1(样本协方差)
|
1033
|
+
ridge : float, default=1e-6
|
1034
|
+
岭化强度系数,用于保证协方差矩阵正定
|
1035
|
+
|
1036
|
+
返回值:
|
1037
|
+
-------
|
1038
|
+
NDArray[np.float64]
|
1039
|
+
shape=(m,)的1D数组,包含每个资产点到有效前沿的距离
|
1040
|
+
其中m = floor(len(r) / group_size)(如果drop_last=True)
|
1041
|
+
|
1042
|
+
异常:
|
1043
|
+
-----
|
1044
|
+
ValueError
|
1045
|
+
当输入参数无效时抛出:
|
1046
|
+
- group_size <= 0
|
1047
|
+
- 输入序列为空
|
1048
|
+
- drop_last=False且序列长度不能被group_size整除
|
1049
|
+
- 块大小 <= 自由度调整
|
1050
|
+
- 协方差矩阵不正定(可尝试增大ridge)
|
1051
|
+
- 有效前沿参数计算失败(Δ <= 0)
|
1052
|
+
|
1053
|
+
数值提示:
|
1054
|
+
--------
|
1055
|
+
- 当 m >> group_size 时,协方差矩阵可能秩亏,需要通过增大ridge参数保证可逆性
|
1056
|
+
- 如果出现数值不稳定错误,建议将ridge增大10倍或100倍
|
1057
|
+
- 默认使用样本协方差(ddof=1),符合统计学习习惯
|
1058
|
+
|
1059
|
+
性能特点:
|
1060
|
+
--------
|
1061
|
+
- 使用Rust实现,计算性能优异
|
1062
|
+
- 采用Cholesky分解避免显式矩阵求逆,数值稳定性好
|
1063
|
+
- 支持大规模数据处理,内存使用优化
|
1064
|
+
- 多项式求根采用高效算法,避免数值迭代
|
1065
|
+
|
1066
|
+
应用场景:
|
1067
|
+
--------
|
1068
|
+
- 投资组合绩效评估:评估各时间段表现相对有效前沿的距离
|
1069
|
+
- 市场效率分析:通过距离分布判断市场效率变化
|
1070
|
+
- 风险管理:识别偏离有效前沿的异常时期
|
1071
|
+
- 资产配置优化:为动态调整提供量化依据
|
1072
|
+
|
1073
|
+
示例:
|
1074
|
+
-----
|
1075
|
+
>>> import numpy as np
|
1076
|
+
>>> from rust_pyfunc import distances_to_frontier
|
1077
|
+
>>>
|
1078
|
+
>>> # 生成测试数据
|
1079
|
+
>>> np.random.seed(0)
|
1080
|
+
>>> r = 1e-4 * np.random.randn(4800).astype(np.float64)
|
1081
|
+
>>>
|
1082
|
+
>>> # 每1分钟聚合(20个3秒间隔)
|
1083
|
+
>>> distances = distances_to_frontier(r, group_size=20)
|
1084
|
+
>>> print(f"距离数组形状: {distances.shape}") # (240,)
|
1085
|
+
>>> print(f"平均距离: {np.mean(distances):.6e}")
|
1086
|
+
>>>
|
1087
|
+
>>> # 每2分半聚合(50个3秒间隔)
|
1088
|
+
>>> distances2 = distances_to_frontier(r, group_size=50)
|
1089
|
+
>>> print(f"距离数组形状: {distances2.shape}") # (96,)
|
1090
|
+
>>>
|
1091
|
+
>>> # 增大岭化系数处理病态数据
|
1092
|
+
>>> distances3 = distances_to_frontier(r, group_size=100, ridge=1e-4)
|
1093
|
+
>>> print(f"距离数组形状: {distances3.shape}") # (48,)
|
1094
|
+
>>>
|
1095
|
+
>>> # 分析距离分布
|
1096
|
+
>>> import matplotlib.pyplot as plt
|
1097
|
+
>>> plt.hist(distances, bins=30, alpha=0.7)
|
1098
|
+
>>> plt.xlabel('到有效前沿的距离')
|
1099
|
+
>>> plt.ylabel('频次')
|
1100
|
+
>>> plt.title('距离分布直方图')
|
1101
|
+
>>> plt.show()
|
1102
|
+
|
1103
|
+
注意:
|
1104
|
+
-----
|
1105
|
+
- 函数保证返回的距离值非负且有限
|
1106
|
+
- 在极少数情况下如果多项式求根失败,对应距离会设为0并发出警告
|
1107
|
+
- 所有计算都使用双精度浮点数,确保数值精度
|
1108
|
+
"""
|
1003
1109
|
...
|
@@ -1,5 +1,7 @@
|
|
1
1
|
use memmap2::Mmap;
|
2
|
+
use numpy::PyArray1;
|
2
3
|
use pyo3::prelude::*;
|
4
|
+
use rayon::iter::IndexedParallelIterator;
|
3
5
|
use rayon::prelude::*;
|
4
6
|
use serde::{Deserialize, Serialize};
|
5
7
|
use std::collections::HashSet;
|
@@ -2138,6 +2140,16 @@ pub fn read_backup_results_factor_only_ultra_fast(
|
|
2138
2140
|
})?
|
2139
2141
|
};
|
2140
2142
|
|
2143
|
+
#[cfg(target_family = "unix")]
|
2144
|
+
unsafe {
|
2145
|
+
// 提示内核按顺序访问,增加预读窗口
|
2146
|
+
let _ = libc::madvise(
|
2147
|
+
mmap.as_ptr() as *mut libc::c_void,
|
2148
|
+
file_len,
|
2149
|
+
libc::MADV_SEQUENTIAL,
|
2150
|
+
);
|
2151
|
+
}
|
2152
|
+
|
2141
2153
|
// 读取文件头
|
2142
2154
|
let header = unsafe { &*(mmap.as_ptr() as *const FileHeader) };
|
2143
2155
|
|
@@ -2147,10 +2159,7 @@ pub fn read_backup_results_factor_only_ultra_fast(
|
|
2147
2159
|
|
2148
2160
|
let record_count = header.record_count as usize;
|
2149
2161
|
if record_count == 0 {
|
2150
|
-
return Python::with_gil(|py| {
|
2151
|
-
let numpy = py.import("numpy")?;
|
2152
|
-
Ok(numpy.call_method1("array", (Vec::<f64>::new(),))?.into())
|
2153
|
-
});
|
2162
|
+
return Python::with_gil(|py| Ok(PyArray1::<f64>::from_vec(py, Vec::new()).into_py(py)));
|
2154
2163
|
}
|
2155
2164
|
|
2156
2165
|
let record_size = header.record_size as usize;
|
@@ -2196,29 +2205,28 @@ pub fn read_backup_results_factor_only_ultra_fast(
|
|
2196
2205
|
})?;
|
2197
2206
|
|
2198
2207
|
// 并行读取所有因子值
|
2199
|
-
let factors
|
2200
|
-
|
2201
|
-
|
2202
|
-
.
|
2208
|
+
let mut factors = vec![0f64; record_count];
|
2209
|
+
pool.install(|| {
|
2210
|
+
factors
|
2211
|
+
.par_iter_mut()
|
2212
|
+
.enumerate()
|
2213
|
+
.with_min_len(4096)
|
2214
|
+
.for_each(|(i, slot)| {
|
2203
2215
|
let record_offset = records_start + i * record_size;
|
2204
2216
|
|
2205
2217
|
// 直接读取因子值,完全跳过其他字段的解析
|
2206
2218
|
unsafe {
|
2207
2219
|
let factor_ptr = mmap.as_ptr().add(record_offset + factor_offset) as *const f64;
|
2208
|
-
*factor_ptr
|
2220
|
+
*slot = *factor_ptr;
|
2209
2221
|
}
|
2210
|
-
})
|
2211
|
-
.collect()
|
2222
|
+
});
|
2212
2223
|
});
|
2213
2224
|
|
2214
2225
|
// 显式释放mmap
|
2215
2226
|
drop(mmap);
|
2216
2227
|
|
2217
2228
|
// 创建numpy数组
|
2218
|
-
Python::with_gil(|py| {
|
2219
|
-
let numpy = py.import("numpy")?;
|
2220
|
-
Ok(numpy.call_method1("array", (factors,))?.into())
|
2221
|
-
})
|
2229
|
+
Python::with_gil(|py| Ok(PyArray1::from_vec(py, factors).into_py(py)))
|
2222
2230
|
}
|
2223
2231
|
|
2224
2232
|
/// 超高速查询备份文件中的指定列(完整版本v2)
|
@@ -703,9 +703,7 @@ pub fn batch_factor_neutralization_io_optimized(
|
|
703
703
|
// 显示进度:有处理进展或者已经运行超过5秒
|
704
704
|
if processed > 0 || elapsed.as_secs() >= 5 {
|
705
705
|
let current_time = Local::now().format("%Y-%m-%d %H:%M:%S");
|
706
|
-
print!("\r[{}] 📊 处理进度: {}/{} ({:.1}%) - 成功: {}, 失败: {} - 已用时间: {} - 预计剩余: {}",
|
707
|
-
current_time, processed, total_files, progress_percent,
|
708
|
-
success_count, errors, elapsed_time_str, remaining_time_str);
|
706
|
+
print!("\r[{}] 📊 处理进度: {}/{} ({:.1}%) - 成功: {}, 失败: {} - 已用时间: {} - 预计剩余: {}", current_time, processed, total_files, progress_percent, success_count, errors, elapsed_time_str, remaining_time_str);
|
709
707
|
io::stdout().flush().unwrap();
|
710
708
|
}
|
711
709
|
}
|