rust-pyfunc 0.43.0__tar.gz → 0.44.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rust-pyfunc might be problematic. Click here for more details.

Files changed (133) hide show
  1. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/Cargo.lock +9 -1
  2. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/Cargo.toml +3 -1
  3. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/PKG-INFO +1 -1
  4. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/statistical_analysis.pyi +106 -0
  5. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/backup_reader.rs +23 -15
  6. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/factor_neutralization_io_optimized.rs +1 -3
  7. rust_pyfunc-0.44.1/src/frontier_dist.rs +576 -0
  8. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/gp_correlation_dimension.rs +255 -169
  9. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/lib.rs +20 -6
  10. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/lz_complexity.rs +38 -31
  11. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/simple_parallel.rs +39 -13
  12. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/.github/workflows/CI.yml +0 -0
  13. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/.github/workflows/deploy.yml +0 -0
  14. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/.gitignore +0 -0
  15. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/CLAUDE.md +0 -0
  16. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/CRUSH.md +0 -0
  17. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/FACTOR_NEUTRALIZATION_REQUIREMENTS.md +0 -0
  18. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/README.md +0 -0
  19. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/alter.sh +0 -0
  20. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/PriceTree.html +0 -0
  21. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/PriceTreeViz.html +0 -0
  22. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/RollingFutureAccessor.html +0 -0
  23. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/TRADE_PEAK_ANALYSIS_README.md +0 -0
  24. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/brachistochrone_curve.html +0 -0
  25. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/calculate_shannon_entropy_change.html +0 -0
  26. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/calculate_shannon_entropy_change_at_low.html +0 -0
  27. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/compute_max_eigenvalue.html +0 -0
  28. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/dtw_distance.html +0 -0
  29. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/find_follow_volume_sum_same_price.html +0 -0
  30. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/find_follow_volume_sum_same_price_and_flag.html +0 -0
  31. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/find_half_energy_time.html +0 -0
  32. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/find_local_peaks_within_window.html +0 -0
  33. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/find_max_range_product.html +0 -0
  34. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/identify_segments.html +0 -0
  35. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/index.html +0 -0
  36. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/jaccard_similarity.html +0 -0
  37. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/mark_follow_groups.html +0 -0
  38. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/mark_follow_groups_with_flag.html +0 -0
  39. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/max_range_loop.html +0 -0
  40. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/min_range_loop.html +0 -0
  41. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/min_word_edit_distance.html +0 -0
  42. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/ols.html +0 -0
  43. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/ols_predict.html +0 -0
  44. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/ols_residuals.html +0 -0
  45. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/parallel_computing_system.md +0 -0
  46. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/rolling_cv.html +0 -0
  47. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/rolling_qcv.html +0 -0
  48. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/rolling_volatility.html +0 -0
  49. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/rolling_window_stat.html +0 -0
  50. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/search_data.json +0 -0
  51. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/static/search.js +0 -0
  52. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/static/style.css +0 -0
  53. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/sum_as_string.html +0 -0
  54. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/transfer_entropy.html +0 -0
  55. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/trend.html +0 -0
  56. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/trend_fast.html +0 -0
  57. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/vectorize_sentences.html +0 -0
  58. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/vectorize_sentences_list.html +0 -0
  59. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/使用说明.md +0 -0
  60. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs/完成总结.md +0 -0
  61. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/docs_generator.py +0 -0
  62. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/hmm_visualizer.py +0 -0
  63. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/pyproject.toml +0 -0
  64. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/__init__.py +0 -0
  65. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/__init__.pyi +0 -0
  66. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/core_functions.pyi +0 -0
  67. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/pandas_correlation.py +0 -0
  68. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/pandas_correlation.pyi +0 -0
  69. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/pandas_corrwith.py +0 -0
  70. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/pandas_extensions.pyi +0 -0
  71. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/pandas_merge.py +0 -0
  72. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/pandas_rank.py +0 -0
  73. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/parallel_computing.pyi +0 -0
  74. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/rolling_future.py +0 -0
  75. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/rolling_past.py +0 -0
  76. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/text_analysis.pyi +0 -0
  77. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/time_series.pyi +0 -0
  78. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/trading_analysis.pyi +0 -0
  79. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/tree_structures.pyi +0 -0
  80. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/treevisual.py +0 -0
  81. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/web_manager.py +0 -0
  82. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/python/rust_pyfunc/web_manager.pyi +0 -0
  83. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/abnormal_asks_analyzer.rs +0 -0
  84. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/column_correlation.rs +0 -0
  85. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/difference_matrix.rs +0 -0
  86. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/entropy_analysis.rs +0 -0
  87. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/error/mod.rs +0 -0
  88. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/grouping.rs +0 -0
  89. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/lagged_regression.rs +0 -0
  90. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/lagged_regression_incremental.rs +0 -0
  91. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/lagged_regression_optimized.rs +0 -0
  92. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/lagged_regression_simd.rs +0 -0
  93. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/market_correlation.rs +0 -0
  94. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/order_contamination.rs +0 -0
  95. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/order_neighborhood.rs +0 -0
  96. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/order_records_ultra_sorted.rs +0 -0
  97. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/pandas_ext/mod.rs +0 -0
  98. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/parallel_computing.rs +0 -0
  99. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/permutation_analysis_v0816_fixed.rs +0 -0
  100. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/price_breakthrough_stats.rs +0 -0
  101. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/price_cycle_b_segments_enhanced.rs +0 -0
  102. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/safe_eigenvalue.rs +0 -0
  103. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/sequence/mod.rs +0 -0
  104. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/series_rank.rs +0 -0
  105. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/eigenvalue_analysis.rs +0 -0
  106. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/eigenvalue_analysis_modified.rs +0 -0
  107. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/fast_correlation.rs +0 -0
  108. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/fast_correlation_v2.rs +0 -0
  109. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/hmm_trend_prediction.rs +0 -0
  110. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/local_correlation.rs +0 -0
  111. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/mod.rs +0 -0
  112. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/rolling_correlation_mean.rs +0 -0
  113. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/rolling_window_core_feature.rs +0 -0
  114. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/rolling_window_core_feature_optimized.rs +0 -0
  115. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/rolling_window_core_feature_simd.rs +0 -0
  116. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/statistics/rolling_window_core_feature_ultra.rs +0 -0
  117. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/text/mod.rs +0 -0
  118. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/text/string_proximity.rs +0 -0
  119. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/time_series/fast_extreme.rs +0 -0
  120. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/time_series/lyapunov.rs +0 -0
  121. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/time_series/mod.rs +0 -0
  122. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/time_series/retreat_advance.rs +0 -0
  123. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/time_series/retreat_advance_v2.rs +0 -0
  124. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/time_series/super_extreme.rs +0 -0
  125. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/trade_analysis_ultra_turbo.rs +0 -0
  126. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/trade_peak_analysis.rs +0 -0
  127. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/trade_records_ultra_sorted.rs +0 -0
  128. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/tree/mod.rs +0 -0
  129. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/vector_similarity.rs +0 -0
  130. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/src/vector_similarity_optimized.rs +0 -0
  131. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/templates/base.html +0 -0
  132. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/templates/function.html +0 -0
  133. {rust_pyfunc-0.43.0 → rust_pyfunc-0.44.1}/templates/index.html +0 -0
@@ -2016,9 +2016,15 @@ dependencies = [
2016
2016
  "serde",
2017
2017
  ]
2018
2018
 
2019
+ [[package]]
2020
+ name = "roots"
2021
+ version = "0.0.8"
2022
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2023
+ checksum = "082f11ffa03bbef6c2c6ea6bea1acafaade2fd9050ae0234ab44a2153742b058"
2024
+
2019
2025
  [[package]]
2020
2026
  name = "rust_pyfunc"
2021
- version = "0.43.0"
2027
+ version = "0.44.1"
2022
2028
  dependencies = [
2023
2029
  "arrow",
2024
2030
  "base64 0.21.7",
@@ -2029,6 +2035,7 @@ dependencies = [
2029
2035
  "faer",
2030
2036
  "lapack",
2031
2037
  "libc",
2038
+ "log",
2032
2039
  "memmap2",
2033
2040
  "nalgebra",
2034
2041
  "ndarray",
@@ -2044,6 +2051,7 @@ dependencies = [
2044
2051
  "rand",
2045
2052
  "rayon",
2046
2053
  "rmp-serde",
2054
+ "roots",
2047
2055
  "rustc-hash",
2048
2056
  "serde",
2049
2057
  "serde_json",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "rust_pyfunc"
3
- version = "0.43.0"
3
+ version = "0.44.1"
4
4
  edition = "2021"
5
5
  description = "A collection of high-performance Python functions implemented in Rust"
6
6
  readme = "README.md"
@@ -41,6 +41,8 @@ arrow = "51.0"
41
41
  parquet = "51.0"
42
42
  rustc-hash = "1.1"
43
43
  thiserror = "1.0"
44
+ roots = "0.0.8"
45
+ log = "0.4"
44
46
  # Unix-specific dependencies for fork mode
45
47
  [target.'cfg(unix)'.dependencies]
46
48
  nix = { version = "0.27", features = ["process", "signal", "fs"] }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rust_pyfunc
3
- Version: 0.43.0
3
+ Version: 0.44.1
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: Implementation :: CPython
6
6
  Classifier: Programming Language :: Python :: Implementation :: PyPy
@@ -1000,4 +1000,110 @@ def hmm_trend_prediction(
1000
1000
  ... columns=['更新后下跌概率', '更新后震荡概率', '更新后上涨概率']
1001
1001
  ... )
1002
1002
  """
1003
+ ...
1004
+
1005
+ def distances_to_frontier(
1006
+ r: NDArray[np.float64],
1007
+ group_size: int,
1008
+ drop_last: bool = True,
1009
+ ddof: int = 1,
1010
+ ridge: float = 1e-6
1011
+ ) -> NDArray[np.float64]:
1012
+ """计算收益序列中每个聚合块到马科维茨有效前沿的距离。
1013
+
1014
+ 基于马科维茨投资组合理论的有效前沿距离计算功能。给定单日3秒频率收益序列,
1015
+ 按指定块大小聚合后计算每个资产点到有效前沿的最短距离。
1016
+
1017
+ 算法步骤:
1018
+ 1. 数据分块聚合:将收益序列按指定大小分块,计算每块均值
1019
+ 2. 协方差矩阵计算:计算块间样本协方差矩阵(带岭化保证正定性)
1020
+ 3. 有效前沿构造:使用马科维茨无约束闭式解构造有效前沿
1021
+ 4. 距离计算:使用KKT-λ四次方程法计算每个资产点到前沿的最短欧氏距离
1022
+
1023
+ 参数说明:
1024
+ ----------
1025
+ r : NDArray[np.float64]
1026
+ 1D float64数组,单日3秒频率收益序列
1027
+ group_size : int
1028
+ 每多少行聚合成一块(x),必须大于0
1029
+ drop_last : bool, default=True
1030
+ 尾部不足group_size行时是否丢弃,True丢弃,False则报错
1031
+ ddof : int, default=1
1032
+ 协方差/方差的自由度调整,0或1,默认1(样本协方差)
1033
+ ridge : float, default=1e-6
1034
+ 岭化强度系数,用于保证协方差矩阵正定
1035
+
1036
+ 返回值:
1037
+ -------
1038
+ NDArray[np.float64]
1039
+ shape=(m,)的1D数组,包含每个资产点到有效前沿的距离
1040
+ 其中m = floor(len(r) / group_size)(如果drop_last=True)
1041
+
1042
+ 异常:
1043
+ -----
1044
+ ValueError
1045
+ 当输入参数无效时抛出:
1046
+ - group_size <= 0
1047
+ - 输入序列为空
1048
+ - drop_last=False且序列长度不能被group_size整除
1049
+ - 块大小 <= 自由度调整
1050
+ - 协方差矩阵不正定(可尝试增大ridge)
1051
+ - 有效前沿参数计算失败(Δ <= 0)
1052
+
1053
+ 数值提示:
1054
+ --------
1055
+ - 当 m >> group_size 时,协方差矩阵可能秩亏,需要通过增大ridge参数保证可逆性
1056
+ - 如果出现数值不稳定错误,建议将ridge增大10倍或100倍
1057
+ - 默认使用样本协方差(ddof=1),符合统计学习习惯
1058
+
1059
+ 性能特点:
1060
+ --------
1061
+ - 使用Rust实现,计算性能优异
1062
+ - 采用Cholesky分解避免显式矩阵求逆,数值稳定性好
1063
+ - 支持大规模数据处理,内存使用优化
1064
+ - 多项式求根采用高效算法,避免数值迭代
1065
+
1066
+ 应用场景:
1067
+ --------
1068
+ - 投资组合绩效评估:评估各时间段表现相对有效前沿的距离
1069
+ - 市场效率分析:通过距离分布判断市场效率变化
1070
+ - 风险管理:识别偏离有效前沿的异常时期
1071
+ - 资产配置优化:为动态调整提供量化依据
1072
+
1073
+ 示例:
1074
+ -----
1075
+ >>> import numpy as np
1076
+ >>> from rust_pyfunc import distances_to_frontier
1077
+ >>>
1078
+ >>> # 生成测试数据
1079
+ >>> np.random.seed(0)
1080
+ >>> r = 1e-4 * np.random.randn(4800).astype(np.float64)
1081
+ >>>
1082
+ >>> # 每1分钟聚合(20个3秒间隔)
1083
+ >>> distances = distances_to_frontier(r, group_size=20)
1084
+ >>> print(f"距离数组形状: {distances.shape}") # (240,)
1085
+ >>> print(f"平均距离: {np.mean(distances):.6e}")
1086
+ >>>
1087
+ >>> # 每2分半聚合(50个3秒间隔)
1088
+ >>> distances2 = distances_to_frontier(r, group_size=50)
1089
+ >>> print(f"距离数组形状: {distances2.shape}") # (96,)
1090
+ >>>
1091
+ >>> # 增大岭化系数处理病态数据
1092
+ >>> distances3 = distances_to_frontier(r, group_size=100, ridge=1e-4)
1093
+ >>> print(f"距离数组形状: {distances3.shape}") # (48,)
1094
+ >>>
1095
+ >>> # 分析距离分布
1096
+ >>> import matplotlib.pyplot as plt
1097
+ >>> plt.hist(distances, bins=30, alpha=0.7)
1098
+ >>> plt.xlabel('到有效前沿的距离')
1099
+ >>> plt.ylabel('频次')
1100
+ >>> plt.title('距离分布直方图')
1101
+ >>> plt.show()
1102
+
1103
+ 注意:
1104
+ -----
1105
+ - 函数保证返回的距离值非负且有限
1106
+ - 在极少数情况下如果多项式求根失败,对应距离会设为0并发出警告
1107
+ - 所有计算都使用双精度浮点数,确保数值精度
1108
+ """
1003
1109
  ...
@@ -1,5 +1,7 @@
1
1
  use memmap2::Mmap;
2
+ use numpy::PyArray1;
2
3
  use pyo3::prelude::*;
4
+ use rayon::iter::IndexedParallelIterator;
3
5
  use rayon::prelude::*;
4
6
  use serde::{Deserialize, Serialize};
5
7
  use std::collections::HashSet;
@@ -2138,6 +2140,16 @@ pub fn read_backup_results_factor_only_ultra_fast(
2138
2140
  })?
2139
2141
  };
2140
2142
 
2143
+ #[cfg(target_family = "unix")]
2144
+ unsafe {
2145
+ // 提示内核按顺序访问,增加预读窗口
2146
+ let _ = libc::madvise(
2147
+ mmap.as_ptr() as *mut libc::c_void,
2148
+ file_len,
2149
+ libc::MADV_SEQUENTIAL,
2150
+ );
2151
+ }
2152
+
2141
2153
  // 读取文件头
2142
2154
  let header = unsafe { &*(mmap.as_ptr() as *const FileHeader) };
2143
2155
 
@@ -2147,10 +2159,7 @@ pub fn read_backup_results_factor_only_ultra_fast(
2147
2159
 
2148
2160
  let record_count = header.record_count as usize;
2149
2161
  if record_count == 0 {
2150
- return Python::with_gil(|py| {
2151
- let numpy = py.import("numpy")?;
2152
- Ok(numpy.call_method1("array", (Vec::<f64>::new(),))?.into())
2153
- });
2162
+ return Python::with_gil(|py| Ok(PyArray1::<f64>::from_vec(py, Vec::new()).into_py(py)));
2154
2163
  }
2155
2164
 
2156
2165
  let record_size = header.record_size as usize;
@@ -2196,29 +2205,28 @@ pub fn read_backup_results_factor_only_ultra_fast(
2196
2205
  })?;
2197
2206
 
2198
2207
  // 并行读取所有因子值
2199
- let factors: Vec<f64> = pool.install(|| {
2200
- (0..record_count)
2201
- .into_par_iter()
2202
- .map(|i| {
2208
+ let mut factors = vec![0f64; record_count];
2209
+ pool.install(|| {
2210
+ factors
2211
+ .par_iter_mut()
2212
+ .enumerate()
2213
+ .with_min_len(4096)
2214
+ .for_each(|(i, slot)| {
2203
2215
  let record_offset = records_start + i * record_size;
2204
2216
 
2205
2217
  // 直接读取因子值,完全跳过其他字段的解析
2206
2218
  unsafe {
2207
2219
  let factor_ptr = mmap.as_ptr().add(record_offset + factor_offset) as *const f64;
2208
- *factor_ptr
2220
+ *slot = *factor_ptr;
2209
2221
  }
2210
- })
2211
- .collect()
2222
+ });
2212
2223
  });
2213
2224
 
2214
2225
  // 显式释放mmap
2215
2226
  drop(mmap);
2216
2227
 
2217
2228
  // 创建numpy数组
2218
- Python::with_gil(|py| {
2219
- let numpy = py.import("numpy")?;
2220
- Ok(numpy.call_method1("array", (factors,))?.into())
2221
- })
2229
+ Python::with_gil(|py| Ok(PyArray1::from_vec(py, factors).into_py(py)))
2222
2230
  }
2223
2231
 
2224
2232
  /// 超高速查询备份文件中的指定列(完整版本v2)
@@ -703,9 +703,7 @@ pub fn batch_factor_neutralization_io_optimized(
703
703
  // 显示进度:有处理进展或者已经运行超过5秒
704
704
  if processed > 0 || elapsed.as_secs() >= 5 {
705
705
  let current_time = Local::now().format("%Y-%m-%d %H:%M:%S");
706
- print!("\r[{}] 📊 处理进度: {}/{} ({:.1}%) - 成功: {}, 失败: {} - 已用时间: {} - 预计剩余: {}",
707
- current_time, processed, total_files, progress_percent,
708
- success_count, errors, elapsed_time_str, remaining_time_str);
706
+ print!("\r[{}] 📊 处理进度: {}/{} ({:.1}%) - 成功: {}, 失败: {} - 已用时间: {} - 预计剩余: {}", current_time, processed, total_files, progress_percent, success_count, errors, elapsed_time_str, remaining_time_str);
709
707
  io::stdout().flush().unwrap();
710
708
  }
711
709
  }