cnhkmcp 2.1.3__py3-none-any.whl → 2.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194)
  1. cnhkmcp/__init__.py +126 -0
  2. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/README.md +38 -0
  3. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/ace.log +0 -0
  4. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/config.json +6 -0
  5. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/ace_lib.py +1514 -0
  6. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/fetch_all_datasets.py +157 -0
  7. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/fetch_all_documentation.py +132 -0
  8. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/fetch_all_operators.py +99 -0
  9. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/helpful_functions.py +180 -0
  10. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/icon.ico +0 -0
  11. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/icon.png +0 -0
  12. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/001_10_Steps_to_Start_on_BRAIN_documentation.json +14 -0
  13. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/001_Intermediate_Pack_-_Improve_your_Alpha_2_2_documentation.json +174 -0
  14. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/001_Intermediate_Pack_-_Understand_Results_1_2_documentation.json +167 -0
  15. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/001_Introduction_to_Alphas_documentation.json +145 -0
  16. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/001_Introduction_to_BRAIN_Expression_Language_documentation.json +107 -0
  17. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/001_WorldQuant_Challenge_documentation.json +56 -0
  18. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/001__Read_this_First_-_Starter_Pack_documentation.json +404 -0
  19. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/002_How_to_choose_the_Simulation_Settings_documentation.json +268 -0
  20. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/002_Simulate_your_first_Alpha_documentation.json +88 -0
  21. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/002__Alpha_Examples_for_Beginners_documentation.json +254 -0
  22. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/002__Alpha_Examples_for_Bronze_Users_documentation.json +114 -0
  23. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/002__Alpha_Examples_for_Silver_Users_documentation.json +79 -0
  24. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/002__How_BRAIN_works_documentation.json +184 -0
  25. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/003_Clear_these_tests_before_submitting_an_Alpha_documentation.json +388 -0
  26. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/003_Parameters_in_the_Simulation_results_documentation.json +243 -0
  27. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/004_Group_Data_Fields_documentation.json +69 -0
  28. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/004_How_to_use_the_Data_Explorer_documentation.json +142 -0
  29. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/004_Model77_dataset_documentation.json +14 -0
  30. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/004_Sentiment1_dataset_documentation.json +14 -0
  31. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/004_Understanding_Data_in_BRAIN_Key_Concepts_and_Tips_documentation.json +182 -0
  32. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/004_Vector_Data_Fields_documentation.json +30 -0
  33. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/005_Crowding_Risk-Neutralized_Alphas_documentation.json +64 -0
  34. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/005_D0_documentation.json +66 -0
  35. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/005_Double_Neutralization_documentation.json +53 -0
  36. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/005_Fast_D1_Documentation_documentation.json +304 -0
  37. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/005_Investability_Constrained_Metrics_documentation.json +129 -0
  38. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/005_Must-read_posts_How_to_improve_your_Alphas_documentation.json +14 -0
  39. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/005_Neutralization_documentation.json +29 -0
  40. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/005_RAM_Risk-Neutralized_Alphas_documentation.json +64 -0
  41. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/005_Risk_Neutralization_Default_setting_documentation.json +75 -0
  42. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/005_Risk_Neutralized_Alphas_documentation.json +171 -0
  43. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/005_Statistical_Risk-Neutralized_Alphas_documentation.json +51 -0
  44. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/006_EUR_TOP2500_Universe_documentation.json +35 -0
  45. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/006_GLB_TOPDIV3000_Universe_documentation.json +48 -0
  46. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/006_Getting_Started_China_Research_for_Consultants_Gold_documentation.json +142 -0
  47. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/006_Getting_started_on_Illiquid_Universes_Gold_documentation.json +46 -0
  48. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/006_Getting_started_with_USA_TOPSP500_universe_Gold_documentation.json +62 -0
  49. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/006_Global_Alphas_Gold_documentation.json +66 -0
  50. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/006_India_Alphas_documentation.json +35 -0
  51. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/007_Consultant_Dos_and_Don_ts_documentation.json +35 -0
  52. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/007_Consultant_Features_documentation.json +239 -0
  53. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/007_Consultant_Simulation_Features_documentation.json +149 -0
  54. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/007_Consultant_Submission_Tests_documentation.json +363 -0
  55. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/007_Finding_Consultant_Alphas_documentation.json +333 -0
  56. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/007_Power_Pool_Alphas_documentation.json +14 -0
  57. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/007_Research_Advisory_Program_documentation.json +35 -0
  58. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/007_Starting_Guide_for_Research_Consultants_documentation.json +14 -0
  59. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/007_Visualization_Tool_documentation.json +99 -0
  60. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/007_Your_Advisor_-_Kunqi_Jiang_documentation.json +53 -0
  61. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/007__Brain_Genius_documentation.json +288 -0
  62. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/007__Single_Dataset_Alphas_documentation.json +41 -0
  63. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/008_Advisory_Theme_Calendar_documentation.json +14 -0
  64. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/008_Multiplier_Rules_documentation.json +14 -0
  65. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/008_Overview_of_Themes_documentation.json +14 -0
  66. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/008_Theme_Calendar_documentation.json +14 -0
  67. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/009_Combo_Expression_documentation.json +272 -0
  68. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/009_Global_SuperAlphas_documentation.json +14 -0
  69. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/009_Helpful_Tips_documentation.json +58 -0
  70. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/009_Selection_Expression_documentation.json +1546 -0
  71. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/009_SuperAlpha_Operators_documentation.json +890 -0
  72. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/009_SuperAlpha_Results_documentation.json +83 -0
  73. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/009_What_is_a_SuperAlpha_documentation.json +261 -0
  74. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/010_BRAIN_API_documentation.json +515 -0
  75. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/010_Documentation_for_ACE_API_Library_Gold_documentation.json +27 -0
  76. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/010__Understanding_simulation_limits_documentation.json +210 -0
  77. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/arithmetic_operators.json +209 -0
  78. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/cross_sectional_operators.json +98 -0
  79. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/group_operators.json +121 -0
  80. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/logical_operators.json +145 -0
  81. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/reduce_operators.json +156 -0
  82. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/special_operators.json +35 -0
  83. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/test.txt +1 -0
  84. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/time_series_operators.json +386 -0
  85. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/transformational_operators.json +61 -0
  86. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/vector_operators.json +38 -0
  87. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/main.py +576 -0
  88. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/process_knowledge_base.py +281 -0
  89. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/rag_engine.py +408 -0
  90. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/requirements.txt +7 -0
  91. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/run.bat +3 -0
  92. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/vector_db/_manifest.json +302 -0
  93. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/vector_db/_meta.json +1 -0
  94. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/vector_db/chroma.sqlite3 +0 -0
  95. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242//321/211/320/266/320/246/321/206/320/274/320/261/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/231/320/243/321/205/342/225/235/320/220/321/206/320/230/320/241.py +265 -0
  96. cnhkmcp/untracked/APP/.gitignore +32 -0
  97. cnhkmcp/untracked/APP/MODULAR_STRUCTURE.md +112 -0
  98. cnhkmcp/untracked/APP/README.md +309 -0
  99. cnhkmcp/untracked/APP/Tranformer/Transformer.py +4989 -0
  100. cnhkmcp/untracked/APP/Tranformer/ace.log +0 -0
  101. cnhkmcp/untracked/APP/Tranformer/ace_lib.py +1514 -0
  102. cnhkmcp/untracked/APP/Tranformer/helpful_functions.py +180 -0
  103. cnhkmcp/untracked/APP/Tranformer/output/Alpha_candidates.json +7187 -0
  104. cnhkmcp/untracked/APP/Tranformer/output/Alpha_candidates_/321/207/320/264/342/225/221/321/204/342/225/233/320/233.json +654 -0
  105. cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_error.json +1 -0
  106. cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_success.json +47312 -0
  107. cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_/321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/320/237/320/277/321/207/320/253/342/224/244/321/206/320/236/320/265/321/210/342/225/234/342/225/234/321/205/320/225/320/265Machine_lib.json +22 -0
  108. cnhkmcp/untracked/APP/Tranformer/parsetab.py +60 -0
  109. cnhkmcp/untracked/APP/Tranformer/template_summary.txt +3182 -0
  110. cnhkmcp/untracked/APP/Tranformer/transformer_config.json +7 -0
  111. cnhkmcp/untracked/APP/Tranformer/validator.py +889 -0
  112. cnhkmcp/untracked/APP/ace.log +69 -0
  113. cnhkmcp/untracked/APP/ace_lib.py +1514 -0
  114. cnhkmcp/untracked/APP/blueprints/__init__.py +6 -0
  115. cnhkmcp/untracked/APP/blueprints/feature_engineering.py +347 -0
  116. cnhkmcp/untracked/APP/blueprints/idea_house.py +221 -0
  117. cnhkmcp/untracked/APP/blueprints/inspiration_house.py +432 -0
  118. cnhkmcp/untracked/APP/blueprints/paper_analysis.py +570 -0
  119. cnhkmcp/untracked/APP/custom_templates/templates.json +1257 -0
  120. cnhkmcp/untracked/APP/give_me_idea/BRAIN_Alpha_Template_Expert_SystemPrompt.md +400 -0
  121. cnhkmcp/untracked/APP/give_me_idea/ace_lib.py +1514 -0
  122. cnhkmcp/untracked/APP/give_me_idea/alpha_data_specific_template_master.py +252 -0
  123. cnhkmcp/untracked/APP/give_me_idea/fetch_all_datasets.py +157 -0
  124. cnhkmcp/untracked/APP/give_me_idea/fetch_all_operators.py +99 -0
  125. cnhkmcp/untracked/APP/give_me_idea/helpful_functions.py +180 -0
  126. cnhkmcp/untracked/APP/give_me_idea/what_is_Alpha_template.md +11 -0
  127. cnhkmcp/untracked/APP/helpful_functions.py +180 -0
  128. cnhkmcp/untracked/APP/hkSimulator/ace_lib.py +1501 -0
  129. cnhkmcp/untracked/APP/hkSimulator/autosimulator.py +447 -0
  130. cnhkmcp/untracked/APP/hkSimulator/helpful_functions.py +180 -0
  131. cnhkmcp/untracked/APP/mirror_config.txt +20 -0
  132. cnhkmcp/untracked/APP/operaters.csv +129 -0
  133. cnhkmcp/untracked/APP/requirements.txt +53 -0
  134. cnhkmcp/untracked/APP/run_app.bat +28 -0
  135. cnhkmcp/untracked/APP/run_app.sh +34 -0
  136. cnhkmcp/untracked/APP/setup_tsinghua.bat +39 -0
  137. cnhkmcp/untracked/APP/setup_tsinghua.sh +43 -0
  138. cnhkmcp/untracked/APP/simulator/alpha_submitter.py +404 -0
  139. cnhkmcp/untracked/APP/simulator/simulator_wqb.py +618 -0
  140. cnhkmcp/untracked/APP/ssrn-3332513.pdf +109188 -19
  141. cnhkmcp/untracked/APP/static/brain.js +589 -0
  142. cnhkmcp/untracked/APP/static/decoder.js +1540 -0
  143. cnhkmcp/untracked/APP/static/feature_engineering.js +1729 -0
  144. cnhkmcp/untracked/APP/static/idea_house.js +937 -0
  145. cnhkmcp/untracked/APP/static/inspiration.js +465 -0
  146. cnhkmcp/untracked/APP/static/inspiration_house.js +868 -0
  147. cnhkmcp/untracked/APP/static/paper_analysis.js +390 -0
  148. cnhkmcp/untracked/APP/static/script.js +3082 -0
  149. cnhkmcp/untracked/APP/static/simulator.js +597 -0
  150. cnhkmcp/untracked/APP/static/styles.css +3127 -0
  151. cnhkmcp/untracked/APP/static/usage_widget.js +508 -0
  152. cnhkmcp/untracked/APP/templates/alpha_inspector.html +511 -0
  153. cnhkmcp/untracked/APP/templates/feature_engineering.html +960 -0
  154. cnhkmcp/untracked/APP/templates/idea_house.html +564 -0
  155. cnhkmcp/untracked/APP/templates/index.html +932 -0
  156. cnhkmcp/untracked/APP/templates/inspiration_house.html +861 -0
  157. cnhkmcp/untracked/APP/templates/paper_analysis.html +91 -0
  158. cnhkmcp/untracked/APP/templates/simulator.html +343 -0
  159. cnhkmcp/untracked/APP/templates/transformer_web.html +580 -0
  160. cnhkmcp/untracked/APP/usage.md +351 -0
  161. cnhkmcp/untracked/APP//321/207/342/225/235/320/250/321/205/320/230/320/226/321/204/342/225/225/320/220/321/211/320/221/320/243/321/206/320/261/320/265/ace_lib.py +1514 -0
  162. cnhkmcp/untracked/APP//321/207/342/225/235/320/250/321/205/320/230/320/226/321/204/342/225/225/320/220/321/211/320/221/320/243/321/206/320/261/320/265/brain_alpha_inspector.py +712 -0
  163. cnhkmcp/untracked/APP//321/207/342/225/235/320/250/321/205/320/230/320/226/321/204/342/225/225/320/220/321/211/320/221/320/243/321/206/320/261/320/265/helpful_functions.py +180 -0
  164. cnhkmcp/untracked/APP//321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/231/320/243/321/205/342/225/235/320/220/321/206/320/230/320/241.py +2460 -0
  165. cnhkmcp/untracked/__init__.py +0 -0
  166. cnhkmcp/untracked/arXiv_API_Tool_Manual.md +490 -0
  167. cnhkmcp/untracked/arxiv_api.py +229 -0
  168. cnhkmcp/untracked/back_up/forum_functions.py +998 -0
  169. cnhkmcp/untracked/back_up/platform_functions.py +2886 -0
  170. cnhkmcp/untracked/brain-consultant.md +31 -0
  171. cnhkmcp/untracked/forum_functions.py +407 -0
  172. cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/forum_functions.py +407 -0
  173. cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/platform_functions.py +2601 -0
  174. cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/user_config.json +31 -0
  175. cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272//321/210/320/276/320/271AI/321/210/320/277/342/225/227/321/210/342/224/220/320/251/321/204/342/225/225/320/272/321/206/320/246/320/227/321/206/320/261/320/263/321/206/320/255/320/265/321/205/320/275/320/266/321/204/342/225/235/320/252/321/204/342/225/225/320/233/321/210/342/225/234/342/225/234/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270.md +101 -0
  176. cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272//321/211/320/225/320/235/321/207/342/225/234/320/276/321/205/320/231/320/235/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/230/320/241_/321/205/320/276/320/231/321/210/320/263/320/225/321/205/342/224/220/320/225/321/210/320/266/320/221/321/204/342/225/233/320/255/321/210/342/225/241/320/246/321/205/320/234/320/225.py +190 -0
  177. cnhkmcp/untracked/platform_functions.py +2601 -0
  178. cnhkmcp/untracked/sample_mcp_config.json +11 -0
  179. cnhkmcp/untracked/user_config.json +31 -0
  180. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/320/237/320/222/321/210/320/220/320/223/321/206/320/246/320/227/321/206/320/261/320/263_BRAIN_Alpha_Test_Requirements_and_Tips.md +202 -0
  181. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_Alpha_explaination_workflow.md +56 -0
  182. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_BRAIN_6_Tips_Datafield_Exploration_Guide.md +194 -0
  183. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_BRAIN_Alpha_Improvement_Workflow.md +101 -0
  184. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_Dataset_Exploration_Expert_Manual.md +436 -0
  185. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_daily_report_workflow.md +128 -0
  186. cnhkmcp/untracked//321/211/320/225/320/235/321/207/342/225/234/320/276/321/205/320/231/320/235/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/230/320/241_/321/205/320/276/320/231/321/210/320/263/320/225/321/205/342/224/220/320/225/321/210/320/266/320/221/321/204/342/225/233/320/255/321/210/342/225/241/320/246/321/205/320/234/320/225.py +192 -0
  187. {cnhkmcp-2.1.3.dist-info → cnhkmcp-2.1.5.dist-info}/METADATA +1 -1
  188. cnhkmcp-2.1.5.dist-info/RECORD +192 -0
  189. cnhkmcp-2.1.5.dist-info/top_level.txt +1 -0
  190. cnhkmcp-2.1.3.dist-info/RECORD +0 -6
  191. cnhkmcp-2.1.3.dist-info/top_level.txt +0 -1
  192. {cnhkmcp-2.1.3.dist-info → cnhkmcp-2.1.5.dist-info}/WHEEL +0 -0
  193. {cnhkmcp-2.1.3.dist-info → cnhkmcp-2.1.5.dist-info}/entry_points.txt +0 -0
  194. {cnhkmcp-2.1.3.dist-info → cnhkmcp-2.1.5.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,889 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ 表达式验证器 - 使用抽象语法树验证字符串表达式格式是否正确
5
+
6
+ 本模块实现了一个能够检测字符串表达式格式是否正确的系统,基于PLY(Python Lex-Yacc)
7
+ 构建词法分析器和语法分析器,识别表达式中的操作符、函数和字段,并验证其格式正确性。
8
+ """
9
+
10
+ import re
11
+ import sys
12
+ import json
13
+ import os
14
+ from typing import List, Dict, Any, Optional, Tuple
15
+
16
+ # 尝试导入PLY库,如果不存在则提供安装提示
17
+ try:
18
+ import ply.lex as lex
19
+ import ply.yacc as yacc
20
+ except ImportError:
21
+ print("错误: 需要安装PLY库。请运行 'pip install ply' 来安装。")
22
+ sys.exit(1)
23
+
24
# 1. Supported operators / functions.
#
# Each entry maps a function name to its call signature:
#   min_args / max_args : inclusive bounds on the number of arguments
#   arg_types           : expected type per positional slot
#                         ('expression', 'number', 'string', 'boolean', 'category')
#   param_names         : optional; keyword names aligned index-by-index with arg_types
#
# Every key must appear exactly once: in a dict literal a repeated key silently
# overrides the earlier definition. 'ts_product' and 'signed_power' used to be
# listed twice; only the definitions that were actually in effect are kept.
supported_functions = {
    # Group functions
    'group_min': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'category']},
    'group_mean': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression']},
    'group_median': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'category']},
    'group_max': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'category']},
    'group_rank': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'category']},
    'group_vector_proj': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'category']},
    'group_normalize': {'min_args': 2, 'max_args': 5, 'arg_types': ['expression', 'category', 'expression', 'expression', 'expression']},
    'group_extra': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'category']},
    'group_backfill': {'min_args': 3, 'max_args': 4, 'arg_types': ['expression', 'expression', 'expression', 'expression'], 'param_names': ['x', 'cat', 'days', 'std']},
    'group_scale': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'category']},
    'group_count': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'category']},
    'group_zscore': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'category']},
    'group_std_dev': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'category']},
    'group_sum': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'category']},
    'group_neutralize': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'category']},
    'group_multi_regression': {'min_args': 4, 'max_args': 9, 'arg_types': ['expression'] * 9},
    'group_cartesian_product': {'min_args': 2, 'max_args': 2, 'arg_types': ['category', 'category']},
    'combo_a': {'min_args': 1, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression']},

    # Transformational functions
    'right_tail': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression']},
    'bucket': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression']},  # second argument may be the string-typed range parameter
    'tail': {'min_args': 1, 'max_args': 4, 'arg_types': ['expression', 'expression', 'expression', 'expression']},
    'left_tail': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression']},
    'trade_when': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression']},
    'generate_stats': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},

    # Cross Sectional functions
    'winsorize': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['x', 'std']},
    'rank': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression']},
    'regression_proj': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']},
    'vector_neut': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']},
    'regression_neut': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']},
    'multi_regression': {'min_args': 2, 'max_args': 100, 'arg_types': ['expression'] * 100},  # supports many independent variables

    # Time Series functions
    'ts_std_dev': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_mean': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_delay': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_corr': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'number']},
    'ts_zscore': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_returns': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'number', 'number'], 'param_names': ['x', 'd', 'mode']},
    'ts_product': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_backfill': {'min_args': 2, 'max_args': 4, 'arg_types': ['expression', 'number', 'number', 'string']},
    'days_from_last_change': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'last_diff_value': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_scale': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'number', 'number']},
    'ts_entropy': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'expression', 'number'], 'param_names': ['x', 'd', 'buckets']},
    'ts_step': {'min_args': 1, 'max_args': 1, 'arg_types': ['number']},
    'ts_sum': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_co_kurtosis': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'number']},
    'inst_tvr': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_decay_exp_window': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'number', 'number'], 'param_names': ['x', 'd', 'factor']},
    'ts_av_diff': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_kurtosis': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_min_max_diff': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'number', 'number']},
    'ts_arg_max': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_max': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_min_max_cps': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'number', 'number']},
    'ts_rank': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'number', 'number']},
    'ts_ir': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_theilsen': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'number']},
    'hump_decay': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_weighted_decay': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_quantile': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'number', 'string']},
    'ts_min': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_count_nans': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_covariance': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'number']},
    'ts_co_skewness': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'number']},
    'ts_min_diff': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_decay_linear': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'number', 'boolean']},
    'jump_decay': {'min_args': 2, 'max_args': 5, 'arg_types': ['expression', 'number', 'expression', 'number', 'number'], 'param_names': ['x', 'd', 'stddev', 'sensitivity', 'force']},
    'ts_moment': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'number', 'number'], 'param_names': ['x', 'd', 'k']},
    'ts_arg_min': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_regression': {'min_args': 3, 'max_args': 5, 'arg_types': ['expression', 'expression', 'number', 'number', 'number'], 'param_names': ['y', 'x', 'd', 'lag', 'rettype']},
    'ts_skewness': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_max_diff': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'kth_element': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'number', 'number']},
    'hump': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'number'], 'param_names': ['x', 'hump']},
    'ts_median': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_delta': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number']},
    'ts_poly_regression': {'min_args': 3, 'max_args': 4, 'arg_types': ['expression', 'expression', 'number', 'number']},
    'ts_target_tvr_decay': {'min_args': 1, 'max_args': 4, 'arg_types': ['expression', 'number', 'number', 'number'], 'param_names': ['x', 'lambda_min', 'lambda_max', 'target_tvr']},
    'ts_target_tvr_delta_limit': {'min_args': 2, 'max_args': 5, 'arg_types': ['expression', 'expression', 'number', 'number', 'number']},
    'ts_target_tvr_hump': {'min_args': 1, 'max_args': 4, 'arg_types': ['expression', 'number', 'number', 'number']},
    'ts_delta_limit': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'expression', 'number']},

    # Special functions
    'inst_pnl': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'self_corr': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'in': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']},  # NOTE: this is a keyword
    'universe_size': {'min_args': 0, 'max_args': 0, 'arg_types': []},

    # Missing functions from operators.py
    'quantile': {'min_args': 1, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression'], 'param_names': ['x', 'driver', 'sigma']},  # quantile(x, driver = gaussian, sigma = 1.0)
    'normalize': {'min_args': 1, 'max_args': 3, 'arg_types': ['expression', 'boolean', 'number']},  # normalize(x, useStd = false, limit = 0.0)
    'zscore': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},  # zscore(x)

    # Logical functions
    'or': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']},  # NOTE: this is a keyword
    'and': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']},  # NOTE: this is a keyword
    'not': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},  # NOTE: this is a keyword
    'is_nan': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'is_not_nan': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'less': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']},
    'equal': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']},
    'greater': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']},
    'is_finite': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'if_else': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression']},
    'not_equal': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']},
    'less_equal': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']},
    'greater_equal': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']},

    # Vector functions
    'vec_kurtosis': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'vec_min': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'vec_count': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'vec_sum': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'vec_skewness': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'vec_max': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'vec_avg': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'vec_range': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'vec_choose': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number'], 'param_names': ['x', 'nth']},
    'vec_powersum': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'number'], 'param_names': ['x', 'constant']},
    'vec_stddev': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'vec_percentage': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'number'], 'param_names': ['x', 'percentage']},
    'vec_ir': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'vec_norm': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'ts_percentage': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'number', 'number'], 'param_names': ['x', 'd', 'percentage']},

    # Additional functions from test cases
    'rank_by_side': {'min_args': 1, 'max_args': 3, 'arg_types': ['expression', 'number', 'number'], 'param_names': ['x', 'rate', 'scale']},
    'log_diff': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'nan_mask': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']},
    'ts_partial_corr': {'min_args': 4, 'max_args': 4, 'arg_types': ['expression', 'expression', 'expression', 'number']},
    'ts_triple_corr': {'min_args': 4, 'max_args': 4, 'arg_types': ['expression', 'expression', 'expression', 'number']},
    'clamp': {'min_args': 1, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression'], 'param_names': ['x', 'lower', 'upper']},
    'keep': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'expression', 'number'], 'param_names': ['x', 'condition', 'period']},
    'replace': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression'], 'param_names': ['x', 'target', 'dest']},
    'filter': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression'], 'param_names': ['x', 'h', 't']},
    'one_side': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'string'], 'param_names': ['x', 'side']},
    'scale_down': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number'], 'param_names': ['x', 'constant']},

    # Arithmetic functions
    'add': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'expression', 'boolean']},  # add(x, y, filter=false)
    'multiply': {'min_args': 2, 'max_args': 100, 'arg_types': ['expression'] * 99 + ['boolean'], 'param_names': ['x', 'y', 'filter']},  # multiply(x, y, ..., filter=false)
    'sign': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'subtract': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'expression', 'boolean']},  # subtract(x, y, filter=false)
    'pasteurize': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'log': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'purify': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'arc_tan': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'max': {'min_args': 2, 'max_args': 100, 'arg_types': ['expression'] * 100},  # max(x, y, ...)
    'to_nan': {'min_args': 1, 'max_args': 3, 'arg_types': ['expression', 'expression', 'boolean']},  # to_nan(x, value=0, reverse=false)
    'abs': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'sigmoid': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'divide': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']},  # divide(x, y)
    'min': {'min_args': 2, 'max_args': 100, 'arg_types': ['expression'] * 100},  # min(x, y, ...)
    'tanh': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'nan_out': {'min_args': 1, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression'], 'param_names': ['x', 'lower', 'upper']},  # nan_out(x, lower=0, upper=0)
    'signed_power': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']},  # signed_power(x, y)
    'inverse': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'round': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'sqrt': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    's_log_1p': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'reverse': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},  # -x
    'power': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression']},  # power(x, y)
    'densify': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    'floor': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
    # Appended missing operators
    'arc_cos': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['x']},
    'arc_sin': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['x']},
    'ceiling': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['x']},
    'exp': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['x']},
    'fraction': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['x']},
    'round_down': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['x', 'f']},
    'is_not_finite': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['input']},
    'negate': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['input']},
    'ts_rank_gmean_amean_diff': {'min_args': 5, 'max_args': 5, 'arg_types': ['expression', 'expression', 'expression', 'expression', 'number'], 'param_names': ['input1', 'input2', 'input3', '...', 'd']},
    'ts_vector_neut': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'number'], 'param_names': ['x', 'y', 'd']},
    'ts_vector_proj': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'number'], 'param_names': ['x', 'y', 'd']},
    'scale': {'min_args': 1, 'max_args': 4, 'arg_types': ['expression', 'expression', 'expression', 'expression'], 'param_names': ['x', 'scale', 'longscale', 'shortscale']},
    'generalized_rank': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['open', 'm']},
    'rank_gmean_amean_diff': {'min_args': 4, 'max_args': 4, 'arg_types': ['expression', 'expression', 'expression', 'expression'], 'param_names': ['input1', 'input2', 'input3', '...']},
    'truncate': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['x', 'maxPercent']},
    'vector_proj': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['x', 'y']},
    'vec_filter': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['vec', 'value']},
    'group_coalesce': {'min_args': 4, 'max_args': 4, 'arg_types': ['expression', 'expression', 'expression', 'expression'], 'param_names': ['original_group', 'group2', 'group3', '…']},
    'group_percentage': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'category', 'expression'], 'param_names': ['x', 'group', 'percentage']},
    'group_vector_neut': {'min_args': 3, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression'], 'param_names': ['x', 'y', 'g']},
    'convert': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['x', 'mode']},
    'reduce_avg': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['input', 'threshold']},
    'reduce_choose': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression'], 'param_names': ['input', 'nth', 'ignoreNan']},
    'reduce_count': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['input', 'threshold']},
    'reduce_ir': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['input']},
    'reduce_kurtosis': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['input']},
    'reduce_max': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['input']},
    'reduce_min': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['input']},
    'reduce_norm': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['input']},
    'reduce_percentage': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['input', 'percentage']},
    'reduce_powersum': {'min_args': 1, 'max_args': 3, 'arg_types': ['expression', 'expression', 'expression'], 'param_names': ['input', 'constant', 'precise']},
    'reduce_range': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['input']},
    'reduce_skewness': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['input']},
    'reduce_stddev': {'min_args': 1, 'max_args': 2, 'arg_types': ['expression', 'expression'], 'param_names': ['input', 'threshold']},
    'reduce_sum': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression'], 'param_names': ['input']},
}
235
+
236
# 2. Grouping fields (names usable as the category argument of group_* functions).
group_fields = {
    'sector',
    'subindustry',
    'industry',
    'exchange',
    'country',
    'market',
}

# 3. Valid categories. This is the very same set object as group_fields
#    (an alias, not a copy), so mutating one mutates the other.
valid_categories = group_fields

# 4. Field-name patterns: a name is accepted when it consists solely of
#    letters, digits and underscores.
field_patterns = [re.compile(r'^[a-zA-Z0-9_]+$')]
248
+
249
+ # 4. 抽象语法树节点类型
250
class ASTNode:
    """Generic node of the abstract syntax tree built by the parser.

    Attributes:
        node_type: kind tag of the node (e.g. 'function', 'operator', 'field',
            'number', 'expression').
        children: child entries; an empty list when none (or a falsy value) is given.
        value: payload attached to the node.
        line: optional source line number.
    """

    def __init__(self, node_type: str, children: Optional[List['ASTNode']] = None,
                 value: Optional[Any] = None, line: Optional[int] = None):
        self.node_type = node_type
        self.value = value
        self.line = line
        # Any falsy `children` (None, empty list) is replaced by a fresh list.
        self.children = children if children else []

    def __repr__(self) -> str:
        return self.__str__()

    def __str__(self) -> str:
        return "ASTNode({}, {}, line={})".format(self.node_type, self.value, self.line)
264
+
265
+ class ExpressionValidator:
266
+ """表达式验证器类"""
267
+
268
def __init__(self):
    """Create the error list and build the PLY lexer and parser from this instance."""
    # Collected error messages (filled by t_error / p_error).
    self.errors = []
    # PLY discovers the t_* rules and `tokens` on the object passed as `module`.
    self.lexer = lex.lex(module=self, debug=False)
    # PLY discovers the p_* grammar rules on the same object.
    self.parser = yacc.yacc(module=self, debug=False)
276
+
277
# Lexer rules
tokens = (
    'FUNCTION', 'FIELD', 'NUMBER', 'LPAREN', 'RPAREN',
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'COMMA', 'CATEGORY',
    'EQUAL', 'ASSIGN', 'IDENTIFIER', 'STRING', 'GREATER', 'LESS',
    'GREATEREQUAL', 'LESSEQUAL', 'NOTEQUAL', 'BOOLEAN',
)

# Characters skipped between tokens. '\n' is deliberately NOT listed here:
# PLY drops ignored characters before trying any rule, so ignoring '\n'
# would keep the t_newline rule from ever matching and the line numbers
# reported in lexer errors would stay at 1.
t_ignore = ' \t'

# Operators. PLY sorts string-defined rules by decreasing regex length, so the
# two-character operators ('==', '!=', '>=', '<=') are tried before '=', '>', '<'.
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_COMMA = r','
t_EQUAL = r'=='
t_NOTEQUAL = r'!='
t_GREATEREQUAL = r'>='
t_LESSEQUAL = r'<='
t_GREATER = r'>'
t_LESS = r'<'
t_ASSIGN = r'='
300
+
301
+ # 数字(整数和浮点数)
302
# Numeric literal rule. The docstring is the token regex consumed by PLY and
# must stay verbatim. The matched text becomes a float when it contains a
# '.', otherwise an int.
def t_NUMBER(self, t):
    r'\d+\.?\d*'
    literal = t.value
    t.value = float(literal) if '.' in literal else int(literal)
    return t
309
+
310
+ # 字符串 - 需要放在所有其他标识符规则之前
311
# String literal rule (single- or double-quoted, no escapes). The docstring
# is the token regex consumed by PLY and must stay verbatim. The surrounding
# quote characters are removed from the token value.
def t_STRING(self, t):
    r"'[^']*'|\"[^\"]*\""
    quoted = t.value
    t.value = quoted[1:len(quoted) - 1]
    return t
316
+
317
+ # 函数和字段名
318
# Name rule: classifies a bare word as BOOLEAN, FUNCTION, CATEGORY, FIELD or
# IDENTIFIER. The docstring is the token regex consumed by PLY and must stay
# verbatim.
def t_IDENTIFIER(self, t):
    r'[a-zA-Z_][a-zA-Z0-9_]*'
    lowered = t.value.lower()

    # Boolean literals are case-insensitive and normalised to lower case.
    if lowered in {'true', 'false'}:
        t.type = 'BOOLEAN'
        t.value = lowered
        return t

    # Peek at the first non-whitespace character after this token.
    source = t.lexer.lexdata
    limit = len(source)
    cursor = t.lexpos + len(t.value)
    while cursor < limit and source[cursor].isspace():
        cursor += 1
    following = source[cursor] if cursor < limit else ''

    if following == '=':
        # Treated as a keyword-argument name.
        # NOTE(review): this also fires when the name is followed by '==' —
        # confirm that classifying such names as IDENTIFIER is intended.
        t.type = 'IDENTIFIER'
    elif following == '(':
        # Anything directly followed by '(' is a function name (lower-cased).
        t.type = 'FUNCTION'
        t.value = lowered
    elif t.value in {'std', 'k', 'lambda_min', 'lambda_max', 'target_tvr', 'range', 'buckets', 'lag', 'rettype', 'mode', 'nth', 'constant', 'percentage', 'driver', 'sigma', 'rate', 'scale', 'filter', 'lower', 'upper', 'target', 'dest', 'event', 'sensitivity', 'force', 'h', 't', 'period', 'stddev', 'factor', 'k', 'useStd', 'limit', 'gaussian', 'uniform', 'cauchy'}:
        # Well-known parameter names / parameter values.
        t.type = 'IDENTIFIER'
    elif lowered in supported_functions:
        # Known function name (case-insensitive) not followed by '('.
        t.type = 'FUNCTION'
        t.value = lowered
    elif t.value in valid_categories:
        t.type = 'CATEGORY'
    elif self._is_valid_field(t.value):
        t.type = 'FIELD'
    else:
        # Any other word stays a plain identifier.
        t.type = 'IDENTIFIER'
    return t
360
+
361
+ # 行号跟踪
362
# Line tracking rule: advances the lexer's line counter by the number of
# newline characters matched and emits no token. The docstring is the token
# regex consumed by PLY and must stay verbatim.
def t_newline(self, t):
    r'\n+'
    lexer = t.lexer
    lexer.lineno = lexer.lineno + len(t.value)
365
+
366
+ # 错误处理
367
def t_error(self, t):
    """Record a lexing error in self.errors and skip one input character.

    `t.value` holds the remaining input starting at the offending character.
    A falsy `t` is reported as an unexpected end of input.
    """
    if not t:
        self.errors.append("词法分析器到达文件末尾")
        return

    offending = t.value[0]
    if re.match(r'[a-zA-Z0-9_\+\-\*/\(\)\,\s=<>!]', offending):
        # The character itself belongs to the language: report the whole
        # remaining text as an illegal token.
        self.errors.append(f"非法标记 '{t.value}' (行 {t.lexer.lineno})")
    else:
        # The character is outside the language's alphabet.
        self.errors.append(f"非法字符 '{offending}' (行 {t.lexer.lineno})")

    # Skip the character and keep lexing.
    t.lexer.skip(1)
380
+
381
+ # 语法分析器规则
382
# Lowest-precedence level: relational operators (==, !=, >, <, >=, <=),
# left-associative. The docstring is the grammar consumed by PLY and must stay
# verbatim. A single operand is passed through; otherwise a 'binop' node is
# built with the operator stored under value['op'].
def p_expression(self, p):
    """expression : comparison
                  | expression EQUAL comparison
                  | expression NOTEQUAL comparison
                  | expression GREATER comparison
                  | expression LESS comparison
                  | expression GREATEREQUAL comparison
                  | expression LESSEQUAL comparison"""
    p[0] = p[1] if len(p) == 2 else ASTNode('binop', [p[1], p[3]], {'op': p[2]})
394
+
395
# Additive level (+, -), left-associative; despite its name this nonterminal
# does not handle relational operators. The docstring is the grammar consumed
# by PLY and must stay verbatim.
def p_comparison(self, p):
    """comparison : term
                  | comparison PLUS term
                  | comparison MINUS term"""
    if len(p) != 2:
        left, operator, right = p[1], p[2], p[3]
        p[0] = ASTNode('binop', [left, right], {'op': operator})
    else:
        p[0] = p[1]
403
+
404
# Multiplicative level (*, /), left-associative. The docstring is the grammar
# consumed by PLY and must stay verbatim.
def p_term(self, p):
    """term : factor
            | term TIMES factor
            | term DIVIDE factor"""
    if len(p) == 2:
        p[0] = p[1]
        return
    operands = [p[1], p[3]]
    p[0] = ASTNode('binop', operands, {'op': p[2]})
412
+
413
# Atoms: literals, names, unary minus, parenthesised expressions and function
# calls. The docstring is the grammar consumed by PLY and must stay verbatim.
def p_factor(self, p):
    """factor : NUMBER
              | STRING
              | FIELD
              | CATEGORY
              | IDENTIFIER
              | BOOLEAN
              | MINUS factor
              | LPAREN expression RPAREN
              | function_call"""
    width = len(p)
    if width == 3:
        # Unary minus.
        p[0] = ASTNode('unop', [p[2]], {'op': p[1]})
    elif width == 4:
        # Parenthesised expression: the inner node is passed through.
        p[0] = p[2]
    else:
        # Terminal symbols become leaf nodes; anything else (a function_call
        # node) is passed through unchanged.
        leaf_kinds = {
            'NUMBER': 'number',
            'STRING': 'string',
            'FIELD': 'field',
            'CATEGORY': 'category',
            'BOOLEAN': 'boolean',
            'IDENTIFIER': 'identifier',
        }
        kind = leaf_kinds.get(p.slice[1].type) if width == 2 else None
        p[0] = p[1] if kind is None else ASTNode(kind, value=p[1])
448
+
449
# Function call: builds a 'function' node whose value is the function name
# and whose children are the parsed argument entries. The docstring is the
# grammar consumed by PLY and must stay verbatim.
def p_function_call(self, p):
    '''function_call : FUNCTION LPAREN args RPAREN'''
    name, arguments = p[1], p[3]
    p[0] = ASTNode('function', children=arguments, value=name)
452
+
453
# Argument section of a call: yields the argument list, or an empty list when
# the call has no arguments. The docstring is the grammar consumed by PLY and
# must stay verbatim.
def p_args(self, p):
    '''args : arg_list
            | empty'''
    has_arguments = len(p) == 2 and p[1] is not None
    p[0] = p[1] if has_arguments else []
460
+
461
# Comma-separated arguments, accumulated left to right into a new list. The
# docstring is the grammar consumed by PLY and must stay verbatim.
def p_arg_list(self, p):
    '''arg_list : arg
                | arg_list COMMA arg'''
    if len(p) != 2:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]
468
+
469
# Single argument: either a positional expression or `name = expression`.
# The result is a dict tagged with 'type' ('positional' or 'named'). The
# docstring is the grammar consumed by PLY and must stay verbatim.
def p_arg(self, p):
    '''arg : expression
           | IDENTIFIER ASSIGN expression'''
    if len(p) == 2:
        p[0] = dict(type='positional', value=p[1])
    else:
        p[0] = dict(type='named', name=p[1], value=p[3])
476
+
477
# Empty production, used for calls without arguments; its semantic value is
# None. The docstring is the grammar consumed by PLY and must stay verbatim.
def p_empty(self, p):
    '''empty :'''
    p[0] = None
480
+
481
+ # 语法错误处理
482
def p_error(self, p):
    """Record a syntax error in self.errors.

    A falsy `p` is reported as an incomplete expression; otherwise the
    offending token's position and value are included in the message.
    """
    if not p:
        message = "语法错误: 表达式不完整"
    else:
        message = f"语法错误在位置 {p.lexpos}: 非法标记 '{p.value}'"
    self.errors.append(message)
487
+
488
def _is_valid_field(self, field_name: str) -> bool:
    """Return True when field_name matches at least one entry of field_patterns."""
    return any(pattern.match(field_name) for pattern in field_patterns)
494
+
495
def validate_function(self, node: ASTNode, is_in_group_arg: bool = False) -> List[str]:
    """Check the argument count and argument types of a function-call node.

    Args:
        node: 'function' node; `value` is the function name and `children`
            the argument entries (dicts tagged 'positional'/'named', or bare
            nodes, which are treated as positional).
        is_in_group_arg: forwarded to _validate_arg_type.

    Returns:
        A list of error messages; empty when no problem was found. An unknown
        function name yields a single error and no further checks.
    """
    function_name = node.value
    call_args = node.children
    spec = supported_functions.get(function_name)
    if not spec:
        return [f"未知函数: {function_name}"]

    findings = []

    # Argument count.
    if len(call_args) < spec['min_args']:
        findings.append(f"函数 {function_name} 需要至少 {spec['min_args']} 个参数,但只提供了 {len(call_args)}")
    elif len(call_args) > spec['max_args']:
        findings.append(f"函数 {function_name} 最多接受 {spec['max_args']} 个参数,但提供了 {len(call_args)}")

    # Index of the next positional slot; named arguments do not advance it.
    position = 0

    for entry in call_args:
        wrapped = isinstance(entry, dict)
        kind = entry['type'] if wrapped else 'positional'

        if kind == 'named':
            keyword = entry['name']
            if 'param_names' in spec and keyword in spec['param_names']:
                # Map the keyword to its slot through param_names.
                slot = spec['param_names'].index(keyword)
                if slot < len(spec['arg_types']):
                    findings.extend(self._validate_arg_type(
                        entry['value'], spec['arg_types'][slot], slot, function_name, is_in_group_arg))
            elif function_name == 'winsorize' and keyword in ['std', 'clip']:
                # winsorize additionally accepts std= / clip= as numbers.
                findings.extend(self._validate_arg_type(
                    entry['value'], 'number', 0, function_name, is_in_group_arg))
            elif function_name == 'bucket' and keyword in ['range', 'buckets']:
                # bucket accepts range= / buckets=, checked against the 'string' type.
                findings.extend(self._validate_arg_type(
                    entry['value'], 'string', 1, function_name, is_in_group_arg))
            else:
                findings.append(f"函数 {function_name} 不存在参数 '{keyword}'")
            continue

        if kind != 'positional':
            # Dict entry with an unrecognised tag.
            findings.append(f"参数 {position+1} 格式错误")
            position += 1
            continue

        # Positional argument, either wrapped in a dict or given as a bare node.
        if function_name == 'winsorize' and position == 1:
            # winsorize: the second argument must be passed by keyword.
            findings.append(f"函数 {function_name} 的第二个参数必须使用命名参数 'std='")
        elif function_name == 'ts_moment' and position == 2:
            # ts_moment: the third argument must be passed by keyword.
            findings.append(f"函数 {function_name} 的第三个参数必须使用命名参数 'k='")
        elif position < len(spec['arg_types']):
            payload = entry['value'] if wrapped else entry
            findings.extend(self._validate_arg_type(
                payload, spec['arg_types'][position], position, function_name, is_in_group_arg))
        position += 1

    return findings
575
+
576
def _validate_arg_type(self, arg: ASTNode, expected_type: str, arg_index: int, function_name: str, is_in_group_arg: bool = False) -> List[str]:
    """Check one argument node against the type a function slot expects.

    Args:
        arg: The argument node; its ``node_type`` and ``value`` are inspected.
        expected_type: One of 'expression', 'number', 'boolean', 'field' or
            'category'. Any other value is accepted without checks.
        arg_index: Zero-based argument position (reported one-based).
        function_name: Name of the function the argument belongs to.
        is_in_group_arg: True when the argument sits somewhere inside the
            argument chain of a ``group_*`` function.

    Returns:
        A list of error messages; empty when the argument is acceptable.
    """
    problems = []
    actual_type = arg.node_type
    ordinal = arg_index + 1

    # Group-type fields may only appear inside group_* calls (directly or nested).
    if actual_type == 'category' and arg.value in group_fields:
        allowed_here = function_name.startswith('group_') or is_in_group_arg
        if not allowed_here:
            problems.append(f"Group类型字段 '{arg.value}' 只能用于Group类型函数的参数中")

    if expected_type == 'number':
        if actual_type != 'number':
            problems.append(f"参数 {ordinal} 应该是一个数字,但得到 {actual_type}")
    elif expected_type == 'boolean':
        # Booleans are written as the numbers 0/1.
        if actual_type != 'number':
            problems.append(f"参数 {ordinal} 应该是一个布尔值(0/1),但得到 {actual_type}")
    elif expected_type == 'field':
        # Both field and category nodes are accepted in a field slot.
        if actual_type not in ('field', 'category'):
            problems.append(f"参数 {ordinal} 应该是一个字段,但得到 {actual_type}")
        elif actual_type == 'field' and not self._is_valid_field(arg.value):
            problems.append(f"无效的字段名: {arg.value}")
    elif expected_type == 'category' and not function_name.startswith('group_'):
        # group_* functions take any node type as their category argument,
        # so only non-group functions are checked here.
        if actual_type != 'category':
            problems.append(f"参数 {ordinal} 应该是一个类别,但得到 {actual_type}")
        elif arg.value not in valid_categories:
            problems.append(f"无效的类别: {arg.value}")
    # 'expression' (and any unknown expected type) accepts every node.

    return problems
613
+
614
def validate_ast(self, ast: Optional[ASTNode], is_in_group_arg: bool = False) -> List[str]:
    """Recursively validate an abstract syntax tree.

    Args:
        ast: Root node to validate; a falsy value means parsing failed.
        is_in_group_arg: True when this node is nested inside the arguments
            of a ``group_*`` function.

    Returns:
        All error messages collected from this node and its descendants.
    """
    if not ast:
        return ["无法解析表达式"]

    kind = ast.node_type

    # A field node is checked by name only; its children are not visited.
    if kind == 'field':
        if self._is_valid_field(ast.value):
            return []
        return [f"无效的字段名: {ast.value}"]

    collected = []
    group_flag = is_in_group_arg

    if kind == 'function':
        # Once inside a group_* call, the flag stays set for the whole subtree.
        is_group_call = ast.value.startswith('group_')
        group_flag = is_in_group_arg or is_group_call
        collected.extend(self.validate_function(ast, group_flag))

    # Operator nodes only recurse into plain node children; every other
    # node kind also recurses into the value of dict-shaped (named) children.
    follow_named = kind not in ['unop', 'binop']

    for child in ast.children:
        target = child
        if isinstance(child, dict):
            if not follow_named or 'value' not in child:
                continue
            target = child['value']
        if hasattr(target, 'node_type'):
            collected.extend(self.validate_ast(target, group_flag))

    return collected
664
+
665
+ def _process_semicolon_expression(self, expression: str) -> Tuple[bool, str]:
666
+ """处理带有分号的表达式,将其转换为不带分号的简化形式
667
+
668
+ Args:
669
+ expression: 要处理的表达式字符串
670
+
671
+ Returns:
672
+ Tuple[bool, str]: (是否成功, 转换后的表达式或错误信息)
673
+ """
674
+ # 检查表达式是否以分号结尾
675
+ if expression.strip().endswith(';'):
676
+ return False, "表达式不能以分号结尾"
677
+
678
+ # 分割表达式为语句列表
679
+ statements = [stmt.strip() for stmt in expression.split(';') if stmt.strip()]
680
+ if not statements:
681
+ return False, "表达式不能为空"
682
+
683
+ # 存储变量赋值
684
+ variables = {}
685
+
686
+ # 处理每个赋值语句(除了最后一个)
687
+ for i, stmt in enumerate(statements[:-1]):
688
+ # 检查是否包含赋值符号
689
+ if '=' not in stmt:
690
+ return False, f"第{i+1}个语句必须是赋值语句(使用=符号)"
691
+
692
+ # 检查是否是比较操作符(==, !=, <=, >=)
693
+ if any(op in stmt for op in ['==', '!=', '<=', '>=']):
694
+ # 如果包含比较操作符,需要确认是否有赋值符号
695
+ # 使用临时替换法:将比较操作符替换为临时标记,再检查是否还有=
696
+ temp_stmt = stmt
697
+ for op in ['==', '!=', '<=', '>=']:
698
+ temp_stmt = temp_stmt.replace(op, '---')
699
+
700
+ if '=' not in temp_stmt:
701
+ return False, f"第{i+1}个语句必须是赋值语句,不能只是比较表达式"
702
+
703
+ # 找到第一个=符号(不是比较操作符的一部分)
704
+ # 先将比较操作符替换为临时标记,再找=
705
+ temp_stmt = stmt
706
+ for op in ['==', '!=', '<=', '>=']:
707
+ temp_stmt = temp_stmt.replace(op, '---')
708
+
709
+ if '=' not in temp_stmt:
710
+ return False, f"第{i+1}个语句必须是赋值语句(使用=符号)"
711
+
712
+ # 找到实际的=位置
713
+ equals_pos = temp_stmt.index('=')
714
+
715
+ # 在原始语句中找到对应位置
716
+ real_equals_pos = 0
717
+ temp_count = 0
718
+ for char in stmt:
719
+ if temp_count == equals_pos:
720
+ break
721
+ if char in '!<>':
722
+ # 检查是否是比较操作符的一部分
723
+ if real_equals_pos + 1 < len(stmt) and stmt[real_equals_pos + 1] == '=':
724
+ # 是比较操作符,跳过两个字符
725
+ real_equals_pos += 2
726
+ temp_count += 3 # 因为替换成了三个字符的---
727
+ else:
728
+ real_equals_pos += 1
729
+ temp_count += 1
730
+ else:
731
+ real_equals_pos += 1
732
+ temp_count += 1
733
+
734
+ # 分割变量名和值
735
+ var_name = stmt[:real_equals_pos].strip()
736
+ var_value = stmt[real_equals_pos + 1:].strip()
737
+
738
+ # 检查变量名是否有效
739
+ if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', var_name):
740
+ return False, f"第{i+1}个语句的变量名'{var_name}'无效,只能包含字母、数字和下划线,且不能以数字开头"
741
+
742
+ var_name_lower = var_name.lower() # 变量名不区分大小写
743
+
744
+ # 检查变量名是否在后续表达式中使用
745
+ # 这里不需要,因为后面的表达式会检查
746
+
747
+ # 检查变量值中使用的变量是否已经定义
748
+ # 简单检查:提取所有可能的变量名
749
+ used_vars = re.findall(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b', var_value)
750
+ for used_var in used_vars:
751
+ used_var_lower = used_var.lower()
752
+ if used_var_lower not in variables:
753
+ # 检查是否是函数名
754
+ if used_var not in supported_functions:
755
+ # 对于单个字母或简单单词,不自动视为字段名,要求先定义
756
+ if len(used_var) <= 2:
757
+ return False, f"第{i+1}个语句中使用的变量'{used_var}'未在之前定义"
758
+ # 对于较长的字段名,仍然允许作为字段名
759
+ elif not self._is_valid_field(used_var):
760
+ return False, f"第{i+1}个语句中使用的变量'{used_var}'未在之前定义"
761
+
762
+ # 将之前定义的变量替换到当前值中
763
+ for existing_var, existing_val in variables.items():
764
+ # 使用单词边界匹配,避免替换到其他单词的一部分
765
+ var_value = re.sub(rf'\b{existing_var}\b', existing_val, var_value)
766
+
767
+ # 存储变量
768
+ variables[var_name_lower] = var_value
769
+
770
+ # 处理最后一个语句(实际的表达式)
771
+ final_stmt = statements[-1]
772
+
773
+ # 检查最后一个语句是否是赋值语句
774
+ if '=' in final_stmt:
775
+ # 替换比较操作符为临时标记,然后检查是否还有单独的=
776
+ temp_stmt = final_stmt
777
+ for op in ['==', '!=', '<=', '>=']:
778
+ temp_stmt = temp_stmt.replace(op, '---')
779
+
780
+ if '=' in temp_stmt:
781
+ return False, "最后一个语句不能是赋值语句"
782
+
783
+ # 检查最后一个语句中使用的变量是否已经定义
784
+ used_vars = re.findall(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b', final_stmt)
785
+ for used_var in used_vars:
786
+ used_var_lower = used_var.lower()
787
+ if used_var_lower not in variables:
788
+ # 检查是否是函数名
789
+ if used_var not in supported_functions:
790
+ # 在分号表达式中,所有非函数名的标识符都必须是变量,必须在之前定义
791
+ return False, f"最后一个语句中使用的变量'{used_var}'未在之前定义"
792
+
793
+ # 将变量替换到最后一个表达式中
794
+ final_expr = final_stmt
795
+ for var_name, var_value in variables.items():
796
+ final_expr = re.sub(rf'\b{var_name}\b', var_value, final_expr)
797
+
798
+ return True, final_expr
799
+
800
def check_expression(self, expression: str) -> Dict[str, Any]:
    """Check whether an expression is well formed.

    Args:
        expression: The expression string to validate.

    Returns:
        A dict with the keys 'valid' (bool), 'errors' (list of messages),
        'tokens' (lexer tokens) and 'ast' (parse result). Any exception
        raised along the way is reported as a single error entry.
    """
    def rejected(message):
        # Shape shared by every early failure result.
        return {
            'valid': False,
            'errors': [message],
            'tokens': [],
            'ast': None
        }

    # Start from a clean error list.
    self.errors = []

    try:
        expression = expression.strip()
        if not expression:
            return rejected('表达式不能为空')

        # Flatten "a = ...; b = ...; expr" into one expression first.
        if ';' in expression:
            ok, outcome = self._process_semicolon_expression(expression)
            if not ok:
                return rejected(outcome)
            expression = outcome

        # First lexer pass: collect the tokens for the result.
        self.lexer.lineno = 1
        self.lexer.input(expression)
        tokens = list(self.lexer)

        # Rewind the lexer so the parser reads from the beginning.
        self.lexer.input(expression)
        self.lexer.lineno = 1

        ast = self.parser.parse(expression, lexer=self.lexer)

        validation_errors = self.validate_ast(ast)
        all_errors = self.errors + validation_errors

        # Parenthesis balance: stop at the first surplus ')'.
        depth = 0
        for symbol in expression:
            if symbol == '(':
                depth += 1
            elif symbol == ')':
                depth -= 1
                if depth < 0:
                    all_errors.append("括号不匹配: 右括号过多")
                    break
        if depth > 0:
            all_errors.append("括号不匹配: 左括号过多")

        return {
            'valid': not all_errors,
            'errors': all_errors,
            'tokens': tokens,
            'ast': ast
        }
    except Exception as e:
        return rejected(f"解析错误: {e!s}")
887
+
888
+
889
+