cnhkmcp 2.1.1.tar.gz → 2.1.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. {cnhkmcp-2.1.1/cnhkmcp.egg-info → cnhkmcp-2.1.2}/PKG-INFO +1 -1
  2. {cnhkmcp-2.1.1/cnhkmcp/untracked/APP/Tranformer → cnhkmcp-2.1.2/cnhkmcp/untracked/AI桌面插件/get_knowledgeBase_tool}/helpful_functions.py +1 -1
  3. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/process_knowledge_base.py +73 -72
  4. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/rag_engine.py +122 -70
  5. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/requirements.txt +2 -2
  6. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/Transformer.py +1 -1
  7. {cnhkmcp-2.1.1/cnhkmcp/untracked/APP/give_me_idea → cnhkmcp-2.1.2/cnhkmcp/untracked/APP/Tranformer}/helpful_functions.py +1 -1
  8. {cnhkmcp-2.1.1/cnhkmcp/untracked/AI桌面插件/get_knowledgeBase_tool → cnhkmcp-2.1.2/cnhkmcp/untracked/APP/give_me_idea}/helpful_functions.py +1 -1
  9. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/helpful_functions.py +1 -1
  10. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/hkSimulator/ace_lib.py +2 -2
  11. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/hkSimulator/autosimulator.py +4 -4
  12. cnhkmcp-2.1.2/cnhkmcp/untracked/APP/hkSimulator/helpful_functions.py +180 -0
  13. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/simulator/simulator_wqb.py +1 -1
  14. cnhkmcp-2.1.2/cnhkmcp/untracked/APP/缘分一道桥/helpful_functions.py +180 -0
  15. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/mcp文件论坛版2_如果原版启动不了浏览器就试这个/platform_functions.py +2 -2
  16. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/platform_functions.py +1 -1
  17. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2/cnhkmcp.egg-info}/PKG-INFO +1 -1
  18. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp.egg-info/SOURCES.txt +0 -4
  19. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/setup.py +1 -1
  20. cnhkmcp-2.1.1/cnhkmcp/untracked/AI桌面插件/vector_db/_manifest.json +0 -326
  21. cnhkmcp-2.1.1/cnhkmcp/untracked/AI桌面插件/vector_db/_meta.json +0 -1
  22. cnhkmcp-2.1.1/cnhkmcp/untracked/AI桌面插件/vector_db/be5d957c-b724-46e3-91d1-999e9f5f7d28/index_metadata.pickle +0 -0
  23. cnhkmcp-2.1.1/cnhkmcp/untracked/AI桌面插件/vector_db/chroma.sqlite3 +0 -0
  24. cnhkmcp-2.1.1/cnhkmcp/untracked/APP/hkSimulator/helpful_functions.py +0 -180
  25. cnhkmcp-2.1.1/cnhkmcp/untracked/APP/缘分一道桥/helpful_functions.py +0 -180
  26. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/LICENSE +0 -0
  27. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/MANIFEST.in +0 -0
  28. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/README.md +0 -0
  29. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/__init__.py +0 -0
  30. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/README.md +0 -0
  31. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/ace.log +0 -0
  32. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/config.json +0 -0
  33. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/get_knowledgeBase_tool/ace_lib.py +0 -0
  34. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/get_knowledgeBase_tool/fetch_all_datasets.py +0 -0
  35. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/get_knowledgeBase_tool/fetch_all_documentation.py +0 -0
  36. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/get_knowledgeBase_tool/fetch_all_operators.py +0 -0
  37. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/icon.ico +0 -0
  38. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/icon.png +0 -0
  39. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/knowledge/test.txt +0 -0
  40. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/main.py +0 -0
  41. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/run.bat +0 -0
  42. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/首次运行打开我.py +0 -0
  43. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/.gitignore +0 -0
  44. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/MODULAR_STRUCTURE.md +0 -0
  45. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/README.md +0 -0
  46. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/ace.log +0 -0
  47. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/ace_lib.py +0 -0
  48. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/output/Alpha_candidates.json +0 -0
  49. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/output/Alpha_candidates_示例.json +0 -0
  50. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_error.json +0 -0
  51. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_success.json +0 -0
  52. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_示例可直接载入Machine_lib.json +0 -0
  53. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/parsetab.py +0 -0
  54. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/template_summary.txt +0 -0
  55. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/transformer_config.json +0 -0
  56. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/validator.py +0 -0
  57. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/ace.log +0 -0
  58. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/ace_lib.py +0 -0
  59. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/blueprints/__init__.py +0 -0
  60. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/blueprints/feature_engineering.py +0 -0
  61. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/blueprints/idea_house.py +0 -0
  62. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/blueprints/inspiration_house.py +0 -0
  63. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/blueprints/paper_analysis.py +0 -0
  64. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/custom_templates/templates.json +0 -0
  65. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/give_me_idea/BRAIN_Alpha_Template_Expert_SystemPrompt.md +0 -0
  66. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/give_me_idea/ace_lib.py +0 -0
  67. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/give_me_idea/alpha_data_specific_template_master.py +0 -0
  68. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/give_me_idea/fetch_all_datasets.py +0 -0
  69. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/give_me_idea/fetch_all_operators.py +0 -0
  70. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/give_me_idea/what_is_Alpha_template.md +0 -0
  71. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/mirror_config.txt +0 -0
  72. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/operaters.csv +0 -0
  73. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/requirements.txt +0 -0
  74. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/run_app.bat +0 -0
  75. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/run_app.sh +0 -0
  76. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/setup_tsinghua.bat +0 -0
  77. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/setup_tsinghua.sh +0 -0
  78. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/simulator/alpha_submitter.py +0 -0
  79. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/ssrn-3332513.pdf +0 -0
  80. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/brain.js +0 -0
  81. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/decoder.js +0 -0
  82. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/feature_engineering.js +0 -0
  83. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/idea_house.js +0 -0
  84. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/inspiration.js +0 -0
  85. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/inspiration_house.js +0 -0
  86. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/paper_analysis.js +0 -0
  87. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/script.js +0 -0
  88. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/simulator.js +0 -0
  89. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/styles.css +0 -0
  90. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/usage_widget.js +0 -0
  91. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/templates/alpha_inspector.html +0 -0
  92. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/templates/feature_engineering.html +0 -0
  93. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/templates/idea_house.html +0 -0
  94. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/templates/index.html +0 -0
  95. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/templates/inspiration_house.html +0 -0
  96. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/templates/paper_analysis.html +0 -0
  97. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/templates/simulator.html +0 -0
  98. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/templates/transformer_web.html +0 -0
  99. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/usage.md +0 -0
  100. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/缘分一道桥/ace_lib.py +0 -0
  101. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/缘分一道桥/brain_alpha_inspector.py +0 -0
  102. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/运行打开我.py +0 -0
  103. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/arXiv_API_Tool_Manual.md +0 -0
  104. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/arxiv_api.py +0 -0
  105. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/forum_functions.py +0 -0
  106. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/mcp文件论坛版2_如果原版启动不了浏览器就试这个/forum_functions.py +0 -0
  107. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/mcp文件论坛版2_如果原版启动不了浏览器就试这个/user_config.json +0 -0
  108. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/mcp文件论坛版2_如果原版启动不了浏览器就试这个/让AI读这个文档来学会下载浏览器.md +0 -0
  109. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/mcp文件论坛版2_如果原版启动不了浏览器就试这个/配置前运行我_安装必要依赖包.py +0 -0
  110. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/sample_mcp_config.json +0 -0
  111. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/user_config.json +0 -0
  112. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/示例参考文档_BRAIN_Alpha_Test_Requirements_and_Tips.md +0 -0
  113. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/示例工作流_Alpha_explaination_workflow.md +0 -0
  114. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/示例工作流_BRAIN_6_Tips_Datafield_Exploration_Guide.md +0 -0
  115. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/示例工作流_BRAIN_Alpha_Improvement_Workflow.md +0 -0
  116. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/示例工作流_Dataset_Exploration_Expert_Manual.md +0 -0
  117. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/示例工作流_daily_report_workflow.md +0 -0
  118. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp/untracked/配置前运行我_安装必要依赖包.py +0 -0
  119. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp.egg-info/dependency_links.txt +0 -0
  120. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp.egg-info/entry_points.txt +0 -0
  121. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp.egg-info/not-zip-safe +0 -0
  122. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp.egg-info/requires.txt +0 -0
  123. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/cnhkmcp.egg-info/top_level.txt +0 -0
  124. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/requirements.txt +0 -0
  125. {cnhkmcp-2.1.1 → cnhkmcp-2.1.2}/setup.cfg +0 -0
--- cnhkmcp-2.1.1/cnhkmcp.egg-info/PKG-INFO
+++ cnhkmcp-2.1.2/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cnhkmcp
-Version: 2.1.1
+Version: 2.1.2
 Summary: A comprehensive Model Context Protocol (MCP) server for quantitative trading platform integration
 Home-page: https://github.com/cnhk/cnhkmcp
 Author: CNHK
--- cnhkmcp-2.1.1/cnhkmcp/untracked/APP/Tranformer/helpful_functions.py
+++ cnhkmcp-2.1.2/cnhkmcp/untracked/AI桌面插件/get_knowledgeBase_tool/helpful_functions.py
@@ -122,7 +122,7 @@ def save_simulation_result(result: dict) -> None:
 
     os.makedirs(folder_path, exist_ok=True)
 
-    with open(file_path, "w") as file:
+    with open(file_path, "w", encoding="utf-8") as file:
         json.dump(result, file)
 
 
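The one-line change above recurs throughout this release: every open() call gains an explicit encoding="utf-8". Without it, Python falls back to the locale's preferred encoding (often cp936 or cp1252 on Windows), so JSON containing Chinese text written on one machine can garble or fail to load on another. A minimal round-trip sketch of the failure mode being guarded against; the file name demo.json is only an illustration:

    import json
    import locale

    # What open() would default to with no encoding= argument; on Windows this
    # is typically the ANSI code page (e.g. cp936), not UTF-8.
    print(locale.getpreferredencoding(False))

    data = {"note": "账号配置", "ok": True}  # non-ASCII payload, as in the config files above

    # Explicit UTF-8 makes the bytes on disk identical on every platform.
    with open("demo.json", "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False)

    # Reading back with the same explicit encoding always round-trips; relying
    # on the locale default can raise UnicodeDecodeError or garble the text.
    with open("demo.json", encoding="utf-8") as f:
        assert json.load(f) == data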
--- cnhkmcp-2.1.1/cnhkmcp/untracked/AI桌面插件/process_knowledge_base.py
+++ cnhkmcp-2.1.2/cnhkmcp/untracked/AI桌面插件/process_knowledge_base.py
@@ -16,17 +16,18 @@ if TOOL_DIR not in sys.path:
     sys.path.insert(0, TOOL_DIR)
 import ace_lib
 from fetch_all_operators import fetch_operators, prompt_credentials
-from fetch_all_datasets import (
-    fetch_all_combinations,
-    fetch_datasets_for_combo,
-    merge_and_deduplicate,
-)
 from fetch_all_documentation import (
     fetch_tutorials,
     fetch_tutorial_pages,
     fetch_page,
     _extract_page_id,
 )
+# Dataset fetching currently disabled per request
+# from fetch_all_datasets import (
+#     fetch_all_combinations,
+#     fetch_datasets_for_combo,
+#     merge_and_deduplicate,
+# )
 
 
 def ensure_knowledge_dir():
@@ -107,62 +108,63 @@ def process_operators(session: ace_lib.SingleSession, knowledge_dir: str):
     print(f"✓ Created {filename} with {len(category_list)} operators")
 
 
-def process_datasets(session: ace_lib.SingleSession, dataset_dir: str):
-    """Fetch datasets and save one JSON per region."""
-    print("=== Processing Datasets ===")
-
-    print("Fetching valid instrument/region/delay/universe combinations...")
-    options_df = fetch_all_combinations(session)
-    if options_df is None or options_df.empty:
-        print("No simulation options fetched; aborting dataset fetch.")
-        return
-
-    all_datasets: list[pd.DataFrame] = []
-    combo_idx = 0
-
-    for _, row in options_df.iterrows():
-        instrument_type = row.get("InstrumentType")
-        region = row.get("Region")
-        delay = row.get("Delay")
-        universes = row.get("Universe") or []
-
-        for universe in universes:
-            combo_idx += 1
-            print(f"[{combo_idx}] {instrument_type} / {region} / D{delay} / {universe}")
-            try:
-                df = fetch_datasets_for_combo(session, instrument_type, region, delay, universe)
-                print(f" -> {len(df)} rows")
-                all_datasets.append(df)
-            except Exception as exc:
-                print(f" -> Failed: {exc}")
-
-    if not all_datasets:
-        print("No datasets fetched; nothing to save.")
-        return
-
-    combined_df = pd.concat([df for df in all_datasets if not df.empty], ignore_index=True)
-    if combined_df.empty:
-        print("No datasets fetched; nothing to save.")
-        return
-
-    regions = sorted(combined_df["param_region"].dropna().unique())
-    print(f"Found regions: {', '.join(regions)}")
-
-    for region in regions:
-        region_df = combined_df[combined_df["param_region"] == region]
-        region_unique = merge_and_deduplicate([region_df])
-
-        region_list = []
-        for _, row in region_unique.iterrows():
-            record = {col: to_jsonable(row[col]) for col in row.index}
-            region_list.append(record)
-
-        filename = f"{region.replace(' ', '_').lower()}_datasets.json"
-        filepath = os.path.join(dataset_dir, filename)
-        with open(filepath, "w", encoding="utf-8") as f:
-            json.dump(region_list, f, ensure_ascii=False, indent=2)
-
-        print(f"✓ Created {filename} with {len(region_list)} datasets")
+# Dataset fetching intentionally disabled; keep for potential re-enable.
+# def process_datasets(session: ace_lib.SingleSession, dataset_dir: str):
+#     """Fetch datasets and save one JSON per region."""
+#     print("=== Processing Datasets ===")
+#
+#     print("Fetching valid instrument/region/delay/universe combinations...")
+#     options_df = fetch_all_combinations(session)
+#     if options_df is None or options_df.empty:
+#         print("No simulation options fetched; aborting dataset fetch.")
+#         return
+#
+#     all_datasets: list[pd.DataFrame] = []
+#     combo_idx = 0
+#
+#     for _, row in options_df.iterrows():
+#         instrument_type = row.get("InstrumentType")
+#         region = row.get("Region")
+#         delay = row.get("Delay")
+#         universes = row.get("Universe") or []
+#
+#         for universe in universes:
+#             combo_idx += 1
+#             print(f"[{combo_idx}] {instrument_type} / {region} / D{delay} / {universe}")
+#             try:
+#                 df = fetch_datasets_for_combo(session, instrument_type, region, delay, universe)
+#                 print(f" -> {len(df)} rows")
+#                 all_datasets.append(df)
+#             except Exception as exc:
+#                 print(f" -> Failed: {exc}")
+#
+#     if not all_datasets:
+#         print("No datasets fetched; nothing to save.")
+#         return
+#
+#     combined_df = pd.concat([df for df in all_datasets if not df.empty], ignore_index=True)
+#     if combined_df.empty:
+#         print("No datasets fetched; nothing to save.")
+#         return
+#
+#     regions = sorted(combined_df["param_region"].dropna().unique())
+#     print(f"Found regions: {', '.join(regions)}")
+#
+#     for region in regions:
+#         region_df = combined_df[combined_df["param_region"] == region]
+#         region_unique = merge_and_deduplicate([region_df])
+#
+#         region_list = []
+#         for _, row in region_unique.iterrows():
+#             record = {col: to_jsonable(row[col]) for col in row.index}
+#             region_list.append(record)
+#
+#         filename = f"{region.replace(' ', '_').lower()}_datasets.json"
+#         filepath = os.path.join(dataset_dir, filename)
+#         with open(filepath, "w", encoding="utf-8") as f:
+#             json.dump(region_list, f, ensure_ascii=False, indent=2)
+#
+#         print(f"✓ Created {filename} with {len(region_list)} datasets")
@@ -240,7 +242,7 @@ def main():
 
     # Ensure knowledge directory exists
     knowledge_dir = ensure_knowledge_dir()
-    dataset_dir = knowledge_dir  # Save datasets directly under knowledge
+    # dataset_dir = knowledge_dir  # Save datasets directly under knowledge (disabled)
     print(f"Knowledge directory: {knowledge_dir}\n")
 
     # Process documentation (tutorials/pages)
@@ -262,16 +264,15 @@ def main():
         traceback.print_exc()
         return
 
-    # Process datasets by region
-    print("\nStarting dataset processing...\n")
-    try:
-        process_datasets(session, dataset_dir)
-    except Exception as exc:
-        print(f"✗ Failed to process datasets: {exc}")
-        import traceback
-        traceback.print_exc()
-        return
-
+    # Dataset processing disabled; re-enable by uncommenting the block below.
+    # print("\nStarting dataset processing...\n")
+    # try:
+    #     process_datasets(session, dataset_dir)
+    # except Exception as exc:
+    #     print(f"✗ Failed to process datasets: {exc}")
+    #     import traceback
+    #     traceback.print_exc()
+    #     return
 
     print("\n=== Processing Complete ===")
 
--- cnhkmcp-2.1.1/cnhkmcp/untracked/AI桌面插件/rag_engine.py
+++ cnhkmcp-2.1.2/cnhkmcp/untracked/AI桌面插件/rag_engine.py
@@ -18,6 +18,7 @@ PREFERRED_MODELS = [
 # Final chosen model will be detected at runtime from supported list
 MODEL_NAME = None
 COLLECTION_NAME = "brain_kb_v5"
+BATCH_SIZE = 128  # batch upserts to avoid huge single writes
 
 # Optional imports for different file types
 try:
@@ -37,6 +38,8 @@ class KnowledgeBase:
         self.meta_path = os.path.join(self.db_path, "_meta.json")
         self.manifest_path = os.path.join(self.db_path, "_manifest.json")
         self._collection_reset_guard = False
+        self._query_reset_guard = False
+        self._sync_lock = threading.Lock()
 
         if not os.path.exists(self.kb_path):
             os.makedirs(self.kb_path)
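The new _sync_lock is taken with acquire(blocking=False) in sync_knowledge (see the large hunk below), so a sync triggered while another is already running is skipped rather than queued; for a full rescan, a queued second pass would mostly repeat the same work. A minimal sketch of that skip-if-busy pattern, with a stand-in sync function rather than the package's own:

    import threading
    import time

    lock = threading.Lock()

    def sync(name: str) -> None:
        # blocking=False returns False immediately instead of waiting,
        # so overlapping triggers are dropped rather than serialized.
        if not lock.acquire(blocking=False):
            print(f"{name}: sync already running, skip this trigger")
            return
        try:
            time.sleep(0.2)  # stand-in for the real indexing work
            print(f"{name}: synced")
        finally:
            lock.release()  # release even if the work raises

    threads = [threading.Thread(target=sync, args=(f"t{i}",)) for i in range(3)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()  # typically one "synced" line and two skips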
@@ -83,6 +86,7 @@ class KnowledgeBase:
 
         # Initialize Vector DB
         self._init_collection()
+        self._healthcheck()
 
         # Initial sync
         self.sync_knowledge()
@@ -119,6 +123,20 @@ class KnowledgeBase:
         except Exception:
             pass  # Metadata failure should not block runtime
 
+    def _healthcheck(self):
+        """Validate index readability right after startup; rebuild if corrupted."""
+        try:
+            _ = self.collection.count()
+        except Exception as e:
+            msg = str(e).lower()
+            if any(x in msg for x in ["hnsw", "segment", "compaction", "backfill"]):
+                print("Detected index corruption on startup. Rebuilding vector_db...")
+                shutil.rmtree(self.db_path, ignore_errors=True)
+                self._init_collection(recreate=True)
+                self.sync_knowledge(allow_reset=False)
+            else:
+                print(f"Index healthcheck encountered an unexpected error: {e}")
+
     def _maybe_reset_for_incompatibility(self, chosen_model: str, embed_dim: int, chroma_version: str):
         """If existing index meta differs (model/dimension/chromadb), wipe it."""
         if not os.path.exists(self.db_path):
@@ -145,79 +163,88 @@
 
     def sync_knowledge(self, allow_reset: bool = True):
         """Scans the knowledge folder and updates the vector database."""
+        if not self._sync_lock.acquire(blocking=False):
+            print("Sync already running, skip this trigger.")
+            return
+
         print("Syncing knowledge base...")
         manifest = self._load_manifest()
         updated_manifest = {}
         supported_extensions = (".txt", ".md", ".pdf", ".docx", ".json")
         current_files = []
-        for filename in os.listdir(self.kb_path):
-            file_path = os.path.join(self.kb_path, filename)
-            if os.path.isfile(file_path) and filename.lower().endswith(supported_extensions):
-                current_files.append(filename)
-                mtime = os.path.getmtime(file_path)
-                size = os.path.getsize(file_path)
-                prev_meta = manifest.get(filename)
-                # Skip unchanged files
-                if prev_meta and prev_meta.get("mtime") == mtime and prev_meta.get("size") == size:
-                    updated_manifest[filename] = prev_meta
-                    continue
-                try:
-                    content = self._extract_text(file_path)
-                    if content:
-                        # Sliding window chunking on original text
-                        chunk_size = 400
-                        overlap = 80
-                        original_chunks = []
-                        for i in range(0, len(content), chunk_size - overlap):
-                            chunk = content[i:i + chunk_size].strip()
-                            if chunk:
-                                original_chunks.append(chunk)
-
-                        if original_chunks:
-                            # Normalize for embedding generation only (not for storage)
-                            normalized_chunks = [c.lower().replace('_', ' ') for c in original_chunks]
-
-                            ids = [f"{filename}_{i}" for i in range(len(original_chunks))]
-                            metadatas = [{"source": filename, "chunk": i} for i in range(len(original_chunks))]
-
-                            # Compute embeddings from normalized text
-                            embeddings = []
-                            for v in self.model.embed(normalized_chunks):
-                                try:
-                                    embeddings.append(v.tolist())
-                                except Exception:
-                                    embeddings.append(list(v))
+        try:
+            for filename in os.listdir(self.kb_path):
+                file_path = os.path.join(self.kb_path, filename)
+                if os.path.isfile(file_path) and filename.lower().endswith(supported_extensions):
+                    current_files.append(filename)
+                    mtime = os.path.getmtime(file_path)
+                    size = os.path.getsize(file_path)
+                    prev_meta = manifest.get(filename)
+                    # Skip unchanged files
+                    if prev_meta and prev_meta.get("mtime") == mtime and prev_meta.get("size") == size:
+                        updated_manifest[filename] = prev_meta
+                        continue
+                    try:
+                        content = self._extract_text(file_path)
+                        if content:
+                            # Sliding window chunking on original text
+                            chunk_size = 800
+                            overlap = 80
+                            original_chunks = []
+                            for i in range(0, len(content), chunk_size - overlap):
+                                chunk = content[i:i + chunk_size].strip()
+                                if chunk:
+                                    original_chunks.append(chunk)
 
-                            # Store ORIGINAL text (not normalized) so users see the real content
-                            self.collection.upsert(
-                                documents=original_chunks,
-                                ids=ids,
-                                metadatas=metadatas,
-                                embeddings=embeddings
-                            )
-                            print(f"  ✓ Indexed {filename}: {len(original_chunks)} chunks")
-                            updated_manifest[filename] = {"mtime": mtime, "size": size}
+                            if original_chunks:
+                                # Normalize for embedding generation only (not for storage)
+                                normalized_chunks = [c.lower().replace('_', ' ') for c in original_chunks]
+
+                                ids = [f"{filename}_{i}" for i in range(len(original_chunks))]
+                                metadatas = [{"source": filename, "chunk": i} for i in range(len(original_chunks))]
+
+                                # Compute embeddings from normalized text
+                                embeddings = []
+                                for v in self.model.embed(normalized_chunks):
+                                    try:
+                                        embeddings.append(v.tolist())
+                                    except Exception:
+                                        embeddings.append(list(v))
+
+                                # Store ORIGINAL text (not normalized) so users see the real content
+                                for start in range(0, len(original_chunks), BATCH_SIZE):
+                                    end = start + BATCH_SIZE
+                                    self.collection.upsert(
+                                        documents=original_chunks[start:end],
+                                        ids=ids[start:end],
+                                        metadatas=metadatas[start:end],
+                                        embeddings=embeddings[start:end]
+                                    )
+                                print(f"  ✓ Indexed {filename}: {len(original_chunks)} chunks (batched)")
+                                updated_manifest[filename] = {"mtime": mtime, "size": size}
+                    except Exception as e:
+                        err_msg = str(e)
+                        print(f"Error processing {filename}: {err_msg}")
+                        # Auto-recover if HNSW/compaction/index errors occur
+                        if allow_reset and any(x in err_msg.lower() for x in ["hnsw", "compaction", "segment reader"]):
+                            if not self._collection_reset_guard:
+                                print("Detected index corruption. Rebuilding vector_db and retrying sync once...")
+                                self._collection_reset_guard = True
+                                self._init_collection(recreate=True)
+                                return self.sync_knowledge(allow_reset=False)
+            # Remove deleted files from the index
+            deleted_files = set(manifest.keys()) - set(current_files)
+            for filename in deleted_files:
+                try:
+                    self.collection.delete(where={"source": filename})
+                    print(f"  ✓ Removed deleted file from index: {filename}")
                 except Exception as e:
-                    err_msg = str(e)
-                    print(f"Error processing {filename}: {err_msg}")
-                    # Auto-recover if HNSW/compaction/index errors occur
-                    if allow_reset and any(x in err_msg.lower() for x in ["hnsw", "compaction", "segment reader"]):
-                        if not self._collection_reset_guard:
-                            print("Detected index corruption. Rebuilding vector_db and retrying sync once...")
-                            self._collection_reset_guard = True
-                            self._init_collection(recreate=True)
-                            return self.sync_knowledge(allow_reset=False)
-        # Remove deleted files from the index
-        deleted_files = set(manifest.keys()) - set(current_files)
-        for filename in deleted_files:
-            try:
-                self.collection.delete(where={"source": filename})
-                print(f"  ✓ Removed deleted file from index: {filename}")
-            except Exception as e:
-                print(f"  ! Failed to remove {filename}: {e}")
-        # Persist manifest
-        self._save_manifest(updated_manifest)
-        print("Knowledge base sync complete.")
+                    print(f"  ! Failed to remove {filename}: {e}")
+            # Persist manifest
+            self._save_manifest(updated_manifest)
+            print("Knowledge base sync complete.")
+        finally:
+            self._sync_lock.release()
 
     def _extract_text(self, file_path):
         ext = os.path.splitext(file_path)[1].lower()
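Two tuning changes in the hunk above work together: chunks grow from 400 to 800 characters (a 720-character stride once the 80-character overlap is subtracted), and upserts are split into BATCH_SIZE groups so no single write is oversized. A self-contained sketch of that indexing path, using a stand-in collection object instead of the real chromadb client:

    CHUNK_SIZE, OVERLAP, BATCH_SIZE = 800, 80, 128

    def sliding_chunks(text: str) -> list[str]:
        # Consecutive chunks share an 80-character overlap so that text
        # falling on a boundary still appears intact in one chunk.
        step = CHUNK_SIZE - OVERLAP
        return [c for c in (text[i:i + CHUNK_SIZE].strip()
                            for i in range(0, len(text), step)) if c]

    class FakeCollection:
        # Stand-in for the vector store; records batch sizes only.
        def __init__(self):
            self.batches = []

        def upsert(self, documents, ids, metadatas, embeddings):
            self.batches.append(len(ids))

    chunks = sliding_chunks("x" * 100_000)
    ids = [f"demo.txt_{i}" for i in range(len(chunks))]
    metadatas = [{"source": "demo.txt", "chunk": i} for i in range(len(chunks))]
    embeddings = [[0.0, 0.0] for _ in chunks]

    col = FakeCollection()
    for start in range(0, len(chunks), BATCH_SIZE):
        end = start + BATCH_SIZE
        col.upsert(documents=chunks[start:end], ids=ids[start:end],
                   metadatas=metadatas[start:end], embeddings=embeddings[start:end])

    print(len(chunks), col.batches)  # 139 chunks split as [128, 11]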
@@ -249,7 +276,7 @@ class KnowledgeBase:
             return json.dumps(data, ensure_ascii=False, indent=2)
         return None
 
-    def query(self, text, top_k=5, distance_threshold=0.8):
+    def query(self, text, top_k=5, distance_threshold=0.8, allow_reset: bool = True):
         """Retrieves relevant snippets from the knowledge base.
 
         Uses cosine distance (lower is better). A result is treated as a hit only
@@ -311,9 +338,25 @@ class KnowledgeBase:
             context = "\n---\n".join([h["text"] for h in hits]) if is_hit else ""
             return {"hit": is_hit, "context": context, "hits": hits}
         except Exception as e:
-            print(f"Query error: {e}")
+            err_msg = str(e)
+            print(f"Query error: {err_msg}")
             import traceback
             traceback.print_exc()
+
+            # Auto-recover if HNSW/compaction/backfill errors surface during query
+            if allow_reset and any(x in err_msg.lower() for x in ["hnsw", "compaction", "segment reader", "backfill"]):
+                if not self._query_reset_guard:
+                    print("Detected index corruption during query. Rebuilding vector_db and retrying once...")
+                    self._query_reset_guard = True
+                    try:
+                        self._init_collection(recreate=True)
+                        self.sync_knowledge(allow_reset=False)
+                        # Retry query once with guard disabled to avoid loops
+                        self._query_reset_guard = False
+                        return self.query(text, top_k=top_k, distance_threshold=distance_threshold, allow_reset=False)
+                    except Exception as inner_e:
+                        print(f"Auto-rebuild after query failure also failed: {inner_e}")
+                        self._query_reset_guard = False
             return {"hit": False, "context": "", "hits": []}
 
     def start_watcher(self):
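The recovery path in query() is bounded twice: the _query_reset_guard flag blocks concurrent retries, and the retried call passes allow_reset=False so a second failure cannot trigger a second rebuild. The skeleton of that retry-once shape, with hypothetical fragile_read and rebuild stand-ins:

    def rebuild() -> None:
        print("rebuilding index...")  # stand-in for _init_collection + re-sync

    def fragile_read():
        raise RuntimeError("hnsw segment reader error")  # simulated corruption

    def read_with_recovery(allow_reset: bool = True):
        try:
            return fragile_read()
        except Exception as e:
            # Rebuild at most once: the retry runs with allow_reset=False,
            # so a repeat failure falls through to the error return.
            if allow_reset and "hnsw" in str(e).lower():
                rebuild()
                return read_with_recovery(allow_reset=False)
            return None

    print(read_with_recovery())  # rebuilds once, then returns None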
@@ -343,14 +386,23 @@ class KBHandler(FileSystemEventHandler):
     def __init__(self, kb_instance):
         self.kb = kb_instance
         self.supported_extensions = (".txt", ".md", ".pdf", ".docx", ".json")
+        self._debounce_timer = None
+
+    def _trigger_sync(self):
+        def run():
+            self.kb.sync_knowledge()
+        if self._debounce_timer and self._debounce_timer.is_alive():
+            return
+        self._debounce_timer = threading.Timer(0.5, run)
+        self._debounce_timer.start()
 
     def on_modified(self, event):
         if not event.is_directory and event.src_path.lower().endswith(self.supported_extensions):
             print(f"File modified: {event.src_path}. Re-syncing...")
-            threading.Thread(target=self.kb.sync_knowledge).start()
+            self._trigger_sync()
 
     def on_created(self, event):
         if not event.is_directory and event.src_path.lower().endswith(self.supported_extensions):
             print(f"File created: {event.src_path}. Syncing...")
-            threading.Thread(target=self.kb.sync_knowledge).start()
+            self._trigger_sync()
 
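Replacing one-thread-per-event with a threading.Timer means a burst of filesystem events (editors often emit several per save) collapses into one delayed sync instead of a pile of concurrent ones. A runnable sketch of the same debounce shape:

    import threading
    import time

    class Debouncer:
        """Same shape as KBHandler._trigger_sync: while a timer is pending,
        further triggers are ignored, so an event burst yields one run."""

        def __init__(self, delay: float, fn):
            self.delay = delay
            self.fn = fn
            self._timer = None

        def trigger(self) -> None:
            if self._timer and self._timer.is_alive():
                return  # a run is already scheduled; coalesce this event
            self._timer = threading.Timer(self.delay, self.fn)
            self._timer.start()

    runs = []
    d = Debouncer(0.5, lambda: runs.append(time.monotonic()))
    for _ in range(10):  # simulate a burst of modify/create events
        d.trigger()
        time.sleep(0.01)
    time.sleep(1.0)
    print(len(runs))  # 1: ten events coalesced into a single sync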
--- cnhkmcp-2.1.1/cnhkmcp/untracked/AI桌面插件/requirements.txt
+++ cnhkmcp-2.1.2/cnhkmcp/untracked/AI桌面插件/requirements.txt
@@ -1,7 +1,7 @@
 openai
 Pillow
-fastembed>=0.3.4
-chromadb>=0.5.0
+fastembed>=0.3.6
+chromadb>=0.5.11
 watchdog
 pypdf
 python-docx
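The minimum versions of fastembed and chromadb both move up in this release. A hedged way to check an existing environment against the new floors (assumes the packaging helper package is available; it is not part of the standard library):

    from importlib.metadata import PackageNotFoundError, version
    from packaging.version import Version  # assumption: 'packaging' is installed

    PINS = {"fastembed": "0.3.6", "chromadb": "0.5.11"}

    for pkg, minimum in PINS.items():
        try:
            installed = version(pkg)
            status = "OK" if Version(installed) >= Version(minimum) else "too old"
            print(f"{pkg} {installed}: {status} (needs >={minimum})")
        except PackageNotFoundError:
            print(f"{pkg}: not installed (needs >={minimum})")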
--- cnhkmcp-2.1.1/cnhkmcp/untracked/APP/Tranformer/Transformer.py
+++ cnhkmcp-2.1.2/cnhkmcp/untracked/APP/Tranformer/Transformer.py
@@ -4729,7 +4729,7 @@ async def main():
     input_str = input()
     if input_str == "":
         config_path = os.path.join(os.path.dirname(__file__), 'transformer_config.json')
-        with open(config_path, 'r') as f:
+        with open(config_path, 'r', encoding='utf-8') as f:
             config = json.load(f)
         print("\n" + "="*60)
         print("✓ 已从 transformer_config.json 加载账号配置")
--- cnhkmcp-2.1.1/cnhkmcp/untracked/APP/give_me_idea/helpful_functions.py
+++ cnhkmcp-2.1.2/cnhkmcp/untracked/APP/Tranformer/helpful_functions.py
@@ -122,7 +122,7 @@ def save_simulation_result(result: dict) -> None:
 
     os.makedirs(folder_path, exist_ok=True)
 
-    with open(file_path, "w") as file:
+    with open(file_path, "w", encoding="utf-8") as file:
         json.dump(result, file)
 
 
--- cnhkmcp-2.1.1/cnhkmcp/untracked/AI桌面插件/get_knowledgeBase_tool/helpful_functions.py
+++ cnhkmcp-2.1.2/cnhkmcp/untracked/APP/give_me_idea/helpful_functions.py
@@ -122,7 +122,7 @@ def save_simulation_result(result: dict) -> None:
 
     os.makedirs(folder_path, exist_ok=True)
 
-    with open(file_path, "w") as file:
+    with open(file_path, "w", encoding="utf-8") as file:
         json.dump(result, file)
 
 
--- cnhkmcp-2.1.1/cnhkmcp/untracked/APP/helpful_functions.py
+++ cnhkmcp-2.1.2/cnhkmcp/untracked/APP/helpful_functions.py
@@ -122,7 +122,7 @@ def save_simulation_result(result: dict) -> None:
 
     os.makedirs(folder_path, exist_ok=True)
 
-    with open(file_path, "w") as file:
+    with open(file_path, "w", encoding="utf-8") as file:
         json.dump(result, file)
 
 
--- cnhkmcp-2.1.1/cnhkmcp/untracked/APP/hkSimulator/ace_lib.py
+++ cnhkmcp-2.1.2/cnhkmcp/untracked/APP/hkSimulator/ace_lib.py
@@ -117,7 +117,7 @@ def get_credentials() -> tuple[str, str]:
     credentials_file_path = os.path.join(credentials_folder_path, "platform-brain.json")
 
     if Path(credentials_file_path).exists() and os.path.getsize(credentials_file_path) > 2:
-        with open(credentials_file_path) as file:
+        with open(credentials_file_path, encoding="utf-8") as file:
             data = json.loads(file.read())
     else:
         os.makedirs(credentials_folder_path, exist_ok=True)
@@ -128,7 +128,7 @@ def get_credentials() -> tuple[str, str]:
         email = input("Email:\n")
         password = getpass.getpass(prompt="Password:")
         data = {"email": email, "password": password}
-        with open(credentials_file_path, "w") as file:
+        with open(credentials_file_path, "w", encoding="utf-8") as file:
             json.dump(data, file)
     return (data["email"], data["password"])
 
--- cnhkmcp-2.1.1/cnhkmcp/untracked/APP/hkSimulator/autosimulator.py
+++ cnhkmcp-2.1.2/cnhkmcp/untracked/APP/hkSimulator/autosimulator.py
@@ -241,7 +241,7 @@ def get_user_json():
     json_path = re.sub(r'^["\']+|["\']+$', '', raw_path.strip())
     if os.path.exists(json_path):
         try:
-            with open(json_path, 'r') as f:
+            with open(json_path, 'r', encoding='utf-8') as f:
                 alpha_list = json.load(f)
                 # Check master log for previous progress
                 file_name = os.path.basename(json_path)
@@ -349,7 +349,7 @@ def simulation_worker(session, alpha_list, mode, json_path, location_path, retry
             sent_count += len(batch)
             update_master_log(json_path, sent_count - 1)
             locations[str(time.time())] = location
-            with open(location_path, 'w') as f:
+            with open(location_path, 'w', encoding='utf-8') as f:
                 json.dump(locations, f, indent=2)
             # Do NOT overwrite the input JSON file
             logger.info(f'Simulation sent, location(s) saved: {location}')
@@ -370,7 +370,7 @@ def result_worker(session, location_path, result_path, poll_interval=30):
         if not os.path.exists(location_path):
             time.sleep(poll_interval)
             continue
-        with open(location_path, 'r') as f:
+        with open(location_path, 'r', encoding='utf-8') as f:
            locations = json.load(f)
        for loc_key, loc_val in locations.items():
            if loc_key in results:
@@ -418,7 +418,7 @@ def result_worker(session, location_path, result_path, poll_interval=30):
                 alpha = session.get(f"https://api.worldquantbrain.com/alphas/{alpha_id}")
                 results[loc_key] = alpha.json()
                 logger.info(f"Result fetched for location {loc_val}")
-                with open(result_path, 'w') as f:
+                with open(result_path, 'w', encoding='utf-8') as f:
                     json.dump(results, f, indent=2)
             except Exception as e:
                 logger.error(f'Error fetching result for {loc_val}: {e}')