cnhkmcp 2.1.0.tar.gz → 2.1.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. {cnhkmcp-2.1.0/cnhkmcp.egg-info → cnhkmcp-2.1.2}/PKG-INFO +1 -1
  2. {cnhkmcp-2.1.0/cnhkmcp/untracked/APP/give_me_idea → cnhkmcp-2.1.2/cnhkmcp/untracked/AI桌面插件/get_knowledgeBase_tool}/helpful_functions.py +1 -1
  3. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/main.py +0 -5
  4. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/process_knowledge_base.py +73 -72
  5. cnhkmcp-2.1.2/cnhkmcp/untracked/AI桌面插件/rag_engine.py +408 -0
  6. cnhkmcp-2.1.2/cnhkmcp/untracked/AI桌面插件/requirements.txt +7 -0
  7. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/Transformer.py +1 -1
  8. cnhkmcp-2.1.2/cnhkmcp/untracked/APP/Tranformer/ace.log +0 -0
  9. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/helpful_functions.py +1 -1
  10. {cnhkmcp-2.1.0/cnhkmcp/untracked/AI桌面插件/get_knowledgeBase_tool → cnhkmcp-2.1.2/cnhkmcp/untracked/APP/give_me_idea}/helpful_functions.py +1 -1
  11. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/helpful_functions.py +1 -1
  12. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/hkSimulator/ace_lib.py +2 -2
  13. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/hkSimulator/autosimulator.py +4 -4
  14. cnhkmcp-2.1.2/cnhkmcp/untracked/APP/hkSimulator/helpful_functions.py +180 -0
  15. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/simulator/simulator_wqb.py +1 -1
  16. cnhkmcp-2.1.2/cnhkmcp/untracked/APP/缘分一道桥/helpful_functions.py +180 -0
  17. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/mcp文件论坛版2_如果原版启动不了浏览器就试这个/platform_functions.py +2 -2
  18. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/platform_functions.py +1 -1
  19. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2/cnhkmcp.egg-info}/PKG-INFO +1 -1
  20. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp.egg-info/SOURCES.txt +1 -1
  21. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/setup.py +1 -1
  22. cnhkmcp-2.1.0/cnhkmcp/untracked/AI桌面插件/rag_engine.py +0 -265
  23. cnhkmcp-2.1.0/cnhkmcp/untracked/AI桌面插件/requirements.txt +0 -12
  24. cnhkmcp-2.1.0/cnhkmcp/untracked/AI桌面插件/vector_db/chroma.sqlite3 +0 -0
  25. cnhkmcp-2.1.0/cnhkmcp/untracked/APP/hkSimulator/helpful_functions.py +0 -180
  26. cnhkmcp-2.1.0/cnhkmcp/untracked/APP/缘分一道桥/helpful_functions.py +0 -180
  27. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/LICENSE +0 -0
  28. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/MANIFEST.in +0 -0
  29. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/README.md +0 -0
  30. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/__init__.py +0 -0
  31. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/README.md +0 -0
  32. cnhkmcp-2.1.0/cnhkmcp/untracked/APP/Tranformer/ace.log → cnhkmcp-2.1.2/cnhkmcp/untracked/AI桌面插件/ace.log +0 -0
  33. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/config.json +0 -0
  34. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/get_knowledgeBase_tool/ace_lib.py +0 -0
  35. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/get_knowledgeBase_tool/fetch_all_datasets.py +0 -0
  36. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/get_knowledgeBase_tool/fetch_all_documentation.py +0 -0
  37. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/get_knowledgeBase_tool/fetch_all_operators.py +0 -0
  38. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/icon.ico +0 -0
  39. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/icon.png +0 -0
  40. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/knowledge/test.txt +0 -0
  41. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/run.bat +0 -0
  42. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/AI桌面插件/首次运行打开我.py +0 -0
  43. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/.gitignore +0 -0
  44. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/MODULAR_STRUCTURE.md +0 -0
  45. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/README.md +0 -0
  46. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/ace_lib.py +0 -0
  47. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/output/Alpha_candidates.json +0 -0
  48. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/output/Alpha_candidates_示例.json +0 -0
  49. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_error.json +0 -0
  50. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_success.json +0 -0
  51. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_示例可直接载入Machine_lib.json +0 -0
  52. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/parsetab.py +0 -0
  53. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/template_summary.txt +0 -0
  54. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/transformer_config.json +0 -0
  55. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/Tranformer/validator.py +0 -0
  56. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/ace.log +0 -0
  57. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/ace_lib.py +0 -0
  58. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/blueprints/__init__.py +0 -0
  59. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/blueprints/feature_engineering.py +0 -0
  60. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/blueprints/idea_house.py +0 -0
  61. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/blueprints/inspiration_house.py +0 -0
  62. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/blueprints/paper_analysis.py +0 -0
  63. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/custom_templates/templates.json +0 -0
  64. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/give_me_idea/BRAIN_Alpha_Template_Expert_SystemPrompt.md +0 -0
  65. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/give_me_idea/ace_lib.py +0 -0
  66. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/give_me_idea/alpha_data_specific_template_master.py +0 -0
  67. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/give_me_idea/fetch_all_datasets.py +0 -0
  68. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/give_me_idea/fetch_all_operators.py +0 -0
  69. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/give_me_idea/what_is_Alpha_template.md +0 -0
  70. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/mirror_config.txt +0 -0
  71. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/operaters.csv +0 -0
  72. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/requirements.txt +0 -0
  73. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/run_app.bat +0 -0
  74. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/run_app.sh +0 -0
  75. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/setup_tsinghua.bat +0 -0
  76. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/setup_tsinghua.sh +0 -0
  77. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/simulator/alpha_submitter.py +0 -0
  78. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/ssrn-3332513.pdf +0 -0
  79. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/brain.js +0 -0
  80. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/decoder.js +0 -0
  81. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/feature_engineering.js +0 -0
  82. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/idea_house.js +0 -0
  83. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/inspiration.js +0 -0
  84. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/inspiration_house.js +0 -0
  85. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/paper_analysis.js +0 -0
  86. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/script.js +0 -0
  87. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/simulator.js +0 -0
  88. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/styles.css +0 -0
  89. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/static/usage_widget.js +0 -0
  90. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/templates/alpha_inspector.html +0 -0
  91. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/templates/feature_engineering.html +0 -0
  92. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/templates/idea_house.html +0 -0
  93. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/templates/index.html +0 -0
  94. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/templates/inspiration_house.html +0 -0
  95. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/templates/paper_analysis.html +0 -0
  96. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/templates/simulator.html +0 -0
  97. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/templates/transformer_web.html +0 -0
  98. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/usage.md +0 -0
  99. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/缘分一道桥/ace_lib.py +0 -0
  100. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/缘分一道桥/brain_alpha_inspector.py +0 -0
  101. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/APP/运行打开我.py +0 -0
  102. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/arXiv_API_Tool_Manual.md +0 -0
  103. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/arxiv_api.py +0 -0
  104. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/forum_functions.py +0 -0
  105. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/mcp文件论坛版2_如果原版启动不了浏览器就试这个/forum_functions.py +0 -0
  106. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/mcp文件论坛版2_如果原版启动不了浏览器就试这个/user_config.json +0 -0
  107. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/mcp文件论坛版2_如果原版启动不了浏览器就试这个/让AI读这个文档来学会下载浏览器.md +0 -0
  108. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/mcp文件论坛版2_如果原版启动不了浏览器就试这个/配置前运行我_安装必要依赖包.py +0 -0
  109. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/sample_mcp_config.json +0 -0
  110. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/user_config.json +0 -0
  111. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/示例参考文档_BRAIN_Alpha_Test_Requirements_and_Tips.md +0 -0
  112. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/示例工作流_Alpha_explaination_workflow.md +0 -0
  113. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/示例工作流_BRAIN_6_Tips_Datafield_Exploration_Guide.md +0 -0
  114. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/示例工作流_BRAIN_Alpha_Improvement_Workflow.md +0 -0
  115. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/示例工作流_Dataset_Exploration_Expert_Manual.md +0 -0
  116. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/示例工作流_daily_report_workflow.md +0 -0
  117. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp/untracked/配置前运行我_安装必要依赖包.py +0 -0
  118. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp.egg-info/dependency_links.txt +0 -0
  119. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp.egg-info/entry_points.txt +0 -0
  120. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp.egg-info/not-zip-safe +0 -0
  121. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp.egg-info/requires.txt +0 -0
  122. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/cnhkmcp.egg-info/top_level.txt +0 -0
  123. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/requirements.txt +0 -0
  124. {cnhkmcp-2.1.0 → cnhkmcp-2.1.2}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cnhkmcp
-Version: 2.1.0
+Version: 2.1.2
 Summary: A comprehensive Model Context Protocol (MCP) server for quantitative trading platform integration
 Home-page: https://github.com/cnhk/cnhkmcp
 Author: CNHK
@@ -122,7 +122,7 @@ def save_simulation_result(result: dict) -> None:

     os.makedirs(folder_path, exist_ok=True)

-    with open(file_path, "w") as file:
+    with open(file_path, "w", encoding="utf-8") as file:
         json.dump(result, file)


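A note on the encoding fixes in this release: all three patched open() call sites (the two save_simulation_result copies and the transformer config read further down) gain an explicit encoding="utf-8". Without it, Python falls back to locale.getpreferredencoding(), which on many Windows installs is a legacy code page (cp936, cp1252), so UTF-8 JSON containing non-ASCII text may not round-trip. A minimal sketch of the failure mode this avoids; the file name is hypothetical, not taken from the package:

    import json

    # Write UTF-8 JSON containing non-ASCII text (as this package's Chinese docs do).
    with open("config.json", "w", encoding="utf-8") as f:
        json.dump({"note": "账号配置"}, f, ensure_ascii=False)

    # Reading it back without an explicit encoding decodes with the locale code
    # page on Windows, which can raise UnicodeDecodeError or silently garble
    # the text:
    #     with open("config.json", "r") as f:   # locale-dependent
    #         config = json.load(f)
    # The patched form is deterministic on every platform:
    with open("config.json", "r", encoding="utf-8") as f:
        config = json.load(f)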
@@ -4,7 +4,6 @@ import base64
 import tkinter as tk
 from tkinter import scrolledtext, messagebox, Toplevel
 from PIL import Image, ImageTk, ImageGrab
-import pyautogui
 from openai import OpenAI
 import threading
 import io
@@ -21,14 +20,10 @@ def install_dependencies():
     # Mapping of package names to their import names (if different)
     packages = {
         "openai": "openai",
-        "pyautogui": "pyautogui",
         "Pillow": "PIL",
-        "pyperclip": "pyperclip",
-        "keyboard": "keyboard",
         "fastembed": "fastembed",
         "chromadb": "chromadb",
         "watchdog": "watchdog",
-        "urllib3": "urllib3",
         "pypdf": "pypdf",
         "python-docx": "docx"
     }
@@ -16,17 +16,18 @@ if TOOL_DIR not in sys.path:
     sys.path.insert(0, TOOL_DIR)
 import ace_lib
 from fetch_all_operators import fetch_operators, prompt_credentials
-from fetch_all_datasets import (
-    fetch_all_combinations,
-    fetch_datasets_for_combo,
-    merge_and_deduplicate,
-)
 from fetch_all_documentation import (
     fetch_tutorials,
     fetch_tutorial_pages,
     fetch_page,
     _extract_page_id,
 )
+# Dataset fetching currently disabled per request
+# from fetch_all_datasets import (
+#     fetch_all_combinations,
+#     fetch_datasets_for_combo,
+#     merge_and_deduplicate,
+# )


 def ensure_knowledge_dir():
@@ -107,62 +108,63 @@ def process_operators(session: ace_lib.SingleSession, knowledge_dir: str):
         print(f"✓ Created {filename} with {len(category_list)} operators")


-def process_datasets(session: ace_lib.SingleSession, dataset_dir: str):
-    """Fetch datasets and save one JSON per region."""
-    print("=== Processing Datasets ===")
-
-    print("Fetching valid instrument/region/delay/universe combinations...")
-    options_df = fetch_all_combinations(session)
-    if options_df is None or options_df.empty:
-        print("No simulation options fetched; aborting dataset fetch.")
-        return
-
-    all_datasets: list[pd.DataFrame] = []
-    combo_idx = 0
-
-    for _, row in options_df.iterrows():
-        instrument_type = row.get("InstrumentType")
-        region = row.get("Region")
-        delay = row.get("Delay")
-        universes = row.get("Universe") or []
-
-        for universe in universes:
-            combo_idx += 1
-            print(f"[{combo_idx}] {instrument_type} / {region} / D{delay} / {universe}")
-            try:
-                df = fetch_datasets_for_combo(session, instrument_type, region, delay, universe)
-                print(f"  -> {len(df)} rows")
-                all_datasets.append(df)
-            except Exception as exc:
-                print(f"  -> Failed: {exc}")
-
-    if not all_datasets:
-        print("No datasets fetched; nothing to save.")
-        return
-
-    combined_df = pd.concat([df for df in all_datasets if not df.empty], ignore_index=True)
-    if combined_df.empty:
-        print("No datasets fetched; nothing to save.")
-        return
-
-    regions = sorted(combined_df["param_region"].dropna().unique())
-    print(f"Found regions: {', '.join(regions)}")
-
-    for region in regions:
-        region_df = combined_df[combined_df["param_region"] == region]
-        region_unique = merge_and_deduplicate([region_df])
-
-        region_list = []
-        for _, row in region_unique.iterrows():
-            record = {col: to_jsonable(row[col]) for col in row.index}
-            region_list.append(record)
-
-        filename = f"{region.replace(' ', '_').lower()}_datasets.json"
-        filepath = os.path.join(dataset_dir, filename)
-        with open(filepath, "w", encoding="utf-8") as f:
-            json.dump(region_list, f, ensure_ascii=False, indent=2)
-
-        print(f"✓ Created {filename} with {len(region_list)} datasets")
+# Dataset fetching intentionally disabled; keep for potential re-enable.
+# def process_datasets(session: ace_lib.SingleSession, dataset_dir: str):
+#     """Fetch datasets and save one JSON per region."""
+#     print("=== Processing Datasets ===")
+#
+#     print("Fetching valid instrument/region/delay/universe combinations...")
+#     options_df = fetch_all_combinations(session)
+#     if options_df is None or options_df.empty:
+#         print("No simulation options fetched; aborting dataset fetch.")
+#         return
+#
+#     all_datasets: list[pd.DataFrame] = []
+#     combo_idx = 0
+#
+#     for _, row in options_df.iterrows():
+#         instrument_type = row.get("InstrumentType")
+#         region = row.get("Region")
+#         delay = row.get("Delay")
+#         universes = row.get("Universe") or []
+#
+#         for universe in universes:
+#             combo_idx += 1
+#             print(f"[{combo_idx}] {instrument_type} / {region} / D{delay} / {universe}")
+#             try:
+#                 df = fetch_datasets_for_combo(session, instrument_type, region, delay, universe)
+#                 print(f"  -> {len(df)} rows")
+#                 all_datasets.append(df)
+#             except Exception as exc:
+#                 print(f"  -> Failed: {exc}")
+#
+#     if not all_datasets:
+#         print("No datasets fetched; nothing to save.")
+#         return
+#
+#     combined_df = pd.concat([df for df in all_datasets if not df.empty], ignore_index=True)
+#     if combined_df.empty:
+#         print("No datasets fetched; nothing to save.")
+#         return
+#
+#     regions = sorted(combined_df["param_region"].dropna().unique())
+#     print(f"Found regions: {', '.join(regions)}")
+#
+#     for region in regions:
+#         region_df = combined_df[combined_df["param_region"] == region]
+#         region_unique = merge_and_deduplicate([region_df])
+#
+#         region_list = []
+#         for _, row in region_unique.iterrows():
+#             record = {col: to_jsonable(row[col]) for col in row.index}
+#             region_list.append(record)
+#
+#         filename = f"{region.replace(' ', '_').lower()}_datasets.json"
+#         filepath = os.path.join(dataset_dir, filename)
+#         with open(filepath, "w", encoding="utf-8") as f:
+#             json.dump(region_list, f, ensure_ascii=False, indent=2)
+#
+#         print(f"✓ Created {filename} with {len(region_list)} datasets")


 def process_documentation(session: ace_lib.SingleSession, knowledge_dir: str):
@@ -240,7 +242,7 @@ def main():

     # Ensure knowledge directory exists
     knowledge_dir = ensure_knowledge_dir()
-    dataset_dir = knowledge_dir  # Save datasets directly under knowledge
+    # dataset_dir = knowledge_dir  # Save datasets directly under knowledge (disabled)
     print(f"Knowledge directory: {knowledge_dir}\n")

     # Process documentation (tutorials/pages)
@@ -262,16 +264,15 @@
         traceback.print_exc()
         return

-    # Process datasets by region
-    print("\nStarting dataset processing...\n")
-    try:
-        process_datasets(session, dataset_dir)
-    except Exception as exc:
-        print(f"✗ Failed to process datasets: {exc}")
-        import traceback
-        traceback.print_exc()
-        return
-
+    # Dataset processing disabled; re-enable by uncommenting the block below.
+    # print("\nStarting dataset processing...\n")
+    # try:
+    #     process_datasets(session, dataset_dir)
+    # except Exception as exc:
+    #     print(f"✗ Failed to process datasets: {exc}")
+    #     import traceback
+    #     traceback.print_exc()
+    #     return

     print("\n=== Processing Complete ===")

@@ -0,0 +1,408 @@
+import os
+import json
+import shutil
+import chromadb
+from fastembed import TextEmbedding
+from watchdog.observers import Observer
+from watchdog.events import FileSystemEventHandler
+import threading
+
+
+PREFERRED_MODELS = [
+    "jinaai/jina-embeddings-v2-base-zh",  # good with mixed Chinese/English, ~0.64 GB
+    "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",  # multilingual, ~50 languages
+    "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",  # lightweight multilingual
+    "intfloat/multilingual-e5-large",  # stronger multilingual, ~2.2 GB
+]
+
+# Final chosen model will be detected at runtime from supported list
+MODEL_NAME = None
+COLLECTION_NAME = "brain_kb_v5"
+BATCH_SIZE = 128  # batch upserts to avoid huge single writes
+
+# Optional imports for different file types
+try:
+    from pypdf import PdfReader
+except ImportError:
+    PdfReader = None
+
+try:
+    from docx import Document
+except ImportError:
+    Document = None
+
+class KnowledgeBase:
+    def __init__(self, kb_path="knowledge", db_path="vector_db"):
+        self.kb_path = os.path.abspath(kb_path)
+        self.db_path = os.path.abspath(db_path)
+        self.meta_path = os.path.join(self.db_path, "_meta.json")
+        self.manifest_path = os.path.join(self.db_path, "_manifest.json")
+        self._collection_reset_guard = False
+        self._query_reset_guard = False
+        self._sync_lock = threading.Lock()
+
+        if not os.path.exists(self.kb_path):
+            os.makedirs(self.kb_path)
+
+        # Initialize Embedding Model (BAAI/bge-small-zh-v1.5 is ~100MB)
+        # This will load from cache if already downloaded
+        # Pick the first available model from the preferred list
+        _supported_raw = TextEmbedding.list_supported_models()
+        supported = set()
+        for item in _supported_raw:
+            if isinstance(item, dict) and "model" in item:
+                supported.add(item["model"])
+            elif isinstance(item, str):
+                supported.add(item)
+        chosen = None
+        for name in PREFERRED_MODELS:
+            if name in supported:
+                chosen = name
+                break
+        if not chosen:
+            raise RuntimeError(
+                "No preferred embedding models are supported by fastembed. "
+                "Please check available models via TextEmbedding.list_supported_models()."
+            )
+
+        print(f"Loading Knowledge Base Embedding Model: {chosen} (may take some time on first run)...")
+        try:
+            self.model = TextEmbedding(model_name=chosen)
+            print("Embedding Model loaded successfully.")
+        except Exception as e:
+            print(f"Error loading embedding model: {e}")
+            raise
+
+        # Store chosen model name for reference
+        global MODEL_NAME
+        MODEL_NAME = chosen
+
+        # Cache embedding dimension (detects library/model changes that corrupt existing indexes)
+        self.embed_dim = self._get_embedding_dim()
+        self.chroma_version = getattr(chromadb, "__version__", "unknown")
+
+        # If the stored index was built with a different model/dimension/chromadb version, wipe it
+        self._maybe_reset_for_incompatibility(chosen, self.embed_dim, self.chroma_version)
+
+        # Initialize Vector DB
+        self._init_collection()
+        self._healthcheck()
+
+        # Initial sync
+        self.sync_knowledge()
+
+        # Start Watcher
+        self.start_watcher()
+
+    def _init_collection(self, recreate: bool = False):
+        """(Re)initialize Chroma client/collection. If recreate=True, wipe on-disk index."""
+        if recreate and os.path.exists(self.db_path):
+            shutil.rmtree(self.db_path, ignore_errors=True)
+        try:
+            self.client = chromadb.PersistentClient(path=self.db_path)
+            self.collection = self.client.get_or_create_collection(
+                name=COLLECTION_NAME,
+                metadata={"hnsw:space": "cosine"}
+            )
+        except Exception as exc:
+            # If collection load itself fails, wipe and retry once to clear corrupted segments
+            if not recreate:
+                shutil.rmtree(self.db_path, ignore_errors=True)
+                return self._init_collection(recreate=True)
+            raise
+
+        # Persist metadata about the embedding model used to build this index
+        try:
+            os.makedirs(self.db_path, exist_ok=True)
+            with open(self.meta_path, "w", encoding="utf-8") as f:
+                json.dump({
+                    "model": MODEL_NAME,
+                    "embed_dim": self.embed_dim,
+                    "chroma_version": self.chroma_version,
+                }, f)
+        except Exception:
+            pass  # Metadata failure should not block runtime
+
+    def _healthcheck(self):
+        """Validate index readability right after startup; rebuild if corrupted."""
+        try:
+            _ = self.collection.count()
+        except Exception as e:
+            msg = str(e).lower()
+            if any(x in msg for x in ["hnsw", "segment", "compaction", "backfill"]):
+                print("Detected index corruption on startup. Rebuilding vector_db...")
+                shutil.rmtree(self.db_path, ignore_errors=True)
+                self._init_collection(recreate=True)
+                self.sync_knowledge(allow_reset=False)
+            else:
+                print(f"Index healthcheck encountered an unexpected error: {e}")
+
+    def _maybe_reset_for_incompatibility(self, chosen_model: str, embed_dim: int, chroma_version: str):
+        """If existing index meta differs (model/dimension/chromadb), wipe it."""
+        if not os.path.exists(self.db_path):
+            return
+        try:
+            with open(self.meta_path, "r", encoding="utf-8") as f:
+                meta = json.load(f)
+            prev_model = meta.get("model")
+            prev_dim = meta.get("embed_dim")
+            prev_chroma = meta.get("chroma_version")
+            if prev_model != chosen_model or prev_dim != embed_dim or prev_chroma != chroma_version:
+                shutil.rmtree(self.db_path, ignore_errors=True)
+        except Exception:
+            # If meta cannot be read, assume stale/corrupted and rebuild
+            shutil.rmtree(self.db_path, ignore_errors=True)
+
+    def _get_embedding_dim(self) -> int:
+        for vec in self.model.embed(["dimension_probe"]):
+            try:
+                return len(vec)
+            except Exception:
+                return len(list(vec))
+        raise RuntimeError("Failed to determine embedding dimension")
+
+    def sync_knowledge(self, allow_reset: bool = True):
+        """Scans the knowledge folder and updates the vector database."""
+        if not self._sync_lock.acquire(blocking=False):
+            print("Sync already running, skip this trigger.")
+            return
+
+        print("Syncing knowledge base...")
+        manifest = self._load_manifest()
+        updated_manifest = {}
+        supported_extensions = (".txt", ".md", ".pdf", ".docx", ".json")
+        current_files = []
+        try:
+            for filename in os.listdir(self.kb_path):
+                file_path = os.path.join(self.kb_path, filename)
+                if os.path.isfile(file_path) and filename.lower().endswith(supported_extensions):
+                    current_files.append(filename)
+                    mtime = os.path.getmtime(file_path)
+                    size = os.path.getsize(file_path)
+                    prev_meta = manifest.get(filename)
+                    # Skip unchanged files
+                    if prev_meta and prev_meta.get("mtime") == mtime and prev_meta.get("size") == size:
+                        updated_manifest[filename] = prev_meta
+                        continue
+                    try:
+                        content = self._extract_text(file_path)
+                        if content:
+                            # Sliding window chunking on original text
+                            chunk_size = 800
+                            overlap = 80
+                            original_chunks = []
+                            for i in range(0, len(content), chunk_size - overlap):
+                                chunk = content[i:i + chunk_size].strip()
+                                if chunk:
+                                    original_chunks.append(chunk)
+
+                            if original_chunks:
+                                # Normalize for embedding generation only (not for storage)
+                                normalized_chunks = [c.lower().replace('_', ' ') for c in original_chunks]
+
+                                ids = [f"{filename}_{i}" for i in range(len(original_chunks))]
+                                metadatas = [{"source": filename, "chunk": i} for i in range(len(original_chunks))]
+
+                                # Compute embeddings from normalized text
+                                embeddings = []
+                                for v in self.model.embed(normalized_chunks):
+                                    try:
+                                        embeddings.append(v.tolist())
+                                    except Exception:
+                                        embeddings.append(list(v))
+
+                                # Store ORIGINAL text (not normalized) so users see the real content
+                                for start in range(0, len(original_chunks), BATCH_SIZE):
+                                    end = start + BATCH_SIZE
+                                    self.collection.upsert(
+                                        documents=original_chunks[start:end],
+                                        ids=ids[start:end],
+                                        metadatas=metadatas[start:end],
+                                        embeddings=embeddings[start:end]
+                                    )
+                                print(f"  ✓ Indexed {filename}: {len(original_chunks)} chunks (batched)")
+                        updated_manifest[filename] = {"mtime": mtime, "size": size}
+                    except Exception as e:
+                        err_msg = str(e)
+                        print(f"Error processing {filename}: {err_msg}")
+                        # Auto-recover if HNSW/compaction/index errors occur
+                        if allow_reset and any(x in err_msg.lower() for x in ["hnsw", "compaction", "segment reader"]):
+                            if not self._collection_reset_guard:
+                                print("Detected index corruption. Rebuilding vector_db and retrying sync once...")
+                                self._collection_reset_guard = True
+                                self._init_collection(recreate=True)
+                                return self.sync_knowledge(allow_reset=False)
+            # Remove deleted files from the index
+            deleted_files = set(manifest.keys()) - set(current_files)
+            for filename in deleted_files:
+                try:
+                    self.collection.delete(where={"source": filename})
+                    print(f"  ✓ Removed deleted file from index: {filename}")
+                except Exception as e:
+                    print(f"  ! Failed to remove {filename}: {e}")
+            # Persist manifest
+            self._save_manifest(updated_manifest)
+            print("Knowledge base sync complete.")
+        finally:
+            self._sync_lock.release()
+
+    def _extract_text(self, file_path):
+        ext = os.path.splitext(file_path)[1].lower()
+        if ext == ".txt":
+            with open(file_path, 'r', encoding='utf-8') as f:
+                return f.read()
+        elif ext == ".md":
+            # Treat Markdown as plain text for retrieval
+            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                return f.read()
+        elif ext == ".pdf":
+            if PdfReader:
+                reader = PdfReader(file_path)
+                text = ""
+                for page in reader.pages:
+                    text += page.extract_text() + "\n"
+                return text
+            else:
+                print("pypdf not installed, skipping PDF.")
+        elif ext == ".docx":
+            if Document:
+                doc = Document(file_path)
+                return "\n".join([para.text for para in doc.paragraphs])
+            else:
+                print("python-docx not installed, skipping Word.")
+        elif ext == ".json":
+            with open(file_path, 'r', encoding='utf-8') as f:
+                data = json.load(f)
+            return json.dumps(data, ensure_ascii=False, indent=2)
+        return None
+
+    def query(self, text, top_k=5, distance_threshold=0.8, allow_reset: bool = True):
+        """Retrieves relevant snippets from the knowledge base.
+
+        Uses cosine distance (lower is better). A result is treated as a hit only
+        when best_distance <= distance_threshold.
+        Returns:
+            dict: {"hit": bool, "context": str, "hits": [{source, chunk, distance, text}, ...]}
+        """
+        try:
+            # Normalize query same as indexed content
+            normalized_text = text.lower().replace('_', ' ')
+
+            q_vec = None
+            for v in self.model.embed([normalized_text]):
+                try:
+                    q_vec = v.tolist()
+                except Exception:
+                    q_vec = list(v)
+                break
+            if q_vec is None:
+                return {"hit": False, "context": "", "hits": []}
+
+            results = self.collection.query(
+                query_embeddings=[q_vec],
+                n_results=top_k,
+                include=["documents", "metadatas", "distances"]
+            )
+
+            docs = (results or {}).get("documents") or []
+            metas = (results or {}).get("metadatas") or []
+            dists = (results or {}).get("distances") or []
+
+            if not docs or not docs[0]:
+                print("[KB Query] No results returned from collection")
+                return {"hit": False, "context": "", "hits": []}
+
+            docs0 = docs[0]
+            metas0 = metas[0] if metas and metas[0] else [{} for _ in docs0]
+            dists0 = dists[0] if dists and dists[0] else [None for _ in docs0]
+
+            hits = []
+            for doc_text, meta, dist in zip(docs0, metas0, dists0):
+                hits.append({
+                    "source": (meta or {}).get("source", ""),
+                    "chunk": (meta or {}).get("chunk", None),
+                    "distance": dist,
+                    "text": doc_text,
+                })
+
+            best = hits[0].get("distance")
+            is_hit = (best is not None) and (best <= distance_threshold)
+
+            # Debug log
+            best_str = f"{best:.4f}" if best is not None else "N/A"
+            print(f"[KB Query] '{text[:50]}...' -> best_dist={best_str}, threshold={distance_threshold}, hit={is_hit}")
+            if hits:
+                top3_dists = [f"{h['distance']:.4f}" if h['distance'] is not None else "N/A" for h in hits[:3]]
+                print(f"[KB Query] Top 3 distances: {top3_dists}")
+
+            context = "\n---\n".join([h["text"] for h in hits]) if is_hit else ""
+            return {"hit": is_hit, "context": context, "hits": hits}
+        except Exception as e:
+            err_msg = str(e)
+            print(f"Query error: {err_msg}")
+            import traceback
+            traceback.print_exc()
+
+            # Auto-recover if HNSW/compaction/backfill errors surface during query
+            if allow_reset and any(x in err_msg.lower() for x in ["hnsw", "compaction", "segment reader", "backfill"]):
+                if not self._query_reset_guard:
+                    print("Detected index corruption during query. Rebuilding vector_db and retrying once...")
+                    self._query_reset_guard = True
+                    try:
+                        self._init_collection(recreate=True)
+                        self.sync_knowledge(allow_reset=False)
+                        # Retry query once with guard disabled to avoid loops
+                        self._query_reset_guard = False
+                        return self.query(text, top_k=top_k, distance_threshold=distance_threshold, allow_reset=False)
+                    except Exception as inner_e:
+                        print(f"Auto-rebuild after query failure also failed: {inner_e}")
+                        self._query_reset_guard = False
+            return {"hit": False, "context": "", "hits": []}
+
+    def start_watcher(self):
+        event_handler = KBHandler(self)
+        self.observer = Observer()
+        self.observer.schedule(event_handler, self.kb_path, recursive=False)
+        self.observer.start()
+
+    def _load_manifest(self):
+        if not os.path.exists(self.manifest_path):
+            return {}
+        try:
+            with open(self.manifest_path, "r", encoding="utf-8") as f:
+                return json.load(f)
+        except Exception:
+            return {}
+
+    def _save_manifest(self, data):
+        try:
+            os.makedirs(self.db_path, exist_ok=True)
+            with open(self.manifest_path, "w", encoding="utf-8") as f:
+                json.dump(data, f, ensure_ascii=False, indent=2)
+        except Exception as e:
+            print(f"  ! Failed to save manifest: {e}")
+
+class KBHandler(FileSystemEventHandler):
+    def __init__(self, kb_instance):
+        self.kb = kb_instance
+        self.supported_extensions = (".txt", ".md", ".pdf", ".docx", ".json")
+        self._debounce_timer = None
+
+    def _trigger_sync(self):
+        def run():
+            self.kb.sync_knowledge()
+        if self._debounce_timer and self._debounce_timer.is_alive():
+            return
+        self._debounce_timer = threading.Timer(0.5, run)
+        self._debounce_timer.start()
+
+    def on_modified(self, event):
+        if not event.is_directory and event.src_path.lower().endswith(self.supported_extensions):
+            print(f"File modified: {event.src_path}. Re-syncing...")
+            self._trigger_sync()
+
+    def on_created(self, event):
+        if not event.is_directory and event.src_path.lower().endswith(self.supported_extensions):
+            print(f"File created: {event.src_path}. Syncing...")
+            self._trigger_sync()
+
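For orientation, the new rag_engine.py above is self-contained: constructing KnowledgeBase indexes the knowledge/ folder into a persistent Chroma collection and starts a watchdog observer for live re-syncs, and query() is the only call a consumer needs afterwards. A minimal usage sketch under those assumptions (the query text and printed fields are illustrative, not taken from the package):

    from rag_engine import KnowledgeBase

    # Builds (or reuses) ./vector_db from ./knowledge, then watches for changes.
    kb = KnowledgeBase(kb_path="knowledge", db_path="vector_db")

    # Cosine distance, lower is better; "hit" means best distance <= threshold.
    res = kb.query("how to fetch BRAIN operators", top_k=5, distance_threshold=0.8)
    if res["hit"]:
        print(res["context"])  # matching chunks joined with "---" separators
    for h in res["hits"][:3]:
        print(h["source"], h["chunk"], h["distance"])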
@@ -0,0 +1,7 @@
+openai
+Pillow
+fastembed>=0.3.6
+chromadb>=0.5.11
+watchdog
+pypdf
+python-docx
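The seven entries above mirror the trimmed packages dict in main.py (pyautogui, pyperclip, keyboard, and urllib3 are gone), so a plain install now pulls in only what the plugin imports:

    pip install -r requirements.txt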
@@ -4729,7 +4729,7 @@ async def main():
     input_str = input()
     if input_str == "":
         config_path = os.path.join(os.path.dirname(__file__), 'transformer_config.json')
-        with open(config_path, 'r') as f:
+        with open(config_path, 'r', encoding='utf-8') as f:
             config = json.load(f)
         print("\n" + "="*60)
         print("✓ 已从 transformer_config.json 加载账号配置")
@@ -122,7 +122,7 @@ def save_simulation_result(result: dict) -> None:

     os.makedirs(folder_path, exist_ok=True)

-    with open(file_path, "w") as file:
+    with open(file_path, "w", encoding="utf-8") as file:
         json.dump(result, file)

