cnhkmcp 2.1.2__py3-none-any.whl → 2.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. {cnhkmcp-2.1.2.dist-info → cnhkmcp-2.1.3.dist-info}/METADATA +1 -1
  2. cnhkmcp-2.1.3.dist-info/RECORD +6 -0
  3. cnhkmcp-2.1.3.dist-info/top_level.txt +1 -0
  4. cnhkmcp/__init__.py +0 -125
  5. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/README.md +0 -38
  6. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/ace.log +0 -0
  7. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/config.json +0 -6
  8. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/ace_lib.py +0 -1510
  9. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/fetch_all_datasets.py +0 -157
  10. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/fetch_all_documentation.py +0 -132
  11. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/fetch_all_operators.py +0 -99
  12. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/helpful_functions.py +0 -180
  13. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/icon.ico +0 -0
  14. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/icon.png +0 -0
  15. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/test.txt +0 -1
  16. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/main.py +0 -576
  17. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/process_knowledge_base.py +0 -281
  18. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/rag_engine.py +0 -408
  19. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/requirements.txt +0 -7
  20. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/run.bat +0 -3
  21. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242//321/211/320/266/320/246/321/206/320/274/320/261/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/231/320/243/321/205/342/225/235/320/220/321/206/320/230/320/241.py +0 -265
  22. cnhkmcp/untracked/APP/.gitignore +0 -32
  23. cnhkmcp/untracked/APP/MODULAR_STRUCTURE.md +0 -112
  24. cnhkmcp/untracked/APP/README.md +0 -309
  25. cnhkmcp/untracked/APP/Tranformer/Transformer.py +0 -4985
  26. cnhkmcp/untracked/APP/Tranformer/ace.log +0 -0
  27. cnhkmcp/untracked/APP/Tranformer/ace_lib.py +0 -1510
  28. cnhkmcp/untracked/APP/Tranformer/helpful_functions.py +0 -180
  29. cnhkmcp/untracked/APP/Tranformer/output/Alpha_candidates.json +0 -2421
  30. cnhkmcp/untracked/APP/Tranformer/output/Alpha_candidates_/321/207/320/264/342/225/221/321/204/342/225/233/320/233.json +0 -654
  31. cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_error.json +0 -1034
  32. cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_success.json +0 -444
  33. cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_/321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/320/237/320/277/321/207/320/253/342/224/244/321/206/320/236/320/265/321/210/342/225/234/342/225/234/321/205/320/225/320/265Machine_lib.json +0 -22
  34. cnhkmcp/untracked/APP/Tranformer/parsetab.py +0 -60
  35. cnhkmcp/untracked/APP/Tranformer/template_summary.txt +0 -3182
  36. cnhkmcp/untracked/APP/Tranformer/transformer_config.json +0 -7
  37. cnhkmcp/untracked/APP/Tranformer/validator.py +0 -889
  38. cnhkmcp/untracked/APP/ace.log +0 -69
  39. cnhkmcp/untracked/APP/ace_lib.py +0 -1510
  40. cnhkmcp/untracked/APP/blueprints/__init__.py +0 -6
  41. cnhkmcp/untracked/APP/blueprints/feature_engineering.py +0 -347
  42. cnhkmcp/untracked/APP/blueprints/idea_house.py +0 -221
  43. cnhkmcp/untracked/APP/blueprints/inspiration_house.py +0 -432
  44. cnhkmcp/untracked/APP/blueprints/paper_analysis.py +0 -570
  45. cnhkmcp/untracked/APP/custom_templates/templates.json +0 -1257
  46. cnhkmcp/untracked/APP/give_me_idea/BRAIN_Alpha_Template_Expert_SystemPrompt.md +0 -400
  47. cnhkmcp/untracked/APP/give_me_idea/ace_lib.py +0 -1510
  48. cnhkmcp/untracked/APP/give_me_idea/alpha_data_specific_template_master.py +0 -252
  49. cnhkmcp/untracked/APP/give_me_idea/fetch_all_datasets.py +0 -157
  50. cnhkmcp/untracked/APP/give_me_idea/fetch_all_operators.py +0 -99
  51. cnhkmcp/untracked/APP/give_me_idea/helpful_functions.py +0 -180
  52. cnhkmcp/untracked/APP/give_me_idea/what_is_Alpha_template.md +0 -11
  53. cnhkmcp/untracked/APP/helpful_functions.py +0 -180
  54. cnhkmcp/untracked/APP/hkSimulator/ace_lib.py +0 -1497
  55. cnhkmcp/untracked/APP/hkSimulator/autosimulator.py +0 -447
  56. cnhkmcp/untracked/APP/hkSimulator/helpful_functions.py +0 -180
  57. cnhkmcp/untracked/APP/mirror_config.txt +0 -20
  58. cnhkmcp/untracked/APP/operaters.csv +0 -129
  59. cnhkmcp/untracked/APP/requirements.txt +0 -53
  60. cnhkmcp/untracked/APP/run_app.bat +0 -28
  61. cnhkmcp/untracked/APP/run_app.sh +0 -34
  62. cnhkmcp/untracked/APP/setup_tsinghua.bat +0 -39
  63. cnhkmcp/untracked/APP/setup_tsinghua.sh +0 -43
  64. cnhkmcp/untracked/APP/simulator/alpha_submitter.py +0 -404
  65. cnhkmcp/untracked/APP/simulator/simulator_wqb.py +0 -618
  66. cnhkmcp/untracked/APP/ssrn-3332513.pdf +6 -109201
  67. cnhkmcp/untracked/APP/static/brain.js +0 -589
  68. cnhkmcp/untracked/APP/static/decoder.js +0 -1540
  69. cnhkmcp/untracked/APP/static/feature_engineering.js +0 -1729
  70. cnhkmcp/untracked/APP/static/idea_house.js +0 -937
  71. cnhkmcp/untracked/APP/static/inspiration.js +0 -465
  72. cnhkmcp/untracked/APP/static/inspiration_house.js +0 -868
  73. cnhkmcp/untracked/APP/static/paper_analysis.js +0 -390
  74. cnhkmcp/untracked/APP/static/script.js +0 -3082
  75. cnhkmcp/untracked/APP/static/simulator.js +0 -597
  76. cnhkmcp/untracked/APP/static/styles.css +0 -3127
  77. cnhkmcp/untracked/APP/static/usage_widget.js +0 -508
  78. cnhkmcp/untracked/APP/templates/alpha_inspector.html +0 -511
  79. cnhkmcp/untracked/APP/templates/feature_engineering.html +0 -960
  80. cnhkmcp/untracked/APP/templates/idea_house.html +0 -564
  81. cnhkmcp/untracked/APP/templates/index.html +0 -932
  82. cnhkmcp/untracked/APP/templates/inspiration_house.html +0 -861
  83. cnhkmcp/untracked/APP/templates/paper_analysis.html +0 -91
  84. cnhkmcp/untracked/APP/templates/simulator.html +0 -343
  85. cnhkmcp/untracked/APP/templates/transformer_web.html +0 -580
  86. cnhkmcp/untracked/APP/usage.md +0 -351
  87. cnhkmcp/untracked/APP//321/207/342/225/235/320/250/321/205/320/230/320/226/321/204/342/225/225/320/220/321/211/320/221/320/243/321/206/320/261/320/265/ace_lib.py +0 -1510
  88. cnhkmcp/untracked/APP//321/207/342/225/235/320/250/321/205/320/230/320/226/321/204/342/225/225/320/220/321/211/320/221/320/243/321/206/320/261/320/265/brain_alpha_inspector.py +0 -712
  89. cnhkmcp/untracked/APP//321/207/342/225/235/320/250/321/205/320/230/320/226/321/204/342/225/225/320/220/321/211/320/221/320/243/321/206/320/261/320/265/helpful_functions.py +0 -180
  90. cnhkmcp/untracked/APP//321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/231/320/243/321/205/342/225/235/320/220/321/206/320/230/320/241.py +0 -2456
  91. cnhkmcp/untracked/arXiv_API_Tool_Manual.md +0 -490
  92. cnhkmcp/untracked/arxiv_api.py +0 -229
  93. cnhkmcp/untracked/forum_functions.py +0 -998
  94. cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/forum_functions.py +0 -407
  95. cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/platform_functions.py +0 -2415
  96. cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/user_config.json +0 -31
  97. cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272//321/210/320/276/320/271AI/321/210/320/277/342/225/227/321/210/342/224/220/320/251/321/204/342/225/225/320/272/321/206/320/246/320/227/321/206/320/261/320/263/321/206/320/255/320/265/321/205/320/275/320/266/321/204/342/225/235/320/252/321/204/342/225/225/320/233/321/210/342/225/234/342/225/234/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270.md +0 -101
  98. cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272//321/211/320/225/320/235/321/207/342/225/234/320/276/321/205/320/231/320/235/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/230/320/241_/321/205/320/276/320/231/321/210/320/263/320/225/321/205/342/224/220/320/225/321/210/320/266/320/221/321/204/342/225/233/320/255/321/210/342/225/241/320/246/321/205/320/234/320/225.py +0 -190
  99. cnhkmcp/untracked/platform_functions.py +0 -2886
  100. cnhkmcp/untracked/sample_mcp_config.json +0 -11
  101. cnhkmcp/untracked/user_config.json +0 -31
  102. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/320/237/320/222/321/210/320/220/320/223/321/206/320/246/320/227/321/206/320/261/320/263_BRAIN_Alpha_Test_Requirements_and_Tips.md +0 -202
  103. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_Alpha_explaination_workflow.md +0 -56
  104. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_BRAIN_6_Tips_Datafield_Exploration_Guide.md +0 -194
  105. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_BRAIN_Alpha_Improvement_Workflow.md +0 -101
  106. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_Dataset_Exploration_Expert_Manual.md +0 -436
  107. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_daily_report_workflow.md +0 -128
  108. cnhkmcp/untracked//321/211/320/225/320/235/321/207/342/225/234/320/276/321/205/320/231/320/235/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/230/320/241_/321/205/320/276/320/231/321/210/320/263/320/225/321/205/342/224/220/320/225/321/210/320/266/320/221/321/204/342/225/233/320/255/321/210/342/225/241/320/246/321/205/320/234/320/225.py +0 -190
  109. cnhkmcp-2.1.2.dist-info/RECORD +0 -111
  110. cnhkmcp-2.1.2.dist-info/top_level.txt +0 -1
  111. {cnhkmcp-2.1.2.dist-info → cnhkmcp-2.1.3.dist-info}/WHEEL +0 -0
  112. {cnhkmcp-2.1.2.dist-info → cnhkmcp-2.1.3.dist-info}/entry_points.txt +0 -0
  113. {cnhkmcp-2.1.2.dist-info → cnhkmcp-2.1.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,998 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- WorldQuant BRAIN Forum Functions - Python Version
4
- Comprehensive forum functionality including glossary, search, and post viewing.
5
- """
6
-
7
- import asyncio
8
- import re
9
- import sys
10
- import time
11
- from datetime import datetime
12
- from typing import Dict, Any, List, Optional
13
-
14
- from bs4 import BeautifulSoup
15
- from selenium import webdriver
16
- from selenium.webdriver.chrome.options import Options
17
- from selenium.webdriver.edge.options import Options as EdgeOptions
18
- from selenium.webdriver.common.by import By
19
- from selenium.webdriver.support.ui import WebDriverWait
20
- from selenium.webdriver.support import expected_conditions as EC
21
- from selenium.common.exceptions import TimeoutException, NoSuchElementException
22
- import requests
23
- import os
24
- import shutil
25
-
26
- # Initialize forum MCP server
27
- try:
28
- from mcp.server.fastmcp import FastMCP
29
- forum_mcp = FastMCP('brain_forum_server')
30
- except ImportError:
31
- # Fallback for testing
32
- forum_mcp = None
33
-
34
- def log(message: str, level: str = "INFO"):
35
- """Log message with timestamp."""
36
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
37
- print(f"[{timestamp}] [{level}] {message}", file=sys.stderr)
38
-
39
- class ForumClient:
40
- """Forum client for WorldQuant BRAIN support site."""
41
-
42
- def __init__(self):
43
- self.base_url = "https://support.worldquantbrain.com"
44
- self.session = requests.Session()
45
- self.session.headers.update({
46
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36'
47
- })
48
-
49
- def get_brain_session(self):
50
- """Get authenticated session from BrainApiClient."""
51
- try:
52
- import sys
53
- import os
54
- sys.path.append(os.path.dirname(os.path.abspath(__file__)))
55
- from platform_functions import brain_client
56
- return brain_client.session
57
- except ImportError:
58
- return None
59
-
60
- def detect_available_browser(self) -> str:
61
- """Detect which browser WebDriver is available."""
62
- try:
63
- # Try Chrome first
64
- from selenium.webdriver.chrome.service import Service
65
- from selenium.webdriver.chrome.options import Options
66
- try:
67
- options = Options()
68
- options.add_argument('--headless')
69
- driver = webdriver.Chrome(options=options)
70
- driver.quit()
71
- return "chrome"
72
- except Exception:
73
- pass
74
-
75
- # Try Edge
76
- try:
77
- from selenium.webdriver.edge.options import Options as EdgeOptions
78
- options = EdgeOptions()
79
- options.add_argument('--headless')
80
- driver = webdriver.Edge(options=options)
81
- driver.quit()
82
- return "edge"
83
- except Exception:
84
- pass
85
-
86
- # Default to chrome
87
- return "chrome"
88
- except Exception:
89
- return "chrome"
90
-
91
- def setup_browser_options(self, headless: bool, browser_type: str):
92
- """Setup browser options based on browser type."""
93
- if browser_type.lower() == "chrome":
94
- return self.setup_chrome_options(headless)
95
- elif browser_type.lower() == "edge":
96
- return self.setup_edge_options(headless)
97
- else:
98
- return self.setup_chrome_options(headless)
99
-
100
- def setup_edge_options(self, headless: bool = True) -> EdgeOptions:
101
- """Setup Edge options for web scraping."""
102
- options = EdgeOptions()
103
-
104
- if headless:
105
- options.add_argument('--headless')
106
-
107
- # Performance optimizations
108
- options.add_argument('--disable-blink-features=AutomationControlled')
109
- options.add_argument('--log-level=3')
110
- options.add_argument('--no-sandbox')
111
- options.add_argument('--disable-dev-shm-usage')
112
- options.add_argument('--disable-web-security')
113
- options.add_argument('--disable-features=VizDisplayCompositor')
114
- options.add_argument('--disable-gpu')
115
- options.add_argument('--disable-extensions')
116
- options.add_argument('--disable-images')
117
- options.add_argument('--disable-javascript')
118
- options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36')
119
-
120
- return options
121
-
122
- def setup_chrome_options(self, headless: bool = True) -> Options:
123
- """Setup Chrome options for web scraping."""
124
- options = Options()
125
-
126
- if headless:
127
- options.add_argument('--headless')
128
-
129
- # Performance optimizations
130
- options.add_argument('--disable-blink-features=AutomationControlled')
131
- options.add_argument('--log-level=3')
132
- options.add_argument('--no-sandbox')
133
- options.add_argument('--disable-dev-shm-usage')
134
- options.add_argument('--disable-web-security')
135
- options.add_argument('--disable-features=VizDisplayCompositor')
136
- options.add_argument('--disable-gpu')
137
- options.add_argument('--disable-extensions')
138
- options.add_argument('--disable-images')
139
- options.add_argument('--disable-javascript')
140
- options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36')
141
-
142
- return options
143
-
144
- async def create_driver(self, headless: bool = True):
145
- """Create and configure WebDriver with cross-browser support."""
146
- browser_type = self.detect_available_browser()
147
- log(f"Using browser: {browser_type}", "INFO")
148
-
149
- options = self.setup_browser_options(headless, browser_type)
150
-
151
- try:
152
- if browser_type.lower() == "chrome":
153
- driver = webdriver.Chrome(options=options)
154
- elif browser_type.lower() == "edge":
155
- driver = webdriver.Edge(options=options)
156
- else:
157
- # Fallback to Chrome
158
- log("Falling back to Chrome", "WARNING")
159
- driver = webdriver.Chrome(options=options)
160
-
161
- # Set aggressive timeouts for speed
162
- driver.set_page_load_timeout(30)
163
- driver.implicitly_wait(10)
164
-
165
- return driver
166
-
167
- except Exception as e:
168
- log(f"Failed to create {browser_type} driver: {str(e)}", "ERROR")
169
- help_text = self.get_driver_installation_help(browser_type)
170
- log(help_text, "ERROR")
171
-
172
- # Try Chrome as fallback if Edge failed
173
- if browser_type.lower() != "chrome":
174
- try:
175
- log("Trying Chrome as fallback", "INFO")
176
- chrome_options = self.setup_browser_options(headless, "chrome")
177
- driver = webdriver.Chrome(options=chrome_options)
178
- driver.set_page_load_timeout(30)
179
- driver.implicitly_wait(10)
180
- return driver
181
- except Exception as e2:
182
- log(f"Chrome fallback also failed: {str(e2)}", "ERROR")
183
- chrome_help = self.get_driver_installation_help("chrome")
184
- log(chrome_help, "ERROR")
185
-
186
- raise Exception(f"Could not create any browser driver. {help_text}")
187
-
188
- async def login_to_forum(self, driver, email: str, password: str) -> bool:
189
- """Login to the WorldQuant BRAIN forum using existing authentication."""
190
- try:
191
- # Import BrainApiClient from platform_functions
192
- import sys
193
- import os
194
- sys.path.append(os.path.dirname(os.path.abspath(__file__)))
195
-
196
- try:
197
- from platform_functions import brain_client
198
- log("Using existing BrainApiClient for authentication", "INFO")
199
-
200
- # First authenticate with BrainApiClient
201
- auth_result = await brain_client.authenticate(email, password)
202
- if auth_result.get('status') != 'authenticated':
203
- log("BrainApiClient authentication failed", "ERROR")
204
- return False
205
-
206
- log("Successfully authenticated via BrainApiClient", "SUCCESS")
207
-
208
- # Navigate to forum with authenticated session
209
- log("Navigating to forum with authenticated session", "WORK")
210
- driver.get("https://support.worldquantbrain.com/hc/en-us")
211
- await asyncio.sleep(2)
212
-
213
- # Add authentication cookies to browser
214
- cookies = brain_client.session.cookies
215
- for cookie in cookies:
216
- driver.add_cookie({
217
- 'name': cookie.name,
218
- 'value': cookie.value,
219
- 'domain': '.worldquantbrain.com'
220
- })
221
-
222
- # Refresh page with cookies
223
- driver.refresh()
224
- await asyncio.sleep(2)
225
-
226
- return True
227
-
228
- except ImportError:
229
- log("BrainApiClient not available, using manual login", "WARNING")
230
-
231
- # Fallback to manual login
232
- driver.get("https://support.worldquantbrain.com/hc/en-us/signin")
233
- await asyncio.sleep(3)
234
-
235
- email_input = WebDriverWait(driver, 15).until(
236
- EC.presence_of_element_located((By.NAME, "email"))
237
- )
238
- password_input = WebDriverWait(driver, 15).until(
239
- EC.presence_of_element_located((By.NAME, "currentPassword"))
240
- )
241
-
242
- email_input.clear()
243
- email_input.send_keys(email)
244
- password_input.clear()
245
- password_input.send_keys(password)
246
-
247
- login_button = WebDriverWait(driver, 15).until(
248
- EC.element_to_be_clickable((By.XPATH, '//button[@type="submit"]'))
249
- )
250
- login_button.click()
251
- await asyncio.sleep(3)
252
-
253
- return True
254
-
255
- except Exception as e:
256
- log(f"Login failed: {str(e)}", "ERROR")
257
- return False
258
-
259
- async def get_glossary_terms(self, email: str, password: str, headless: bool = False) -> Dict[str, Any]:
260
- """Extract glossary terms from the forum."""
261
- driver = None
262
- try:
263
- log("Starting glossary extraction process", "INFO")
264
-
265
- # Add timeout protection
266
- async def extraction_with_timeout():
267
- return await self._perform_glossary_extraction(email, password, headless)
268
-
269
- # Run with 5-minute timeout
270
- result = await asyncio.wait_for(extraction_with_timeout(), timeout=300)
271
- return result
272
-
273
- except asyncio.TimeoutError:
274
- log("Glossary extraction timed out after 5 minutes", "ERROR")
275
- return {"error": "Glossary extraction timed out after 5 minutes"}
276
- except Exception as e:
277
- log(f"Glossary extraction failed: {str(e)}", "ERROR")
278
- return {"error": str(e)}
279
- finally:
280
- if driver:
281
- try:
282
- driver.quit()
283
- except:
284
- pass
285
-
286
- async def _perform_glossary_extraction(self, email: str, password: str, headless: bool) -> Dict[str, Any]:
287
- """Perform the actual glossary extraction."""
288
- driver = None
289
- try:
290
- driver = await self.create_driver(headless)
291
-
292
- # Login
293
- if not await self.login_to_forum(driver, email, password):
294
- raise Exception("Failed to login to forum")
295
-
296
- # Navigate to glossary page
297
- log("Navigating to glossary page", "WORK")
298
- driver.get("https://support.worldquantbrain.com/hc/en-us/articles/4902349883927-Click-here-for-a-list-of-terms-and-their-definitions")
299
- await asyncio.sleep(5)
300
-
301
- # Extract content
302
- log("Extracting glossary content", "WORK")
303
- page_source = driver.page_source
304
- soup = BeautifulSoup(page_source, 'html.parser')
305
-
306
- # Parse glossary terms
307
- terms = self._parse_glossary_terms(page_source)
308
-
309
- log(f"Extracted {len(terms)} glossary terms", "SUCCESS")
310
- return {
311
- "terms": terms,
312
- "total_count": len(terms),
313
- "extraction_timestamp": datetime.now().isoformat()
314
- }
315
-
316
- finally:
317
- if driver:
318
- try:
319
- driver.quit()
320
- except:
321
- pass
322
-
323
- def _parse_glossary_terms(self, content: str) -> List[Dict[str, str]]:
324
- """Parse glossary terms from HTML content."""
325
- terms = []
326
- lines = content.split('\n')
327
-
328
- current_term = None
329
- current_definition = []
330
- is_collecting_definition = False
331
- found_first_real_term = False
332
-
333
- for line in lines:
334
- line = line.strip()
335
- if not line:
336
- continue
337
-
338
- # Skip navigation and metadata lines at the beginning
339
- if not found_first_real_term and self._is_navigation_or_metadata(line):
340
- continue
341
-
342
- # Check if this line looks like a term
343
- if self._looks_like_term(line) and not is_collecting_definition:
344
- # Mark that we found the first real term
345
- if not found_first_real_term:
346
- found_first_real_term = True
347
-
348
- # Save previous term if exists
349
- if current_term and current_definition:
350
- terms.append({
351
- "term": current_term.strip(),
352
- "definition": " ".join(current_definition).strip()
353
- })
354
-
355
- current_term = line
356
- current_definition = []
357
- is_collecting_definition = True
358
- elif is_collecting_definition and found_first_real_term:
359
- # Check if this is the start of a new term
360
- if self._looks_like_term(line):
361
- # Save current term
362
- if current_term and current_definition:
363
- terms.append({
364
- "term": current_term.strip(),
365
- "definition": " ".join(current_definition).strip()
366
- })
367
-
368
- current_term = line
369
- current_definition = []
370
- else:
371
- # Add to definition
372
- if current_definition:
373
- current_definition.append(line)
374
- else:
375
- current_definition = [line]
376
-
377
- # Don't forget the last term
378
- if current_term and current_definition and found_first_real_term:
379
- terms.append({
380
- "term": current_term.strip(),
381
- "definition": " ".join(current_definition).strip()
382
- })
383
-
384
- # Filter out invalid terms and improve quality
385
- return [term for term in terms if
386
- len(term["term"]) > 0 and
387
- len(term["definition"]) > 10 and # Ensure meaningful definitions
388
- not self._is_navigation_or_metadata(term["term"]) and
389
- "ago" not in term["definition"] and # Remove timestamp-like definitions
390
- "minute read" not in term["definition"]] # Remove reading time
391
-
392
- def _looks_like_term(self, line: str) -> bool:
393
- """Check if a line looks like a glossary term."""
394
- # Skip very long lines (likely definitions)
395
- if len(line) > 100:
396
- return False
397
-
398
- # Skip navigation and metadata
399
- if self._is_navigation_or_metadata(line):
400
- return False
401
-
402
- # Skip lines that start with common definition words
403
- definition_starters = ['the', 'a', 'an', 'this', 'that', 'it', 'is', 'are', 'was', 'were', 'for', 'to', 'in', 'on', 'at', 'by', 'with']
404
- first_word = line.lower().split(' ')[0]
405
- if first_word and first_word in definition_starters:
406
- return False
407
-
408
- # Check if line has characteristics of a term
409
- # Terms are often short, may be all caps, or start with capital
410
- is_short = len(line) <= 80
411
- starts_with_capital = bool(re.match(r'^[A-Z]', line))
412
- has_all_caps = bool(re.match(r'^[A-Z\s\-\/\(\)]+$', line))
413
- has_reasonable_length = len(line) >= 2
414
-
415
- return is_short and has_reasonable_length and (starts_with_capital or has_all_caps)
416
-
417
- def _is_navigation_or_metadata(self, line: str) -> bool:
418
- """Check if a line is navigation or metadata."""
419
- navigation_patterns = [
420
- r'^\d+ days? ago$',
421
- r'~\d+ minute read',
422
- r'^Follow',
423
- r'^Not yet followed',
424
- r'^Updated$',
425
- r'^AS\d+$',
426
- r'^[A-Z] - [A-Z] - [A-Z]', # Letter navigation
427
- r'^A$',
428
- r'^B$',
429
- r'^[A-Z]$' # Single letters
430
- ]
431
-
432
- return any(re.match(pattern, line.strip()) for pattern in navigation_patterns)
433
-
434
- def get_driver_installation_help(self, browser_type: str) -> str:
435
- """Provide helpful instructions for installing WebDriver."""
436
- if browser_type.lower() == "chrome":
437
- return """
438
- Chrome WebDriver not found. Please install ChromeDriver:
439
- 1. Download from: https://chromedriver.chromium.org/downloads
440
- 2. Make sure version matches your Chrome browser
441
- 3. Add to PATH or place in current directory
442
- 4. Alternative: Install via pip: pip install chromedriver-autoinstaller
443
- """
444
- elif browser_type.lower() == "edge":
445
- return """
446
- Edge WebDriver not found. Please install Edge WebDriver:
447
- 1. Download from: https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/
448
- 2. Make sure version matches your Edge browser
449
- 3. Add to PATH or place in current directory
450
- 4. Alternative: Install via pip: pip install msedge-selenium-tools
451
- """
452
- else:
453
- return "Please install either ChromeDriver or Edge WebDriver for browser automation."
454
-
455
- async def read_full_forum_post(self, email: str, password: str, post_url_or_id: str,
456
- headless: bool = False, include_comments: bool = True) -> Dict[str, Any]:
457
- """Read a complete forum post with optional comments."""
458
- driver = None
459
- try:
460
- log("Starting forum post reading process", "INFO")
461
-
462
- # Determine if input is URL or article ID
463
- is_url = post_url_or_id.startswith('http')
464
- if is_url:
465
- post_url = post_url_or_id
466
- else:
467
- post_url = f"https://support.worldquantbrain.com/hc/zh-cn/community/posts/{post_url_or_id}"
468
-
469
- log(f"Target URL: {post_url}", "INFO")
470
- log(f"Include comments: {include_comments}", "INFO")
471
-
472
- driver = await self.create_driver(headless)
473
-
474
- # Login
475
- if not await self.login_to_forum(driver, email, password):
476
- raise Exception("Failed to login to forum")
477
-
478
- # Navigate directly to post URL
479
- log(f"Opening post: {post_url}", "WORK")
480
- driver.get(post_url)
481
- log("Post page loaded, extracting content immediately", "WORK")
482
-
483
- # Wait minimal time for content to appear
484
- await asyncio.sleep(2)
485
-
486
- # Extract post content quickly
487
- post_data = {}
488
- page_source = driver.page_source
489
- soup = BeautifulSoup(page_source, 'html.parser')
490
-
491
- # Extract post title
492
- title = soup.select_one('.post-title, h1, .article-title')
493
- if not title:
494
- title = soup.select_one('title')
495
- post_data['title'] = title.get_text().strip() if title else 'Unknown Title'
496
-
497
- # Extract post author
498
- author = soup.select_one('.post-author, .author, .article-author')
499
- if not author:
500
- author = soup.select_one('.comment-author')
501
- post_data['author'] = author.get_text().strip() if author else 'Unknown Author'
502
-
503
- # Extract post date
504
- date = soup.select_one('.post-date, .date, .article-date, time')
505
- if not date:
506
- time_element = soup.select_one('time')
507
- if time_element:
508
- date = time_element.get('datetime') or time_element.get('title') or time_element.get_text().strip()
509
- else:
510
- date = 'Unknown Date'
511
- else:
512
- date = date.get_text().strip()
513
- post_data['date'] = date if date else 'Unknown Date'
514
-
515
- # Extract post content
516
- post_content = soup.select_one('.post-body, .article-body, .content, .post-content')
517
- if not post_content:
518
- post_content = soup.select_one('article, main')
519
-
520
- if post_content:
521
- post_data['content_html'] = str(post_content)
522
- post_data['content_text'] = post_content.get_text().strip()
523
- else:
524
- post_data['content_html'] = 'No content found'
525
- post_data['content_text'] = 'No content found'
526
-
527
- post_data['url'] = post_url
528
- post_data['current_url'] = driver.current_url
529
-
530
- log(f"Post content extracted: \"{post_data['title']}\"", "SUCCESS")
531
-
532
- comments = []
533
- total_comments = 0
534
-
535
- # Extract comments conditionally
536
- if include_comments:
537
- log("Extracting comments...", "WORK")
538
- comments = await self._extract_forum_comments_full(driver, soup)
539
- total_comments = len(comments)
540
- log(f"Extracted {total_comments} comments", "SUCCESS")
541
- else:
542
- log("Skipping comment extraction (includeComments=false)", "INFO")
543
-
544
- return {
545
- "success": True,
546
- "post": post_data,
547
- "comments": comments,
548
- "total_comments": total_comments,
549
- "extracted_at": datetime.now().isoformat(),
550
- "processing_time": "full_extraction_with_comments" if include_comments else "post_only_extraction",
551
- "include_comments": include_comments
552
- }
553
-
554
- except Exception as e:
555
- log(f"Failed to read forum post: {str(e)}", "ERROR")
556
- return {"error": str(e)}
557
- finally:
558
- if driver:
559
- try:
560
- driver.quit()
561
- except:
562
- pass
563
-
564
    async def _extract_forum_comments_full(self, driver, soup: BeautifulSoup) -> List[Dict[str, Any]]:
        """Extract all comments from forum post with pagination support.

        Parses the already-loaded page first (via the supplied ``soup``),
        then repeatedly clicks the "next page" control, waits briefly, and
        parses each newly rendered page until no further pages — or no new
        comments — are found.

        Args:
            driver: Live Selenium driver currently positioned on the post page.
            soup: Parsed HTML of the current page (avoids an extra fetch).

        Returns:
            All comment dicts collected across every page. Partial results
            are returned if extraction fails midway (best-effort).
        """
        all_comments = []
        page_num = 1

        try:
            # First extract comments from current page source
            page_comments = self._parse_comments_from_html(soup)
            all_comments.extend(page_comments)
            log(f"Found {len(page_comments)} comments on page {page_num}", "INFO")

            # Check for pagination and continue if needed
            while True:
                try:
                    # Look for next page button
                    next_button = driver.find_element(By.CSS_SELECTOR, "span.pagination-next-text, .pagination-next, .next")
                    next_text = next_button.text

                    # "下一页" is the zh-cn label for "next page"
                    if "下一页" in next_text or "Next" in next_text or "next" in next_text.lower():
                        log(f"Found next page, continuing to page {page_num + 1}", "INFO")
                        next_button.click()
                        await asyncio.sleep(2)  # Minimal wait for next page

                        # Extract comments from new page
                        new_page_source = driver.page_source
                        new_soup = BeautifulSoup(new_page_source, 'html.parser')
                        new_page_comments = self._parse_comments_from_html(new_soup)

                        # An empty page means pagination ran out (or looped)
                        if len(new_page_comments) == 0:
                            break

                        all_comments.extend(new_page_comments)
                        page_num += 1
                        log(f"Found {len(new_page_comments)} comments on page {page_num}", "INFO")
                    else:
                        break
                except Exception as e:
                    # find_element raises when no pagination control exists —
                    # treated as the normal "last page" exit condition
                    log("No more pages found", "INFO")
                    break

            return all_comments

        except Exception as e:
            # Best-effort: return whatever was collected before the failure
            log(f"Error in comment extraction: {str(e)}", "WARNING")
            return all_comments
-
610
- def _parse_comments_from_html(self, soup: BeautifulSoup) -> List[Dict[str, Any]]:
611
- """Parse comments from HTML using BeautifulSoup."""
612
- comments = []
613
-
614
- # Try multiple selectors for comments
615
- comment_selectors = [
616
- 'ul#comments.comment-list li.comment',
617
- '.comment-list .comment',
618
- '.comments .comment',
619
- 'li.comment',
620
- '.comment-item'
621
- ]
622
-
623
- comment_elements = None
624
-
625
- for selector in comment_selectors:
626
- comment_elements = soup.select(selector)
627
- if comment_elements:
628
- log(f"Found comments using selector: {selector}", "INFO")
629
- break
630
-
631
- if not comment_elements:
632
- log("No comments found on this page", "INFO")
633
- return comments
634
-
635
- for index, element in enumerate(comment_elements):
636
- try:
637
- comment = {}
638
-
639
- # Extract comment ID
640
- comment['id'] = element.get('id') or f"comment-{index}"
641
-
642
- # Extract author
643
- author_element = element.select_one('.comment-author a, .author a, .comment-author')
644
- comment['author'] = author_element.get_text().strip() if author_element else 'Unknown Author'
645
- comment['author_link'] = author_element.get('href') if author_element else ''
646
-
647
- # Extract date
648
- time_element = element.select_one('.meta-data time, time, .date, .comment-date')
649
- if time_element:
650
- comment['date'] = time_element.get('datetime') or time_element.get('title') or time_element.get_text().strip()
651
- comment['date_display'] = time_element.get('title') or time_element.get_text().strip()
652
- else:
653
- comment['date'] = 'Unknown Date'
654
- comment['date_display'] = 'Unknown Date'
655
-
656
- # Extract content
657
- content_element = element.select_one('.comment-body, .comment-content, .content')
658
- if content_element:
659
- comment['content_html'] = str(content_element)
660
- comment['content_text'] = content_element.get_text().strip()
661
- else:
662
- comment['content_html'] = ''
663
- comment['content_text'] = ''
664
-
665
- # Extract votes
666
- vote_element = element.select_one('.vote-up span, .votes, .vote-count')
667
- comment['votes'] = vote_element.get_text().strip() if vote_element else '0'
668
-
669
- # Extract status
670
- status_element = element.select_one('.status-label, .status, .badge')
671
- comment['status'] = status_element.get_text().strip() if status_element else '普通评论'
672
-
673
- if comment['content_text']:
674
- comments.append(comment)
675
-
676
- except Exception as e:
677
- log(f"Error parsing comment {index}: {str(e)}", "WARNING")
678
-
679
- return comments
680
-
681
- async def search_forum_posts(self, email: str, password: str, search_query: str,
682
- max_results: int = 50, headless: bool = True) -> Dict[str, Any]:
683
- """Search forum posts."""
684
- driver = None
685
- try:
686
- log("Starting forum search process", "INFO")
687
- log(f"Search query: '{search_query}'", "INFO")
688
- log(f"Max results: {max_results}", "INFO")
689
-
690
- driver = await self.create_driver(headless)
691
-
692
- # Login
693
- if not await self.login_to_forum(driver, email, password):
694
- raise Exception("Failed to login to forum")
695
-
696
- # Navigate to search
697
- encoded_query = requests.utils.quote(search_query)
698
- search_url = f"https://support.worldquantbrain.com/hc/zh-cn/search?utf8=%E2%9C%93&query={encoded_query}"
699
- log(f"Opening search URL: {search_url}", "WORK")
700
-
701
- driver.get(search_url)
702
- await asyncio.sleep(2)
703
-
704
- # Collect results with pagination
705
- all_results = []
706
- page_num = 1
707
-
708
- log("Starting result collection with pagination", "WORK")
709
-
710
- while len(all_results) < max_results:
711
- log(f"Processing page {page_num}", "INFO")
712
-
713
- # Wait for search results
714
- try:
715
- WebDriverWait(driver, 10).until(
716
- EC.presence_of_element_located((By.CSS_SELECTOR, '.search-results-list, .search-result-list-item'))
717
- )
718
- except TimeoutException:
719
- log(f"No search results found on page {page_num}", "WARNING")
720
- break
721
-
722
- # Extract results from current page
723
- page_source = driver.page_source
724
- soup = BeautifulSoup(page_source, 'html.parser')
725
- page_results = self._extract_search_results(soup, page_num)
726
-
727
- if not page_results:
728
- log(f"No more results found on page {page_num}", "INFO")
729
- break
730
-
731
- all_results.extend(page_results)
732
-
733
- # Check if we have enough results
734
- if len(all_results) >= max_results:
735
- all_results = all_results[:max_results]
736
- break
737
-
738
- # Try to go to next page
739
- if not await self._go_to_next_search_page(driver, soup):
740
- log("No more pages available", "INFO")
741
- break
742
-
743
- page_num += 1
744
- await asyncio.sleep(1)
745
-
746
- # Analyze results
747
- analysis = self._analyze_search_results(all_results, search_query)
748
-
749
- log(f"Search completed. Found {len(all_results)} results", "SUCCESS")
750
- return {
751
- "results": all_results,
752
- "total_found": len(all_results),
753
- "search_query": search_query,
754
- "analysis": analysis,
755
- "search_timestamp": datetime.now().isoformat()
756
- }
757
-
758
- except Exception as e:
759
- log(f"Search failed: {str(e)}", "ERROR")
760
- return {"error": str(e)}
761
- finally:
762
- if driver:
763
- try:
764
- driver.quit()
765
- except:
766
- pass
767
-
768
- def _extract_search_results(self, soup: BeautifulSoup, page_num: int) -> List[Dict[str, Any]]:
769
- """Extract search results from a page using multiple resilient selectors.
770
-
771
- Improvements vs original implementation:
772
- - Tries several container selectors (mirrors TS Cheerio approach)
773
- - Extracts richer metadata: description_html/text, votes, comments, author, date
774
- - Preserves legacy fields (snippet, metadata) for backward compatibility
775
- - Adds index & page for downstream analytics
776
- - Robust fallbacks & normalization of URLs
777
- """
778
- results: List[Dict[str, Any]] = []
779
-
780
- # Ordered list of possible container selectors (keep broad ones last)
781
- container_selectors = [
782
- '.search-result-list-item',
783
- '.search-results-list .search-result',
784
- '.striped-list-item',
785
- '.article-list-item',
786
- 'article.search-result',
787
- 'div.search-result',
788
- ]
789
-
790
- # Collect candidate elements (stop at first selector that yields results)
791
- result_items = []
792
- for selector in container_selectors:
793
- found = soup.select(selector)
794
- if found:
795
- log(f"Found {len(found)} search results using selector: {selector}", "INFO")
796
- result_items = found
797
- break
798
-
799
- # Fallback: regex class scan (original heuristic)
800
- if not result_items:
801
- fallback = soup.find_all(['article', 'div'], class_=re.compile(r'search-result|article-item'))
802
- if fallback:
803
- log(f"Fallback selector captured {len(fallback)} results", "INFO")
804
- result_items = fallback
805
- else:
806
- log("No search result items found with any selector", "WARNING")
807
- return results
808
-
809
- def first_text(element, selector_list: List[str]) -> str:
810
- for sel in selector_list:
811
- found = element.select_one(sel)
812
- if found and found.get_text(strip=True):
813
- return found.get_text(strip=True)
814
- return ''
815
-
816
- for idx, item in enumerate(result_items):
817
- try:
818
- # Title & link
819
- title_link_elem = None
820
- title_selectors = [
821
- '.search-result-title a',
822
- 'h3 a',
823
- '.title a',
824
- 'a'
825
- ]
826
- for sel in title_selectors:
827
- candidate = item.select_one(sel)
828
- if candidate and candidate.get_text(strip=True):
829
- title_link_elem = candidate
830
- break
831
-
832
- title = title_link_elem.get_text(strip=True) if title_link_elem else 'No title'
833
- link = title_link_elem.get('href') if title_link_elem and title_link_elem.has_attr('href') else ''
834
- if link and not link.startswith('http'):
835
- link = f"https://support.worldquantbrain.com{link}"
836
-
837
- if not link and not title:
838
- continue # Skip invalid entries
839
-
840
- # Description / snippet
841
- desc_elem = None
842
- desc_selectors = [
843
- '.search-results-description',
844
- '.description',
845
- '.excerpt',
846
- '.content-preview',
847
- 'p'
848
- ]
849
- for sel in desc_selectors:
850
- candidate = item.select_one(sel)
851
- if candidate and candidate.get_text(strip=True):
852
- desc_elem = candidate
853
- break
854
-
855
- description_html = str(desc_elem) if desc_elem else ''
856
- description_text = desc_elem.get_text(strip=True) if desc_elem else ''
857
-
858
- # Votes & comments
859
- votes = first_text(item, [
860
- '.search-result-votes span',
861
- '.votes span',
862
- '[class*="vote"] span',
863
- '[class*="vote"]'
864
- ]) or '0'
865
- comments = first_text(item, [
866
- '.search-result-meta-count span',
867
- '.comments span',
868
- '[class*="comment"] span',
869
- '[class*="comment"]'
870
- ]) or '0'
871
-
872
- # Metadata / author / date
873
- meta_block = item.select_one('.meta-data, .metadata, .post-meta')
874
- author = 'Unknown'
875
- date_val = 'Unknown'
876
- if meta_block:
877
- meta_text = meta_block.get_text(' ', strip=True)
878
- # Split on common separators
879
- parts = [p.strip() for p in re.split(r'[·•|]', meta_text) if p.strip()]
880
- if len(parts) >= 2:
881
- author = parts[0] or author
882
- date_val = parts[1] or date_val
883
-
884
- # Fallback selectors
885
- if author == 'Unknown':
886
- author = first_text(item, ['.author', '.username', '[class*="author"]']) or 'Unknown'
887
- if date_val == 'Unknown':
888
- # time element or date class
889
- time_elem = item.select_one('.date, time, [class*="date"]')
890
- if time_elem:
891
- date_val = time_elem.get('datetime') or time_elem.get('title') or time_elem.get_text(strip=True) or 'Unknown'
892
-
893
- # Compose legacy fields
894
- snippet = description_text
895
- metadata = f"author={author} date={date_val} votes={votes} comments={comments}".strip()
896
-
897
- results.append({
898
- 'title': title,
899
- 'link': link,
900
- 'description_html': description_html or 'No description',
901
- 'description_text': description_text or 'No description',
902
- 'votes': votes,
903
- 'comments': comments,
904
- 'author': author,
905
- 'date': date_val,
906
- 'snippet': snippet, # backward compatibility
907
- 'metadata': metadata, # backward compatibility / quick summary
908
- 'page': page_num,
909
- 'index': idx
910
- })
911
- except Exception as e:
912
- log(f"Error extracting search result {idx}: {str(e)}", "WARNING")
913
- continue
914
-
915
- return results
916
-
917
- async def _go_to_next_search_page(self, driver: webdriver.Chrome, soup: BeautifulSoup) -> bool:
918
- """Navigate to the next search page."""
919
- try:
920
- # Look for next page link
921
- next_link = soup.find('a', string=re.compile(r'next|下一页', re.IGNORECASE))
922
- if not next_link:
923
- next_link = soup.find('a', {'rel': 'next'})
924
-
925
- if next_link and next_link.get('href'):
926
- next_url = next_link['href']
927
- if not next_url.startswith('http'):
928
- next_url = f"https://support.worldquantbrain.com{next_url}"
929
-
930
- driver.get(next_url)
931
- await asyncio.sleep(2)
932
- return True
933
-
934
- return False
935
-
936
- except Exception as e:
937
- log(f"Error navigating to next page: {str(e)}", "WARNING")
938
- return False
939
-
940
- def _analyze_search_results(self, results: List[Dict[str, Any]], search_query: str) -> Dict[str, Any]:
941
- """Analyze search results for insights."""
942
- if not results:
943
- return {"message": "No results found"}
944
-
945
- # Basic statistics
946
- total_results = len(results)
947
-
948
- # Categorize results by type
949
- categories = {}
950
- for result in results:
951
- title = result.get('title', '').lower()
952
- if 'tutorial' in title or 'guide' in title:
953
- categories['tutorials'] = categories.get('tutorials', 0) + 1
954
- elif 'api' in title or 'reference' in title:
955
- categories['api_docs'] = categories.get('api_docs', 0) + 1
956
- elif 'error' in title or 'issue' in title or 'problem' in title:
957
- categories['troubleshooting'] = categories.get('troubleshooting', 0) + 1
958
- elif 'competition' in title or 'event' in title:
959
- categories['competitions'] = categories.get('competitions', 0) + 1
960
- else:
961
- categories['general'] = categories.get('general', 0) + 1
962
-
963
- # Find most relevant results (containing search terms)
964
- search_terms = search_query.lower().split()
965
- relevant_results = []
966
-
967
- for result in results:
968
- title = result.get('title', '').lower()
969
- snippet = result.get('snippet', '').lower()
970
- text = f"{title} {snippet}"
971
-
972
- term_matches = sum(1 for term in search_terms if term in text)
973
- if term_matches > 0:
974
- relevant_results.append({
975
- "result": result,
976
- "relevance_score": term_matches / len(search_terms)
977
- })
978
-
979
- # Sort by relevance
980
- relevant_results.sort(key=lambda x: x['relevance_score'], reverse=True)
981
-
982
- return {
983
- "total_results": total_results,
984
- "categories": categories,
985
- "most_relevant": relevant_results[:5] if relevant_results else [],
986
- "search_terms": search_terms
987
- }
988
-
989
# Module-level singleton; importers of this module share this one client.
forum_client = ForumClient()

# MCP Tools for Forum Functions - REMOVED (duplicate with platform_functions.py)
# These tools are already properly integrated in the main platform_functions.py

# Direct execution only prints an informational notice. Messages go to stderr
# (presumably to keep stdout free for protocol traffic — confirm against the
# MCP server setup); the actual tool registration lives in platform_functions.py.
if __name__ == "__main__":
    print("📚 WorldQuant BRAIN Forum Functions Server Starting...", file=sys.stderr)
    print("Note: Forum tools are now integrated in the main platform_functions.py", file=sys.stderr)
    print("This file provides the ForumClient class for internal use.", file=sys.stderr)