cnhkmcp 2.1.2__py3-none-any.whl → 2.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cnhkmcp-2.1.2.dist-info → cnhkmcp-2.1.3.dist-info}/METADATA +1 -1
- cnhkmcp-2.1.3.dist-info/RECORD +6 -0
- cnhkmcp-2.1.3.dist-info/top_level.txt +1 -0
- cnhkmcp/__init__.py +0 -125
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/README.md +0 -38
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/ace.log +0 -0
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/config.json +0 -6
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/ace_lib.py +0 -1510
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/fetch_all_datasets.py +0 -157
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/fetch_all_documentation.py +0 -132
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/fetch_all_operators.py +0 -99
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/helpful_functions.py +0 -180
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/icon.ico +0 -0
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/icon.png +0 -0
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/test.txt +0 -1
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/main.py +0 -576
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/process_knowledge_base.py +0 -281
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/rag_engine.py +0 -408
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/requirements.txt +0 -7
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/run.bat +0 -3
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242//321/211/320/266/320/246/321/206/320/274/320/261/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/231/320/243/321/205/342/225/235/320/220/321/206/320/230/320/241.py +0 -265
- cnhkmcp/untracked/APP/.gitignore +0 -32
- cnhkmcp/untracked/APP/MODULAR_STRUCTURE.md +0 -112
- cnhkmcp/untracked/APP/README.md +0 -309
- cnhkmcp/untracked/APP/Tranformer/Transformer.py +0 -4985
- cnhkmcp/untracked/APP/Tranformer/ace.log +0 -0
- cnhkmcp/untracked/APP/Tranformer/ace_lib.py +0 -1510
- cnhkmcp/untracked/APP/Tranformer/helpful_functions.py +0 -180
- cnhkmcp/untracked/APP/Tranformer/output/Alpha_candidates.json +0 -2421
- cnhkmcp/untracked/APP/Tranformer/output/Alpha_candidates_/321/207/320/264/342/225/221/321/204/342/225/233/320/233.json +0 -654
- cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_error.json +0 -1034
- cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_success.json +0 -444
- cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_/321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/320/237/320/277/321/207/320/253/342/224/244/321/206/320/236/320/265/321/210/342/225/234/342/225/234/321/205/320/225/320/265Machine_lib.json +0 -22
- cnhkmcp/untracked/APP/Tranformer/parsetab.py +0 -60
- cnhkmcp/untracked/APP/Tranformer/template_summary.txt +0 -3182
- cnhkmcp/untracked/APP/Tranformer/transformer_config.json +0 -7
- cnhkmcp/untracked/APP/Tranformer/validator.py +0 -889
- cnhkmcp/untracked/APP/ace.log +0 -69
- cnhkmcp/untracked/APP/ace_lib.py +0 -1510
- cnhkmcp/untracked/APP/blueprints/__init__.py +0 -6
- cnhkmcp/untracked/APP/blueprints/feature_engineering.py +0 -347
- cnhkmcp/untracked/APP/blueprints/idea_house.py +0 -221
- cnhkmcp/untracked/APP/blueprints/inspiration_house.py +0 -432
- cnhkmcp/untracked/APP/blueprints/paper_analysis.py +0 -570
- cnhkmcp/untracked/APP/custom_templates/templates.json +0 -1257
- cnhkmcp/untracked/APP/give_me_idea/BRAIN_Alpha_Template_Expert_SystemPrompt.md +0 -400
- cnhkmcp/untracked/APP/give_me_idea/ace_lib.py +0 -1510
- cnhkmcp/untracked/APP/give_me_idea/alpha_data_specific_template_master.py +0 -252
- cnhkmcp/untracked/APP/give_me_idea/fetch_all_datasets.py +0 -157
- cnhkmcp/untracked/APP/give_me_idea/fetch_all_operators.py +0 -99
- cnhkmcp/untracked/APP/give_me_idea/helpful_functions.py +0 -180
- cnhkmcp/untracked/APP/give_me_idea/what_is_Alpha_template.md +0 -11
- cnhkmcp/untracked/APP/helpful_functions.py +0 -180
- cnhkmcp/untracked/APP/hkSimulator/ace_lib.py +0 -1497
- cnhkmcp/untracked/APP/hkSimulator/autosimulator.py +0 -447
- cnhkmcp/untracked/APP/hkSimulator/helpful_functions.py +0 -180
- cnhkmcp/untracked/APP/mirror_config.txt +0 -20
- cnhkmcp/untracked/APP/operaters.csv +0 -129
- cnhkmcp/untracked/APP/requirements.txt +0 -53
- cnhkmcp/untracked/APP/run_app.bat +0 -28
- cnhkmcp/untracked/APP/run_app.sh +0 -34
- cnhkmcp/untracked/APP/setup_tsinghua.bat +0 -39
- cnhkmcp/untracked/APP/setup_tsinghua.sh +0 -43
- cnhkmcp/untracked/APP/simulator/alpha_submitter.py +0 -404
- cnhkmcp/untracked/APP/simulator/simulator_wqb.py +0 -618
- cnhkmcp/untracked/APP/ssrn-3332513.pdf +6 -109201
- cnhkmcp/untracked/APP/static/brain.js +0 -589
- cnhkmcp/untracked/APP/static/decoder.js +0 -1540
- cnhkmcp/untracked/APP/static/feature_engineering.js +0 -1729
- cnhkmcp/untracked/APP/static/idea_house.js +0 -937
- cnhkmcp/untracked/APP/static/inspiration.js +0 -465
- cnhkmcp/untracked/APP/static/inspiration_house.js +0 -868
- cnhkmcp/untracked/APP/static/paper_analysis.js +0 -390
- cnhkmcp/untracked/APP/static/script.js +0 -3082
- cnhkmcp/untracked/APP/static/simulator.js +0 -597
- cnhkmcp/untracked/APP/static/styles.css +0 -3127
- cnhkmcp/untracked/APP/static/usage_widget.js +0 -508
- cnhkmcp/untracked/APP/templates/alpha_inspector.html +0 -511
- cnhkmcp/untracked/APP/templates/feature_engineering.html +0 -960
- cnhkmcp/untracked/APP/templates/idea_house.html +0 -564
- cnhkmcp/untracked/APP/templates/index.html +0 -932
- cnhkmcp/untracked/APP/templates/inspiration_house.html +0 -861
- cnhkmcp/untracked/APP/templates/paper_analysis.html +0 -91
- cnhkmcp/untracked/APP/templates/simulator.html +0 -343
- cnhkmcp/untracked/APP/templates/transformer_web.html +0 -580
- cnhkmcp/untracked/APP/usage.md +0 -351
- cnhkmcp/untracked/APP//321/207/342/225/235/320/250/321/205/320/230/320/226/321/204/342/225/225/320/220/321/211/320/221/320/243/321/206/320/261/320/265/ace_lib.py +0 -1510
- cnhkmcp/untracked/APP//321/207/342/225/235/320/250/321/205/320/230/320/226/321/204/342/225/225/320/220/321/211/320/221/320/243/321/206/320/261/320/265/brain_alpha_inspector.py +0 -712
- cnhkmcp/untracked/APP//321/207/342/225/235/320/250/321/205/320/230/320/226/321/204/342/225/225/320/220/321/211/320/221/320/243/321/206/320/261/320/265/helpful_functions.py +0 -180
- cnhkmcp/untracked/APP//321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/231/320/243/321/205/342/225/235/320/220/321/206/320/230/320/241.py +0 -2456
- cnhkmcp/untracked/arXiv_API_Tool_Manual.md +0 -490
- cnhkmcp/untracked/arxiv_api.py +0 -229
- cnhkmcp/untracked/forum_functions.py +0 -998
- cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/forum_functions.py +0 -407
- cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/platform_functions.py +0 -2415
- cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/user_config.json +0 -31
- cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272//321/210/320/276/320/271AI/321/210/320/277/342/225/227/321/210/342/224/220/320/251/321/204/342/225/225/320/272/321/206/320/246/320/227/321/206/320/261/320/263/321/206/320/255/320/265/321/205/320/275/320/266/321/204/342/225/235/320/252/321/204/342/225/225/320/233/321/210/342/225/234/342/225/234/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270.md +0 -101
- cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272//321/211/320/225/320/235/321/207/342/225/234/320/276/321/205/320/231/320/235/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/230/320/241_/321/205/320/276/320/231/321/210/320/263/320/225/321/205/342/224/220/320/225/321/210/320/266/320/221/321/204/342/225/233/320/255/321/210/342/225/241/320/246/321/205/320/234/320/225.py +0 -190
- cnhkmcp/untracked/platform_functions.py +0 -2886
- cnhkmcp/untracked/sample_mcp_config.json +0 -11
- cnhkmcp/untracked/user_config.json +0 -31
- cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/320/237/320/222/321/210/320/220/320/223/321/206/320/246/320/227/321/206/320/261/320/263_BRAIN_Alpha_Test_Requirements_and_Tips.md +0 -202
- cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_Alpha_explaination_workflow.md +0 -56
- cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_BRAIN_6_Tips_Datafield_Exploration_Guide.md +0 -194
- cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_BRAIN_Alpha_Improvement_Workflow.md +0 -101
- cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_Dataset_Exploration_Expert_Manual.md +0 -436
- cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_daily_report_workflow.md +0 -128
- cnhkmcp/untracked//321/211/320/225/320/235/321/207/342/225/234/320/276/321/205/320/231/320/235/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/230/320/241_/321/205/320/276/320/231/321/210/320/263/320/225/321/205/342/224/220/320/225/321/210/320/266/320/221/321/204/342/225/233/320/255/321/210/342/225/241/320/246/321/205/320/234/320/225.py +0 -190
- cnhkmcp-2.1.2.dist-info/RECORD +0 -111
- cnhkmcp-2.1.2.dist-info/top_level.txt +0 -1
- {cnhkmcp-2.1.2.dist-info → cnhkmcp-2.1.3.dist-info}/WHEEL +0 -0
- {cnhkmcp-2.1.2.dist-info → cnhkmcp-2.1.3.dist-info}/entry_points.txt +0 -0
- {cnhkmcp-2.1.2.dist-info → cnhkmcp-2.1.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,407 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
WorldQuant BRAIN Forum Functions - Python Version
|
|
4
|
-
Comprehensive forum functionality including glossary, search, and post viewing using Playwright.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import asyncio
|
|
8
|
-
import re
|
|
9
|
-
import sys
|
|
10
|
-
import time
|
|
11
|
-
from datetime import datetime
|
|
12
|
-
from typing import Dict, Any, List, Optional
|
|
13
|
-
|
|
14
|
-
from bs4 import BeautifulSoup
|
|
15
|
-
from playwright.async_api import async_playwright
|
|
16
|
-
import requests
|
|
17
|
-
import os
|
|
18
|
-
|
|
19
|
-
def log(message: str, level: str = "INFO"):
    """Print a timestamped log line to standard error.

    The line has the form ``[YYYY-MM-DD HH:MM:SS] [LEVEL] message`` and uses
    the local wall-clock time returned by ``datetime.now()``.

    Args:
        message: Text to emit.
        level: Free-form severity tag placed in the second bracket pair
            (callers in this module pass "INFO", "SUCCESS" or "ERROR").
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{timestamp}] [{level}] {message}", file=sys.stderr)
|
|
23
|
-
|
|
24
|
-
# --- Parsing Helper Functions (from playwright_forum_test.py) ---
|
|
25
|
-
|
|
26
|
-
# Patterns are compiled once at import time. Each one is applied with
# ``re.match`` semantics, i.e. it must match at the START of the stripped line
# even when it carries no explicit ``^`` anchor.
_NAVIGATION_PATTERNS = tuple(re.compile(pattern) for pattern in (
    r'^\d+ (?:minute|hour|day|week|month|year)s? ago$',  # relative age, e.g. "3 days ago"
    r'~\d+ minute read',
    r'^Follow',
    r'^Not yet followed',
    r'^Updated$',
    r'^AS\d+$',
    r'^[A-Z] - [A-Z] - [A-Z]',  # Letter navigation
    r'^[A-Z]$',  # Single letters (also covers the former explicit "A" / "B" entries)
))


def _is_navigation_or_metadata(line: str) -> bool:
    """Check if a line is navigation or metadata.

    Args:
        line: One line of text; surrounding whitespace is ignored.

    Returns:
        True when the stripped line matches any of the known navigation /
        metadata patterns (relative post age, reading time, follow widgets,
        "Updated", ``AS<digits>`` identifiers, A-Z index rows, or a single
        capital letter); False otherwise, including for empty input.
    """
    stripped = line.strip()
    return any(pattern.match(stripped) for pattern in _NAVIGATION_PATTERNS)
|
|
41
|
-
|
|
42
|
-
# Words that typically open a definition sentence rather than name a term.
_DEFINITION_STARTERS = frozenset([
    'the', 'a', 'an', 'this', 'that', 'it', 'is', 'are', 'was', 'were',
    'for', 'to', 'in', 'on', 'at', 'by', 'with',
])


def _looks_like_term(line: str) -> bool:
    """Check if a line looks like a glossary term.

    A line is accepted when all of the following hold:

    * it is between 2 and 80 characters long;
    * ``_is_navigation_or_metadata`` does not flag it;
    * its first whitespace-delimited word (case-insensitive) is not one of
      the common definition starters ("the", "a", "is", ...);
    * it starts with an uppercase ASCII letter, or consists only of
      uppercase ASCII letters, whitespace and the characters ``- / ( )``.

    Args:
        line: Candidate line; callers in this module pass it already stripped.

    Returns:
        True if the line is considered a term heading, else False.
    """
    # One length guard: anything longer than 80 characters can never qualify,
    # so a separate, looser upper bound would be dead code.
    if not 2 <= len(line) <= 80:
        return False
    if _is_navigation_or_metadata(line):
        return False

    # split() without arguments tolerates tabs and repeated spaces, so the
    # first word is found regardless of which whitespace separates it.
    words = line.split(None, 1)
    if words and words[0].lower() in _DEFINITION_STARTERS:
        return False

    starts_with_capital = re.match(r'^[A-Z]', line) is not None
    has_all_caps = re.match(r'^[A-Z\s\-\/\(\)]+$', line) is not None
    return starts_with_capital or has_all_caps
|
|
57
|
-
|
|
58
|
-
def _parse_glossary_terms(content: str) -> List[Dict[str, str]]:
    """Parse glossary terms from HTML content.

    The text of the first ``.article-body`` element is split into lines.
    Every non-empty line that ``_looks_like_term`` accepts starts a new
    entry; the following non-term lines are joined with single spaces to
    form that entry's definition. Lines that appear before the first term
    are ignored.

    Args:
        content: Full HTML of the glossary page.

    Returns:
        A list of ``{"term": ..., "definition": ...}`` dicts in document
        order. Entries are dropped when the definition is 10 characters or
        shorter, when the term is navigation/metadata, or when the
        definition contains the standalone word "ago" or the phrase
        "minute read" (both indicate scraped post metadata). Returns an
        empty list when the page has no ``.article-body`` element.
    """
    soup = BeautifulSoup(content, 'html.parser')
    # Get text from the article body, which is more reliable than splitting the whole HTML
    article_body = soup.select_one('.article-body')
    if article_body is None:
        return []

    # Each entry is (term, [definition lines]); a new term line opens a new entry.
    entries = []
    # Use .get_text with a separator to preserve line breaks, which is key for the logic below
    for raw_line in article_body.get_text(separator='\n').split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        if _looks_like_term(line):
            entries.append((line, []))
        elif entries:
            entries[-1][1].append(line)

    # Filter out invalid terms and improve quality
    terms = []
    for term, definition_lines in entries:
        definition = " ".join(definition_lines).strip()
        if len(definition) <= 10:
            continue
        if _is_navigation_or_metadata(term):
            continue
        # Whole-word match: a plain substring test would also reject
        # definitions that merely contain words such as "Chicago".
        if re.search(r'\bago\b', definition) or "minute read" in definition:
            continue
        terms.append({"term": term, "definition": definition})
    return terms
|
|
106
|
-
|
|
107
|
-
class ForumClient:
    """Forum client for WorldQuant BRAIN support site, using Playwright.

    Every public coroutine authenticates through ``brain_client`` (imported
    from ``platform_functions``), copies that client's cookies into a fresh
    headless Chrome context, scrapes the requested pages with BeautifulSoup,
    and closes the browser before returning or re-raising.
    """

    # Browser identity used for both the requests session and the Playwright context.
    USER_AGENT = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36'
    )
    # Path (relative to ``base_url``) of the glossary article.
    GLOSSARY_PATH = "/hc/en-us/articles/4902349883927-Click-here-for-a-list-of-terms-and-their-definitions"

    def __init__(self):
        """Set the support-site base URL and create a requests session."""
        self.base_url = "https://support.worldquantbrain.com"
        # The session is mainly used for the initial authentication via brain_client
        self.session = requests.Session()
        self.session.headers.update({'User-Agent': self.USER_AGENT})

    # ------------------------------------------------------------------
    # Small pure helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _to_playwright_cookie(cookie) -> Dict[str, Any]:
        """Convert an ``http.cookiejar.Cookie`` into a Playwright cookie dict.

        ``expires`` is only included when the cookie carries a truthy expiry,
        so session cookies are passed without that key.
        """
        converted = {
            'name': cookie.name,
            'value': cookie.value,
            'domain': cookie.domain,
            'path': cookie.path,
            'secure': cookie.secure,
            # Public accessor for non-standard attributes (instead of the private ``_rest`` dict).
            'httpOnly': cookie.has_nonstandard_attr('HttpOnly'),
            'sameSite': 'Lax',
        }
        if cookie.expires:
            converted['expires'] = cookie.expires
        return converted

    @staticmethod
    def _first_int(text: str) -> int:
        """Return the first run of digits in *text* as an int, or 0 if there is none."""
        match = re.search(r'\d+', text)
        return int(match.group()) if match else 0

    def _build_search_url(self, locale: str, page_num: int, search_query: str) -> str:
        """Build the help-center search URL for one result page.

        The query is percent-encoded so characters such as ``&``, ``#`` or
        ``+`` stay inside the ``query`` parameter instead of altering the URL.
        """
        from urllib.parse import quote

        encoded_query = quote(search_query, safe='')
        return f"{self.base_url}/hc/{locale}/search?page={page_num}&query={encoded_query}#results"

    def _resolve_post_url(self, post_url_or_id: str) -> str:
        """Return *post_url_or_id* unchanged if it starts with "http", else the zh-cn community post URL for that ID."""
        if post_url_or_id.startswith('http'):
            return post_url_or_id
        return f"{self.base_url}/hc/zh-cn/community/posts/{post_url_or_id}"

    def _parse_search_result(self, result) -> Optional[Dict[str, Any]]:
        """Extract one search hit from its ``li.search-result-list-item`` element.

        Returns None when the element has no title link; otherwise a dict
        with title, link, snippet, votes, comments, author, date and
        breadcrumbs. Missing optional parts fall back to '', 0 or 'Unknown'.
        """
        title_element = result.select_one('h2.search-result-title a')
        if title_element is None:
            return None

        votes_element = result.select_one('.search-result-votes span[aria-hidden="true"]')
        comments_element = result.select_one('.search-result-meta-count span[aria-hidden="true"]')
        snippet_element = result.select_one('.search-results-description')

        author = 'Unknown'
        post_date = 'Unknown'
        meta_group = result.select_one('ul.meta-group')
        if meta_group:
            meta_data_elements = meta_group.select('li.meta-data')
            if len(meta_data_elements) > 0:
                author = meta_data_elements[0].get_text(strip=True)
            if len(meta_data_elements) > 1:
                time_element = meta_data_elements[1].select_one('time')
                if time_element:
                    # Prefer the machine-readable attribute; fall back to the visible text.
                    post_date = time_element.get('datetime', time_element.get_text(strip=True))

        link = title_element.get('href')
        if not link:
            full_link = ''
        elif link.startswith('http'):
            full_link = link
        else:
            full_link = f"{self.base_url}{link}"

        return {
            'title': title_element.get_text(strip=True),
            'link': full_link,
            'snippet': snippet_element.get_text(strip=True) if snippet_element else '',
            'votes': self._first_int(votes_element.get_text(strip=True) if votes_element else '0'),
            'comments': self._first_int(comments_element.get_text(strip=True) if comments_element else '0'),
            'author': author,
            'date': post_date,
            'breadcrumbs': [bc.get_text(strip=True) for bc in result.select('ol.search-result-breadcrumbs li')],
        }

    # ------------------------------------------------------------------
    # Browser / session setup
    # ------------------------------------------------------------------

    async def _get_browser_context(self, p: Any, email: str, password: str):
        """Authenticate and return a browser context with the session.

        Args:
            p: The object yielded by ``async with async_playwright()``.
            email: BRAIN account e-mail passed to ``brain_client.authenticate``.
            password: BRAIN account password.

        Returns:
            ``(browser, context)``; the caller is responsible for closing
            ``browser``.

        Raises:
            Exception: If authentication does not report status
                "authenticated", or if browser/context setup fails (in which
                case the launched browser is closed before re-raising).
        """
        # Import brain_client here to avoid circular dependency
        from platform_functions import brain_client

        log("Authenticating with BRAIN platform...", "INFO")
        auth_result = await brain_client.authenticate(email, password)
        if auth_result.get('status') != 'authenticated':
            raise Exception("BRAIN platform authentication failed.")
        log("Successfully authenticated with BRAIN platform.", "SUCCESS")

        browser = await p.chromium.launch(channel="chrome", headless=True, args=['--no-sandbox'])
        try:
            context = await browser.new_context(user_agent=self.USER_AGENT)

            log("Transferring authentication session to browser...", "INFO")
            playwright_cookies = [
                self._to_playwright_cookie(cookie) for cookie in brain_client.session.cookies
            ]
            await context.add_cookies(playwright_cookies)
        except Exception:
            # The caller never receives the handle on failure, so close it here.
            await browser.close()
            raise
        log("Session transferred.", "SUCCESS")

        return browser, context

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def get_glossary_terms(self, email: str, password: str) -> List[Dict[str, str]]:
        """Extract glossary terms from the forum using Playwright.

        Returns:
            The list produced by ``_parse_glossary_terms`` for the glossary
            article's HTML.

        Raises:
            Exception: Any failure is logged and re-raised.
        """
        async with async_playwright() as p:
            browser = None
            try:
                log("Starting glossary extraction process with Playwright", "INFO")
                browser, context = await self._get_browser_context(p, email, password)

                page = await context.new_page()
                log("Navigating to BRAIN support forum glossary...", "INFO")
                await page.goto(f"{self.base_url}{self.GLOSSARY_PATH}")

                log("Extracting glossary content...", "INFO")
                content = await page.content()

                terms = _parse_glossary_terms(content)

                log(f"Extracted {len(terms)} glossary terms", "SUCCESS")
                return terms

            except Exception as e:
                log(f"Glossary extraction failed: {str(e)}", "ERROR")
                # Re-raise to be handled by the MCP server wrapper
                raise
            finally:
                if browser:
                    await browser.close()
                    log("Browser closed.", "INFO")

    async def search_forum_posts(self, email: str, password: str, search_query: str, max_results: int = 50, locale: str = "zh-cn") -> Dict[str, Any]:
        """Search for posts on the forum using Playwright, with pagination.

        Result pages are fetched one after another until ``max_results`` hits
        are collected, a page returns 404, a page fails to load, or a page
        contains no result items.

        Args:
            email: BRAIN account e-mail.
            password: BRAIN account password.
            search_query: Raw (unencoded) search text.
            max_results: Upper bound on the number of hits returned.
            locale: Help-center locale segment used in the URL.

        Returns:
            ``{"success": True, "results": [...], "total_found": n}``.

        Raises:
            Exception: Authentication/browser failures are logged and re-raised.
        """
        async with async_playwright() as p:
            browser = None
            try:
                log(f"Starting forum search for '{search_query}'", "INFO")
                browser, context = await self._get_browser_context(p, email, password)

                page = await context.new_page()

                search_results = []
                page_num = 1

                while len(search_results) < max_results:
                    search_url = self._build_search_url(locale, page_num, search_query)
                    log(f"Navigating to search page: {search_url}", "INFO")

                    try:
                        response = await page.goto(search_url)
                        # goto() can return None, so guard before reading the status.
                        if response is not None and response.status == 404:
                            log(f"Page {page_num} not found. End of results.", "INFO")
                            break
                        await page.wait_for_selector('ul.search-results-list', timeout=15000)
                    except Exception as e:
                        # Best effort: a page that does not load ends pagination.
                        log(f"Could not load search results on page {page_num}: {e}", "INFO")
                        break

                    soup = BeautifulSoup(await page.content(), 'html.parser')
                    results_on_page = soup.select('li.search-result-list-item')
                    if not results_on_page:
                        log("No more search results found.", "INFO")
                        break

                    for result in results_on_page:
                        parsed = self._parse_search_result(result)
                        if parsed is not None:
                            search_results.append(parsed)
                        if len(search_results) >= max_results:
                            break

                    page_num += 1

                log(f"Found {len(search_results)} results for '{search_query}'", "SUCCESS")

                return {
                    "success": True,
                    "results": search_results,
                    "total_found": len(search_results)
                }

            except Exception as e:
                log(f"Forum search failed: {str(e)}", "ERROR")
                raise
            finally:
                if browser:
                    await browser.close()

    async def read_full_forum_post(self, email: str, password: str, post_url_or_id: str, include_comments: bool = True) -> Dict[str, Any]:
        """Read a complete forum post and all its comments using Playwright.

        Args:
            email: BRAIN account e-mail.
            password: BRAIN account password.
            post_url_or_id: Full post URL (anything starting with "http") or
                a bare post ID, which is resolved under the zh-cn community.
            include_comments: When False, comment pages are not fetched and
                the returned comment list is empty.

        Returns:
            ``{"success": True, "post": {...}, "comments": [...],
            "total_comments": n}`` where ``post`` holds title, author, body
            and a ``details`` dict with votes and date.

        Raises:
            Exception: Any failure while loading the main post is logged and
                re-raised. Failures while loading a comment page only end
                comment pagination.
        """
        async with async_playwright() as p:
            browser = None
            try:
                log("Starting forum post reading process with Playwright", "INFO")

                initial_url = self._resolve_post_url(post_url_or_id)

                browser, context = await self._get_browser_context(p, email, password)
                page = await context.new_page()

                # --- Get Main Post Content and Final URL ---
                log(f"Navigating to initial URL: {initial_url}", "INFO")
                await page.goto(initial_url)
                await page.wait_for_selector('.post-body, .article-body', timeout=15000)

                # Get the final URL after any redirects
                post_base_url = re.sub(r'(\?|&)page=\d+', '', page.url).split('#')[0]
                log(f"Resolved to Base URL: {post_base_url}", "INFO")
                soup = BeautifulSoup(await page.content(), 'html.parser')

                post_data = {}
                title_element = soup.select_one('.post-title, h1.article-title, .article__title')
                post_data['title'] = title_element.get_text(strip=True) if title_element else 'Unknown Title'

                author_span = soup.select_one('.post-author span[title]')
                post_data['author'] = author_span['title'] if author_span else 'Unknown Author'

                body_element = soup.select_one('.post-body, .article-body')
                post_data['body'] = body_element.get_text(strip=True) if body_element else 'Body not found'

                votes_element = soup.select_one('.vote-sum')
                date_element = soup.select_one('.post-meta .meta-data')
                post_data['details'] = {
                    'votes': votes_element.get_text(strip=True) if votes_element else '0',
                    'date': date_element.get_text(strip=True) if date_element else 'Unknown Date'
                }

                # --- Get Comments with Pagination ---
                comments = []
                # (author, body, date) triples already collected, for O(1) duplicate checks.
                seen_comments = set()
                if include_comments:
                    log("Starting comment extraction...", "INFO")
                    page_num = 1
                    while True:
                        comment_url = f"{post_base_url}?page={page_num}#comments"
                        log(f"Navigating to comment page: {comment_url}", "INFO")

                        try:
                            response = await page.goto(comment_url)
                            if response is not None and response.status == 404:
                                log(f"Page {page_num} returned 404. End of comments.", "INFO")
                                break
                            await page.wait_for_selector('.comment-list', timeout=10000)
                        except Exception as e:
                            log(f"Could not load page {page_num}: {e}. Assuming end of comments.", "INFO")
                            break

                        comment_soup = BeautifulSoup(await page.content(), 'html.parser')
                        comment_elements = comment_soup.select('.comment')

                        if not comment_elements:
                            log(f"No comments found on page {page_num}. Ending extraction.", "INFO")
                            break

                        log(f"Found {len(comment_elements)} comments on page {page_num}.", "INFO")

                        new_comments_found_on_page = 0
                        for comment_element in comment_elements:
                            author_span = comment_element.select_one('.comment-author span[title]')
                            body_element = comment_element.select_one('.comment-body')
                            date_element = comment_element.select_one('.comment-meta .meta-data')

                            comment_data = {
                                'author': author_span['title'] if author_span else 'Unknown',
                                'body': body_element.get_text(strip=True) if body_element else '',
                                'date': date_element.get_text(strip=True) if date_element else 'Unknown Date'
                            }

                            comment_key = (comment_data['author'], comment_data['body'], comment_data['date'])
                            if comment_key not in seen_comments:
                                seen_comments.add(comment_key)
                                comments.append(comment_data)
                                new_comments_found_on_page += 1

                        # A later page that only repeats known comments means
                        # pagination has run past the last real page.
                        if new_comments_found_on_page == 0 and page_num > 1:
                            log(f"No new comments detected on page {page_num}. Ending extraction.", "INFO")
                            break

                        page_num += 1

                log(f"Extracted {len(comments)} comments in total.", "SUCCESS")
                return {
                    "success": True, "post": post_data, "comments": comments, "total_comments": len(comments)
                }

            except Exception as e:
                log(f"Failed to read forum post: {str(e)}", "ERROR")
                raise
            finally:
                if browser:
                    await browser.close()
|
|
401
|
-
|
|
402
|
-
# Initialize forum client: a single module-level instance created at import time.
forum_client = ForumClient()

# The main block is for testing and won't be run by the MCP server.
if __name__ == "__main__":
    print("📚 WorldQuant BRAIN Forum Functions - This script provides the ForumClient class.", file=sys.stderr)
|