cnhkmcp 2.1.1__py3-none-any.whl → 2.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. {cnhkmcp-2.1.1.dist-info → cnhkmcp-2.1.3.dist-info}/METADATA +1 -1
  2. cnhkmcp-2.1.3.dist-info/RECORD +6 -0
  3. cnhkmcp-2.1.3.dist-info/top_level.txt +1 -0
  4. cnhkmcp/__init__.py +0 -125
  5. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/README.md +0 -38
  6. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/ace.log +0 -0
  7. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/config.json +0 -6
  8. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/ace_lib.py +0 -1510
  9. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/fetch_all_datasets.py +0 -157
  10. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/fetch_all_documentation.py +0 -132
  11. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/fetch_all_operators.py +0 -99
  12. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/get_knowledgeBase_tool/helpful_functions.py +0 -180
  13. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/icon.ico +0 -0
  14. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/icon.png +0 -0
  15. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/knowledge/test.txt +0 -1
  16. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/main.py +0 -576
  17. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/process_knowledge_base.py +0 -280
  18. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/rag_engine.py +0 -356
  19. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/requirements.txt +0 -7
  20. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/run.bat +0 -3
  21. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/vector_db/_manifest.json +0 -326
  22. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/vector_db/_meta.json +0 -1
  23. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/vector_db/be5d957c-b724-46e3-91d1-999e9f5f7d28/index_metadata.pickle +0 -0
  24. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/vector_db/chroma.sqlite3 +0 -0
  25. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242//321/211/320/266/320/246/321/206/320/274/320/261/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/231/320/243/321/205/342/225/235/320/220/321/206/320/230/320/241.py +0 -265
  26. cnhkmcp/untracked/APP/.gitignore +0 -32
  27. cnhkmcp/untracked/APP/MODULAR_STRUCTURE.md +0 -112
  28. cnhkmcp/untracked/APP/README.md +0 -309
  29. cnhkmcp/untracked/APP/Tranformer/Transformer.py +0 -4985
  30. cnhkmcp/untracked/APP/Tranformer/ace.log +0 -0
  31. cnhkmcp/untracked/APP/Tranformer/ace_lib.py +0 -1510
  32. cnhkmcp/untracked/APP/Tranformer/helpful_functions.py +0 -180
  33. cnhkmcp/untracked/APP/Tranformer/output/Alpha_candidates.json +0 -2421
  34. cnhkmcp/untracked/APP/Tranformer/output/Alpha_candidates_/321/207/320/264/342/225/221/321/204/342/225/233/320/233.json +0 -654
  35. cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_error.json +0 -1034
  36. cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_success.json +0 -444
  37. cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_/321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/320/237/320/277/321/207/320/253/342/224/244/321/206/320/236/320/265/321/210/342/225/234/342/225/234/321/205/320/225/320/265Machine_lib.json +0 -22
  38. cnhkmcp/untracked/APP/Tranformer/parsetab.py +0 -60
  39. cnhkmcp/untracked/APP/Tranformer/template_summary.txt +0 -3182
  40. cnhkmcp/untracked/APP/Tranformer/transformer_config.json +0 -7
  41. cnhkmcp/untracked/APP/Tranformer/validator.py +0 -889
  42. cnhkmcp/untracked/APP/ace.log +0 -69
  43. cnhkmcp/untracked/APP/ace_lib.py +0 -1510
  44. cnhkmcp/untracked/APP/blueprints/__init__.py +0 -6
  45. cnhkmcp/untracked/APP/blueprints/feature_engineering.py +0 -347
  46. cnhkmcp/untracked/APP/blueprints/idea_house.py +0 -221
  47. cnhkmcp/untracked/APP/blueprints/inspiration_house.py +0 -432
  48. cnhkmcp/untracked/APP/blueprints/paper_analysis.py +0 -570
  49. cnhkmcp/untracked/APP/custom_templates/templates.json +0 -1257
  50. cnhkmcp/untracked/APP/give_me_idea/BRAIN_Alpha_Template_Expert_SystemPrompt.md +0 -400
  51. cnhkmcp/untracked/APP/give_me_idea/ace_lib.py +0 -1510
  52. cnhkmcp/untracked/APP/give_me_idea/alpha_data_specific_template_master.py +0 -252
  53. cnhkmcp/untracked/APP/give_me_idea/fetch_all_datasets.py +0 -157
  54. cnhkmcp/untracked/APP/give_me_idea/fetch_all_operators.py +0 -99
  55. cnhkmcp/untracked/APP/give_me_idea/helpful_functions.py +0 -180
  56. cnhkmcp/untracked/APP/give_me_idea/what_is_Alpha_template.md +0 -11
  57. cnhkmcp/untracked/APP/helpful_functions.py +0 -180
  58. cnhkmcp/untracked/APP/hkSimulator/ace_lib.py +0 -1497
  59. cnhkmcp/untracked/APP/hkSimulator/autosimulator.py +0 -447
  60. cnhkmcp/untracked/APP/hkSimulator/helpful_functions.py +0 -180
  61. cnhkmcp/untracked/APP/mirror_config.txt +0 -20
  62. cnhkmcp/untracked/APP/operaters.csv +0 -129
  63. cnhkmcp/untracked/APP/requirements.txt +0 -53
  64. cnhkmcp/untracked/APP/run_app.bat +0 -28
  65. cnhkmcp/untracked/APP/run_app.sh +0 -34
  66. cnhkmcp/untracked/APP/setup_tsinghua.bat +0 -39
  67. cnhkmcp/untracked/APP/setup_tsinghua.sh +0 -43
  68. cnhkmcp/untracked/APP/simulator/alpha_submitter.py +0 -404
  69. cnhkmcp/untracked/APP/simulator/simulator_wqb.py +0 -618
  70. cnhkmcp/untracked/APP/ssrn-3332513.pdf +6 -109201
  71. cnhkmcp/untracked/APP/static/brain.js +0 -589
  72. cnhkmcp/untracked/APP/static/decoder.js +0 -1540
  73. cnhkmcp/untracked/APP/static/feature_engineering.js +0 -1729
  74. cnhkmcp/untracked/APP/static/idea_house.js +0 -937
  75. cnhkmcp/untracked/APP/static/inspiration.js +0 -465
  76. cnhkmcp/untracked/APP/static/inspiration_house.js +0 -868
  77. cnhkmcp/untracked/APP/static/paper_analysis.js +0 -390
  78. cnhkmcp/untracked/APP/static/script.js +0 -3082
  79. cnhkmcp/untracked/APP/static/simulator.js +0 -597
  80. cnhkmcp/untracked/APP/static/styles.css +0 -3127
  81. cnhkmcp/untracked/APP/static/usage_widget.js +0 -508
  82. cnhkmcp/untracked/APP/templates/alpha_inspector.html +0 -511
  83. cnhkmcp/untracked/APP/templates/feature_engineering.html +0 -960
  84. cnhkmcp/untracked/APP/templates/idea_house.html +0 -564
  85. cnhkmcp/untracked/APP/templates/index.html +0 -932
  86. cnhkmcp/untracked/APP/templates/inspiration_house.html +0 -861
  87. cnhkmcp/untracked/APP/templates/paper_analysis.html +0 -91
  88. cnhkmcp/untracked/APP/templates/simulator.html +0 -343
  89. cnhkmcp/untracked/APP/templates/transformer_web.html +0 -580
  90. cnhkmcp/untracked/APP/usage.md +0 -351
  91. cnhkmcp/untracked/APP//321/207/342/225/235/320/250/321/205/320/230/320/226/321/204/342/225/225/320/220/321/211/320/221/320/243/321/206/320/261/320/265/ace_lib.py +0 -1510
  92. cnhkmcp/untracked/APP//321/207/342/225/235/320/250/321/205/320/230/320/226/321/204/342/225/225/320/220/321/211/320/221/320/243/321/206/320/261/320/265/brain_alpha_inspector.py +0 -712
  93. cnhkmcp/untracked/APP//321/207/342/225/235/320/250/321/205/320/230/320/226/321/204/342/225/225/320/220/321/211/320/221/320/243/321/206/320/261/320/265/helpful_functions.py +0 -180
  94. cnhkmcp/untracked/APP//321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/231/320/243/321/205/342/225/235/320/220/321/206/320/230/320/241.py +0 -2456
  95. cnhkmcp/untracked/arXiv_API_Tool_Manual.md +0 -490
  96. cnhkmcp/untracked/arxiv_api.py +0 -229
  97. cnhkmcp/untracked/forum_functions.py +0 -998
  98. cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/forum_functions.py +0 -407
  99. cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/platform_functions.py +0 -2415
  100. cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/user_config.json +0 -31
  101. cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272//321/210/320/276/320/271AI/321/210/320/277/342/225/227/321/210/342/224/220/320/251/321/204/342/225/225/320/272/321/206/320/246/320/227/321/206/320/261/320/263/321/206/320/255/320/265/321/205/320/275/320/266/321/204/342/225/235/320/252/321/204/342/225/225/320/233/321/210/342/225/234/342/225/234/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270.md +0 -101
  102. cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272//321/211/320/225/320/235/321/207/342/225/234/320/276/321/205/320/231/320/235/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/230/320/241_/321/205/320/276/320/231/321/210/320/263/320/225/321/205/342/224/220/320/225/321/210/320/266/320/221/321/204/342/225/233/320/255/321/210/342/225/241/320/246/321/205/320/234/320/225.py +0 -190
  103. cnhkmcp/untracked/platform_functions.py +0 -2886
  104. cnhkmcp/untracked/sample_mcp_config.json +0 -11
  105. cnhkmcp/untracked/user_config.json +0 -31
  106. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/320/237/320/222/321/210/320/220/320/223/321/206/320/246/320/227/321/206/320/261/320/263_BRAIN_Alpha_Test_Requirements_and_Tips.md +0 -202
  107. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_Alpha_explaination_workflow.md +0 -56
  108. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_BRAIN_6_Tips_Datafield_Exploration_Guide.md +0 -194
  109. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_BRAIN_Alpha_Improvement_Workflow.md +0 -101
  110. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_Dataset_Exploration_Expert_Manual.md +0 -436
  111. cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_daily_report_workflow.md +0 -128
  112. cnhkmcp/untracked//321/211/320/225/320/235/321/207/342/225/234/320/276/321/205/320/231/320/235/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/230/320/241_/321/205/320/276/320/231/321/210/320/263/320/225/321/205/342/224/220/320/225/321/210/320/266/320/221/321/204/342/225/233/320/255/321/210/342/225/241/320/246/321/205/320/234/320/225.py +0 -190
  113. cnhkmcp-2.1.1.dist-info/RECORD +0 -115
  114. cnhkmcp-2.1.1.dist-info/top_level.txt +0 -1
  115. {cnhkmcp-2.1.1.dist-info → cnhkmcp-2.1.3.dist-info}/WHEEL +0 -0
  116. {cnhkmcp-2.1.1.dist-info → cnhkmcp-2.1.3.dist-info}/entry_points.txt +0 -0
  117. {cnhkmcp-2.1.1.dist-info → cnhkmcp-2.1.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,407 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- WorldQuant BRAIN Forum Functions - Python Version
4
- Comprehensive forum functionality including glossary, search, and post viewing using Playwright.
5
- """
6
-
7
- import asyncio
8
- import re
9
- import sys
10
- import time
11
- from datetime import datetime
12
- from typing import Dict, Any, List, Optional
13
-
14
- from bs4 import BeautifulSoup
15
- from playwright.async_api import async_playwright
16
- import requests
17
- import os
18
-
19
def log(message: str, level: str = "INFO"):
    """Emit a single timestamped, level-tagged line on standard error.

    Args:
        message: Text to report.
        level: Label placed in the second bracket pair (defaults to "INFO").
    """
    now = datetime.now()
    # The format spec inside the f-string is handed to strftime by datetime.__format__.
    print(f"[{now:%Y-%m-%d %H:%M:%S}] [{level}] {message}", file=sys.stderr)
23
-
24
- # --- Parsing Helper Functions (from playwright_forum_test.py) ---
25
-
26
- def _is_navigation_or_metadata(line: str) -> bool:
27
- """Check if a line is navigation or metadata."""
28
- navigation_patterns = [
29
- r'^\d+ days? ago$',
30
- r'~\d+ minute read',
31
- r'^Follow',
32
- r'^Not yet followed',
33
- r'^Updated$',
34
- r'^AS\d+$',
35
- r'^[A-Z] - [A-Z] - [A-Z]', # Letter navigation
36
- r'^A$',
37
- r'^B$',
38
- r'^[A-Z]$' # Single letters
39
- ]
40
- return any(re.match(pattern, line.strip()) for pattern in navigation_patterns)
41
-
42
def _looks_like_term(line: str) -> bool:
    """Heuristically decide whether a line is a glossary term heading.

    A line qualifies when it is 2-80 characters long, is not navigation or
    metadata, does not open with a common definition-starting word, and either
    begins with an upper-case letter or consists only of upper-case letters,
    whitespace, hyphens, slashes and parentheses.

    Args:
        line: Candidate line of text.

    Returns:
        True if the line looks like a term, False otherwise.
    """
    if len(line) > 100 or _is_navigation_or_metadata(line):
        return False

    # Lines that open with an article, pronoun, verb or preposition read like
    # definition text rather than a heading.
    leading_word = line.lower().partition(' ')[0]
    if leading_word in ('the', 'a', 'an', 'this', 'that', 'it', 'is', 'are', 'was', 'were',
                        'for', 'to', 'in', 'on', 'at', 'by', 'with'):
        return False

    if not 2 <= len(line) <= 80:
        return False

    if re.match(r'^[A-Z]', line):
        return True
    return bool(re.match(r'^[A-Z\s\-\/\(\)]+$', line))
57
-
58
def _parse_glossary_terms(content: str) -> List[Dict[str, str]]:
    """Parse glossary terms from HTML content.

    The text of the ``.article-body`` element is split into lines. A line that
    ``_looks_like_term`` accepts starts a new entry; every following non-empty
    line is appended to that entry's definition until the next term appears.
    Lines that precede the first term are ignored.

    Args:
        content: Full HTML of the glossary page.

    Returns:
        A list of ``{"term": ..., "definition": ...}`` dicts. Entries are
        dropped when the definition is 10 characters or shorter, when the term
        is navigation/metadata, or when the definition contains page metadata
        (a relative timestamp such as "3 days ago", or "minute read").
        Returns an empty list when the page has no ``.article-body`` element.
    """
    soup = BeautifulSoup(content, 'html.parser')
    # Get text from the article body, which is more reliable than splitting the whole HTML
    article_body = soup.select_one('.article-body')
    if not article_body:
        return []

    # Match real relative timestamps only. A bare "ago" substring test would
    # also reject legitimate definitions containing words such as "diagonal".
    relative_time = re.compile(
        r'\b\d+\s+(?:second|minute|hour|day|week|month|year)s?\s+ago\b'
    )

    # Each entry is (term, [definition lines]); the separator preserves the
    # line breaks that the term/definition grouping relies on.
    entries = []
    for raw_line in article_body.get_text(separator='\n').split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        if _looks_like_term(line):
            entries.append((line, []))
        elif entries:
            entries[-1][1].append(line)

    # Filter out invalid terms and improve quality
    terms = []
    for term, definition_lines in entries:
        definition = " ".join(definition_lines).strip()
        if len(definition) <= 10:
            continue
        if _is_navigation_or_metadata(term):
            continue
        if relative_time.search(definition) or "minute read" in definition:
            continue
        terms.append({"term": term, "definition": definition})
    return terms
106
-
107
class ForumClient:
    """Forum client for WorldQuant BRAIN support site, using Playwright.

    Each public coroutine authenticates through ``brain_client``, copies the
    resulting cookies into a headless Chrome context, scrapes the requested
    pages with BeautifulSoup, and closes the browser before returning.
    """

    # User agent presented by both the requests session and the browser context.
    _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36'

    def __init__(self):
        """Set the support-site base URL and create the requests session."""
        self.base_url = "https://support.worldquantbrain.com"
        # The session is mainly used for the initial authentication via brain_client
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': self._USER_AGENT
        })

    @staticmethod
    def _to_playwright_cookie(cookie) -> Dict[str, Any]:
        """Convert one cookie-jar cookie into the dict form passed to ``add_cookies``."""
        cookie_dict = {
            'name': cookie.name,
            'value': cookie.value,
            'domain': cookie.domain,
            'path': cookie.path,
            'secure': cookie.secure,
            # Public accessor for non-standard attributes; avoids the private ``_rest`` dict.
            'httpOnly': cookie.has_nonstandard_attr('HttpOnly'),
            'sameSite': 'Lax'
        }
        if cookie.expires:
            cookie_dict['expires'] = cookie.expires
        return cookie_dict

    @staticmethod
    def _first_int(element) -> int:
        """Return the first run of digits in an element's text as an int, or 0."""
        text = element.get_text(strip=True) if element else '0'
        match = re.search(r'\d+', text)
        return int(match.group()) if match else 0

    def _search_url(self, search_query: str, page_num: int, locale: str) -> str:
        """Build the search-results URL for one page, percent-encoding the query."""
        from urllib.parse import quote_plus

        # Unencoded '&', '#' or spaces in the query would otherwise corrupt the URL.
        return f"{self.base_url}/hc/{locale}/search?page={page_num}&query={quote_plus(search_query)}#results"

    def _parse_search_result(self, result) -> Optional[Dict[str, Any]]:
        """Extract one search hit from its list item; None when it has no title link."""
        title_element = result.select_one('h2.search-result-title a')
        if not title_element:
            return None

        link = title_element.get('href')
        full_link = ''
        if link:
            # Relative links are resolved against the support-site base URL.
            full_link = link if link.startswith('http') else f"{self.base_url}{link}"

        author = 'Unknown'
        post_date = 'Unknown'
        meta_group = result.select_one('ul.meta-group')
        if meta_group:
            meta_data_elements = meta_group.select('li.meta-data')
            if len(meta_data_elements) > 0:
                author = meta_data_elements[0].get_text(strip=True)
            if len(meta_data_elements) > 1:
                time_element = meta_data_elements[1].select_one('time')
                if time_element:
                    post_date = time_element.get('datetime', time_element.get_text(strip=True))

        snippet_element = result.select_one('.search-results-description')
        return {
            'title': title_element.get_text(strip=True),
            'link': full_link,
            'snippet': snippet_element.get_text(strip=True) if snippet_element else '',
            'votes': self._first_int(result.select_one('.search-result-votes span[aria-hidden="true"]')),
            'comments': self._first_int(result.select_one('.search-result-meta-count span[aria-hidden="true"]')),
            'author': author,
            'date': post_date,
            'breadcrumbs': [bc.get_text(strip=True) for bc in result.select('ol.search-result-breadcrumbs li')]
        }

    async def _get_browser_context(self, p, email: str, password: str):
        """Authenticate and return a browser context with the session.

        Args:
            p: The object yielded by ``async with async_playwright()``.
            email: BRAIN account e-mail passed to ``brain_client.authenticate``.
            password: BRAIN account password.

        Returns:
            A ``(browser, context)`` tuple; the caller must close the browser.

        Raises:
            Exception: If the authentication status is not 'authenticated'.
        """
        # Import brain_client here to avoid circular dependency
        from platform_functions import brain_client

        log("Authenticating with BRAIN platform...", "INFO")
        auth_result = await brain_client.authenticate(email, password)
        if auth_result.get('status') != 'authenticated':
            raise Exception("BRAIN platform authentication failed.")
        log("Successfully authenticated with BRAIN platform.", "SUCCESS")

        browser = await p.chromium.launch(channel="chrome", headless=True, args=['--no-sandbox'])
        try:
            context = await browser.new_context(user_agent=self._USER_AGENT)

            log("Transferring authentication session to browser...", "INFO")
            playwright_cookies = [
                self._to_playwright_cookie(cookie) for cookie in brain_client.session.cookies
            ]
            await context.add_cookies(playwright_cookies)
            log("Session transferred.", "SUCCESS")
        except BaseException:
            # The caller never receives the handle on failure, so close it here.
            await browser.close()
            raise

        return browser, context

    async def get_glossary_terms(self, email: str, password: str) -> List[Dict[str, str]]:
        """Extract glossary terms from the forum using Playwright.

        Args:
            email: BRAIN account e-mail.
            password: BRAIN account password.

        Returns:
            The list produced by ``_parse_glossary_terms`` for the glossary page.

        Raises:
            Exception: Any failure is logged and re-raised.
        """
        async with async_playwright() as p:
            browser = None
            try:
                log("Starting glossary extraction process with Playwright", "INFO")
                browser, context = await self._get_browser_context(p, email, password)

                page = await context.new_page()
                log("Navigating to BRAIN support forum glossary...", "INFO")
                await page.goto("https://support.worldquantbrain.com/hc/en-us/articles/4902349883927-Click-here-for-a-list-of-terms-and-their-definitions")

                log("Extracting glossary content...", "INFO")
                content = await page.content()

                terms = _parse_glossary_terms(content)

                log(f"Extracted {len(terms)} glossary terms", "SUCCESS")
                return terms

            except Exception as e:
                log(f"Glossary extraction failed: {str(e)}", "ERROR")
                # Re-raise to be handled by the MCP server wrapper
                raise
            finally:
                if browser:
                    await browser.close()
                    log("Browser closed.", "INFO")

    async def search_forum_posts(self, email: str, password: str, search_query: str, max_results: int = 50, locale: str = "zh-cn") -> Dict[str, Any]:
        """Search for posts on the forum using Playwright, with pagination.

        Result pages are fetched one after another until ``max_results`` hits
        are collected, a page returns 404, a page fails to load, or a page
        contains no result items.

        Args:
            email: BRAIN account e-mail.
            password: BRAIN account password.
            search_query: Free-text query; it is URL-encoded before use.
            max_results: Upper bound on the number of results returned.
            locale: Help-center locale segment of the search URL.

        Returns:
            A dict with ``success`` (True), ``results`` (list of result dicts)
            and ``total_found`` (length of ``results``).

        Raises:
            Exception: Failures outside page loading are logged and re-raised.
        """
        async with async_playwright() as p:
            browser = None
            try:
                log(f"Starting forum search for '{search_query}'", "INFO")
                browser, context = await self._get_browser_context(p, email, password)

                page = await context.new_page()

                search_results = []
                page_num = 1

                while len(search_results) < max_results:
                    search_url = self._search_url(search_query, page_num, locale)
                    log(f"Navigating to search page: {search_url}", "INFO")

                    try:
                        response = await page.goto(search_url)
                        # goto may return no response object; only a real 404 ends paging here.
                        if response is not None and response.status == 404:
                            log(f"Page {page_num} not found. End of results.", "INFO")
                            break
                        await page.wait_for_selector('ul.search-results-list', timeout=15000)
                    except Exception as e:
                        log(f"Could not load search results on page {page_num}: {e}", "INFO")
                        break

                    soup = BeautifulSoup(await page.content(), 'html.parser')

                    results_on_page = soup.select('li.search-result-list-item')
                    if not results_on_page:
                        log("No more search results found.", "INFO")
                        break

                    for result in results_on_page:
                        parsed = self._parse_search_result(result)
                        if parsed is not None:
                            search_results.append(parsed)
                        if len(search_results) >= max_results:
                            break

                    page_num += 1

                log(f"Found {len(search_results)} results for '{search_query}'", "SUCCESS")

                return {
                    "success": True,
                    "results": search_results,
                    "total_found": len(search_results)
                }

            except Exception as e:
                log(f"Forum search failed: {str(e)}", "ERROR")
                raise
            finally:
                if browser:
                    await browser.close()

    async def read_full_forum_post(self, email: str, password: str, post_url_or_id: str, include_comments: bool = True) -> Dict[str, Any]:
        """Read a complete forum post and all its comments using Playwright.

        Args:
            email: BRAIN account e-mail.
            password: BRAIN account password.
            post_url_or_id: Full URL (anything starting with 'http') or a post
                id, which is expanded to the zh-cn community post URL.
            include_comments: When True, walk the comment pages as well.

        Returns:
            A dict with ``success`` (True), ``post`` (title, author, body and
            a ``details`` dict holding votes and date), ``comments`` (list of
            author/body/date dicts) and ``total_comments``.

        Raises:
            Exception: Any failure while loading the post is logged and re-raised.
        """
        async with async_playwright() as p:
            browser = None
            try:
                log("Starting forum post reading process with Playwright", "INFO")

                if post_url_or_id.startswith('http'):
                    initial_url = post_url_or_id
                else:
                    initial_url = f"{self.base_url}/hc/zh-cn/community/posts/{post_url_or_id}"

                browser, context = await self._get_browser_context(p, email, password)
                page = await context.new_page()

                # --- Get Main Post Content and Final URL ---
                log(f"Navigating to initial URL: {initial_url}", "INFO")
                await page.goto(initial_url)
                await page.wait_for_selector('.post-body, .article-body', timeout=15000)

                # Get the final URL after any redirects
                base_url = re.sub(r'(\?|&)page=\d+', '', page.url).split('#')[0]
                log(f"Resolved to Base URL: {base_url}", "INFO")
                soup = BeautifulSoup(await page.content(), 'html.parser')

                title_element = soup.select_one('.post-title, h1.article-title, .article__title')
                author_span = soup.select_one('.post-author span[title]')
                body_element = soup.select_one('.post-body, .article-body')
                votes_element = soup.select_one('.vote-sum')
                date_element = soup.select_one('.post-meta .meta-data')
                post_data = {
                    'title': title_element.get_text(strip=True) if title_element else 'Unknown Title',
                    'author': author_span['title'] if author_span else 'Unknown Author',
                    'body': body_element.get_text(strip=True) if body_element else 'Body not found',
                    'details': {
                        'votes': votes_element.get_text(strip=True) if votes_element else '0',
                        'date': date_element.get_text(strip=True) if date_element else 'Unknown Date'
                    }
                }

                # --- Get Comments with Pagination ---
                comments = []
                if include_comments:
                    log("Starting comment extraction...", "INFO")
                    # (author, body, date) keys already collected, for O(1) duplicate checks.
                    seen_comments = set()
                    page_num = 1
                    while True:
                        comment_url = f"{base_url}?page={page_num}#comments"
                        log(f"Navigating to comment page: {comment_url}", "INFO")

                        try:
                            response = await page.goto(comment_url)
                            if response is not None and response.status == 404:
                                log(f"Page {page_num} returned 404. End of comments.", "INFO")
                                break
                            await page.wait_for_selector('.comment-list', timeout=10000)
                        except Exception as e:
                            log(f"Could not load page {page_num}: {e}. Assuming end of comments.", "INFO")
                            break

                        comment_soup = BeautifulSoup(await page.content(), 'html.parser')
                        comment_elements = comment_soup.select('.comment')

                        if not comment_elements:
                            log(f"No comments found on page {page_num}. Ending extraction.", "INFO")
                            break

                        log(f"Found {len(comment_elements)} comments on page {page_num}.", "INFO")

                        new_comments_found_on_page = 0
                        for comment_element in comment_elements:
                            author_span = comment_element.select_one('.comment-author span[title]')
                            body_element = comment_element.select_one('.comment-body')
                            date_element = comment_element.select_one('.comment-meta .meta-data')

                            comment_data = {
                                'author': author_span['title'] if author_span else 'Unknown',
                                'body': body_element.get_text(strip=True) if body_element else '',
                                'date': date_element.get_text(strip=True) if date_element else 'Unknown Date'
                            }

                            comment_key = (comment_data['author'], comment_data['body'], comment_data['date'])
                            if comment_key not in seen_comments:
                                seen_comments.add(comment_key)
                                comments.append(comment_data)
                                new_comments_found_on_page += 1

                        # A later page that yields only already-seen comments means paging has wrapped.
                        if new_comments_found_on_page == 0 and page_num > 1:
                            log(f"No new comments detected on page {page_num}. Ending extraction.", "INFO")
                            break

                        page_num += 1

                log(f"Extracted {len(comments)} comments in total.", "SUCCESS")
                return {
                    "success": True, "post": post_data, "comments": comments, "total_comments": len(comments)
                }

            except Exception as e:
                log(f"Failed to read forum post: {str(e)}", "ERROR")
                raise
            finally:
                if browser:
                    await browser.close()
401
-
402
# Module-level client instance.
forum_client = ForumClient()

# The main block is for testing and won't be run by the MCP server.
if __name__ == "__main__":
    _banner = "📚 WorldQuant BRAIN Forum Functions - This script provides the ForumClient class."
    print(_banner, file=sys.stderr)