cnhkmcp 2.1.9__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. cnhkmcp/__init__.py +1 -1
  2. cnhkmcp/untracked/AI/321/206/320/231/320/243/321/205/342/225/226/320/265/321/204/342/225/221/342/225/221/BRAIN_AI/321/206/320/231/320/243/321/205/342/225/226/320/265/321/204/342/225/221/342/225/221Mac_Linux/321/207/320/231/320/230/321/206/320/254/320/274.zip +0 -0
  3. cnhkmcp/untracked/AI/321/206/320/231/320/243/321/205/342/225/226/320/265/321/204/342/225/221/342/225/221//321/205/320/237/320/234/321/205/320/227/342/225/227/321/205/320/276/320/231/321/210/320/263/320/225AI/321/206/320/231/320/243/321/205/342/225/226/320/265/321/204/342/225/221/342/225/221_Windows/321/207/320/231/320/230/321/206/320/254/320/274.exe +0 -0
  4. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/README.md +1 -1
  5. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/config.json +2 -2
  6. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/main.py +1 -1
  7. cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/vector_db/chroma.sqlite3 +0 -0
  8. cnhkmcp/untracked/APP/Tranformer/Transformer.py +2 -2
  9. cnhkmcp/untracked/APP/Tranformer/transformer_config.json +1 -1
  10. cnhkmcp/untracked/APP/blueprints/feature_engineering.py +2 -2
  11. cnhkmcp/untracked/APP/blueprints/inspiration_house.py +4 -4
  12. cnhkmcp/untracked/APP/blueprints/paper_analysis.py +3 -3
  13. cnhkmcp/untracked/APP/give_me_idea/BRAIN_Alpha_Template_Expert_SystemPrompt.md +34 -73
  14. cnhkmcp/untracked/APP/give_me_idea/alpha_data_specific_template_master.py +2 -2
  15. cnhkmcp/untracked/APP/give_me_idea/what_is_Alpha_template.md +366 -1
  16. cnhkmcp/untracked/APP/static/inspiration.js +345 -13
  17. cnhkmcp/untracked/APP/templates/index.html +11 -3
  18. cnhkmcp/untracked/APP/templates/transformer_web.html +1 -1
  19. cnhkmcp/untracked/APP/trailSomeAlphas/README.md +38 -0
  20. cnhkmcp/untracked/APP/trailSomeAlphas/ace.log +66 -0
  21. cnhkmcp/untracked/APP/trailSomeAlphas/enhance_template.py +588 -0
  22. cnhkmcp/untracked/APP/trailSomeAlphas/requirements.txt +3 -0
  23. cnhkmcp/untracked/APP/trailSomeAlphas/run_pipeline.py +1001 -0
  24. cnhkmcp/untracked/APP/trailSomeAlphas/run_pipeline_step_by_step.ipynb +5258 -0
  25. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-data-feature-engineering/OUTPUT_TEMPLATE.md +325 -0
  26. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-data-feature-engineering/SKILL.md +503 -0
  27. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-data-feature-engineering/examples.md +244 -0
  28. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-data-feature-engineering/output_report/ASI_delay1_analyst11_ideas.md +285 -0
  29. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-data-feature-engineering/reference.md +399 -0
  30. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/SKILL.md +40 -0
  31. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/config.json +6 -0
  32. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709385783386000.json +388 -0
  33. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709386274840400.json +131 -0
  34. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709386838244700.json +1926 -0
  35. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709387369198500.json +31 -0
  36. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709387908905800.json +1926 -0
  37. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709388486243600.json +240 -0
  38. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709389024058600.json +1926 -0
  39. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709389549608700.json +41 -0
  40. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709390068714000.json +110 -0
  41. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709390591996900.json +36 -0
  42. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709391129137100.json +31 -0
  43. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709391691643500.json +41 -0
  44. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709392192099200.json +31 -0
  45. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709392703423500.json +46 -0
  46. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709393213729400.json +246 -0
  47. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710186683932500.json +388 -0
  48. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710187165414300.json +131 -0
  49. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710187665211700.json +1926 -0
  50. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710188149193400.json +31 -0
  51. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710188667627400.json +1926 -0
  52. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710189220822000.json +240 -0
  53. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710189726189500.json +1926 -0
  54. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710190248066100.json +41 -0
  55. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710190768298700.json +110 -0
  56. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710191282588100.json +36 -0
  57. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710191838960900.json +31 -0
  58. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710192396688000.json +41 -0
  59. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710192941922400.json +31 -0
  60. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710193473524600.json +46 -0
  61. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710194001961200.json +246 -0
  62. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710420975888800.json +46 -0
  63. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710421647590100.json +196 -0
  64. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710422131378500.json +5 -0
  65. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710422644184400.json +196 -0
  66. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710423702350600.json +196 -0
  67. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710424244661800.json +5 -0
  68. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_delay1.csv +211 -0
  69. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/final_expressions.json +7062 -0
  70. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/scripts/ace.log +3 -0
  71. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/scripts/ace_lib.py +1514 -0
  72. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/scripts/fetch_dataset.py +113 -0
  73. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/scripts/helpful_functions.py +180 -0
  74. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/scripts/implement_idea.py +236 -0
  75. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/scripts/merge_expression_list.py +90 -0
  76. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/scripts/parsetab.py +60 -0
  77. cnhkmcp/untracked/APP/trailSomeAlphas/skills/template_final_enhance/op/321/206/320/220/342/225/227/321/207/342/225/227/320/243.md +434 -0
  78. cnhkmcp/untracked/APP/trailSomeAlphas/skills/template_final_enhance/sample_prompt.md +62 -0
  79. cnhkmcp/untracked/APP/trailSomeAlphas/skills/template_final_enhance//321/205/320/235/320/245/321/205/320/253/320/260/321/205/320/275/320/240/321/206/320/220/320/255/321/210/320/220/320/223/321/211/320/220/342/225/227/321/210/342/225/233/320/241/321/211/320/243/342/225/233.md +354 -0
  80. cnhkmcp/untracked/APP/usage.md +2 -2
  81. cnhkmcp/untracked/APP//321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/231/320/243/321/205/342/225/235/320/220/321/206/320/230/320/241.py +388 -8
  82. cnhkmcp/untracked/skills/alpha-expression-verifier/scripts/validator.py +889 -0
  83. cnhkmcp/untracked/skills/brain-data-feature-engineering/OUTPUT_TEMPLATE.md +325 -0
  84. cnhkmcp/untracked/skills/brain-data-feature-engineering/SKILL.md +263 -0
  85. cnhkmcp/untracked/skills/brain-data-feature-engineering/examples.md +244 -0
  86. cnhkmcp/untracked/skills/brain-data-feature-engineering/reference.md +493 -0
  87. cnhkmcp/untracked/skills/brain-feature-implementation/SKILL.md +87 -0
  88. cnhkmcp/untracked/skills/brain-feature-implementation/config.json +6 -0
  89. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/analyst15_GLB_delay1.csv +289 -0
  90. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/final_expressions.json +410 -0
  91. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588244.json +4 -0
  92. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588251.json +20 -0
  93. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588273.json +23 -0
  94. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588293.json +23 -0
  95. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588319.json +23 -0
  96. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588322.json +14 -0
  97. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588325.json +20 -0
  98. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588328.json +23 -0
  99. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588354.json +23 -0
  100. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588357.json +23 -0
  101. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588361.json +23 -0
  102. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588364.json +23 -0
  103. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588368.json +23 -0
  104. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588391.json +14 -0
  105. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588394.json +23 -0
  106. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588397.json +59 -0
  107. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588400.json +35 -0
  108. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588403.json +20 -0
  109. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588428.json +23 -0
  110. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588431.json +32 -0
  111. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588434.json +20 -0
  112. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588438.json +20 -0
  113. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588441.json +14 -0
  114. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588468.json +20 -0
  115. cnhkmcp/untracked/skills/brain-feature-implementation/scripts/ace_lib.py +1514 -0
  116. cnhkmcp/untracked/skills/brain-feature-implementation/scripts/fetch_dataset.py +107 -0
  117. cnhkmcp/untracked/skills/brain-feature-implementation/scripts/helpful_functions.py +180 -0
  118. cnhkmcp/untracked/skills/brain-feature-implementation/scripts/implement_idea.py +165 -0
  119. cnhkmcp/untracked/skills/brain-feature-implementation/scripts/merge_expression_list.py +88 -0
  120. cnhkmcp/untracked/skills/brain-improve-alpha-performance/arXiv_API_Tool_Manual.md +490 -0
  121. cnhkmcp/untracked/skills/brain-improve-alpha-performance/reference.md +1 -1
  122. cnhkmcp/untracked/skills/brain-improve-alpha-performance/scripts/arxiv_api.py +229 -0
  123. cnhkmcp/untracked/skills/planning-with-files/SKILL.md +211 -0
  124. cnhkmcp/untracked/skills/planning-with-files/examples.md +202 -0
  125. cnhkmcp/untracked/skills/planning-with-files/reference.md +218 -0
  126. cnhkmcp/untracked/skills/planning-with-files/scripts/check-complete.sh +44 -0
  127. cnhkmcp/untracked/skills/planning-with-files/scripts/init-session.sh +120 -0
  128. cnhkmcp/untracked/skills/planning-with-files/templates/findings.md +95 -0
  129. cnhkmcp/untracked/skills/planning-with-files/templates/progress.md +114 -0
  130. cnhkmcp/untracked/skills/planning-with-files/templates/task_plan.md +132 -0
  131. cnhkmcp/untracked//321/211/320/225/320/235/321/207/342/225/234/320/276/321/205/320/231/320/235/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/230/320/241_/321/205/320/276/320/231/321/210/320/263/320/225/321/205/342/224/220/320/225/321/210/320/266/320/221/321/204/342/225/233/320/255/321/210/342/225/241/320/246/321/205/320/234/320/225.py +35 -11
  132. cnhkmcp/vector_db/_manifest.json +1 -0
  133. cnhkmcp/vector_db/_meta.json +1 -0
  134. {cnhkmcp-2.1.9.dist-info → cnhkmcp-2.3.0.dist-info}/METADATA +1 -1
  135. {cnhkmcp-2.1.9.dist-info → cnhkmcp-2.3.0.dist-info}/RECORD +142 -31
  136. /cnhkmcp/untracked/{skills/expression_verifier → APP/trailSomeAlphas/skills/brain-feature-implementation}/scripts/validator.py +0 -0
  137. /cnhkmcp/untracked/skills/{expression_verifier → alpha-expression-verifier}/SKILL.md +0 -0
  138. /cnhkmcp/untracked/skills/{expression_verifier → alpha-expression-verifier}/scripts/verify_expr.py +0 -0
  139. {cnhkmcp-2.1.9.dist-info → cnhkmcp-2.3.0.dist-info}/WHEEL +0 -0
  140. {cnhkmcp-2.1.9.dist-info → cnhkmcp-2.3.0.dist-info}/entry_points.txt +0 -0
  141. {cnhkmcp-2.1.9.dist-info → cnhkmcp-2.3.0.dist-info}/licenses/LICENSE +0 -0
  142. {cnhkmcp-2.1.9.dist-info → cnhkmcp-2.3.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,588 @@
1
+ import json
2
+ import csv
3
+ import os
4
+ import re
5
+ import subprocess
6
+ import sys
7
+ import time
8
+ from pathlib import Path
9
+
10
+ import requests
11
+
12
# Directory layout, all anchored at the folder that contains this script.
BASE_DIR = Path(__file__).resolve().parents[0]
SKILLS_DIR = BASE_DIR.joinpath("skills")
TEMPLATE_ENHANCE_DIR = SKILLS_DIR.joinpath("template_final_enhance")
DEFAULT_FEATURE_IMPLEMENTATION_DIR = SKILLS_DIR.joinpath("brain-feature-implementation")
DEFAULT_FEATURE_IMPLEMENTATION_SCRIPTS = DEFAULT_FEATURE_IMPLEMENTATION_DIR.joinpath("scripts")

# Runtime defaults; each can be overridden through an environment variable.
DEFAULT_MOONSHOT_MODEL = os.getenv("MOONSHOT_MODEL", "kimi-k2.5")
DEFAULT_MAX_ENHANCED_TEMPLATES = int(os.getenv("MAX_ENHANCED_TEMPLATES", "60"))
20
+
21
+
22
def find_latest_idea_json(feature_implementation_dir: Path) -> Path:
    """Return the most recently modified idea JSON below ``<dir>/data``.

    Both naming schemes used in this file are accepted: plain
    ``idea_<ts>.json`` and prefixed ``<dataset>_<region>_<delay>_idea_<ts>.json``.

    Args:
        feature_implementation_dir: Folder that contains the ``data`` directory.

    Returns:
        Path of the matching file with the greatest modification time.

    Raises:
        FileNotFoundError: If ``data`` does not exist or holds no idea file.
    """
    data_root = feature_implementation_dir / "data"
    if not data_root.exists():
        raise FileNotFoundError(f"data folder not found: {data_root}")

    # The glob is deliberately loose (`*idea_*`); the name check below keeps
    # only files where "idea_" starts the name or follows an underscore.
    idea_files = [
        p
        for p in data_root.glob("**/*idea_*.json")
        if p.is_file() and (p.name.startswith("idea_") or "_idea_" in p.name)
    ]
    if not idea_files:
        raise FileNotFoundError(f"No idea_*.json found under: {data_root}")

    # Prefer newest by mtime.
    return max(idea_files, key=lambda p: p.stat().st_mtime)
31
+
32
+
33
def read_text(path: Path) -> str:
    """Return the entire contents of *path* decoded as UTF-8."""
    with path.open("r", encoding="utf-8") as handle:
        return handle.read()
35
+
36
+
37
def call_moonshot(
    api_key: str,
    model: str,
    system_prompt: str,
    user_prompt: str,
    timeout_s: int = 180,
    retries: int = 2,
    backoff_s: float = 2.0,
) -> str:
    """Send one chat-completion request to Moonshot and return the reply text.

    The request goes to ``<MOONSHOT_BASE_URL>/chat/completions`` (default base
    ``https://api.moonshot.cn/v1``) with ``stream=True``. Reasoning and content
    deltas are printed to stdout as they arrive; only the content deltas are
    accumulated into the return value.

    Args:
        api_key: Bearer token sent in the ``Authorization`` header.
        model: Model identifier placed in the request payload.
        system_prompt: Content of the ``system`` message.
        user_prompt: Content of the ``user`` message.
        timeout_s: Timeout handed to ``requests.post``.
        retries: Number of extra attempts after the first one fails.
        backoff_s: Base delay; after failed attempt ``k`` (0-based) the function
            sleeps ``backoff_s * 2**k`` seconds before retrying.

    Returns:
        The assistant content as a single string.

    Raises:
        RuntimeError: If the final attempt gets a status code >= 300, or if no
            attempt was made at all (negative ``retries``).
        Exception: Any other error raised by the final attempt is re-raised.
    """
    base_url = os.environ.get("MOONSHOT_BASE_URL", "https://api.moonshot.cn/v1")
    url = f"{base_url.rstrip('/')}/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "temperature": 1,
        # Default to streaming so the user can observe model progress.
        "stream": True,
    }

    def _stream_sse_and_collect(resp: requests.Response) -> str:
        """Read an OpenAI-compatible SSE stream and print deltas live.

        Returns the full accumulated assistant content so callers that expect
        a plain string keep working.
        """
        content_parts: list[str] = []
        thinking = False

        for raw_line in resp.iter_lines(decode_unicode=True):
            if not raw_line:
                continue
            line = raw_line.strip()
            if not line.startswith("data:"):
                continue
            data_str = line[5:].strip()
            if data_str == "[DONE]":
                break

            try:
                event = json.loads(data_str)
            except ValueError:
                # Not a JSON event (e.g. a keep-alive fragment); skip it.
                continue
            if not isinstance(event, dict):
                continue

            choices = event.get("choices") or []
            if not choices:
                continue
            choice0 = choices[0] if isinstance(choices[0], dict) else None
            if not choice0:
                continue

            delta = choice0.get("delta") or {}
            if not isinstance(delta, dict):
                delta = {}

            # Moonshot/Kimi exposes reasoning tokens as `reasoning_content`.
            reasoning = delta.get("reasoning_content")
            if reasoning:
                if not thinking:
                    thinking = True
                    print("=============开始思考=============", flush=True)
                print(str(reasoning), end="", flush=True)

            piece = delta.get("content")
            if piece:
                if thinking:
                    thinking = False
                    print("\n=============思考结束=============", flush=True)
                content_parts.append(str(piece))
                print(str(piece), end="", flush=True)

            if choice0.get("finish_reason"):
                break

        # Close the banner if the stream ended while still in reasoning mode.
        if thinking:
            print("\n=============思考结束=============", flush=True)

        return "".join(content_parts)

    last_exc: Exception | None = None
    for attempt in range(retries + 1):
        try:
            # The context manager closes the streamed response on every exit
            # path, including the early `break`s in the SSE reader.
            with requests.post(
                url, headers=headers, json=payload, timeout=timeout_s, stream=True
            ) as resp:
                resp.encoding = "utf-8"
                if resp.status_code >= 300:
                    raise RuntimeError(f"Moonshot API error {resp.status_code}: {resp.text}")

                # Prefer SSE streaming when available.
                ctype = (resp.headers.get("Content-Type") or "").lower()
                if "text/event-stream" in ctype or payload.get("stream"):
                    return _stream_sse_and_collect(resp)

                data = resp.json()
                return data["choices"][0]["message"]["content"]
        except Exception as exc:
            # Every failure (timeout, connection error, HTTP error, malformed
            # body) is retried with exponential backoff, then re-raised.
            last_exc = exc
            if attempt >= retries:
                raise
            time.sleep(backoff_s * (2**attempt))

    raise last_exc or RuntimeError("Moonshot request failed")
156
+
157
+
158
def _salvage_json_array(text: str):
    """Parse *text* as JSON, or salvage the first JSON array embedded in it.

    The whole text is tried first, so any valid JSON value (including an
    object) is returned as-is. Otherwise each ``[`` is tried in order as the
    start of an array, and the first one that decodes is returned. Surrounding
    prose, code fences, or trailing text containing ``]`` do not prevent this.

    Args:
        text: Raw model output.

    Returns:
        The decoded JSON value, or ``None`` when nothing could be decoded
        (including when *text* is not a string).
    """
    try:
        return json.loads(text)
    except (TypeError, ValueError):
        pass

    if not isinstance(text, str):
        return None

    decoder = json.JSONDecoder()
    start = text.find("[")
    while start != -1:
        try:
            # raw_decode stops at the end of the first complete value, so
            # anything after the array is ignored.
            value, _end = decoder.raw_decode(text, start)
        except (ValueError, RecursionError):
            start = text.find("[", start + 1)
        else:
            return value
    return None
171
+
172
+
173
def _extract_items(parsed) -> list[dict]:
    """Reduce common LLM JSON shapes to a list of dict items.

    A list is used directly. For a dict, the first of the keys ``items``,
    ``data``, ``result``, ``results``, ``templates`` whose value is a list is
    used. Non-dict entries are dropped; any other shape yields ``[]``.
    """
    if isinstance(parsed, dict):
        wrapper_keys = ("items", "data", "result", "results", "templates")
        values = (parsed.get(name) for name in wrapper_keys)
        parsed = next((v for v in values if isinstance(v, list)), None)

    if not isinstance(parsed, list):
        return []
    return [entry for entry in parsed if isinstance(entry, dict)]
183
+
184
+
185
+ def run_implement_idea(
186
+ feature_implementation_dir: Path,
187
+ scripts_dir: Path,
188
+ dataset_folder: str,
189
+ template: str,
190
+ idea: str,
191
+ ) -> Path | None:
192
+ """Run implement_idea.py for one template.
193
+
194
+ Returns the newly created idea_*.json path if detectable.
195
+ """
196
+
197
+ impl = scripts_dir / "implement_idea.py"
198
+ data_dir = feature_implementation_dir / "data" / dataset_folder
199
+
200
+ before = set(data_dir.glob("*_idea_*.json")) if data_dir.exists() else set()
201
+
202
+ args_list = [
203
+ sys.executable,
204
+ str(impl),
205
+ "--template",
206
+ template,
207
+ "--dataset",
208
+ dataset_folder,
209
+ "--idea",
210
+ idea or "",
211
+ ]
212
+
213
+ result = subprocess.run(
214
+ args_list,
215
+ cwd=scripts_dir,
216
+ capture_output=True,
217
+ text=True,
218
+ )
219
+ if result.returncode != 0:
220
+ raise RuntimeError(
221
+ "Command failed: "
222
+ + " ".join(args_list)
223
+ + f"\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
224
+ )
225
+
226
+ after = set(data_dir.glob("*_idea_*.json")) if data_dir.exists() else set()
227
+ created = sorted(after - before)
228
+ return created[-1] if created else None
229
+
230
+
231
def _extract_keys_from_template(template: str) -> list[str]:
    """Return every ``{name}`` placeholder name in *template*, in order.

    Names consist of ASCII letters, digits and underscores; duplicates are kept.
    """
    placeholder = re.compile(r"\{([A-Za-z0-9_]+)\}")
    return [match.group(1) for match in placeholder.finditer(template)]
233
+
234
+
235
def _matches_metric(field_id: str, metric: str) -> bool:
    """Report whether *field_id* refers to *metric*.

    Metrics longer than three characters match as a case-sensitive substring.
    Shorter metrics must appear as a whole underscore-delimited token and are
    compared case-insensitively.
    """
    if len(metric) > 3:
        return metric in field_id

    token_pattern = rf"(^|_){re.escape(metric)}(_|$)"
    return bool(re.search(token_pattern, field_id, flags=re.IGNORECASE))
239
+
240
+
241
def load_dataset_ids(dataset_csv: Path, max_rows: int = 200000) -> list[str]:
    """Read the non-empty values of the ``id`` column from a dataset CSV.

    Args:
        dataset_csv: CSV file whose first row is a header.
        max_rows: Maximum number of data rows to scan (the header is excluded).

    Returns:
        Stripped ``id`` values in file order. The list is empty when the file
        has no header or no ``id`` column.
    """
    ids: list[str] = []
    # "utf-8-sig" reads plain UTF-8 unchanged and drops a leading BOM, which
    # would otherwise turn the first header cell into "\ufeffid".
    with dataset_csv.open("r", encoding="utf-8-sig", newline="") as f:
        reader = csv.reader(f)
        header = next(reader, None)
        if not header:
            return ids

        # Exact match first; fall back to whitespace-trimmed header cells.
        if "id" in header:
            id_idx = header.index("id")
        else:
            trimmed = [col.strip() for col in header]
            if "id" not in trimmed:
                return ids
            id_idx = trimmed.index("id")

        for i, row in enumerate(reader):
            if i >= max_rows:
                break
            # Short rows may not reach the id column at all.
            if id_idx < len(row):
                val = (row[id_idx] or "").strip()
                if val:
                    ids.append(val)
    return ids
260
+
261
+
262
def normalize_for_validator(expression: str) -> str:
    """Normalize an expression so it satisfies validator rules.

    Currently converts ``winsorize(x, N)`` to ``winsorize(x, std=N)`` when the
    second argument is positional. Nested ``winsorize`` calls are rewritten
    too. Calls to functions whose name merely ends in ``winsorize`` are left
    alone, and so is any text following an unbalanced ``winsorize(``.

    Args:
        expression: Expression text to normalize.

    Returns:
        The normalized expression. Arguments of every rewritten call are
        re-joined with ``", "``.
    """
    # A keyword argument starts with `name =`; `==` is a comparison, not one.
    named_arg = re.compile(r"[A-Za-z_]\w*\s*=(?!=)")

    def _split_top_level_args(args_str: str) -> list[str]:
        """Split on commas that are not nested inside parentheses."""
        args: list[str] = []
        buf: list[str] = []
        depth = 0
        for ch in args_str:
            if ch == "(":
                depth += 1
            elif ch == ")":
                depth -= 1
            if ch == "," and depth == 0:
                args.append("".join(buf).strip())
                buf = []
            else:
                buf.append(ch)
        if buf:
            args.append("".join(buf).strip())
        return args

    def _rewrite_func(expr: str, func_name: str, param_name: str) -> str:
        """Name the positional second argument of every ``func_name`` call."""
        out: list[str] = []
        i = 0
        func_token = f"{func_name}("
        while i < len(expr):
            idx = expr.find(func_token, i)
            if idx == -1:
                out.append(expr[i:])
                break

            args_start = idx + len(func_token)
            prev = expr[idx - 1] if idx > 0 else ""
            if prev.isalnum() or prev == "_":
                # Tail of a longer identifier (e.g. `my_winsorize(`): copy the
                # token through and keep scanning inside its arguments.
                out.append(expr[i:args_start])
                i = args_start
                continue

            # Find the parenthesis that closes this call.
            j = args_start
            depth = 1
            while j < len(expr) and depth > 0:
                ch = expr[j]
                if ch == "(":
                    depth += 1
                elif ch == ")":
                    depth -= 1
                j += 1
            if depth != 0:
                # Unbalanced call: leave the remainder exactly as written.
                out.append(expr[i:])
                break

            out.append(expr[i:idx])
            # Recurse so calls nested inside the arguments are handled too.
            args = [
                _rewrite_func(arg, func_name, param_name)
                for arg in _split_top_level_args(expr[args_start : j - 1])
            ]
            if len(args) >= 2 and not named_arg.match(args[1]):
                args[1] = f"{param_name}={args[1]}"
            out.append(f"{func_name}({', '.join(args)})")
            i = j
        return "".join(out)

    return _rewrite_func(expression, "winsorize", "std")
317
+
318
+
319
+ def parse_metadata_from_filename(path: Path) -> tuple[str, str, int] | None:
320
+ """Extract dataset_id, region, delay from filename like:
321
+ <dataset>_<region>_<delay>_idea_<timestamp>.json
322
+
323
+ Returns (dataset_id, region, delay) or None.
324
+ """
325
+ name = path.name
326
+ parts = name.split("_")
327
+ if len(parts) < 5:
328
+ return None
329
+ if parts[-2] != "idea":
330
+ return None
331
+ region = parts[-4]
332
+ delay_str = parts[-3]
333
+ if not delay_str.isdigit():
334
+ return None
335
+ dataset_id = "_".join(parts[:-4])
336
+ if not dataset_id:
337
+ return None
338
+ return dataset_id, region, int(delay_str)
339
+
340
+
341
+ def main():
342
+ """Zero-arg entrypoint.
343
+
344
+ Behavior:
345
+ - Pick the newest idea_*.json under skills/brain-feature-implementation/data/**
346
+ - Generate as many enhanced templates as possible (capped by DEFAULT_MAX_ENHANCED_TEMPLATES)
347
+ - Save enhanced templates JSON and (optionally) implement them
348
+
349
+ Optional env overrides:
350
+ - IDEA_JSON: absolute/relative path to a specific idea_*.json
351
+ - MOONSHOT_API_KEY / MOONSHOT_BASE_URL / MOONSHOT_MODEL
352
+ - MAX_ENHANCED_TEMPLATES (default 60)
353
+ - NO_IMPLEMENT=1 to skip implement_idea.py
354
+ """
355
+
356
+ idea_json_env = os.environ.get("IDEA_JSON", "").strip()
357
+ if idea_json_env:
358
+ idea_json_path = Path(idea_json_env).expanduser().resolve()
359
+ else:
360
+ idea_json_path = find_latest_idea_json(DEFAULT_FEATURE_IMPLEMENTATION_DIR).resolve()
361
+
362
+ if not idea_json_path.exists():
363
+ raise FileNotFoundError(f"idea json not found: {idea_json_path}")
364
+
365
+ print(f"Using idea json: {idea_json_path}")
366
+
367
+ payload = json.loads(idea_json_path.read_text(encoding="utf-8"))
368
+ if not isinstance(payload, dict):
369
+ raise ValueError("idea json must be an object with 'template' and 'idea' fields")
370
+ if "template" not in payload:
371
+ raise ValueError("idea json missing required field 'template'")
372
+ if "idea" not in payload:
373
+ raise ValueError("idea json missing required field 'idea'")
374
+ raw_template = str(payload.get("template") or "").strip()
375
+ raw_idea = str(payload.get("idea") or "").strip()
376
+ if not raw_template:
377
+ raise ValueError("idea json field 'template' is empty")
378
+
379
+ # Infer feature-implementation location from the idea json path when possible:
380
+ # <...>/brain-feature-implementation/data/<dataset_folder>/idea_*.json
381
+ feature_implementation_dir = DEFAULT_FEATURE_IMPLEMENTATION_DIR
382
+ scripts_dir = DEFAULT_FEATURE_IMPLEMENTATION_SCRIPTS
383
+ try:
384
+ if idea_json_path.parent.parent.name.lower() == "data":
385
+ feature_implementation_dir = idea_json_path.parent.parent.parent
386
+ scripts_dir = feature_implementation_dir / "scripts"
387
+ except Exception:
388
+ pass
389
+
390
+ if not scripts_dir.exists():
391
+ raise FileNotFoundError(f"implement scripts folder not found: {scripts_dir}")
392
+
393
+ if idea_json_path.parent.parent.name.lower() == "data":
394
+ dataset_folder = idea_json_path.parent.name
395
+ else:
396
+ parsed = parse_metadata_from_filename(idea_json_path)
397
+ if not parsed:
398
+ raise ValueError(
399
+ "idea json filename must be like <dataset>_<region>_<delay>_idea_<ts>.json "
400
+ "when not located under brain-feature-implementation/data/<dataset_folder>/"
401
+ )
402
+ dataset_id_from_name, region_from_name, delay_from_name = parsed
403
+ dataset_folder = f"{dataset_id_from_name}_{region_from_name}_delay{delay_from_name}"
404
+
405
+ # Validate dataset CSV exists to ensure implement_idea can parse placeholders.
406
+ dataset_csv = feature_implementation_dir / "data" / dataset_folder / f"{dataset_folder}.csv"
407
+ if not dataset_csv.exists():
408
+ raise FileNotFoundError(
409
+ "Dataset CSV not found for enhancement. "
410
+ f"Expected: {dataset_csv}"
411
+ )
412
+ print(f"Using dataset CSV: {dataset_csv}")
413
+ try:
414
+ with dataset_csv.open("r", encoding="utf-8", newline="") as f:
415
+ reader = csv.reader(f)
416
+ header = next(reader, None)
417
+ row_count = 0
418
+ for _ in reader:
419
+ row_count += 1
420
+ if row_count >= 5:
421
+ break
422
+ if not header:
423
+ raise ValueError("Dataset CSV missing header row")
424
+ if row_count == 0:
425
+ raise ValueError("Dataset CSV has no data rows")
426
+ print(f"Dataset CSV header columns: {len(header)}; sample rows: {row_count}")
427
+ except Exception as e:
428
+ raise RuntimeError(f"Failed to read dataset CSV: {e}")
429
+
430
+ dataset_ids = load_dataset_ids(dataset_csv)
431
+ if not dataset_ids:
432
+ print("Warning: Could not load dataset ids from CSV 'id' column.")
433
+
434
+ guide1_path = TEMPLATE_ENHANCE_DIR / "单因子思考逻辑链.md"
435
+ guide2_path = TEMPLATE_ENHANCE_DIR / "op总结.md"
436
+ if not guide1_path.exists():
437
+ raise FileNotFoundError(f"Missing guidance file: {guide1_path}")
438
+ if not guide2_path.exists():
439
+ raise FileNotFoundError(f"Missing guidance file: {guide2_path}")
440
+
441
+ guide1 = read_text(guide1_path)
442
+ guide2 = read_text(guide2_path)
443
+
444
+ system_prompt = "\n\n".join(
445
+ [
446
+ "An alpha template is a reusable recipe that captures an economic idea and leaves “slots” (data fields, operators, groups, decay, neutralization choices, etc.) to instantiate many candidate alphas. Typical structure: clean data (backfill, winsorize) → transform/compare across time or peers → rank/neutralize → (optionally) decay/turnover tune. Templates encourage systematic search, reuse, and diversification while keeping an explicit economic rationale.",
447
+ "",
448
+ "Some Example Templates and rationales",
449
+ "",
450
+ "CAPM residual (market/sector-neutral return): ts_regression(returns, group_mean(returns, log(ts_mean(cap,21)), sector), 252, rettype=0) after backfill+winsorize. Rationale: strip market/sector beta to isolate idiosyncratic alpha; sector-weighted by smoothed log-cap to reduce large-cap dominance.",
451
+ "CAPM beta (slope) template: same regression with rettype=2; pre-clean target/market (ts_backfill(...,63) + winsorize(std=4)). Rationale: rank stocks by relative risk within sector; long low-β, short high-β, or study β dispersion across groups.",
452
+ "CAPM generalized to any feature: data = winsorize(ts_backfill(<data>,63),std=4); data_gpm = group_mean(data, log(ts_mean(cap,21)), sector); resid = ts_regression(data, data_gpm, 252, rettype=0). Rationale: pull out the component unexplained by group average of same feature; reduces common-mode exposure.",
453
+ "Actual vs estimate spread (analyst): group_zscore( group_zscore(<act>, industry) – group_zscore(<est>, industry), industry ) or the abstracted group_compare(diff(group_compare(act,...), group_compare(est,...)), ...). Rationale: surprise/beat-miss signal within industry, normalized to peers to avoid level bias.",
454
+ "Analyst term-structure (fp1 vs fy1/fp2/fy2): group_zscore( group_zscore(anl14_mean_eps_<period1>, industry) – group_zscore(anl14_mean_eps_<period2>, industry), industry ) with operator/group slots. Rationale: cross-period expectation steepness; rising near-term vs long-term forecasts can flag momentum/inflection.",
455
+ "Option Greeks net spread: group_operator(<put_greek> - <call_greek>, <grouping_data>) over industry/sector (Delta/Gamma/Vega/Theta). Rationale: options-implied sentiment/convexity skew vs peers; outlier net Greeks may precede spot moves; extend with multi-Greek composites or time-series deltas.",
456
+ "",
457
+ "based on the following guidance of how to make a data collation template into a signal, and guidance on how to utilize the best of operators.",
458
+ "",
459
+ "guidance of how to make a data collation template into a signal",
460
+ "--------------",
461
+ guide1,
462
+ "--------------",
463
+ "guidance on how to use the best of operators",
464
+ "--------------",
465
+ guide2,
466
+ "--------------",
467
+ "",
468
+ "Return ONLY valid JSON (no markdown / no code fences).",
469
+ ]
470
+ )
471
+
472
+ user_prompt_obj = {
473
+ "instruction": "Improve the following raw template. Keep { } placeholders unchanged (they represent datafields). Return at least 5 diverse and complicate enhanced templates as possible.",
474
+ "input": {
475
+ "template": raw_template,
476
+ "idea": raw_idea,
477
+ },
478
+ "output_format": [
479
+ {"enhanced_template": "", "idea": ""},
480
+ {"enhanced_template": "", "idea": ""},
481
+ ],
482
+ "idea_answer_in": "Chinese",
483
+ }
484
+
485
+ api_key = os.environ.get("MOONSHOT_API_KEY")
486
+ if not api_key:
487
+ raise ValueError("Missing Moonshot API key. Set MOONSHOT_API_KEY")
488
+
489
+ raw = call_moonshot(
490
+ api_key=api_key,
491
+ model=DEFAULT_MOONSHOT_MODEL,
492
+ system_prompt=system_prompt,
493
+ user_prompt=json.dumps(user_prompt_obj, ensure_ascii=False, indent=2),
494
+ timeout_s=600,
495
+ retries=2,
496
+ backoff_s=2.0,
497
+ )
498
+
499
+ parsed = _salvage_json_array(raw)
500
+ items = _extract_items(parsed)
501
+ if not items:
502
+ raise RuntimeError(f"LLM output did not contain a usable JSON array. Raw output:\n{raw}")
503
+
504
+ enhanced = []
505
+ for item in items:
506
+ t = str(item.get("enhanced_template") or item.get("template") or "").strip()
507
+ idea = str(item.get("idea") or "").strip()
508
+ if not t:
509
+ continue
510
+ enhanced.append({"template": t, "idea": idea})
511
+ # Do NOT truncate here; keep all returned templates.
512
+
513
+ if not enhanced:
514
+ raise RuntimeError(f"No enhanced templates parsed from LLM output. Raw output:\n{raw}")
515
+
516
+ out_dir = idea_json_path.parent
517
+ ts = int(time.time())
518
+ enhanced_path = out_dir / f"enhanced_templates_{ts}.json"
519
+ enhanced_path.write_text(json.dumps(enhanced, ensure_ascii=False, indent=2), encoding="utf-8")
520
+ print(f"Enhanced templates saved to: {enhanced_path}")
521
+
522
+ # Implement each enhanced template and merge expressions for this run only.
523
+ all_exprs = []
524
+ created_files = []
525
+
526
+ for idx, item in enumerate(enhanced, start=1):
527
+ t = item.get("template")
528
+ if not t:
529
+ continue
530
+ idea = item.get("idea") or ""
531
+ if dataset_ids:
532
+ metrics = _extract_keys_from_template(t)
533
+ missing = [m for m in metrics if not any(_matches_metric(fid, m) for fid in dataset_ids)]
534
+ if missing:
535
+ print(f"Template {idx} missing metrics in CSV id list: {missing}")
536
+ print(f"\n[{idx}/{len(enhanced)}] Implementing enhanced_template: {t}")
537
+ created = run_implement_idea(
538
+ feature_implementation_dir=feature_implementation_dir,
539
+ scripts_dir=scripts_dir,
540
+ dataset_folder=dataset_folder,
541
+ template=t,
542
+ idea=idea,
543
+ )
544
+ if created:
545
+ created_files.append(created)
546
+
547
+ for jf in created_files:
548
+ try:
549
+ data = json.loads(jf.read_text(encoding="utf-8"))
550
+ exprs = data.get("expression_list", [])
551
+ if exprs:
552
+ all_exprs.extend([str(x) for x in exprs])
553
+ except Exception:
554
+ pass
555
+
556
+ unique = []
557
+ seen = set()
558
+ for ex in all_exprs:
559
+ norm_ex = normalize_for_validator(ex)
560
+ if norm_ex not in seen:
561
+ unique.append(norm_ex)
562
+ seen.add(norm_ex)
563
+
564
+ # Validate expressions and keep only valid ones
565
+ try:
566
+ if str(scripts_dir) not in sys.path:
567
+ sys.path.insert(0, str(scripts_dir))
568
+ from validator import ExpressionValidator # type: ignore
569
+ validator = ExpressionValidator()
570
+ validated = []
571
+ for expr in unique:
572
+ result = validator.check_expression(expr)
573
+ if result.get("valid"):
574
+ validated.append(expr)
575
+ else:
576
+ print(f"Invalid expression filtered: {expr}")
577
+ unique = validated
578
+ print(f"Validation kept {len(unique)} expressions")
579
+ except Exception as e:
580
+ print(f"Warning: validator failed, keeping unvalidated expressions. Error: {e}")
581
+
582
+ merged_path = out_dir / f"enhanced_final_expressions_{ts}.json"
583
+ merged_path.write_text(json.dumps(unique, ensure_ascii=False, indent=2), encoding="utf-8")
584
+ print(f"\nMerged {len(unique)} unique expressions to: {merged_path}")
585
+
586
+
587
+ if __name__ == "__main__":  # entry guard: invoke main() only when this file is executed directly, not when it is imported
588
+ main()
@@ -0,0 +1,3 @@
1
+ pandas
2
+ requests
3
+ tqdm