cnhkmcp 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cnhkmcp/__init__.py +125 -0
- cnhkmcp/untracked/APP/.gitignore +32 -0
- cnhkmcp/untracked/APP/MODULAR_STRUCTURE.md +112 -0
- cnhkmcp/untracked/APP/README.md +309 -0
- cnhkmcp/untracked/APP/Tranformer/Transformer.py +2192 -0
- cnhkmcp/untracked/APP/Tranformer/ace.log +0 -0
- cnhkmcp/untracked/APP/Tranformer/ace_lib.py +1489 -0
- cnhkmcp/untracked/APP/Tranformer/helpful_functions.py +180 -0
- cnhkmcp/untracked/APP/Tranformer/output/Alpha_candidates.json +1786 -0
- cnhkmcp/untracked/APP/Tranformer/output/Alpha_candidates_/321/207/320/264/342/225/221/321/204/342/225/233/320/233.json +654 -0
- cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_error.json +261 -0
- cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_success.json +170 -0
- cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_/321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/320/237/320/277/321/207/320/253/342/224/244/321/206/320/236/320/265/321/210/342/225/234/342/225/234/321/205/320/225/320/265Machine_lib.json +22 -0
- cnhkmcp/untracked/APP/Tranformer/parsetab.py +60 -0
- cnhkmcp/untracked/APP/Tranformer/template_summary.txt +408 -0
- cnhkmcp/untracked/APP/Tranformer/transformer_config.json +7 -0
- cnhkmcp/untracked/APP/Tranformer/validator.py +889 -0
- cnhkmcp/untracked/APP/ace.log +65 -0
- cnhkmcp/untracked/APP/ace_lib.py +1489 -0
- cnhkmcp/untracked/APP/blueprints/__init__.py +6 -0
- cnhkmcp/untracked/APP/blueprints/feature_engineering.py +347 -0
- cnhkmcp/untracked/APP/blueprints/idea_house.py +221 -0
- cnhkmcp/untracked/APP/blueprints/inspiration_house.py +432 -0
- cnhkmcp/untracked/APP/blueprints/paper_analysis.py +570 -0
- cnhkmcp/untracked/APP/custom_templates/templates.json +1257 -0
- cnhkmcp/untracked/APP/give_me_idea/BRAIN_Alpha_Template_Expert_SystemPrompt.md +400 -0
- cnhkmcp/untracked/APP/give_me_idea/ace_lib.py +1489 -0
- cnhkmcp/untracked/APP/give_me_idea/alpha_data_specific_template_master.py +247 -0
- cnhkmcp/untracked/APP/give_me_idea/helpful_functions.py +180 -0
- cnhkmcp/untracked/APP/give_me_idea/what_is_Alpha_template.md +11 -0
- cnhkmcp/untracked/APP/helpful_functions.py +180 -0
- cnhkmcp/untracked/APP/hkSimulator/ace.log +0 -0
- cnhkmcp/untracked/APP/hkSimulator/ace_lib.py +1476 -0
- cnhkmcp/untracked/APP/hkSimulator/autosim_20251205_145240.log +0 -0
- cnhkmcp/untracked/APP/hkSimulator/autosim_20251215_030103.log +0 -0
- cnhkmcp/untracked/APP/hkSimulator/autosimulator.py +447 -0
- cnhkmcp/untracked/APP/hkSimulator/helpful_functions.py +180 -0
- cnhkmcp/untracked/APP/mirror_config.txt +20 -0
- cnhkmcp/untracked/APP/operaters.csv +129 -0
- cnhkmcp/untracked/APP/requirements.txt +53 -0
- cnhkmcp/untracked/APP/run_app.bat +28 -0
- cnhkmcp/untracked/APP/run_app.sh +34 -0
- cnhkmcp/untracked/APP/setup_tsinghua.bat +39 -0
- cnhkmcp/untracked/APP/setup_tsinghua.sh +43 -0
- cnhkmcp/untracked/APP/simulator/alpha_submitter.py +404 -0
- cnhkmcp/untracked/APP/simulator/simulator_wqb.py +618 -0
- cnhkmcp/untracked/APP/ssrn-3332513.pdf +109188 -19
- cnhkmcp/untracked/APP/static/brain.js +528 -0
- cnhkmcp/untracked/APP/static/decoder.js +1540 -0
- cnhkmcp/untracked/APP/static/feature_engineering.js +1729 -0
- cnhkmcp/untracked/APP/static/idea_house.js +937 -0
- cnhkmcp/untracked/APP/static/inspiration.js +465 -0
- cnhkmcp/untracked/APP/static/inspiration_house.js +868 -0
- cnhkmcp/untracked/APP/static/paper_analysis.js +390 -0
- cnhkmcp/untracked/APP/static/script.js +2942 -0
- cnhkmcp/untracked/APP/static/simulator.js +597 -0
- cnhkmcp/untracked/APP/static/styles.css +3127 -0
- cnhkmcp/untracked/APP/static/usage_widget.js +508 -0
- cnhkmcp/untracked/APP/templates/alpha_inspector.html +511 -0
- cnhkmcp/untracked/APP/templates/feature_engineering.html +960 -0
- cnhkmcp/untracked/APP/templates/idea_house.html +564 -0
- cnhkmcp/untracked/APP/templates/index.html +911 -0
- cnhkmcp/untracked/APP/templates/inspiration_house.html +861 -0
- cnhkmcp/untracked/APP/templates/paper_analysis.html +91 -0
- cnhkmcp/untracked/APP/templates/simulator.html +343 -0
- cnhkmcp/untracked/APP/templates/transformer_web.html +580 -0
- cnhkmcp/untracked/APP/usage.md +351 -0
- cnhkmcp/untracked/APP//321/207/342/225/235/320/250/321/205/320/230/320/226/321/204/342/225/225/320/220/321/211/320/221/320/243/321/206/320/261/320/265/ace_lib.py +1489 -0
- cnhkmcp/untracked/APP//321/207/342/225/235/320/250/321/205/320/230/320/226/321/204/342/225/225/320/220/321/211/320/221/320/243/321/206/320/261/320/265/brain_alpha_inspector.py +712 -0
- cnhkmcp/untracked/APP//321/207/342/225/235/320/250/321/205/320/230/320/226/321/204/342/225/225/320/220/321/211/320/221/320/243/321/206/320/261/320/265/helpful_functions.py +180 -0
- cnhkmcp/untracked/APP//321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/231/320/243/321/205/342/225/235/320/220/321/206/320/230/320/241.py +2393 -0
- cnhkmcp/untracked/arXiv_API_Tool_Manual.md +490 -0
- cnhkmcp/untracked/arxiv_api.py +229 -0
- cnhkmcp/untracked/forum_functions.py +998 -0
- cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/forum_functions.py +407 -0
- cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/platform_functions.py +2415 -0
- cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/user_config.json +31 -0
- cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272//321/210/320/276/320/271AI/321/210/320/277/342/225/227/321/210/342/224/220/320/251/321/204/342/225/225/320/272/321/206/320/246/320/227/321/206/320/261/320/263/321/206/320/255/320/265/321/205/320/275/320/266/321/204/342/225/235/320/252/321/204/342/225/225/320/233/321/210/342/225/234/342/225/234/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270.md +101 -0
- cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272//321/211/320/225/320/235/321/207/342/225/234/320/276/321/205/320/231/320/235/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/230/320/241_/321/205/320/276/320/231/321/210/320/263/320/225/321/205/342/224/220/320/225/321/210/320/266/320/221/321/204/342/225/233/320/255/321/210/342/225/241/320/246/321/205/320/234/320/225.py +190 -0
- cnhkmcp/untracked/platform_functions.py +2886 -0
- cnhkmcp/untracked/sample_mcp_config.json +11 -0
- cnhkmcp/untracked/user_config.json +31 -0
- cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/320/237/320/222/321/210/320/220/320/223/321/206/320/246/320/227/321/206/320/261/320/263_BRAIN_Alpha_Test_Requirements_and_Tips.md +202 -0
- cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_Alpha_explaination_workflow.md +56 -0
- cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_BRAIN_6_Tips_Datafield_Exploration_Guide.md +194 -0
- cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_BRAIN_Alpha_Improvement_Workflow.md +101 -0
- cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_Dataset_Exploration_Expert_Manual.md +436 -0
- cnhkmcp/untracked//321/207/320/264/342/225/221/321/204/342/225/233/320/233/321/205/342/225/226/320/265/321/204/342/225/234/320/254/321/206/342/225/241/320/221_daily_report_workflow.md +128 -0
- cnhkmcp/untracked//321/211/320/225/320/235/321/207/342/225/234/320/276/321/205/320/231/320/235/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/230/320/241_/321/205/320/276/320/231/321/210/320/263/320/225/321/205/342/224/220/320/225/321/210/320/266/320/221/321/204/342/225/233/320/255/321/210/342/225/241/320/246/321/205/320/234/320/225.py +190 -0
- cnhkmcp-2.0.1.dist-info/METADATA +187 -0
- cnhkmcp-2.0.1.dist-info/RECORD +95 -0
- cnhkmcp-2.0.1.dist-info/WHEEL +5 -0
- cnhkmcp-2.0.1.dist-info/entry_points.txt +2 -0
- cnhkmcp-2.0.1.dist-info/licenses/LICENSE +21 -0
- cnhkmcp-2.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,247 @@
import ace_lib
import pandas as pd
import json
import openai
import os
import sys
import time
import random

# Default Moonshot Configuration
DEFAULT_MOONSHOT_BASE_URL = "https://api.moonshot.cn/v1"
DEFAULT_MOONSHOT_MODEL = "kimi-k2-turbo-preview"

def get_llm_client(api_key, base_url):
    return openai.OpenAI(
        api_key=api_key,
        base_url=base_url,
    )

def test_llm_connection(api_key, base_url, model):
    print("\nTesting LLM connection...")
    client = get_llm_client(api_key, base_url)
    try:
        client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": "Hello"}],
            max_tokens=5
        )
        print("LLM connection successful.")
        return True
    except Exception as e:
        print(f"LLM connection failed: {e}")
        return False

def call_llm_with_retry(client, model, system_prompt, operators_df, datafields_df, dataset_id, max_retries=5):

    n_ops = len(operators_df)
    n_fields = len(datafields_df)

    for attempt in range(max_retries + 1):
        print(f"\nAttempt {attempt + 1}/{max_retries + 1} - Preparing prompt with {n_ops} operators and {n_fields} datafields...")

        # Sample rows if needed, otherwise take head
        # Using head for stability, but could be random sample
        ops_subset = operators_df.head(n_ops)
        fields_subset = datafields_df.head(n_fields)

        operators_info = ops_subset[['name', 'category', 'description', 'extra_side_note']].to_string()
        datafields_info = fields_subset[['id', 'description', 'subcategory']].to_string()

        user_prompt = f"""
Here is the information about available operators (first {n_ops} rows):
{operators_info}

Here is the information about the dataset '{dataset_id}' (first {n_fields} rows):
{datafields_info}

Please come up with several Alpha templates based on this information.
Please provide the answer in Chinese.
"""

        try:
            print("Sending request to LLM...")
            completion = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.3,
            )
            return completion.choices[0].message.content

        except openai.BadRequestError as e:
            error_msg = str(e)
            print(f"LLM Bad Request Error: {error_msg}")

            # Check for token limit error
            if "token limit" in error_msg or "context_length_exceeded" in error_msg or "400" in error_msg:
                print("Token limit exceeded. Reducing context size...")
                n_ops = max(1, n_ops // 2)
                n_fields = max(1, n_fields // 2)
                if n_ops == 1 and n_fields == 1:
                    print("Cannot reduce context further.")
                    return f"Failed after retries: {e}"
            else:
                return f"LLM Error (not token related): {e}"

        except Exception as e:
            return f"General Error calling LLM: {e}"

    return "Max retries exceeded."

def main():
    print("=== BRAIN Alpha Generator Full Version ===\n")

    # 1. Interactive Login
    print("--- Step 1: Login to BRAIN ---")
    email = input("Enter BRAIN Email: ").strip()
    while not email:
        email = input("Email is required. Enter BRAIN Email: ").strip()

    import getpass
    password = getpass.getpass("Enter BRAIN Password: ").strip()
    while not password:
        password = getpass.getpass("Password is required. Enter BRAIN Password: ").strip()

    # Monkeypatch ace_lib.get_credentials to use provided inputs
    ace_lib.get_credentials = lambda: (email, password)

    print("Logging in...")
    try:
        s = ace_lib.start_session()
        print("Login successful.")
    except Exception as e:
        print(f"Login failed: {e}")
        return

    # 2. LLM Configuration
    print("\n--- Step 2: LLM Configuration ---")
    base_url = input(f"Enter LLM Base URL (default: {DEFAULT_MOONSHOT_BASE_URL}): ").strip()
    if not base_url:
        base_url = DEFAULT_MOONSHOT_BASE_URL

    api_key = input("Enter LLM API Key (required): ").strip()
    while not api_key:
        print("API Key is required.")
        api_key = input("Enter LLM API Key: ").strip()

    model_name = input(f"Enter LLM Model Name (default: {DEFAULT_MOONSHOT_MODEL}): ").strip()
    if not model_name:
        model_name = DEFAULT_MOONSHOT_MODEL

    if not test_llm_connection(api_key, base_url, model_name):
        print("Aborting due to LLM connection failure.")
        return

    llm_client = get_llm_client(api_key, base_url)

    # 3. Load Operators
    print("\n--- Step 3: Load Operators ---")
    print("Getting operators...")
    try:
        operators_df = ace_lib.get_operators(s)
        operators_df = operators_df[operators_df['scope'] == 'REGULAR']
        print(f"Retrieved {len(operators_df)} operators (REGULAR only).")

        print("Fetching documentation for operators...")
        operators_df = operators_df.copy()

        def fetch_doc_content(doc_path):
            if pd.isna(doc_path) or not doc_path:
                return None
            url = ace_lib.brain_api_url + doc_path
            try:
                r = s.get(url)
                if r.status_code == 200:
                    return json.dumps(r.json())
                return None
            except Exception:
                return None

        operators_df['extra_side_note'] = operators_df['documentation'].apply(fetch_doc_content)
        operators_df.drop(columns=['documentation', 'level'], inplace=True)
        print("Operators loaded and processed.")

    except Exception as e:
        print(f"Failed to get operators: {e}")
        return

    # 4. Dataset Selection
    print("\n--- Step 4: Select Dataset ---")
    region = input("Enter Region (default: USA): ").strip() or "USA"
    delay = input("Enter Delay (default: 1): ").strip() or "1"
    universe = input("Enter Universe (default: TOP3000): ").strip() or "TOP3000"

    try:
        delay = int(delay)
    except ValueError:
        print("Invalid delay, using default 1")
        delay = 1

    print(f"Fetching datasets for Region={region}, Delay={delay}, Universe={universe}...")
    try:
        datasets_df = ace_lib.get_datasets(
            s,
            region=region,
            delay=delay,
            universe=universe
        )
        print(f"Retrieved {len(datasets_df)} datasets.")
        print(datasets_df[['id', 'name', 'category', 'subcategory']].head(10))
    except Exception as e:
        print(f"Failed to get datasets: {e}")
        return

    # 5. Dataset Detail
    print("\n--- Step 5: Get Dataset Details ---")
    dataset_id = input("Enter Dataset ID to analyze (e.g., analyst10): ").strip()
    while not dataset_id:
        dataset_id = input("Dataset ID is required: ").strip()

    print(f"Getting datafields for dataset: {dataset_id}...")
    try:
        datafields_df = ace_lib.get_datafields(
            s,
            region=region,
            delay=delay,
            universe=universe,
            dataset_id=dataset_id
        )
        print(f"Retrieved {len(datafields_df)} datafields.")
    except Exception as e:
        print(f"Failed to get datafields: {e}")
        return

    # 6. Generate Alpha Templates
    print("\n--- Step 6: Generate Alpha Templates ---")

    # Load System Prompt
    # Use relative path based on the script location
    script_dir = os.path.dirname(os.path.abspath(__file__))
    system_prompt_path = os.path.join(script_dir, "what_is_Alpha_template.md")

    try:
        with open(system_prompt_path, "r", encoding="utf-8") as f:
            system_prompt = f.read()
        print(f"System prompt loaded from {system_prompt_path}")
    except Exception as e:
        print(f"System prompt file not found at {system_prompt_path}, using default. Error: {e}")
        system_prompt = "You are a helpful assistant for generating Alpha templates."

    response = call_llm_with_retry(
        llm_client,
        model_name,
        system_prompt,
        operators_df,
        datafields_df,
        dataset_id
    )

    print("\n=== LLM Response ===")
    print(response)
    print("====================")

if __name__ == "__main__":
    main()
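For reference, the interactive flow above (evidently give_me_idea/alpha_data_specific_template_master.py, going by the file list) can also be driven non-interactively. A minimal sketch, reusing only the ace_lib calls the script itself makes; the credentials and dataset settings are placeholders:

# Sketch: non-interactive variant of the flow above. Uses only ace_lib calls
# that appear in the script; credentials and settings are placeholders.
import ace_lib

ace_lib.get_credentials = lambda: ("user@example.com", "placeholder-password")
s = ace_lib.start_session()

operators_df = ace_lib.get_operators(s)
operators_df = operators_df[operators_df["scope"] == "REGULAR"]

datafields_df = ace_lib.get_datafields(
    s, region="USA", delay=1, universe="TOP3000", dataset_id="analyst10"
)
# operators_df / datafields_df then feed call_llm_with_retry() exactly as in main().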
@@ -0,0 +1,180 @@
import json
import os
from typing import Union

import pandas as pd
from pandas.io.formats.style import Styler

brain_api_url = os.environ.get("BRAIN_API_URL", "https://api.worldquantbrain.com")
brain_url = os.environ.get("BRAIN_URL", "https://platform.worldquantbrain.com")


def make_clickable_alpha_id(alpha_id: str) -> str:
    """
    Create a clickable HTML link for an alpha ID.

    Args:
        alpha_id (str): The ID of the alpha.

    Returns:
        str: An HTML string containing a clickable link to the alpha's page on the platform.
    """

    url = brain_url + "/alpha/"
    return f'<a href="{url}{alpha_id}">{alpha_id}</a>'


def prettify_result(
    result: list, detailed_tests_view: bool = False, clickable_alpha_id: bool = False
) -> Union[pd.DataFrame, Styler]:
    """
    Combine and format simulation results into a single DataFrame for analysis.

    Args:
        result (list): A list of dictionaries containing simulation results.
        detailed_tests_view (bool, optional): If True, include detailed test results. Defaults to False.
        clickable_alpha_id (bool, optional): If True, make alpha IDs clickable. Defaults to False.

    Returns:
        pandas.DataFrame or pandas.io.formats.style.Styler: A DataFrame containing formatted results,
        optionally with clickable alpha IDs.
    """
    list_of_is_stats = [result[x]["is_stats"] for x in range(len(result)) if result[x]["is_stats"] is not None]
    is_stats_df = pd.concat(list_of_is_stats).reset_index(drop=True)
    is_stats_df = is_stats_df.sort_values("fitness", ascending=False)

    expressions = {
        result[x]["alpha_id"]: (
            {
                "selection": result[x]["simulate_data"]["selection"],
                "combo": result[x]["simulate_data"]["combo"],
            }
            if result[x]["simulate_data"]["type"] == "SUPER"
            else result[x]["simulate_data"]["regular"]
        )
        for x in range(len(result))
        if result[x]["is_stats"] is not None
    }
    expression_df = pd.DataFrame(list(expressions.items()), columns=["alpha_id", "expression"])

    list_of_is_tests = [result[x]["is_tests"] for x in range(len(result)) if result[x]["is_tests"] is not None]
    is_tests_df = pd.concat(list_of_is_tests, sort=True).reset_index(drop=True)
    is_tests_df = is_tests_df[is_tests_df["result"] != "WARNING"]
    if detailed_tests_view:
        cols = ["limit", "result", "value"]
        is_tests_df["details"] = is_tests_df[cols].to_dict(orient="records")
        is_tests_df = is_tests_df.pivot(index="alpha_id", columns="name", values="details").reset_index()
    else:
        is_tests_df = is_tests_df.pivot(index="alpha_id", columns="name", values="result").reset_index()

    alpha_stats = pd.merge(is_stats_df, expression_df, on="alpha_id")
    alpha_stats = pd.merge(alpha_stats, is_tests_df, on="alpha_id")
    alpha_stats = alpha_stats.drop(columns=alpha_stats.columns[(alpha_stats == "PENDING").any()])
    alpha_stats.columns = alpha_stats.columns.str.replace("(?<=[a-z])(?=[A-Z])", "_", regex=True).str.lower()
    if clickable_alpha_id:
        return alpha_stats.style.format({"alpha_id": lambda x: make_clickable_alpha_id(str(x))})
    return alpha_stats


def concat_pnl(result: list) -> pd.DataFrame:
    """
    Combine PnL results from multiple alphas into a single DataFrame.

    Args:
        result (list): A list of dictionaries containing simulation results with PnL data.

    Returns:
        pandas.DataFrame: A DataFrame containing combined PnL data for all alphas.
    """
    list_of_pnls = [result[x]["pnl"] for x in range(len(result)) if result[x]["pnl"] is not None]
    pnls_df = pd.concat(list_of_pnls).reset_index()

    return pnls_df


def concat_is_tests(result: list) -> pd.DataFrame:
    """
    Combine in-sample test results from multiple alphas into a single DataFrame.

    Args:
        result (list): A list of dictionaries containing simulation results with in-sample test data.

    Returns:
        pandas.DataFrame: A DataFrame containing combined in-sample test results for all alphas.
    """
    is_tests_list = [result[x]["is_tests"] for x in range(len(result)) if result[x]["is_tests"] is not None]
    is_tests_df = pd.concat(is_tests_list, sort=True).reset_index(drop=True)
    return is_tests_df


def save_simulation_result(result: dict) -> None:
    """
    Save the simulation result to a JSON file in the 'simulation_results' folder.

    Args:
        result (dict): A dictionary containing the simulation result for an alpha.
    """

    alpha_id = result["id"]
    region = result["settings"]["region"]
    folder_path = "simulation_results/"
    file_path = os.path.join(folder_path, f"{alpha_id}_{region}")

    os.makedirs(folder_path, exist_ok=True)

    with open(file_path, "w") as file:
        json.dump(result, file)


def save_pnl(pnl_df: pd.DataFrame, alpha_id: str, region: str) -> None:
    """
    Save the PnL data for an alpha to a CSV file in the 'alphas_pnl' folder.

    Args:
        pnl_df (pandas.DataFrame): The DataFrame containing PnL data.
        alpha_id (str): The ID of the alpha.
        region (str): The region for which the PnL data was generated.
    """

    folder_path = "alphas_pnl/"
    file_path = os.path.join(folder_path, f"{alpha_id}_{region}.csv")
    os.makedirs(folder_path, exist_ok=True)

    pnl_df.to_csv(file_path)


def save_yearly_stats(yearly_stats: pd.DataFrame, alpha_id: str, region: str):
    """
    Save the yearly statistics for an alpha to a CSV file in the 'yearly_stats' folder.

    Args:
        yearly_stats (pandas.DataFrame): The DataFrame containing yearly statistics.
        alpha_id (str): The ID of the alpha.
        region (str): The region for which the statistics were generated.
    """

    folder_path = "yearly_stats/"
    file_path = os.path.join(folder_path, f"{alpha_id}_{region}.csv")
    os.makedirs(folder_path, exist_ok=True)

    yearly_stats.to_csv(file_path, index=False)


def expand_dict_columns(data: pd.DataFrame) -> pd.DataFrame:
    """
    Expand dictionary columns in a DataFrame into separate columns.

    Args:
        data (pandas.DataFrame): The input DataFrame with dictionary columns.

    Returns:
        pandas.DataFrame: A new DataFrame with expanded columns.
    """
    dict_columns = list(filter(lambda x: isinstance(data[x].iloc[0], dict), data.columns))
    new_columns = pd.concat(
        [data[col].apply(pd.Series).rename(columns=lambda x: f"{col}_{x}") for col in dict_columns],
        axis=1,
    )

    data = pd.concat([data, new_columns], axis=1)
    return data
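A quick usage sketch of the helpers above. The shape of the result list (keys alpha_id, simulate_data, is_stats, is_tests, pnl) is inferred from the docstrings and code; the sample values are invented for illustration:

# Hypothetical single-alpha result; shape inferred from the functions above.
import pandas as pd
from helpful_functions import prettify_result, concat_is_tests

result = [
    {
        "alpha_id": "abc123",
        "simulate_data": {"type": "REGULAR", "regular": "rank(-returns)"},
        "is_stats": pd.DataFrame([{"alpha_id": "abc123", "fitness": 1.2, "sharpe": 1.5}]),
        "is_tests": pd.DataFrame([
            {"alpha_id": "abc123", "name": "LOW_TURNOVER", "result": "PASS",
             "limit": 0.7, "value": 0.31},
        ]),
        "pnl": None,
    },
]

print(prettify_result(result))   # one row per alpha, test results pivoted to columns
print(concat_is_tests(result))   # long-format test table across all alphas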
@@ -0,0 +1,11 @@
What an alpha template is
An alpha template is a reusable recipe that captures an economic idea and leaves “slots” (data fields, operators, groups, decay, neutralization choices, etc.) to instantiate many candidate alphas. Typical structure: clean data (backfill, winsorize) → transform/compare across time or peers → rank/neutralize → (optionally) decay/turnover tune. Templates encourage systematic search, reuse, and diversification while keeping an explicit economic rationale.

Some example templates and rationales

CAPM residual (market/sector-neutral return): ts_regression(returns, group_mean(returns, log(ts_mean(cap,21)), sector), 252, rettype=0) after backfill+winsorize. Rationale: strip market/sector beta to isolate idiosyncratic alpha; sector-weighted by smoothed log-cap to reduce large-cap dominance.
CAPM beta (slope) template: same regression with rettype=2; pre-clean target/market (ts_backfill(...,63) + winsorize(std=4)). Rationale: rank stocks by relative risk within sector; long low-β, short high-β, or study β dispersion across groups.
CAPM generalized to any feature: data = winsorize(ts_backfill(<data>,63),std=4); data_gpm = group_mean(data, log(ts_mean(cap,21)), sector); resid = ts_regression(data, data_gpm, 252, rettype=0). Rationale: pull out the component unexplained by the group average of the same feature; reduces common-mode exposure.
Actual vs estimate spread (analyst): group_zscore(group_zscore(<act>, industry) - group_zscore(<est>, industry), industry) or the abstracted group_compare(diff(group_compare(act,...), group_compare(est,...)), ...). Rationale: surprise/beat-miss signal within industry, normalized to peers to avoid level bias.
Analyst term-structure (fp1 vs fy1/fp2/fy2): group_zscore(group_zscore(anl14_mean_eps_<period1>, industry) - group_zscore(anl14_mean_eps_<period2>, industry), industry) with operator/group slots. Rationale: cross-period expectation steepness; rising near-term vs long-term forecasts can flag momentum/inflection.
Option Greeks net spread: group_operator(<put_greek> - <call_greek>, <grouping_data>) over industry/sector (Delta/Gamma/Vega/Theta). Rationale: options-implied sentiment/convexity skew vs peers; outlier net Greeks may precede spot moves; extend with multi-Greek composites or time-series deltas.
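Because the slots are explicit, templates can be instantiated mechanically. A minimal hypothetical sketch: the field and group lists are illustrative placeholders, and the TEMPLATE string follows the CAPM-generalized recipe above:

# Hypothetical sketch of instantiating one template across its slots.
# Field and group lists are placeholders, not part of the package.
from itertools import product

TEMPLATE = (
    "ts_regression(winsorize(ts_backfill({field},63),std=4), "
    "group_mean(winsorize(ts_backfill({field},63),std=4), "
    "log(ts_mean(cap,21)), {group}), 252, rettype=0)"
)

fields = ["anl14_mean_eps_fp1", "anl14_mean_eps_fy1"]  # placeholder datafields
groups = ["sector", "industry"]

candidates = [TEMPLATE.format(field=f, group=g) for f, g in product(fields, groups)]
for expr in candidates:
    print(expr)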