cnhkmcp 2.1.9-py3-none-any.whl → 2.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. cnhkmcp/__init__.py +1 -1
  2. cnhkmcp/untracked/AI打工人/BRAIN_AI打工人Mac_Linux版本.zip +0 -0
  3. cnhkmcp/untracked/AI打工人/双击安装AI打工人_Windows版本.exe +0 -0
  4. cnhkmcp/untracked/AI桌面插件/vector_db/chroma.sqlite3 +0 -0
  5. cnhkmcp/untracked/skills/brain-data-feature-engineering/OUTPUT_TEMPLATE.md +325 -0
  6. cnhkmcp/untracked/skills/brain-data-feature-engineering/SKILL.md +263 -0
  7. cnhkmcp/untracked/skills/brain-data-feature-engineering/examples.md +244 -0
  8. cnhkmcp/untracked/skills/brain-data-feature-engineering/reference.md +493 -0
  9. cnhkmcp/untracked/skills/brain-feature-implementation/SKILL.md +87 -0
  10. cnhkmcp/untracked/skills/brain-feature-implementation/config.json +6 -0
  11. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/analyst15_GLB_delay1.csv +289 -0
  12. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/final_expressions.json +410 -0
  13. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588244.json +4 -0
  14. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588251.json +20 -0
  15. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588273.json +23 -0
  16. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588293.json +23 -0
  17. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588319.json +23 -0
  18. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588322.json +14 -0
  19. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588325.json +20 -0
  20. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588328.json +23 -0
  21. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588354.json +23 -0
  22. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588357.json +23 -0
  23. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588361.json +23 -0
  24. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588364.json +23 -0
  25. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588368.json +23 -0
  26. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588391.json +14 -0
  27. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588394.json +23 -0
  28. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588397.json +59 -0
  29. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588400.json +35 -0
  30. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588403.json +20 -0
  31. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588428.json +23 -0
  32. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588431.json +32 -0
  33. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588434.json +20 -0
  34. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588438.json +20 -0
  35. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588441.json +14 -0
  36. cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588468.json +20 -0
  37. cnhkmcp/untracked/skills/brain-feature-implementation/scripts/ace_lib.py +1514 -0
  38. cnhkmcp/untracked/skills/brain-feature-implementation/scripts/fetch_dataset.py +107 -0
  39. cnhkmcp/untracked/skills/brain-feature-implementation/scripts/helpful_functions.py +180 -0
  40. cnhkmcp/untracked/skills/brain-feature-implementation/scripts/implement_idea.py +164 -0
  41. cnhkmcp/untracked/skills/brain-feature-implementation/scripts/merge_expression_list.py +88 -0
  42. cnhkmcp/untracked/skills/planning-with-files/SKILL.md +211 -0
  43. cnhkmcp/untracked/skills/planning-with-files/examples.md +202 -0
  44. cnhkmcp/untracked/skills/planning-with-files/reference.md +218 -0
  45. cnhkmcp/untracked/skills/planning-with-files/scripts/check-complete.sh +44 -0
  46. cnhkmcp/untracked/skills/planning-with-files/scripts/init-session.sh +120 -0
  47. cnhkmcp/untracked/skills/planning-with-files/templates/findings.md +95 -0
  48. cnhkmcp/untracked/skills/planning-with-files/templates/progress.md +114 -0
  49. cnhkmcp/untracked/skills/planning-with-files/templates/task_plan.md +132 -0
  50. {cnhkmcp-2.1.9.dist-info → cnhkmcp-2.2.0.dist-info}/METADATA +1 -1
  51. {cnhkmcp-2.1.9.dist-info → cnhkmcp-2.2.0.dist-info}/RECORD +55 -10
  52. {cnhkmcp-2.1.9.dist-info → cnhkmcp-2.2.0.dist-info}/WHEEL +0 -0
  53. {cnhkmcp-2.1.9.dist-info → cnhkmcp-2.2.0.dist-info}/entry_points.txt +0 -0
  54. {cnhkmcp-2.1.9.dist-info → cnhkmcp-2.2.0.dist-info}/licenses/LICENSE +0 -0
  55. {cnhkmcp-2.1.9.dist-info → cnhkmcp-2.2.0.dist-info}/top_level.txt +0 -0
cnhkmcp/untracked/skills/brain-feature-implementation/scripts/fetch_dataset.py
@@ -0,0 +1,107 @@
+ import json
+ import os
+ import argparse
+ import pandas as pd
+ from pathlib import Path
+ import sys
+
+ print("Script started...", flush=True)
+
+ # Ensure local imports work by adding the script directory to sys.path
+ script_dir = Path(__file__).resolve().parent
+ sys.path.append(str(script_dir))
+
+ try:
+     import ace_lib
+ except ImportError:
+     print("Error: Could not import 'ace_lib'. Make sure it is in the same directory.")
+     sys.exit(1)
+
+ def load_config(config_path):
+     try:
+         with open(config_path, 'r') as f:
+             return json.load(f)
+     except Exception as e:
+         print(f"Error loading config file: {e}")
+         return None
+
+ def main():
+     parser = argparse.ArgumentParser(description="Fetch dataset fields from WorldQuant BRAIN")
+     parser.add_argument("--datasetid", required=True, help="ID of the dataset to fetch (e.g., specific dataset ID)")
+     parser.add_argument("--region", default="USA", help="Region (default: USA)")
+     parser.add_argument("--delay", type=int, default=1, help="Delay (default: 1)")
+     parser.add_argument("--universe", default="TOP3000", help="Universe (default: TOP3000)")
+     parser.add_argument("--instrument-type", default="EQUITY", dest="instrument_type", help="Instrument Type (default: EQUITY)")
+
+     args = parser.parse_args()
+
+     # Determine paths relative to this script
+     # User requested: robust and no absolute paths hardcoded
+     workspace_dir = script_dir.parent
+     config_path = workspace_dir / "config.json"
+     data_dir = workspace_dir / "data"
+
+     # Ensure data directory exists
+     data_dir.mkdir(parents=True, exist_ok=True)
+
+     # Load configuration
+     if not config_path.exists():
+         print(f"Error: Config file not found at {config_path}")
+         sys.exit(1)
+
+     config = load_config(config_path)
+     if not config:
+         sys.exit(1)
+
+     # Extract credentials
+     creds = config.get("BRAIN_CREDENTIALS", {})
+     email = creds.get("email")
+     password = creds.get("password")
+
+     if not email or not password:
+         print("Error: BRAIN_CREDENTIALS (email/password) not found in config.json")
+         sys.exit(1)
+
+     # Override ace_lib.get_credentials to use our config values
+     # ace_lib.start_session() internally calls get_credentials()
+     ace_lib.get_credentials = lambda: (email, password)
+
+     try:
+         print(f"Logging in as {email}...")
+         session = ace_lib.start_session()
+
+         print(f"Fetching datafields for dataset: {args.datasetid} (Region: {args.region}, Delay: {args.delay})...")
+
+         # Fetch datafields using the library function
+         df = ace_lib.get_datafields(
+             session,
+             dataset_id=args.datasetid,
+             region=args.region,
+             delay=args.delay,
+             universe=args.universe,
+             instrument_type=args.instrument_type
+         )
+
+         if df is None or df.empty:
+             print("Warning: No data found or empty response.")
+         else:
+             # Construct a safe filename and folder name
+             safe_dataset_id = "".join([c for c in args.datasetid if c.isalnum() or c in ('-','_')])
+             folder_name = f"{safe_dataset_id}_{args.region}_delay{args.delay}"
+             dataset_folder = data_dir / folder_name
+             dataset_folder.mkdir(parents=True, exist_ok=True)
+
+             filename = f"{folder_name}.csv"
+             output_path = dataset_folder / filename
+
+             print(f"Saving {len(df)} records to {output_path}...")
+             df.to_csv(output_path, index=False)
+             print("Success.")
+
+     except Exception as e:
+         print(f"An error occurred during execution: {e}")
+         import traceback
+         traceback.print_exc()
+
+ if __name__ == "__main__":
+     main()
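
For orientation (not part of the published diff): fetch_dataset.py reads config.json from the brain-feature-implementation workspace root and writes the fetched field list to data/<datasetid>_<region>_delay<delay>/. A minimal driving sketch, with placeholder credentials and the analyst15/GLB/delay-1 values taken from the bundled data folder name:

import json
import pathlib
import subprocess

# Hypothetical: assumes the wheel's skill folder is available relative to the working directory.
workspace = pathlib.Path("cnhkmcp/untracked/skills/brain-feature-implementation")

# Write a config.json of the shape the script looks up; the credential values are placeholders.
(workspace / "config.json").write_text(
    json.dumps({"BRAIN_CREDENTIALS": {"email": "user@example.com", "password": "changeme"}})
)

# Expected to produce data/analyst15_GLB_delay1/analyst15_GLB_delay1.csv on success.
subprocess.run(
    ["python", str(workspace / "scripts" / "fetch_dataset.py"),
     "--datasetid", "analyst15", "--region", "GLB", "--delay", "1"],
    check=True,
)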
cnhkmcp/untracked/skills/brain-feature-implementation/scripts/helpful_functions.py
@@ -0,0 +1,180 @@
+ import json
+ import os
+ from typing import Union
+
+ import pandas as pd
+ from pandas.io.formats.style import Styler
+
+ brain_api_url = os.environ.get("BRAIN_API_URL", "https://api.worldquantbrain.com")
+ brain_url = os.environ.get("BRAIN_URL", "https://platform.worldquantbrain.com")
+
+
+ def make_clickable_alpha_id(alpha_id: str) -> str:
+     """
+     Create a clickable HTML link for an alpha ID.
+
+     Args:
+         alpha_id (str): The ID of the alpha.
+
+     Returns:
+         str: An HTML string containing a clickable link to the alpha's page on the platform.
+     """
+
+     url = brain_url + "/alpha/"
+     return f'<a href="{url}{alpha_id}">{alpha_id}</a>'
+
+
+ def prettify_result(
+     result: list, detailed_tests_view: bool = False, clickable_alpha_id: bool = False
+ ) -> Union[pd.DataFrame, Styler]:
+     """
+     Combine and format simulation results into a single DataFrame for analysis.
+
+     Args:
+         result (list): A list of dictionaries containing simulation results.
+         detailed_tests_view (bool, optional): If True, include detailed test results. Defaults to False.
+         clickable_alpha_id (bool, optional): If True, make alpha IDs clickable. Defaults to False.
+
+     Returns:
+         pandas.DataFrame or pandas.io.formats.style.Styler: A DataFrame containing formatted results,
+         optionally with clickable alpha IDs.
+     """
+     list_of_is_stats = [result[x]["is_stats"] for x in range(len(result)) if result[x]["is_stats"] is not None]
+     is_stats_df = pd.concat(list_of_is_stats).reset_index(drop=True)
+     is_stats_df = is_stats_df.sort_values("fitness", ascending=False)
+
+     expressions = {
+         result[x]["alpha_id"]: (
+             {
+                 "selection": result[x]["simulate_data"]["selection"],
+                 "combo": result[x]["simulate_data"]["combo"],
+             }
+             if result[x]["simulate_data"]["type"] == "SUPER"
+             else result[x]["simulate_data"]["regular"]
+         )
+         for x in range(len(result))
+         if result[x]["is_stats"] is not None
+     }
+     expression_df = pd.DataFrame(list(expressions.items()), columns=["alpha_id", "expression"])
+
+     list_of_is_tests = [result[x]["is_tests"] for x in range(len(result)) if result[x]["is_tests"] is not None]
+     is_tests_df = pd.concat(list_of_is_tests, sort=True).reset_index(drop=True)
+     is_tests_df = is_tests_df[is_tests_df["result"] != "WARNING"]
+     if detailed_tests_view:
+         cols = ["limit", "result", "value"]
+         is_tests_df["details"] = is_tests_df[cols].to_dict(orient="records")
+         is_tests_df = is_tests_df.pivot(index="alpha_id", columns="name", values="details").reset_index()
+     else:
+         is_tests_df = is_tests_df.pivot(index="alpha_id", columns="name", values="result").reset_index()
+
+     alpha_stats = pd.merge(is_stats_df, expression_df, on="alpha_id")
+     alpha_stats = pd.merge(alpha_stats, is_tests_df, on="alpha_id")
+     alpha_stats = alpha_stats.drop(columns=alpha_stats.columns[(alpha_stats == "PENDING").any()])
+     alpha_stats.columns = alpha_stats.columns.str.replace("(?<=[a-z])(?=[A-Z])", "_", regex=True).str.lower()
+     if clickable_alpha_id:
+         return alpha_stats.style.format({"alpha_id": lambda x: make_clickable_alpha_id(str(x))})
+     return alpha_stats
+
+
+ def concat_pnl(result: list) -> pd.DataFrame:
+     """
+     Combine PnL results from multiple alphas into a single DataFrame.
+
+     Args:
+         result (list): A list of dictionaries containing simulation results with PnL data.
+
+     Returns:
+         pandas.DataFrame: A DataFrame containing combined PnL data for all alphas.
+     """
+     list_of_pnls = [result[x]["pnl"] for x in range(len(result)) if result[x]["pnl"] is not None]
+     pnls_df = pd.concat(list_of_pnls).reset_index()
+
+     return pnls_df
+
+
+ def concat_is_tests(result: list) -> pd.DataFrame:
+     """
+     Combine in-sample test results from multiple alphas into a single DataFrame.
+
+     Args:
+         result (list): A list of dictionaries containing simulation results with in-sample test data.
+
+     Returns:
+         pandas.DataFrame: A DataFrame containing combined in-sample test results for all alphas.
+     """
+     is_tests_list = [result[x]["is_tests"] for x in range(len(result)) if result[x]["is_tests"] is not None]
+     is_tests_df = pd.concat(is_tests_list, sort=True).reset_index(drop=True)
+     return is_tests_df
+
+
+ def save_simulation_result(result: dict) -> None:
+     """
+     Save the simulation result to a JSON file in the 'simulation_results' folder.
+
+     Args:
+         result (dict): A dictionary containing the simulation result for an alpha.
+     """
+
+     alpha_id = result["id"]
+     region = result["settings"]["region"]
+     folder_path = "simulation_results/"
+     file_path = os.path.join(folder_path, f"{alpha_id}_{region}")
+
+     os.makedirs(folder_path, exist_ok=True)
+
+     with open(file_path, "w", encoding="utf-8") as file:
+         json.dump(result, file)
+
+
+ def save_pnl(pnl_df: pd.DataFrame, alpha_id: str, region: str) -> None:
+     """
+     Save the PnL data for an alpha to a CSV file in the 'alphas_pnl' folder.
+
+     Args:
+         pnl_df (pandas.DataFrame): The DataFrame containing PnL data.
+         alpha_id (str): The ID of the alpha.
+         region (str): The region for which the PnL data was generated.
+     """
+
+     folder_path = "alphas_pnl/"
+     file_path = os.path.join(folder_path, f"{alpha_id}_{region}.csv")
+     os.makedirs(folder_path, exist_ok=True)
+
+     pnl_df.to_csv(file_path)
+
+
+ def save_yearly_stats(yearly_stats: pd.DataFrame, alpha_id: str, region: str):
+     """
+     Save the yearly statistics for an alpha to a CSV file in the 'yearly_stats' folder.
+
+     Args:
+         yearly_stats (pandas.DataFrame): The DataFrame containing yearly statistics.
+         alpha_id (str): The ID of the alpha.
+         region (str): The region for which the statistics were generated.
+     """
+
+     folder_path = "yearly_stats/"
+     file_path = os.path.join(folder_path, f"{alpha_id}_{region}.csv")
+     os.makedirs(folder_path, exist_ok=True)
+
+     yearly_stats.to_csv(file_path, index=False)
+
+
+ def expand_dict_columns(data: pd.DataFrame) -> pd.DataFrame:
+     """
+     Expand dictionary columns in a DataFrame into separate columns.
+
+     Args:
+         data (pandas.DataFrame): The input DataFrame with dictionary columns.
+
+     Returns:
+         pandas.DataFrame: A new DataFrame with expanded columns.
+     """
+     dict_columns = list(filter(lambda x: isinstance(data[x].iloc[0], dict), data.columns))
+     new_columns = pd.concat(
+         [data[col].apply(pd.Series).rename(columns=lambda x: f"{col}_{x}") for col in dict_columns],
+         axis=1,
+     )
+
+     data = pd.concat([data, new_columns], axis=1)
+     return data
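
helpful_functions.py is a post-processing toolkit for BRAIN simulation results. As a rough illustration of the one generic helper, expand_dict_columns flattens dict-valued columns into prefixed scalar columns; the toy frame below is made up and the import assumes the scripts directory is on sys.path:

import pandas as pd
from helpful_functions import expand_dict_columns  # assumes scripts/ is importable

toy = pd.DataFrame({
    "alpha_id": ["abc123"],                       # made-up id
    "settings": [{"region": "GLB", "delay": 1}],  # dict-valued column to expand
})
flat = expand_dict_columns(toy)
print(flat.columns.tolist())
# ['alpha_id', 'settings', 'settings_region', 'settings_delay']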
cnhkmcp/untracked/skills/brain-feature-implementation/scripts/implement_idea.py
@@ -0,0 +1,164 @@
+ import pandas as pd
+ from pathlib import Path
+ import argparse
+ import sys
+ import re
+ import json
+ import time
+
+ def load_data(dataset_name=None):
+     script_dir = Path(__file__).resolve().parent
+     workspace_dir = script_dir.parent
+
+     if not dataset_name:
+         data_root = workspace_dir / "data"
+         if not data_root.exists():
+             print("Error: Data directory not found.", file=sys.stderr)
+             sys.exit(1)
+
+         subdirs = [d for d in data_root.iterdir() if d.is_dir()]
+
+         if len(subdirs) == 1:
+             dataset_name = subdirs[0].name
+             print(f"Auto-detected dataset: {dataset_name}", file=sys.stderr)
+         elif len(subdirs) > 1:
+             print("Error: Multiple datasets found. Please specify --dataset.", file=sys.stderr)
+             print("Available datasets:", file=sys.stderr)
+             for d in subdirs:
+                 print(f"  {d.name}", file=sys.stderr)
+             sys.exit(1)
+         else:
+             print("Error: No dataset folders found inside data directory.", file=sys.stderr)
+             sys.exit(1)
+
+     dataset_dir = workspace_dir / "data" / dataset_name
+     data_path = dataset_dir / f"{dataset_name}.csv"
+
+     print(f"Loading data from {data_path}...", file=sys.stderr)
+     try:
+         df = pd.read_csv(data_path)
+         return df, dataset_dir
+     except FileNotFoundError:
+         print(f"Error: Data file not found at {data_path}. Please run fetch_dataset.py first.", file=sys.stderr)
+         sys.exit(1)
+
+ def extract_keys_from_template(template):
+     return re.findall(r'\{([A-Za-z0-9_]+)\}', template)
+
+ def match_single_horizon_auto(df, template):
+     """
+     Auto-detects metrics from template and finds matching fields.
+     """
+     metrics = extract_keys_from_template(template)
+     if not metrics:
+         print("Error: No variables found in template (use {variable} format).", file=sys.stderr)
+         return []
+
+     # Sort metrics by length descending to match most specific suffixes first
+     metrics = sorted(metrics, key=len, reverse=True)
+     primary = metrics[0]
+
+     # Try different separators or exact match
+     # we look for columns that end with the primary metric, optionally followed by numeric suffix (e.g. _1234)
+     # Regex: .*<primary>(?:_\d+)?$
+     import re
+     primary_regex = re.escape(primary) + r'(?:_\d+)?$'
+     candidates = df[df['id'].str.match(f'.*{primary_regex}')]['id'].unique().tolist()
+
+     results = []
+     seen = set()
+
+     # Try different separators or exact match
+     # We look for columns that contain the primary metric at any position
+     import re
+     primary_regex = re.escape(primary)
+     candidates = df[df['id'].str.contains(primary_regex, regex=True)]['id'].unique().tolist()
+
+     results = []
+     seen = set()
+
+     for cand in candidates:
+         # Determine base prefix
+         # We identify the prefix by taking everything before the first occurrence of the primary metric
+         match = re.search(re.escape(primary), cand)
+         if not match:
+             continue
+
+         # Base includes everything up to the metric (e.g., "dataset_prefix_")
+         base = cand[:match.start()]
+
+         # Verify other metrics exist with this base
+         field_map = {primary: cand}
+         all_found = True
+
+         for m in metrics[1:]:
+             # Construct target pattern for other metrics: Must start with the same base followed by the metric
+             # We allow any suffix after the metric (e.g. IDs, versions)
+             target_pattern = f"^{re.escape(base)}{re.escape(m)}"
+             target_matches = df[df['id'].str.match(target_pattern)]['id'].tolist()
+
+             if not target_matches:
+                 all_found = False
+                 break
+             # Use the first match found for the secondary metric
+             field_map[m] = target_matches[0]
+
+         if all_found:
+             try:
+                 expr = template.format(**field_map)
+                 if expr not in seen:
+                     seen.add(expr)
+                     # Create a readable label for the horizon/group
+                     if base:
+                         # Strip standard separators
+                         horizon_label = base.strip("_")
+                     else:
+                         horizon_label = "global"
+
+                     results.append((horizon_label, expr))
+             except KeyError as e:
+                 continue
+
+     return results
+
+ def main():
+     parser = argparse.ArgumentParser(description="Generate Alpha Expressions based on patterns")
+     parser.add_argument("--template", required=True, help="Python format string (e.g. '{st_dev} / abs({mean})')")
+     parser.add_argument("--dataset", help="Name of the dataset folder. Auto-detected if only one exists.")
+
+     args = parser.parse_args()
+
+     df, dataset_dir = load_data(args.dataset)
+
+     results = match_single_horizon_auto(df, args.template)
+
+     # Output
+     expression_list = []
+     if not results:
+         print("No matching expressions found.")
+     else:
+         print(f"Generated {len(results)} expressions:\n")
+         # print(f"{'Context':<30} | Expression")
+         # print("-" * 120)
+
+         for context, expr in results:
+             # print(f"{context:<30} | {expr}")
+             expression_list.append(expr)
+
+     # Save results to JSON (Always save for debugging)
+     timestamp = int(time.time())
+     json_output = {
+         "template": args.template,
+         "expression_list": expression_list
+     }
+
+     output_file = dataset_dir / f"idea_{timestamp}.json"
+     try:
+         with open(output_file, 'w') as f:
+             json.dump(json_output, f, indent=4)
+         print(f"\nSaved idea configuration to: {output_file}")
+     except Exception as e:
+         print(f"Error saving JSON: {e}", file=sys.stderr)
+
+ if __name__ == "__main__":
+     main()
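
The matching logic in match_single_horizon_auto can be exercised directly. A small sketch using made-up field ids and the template from the script's own --help text; it assumes scripts/ is importable, and in real use the ids come from the fetched CSV's 'id' column:

import pandas as pd
from implement_idea import match_single_horizon_auto  # assumes scripts/ is on sys.path

# Hypothetical field ids sharing a common prefix per group, as the matcher expects.
fields = pd.DataFrame({"id": [
    "anl15_rating_st_dev", "anl15_rating_mean",
    "anl15_eps_st_dev", "anl15_eps_mean",
]})
pairs = match_single_horizon_auto(fields, "{st_dev} / abs({mean})")
print(pairs)
# [('anl15_rating', 'anl15_rating_st_dev / abs(anl15_rating_mean)'),
#  ('anl15_eps', 'anl15_eps_st_dev / abs(anl15_eps_mean)')]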
cnhkmcp/untracked/skills/brain-feature-implementation/scripts/merge_expression_list.py
@@ -0,0 +1,88 @@
+ import json
+ import argparse
+ from pathlib import Path
+ import sys
+
+ def load_data_dir(dataset_name=None):
+     script_dir = Path(__file__).resolve().parent
+     workspace_dir = script_dir.parent
+
+     if not dataset_name:
+         data_root = workspace_dir / "data"
+         if not data_root.exists():
+             print("Error: Data directory not found.", file=sys.stderr)
+             sys.exit(1)
+
+         subdirs = [d for d in data_root.iterdir() if d.is_dir()]
+
+         if len(subdirs) == 1:
+             dataset_name = subdirs[0].name
+             print(f"Auto-detected dataset: {dataset_name}", file=sys.stderr)
+             return workspace_dir / "data" / dataset_name
+         elif len(subdirs) > 1:
+             print("Error: Multiple datasets found. Please specify --dataset.", file=sys.stderr)
+             sys.exit(1)
+         else:
+             print("Error: No dataset folders found inside data directory.", file=sys.stderr)
+             sys.exit(1)
+
+     return workspace_dir / "data" / dataset_name
+
+ def main():
+     parser = argparse.ArgumentParser(description="Merge all generated expressions from idea JSON files.")
+     parser.add_argument("--dataset", help="Name of the dataset folder containing idea JSONs.")
+     parser.add_argument("--output", default="final_expressions.json", help="Output filename.")
+
+     args = parser.parse_args()
+
+     dataset_dir = load_data_dir(args.dataset)
+
+     if not dataset_dir.exists():
+         print(f"Error: Dataset directory {dataset_dir} does not exist.", file=sys.stderr)
+         sys.exit(1)
+
+     all_expressions = []
+
+     # Find all idea_*.json files
+     json_files = list(dataset_dir.glob("idea_*.json"))
+
+     if not json_files:
+         print(f"No idea_*.json files found in {dataset_dir}", file=sys.stderr)
+         sys.exit(0)
+
+     print(f"Found {len(json_files)} idea files. Merging...")
+
+     for jf in json_files:
+         try:
+             with open(jf, 'r') as f:
+                 data = json.load(f)
+             exprs = data.get("expression_list", [])
+             if exprs:
+                 all_expressions.extend(exprs)
+                 print(f"  + {jf.name}: {len(exprs)} expressions")
+             else:
+                 print(f"  - {jf.name}: 0 expressions")
+         except Exception as e:
+             print(f"  ! Error reading {jf.name}: {e}", file=sys.stderr)
+
+     # Remove duplicates if desired? Usually we keep them or set them.
+     # Let's make unique to be safe, but preserve order as best as possible.
+     unique_expressions = []
+     seen = set()
+     for ex in all_expressions:
+         if ex not in seen:
+             unique_expressions.append(ex)
+             seen.add(ex)
+
+     output_path = dataset_dir / args.output
+
+     try:
+         with open(output_path, 'w') as f:
+             json.dump(unique_expressions, f, indent=4)
+         print(f"\nSuccessfully merged {len(unique_expressions)} unique expressions.")
+         print(f"Output saved to: {output_path}")
+     except Exception as e:
+         print(f"Error saving output: {e}", file=sys.stderr)
+
+ if __name__ == "__main__":
+     main()
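
Taken together, the new scripts form a small pipeline: fetch_dataset.py pulls the field list, implement_idea.py turns each template into an idea_<timestamp>.json, and merge_expression_list.py folds those into one de-duplicated final_expressions.json (a plain JSON array of expression strings). A sketch of the last step, assuming the bundled analyst15_GLB_delay1 folder and a hypothetical working directory:

import json
import pathlib
import subprocess

skill = pathlib.Path("cnhkmcp/untracked/skills/brain-feature-implementation")  # hypothetical location
subprocess.run(
    ["python", str(skill / "scripts" / "merge_expression_list.py"),
     "--dataset", "analyst15_GLB_delay1", "--output", "final_expressions.json"],
    check=True,
)
merged = json.loads((skill / "data" / "analyst15_GLB_delay1" / "final_expressions.json").read_text())
print(len(merged), "unique expressions")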