cnhkmcp 2.1.9__py3-none-any.whl → 2.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cnhkmcp/__init__.py +1 -1
- cnhkmcp/untracked/AI/321/206/320/231/320/243/321/205/342/225/226/320/265/321/204/342/225/221/342/225/221/BRAIN_AI/321/206/320/231/320/243/321/205/342/225/226/320/265/321/204/342/225/221/342/225/221Mac_Linux/321/207/320/231/320/230/321/206/320/254/320/274.zip +0 -0
- cnhkmcp/untracked/AI/321/206/320/231/320/243/321/205/342/225/226/320/265/321/204/342/225/221/342/225/221//321/205/320/237/320/234/321/205/320/227/342/225/227/321/205/320/276/320/231/321/210/320/263/320/225AI/321/206/320/231/320/243/321/205/342/225/226/320/265/321/204/342/225/221/342/225/221_Windows/321/207/320/231/320/230/321/206/320/254/320/274.exe +0 -0
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/README.md +1 -1
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/config.json +2 -2
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/main.py +1 -1
- cnhkmcp/untracked/AI/321/206/320/261/320/234/321/211/320/255/320/262/321/206/320/237/320/242/321/204/342/225/227/342/225/242/vector_db/chroma.sqlite3 +0 -0
- cnhkmcp/untracked/APP/Tranformer/Transformer.py +2 -2
- cnhkmcp/untracked/APP/Tranformer/transformer_config.json +1 -1
- cnhkmcp/untracked/APP/blueprints/feature_engineering.py +2 -2
- cnhkmcp/untracked/APP/blueprints/inspiration_house.py +4 -4
- cnhkmcp/untracked/APP/blueprints/paper_analysis.py +3 -3
- cnhkmcp/untracked/APP/give_me_idea/BRAIN_Alpha_Template_Expert_SystemPrompt.md +34 -73
- cnhkmcp/untracked/APP/give_me_idea/alpha_data_specific_template_master.py +2 -2
- cnhkmcp/untracked/APP/give_me_idea/what_is_Alpha_template.md +366 -1
- cnhkmcp/untracked/APP/static/inspiration.js +345 -13
- cnhkmcp/untracked/APP/templates/index.html +11 -3
- cnhkmcp/untracked/APP/templates/transformer_web.html +1 -1
- cnhkmcp/untracked/APP/trailSomeAlphas/README.md +38 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/ace.log +66 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/enhance_template.py +588 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/requirements.txt +3 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/run_pipeline.py +1001 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/run_pipeline_step_by_step.ipynb +5258 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-data-feature-engineering/OUTPUT_TEMPLATE.md +325 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-data-feature-engineering/SKILL.md +503 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-data-feature-engineering/examples.md +244 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-data-feature-engineering/output_report/ASI_delay1_analyst11_ideas.md +285 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-data-feature-engineering/reference.md +399 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/SKILL.md +40 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/config.json +6 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709385783386000.json +388 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709386274840400.json +131 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709386838244700.json +1926 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709387369198500.json +31 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709387908905800.json +1926 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709388486243600.json +240 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709389024058600.json +1926 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709389549608700.json +41 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709390068714000.json +110 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709390591996900.json +36 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709391129137100.json +31 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709391691643500.json +41 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709392192099200.json +31 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709392703423500.json +46 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769709393213729400.json +246 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710186683932500.json +388 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710187165414300.json +131 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710187665211700.json +1926 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710188149193400.json +31 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710188667627400.json +1926 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710189220822000.json +240 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710189726189500.json +1926 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710190248066100.json +41 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710190768298700.json +110 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710191282588100.json +36 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710191838960900.json +31 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710192396688000.json +41 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710192941922400.json +31 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710193473524600.json +46 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710194001961200.json +246 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710420975888800.json +46 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710421647590100.json +196 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710422131378500.json +5 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710422644184400.json +196 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710423702350600.json +196 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_1_idea_1769710424244661800.json +5 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/analyst11_ASI_delay1.csv +211 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/analyst11_ASI_delay1/final_expressions.json +7062 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/scripts/ace.log +3 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/scripts/ace_lib.py +1514 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/scripts/fetch_dataset.py +113 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/scripts/helpful_functions.py +180 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/scripts/implement_idea.py +236 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/scripts/merge_expression_list.py +90 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/scripts/parsetab.py +60 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/template_final_enhance/op/321/206/320/220/342/225/227/321/207/342/225/227/320/243.md +434 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/template_final_enhance/sample_prompt.md +62 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/template_final_enhance//321/205/320/235/320/245/321/205/320/253/320/260/321/205/320/275/320/240/321/206/320/220/320/255/321/210/320/220/320/223/321/211/320/220/342/225/227/321/210/342/225/233/320/241/321/211/320/243/342/225/233.md +354 -0
- cnhkmcp/untracked/APP/usage.md +2 -2
- cnhkmcp/untracked/APP//321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/231/320/243/321/205/342/225/235/320/220/321/206/320/230/320/241.py +388 -8
- cnhkmcp/untracked/skills/alpha-expression-verifier/scripts/validator.py +889 -0
- cnhkmcp/untracked/skills/brain-data-feature-engineering/OUTPUT_TEMPLATE.md +325 -0
- cnhkmcp/untracked/skills/brain-data-feature-engineering/SKILL.md +263 -0
- cnhkmcp/untracked/skills/brain-data-feature-engineering/examples.md +244 -0
- cnhkmcp/untracked/skills/brain-data-feature-engineering/reference.md +493 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/SKILL.md +87 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/config.json +6 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/analyst15_GLB_delay1.csv +289 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/final_expressions.json +410 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588244.json +4 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588251.json +20 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588273.json +23 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588293.json +23 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588319.json +23 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588322.json +14 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588325.json +20 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588328.json +23 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588354.json +23 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588357.json +23 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588361.json +23 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588364.json +23 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588368.json +23 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588391.json +14 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588394.json +23 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588397.json +59 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588400.json +35 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588403.json +20 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588428.json +23 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588431.json +32 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588434.json +20 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588438.json +20 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588441.json +14 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/data/analyst15_GLB_delay1/idea_1768588468.json +20 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/scripts/ace_lib.py +1514 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/scripts/fetch_dataset.py +107 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/scripts/helpful_functions.py +180 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/scripts/implement_idea.py +165 -0
- cnhkmcp/untracked/skills/brain-feature-implementation/scripts/merge_expression_list.py +88 -0
- cnhkmcp/untracked/skills/brain-improve-alpha-performance/arXiv_API_Tool_Manual.md +490 -0
- cnhkmcp/untracked/skills/brain-improve-alpha-performance/reference.md +1 -1
- cnhkmcp/untracked/skills/brain-improve-alpha-performance/scripts/arxiv_api.py +229 -0
- cnhkmcp/untracked/skills/planning-with-files/SKILL.md +211 -0
- cnhkmcp/untracked/skills/planning-with-files/examples.md +202 -0
- cnhkmcp/untracked/skills/planning-with-files/reference.md +218 -0
- cnhkmcp/untracked/skills/planning-with-files/scripts/check-complete.sh +44 -0
- cnhkmcp/untracked/skills/planning-with-files/scripts/init-session.sh +120 -0
- cnhkmcp/untracked/skills/planning-with-files/templates/findings.md +95 -0
- cnhkmcp/untracked/skills/planning-with-files/templates/progress.md +114 -0
- cnhkmcp/untracked/skills/planning-with-files/templates/task_plan.md +132 -0
- cnhkmcp/untracked//321/211/320/225/320/235/321/207/342/225/234/320/276/321/205/320/231/320/235/321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/230/320/241_/321/205/320/276/320/231/321/210/320/263/320/225/321/205/342/224/220/320/225/321/210/320/266/320/221/321/204/342/225/233/320/255/321/210/342/225/241/320/246/321/205/320/234/320/225.py +35 -11
- cnhkmcp/vector_db/_manifest.json +1 -0
- cnhkmcp/vector_db/_meta.json +1 -0
- {cnhkmcp-2.1.9.dist-info → cnhkmcp-2.3.0.dist-info}/METADATA +1 -1
- {cnhkmcp-2.1.9.dist-info → cnhkmcp-2.3.0.dist-info}/RECORD +142 -31
- /cnhkmcp/untracked/{skills/expression_verifier → APP/trailSomeAlphas/skills/brain-feature-implementation}/scripts/validator.py +0 -0
- /cnhkmcp/untracked/skills/{expression_verifier → alpha-expression-verifier}/SKILL.md +0 -0
- /cnhkmcp/untracked/skills/{expression_verifier → alpha-expression-verifier}/scripts/verify_expr.py +0 -0
- {cnhkmcp-2.1.9.dist-info → cnhkmcp-2.3.0.dist-info}/WHEEL +0 -0
- {cnhkmcp-2.1.9.dist-info → cnhkmcp-2.3.0.dist-info}/entry_points.txt +0 -0
- {cnhkmcp-2.1.9.dist-info → cnhkmcp-2.3.0.dist-info}/licenses/LICENSE +0 -0
- {cnhkmcp-2.1.9.dist-info → cnhkmcp-2.3.0.dist-info}/top_level.txt +0 -0
cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/scripts/fetch_dataset.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import argparse
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
# Startup banner; flush so it shows immediately even when stdout is piped.
print("Script started...", flush=True)

# Ensure local imports work by adding the script directory to sys.path
script_dir = Path(__file__).resolve().parent
sys.path.append(str(script_dir))

# ace_lib is expected to sit next to this script; fail fast with a clear
# message rather than a bare traceback if it is missing.
try:
    import ace_lib
except ImportError:
    print("Error: Could not import 'ace_lib'. Make sure it is in the same directory.")
    sys.exit(1)
|
|
19
|
+
|
|
20
|
+
def load_config(config_path):
    """Load a JSON configuration file.

    Args:
        config_path: Path to the JSON config file (str or Path).

    Returns:
        dict | None: Parsed configuration, or None if the file could not be
        read or parsed (an error message is printed in that case).
    """
    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError) as e:
        # Only I/O and parse failures are expected here; catching a bare
        # Exception (as before) would also hide genuine programming errors.
        print(f"Error loading config file: {e}")
        return None
|
|
27
|
+
|
|
28
|
+
def main():
    """CLI entry point: authenticate to BRAIN and dump a dataset's datafields to CSV.

    Reads credentials from the environment (BRAIN_USERNAME/BRAIN_EMAIL and
    BRAIN_PASSWORD) with a fallback to config.json, then saves the fetched
    fields under data/<dataset>_<region>_delay<delay>/.
    """
    arg_parser = argparse.ArgumentParser(description="Fetch dataset fields from WorldQuant BRAIN")
    arg_parser.add_argument("--datasetid", required=True, help="ID of the dataset to fetch (e.g., specific dataset ID)")
    arg_parser.add_argument("--region", default="USA", help="Region (default: USA)")
    arg_parser.add_argument("--delay", type=int, default=1, help="Delay (default: 1)")
    arg_parser.add_argument("--universe", default="TOP3000", help="Universe (default: TOP3000)")
    arg_parser.add_argument("--instrument-type", default="EQUITY", dest="instrument_type", help="Instrument Type (default: EQUITY)")
    args = arg_parser.parse_args()

    # Every path is derived from the script location; nothing is hardcoded.
    workspace = script_dir.parent
    cfg_file = workspace / "config.json"
    out_root = workspace / "data"
    out_root.mkdir(parents=True, exist_ok=True)

    if not cfg_file.exists():
        print(f"Error: Config file not found at {cfg_file}")
        sys.exit(1)

    config = load_config(cfg_file)
    if not config:
        sys.exit(1)

    # Credentials: environment variables take precedence over config.json.
    email = os.environ.get("BRAIN_USERNAME") or os.environ.get("BRAIN_EMAIL")
    password = os.environ.get("BRAIN_PASSWORD")
    if not (email and password):
        fallback = config.get("BRAIN_CREDENTIALS", {})
        email = email or fallback.get("email")
        password = password or fallback.get("password")

    if not (email and password):
        print("Error: BRAIN credentials missing. Set BRAIN_USERNAME/BRAIN_PASSWORD or config.json")
        sys.exit(1)

    # start_session() internally calls ace_lib.get_credentials(), so patch it
    # to hand back the values resolved above.
    ace_lib.get_credentials = lambda: (email, password)

    try:
        print(f"Logging in as {email}...")
        session = ace_lib.start_session()

        print(f"Fetching datafields for dataset: {args.datasetid} (Region: {args.region}, Delay: {args.delay})...")
        fields = ace_lib.get_datafields(
            session,
            dataset_id=args.datasetid,
            region=args.region,
            delay=args.delay,
            universe=args.universe,
            instrument_type=args.instrument_type,
            data_type="ALL",
        )

        if fields is None or fields.empty:
            print("Error: No data found or empty response.")
            sys.exit(1)

        # Keep only filesystem-safe characters for the folder/file names.
        safe_dataset_id = "".join(c for c in args.datasetid if c.isalnum() or c in ('-', '_'))
        folder_name = f"{safe_dataset_id}_{args.region}_delay{args.delay}"
        target_dir = out_root / folder_name
        target_dir.mkdir(parents=True, exist_ok=True)
        destination = target_dir / f"{folder_name}.csv"

        print(f"Saving {len(fields)} records to {destination}...")
        fields.to_csv(destination, index=False)
        print("Success.")

    except Exception as e:
        print(f"An error occurred during execution: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
|
|
111
|
+
|
|
112
|
+
# Standard script entry point: run main() only when executed directly.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import json
import os
from typing import Union

import pandas as pd
from pandas.io.formats.style import Styler

# Platform endpoints; overridable via the BRAIN_API_URL / BRAIN_URL env vars.
brain_api_url = os.environ.get("BRAIN_API_URL", "https://api.worldquantbrain.com")
brain_url = os.environ.get("BRAIN_URL", "https://platform.worldquantbrain.com")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def make_clickable_alpha_id(alpha_id: str) -> str:
    """
    Render an alpha ID as a clickable HTML anchor.

    Args:
        alpha_id (str): The ID of the alpha.

    Returns:
        str: An HTML string linking the ID to its page on the BRAIN platform.
    """
    base = brain_url + "/alpha/"
    return f'<a href="{base}{alpha_id}">{alpha_id}</a>'
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def prettify_result(
    result: list, detailed_tests_view: bool = False, clickable_alpha_id: bool = False
) -> Union[pd.DataFrame, Styler]:
    """
    Combine and format simulation results into a single DataFrame for analysis.

    Entries whose "is_stats" is None are dropped throughout, so stats,
    expressions and tests stay aligned on alpha_id.

    Args:
        result (list): A list of dictionaries containing simulation results.
        detailed_tests_view (bool, optional): If True, include detailed test
            results (limit/result/value dicts) instead of just pass/fail.
            Defaults to False.
        clickable_alpha_id (bool, optional): If True, make alpha IDs clickable.
            Defaults to False.

    Returns:
        pandas.DataFrame or pandas.io.formats.style.Styler: A DataFrame
        containing formatted results, optionally with clickable alpha IDs.
    """
    # Collect per-alpha in-sample stats and rank best-fitness first.
    list_of_is_stats = [result[x]["is_stats"] for x in range(len(result)) if result[x]["is_stats"] is not None]
    is_stats_df = pd.concat(list_of_is_stats).reset_index(drop=True)
    is_stats_df = is_stats_df.sort_values("fitness", ascending=False)

    # Map alpha_id -> submitted expression. "SUPER" alphas carry a
    # selection/combo pair; regular alphas carry a single expression string.
    expressions = {
        result[x]["alpha_id"]: (
            {
                "selection": result[x]["simulate_data"]["selection"],
                "combo": result[x]["simulate_data"]["combo"],
            }
            if result[x]["simulate_data"]["type"] == "SUPER"
            else result[x]["simulate_data"]["regular"]
        )
        for x in range(len(result))
        if result[x]["is_stats"] is not None
    }
    expression_df = pd.DataFrame(list(expressions.items()), columns=["alpha_id", "expression"])

    # Gather the in-sample test rows; WARNING-level results are excluded.
    list_of_is_tests = [result[x]["is_tests"] for x in range(len(result)) if result[x]["is_tests"] is not None]
    is_tests_df = pd.concat(list_of_is_tests, sort=True).reset_index(drop=True)
    is_tests_df = is_tests_df[is_tests_df["result"] != "WARNING"]
    if detailed_tests_view:
        # One column per test name, each cell a {limit, result, value} dict.
        cols = ["limit", "result", "value"]
        is_tests_df["details"] = is_tests_df[cols].to_dict(orient="records")
        is_tests_df = is_tests_df.pivot(index="alpha_id", columns="name", values="details").reset_index()
    else:
        # One column per test name, each cell the PASS/FAIL result string.
        is_tests_df = is_tests_df.pivot(index="alpha_id", columns="name", values="result").reset_index()

    # Join stats + expression + tests on alpha_id, drop any column that still
    # has a PENDING value, and normalize camelCase headers to snake_case.
    alpha_stats = pd.merge(is_stats_df, expression_df, on="alpha_id")
    alpha_stats = pd.merge(alpha_stats, is_tests_df, on="alpha_id")
    alpha_stats = alpha_stats.drop(columns=alpha_stats.columns[(alpha_stats == "PENDING").any()])
    alpha_stats.columns = alpha_stats.columns.str.replace("(?<=[a-z])(?=[A-Z])", "_", regex=True).str.lower()
    if clickable_alpha_id:
        return alpha_stats.style.format({"alpha_id": lambda x: make_clickable_alpha_id(str(x))})
    return alpha_stats
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def concat_pnl(result: list) -> pd.DataFrame:
    """
    Combine PnL results from multiple alphas into a single DataFrame.

    Entries whose "pnl" is None are skipped.

    Args:
        result (list): A list of dictionaries containing simulation results with PnL data.

    Returns:
        pandas.DataFrame: A DataFrame containing combined PnL data for all alphas.
    """
    frames = [entry["pnl"] for entry in result if entry["pnl"] is not None]
    return pd.concat(frames).reset_index()
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def concat_is_tests(result: list) -> pd.DataFrame:
    """
    Combine in-sample test results from multiple alphas into a single DataFrame.

    Entries whose "is_tests" is None are skipped; columns are sorted
    alphabetically when frames differ.

    Args:
        result (list): A list of dictionaries containing simulation results with in-sample test data.

    Returns:
        pandas.DataFrame: A DataFrame containing combined in-sample test results for all alphas.
    """
    frames = [entry["is_tests"] for entry in result if entry["is_tests"] is not None]
    return pd.concat(frames, sort=True).reset_index(drop=True)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def save_simulation_result(result: dict) -> None:
    """
    Save the simulation result to a JSON file in the 'simulation_results' folder.

    The file is named '<alpha_id>_<region>' (no extension), matching how the
    result dict identifies itself.

    Args:
        result (dict): A dictionary containing the simulation result for an alpha.
    """
    folder_path = "simulation_results/"
    os.makedirs(folder_path, exist_ok=True)

    target = os.path.join(folder_path, f"{result['id']}_{result['settings']['region']}")
    with open(target, "w", encoding="utf-8") as fh:
        json.dump(result, fh)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def save_pnl(pnl_df: pd.DataFrame, alpha_id: str, region: str) -> None:
    """
    Save the PnL data for an alpha to a CSV file in the 'alphas_pnl' folder.

    Args:
        pnl_df (pandas.DataFrame): The DataFrame containing PnL data.
        alpha_id (str): The ID of the alpha.
        region (str): The region for which the PnL data was generated.
    """
    folder_path = "alphas_pnl/"
    os.makedirs(folder_path, exist_ok=True)
    # Index is kept on purpose (it usually carries the date).
    pnl_df.to_csv(os.path.join(folder_path, f"{alpha_id}_{region}.csv"))
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def save_yearly_stats(yearly_stats: pd.DataFrame, alpha_id: str, region: str):
    """
    Save the yearly statistics for an alpha to a CSV file in the 'yearly_stats' folder.

    Args:
        yearly_stats (pandas.DataFrame): The DataFrame containing yearly statistics.
        alpha_id (str): The ID of the alpha.
        region (str): The region for which the statistics were generated.
    """
    folder_path = "yearly_stats/"
    os.makedirs(folder_path, exist_ok=True)
    # Unlike save_pnl, the index carries no information here, so drop it.
    yearly_stats.to_csv(os.path.join(folder_path, f"{alpha_id}_{region}.csv"), index=False)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def expand_dict_columns(data: pd.DataFrame) -> pd.DataFrame:
    """
    Expand dictionary columns in a DataFrame into separate columns.

    Each column whose first value is a dict is expanded into one new column
    per key, named '<col>_<key>'; the original columns are kept.

    Args:
        data (pandas.DataFrame): The input DataFrame, possibly with dict columns.

    Returns:
        pandas.DataFrame: A new DataFrame with expanded columns appended, or
        the input unchanged when there is nothing to expand.
    """
    # Bug fix: the original unconditionally called data[col].iloc[0] (IndexError
    # on an empty frame) and pd.concat([]) (ValueError when no dict columns
    # exist). Return the input untouched in both cases instead.
    if data.empty:
        return data
    dict_columns = [col for col in data.columns if isinstance(data[col].iloc[0], dict)]
    if not dict_columns:
        return data

    new_columns = pd.concat(
        [data[col].apply(pd.Series).rename(columns=lambda x: f"{col}_{x}") for col in dict_columns],
        axis=1,
    )
    return pd.concat([data, new_columns], axis=1)
|
cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/scripts/implement_idea.py
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import argparse
|
|
4
|
+
import sys
|
|
5
|
+
import re
|
|
6
|
+
import json
|
|
7
|
+
import time
|
|
8
|
+
import itertools
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _safe_filename_component(value: str) -> str:
|
|
12
|
+
return re.sub(r"[^A-Za-z0-9_-]+", "_", str(value)).strip("_")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _parse_dataset_folder_parts(dataset_folder_name: str) -> tuple[str, str, str] | None:
|
|
16
|
+
"""Parse '<datasetId>_<REGION>_delay<DELAY>' into (datasetId, REGION, DELAY).
|
|
17
|
+
|
|
18
|
+
Returns None if parsing fails.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
name = str(dataset_folder_name)
|
|
22
|
+
marker = "_delay"
|
|
23
|
+
pos = name.rfind(marker)
|
|
24
|
+
if pos == -1:
|
|
25
|
+
return None
|
|
26
|
+
|
|
27
|
+
prefix = name[:pos]
|
|
28
|
+
delay_str = name[pos + len(marker) :]
|
|
29
|
+
if not delay_str.isdigit():
|
|
30
|
+
return None
|
|
31
|
+
|
|
32
|
+
region_pos = prefix.rfind("_")
|
|
33
|
+
if region_pos == -1:
|
|
34
|
+
return None
|
|
35
|
+
|
|
36
|
+
dataset_id = prefix[:region_pos]
|
|
37
|
+
region = prefix[region_pos + 1 :]
|
|
38
|
+
if not dataset_id or not region:
|
|
39
|
+
return None
|
|
40
|
+
|
|
41
|
+
return dataset_id, region, delay_str
|
|
42
|
+
|
|
43
|
+
def load_data(dataset_name=None):
    """Locate and load a dataset CSV from the workspace 'data' directory.

    When *dataset_name* is omitted, it is auto-detected if exactly one
    dataset folder exists; otherwise an error is printed to stderr and the
    process exits. Diagnostics go to stderr so stdout stays clean.

    Returns:
        tuple: (pandas.DataFrame, Path) -- the loaded data and its folder.
    """
    script_dir = Path(__file__).resolve().parent
    workspace_dir = script_dir.parent

    if not dataset_name:
        data_root = workspace_dir / "data"
        if not data_root.exists():
            print("Error: Data directory not found.", file=sys.stderr)
            sys.exit(1)

        candidates = [entry for entry in data_root.iterdir() if entry.is_dir()]

        if len(candidates) == 1:
            dataset_name = candidates[0].name
            print(f"Auto-detected dataset: {dataset_name}", file=sys.stderr)
        elif candidates:
            print("Error: Multiple datasets found. Please specify --dataset.", file=sys.stderr)
            print("Available datasets:", file=sys.stderr)
            for entry in candidates:
                print(f" {entry.name}", file=sys.stderr)
            sys.exit(1)
        else:
            print("Error: No dataset folders found inside data directory.", file=sys.stderr)
            sys.exit(1)

    dataset_dir = workspace_dir / "data" / dataset_name
    data_path = dataset_dir / f"{dataset_name}.csv"

    print(f"Loading data from {data_path}...", file=sys.stderr)
    try:
        return pd.read_csv(data_path), dataset_dir
    except FileNotFoundError:
        print(f"Error: Data file not found at {data_path}. Please run fetch_dataset.py first.", file=sys.stderr)
        sys.exit(1)
|
|
78
|
+
|
|
79
|
+
def extract_keys_from_template(template):
    """Return the `{placeholder}` names found in a format template, in order.

    Duplicates are preserved; only word-character placeholder names match.
    """
    placeholder = re.compile(r'\{([A-Za-z0-9_]+)\}')
    return placeholder.findall(template)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _matches_metric(field_id: str, metric: str) -> bool:
|
|
84
|
+
"""Return True if field_id is a plausible match for metric.
|
|
85
|
+
|
|
86
|
+
For very short metrics, require token-boundary matches to avoid accidental
|
|
87
|
+
matches (e.g. 'ta' in 'total').
|
|
88
|
+
"""
|
|
89
|
+
|
|
90
|
+
fid = str(field_id)
|
|
91
|
+
m = str(metric)
|
|
92
|
+
if len(m) <= 3:
|
|
93
|
+
return re.search(rf"(^|_){re.escape(m)}(_|$)", fid, flags=re.IGNORECASE) is not None
|
|
94
|
+
return m in fid
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _common_prefix_len(a: str, b: str) -> int:
|
|
98
|
+
n = min(len(a), len(b))
|
|
99
|
+
i = 0
|
|
100
|
+
while i < n and a[i] == b[i]:
|
|
101
|
+
i += 1
|
|
102
|
+
return i
|
|
103
|
+
|
|
104
|
+
def match_single_horizon_auto(df, template):
    """Generate expressions from a template by matching each {variable} to dataset field ids.

    Previous behavior required all variables to share an identical "base prefix".
    That is often too strict for datasets with mixed naming conventions.

    New behavior:
    - Build candidate lists per metric.
    - Iterate over a limited set of primary candidates.
    - For each primary candidate, pick the closest-looking candidates for other metrics
      (by common prefix length), but DO NOT require the same base.
    - Combine candidates (capped) and render expressions.

    Returns a list of ("flex", expression) tuples, de-duplicated.
    """

    metrics = extract_keys_from_template(template)
    if not metrics:
        print("Error: No variables found in template (use {variable} format).", file=sys.stderr)
        return []

    # Bug fix: a template may repeat a variable (e.g. "{x} - ts_rank({x}, 20)").
    # Without de-duplication the repeated metric appeared twice in the product
    # pools, and the metrics[1:] loop clobbered the [primary_id] restriction
    # when the primary metric itself was duplicated. De-dupe (order-preserving)
    # before sorting; the rendered expression set is unchanged, just computed
    # without redundant combinations.
    metrics = sorted(dict.fromkeys(metrics), key=len, reverse=True)
    primary = metrics[0]

    ids = df["id"].dropna().astype(str).tolist()

    # Build candidates per metric; dict.fromkeys de-dups while preserving order.
    candidates_by_metric: dict[str, list[str]] = {
        m: list(dict.fromkeys(fid for fid in ids if _matches_metric(fid, m)))
        for m in metrics
    }

    # Every metric needs at least one candidate field, else nothing can render.
    if any(not candidates_by_metric[m] for m in metrics):
        return []

    # Caps keep the combinatorial search bounded.
    MAX_PRIMARY_CANDIDATES = 30
    MAX_SECONDARY_CHOICES = 8
    MAX_EXPRESSIONS = 5000

    results = []
    seen_expr = set()

    primary_candidates = candidates_by_metric[primary][:MAX_PRIMARY_CANDIDATES]
    for primary_id in primary_candidates:
        # For each secondary metric, choose best candidates by similarity to primary_id
        chosen_by_metric: dict[str, list[str]] = {primary: [primary_id]}
        for m in metrics[1:]:
            ranked = sorted(
                candidates_by_metric[m],
                key=lambda fid: _common_prefix_len(primary_id, fid),
                reverse=True,
            )
            chosen_by_metric[m] = ranked[:MAX_SECONDARY_CHOICES]

        # Combine candidates across metrics
        pools = [chosen_by_metric[m] for m in metrics]
        for combo in itertools.product(*pools):
            field_map = dict(zip(metrics, combo))
            try:
                expr = template.format(**field_map)
            except Exception:
                # Malformed template (e.g. stray braces) for this combo; skip.
                continue
            if expr in seen_expr:
                continue
            seen_expr.add(expr)
            results.append(("flex", expr))
            if len(results) >= MAX_EXPRESSIONS:
                return results

    return results
|
|
180
|
+
|
|
181
|
+
def main():
    """CLI entry point: expand a template into expressions and persist as JSON."""
    parser = argparse.ArgumentParser(description="Generate Alpha Expressions based on patterns")
    parser.add_argument("--template", required=True, help="Python format string (e.g. '{st_dev} / abs({mean})')")
    parser.add_argument("--dataset", help="Name of the dataset folder. Auto-detected if only one exists.")
    parser.add_argument(
        "--idea",
        default="",
        help="Optional natural-language description of what this template represents.",
    )
    args = parser.parse_args()

    df, dataset_dir = load_data(args.dataset)
    matches = match_single_horizon_auto(df, args.template)

    if not matches:
        print("No matching expressions found.")
    else:
        print(f"Generated {len(matches)} expressions:\n")

    expression_list = [expr for _context, expr in matches]

    # Always persist results for debugging. Nanosecond precision avoids
    # filename collisions when this script is invoked in a tight loop.
    timestamp = time.time_ns()
    payload = {
        "template": args.template,
        "idea": args.idea,
        "expression_list": expression_list,
    }

    parts = _parse_dataset_folder_parts(dataset_dir.name)
    if parts:
        dataset_id, region, delay_str = parts
        prefix = f"{_safe_filename_component(dataset_id)}_{_safe_filename_component(region)}_{_safe_filename_component(delay_str)}"
    else:
        # Fallback: keep output stable even if dataset folder naming differs.
        prefix = _safe_filename_component(dataset_dir.name) or "dataset"

    output_file = dataset_dir / f"{prefix}_idea_{timestamp}.json"
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(payload, f, indent=4, ensure_ascii=False)
        print(f"\nSaved idea configuration to: {output_file}")
    except Exception as e:
        print(f"Error saving JSON: {e}", file=sys.stderr)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import argparse
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
def load_data_dir(dataset_name=None):
    """Resolve the dataset directory under the workspace `data/` folder.

    With no name given, auto-detects when exactly one dataset folder exists;
    otherwise exits with an error. With a name given, simply joins the path.
    """
    workspace = Path(__file__).resolve().parent.parent

    if not dataset_name:
        data_root = workspace / "data"
        if not data_root.exists():
            print("Error: Data directory not found.", file=sys.stderr)
            sys.exit(1)

        folders = [entry for entry in data_root.iterdir() if entry.is_dir()]

        if not folders:
            print("Error: No dataset folders found inside data directory.", file=sys.stderr)
            sys.exit(1)
        if len(folders) > 1:
            print("Error: Multiple datasets found. Please specify --dataset.", file=sys.stderr)
            sys.exit(1)

        dataset_name = folders[0].name
        print(f"Auto-detected dataset: {dataset_name}", file=sys.stderr)
        return workspace / "data" / dataset_name

    return workspace / "data" / dataset_name
|
|
30
|
+
|
|
31
|
+
def main():
    """Merge expression lists from all idea JSON files in a dataset folder.

    Reads every idea_*.json (or *_idea_*.json) in the dataset directory,
    concatenates their "expression_list" entries, de-duplicates while
    preserving first-seen order, and writes the result to --output.
    Exits 1 on a missing dataset directory, 0 when there is nothing to merge.
    """
    parser = argparse.ArgumentParser(description="Merge all generated expressions from idea JSON files.")
    parser.add_argument("--dataset", help="Name of the dataset folder containing idea JSONs.")
    parser.add_argument("--output", default="final_expressions.json", help="Output filename.")

    args = parser.parse_args()

    dataset_dir = load_data_dir(args.dataset)

    if not dataset_dir.exists():
        print(f"Error: Dataset directory {dataset_dir} does not exist.", file=sys.stderr)
        sys.exit(1)

    all_expressions = []

    # Find all idea json files (supports idea_*.json and *_idea_*.json)
    json_files = list(dataset_dir.glob("idea_*.json"))
    if not json_files:
        json_files = list(dataset_dir.glob("*_idea_*.json"))

    if not json_files:
        print(f"No idea_*.json files found in {dataset_dir}", file=sys.stderr)
        sys.exit(0)

    print(f"Found {len(json_files)} idea files. Merging...")

    for jf in json_files:
        try:
            # Fix: the generator writes these files as UTF-8; read with an
            # explicit encoding instead of relying on the platform default.
            with open(jf, 'r', encoding='utf-8') as f:
                data = json.load(f)
            exprs = data.get("expression_list", [])
            if exprs:
                all_expressions.extend(exprs)
                print(f" + {jf.name}: {len(exprs)} expressions")
            else:
                print(f" - {jf.name}: 0 expressions")
        except Exception as e:
            print(f" ! Error reading {jf.name}: {e}", file=sys.stderr)

    # De-duplicate while preserving first-seen order; dicts keep insertion
    # order, so dict.fromkeys replaces the manual seen-set loop.
    unique_expressions = list(dict.fromkeys(all_expressions))

    output_path = dataset_dir / args.output

    try:
        # Write UTF-8 with ensure_ascii=False for consistency with how the
        # generator script saves its idea JSON files.
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(unique_expressions, f, indent=4, ensure_ascii=False)
        print(f"\nSuccessfully merged {len(unique_expressions)} unique expressions.")
        print(f"Output saved to: {output_path}")
    except Exception as e:
        print(f"Error saving output: {e}", file=sys.stderr)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
|
|
2
|
+
# parsetab.py
# This file is automatically generated. Do not edit.
# pylint: disable=W,C,R
# NOTE(review): LALR parse tables generated by PLY (yacc) for the expression
# grammar whose handlers live in validator.py (p_expression, p_comparison,
# p_term, p_factor, p_function_call, ...). Regenerate via yacc rather than
# hand-editing; any manual change here will be lost and may corrupt parsing.
_tabversion = '3.10'

_lr_method = 'LALR'

# Signature string: token list followed by the grammar rules; PLY compares it
# against the live grammar to decide whether the cached tables are stale.
_lr_signature = 'ASSIGN BOOLEAN CATEGORY COMMA DIVIDE EQUAL FIELD FUNCTION GREATER GREATEREQUAL IDENTIFIER LESS LESSEQUAL LPAREN MINUS NOTEQUAL NUMBER PLUS RPAREN STRING TIMESexpression : comparison\n| expression EQUAL comparison\n| expression NOTEQUAL comparison\n| expression GREATER comparison\n| expression LESS comparison\n| expression GREATEREQUAL comparison\n| expression LESSEQUAL comparisoncomparison : term\n| comparison PLUS term\n| comparison MINUS termterm : factor\n| term TIMES factor\n| term DIVIDE factorfactor : NUMBER\n| STRING\n| FIELD\n| CATEGORY\n| IDENTIFIER\n| BOOLEAN\n| MINUS factor\n| LPAREN expression RPAREN\n| function_callfunction_call : FUNCTION LPAREN args RPARENargs : arg_list\n| emptyarg_list : arg\n| arg_list COMMA argarg : expression\n| IDENTIFIER ASSIGN expressionempty :'

# Compact action table: token -> ([state, ...], [action, ...]); positive
# actions are shifts, negative are reductions by rule number.
_lr_action_items = {'NUMBER':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,]),'STRING':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,]),'FIELD':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,]),'CATEGORY':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,]),'IDENTIFIER':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[10,10,10,10,10,10,10,10,10,10,10,10,10,44,44,10,]),'BOOLEAN':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,]),'MINUS':([0,2,3,4,5,6,7,8,9,10,11,12,13,15,16,17,18,19,20,21,22,23,24,25,27,28,29,30,31,32,33,34,35,36,37,38,44,45,46,47,],[4,22,-8,4,-11,-14,-15,-16,-17,-18,-19,4,-22,4,4,4,4,4,4,4,4,4,4,-20,4,22,22,22,22,22,22,-9,-10,-12,-13,-21,-18,-23,4,4,]),'LPAREN':([0,4,12,14,15,16,17,18,19,20,21,22,23,24,27,46,47,],[12,12,12,27,12,12,12,12,12,12,12,12,12,12,12,12,12,]),'FUNCTION':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,]),'$end':([1,2,3,5,6,7,8,9,10,11,13,25,28,29,30,31,32,33,34,35,36,37,38,45,],[0,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,-23,]),'EQUAL':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[15,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,15,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,15,-18,-23,15,]),'NOTEQUAL':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[16,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,16,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,16,-18,-23,16,]),'GREATER':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[17,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,17,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,17,-18,-23,17,]),'LESS':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[18,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,18,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,18,-18,-23,18,]),'GREATEREQUAL':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[19,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,19,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,19,-18,-23,19,]),'LESSEQUAL':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[20,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,20,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,20,-18,-23,20,]),'RPAREN':([2,3,5,6,7,8,9,10,11,13,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,48,49,],[-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,38,-30,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,45,-24,-25,-26,-28,-18,-23,-27,-29,]),'COMMA':([2,3,5,6,7,8,9,10,11,13,25,28,29,30,31,32,33,34,35,36,37,38,40,42,43,44,45,48,49,],[-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,46,-26,-28,-18,-23,-27,-29,]),'PLUS':([2,3,5,6,7,8,9,10,11,13,25,28,29,30,31,32,33,34,35,36,37,38,44,45,],[21,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,21,21,21,21,21,21,-9,-10,-12,-13,-21,-18,-23,]),'TIMES':([3,5,6,7,8,9,10,11,13,25,34,35,36,37,38,44,45,],[23,-11,-14,-15,-16,-17,-18,-19,-22,-20,23,23,-12,-13,-21,-18,-23,]),'DIVIDE':([3,5,6,7,8,9,10,11,13,25,34,35,36,37,38,44,45,],[24,-11,-14,-15,-16,-17,-18,-19,-22,-20,24,24,-12,-13,-21,-18,-23,]),'ASSIGN':([44,],[47,]),}

# Expand the compact pairs into the nested lookup _lr_action[state][token].
_lr_action = {}
for _k, _v in _lr_action_items.items():
    for _x,_y in zip(_v[0],_v[1]):
        if not _x in _lr_action: _lr_action[_x] = {}
        _lr_action[_x][_k] = _y
del _lr_action_items

# Compact goto table: nonterminal -> ([state, ...], [target state, ...]).
_lr_goto_items = {'expression':([0,12,27,46,47,],[1,26,43,43,49,]),'comparison':([0,12,15,16,17,18,19,20,27,46,47,],[2,2,28,29,30,31,32,33,2,2,2,]),'term':([0,12,15,16,17,18,19,20,21,22,27,46,47,],[3,3,3,3,3,3,3,3,34,35,3,3,3,]),'factor':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[5,25,5,5,5,5,5,5,5,5,5,36,37,5,5,5,]),'function_call':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,]),'args':([27,],[39,]),'arg_list':([27,],[40,]),'empty':([27,],[41,]),'arg':([27,46,],[42,48,]),}

# Expand the compact pairs into the nested lookup _lr_goto[state][nonterminal].
_lr_goto = {}
for _k, _v in _lr_goto_items.items():
    for _x, _y in zip(_v[0], _v[1]):
        if not _x in _lr_goto: _lr_goto[_x] = {}
        _lr_goto[_x][_k] = _y
del _lr_goto_items
# Grammar productions: (rule text, lhs name, rhs length, handler name,
# defining file, line number of the handler in that file).
_lr_productions = [
  ("S' -> expression","S'",1,None,None,None),
  ('expression -> comparison','expression',1,'p_expression','validator.py',383),
  ('expression -> expression EQUAL comparison','expression',3,'p_expression','validator.py',384),
  ('expression -> expression NOTEQUAL comparison','expression',3,'p_expression','validator.py',385),
  ('expression -> expression GREATER comparison','expression',3,'p_expression','validator.py',386),
  ('expression -> expression LESS comparison','expression',3,'p_expression','validator.py',387),
  ('expression -> expression GREATEREQUAL comparison','expression',3,'p_expression','validator.py',388),
  ('expression -> expression LESSEQUAL comparison','expression',3,'p_expression','validator.py',389),
  ('comparison -> term','comparison',1,'p_comparison','validator.py',396),
  ('comparison -> comparison PLUS term','comparison',3,'p_comparison','validator.py',397),
  ('comparison -> comparison MINUS term','comparison',3,'p_comparison','validator.py',398),
  ('term -> factor','term',1,'p_term','validator.py',405),
  ('term -> term TIMES factor','term',3,'p_term','validator.py',406),
  ('term -> term DIVIDE factor','term',3,'p_term','validator.py',407),
  ('factor -> NUMBER','factor',1,'p_factor','validator.py',414),
  ('factor -> STRING','factor',1,'p_factor','validator.py',415),
  ('factor -> FIELD','factor',1,'p_factor','validator.py',416),
  ('factor -> CATEGORY','factor',1,'p_factor','validator.py',417),
  ('factor -> IDENTIFIER','factor',1,'p_factor','validator.py',418),
  ('factor -> BOOLEAN','factor',1,'p_factor','validator.py',419),
  ('factor -> MINUS factor','factor',2,'p_factor','validator.py',420),
  ('factor -> LPAREN expression RPAREN','factor',3,'p_factor','validator.py',421),
  ('factor -> function_call','factor',1,'p_factor','validator.py',422),
  ('function_call -> FUNCTION LPAREN args RPAREN','function_call',4,'p_function_call','validator.py',450),
  ('args -> arg_list','args',1,'p_args','validator.py',454),
  ('args -> empty','args',1,'p_args','validator.py',455),
  ('arg_list -> arg','arg_list',1,'p_arg_list','validator.py',462),
  ('arg_list -> arg_list COMMA arg','arg_list',3,'p_arg_list','validator.py',463),
  ('arg -> expression','arg',1,'p_arg','validator.py',470),
  ('arg -> IDENTIFIER ASSIGN expression','arg',3,'p_arg','validator.py',471),
  ('empty -> <empty>','empty',0,'p_empty','validator.py',478),
]
|