cnhkmcp 2.3.0__py3-none-any.whl → 2.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cnhkmcp/__init__.py +1 -1
- cnhkmcp/untracked/APP/simulator/wqb20260130130030.log +210 -0
- cnhkmcp/untracked/APP/simulator/wqb20260130131757.log +104 -0
- cnhkmcp/untracked/APP/simulator/wqb20260130172245.log +70 -0
- cnhkmcp/untracked/APP/static/inspiration.js +5 -1
- cnhkmcp/untracked/APP/templates/index.html +7 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/run_pipeline.py +135 -85
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-data-feature-engineering/output_report/GLB_delay1_fundamental72_ideas.md +362 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/final_expressions.json +138 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759441444909600.json +38 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759441920092000.json +14 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759442418767100.json +14 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759442902507600.json +14 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759443377036200.json +10 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759443845377000.json +14 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759444313546700.json +10 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759444784598600.json +14 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759445274311200.json +14 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759445747421700.json +10 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759446222137800.json +22 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759446686222600.json +14 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759447154698500.json +10 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759447629677000.json +10 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759448102331200.json +10 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759448573382000.json +14 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_delay1.csv +330 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/scripts/fetch_dataset.py +7 -1
- cnhkmcp/untracked/APP/运行打开我.py +13 -2
- cnhkmcp/untracked/back_up/platform_functions.py +2 -2
- cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/platform_functions.py +2 -2
- cnhkmcp/untracked/platform_functions.py +2 -2
- {cnhkmcp-2.3.0.dist-info → cnhkmcp-2.3.1.dist-info}/METADATA +1 -1
- {cnhkmcp-2.3.0.dist-info → cnhkmcp-2.3.1.dist-info}/RECORD +37 -15
- {cnhkmcp-2.3.0.dist-info → cnhkmcp-2.3.1.dist-info}/WHEEL +0 -0
- {cnhkmcp-2.3.0.dist-info → cnhkmcp-2.3.1.dist-info}/entry_points.txt +0 -0
- {cnhkmcp-2.3.0.dist-info → cnhkmcp-2.3.1.dist-info}/licenses/LICENSE +0 -0
- {cnhkmcp-2.3.0.dist-info → cnhkmcp-2.3.1.dist-info}/top_level.txt +0 -0
|
@@ -3,6 +3,7 @@ import datetime as dt
|
|
|
3
3
|
import json
|
|
4
4
|
import os
|
|
5
5
|
import re
|
|
6
|
+
import shutil
|
|
6
7
|
import subprocess
|
|
7
8
|
import sys
|
|
8
9
|
import csv
|
|
@@ -327,6 +328,7 @@ def build_prompt(
|
|
|
327
328
|
region: str,
|
|
328
329
|
delay: int,
|
|
329
330
|
universe: str,
|
|
331
|
+
data_type: str,
|
|
330
332
|
fields_summary: list[dict],
|
|
331
333
|
field_count: int,
|
|
332
334
|
feature_engineering_skill_md: str,
|
|
@@ -336,8 +338,7 @@ def build_prompt(
|
|
|
336
338
|
):
|
|
337
339
|
# NOTE: The user requested that we DO NOT invent our own system prompt.
|
|
338
340
|
# Instead, we embed the two skill specs as the authoritative instructions.
|
|
339
|
-
|
|
340
|
-
[
|
|
341
|
+
prompt_lines = [
|
|
341
342
|
"You are executing two skills in sequence:",
|
|
342
343
|
"1) brain-data-feature-engineering",
|
|
343
344
|
"2) brain-feature-implementation",
|
|
@@ -353,6 +354,15 @@ def build_prompt(
|
|
|
353
354
|
"-------",
|
|
354
355
|
f'"allowed_placeholders": {allowed_metric_suffixes}',
|
|
355
356
|
"",
|
|
357
|
+
]
|
|
358
|
+
|
|
359
|
+
if str(data_type).upper() == "VECTOR":
|
|
360
|
+
prompt_lines.append(
|
|
361
|
+
"since all the following the data is vector type data, before you do any process, you should choose a vector operator to generate its statistical feature to use, the data cannot be directly use. for example, if datafieldA and datafieldB are vector type data, you can use vec_avg(datafieldA) - vec_avg(datafieldB), where vec_avg() operator is used to generate the average of the data on a certain date. similarly, vector type operator can only be used on the vector type operator directly and cannot be nested, for example vec_avg(vec_sum(datafield)) is a false use."
|
|
362
|
+
)
|
|
363
|
+
|
|
364
|
+
prompt_lines.extend(
|
|
365
|
+
[
|
|
356
366
|
"CRITICAL OUTPUT RULES (to ensure implement_idea.py can generate expressions):",
|
|
357
367
|
"- Every Implementation Example MUST be a Python format template using {variable}.",
|
|
358
368
|
"- Every {variable} MUST come from the allowed_placeholders list provided in user content.",
|
|
@@ -366,6 +376,8 @@ def build_prompt(
|
|
|
366
376
|
]
|
|
367
377
|
)
|
|
368
378
|
|
|
379
|
+
system_prompt = "\n".join(prompt_lines)
|
|
380
|
+
|
|
369
381
|
user_prompt = {
|
|
370
382
|
"instructions": {
|
|
371
383
|
"output_format": "Fill OUTPUT_TEMPLATE.md with concrete content.",
|
|
@@ -732,6 +744,21 @@ def run_script(args_list: list[str], cwd: Path):
|
|
|
732
744
|
)
|
|
733
745
|
return result.stdout
|
|
734
746
|
|
|
747
|
+
|
|
748
|
+
def delete_path_if_exists(path: Path):
    """Best-effort delete a file, symlink, or directory tree.

    Args:
        path: Filesystem location to remove. A missing path is a no-op.

    Returns:
        None. Failures are swallowed on purpose: this is cleanup before a
        rerun, and the rerun should proceed even if stale artifacts remain.

    Notes:
        Symlinks are removed as links; their targets are never touched.
    """
    try:
        # Test for a symlink first. exists()/is_dir() follow links, so a
        # dangling link would look "missing" and be left behind, and a link
        # to a directory would be handed to shutil.rmtree, which refuses to
        # operate on symlinks (the refusal is hidden by ignore_errors=True).
        if path.is_symlink():
            path.unlink(missing_ok=True)
        elif path.is_dir():
            shutil.rmtree(path, ignore_errors=True)
        elif path.exists():
            path.unlink(missing_ok=True)
    except Exception:
        # Best-effort cleanup only; rerun should still proceed.
        return
|
|
761
|
+
|
|
735
762
|
def main():
|
|
736
763
|
parser = argparse.ArgumentParser(description="Run feature engineering + implementation pipeline")
|
|
737
764
|
parser.add_argument("--data-category", required=True, help="Dataset category (e.g., analyst, fundamental)")
|
|
@@ -740,6 +767,12 @@ def main():
|
|
|
740
767
|
parser.add_argument("--universe", default="TOP3000", help="Universe (default: TOP3000)")
|
|
741
768
|
parser.add_argument("--dataset-id", required=True, help="Dataset id (required)")
|
|
742
769
|
parser.add_argument("--instrument-type", default="EQUITY", help="Instrument type (default: EQUITY)")
|
|
770
|
+
parser.add_argument(
|
|
771
|
+
"--data-type",
|
|
772
|
+
default="MATRIX",
|
|
773
|
+
choices=["MATRIX", "VECTOR"],
|
|
774
|
+
help="Data type to request from BRAIN datafields (MATRIX or VECTOR). Default: MATRIX",
|
|
775
|
+
)
|
|
743
776
|
parser.add_argument("--ideas-file", default=None, help="Use existing ideas markdown instead of generating")
|
|
744
777
|
parser.add_argument(
|
|
745
778
|
"--regen-ideas",
|
|
@@ -774,96 +807,105 @@ def main():
|
|
|
774
807
|
email, password = load_brain_credentials_from_env_or_args(args.username, args.password, config_path)
|
|
775
808
|
session = start_brain_session(email, password)
|
|
776
809
|
|
|
810
|
+
# Always rerun cleanly: remove prior generated artifacts so we never reuse stale ideas/data.
|
|
811
|
+
# - If --ideas-file is provided, we treat it as user-managed input and do NOT delete it.
|
|
812
|
+
# - We DO delete the dataset-specific folder under feature-implementation/data.
|
|
813
|
+
if not args.ideas_file:
|
|
814
|
+
default_ideas = (
|
|
815
|
+
FEATURE_ENGINEERING_DIR
|
|
816
|
+
/ "output_report"
|
|
817
|
+
/ f"{args.region}_delay{args.delay}_{args.dataset_id}_ideas.md"
|
|
818
|
+
)
|
|
819
|
+
delete_path_if_exists(default_ideas)
|
|
820
|
+
|
|
821
|
+
guessed_dataset_folder = f"{safe_dataset_id(args.dataset_id)}_{args.region}_delay{args.delay}"
|
|
822
|
+
guessed_dataset_dir = FEATURE_IMPLEMENTATION_DIR / "data" / guessed_dataset_folder
|
|
823
|
+
delete_path_if_exists(guessed_dataset_dir)
|
|
824
|
+
|
|
777
825
|
ideas_path = None
|
|
778
826
|
if args.ideas_file:
|
|
779
827
|
ideas_path = Path(args.ideas_file).resolve()
|
|
780
828
|
if not ideas_path.exists():
|
|
781
829
|
raise FileNotFoundError(f"Ideas file not found: {ideas_path}")
|
|
782
830
|
else:
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
831
|
+
# Always regenerate ideas (never reuse an existing markdown report).
|
|
832
|
+
datasets_df = ace_lib.get_datasets(
|
|
833
|
+
session,
|
|
834
|
+
instrument_type=args.instrument_type,
|
|
835
|
+
region=args.region,
|
|
836
|
+
delay=args.delay,
|
|
837
|
+
universe=args.universe,
|
|
838
|
+
theme="ALL",
|
|
787
839
|
)
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
api_key = (
|
|
858
|
-
args.moonshot_api_key
|
|
859
|
-
or os.environ.get("MOONSHOT_API_KEY")
|
|
860
|
-
)
|
|
861
|
-
if not api_key:
|
|
862
|
-
raise ValueError("Moonshot API key missing. Set MOONSHOT_API_KEY or pass --moonshot-api-key")
|
|
863
|
-
|
|
864
|
-
report = call_moonshot(api_key, args.moonshot_model, system_prompt, user_prompt)
|
|
865
|
-
# Save first, then normalize placeholders after dataset download.
|
|
866
|
-
ideas_path = save_ideas_report(report, args.region, args.delay, args.dataset_id)
|
|
840
|
+
|
|
841
|
+
dataset_name = None
|
|
842
|
+
dataset_description = None
|
|
843
|
+
id_col = pick_first_present_column(datasets_df, ["id", "dataset_id", "datasetId"])
|
|
844
|
+
name_col = pick_first_present_column(datasets_df, ["name", "dataset_name", "datasetName"])
|
|
845
|
+
desc_col = pick_first_present_column(datasets_df, ["description", "desc", "dataset_description"])
|
|
846
|
+
if id_col:
|
|
847
|
+
matched = datasets_df[datasets_df[id_col].astype(str) == str(args.dataset_id)]
|
|
848
|
+
if not matched.empty:
|
|
849
|
+
row = matched.iloc[0]
|
|
850
|
+
dataset_name = row.get(name_col) if name_col else None
|
|
851
|
+
dataset_description = row.get(desc_col) if desc_col else None
|
|
852
|
+
|
|
853
|
+
fields_df = ace_lib.get_datafields(
|
|
854
|
+
session,
|
|
855
|
+
instrument_type=args.instrument_type,
|
|
856
|
+
region=args.region,
|
|
857
|
+
delay=args.delay,
|
|
858
|
+
universe=args.universe,
|
|
859
|
+
dataset_id=args.dataset_id,
|
|
860
|
+
data_type=args.data_type,
|
|
861
|
+
)
|
|
862
|
+
|
|
863
|
+
fields_summary, field_count = build_field_summary(fields_df, max_fields=args.max_fields)
|
|
864
|
+
|
|
865
|
+
feature_engineering_skill_md = read_text_optional(FEATURE_ENGINEERING_DIR / "SKILL.md")
|
|
866
|
+
feature_implementation_skill_md = read_text_optional(FEATURE_IMPLEMENTATION_DIR / "SKILL.md")
|
|
867
|
+
allowed_metric_suffixes = build_allowed_metric_suffixes(fields_df, max_suffixes=300)
|
|
868
|
+
|
|
869
|
+
allowed_operators = []
|
|
870
|
+
if not args.no_operators_in_prompt:
|
|
871
|
+
try:
|
|
872
|
+
operators_df = ace_lib.get_operators(session)
|
|
873
|
+
keep_vector = _vector_ratio_from_datafields_df(fields_df) > 0.5
|
|
874
|
+
_, allowed_ops, _ = filter_operators_df(operators_df, keep_vector=keep_vector)
|
|
875
|
+
if args.max_operators is not None and args.max_operators > 0:
|
|
876
|
+
allowed_operators = allowed_ops[: args.max_operators]
|
|
877
|
+
else:
|
|
878
|
+
allowed_operators = allowed_ops
|
|
879
|
+
except Exception as exc:
|
|
880
|
+
print(f"Warning: failed to fetch/filter operators; continuing without operators in prompt. Error: {exc}", file=sys.stderr)
|
|
881
|
+
|
|
882
|
+
system_prompt, user_prompt = build_prompt(
|
|
883
|
+
dataset_id=args.dataset_id,
|
|
884
|
+
dataset_name=dataset_name,
|
|
885
|
+
dataset_description=dataset_description,
|
|
886
|
+
data_category=args.data_category,
|
|
887
|
+
region=args.region,
|
|
888
|
+
delay=args.delay,
|
|
889
|
+
universe=args.universe,
|
|
890
|
+
data_type=args.data_type,
|
|
891
|
+
fields_summary=fields_summary,
|
|
892
|
+
field_count=field_count,
|
|
893
|
+
feature_engineering_skill_md=feature_engineering_skill_md,
|
|
894
|
+
feature_implementation_skill_md=feature_implementation_skill_md,
|
|
895
|
+
allowed_metric_suffixes=allowed_metric_suffixes,
|
|
896
|
+
allowed_operators=allowed_operators,
|
|
897
|
+
)
|
|
898
|
+
|
|
899
|
+
api_key = (
|
|
900
|
+
args.moonshot_api_key
|
|
901
|
+
or os.environ.get("MOONSHOT_API_KEY")
|
|
902
|
+
)
|
|
903
|
+
if not api_key:
|
|
904
|
+
raise ValueError("Moonshot API key missing. Set MOONSHOT_API_KEY or pass --moonshot-api-key")
|
|
905
|
+
|
|
906
|
+
report = call_moonshot(api_key, args.moonshot_model, system_prompt, user_prompt)
|
|
907
|
+
# Save first, then normalize placeholders after dataset download.
|
|
908
|
+
ideas_path = save_ideas_report(report, args.region, args.delay, args.dataset_id)
|
|
867
909
|
|
|
868
910
|
ideas_text = ideas_path.read_text(encoding="utf-8")
|
|
869
911
|
|
|
@@ -891,11 +933,19 @@ def main():
|
|
|
891
933
|
args.universe,
|
|
892
934
|
"--instrument-type",
|
|
893
935
|
args.instrument_type,
|
|
936
|
+
"--data-type",
|
|
937
|
+
args.data_type,
|
|
894
938
|
],
|
|
895
939
|
cwd=FEATURE_IMPLEMENTATION_SCRIPTS,
|
|
896
940
|
)
|
|
897
941
|
|
|
898
942
|
dataset_folder = f"{safe_dataset_id(dataset_id)}_{args.region}_delay{args.delay}"
|
|
943
|
+
|
|
944
|
+
# If the ideas file references a different dataset id than the CLI args,
|
|
945
|
+
# ensure we also clean that dataset folder before fetching.
|
|
946
|
+
if dataset_folder != guessed_dataset_folder:
|
|
947
|
+
delete_path_if_exists(FEATURE_IMPLEMENTATION_DIR / "data" / dataset_folder)
|
|
948
|
+
|
|
899
949
|
dataset_csv_path = FEATURE_IMPLEMENTATION_DIR / "data" / dataset_folder / f"{dataset_folder}.csv"
|
|
900
950
|
if not dataset_csv_path.exists():
|
|
901
951
|
raise RuntimeError(
|