cnhkmcp 2.3.0__py3-none-any.whl → 2.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. cnhkmcp/__init__.py +1 -1
  2. cnhkmcp/untracked/APP/simulator/wqb20260130130030.log +210 -0
  3. cnhkmcp/untracked/APP/simulator/wqb20260130131757.log +104 -0
  4. cnhkmcp/untracked/APP/simulator/wqb20260130172245.log +70 -0
  5. cnhkmcp/untracked/APP/static/inspiration.js +5 -1
  6. cnhkmcp/untracked/APP/templates/index.html +7 -0
  7. cnhkmcp/untracked/APP/trailSomeAlphas/run_pipeline.py +135 -85
  8. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-data-feature-engineering/output_report/GLB_delay1_fundamental72_ideas.md +362 -0
  9. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/final_expressions.json +138 -0
  10. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759441444909600.json +38 -0
  11. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759441920092000.json +14 -0
  12. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759442418767100.json +14 -0
  13. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759442902507600.json +14 -0
  14. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759443377036200.json +10 -0
  15. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759443845377000.json +14 -0
  16. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759444313546700.json +10 -0
  17. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759444784598600.json +14 -0
  18. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759445274311200.json +14 -0
  19. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759445747421700.json +10 -0
  20. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759446222137800.json +22 -0
  21. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759446686222600.json +14 -0
  22. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759447154698500.json +10 -0
  23. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759447629677000.json +10 -0
  24. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759448102331200.json +10 -0
  25. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769759448573382000.json +14 -0
  26. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_delay1.csv +330 -0
  27. cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/scripts/fetch_dataset.py +7 -1
  28. cnhkmcp/untracked/APP//321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/231/320/243/321/205/342/225/235/320/220/321/206/320/230/320/241.py +13 -2
  29. cnhkmcp/untracked/back_up/platform_functions.py +2 -2
  30. cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/platform_functions.py +2 -2
  31. cnhkmcp/untracked/platform_functions.py +2 -2
  32. {cnhkmcp-2.3.0.dist-info → cnhkmcp-2.3.1.dist-info}/METADATA +1 -1
  33. {cnhkmcp-2.3.0.dist-info → cnhkmcp-2.3.1.dist-info}/RECORD +37 -15
  34. {cnhkmcp-2.3.0.dist-info → cnhkmcp-2.3.1.dist-info}/WHEEL +0 -0
  35. {cnhkmcp-2.3.0.dist-info → cnhkmcp-2.3.1.dist-info}/entry_points.txt +0 -0
  36. {cnhkmcp-2.3.0.dist-info → cnhkmcp-2.3.1.dist-info}/licenses/LICENSE +0 -0
  37. {cnhkmcp-2.3.0.dist-info → cnhkmcp-2.3.1.dist-info}/top_level.txt +0 -0
@@ -3,6 +3,7 @@ import datetime as dt
3
3
  import json
4
4
  import os
5
5
  import re
6
+ import shutil
6
7
  import subprocess
7
8
  import sys
8
9
  import csv
@@ -327,6 +328,7 @@ def build_prompt(
327
328
  region: str,
328
329
  delay: int,
329
330
  universe: str,
331
+ data_type: str,
330
332
  fields_summary: list[dict],
331
333
  field_count: int,
332
334
  feature_engineering_skill_md: str,
@@ -336,8 +338,7 @@ def build_prompt(
336
338
  ):
337
339
  # NOTE: The user requested that we DO NOT invent our own system prompt.
338
340
  # Instead, we embed the two skill specs as the authoritative instructions.
339
- system_prompt = "\n".join(
340
- [
341
+ prompt_lines = [
341
342
  "You are executing two skills in sequence:",
342
343
  "1) brain-data-feature-engineering",
343
344
  "2) brain-feature-implementation",
@@ -353,6 +354,15 @@ def build_prompt(
353
354
  "-------",
354
355
  f'"allowed_placeholders": {allowed_metric_suffixes}',
355
356
  "",
357
+ ]
358
+
359
+ if str(data_type).upper() == "VECTOR":
360
+ prompt_lines.append(
361
+ "since all the following the data is vector type data, before you do any process, you should choose a vector operator to generate its statistical feature to use, the data cannot be directly use. for example, if datafieldA and datafieldB are vector type data, you can use vec_avg(datafieldA) - vec_avg(datafieldB), where vec_avg() operator is used to generate the average of the data on a certain date. similarly, vector type operator can only be used on the vector type operator directly and cannot be nested, for example vec_avg(vec_sum(datafield)) is a false use."
362
+ )
363
+
364
+ prompt_lines.extend(
365
+ [
356
366
  "CRITICAL OUTPUT RULES (to ensure implement_idea.py can generate expressions):",
357
367
  "- Every Implementation Example MUST be a Python format template using {variable}.",
358
368
  "- Every {variable} MUST come from the allowed_placeholders list provided in user content.",
@@ -366,6 +376,8 @@ def build_prompt(
366
376
  ]
367
377
  )
368
378
 
379
+ system_prompt = "\n".join(prompt_lines)
380
+
369
381
  user_prompt = {
370
382
  "instructions": {
371
383
  "output_format": "Fill OUTPUT_TEMPLATE.md with concrete content.",
@@ -732,6 +744,21 @@ def run_script(args_list: list[str], cwd: Path):
732
744
  )
733
745
  return result.stdout
734
746
 
747
+
748
+ def delete_path_if_exists(path: Path):
749
+ """Best-effort delete a file or directory."""
750
+
751
+ try:
752
+ if not path.exists():
753
+ return
754
+ if path.is_dir():
755
+ shutil.rmtree(path, ignore_errors=True)
756
+ else:
757
+ path.unlink(missing_ok=True)
758
+ except Exception:
759
+ # Best-effort cleanup only; rerun should still proceed.
760
+ return
761
+
735
762
  def main():
736
763
  parser = argparse.ArgumentParser(description="Run feature engineering + implementation pipeline")
737
764
  parser.add_argument("--data-category", required=True, help="Dataset category (e.g., analyst, fundamental)")
@@ -740,6 +767,12 @@ def main():
740
767
  parser.add_argument("--universe", default="TOP3000", help="Universe (default: TOP3000)")
741
768
  parser.add_argument("--dataset-id", required=True, help="Dataset id (required)")
742
769
  parser.add_argument("--instrument-type", default="EQUITY", help="Instrument type (default: EQUITY)")
770
+ parser.add_argument(
771
+ "--data-type",
772
+ default="MATRIX",
773
+ choices=["MATRIX", "VECTOR"],
774
+ help="Data type to request from BRAIN datafields (MATRIX or VECTOR). Default: MATRIX",
775
+ )
743
776
  parser.add_argument("--ideas-file", default=None, help="Use existing ideas markdown instead of generating")
744
777
  parser.add_argument(
745
778
  "--regen-ideas",
@@ -774,96 +807,105 @@ def main():
774
807
  email, password = load_brain_credentials_from_env_or_args(args.username, args.password, config_path)
775
808
  session = start_brain_session(email, password)
776
809
 
810
+ # Always rerun cleanly: remove prior generated artifacts so we never reuse stale ideas/data.
811
+ # - If --ideas-file is provided, we treat it as user-managed input and do NOT delete it.
812
+ # - We DO delete the dataset-specific folder under feature-implementation/data.
813
+ if not args.ideas_file:
814
+ default_ideas = (
815
+ FEATURE_ENGINEERING_DIR
816
+ / "output_report"
817
+ / f"{args.region}_delay{args.delay}_{args.dataset_id}_ideas.md"
818
+ )
819
+ delete_path_if_exists(default_ideas)
820
+
821
+ guessed_dataset_folder = f"{safe_dataset_id(args.dataset_id)}_{args.region}_delay{args.delay}"
822
+ guessed_dataset_dir = FEATURE_IMPLEMENTATION_DIR / "data" / guessed_dataset_folder
823
+ delete_path_if_exists(guessed_dataset_dir)
824
+
777
825
  ideas_path = None
778
826
  if args.ideas_file:
779
827
  ideas_path = Path(args.ideas_file).resolve()
780
828
  if not ideas_path.exists():
781
829
  raise FileNotFoundError(f"Ideas file not found: {ideas_path}")
782
830
  else:
783
- default_ideas = (
784
- FEATURE_ENGINEERING_DIR
785
- / "output_report"
786
- / f"{args.region}_delay{args.delay}_{args.dataset_id}_ideas.md"
831
+ # Always regenerate ideas (never reuse an existing markdown report).
832
+ datasets_df = ace_lib.get_datasets(
833
+ session,
834
+ instrument_type=args.instrument_type,
835
+ region=args.region,
836
+ delay=args.delay,
837
+ universe=args.universe,
838
+ theme="ALL",
787
839
  )
788
- if default_ideas.exists() and not args.regen_ideas:
789
- ideas_path = default_ideas
790
- else:
791
- datasets_df = ace_lib.get_datasets(
792
- session,
793
- instrument_type=args.instrument_type,
794
- region=args.region,
795
- delay=args.delay,
796
- universe=args.universe,
797
- theme="ALL",
798
- )
799
-
800
- dataset_name = None
801
- dataset_description = None
802
- id_col = pick_first_present_column(datasets_df, ["id", "dataset_id", "datasetId"])
803
- name_col = pick_first_present_column(datasets_df, ["name", "dataset_name", "datasetName"])
804
- desc_col = pick_first_present_column(datasets_df, ["description", "desc", "dataset_description"])
805
- if id_col:
806
- matched = datasets_df[datasets_df[id_col].astype(str) == str(args.dataset_id)]
807
- if not matched.empty:
808
- row = matched.iloc[0]
809
- dataset_name = row.get(name_col) if name_col else None
810
- dataset_description = row.get(desc_col) if desc_col else None
811
-
812
- fields_df = ace_lib.get_datafields(
813
- session,
814
- instrument_type=args.instrument_type,
815
- region=args.region,
816
- delay=args.delay,
817
- universe=args.universe,
818
- dataset_id=args.dataset_id,
819
- data_type="ALL",
820
- )
821
-
822
- fields_summary, field_count = build_field_summary(fields_df, max_fields=args.max_fields)
823
-
824
- feature_engineering_skill_md = read_text_optional(FEATURE_ENGINEERING_DIR / "SKILL.md")
825
- feature_implementation_skill_md = read_text_optional(FEATURE_IMPLEMENTATION_DIR / "SKILL.md")
826
- allowed_metric_suffixes = build_allowed_metric_suffixes(fields_df, max_suffixes=300)
827
-
828
- allowed_operators = []
829
- if not args.no_operators_in_prompt:
830
- try:
831
- operators_df = ace_lib.get_operators(session)
832
- keep_vector = _vector_ratio_from_datafields_df(fields_df) > 0.5
833
- _, allowed_ops, _ = filter_operators_df(operators_df, keep_vector=keep_vector)
834
- if args.max_operators is not None and args.max_operators > 0:
835
- allowed_operators = allowed_ops[: args.max_operators]
836
- else:
837
- allowed_operators = allowed_ops
838
- except Exception as exc:
839
- print(f"Warning: failed to fetch/filter operators; continuing without operators in prompt. Error: {exc}", file=sys.stderr)
840
-
841
- system_prompt, user_prompt = build_prompt(
842
- dataset_id=args.dataset_id,
843
- dataset_name=dataset_name,
844
- dataset_description=dataset_description,
845
- data_category=args.data_category,
846
- region=args.region,
847
- delay=args.delay,
848
- universe=args.universe,
849
- fields_summary=fields_summary,
850
- field_count=field_count,
851
- feature_engineering_skill_md=feature_engineering_skill_md,
852
- feature_implementation_skill_md=feature_implementation_skill_md,
853
- allowed_metric_suffixes=allowed_metric_suffixes,
854
- allowed_operators=allowed_operators,
855
- )
856
-
857
- api_key = (
858
- args.moonshot_api_key
859
- or os.environ.get("MOONSHOT_API_KEY")
860
- )
861
- if not api_key:
862
- raise ValueError("Moonshot API key missing. Set MOONSHOT_API_KEY or pass --moonshot-api-key")
863
-
864
- report = call_moonshot(api_key, args.moonshot_model, system_prompt, user_prompt)
865
- # Save first, then normalize placeholders after dataset download.
866
- ideas_path = save_ideas_report(report, args.region, args.delay, args.dataset_id)
840
+
841
+ dataset_name = None
842
+ dataset_description = None
843
+ id_col = pick_first_present_column(datasets_df, ["id", "dataset_id", "datasetId"])
844
+ name_col = pick_first_present_column(datasets_df, ["name", "dataset_name", "datasetName"])
845
+ desc_col = pick_first_present_column(datasets_df, ["description", "desc", "dataset_description"])
846
+ if id_col:
847
+ matched = datasets_df[datasets_df[id_col].astype(str) == str(args.dataset_id)]
848
+ if not matched.empty:
849
+ row = matched.iloc[0]
850
+ dataset_name = row.get(name_col) if name_col else None
851
+ dataset_description = row.get(desc_col) if desc_col else None
852
+
853
+ fields_df = ace_lib.get_datafields(
854
+ session,
855
+ instrument_type=args.instrument_type,
856
+ region=args.region,
857
+ delay=args.delay,
858
+ universe=args.universe,
859
+ dataset_id=args.dataset_id,
860
+ data_type=args.data_type,
861
+ )
862
+
863
+ fields_summary, field_count = build_field_summary(fields_df, max_fields=args.max_fields)
864
+
865
+ feature_engineering_skill_md = read_text_optional(FEATURE_ENGINEERING_DIR / "SKILL.md")
866
+ feature_implementation_skill_md = read_text_optional(FEATURE_IMPLEMENTATION_DIR / "SKILL.md")
867
+ allowed_metric_suffixes = build_allowed_metric_suffixes(fields_df, max_suffixes=300)
868
+
869
+ allowed_operators = []
870
+ if not args.no_operators_in_prompt:
871
+ try:
872
+ operators_df = ace_lib.get_operators(session)
873
+ keep_vector = _vector_ratio_from_datafields_df(fields_df) > 0.5
874
+ _, allowed_ops, _ = filter_operators_df(operators_df, keep_vector=keep_vector)
875
+ if args.max_operators is not None and args.max_operators > 0:
876
+ allowed_operators = allowed_ops[: args.max_operators]
877
+ else:
878
+ allowed_operators = allowed_ops
879
+ except Exception as exc:
880
+ print(f"Warning: failed to fetch/filter operators; continuing without operators in prompt. Error: {exc}", file=sys.stderr)
881
+
882
+ system_prompt, user_prompt = build_prompt(
883
+ dataset_id=args.dataset_id,
884
+ dataset_name=dataset_name,
885
+ dataset_description=dataset_description,
886
+ data_category=args.data_category,
887
+ region=args.region,
888
+ delay=args.delay,
889
+ universe=args.universe,
890
+ data_type=args.data_type,
891
+ fields_summary=fields_summary,
892
+ field_count=field_count,
893
+ feature_engineering_skill_md=feature_engineering_skill_md,
894
+ feature_implementation_skill_md=feature_implementation_skill_md,
895
+ allowed_metric_suffixes=allowed_metric_suffixes,
896
+ allowed_operators=allowed_operators,
897
+ )
898
+
899
+ api_key = (
900
+ args.moonshot_api_key
901
+ or os.environ.get("MOONSHOT_API_KEY")
902
+ )
903
+ if not api_key:
904
+ raise ValueError("Moonshot API key missing. Set MOONSHOT_API_KEY or pass --moonshot-api-key")
905
+
906
+ report = call_moonshot(api_key, args.moonshot_model, system_prompt, user_prompt)
907
+ # Save first, then normalize placeholders after dataset download.
908
+ ideas_path = save_ideas_report(report, args.region, args.delay, args.dataset_id)
867
909
 
868
910
  ideas_text = ideas_path.read_text(encoding="utf-8")
869
911
 
@@ -891,11 +933,19 @@ def main():
891
933
  args.universe,
892
934
  "--instrument-type",
893
935
  args.instrument_type,
936
+ "--data-type",
937
+ args.data_type,
894
938
  ],
895
939
  cwd=FEATURE_IMPLEMENTATION_SCRIPTS,
896
940
  )
897
941
 
898
942
  dataset_folder = f"{safe_dataset_id(dataset_id)}_{args.region}_delay{args.delay}"
943
+
944
+ # If the ideas file references a different dataset id than the CLI args,
945
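+ # ensure we also clean that dataset folder. NOTE(review): the run_script(...) call above has already completed at this point, so this cleanup appears to run after (not before) fetching and may remove freshly fetched data right before the CSV existence check below — confirm the intended ordering.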
946
+ if dataset_folder != guessed_dataset_folder:
947
+ delete_path_if_exists(FEATURE_IMPLEMENTATION_DIR / "data" / dataset_folder)
948
+
899
949
  dataset_csv_path = FEATURE_IMPLEMENTATION_DIR / "data" / dataset_folder / f"{dataset_folder}.csv"
900
950
  if not dataset_csv_path.exists():
901
951
  raise RuntimeError(