cnhkmcp 2.3.0__py3-none-any.whl → 2.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cnhkmcp/__init__.py +1 -1
- cnhkmcp/untracked/APP/Tranformer/parsetab.py +60 -0
- cnhkmcp/untracked/APP/Tranformer/validator.py +78 -4
- cnhkmcp/untracked/APP/static/inspiration.js +46 -4
- cnhkmcp/untracked/APP/templates/index.html +33 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/enhance_template.py +132 -6
- cnhkmcp/untracked/APP/trailSomeAlphas/run_pipeline.py +135 -85
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-data-feature-engineering/SKILL.md +17 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-data-feature-engineering/output_report/GLB_delay1_fundamental72_ideas.md +415 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/final_expressions.json +76 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769852468022627100.json +22 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769852468554457600.json +14 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769852469133324600.json +8 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769852469704433900.json +10 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769852470248911900.json +10 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769852470805192900.json +8 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769852471380158000.json +10 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769852471944247400.json +22 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769852472483548800.json +14 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769852473053891800.json +22 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769852473617716000.json +22 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769852474172815700.json +14 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769852474735778500.json +10 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769852475315478500.json +14 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769852475912897000.json +8 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769852476474911100.json +10 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769852978914367200.json +10 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769852979426164800.json +10 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769852979945511100.json +10 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769852980480251500.json +10 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769852981007315500.json +10 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769854621979784200.json +10 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769854622483457900.json +10 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769854623010559800.json +10 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769854623572902300.json +5 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_1_idea_1769854624091016000.json +10 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_delay1.csv +330 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_delay1.csv.bak_1769852868 +330 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/data/fundamental72_GLB_delay1/fundamental72_GLB_delay1.csv.bak_1769854511 +330 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/scripts/ace.log +12 -0
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/scripts/fetch_dataset.py +7 -1
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/brain-feature-implementation/scripts/validator.py +80 -4
- cnhkmcp/untracked/APP/trailSomeAlphas/skills/template_final_enhance/op/321/206/320/220/342/225/227/321/207/342/225/227/320/243.md +24 -18
- cnhkmcp/untracked/APP//321/210/342/224/220/320/240/321/210/320/261/320/234/321/206/320/231/320/243/321/205/342/225/235/320/220/321/206/320/230/320/241.py +27 -2
- cnhkmcp/untracked/back_up/platform_functions.py +2 -2
- cnhkmcp/untracked/mcp/321/206/320/246/320/227/321/204/342/225/227/342/225/242/321/210/320/276/342/225/221/321/205/320/255/320/253/321/207/320/231/320/2302_/321/205/320/266/320/222/321/206/320/256/320/254/321/205/320/236/320/257/321/207/320/231/320/230/321/205/320/240/320/277/321/205/320/232/320/270/321/204/342/225/225/320/235/321/204/342/225/221/320/226/321/206/342/225/241/320/237/321/210/320/267/320/230/321/205/320/251/320/270/321/205/342/226/221/342/226/222/321/210/320/277/320/245/321/210/342/224/220/320/251/321/204/342/225/225/320/272/platform_functions.py +2 -2
- cnhkmcp/untracked/platform_functions.py +2 -2
- cnhkmcp/untracked/skills/alpha-expression-verifier/scripts/parsetab.py +60 -0
- cnhkmcp/untracked/skills/alpha-expression-verifier/scripts/validator.py +78 -4
- {cnhkmcp-2.3.0.dist-info → cnhkmcp-2.3.2.dist-info}/METADATA +1 -1
- {cnhkmcp-2.3.0.dist-info → cnhkmcp-2.3.2.dist-info}/RECORD +55 -22
- {cnhkmcp-2.3.0.dist-info → cnhkmcp-2.3.2.dist-info}/WHEEL +0 -0
- {cnhkmcp-2.3.0.dist-info → cnhkmcp-2.3.2.dist-info}/entry_points.txt +0 -0
- {cnhkmcp-2.3.0.dist-info → cnhkmcp-2.3.2.dist-info}/licenses/LICENSE +0 -0
- {cnhkmcp-2.3.0.dist-info → cnhkmcp-2.3.2.dist-info}/top_level.txt +0 -0
cnhkmcp/__init__.py
CHANGED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
|
|
2
|
+
# parsetab.py
|
|
3
|
+
# This file is automatically generated. Do not edit.
|
|
4
|
+
# pylint: disable=W,C,R
|
|
5
|
+
_tabversion = '3.10'
|
|
6
|
+
|
|
7
|
+
_lr_method = 'LALR'
|
|
8
|
+
|
|
9
|
+
_lr_signature = 'ASSIGN BOOLEAN CATEGORY COMMA DIVIDE EQUAL FIELD FUNCTION GREATER GREATEREQUAL IDENTIFIER LESS LESSEQUAL LPAREN MINUS NOTEQUAL NUMBER PLUS RPAREN STRING TIMESexpression : comparison\n| expression EQUAL comparison\n| expression NOTEQUAL comparison\n| expression GREATER comparison\n| expression LESS comparison\n| expression GREATEREQUAL comparison\n| expression LESSEQUAL comparisoncomparison : term\n| comparison PLUS term\n| comparison MINUS termterm : factor\n| term TIMES factor\n| term DIVIDE factorfactor : NUMBER\n| STRING\n| FIELD\n| CATEGORY\n| IDENTIFIER\n| BOOLEAN\n| MINUS factor\n| LPAREN expression RPAREN\n| function_callfunction_call : FUNCTION LPAREN args RPARENargs : arg_list\n| emptyarg_list : arg\n| arg_list COMMA argarg : expression\n| IDENTIFIER ASSIGN expressionempty :'
|
|
10
|
+
|
|
11
|
+
_lr_action_items = {'NUMBER':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,]),'STRING':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,]),'FIELD':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,]),'CATEGORY':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,]),'IDENTIFIER':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[10,10,10,10,10,10,10,10,10,10,10,10,10,44,44,10,]),'BOOLEAN':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,]),'MINUS':([0,2,3,4,5,6,7,8,9,10,11,12,13,15,16,17,18,19,20,21,22,23,24,25,27,28,29,30,31,32,33,34,35,36,37,38,44,45,46,47,],[4,22,-8,4,-11,-14,-15,-16,-17,-18,-19,4,-22,4,4,4,4,4,4,4,4,4,4,-20,4,22,22,22,22,22,22,-9,-10,-12,-13,-21,-18,-23,4,4,]),'LPAREN':([0,4,12,14,15,16,17,18,19,20,21,22,23,24,27,46,47,],[12,12,12,27,12,12,12,12,12,12,12,12,12,12,12,12,12,]),'FUNCTION':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,]),'$end':([1,2,3,5,6,7,8,9,10,11,13,25,28,29,30,31,32,33,34,35,36,37,38,45,],[0,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,-23,]),'EQUAL':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[15,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,15,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,15,-18,-23,15,]),'NOTEQUAL':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[16,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,16,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,16,-18,-23,16,]),'GREATER':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[17,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,17,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,17,-18,-23,17,]),'LESS':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[18,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,18,-2,-3,-4,-5,-6,-7,
-9,-10,-12,-13,-21,18,-18,-23,18,]),'GREATEREQUAL':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[19,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,19,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,19,-18,-23,19,]),'LESSEQUAL':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[20,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,20,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,20,-18,-23,20,]),'RPAREN':([2,3,5,6,7,8,9,10,11,13,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,48,49,],[-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,38,-30,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,45,-24,-25,-26,-28,-18,-23,-27,-29,]),'COMMA':([2,3,5,6,7,8,9,10,11,13,25,28,29,30,31,32,33,34,35,36,37,38,40,42,43,44,45,48,49,],[-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,46,-26,-28,-18,-23,-27,-29,]),'PLUS':([2,3,5,6,7,8,9,10,11,13,25,28,29,30,31,32,33,34,35,36,37,38,44,45,],[21,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,21,21,21,21,21,21,-9,-10,-12,-13,-21,-18,-23,]),'TIMES':([3,5,6,7,8,9,10,11,13,25,34,35,36,37,38,44,45,],[23,-11,-14,-15,-16,-17,-18,-19,-22,-20,23,23,-12,-13,-21,-18,-23,]),'DIVIDE':([3,5,6,7,8,9,10,11,13,25,34,35,36,37,38,44,45,],[24,-11,-14,-15,-16,-17,-18,-19,-22,-20,24,24,-12,-13,-21,-18,-23,]),'ASSIGN':([44,],[47,]),}
|
|
12
|
+
|
|
13
|
+
_lr_action = {}
|
|
14
|
+
for _k, _v in _lr_action_items.items():
|
|
15
|
+
for _x,_y in zip(_v[0],_v[1]):
|
|
16
|
+
if not _x in _lr_action: _lr_action[_x] = {}
|
|
17
|
+
_lr_action[_x][_k] = _y
|
|
18
|
+
del _lr_action_items
|
|
19
|
+
|
|
20
|
+
_lr_goto_items = {'expression':([0,12,27,46,47,],[1,26,43,43,49,]),'comparison':([0,12,15,16,17,18,19,20,27,46,47,],[2,2,28,29,30,31,32,33,2,2,2,]),'term':([0,12,15,16,17,18,19,20,21,22,27,46,47,],[3,3,3,3,3,3,3,3,34,35,3,3,3,]),'factor':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[5,25,5,5,5,5,5,5,5,5,5,36,37,5,5,5,]),'function_call':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,]),'args':([27,],[39,]),'arg_list':([27,],[40,]),'empty':([27,],[41,]),'arg':([27,46,],[42,48,]),}
|
|
21
|
+
|
|
22
|
+
_lr_goto = {}
|
|
23
|
+
for _k, _v in _lr_goto_items.items():
|
|
24
|
+
for _x, _y in zip(_v[0], _v[1]):
|
|
25
|
+
if not _x in _lr_goto: _lr_goto[_x] = {}
|
|
26
|
+
_lr_goto[_x][_k] = _y
|
|
27
|
+
del _lr_goto_items
|
|
28
|
+
_lr_productions = [
|
|
29
|
+
("S' -> expression","S'",1,None,None,None),
|
|
30
|
+
('expression -> comparison','expression',1,'p_expression','validator.py',386),
|
|
31
|
+
('expression -> expression EQUAL comparison','expression',3,'p_expression','validator.py',387),
|
|
32
|
+
('expression -> expression NOTEQUAL comparison','expression',3,'p_expression','validator.py',388),
|
|
33
|
+
('expression -> expression GREATER comparison','expression',3,'p_expression','validator.py',389),
|
|
34
|
+
('expression -> expression LESS comparison','expression',3,'p_expression','validator.py',390),
|
|
35
|
+
('expression -> expression GREATEREQUAL comparison','expression',3,'p_expression','validator.py',391),
|
|
36
|
+
('expression -> expression LESSEQUAL comparison','expression',3,'p_expression','validator.py',392),
|
|
37
|
+
('comparison -> term','comparison',1,'p_comparison','validator.py',399),
|
|
38
|
+
('comparison -> comparison PLUS term','comparison',3,'p_comparison','validator.py',400),
|
|
39
|
+
('comparison -> comparison MINUS term','comparison',3,'p_comparison','validator.py',401),
|
|
40
|
+
('term -> factor','term',1,'p_term','validator.py',408),
|
|
41
|
+
('term -> term TIMES factor','term',3,'p_term','validator.py',409),
|
|
42
|
+
('term -> term DIVIDE factor','term',3,'p_term','validator.py',410),
|
|
43
|
+
('factor -> NUMBER','factor',1,'p_factor','validator.py',417),
|
|
44
|
+
('factor -> STRING','factor',1,'p_factor','validator.py',418),
|
|
45
|
+
('factor -> FIELD','factor',1,'p_factor','validator.py',419),
|
|
46
|
+
('factor -> CATEGORY','factor',1,'p_factor','validator.py',420),
|
|
47
|
+
('factor -> IDENTIFIER','factor',1,'p_factor','validator.py',421),
|
|
48
|
+
('factor -> BOOLEAN','factor',1,'p_factor','validator.py',422),
|
|
49
|
+
('factor -> MINUS factor','factor',2,'p_factor','validator.py',423),
|
|
50
|
+
('factor -> LPAREN expression RPAREN','factor',3,'p_factor','validator.py',424),
|
|
51
|
+
('factor -> function_call','factor',1,'p_factor','validator.py',425),
|
|
52
|
+
('function_call -> FUNCTION LPAREN args RPAREN','function_call',4,'p_function_call','validator.py',453),
|
|
53
|
+
('args -> arg_list','args',1,'p_args','validator.py',457),
|
|
54
|
+
('args -> empty','args',1,'p_args','validator.py',458),
|
|
55
|
+
('arg_list -> arg','arg_list',1,'p_arg_list','validator.py',465),
|
|
56
|
+
('arg_list -> arg_list COMMA arg','arg_list',3,'p_arg_list','validator.py',466),
|
|
57
|
+
('arg -> expression','arg',1,'p_arg','validator.py',473),
|
|
58
|
+
('arg -> IDENTIFIER ASSIGN expression','arg',3,'p_arg','validator.py',474),
|
|
59
|
+
('empty -> <empty>','empty',0,'p_empty','validator.py',481),
|
|
60
|
+
]
|
|
@@ -170,7 +170,10 @@ supported_functions = {
|
|
|
170
170
|
'scale_down': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number'], 'param_names': ['x', 'constant']},
|
|
171
171
|
|
|
172
172
|
# Arithmetic 类别函数
|
|
173
|
-
|
|
173
|
+
# add(x, y, ..., filter=false)
|
|
174
|
+
# NOTE: add() is variadic (>=2 terms) with an optional boolean filter flag.
|
|
175
|
+
# We validate it with custom logic in validate_function().
|
|
176
|
+
'add': {'min_args': 2, 'max_args': 101, 'arg_types': ['expression'] * 101},
|
|
174
177
|
'multiply': {'min_args': 2, 'max_args': 100, 'arg_types': ['expression'] * 99 + ['boolean'], 'param_names': ['x', 'y', 'filter']}, # multiply(x, y, ..., filter=false)
|
|
175
178
|
'sign': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
|
|
176
179
|
'subtract': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'expression', 'boolean']}, # subtract(x, y, filter=false)
|
|
@@ -501,6 +504,10 @@ class ExpressionValidator:
|
|
|
501
504
|
if not function_info:
|
|
502
505
|
return [f"未知函数: {function_name}"]
|
|
503
506
|
|
|
507
|
+
# Custom validation for variadic functions with optional flags
|
|
508
|
+
if function_name == 'add':
|
|
509
|
+
return self._validate_add(args, is_in_group_arg)
|
|
510
|
+
|
|
504
511
|
errors = []
|
|
505
512
|
|
|
506
513
|
# 检查参数数量
|
|
@@ -591,9 +598,9 @@ class ExpressionValidator:
|
|
|
591
598
|
if arg.node_type != 'number':
|
|
592
599
|
errors.append(f"参数 {arg_index+1} 应该是一个数字,但得到 {arg.node_type}")
|
|
593
600
|
elif expected_type == 'boolean':
|
|
594
|
-
#
|
|
595
|
-
if arg.node_type
|
|
596
|
-
errors.append(f"参数 {arg_index+1} 应该是一个布尔值(0/1),但得到 {arg.node_type}")
|
|
601
|
+
# 布尔值可以是 true/false 或数字(0/1)
|
|
602
|
+
if arg.node_type not in {'boolean', 'number'}:
|
|
603
|
+
errors.append(f"参数 {arg_index+1} 应该是一个布尔值(true/false 或 0/1),但得到 {arg.node_type}")
|
|
597
604
|
elif expected_type == 'field':
|
|
598
605
|
if arg.node_type != 'field' and arg.node_type != 'category':
|
|
599
606
|
# 允许field或category作为字段参数
|
|
@@ -610,6 +617,73 @@ class ExpressionValidator:
|
|
|
610
617
|
# group函数的category参数可以是任何类型(field、category等),不进行类型校验
|
|
611
618
|
|
|
612
619
|
return errors
|
|
620
|
+
|
|
621
|
+
def _validate_add(self, args: List[Any], is_in_group_arg: bool = False) -> List[str]:
    """Validate a call to ``add(x, y, ..., filter=false)``.

    Rules:
    - At least 2 positional expression terms.
    - The optional filter flag may be given either as
      - a named argument: ``filter=<boolean>``, or
      - the last positional argument: a boolean literal or numeric 0/1.

    Args:
        args: Parsed call arguments. Each item is either a dict with
            ``type`` of ``'named'`` / ``'positional'`` and a ``value`` node,
            or a bare node object exposing ``node_type``.
        is_in_group_arg: Forwarded unchanged to ``_validate_arg_type``.

    Returns:
        A list of error messages; empty when the call is valid.
    """
    if len(args) < 2:
        return [f"函数 add 需要至少 2 个参数,但只提供了 {len(args)}"]

    errors: List[str] = []
    named_filter_nodes: List[ASTNode] = []
    positional_nodes: List[ASTNode] = []

    # Split the raw arguments into named ``filter`` values and positional terms.
    for arg in args:
        if isinstance(arg, dict) and arg.get('type') == 'named':
            name = arg.get('name')
            value = arg.get('value')
            if name != 'filter':
                errors.append(f"函数 add 不存在参数 '{name}'")
                continue
            if not hasattr(value, 'node_type'):
                errors.append("函数 add 的参数 filter 格式错误")
                continue
            named_filter_nodes.append(value)
        elif isinstance(arg, dict) and arg.get('type') == 'positional':
            value = arg.get('value')
            if hasattr(value, 'node_type'):
                positional_nodes.append(value)
            else:
                errors.append("函数 add 的位置参数格式错误")
        elif hasattr(arg, 'node_type'):
            positional_nodes.append(arg)
        else:
            errors.append("函数 add 的参数格式错误")

    if len(named_filter_nodes) > 1:
        errors.append("函数 add 的参数 'filter' 只能出现一次")

    positional_filter_node: Optional[ASTNode] = None
    # Only infer a positional filter flag when:
    # - no named filter is provided
    # - there are at least 3 positional args (x, y, filter)
    # - the last arg is boolean or numeric 0/1
    if not named_filter_nodes and len(positional_nodes) >= 3:
        last = positional_nodes[-1]
        if last.node_type == 'boolean' or (last.node_type == 'number' and last.value in {0, 1}):
            positional_filter_node = positional_nodes.pop()

    if len(positional_nodes) < 2:
        errors.append(f"函数 add 需要至少 2 个输入项(不含filter),但只提供了 {len(positional_nodes)}")

    for idx, node in enumerate(positional_nodes):
        errors.extend(self._validate_arg_type(node, 'expression', idx, 'add', is_in_group_arg))

    # A positional filter is inferred only when no named filter exists, so the
    # two can never be present together; the former "both positional and named"
    # check was unreachable and has been removed. With a named filter, a
    # trailing 0/1 or boolean stays an ordinary term.
    if positional_filter_node is not None:
        errors.extend(self._validate_arg_type(positional_filter_node, 'boolean', len(positional_nodes), 'add', is_in_group_arg))
    if named_filter_nodes:
        # Only the first named filter is type-checked; duplicates were reported above.
        errors.extend(self._validate_arg_type(named_filter_nodes[0], 'boolean', len(positional_nodes), 'add', is_in_group_arg))

    return errors
|
|
613
687
|
|
|
614
688
|
def validate_ast(self, ast: Optional[ASTNode], is_in_group_arg: bool = False) -> List[str]:
|
|
615
689
|
"""递归验证抽象语法树"""
|
|
@@ -15,7 +15,8 @@ let inspirationState = {
|
|
|
15
15
|
selectedDatasetCategory: null,
|
|
16
16
|
pipelineTaskId: null,
|
|
17
17
|
pipelineEventSource: null,
|
|
18
|
-
enhanceTaskId: null
|
|
18
|
+
enhanceTaskId: null,
|
|
19
|
+
enhanceDataType: 'MATRIX'
|
|
19
20
|
};
|
|
20
21
|
|
|
21
22
|
document.addEventListener('DOMContentLoaded', function() {
|
|
@@ -47,8 +48,7 @@ document.addEventListener('DOMContentLoaded', function() {
|
|
|
47
48
|
const enhanceBtn = document.getElementById('inspire-enhance');
|
|
48
49
|
if (enhanceBtn) {
|
|
49
50
|
enhanceBtn.addEventListener('click', () => {
|
|
50
|
-
|
|
51
|
-
if (input) input.click();
|
|
51
|
+
openEnhanceDataTypeModal();
|
|
52
52
|
});
|
|
53
53
|
}
|
|
54
54
|
const enhanceInput = document.getElementById('inspire-idea-file');
|
|
@@ -59,6 +59,14 @@ document.addEventListener('DOMContentLoaded', function() {
|
|
|
59
59
|
if (enhanceDlBtn) {
|
|
60
60
|
enhanceDlBtn.addEventListener('click', downloadEnhanceZip);
|
|
61
61
|
}
|
|
62
|
+
|
|
63
|
+
// Enhance data type modal wiring
|
|
64
|
+
const dtypeClose = document.getElementById('enhance-datatype-close');
|
|
65
|
+
if (dtypeClose) dtypeClose.addEventListener('click', closeEnhanceDataTypeModal);
|
|
66
|
+
const dtypeCancel = document.getElementById('enhance-datatype-cancel');
|
|
67
|
+
if (dtypeCancel) dtypeCancel.addEventListener('click', closeEnhanceDataTypeModal);
|
|
68
|
+
const dtypeConfirm = document.getElementById('enhance-datatype-confirm');
|
|
69
|
+
if (dtypeConfirm) dtypeConfirm.addEventListener('click', confirmEnhanceDataType);
|
|
62
70
|
|
|
63
71
|
// Initially disable generate button until tested
|
|
64
72
|
const genBtn = document.getElementById('inspire-generate');
|
|
@@ -344,6 +352,7 @@ function generateAlphaTemplates() {
|
|
|
344
352
|
const region = document.getElementById('inspire-region').value;
|
|
345
353
|
const delay = document.getElementById('inspire-delay').value;
|
|
346
354
|
const universe = document.getElementById('inspire-universe').value;
|
|
355
|
+
const dataType = (document.getElementById('inspire-data-type') || {}).value || 'MATRIX';
|
|
347
356
|
|
|
348
357
|
const outputDiv = document.getElementById('inspire-output');
|
|
349
358
|
outputDiv.innerHTML = '正在生成模板... 这可能需要几分钟...';
|
|
@@ -354,7 +363,8 @@ function generateAlphaTemplates() {
|
|
|
354
363
|
body: JSON.stringify({
|
|
355
364
|
apiKey, baseUrl, model,
|
|
356
365
|
region, delay, universe,
|
|
357
|
-
datasetId: inspirationState.selectedDataset
|
|
366
|
+
datasetId: inspirationState.selectedDataset,
|
|
367
|
+
dataType
|
|
358
368
|
})
|
|
359
369
|
})
|
|
360
370
|
.then(res => res.json())
|
|
@@ -393,6 +403,7 @@ function runDirectAlphaPipeline() {
|
|
|
393
403
|
const region = document.getElementById('inspire-region').value;
|
|
394
404
|
const delay = document.getElementById('inspire-delay').value;
|
|
395
405
|
const universe = document.getElementById('inspire-universe').value;
|
|
406
|
+
const dataType = (document.getElementById('inspire-data-type') || {}).value || 'MATRIX';
|
|
396
407
|
const dataCategory = inspirationState.selectedDatasetCategory;
|
|
397
408
|
|
|
398
409
|
if (!region || !delay || !universe) {
|
|
@@ -440,6 +451,7 @@ function runDirectAlphaPipeline() {
|
|
|
440
451
|
region,
|
|
441
452
|
delay,
|
|
442
453
|
universe,
|
|
454
|
+
dataType,
|
|
443
455
|
apiKey,
|
|
444
456
|
baseUrl,
|
|
445
457
|
model
|
|
@@ -640,6 +652,7 @@ function handleEnhanceFile(event) {
|
|
|
640
652
|
formData.append('apiKey', apiKey);
|
|
641
653
|
formData.append('baseUrl', baseUrl);
|
|
642
654
|
formData.append('model', model);
|
|
655
|
+
formData.append('dataType', inspirationState.enhanceDataType || 'MATRIX');
|
|
643
656
|
|
|
644
657
|
fetch('/api/inspiration/enhance-template', {
|
|
645
658
|
method: 'POST',
|
|
@@ -671,6 +684,35 @@ function handleEnhanceFile(event) {
|
|
|
671
684
|
});
|
|
672
685
|
}
|
|
673
686
|
|
|
687
|
+
/**
 * Show the data-type picker that precedes the enhance file selection.
 *
 * The picker is pre-set to the previously confirmed type stored in
 * inspirationState.enhanceDataType, or 'MATRIX' when none is stored.
 * If the modal element is absent, the file chooser is opened directly.
 */
function openEnhanceDataTypeModal() {
    const dialog = document.getElementById('enhanceDataTypeModal');

    if (dialog) {
        const picker = document.getElementById('inspire-enhance-data-type');
        if (picker) {
            picker.value = inspirationState.enhanceDataType || 'MATRIX';
        }
        dialog.style.display = 'block';
        return;
    }

    // No modal in the page: skip the picker and go straight to the file chooser.
    const fileInput = document.getElementById('inspire-idea-file');
    if (fileInput) {
        fileInput.click();
    }
}
|
|
701
|
+
|
|
702
|
+
/**
 * Hide the enhance data-type modal; does nothing when the element is absent.
 */
function closeEnhanceDataTypeModal() {
    const dialog = document.getElementById('enhanceDataTypeModal');
    if (!dialog) {
        return;
    }
    dialog.style.display = 'none';
}
|
|
706
|
+
|
|
707
|
+
/**
 * Commit the data type chosen in the modal, close it, and open the file chooser.
 *
 * The chosen value is stored in inspirationState.enhanceDataType. Anything
 * other than the exact string 'VECTOR' (including a missing <select>) is
 * stored as 'MATRIX'.
 */
function confirmEnhanceDataType() {
    const sel = document.getElementById('inspire-enhance-data-type');
    const dt = sel ? sel.value : 'MATRIX';
    inspirationState.enhanceDataType = (dt === 'VECTOR') ? 'VECTOR' : 'MATRIX';
    closeEnhanceDataTypeModal();

    const input = document.getElementById('inspire-idea-file');
    if (input) {
        // Clear any previous selection first. Browsers do not fire 'change'
        // when the same file is picked again, which would silently block
        // re-running the same file with a different data type.
        // NOTE(review): assumes the upload handler is bound to this input's
        // 'change' event - the binding is outside this view.
        input.value = '';
        input.click();
    }
}
|
|
715
|
+
|
|
674
716
|
function startEnhanceStream(taskId, totalFiles) {
|
|
675
717
|
const outputDiv = document.getElementById('inspire-output');
|
|
676
718
|
const streamEl = document.getElementById('inspire-enhance-stream');
|
|
@@ -870,6 +870,13 @@
|
|
|
870
870
|
<label style="display: block; margin-bottom: 5px; font-size: 0.9em; color: #666;">Delay</label>
|
|
871
871
|
<select id="inspire-delay" class="form-input" style="width: 100%;"><option>Select Region First</option></select>
|
|
872
872
|
</div>
|
|
873
|
+
<div style="margin-bottom: 10px;">
|
|
874
|
+
<label style="display: block; margin-bottom: 5px; font-size: 0.9em; color: #666;">data type</label>
|
|
875
|
+
<select id="inspire-data-type" class="form-input" style="width: 100%;">
|
|
876
|
+
<option value="MATRIX" selected>MATRIX</option>
|
|
877
|
+
<option value="VECTOR">VECTOR</option>
|
|
878
|
+
</select>
|
|
879
|
+
</div>
|
|
873
880
|
</div>
|
|
874
881
|
</div>
|
|
875
882
|
</div>
|
|
@@ -930,6 +937,32 @@
|
|
|
930
937
|
</div>
|
|
931
938
|
</div>
|
|
932
939
|
|
|
940
|
+
<!-- Inspiration Enhance Data Type Modal -->
|
|
941
|
+
<div id="enhanceDataTypeModal" class="modal" style="display: none; position: fixed; z-index: 1100; left: 0; top: 0; width: 100%; height: 100%; overflow: auto; background-color: rgba(0,0,0,0.4);">
|
|
942
|
+
<div class="modal-content" style="background-color: #fefefe; margin: 10% auto; padding: 0; border: 1px solid #888; width: 520px; max-width: 90%; border-radius: 8px;">
|
|
943
|
+
<div class="modal-header" style="display: flex; justify-content: space-between; align-items: center; border-bottom: 1px solid #eee; padding: 12px 16px;">
|
|
944
|
+
<h3 style="margin: 0;">选择数据类型</h3>
|
|
945
|
+
<span id="enhance-datatype-close" style="color: #aaa; font-size: 26px; font-weight: bold; cursor: pointer;">×</span>
|
|
946
|
+
</div>
|
|
947
|
+
<div class="modal-body" style="padding: 16px;">
|
|
948
|
+
<div style="margin-bottom: 10px; color: #666; font-size: 13px;">
|
|
949
|
+
请选择你要增强的历史模板对应的数据类型(区分大小写)。
|
|
950
|
+
</div>
|
|
951
|
+
<div style="margin-bottom: 10px;">
|
|
952
|
+
<label style="display: block; margin-bottom: 6px; font-size: 0.9em; color: #666;">data type</label>
|
|
953
|
+
<select id="inspire-enhance-data-type" class="form-input" style="width: 100%;">
|
|
954
|
+
<option value="MATRIX" selected>MATRIX</option>
|
|
955
|
+
<option value="VECTOR">VECTOR</option>
|
|
956
|
+
</select>
|
|
957
|
+
</div>
|
|
958
|
+
</div>
|
|
959
|
+
<div class="modal-footer" style="display: flex; justify-content: flex-end; gap: 10px; padding: 12px 16px; border-top: 1px solid #eee;">
|
|
960
|
+
<button id="enhance-datatype-cancel" class="btn btn-outline">取消</button>
|
|
961
|
+
<button id="enhance-datatype-confirm" class="btn btn-primary">继续选择文件</button>
|
|
962
|
+
</div>
|
|
963
|
+
</div>
|
|
964
|
+
</div>
|
|
965
|
+
|
|
933
966
|
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
|
934
967
|
<script src="{{ url_for('static', filename='brain.js') }}"></script>
|
|
935
968
|
<script src="{{ url_for('static', filename='script.js') }}"></script>
|
|
@@ -2,6 +2,7 @@ import json
|
|
|
2
2
|
import csv
|
|
3
3
|
import os
|
|
4
4
|
import re
|
|
5
|
+
import shutil
|
|
5
6
|
import subprocess
|
|
6
7
|
import sys
|
|
7
8
|
import time
|
|
@@ -18,6 +19,10 @@ DEFAULT_FEATURE_IMPLEMENTATION_SCRIPTS = DEFAULT_FEATURE_IMPLEMENTATION_DIR / "s
|
|
|
18
19
|
DEFAULT_MOONSHOT_MODEL = os.environ.get("MOONSHOT_MODEL", "kimi-k2.5")
|
|
19
20
|
DEFAULT_MAX_ENHANCED_TEMPLATES = int(os.environ.get("MAX_ENHANCED_TEMPLATES", "60"))
|
|
20
21
|
|
|
22
|
+
# Guidance text for VECTOR-type datasets.
# NOTE(review): presumably appended to the LLM prompt when data_type is VECTOR;
# the call site is outside this view.
# The example previously forbade vec_avg(A) - vec_avg(B), which is exactly the
# pattern the hint asks for; it now forbids raw vector-field arithmetic instead.
VECTOR_DATA_TYPE_HINT = (
    "since the data is vector type data, the data cannot be used directly. "
    "before you do any processing, you should choose a vector operator to generate its statistical feature to use "
    "(if the current template did not do so or you think you can have a better choice of another vector operator). "
    "for example, if datafieldA and datafieldB are vector type data, you cannot use datafieldA - datafieldB; "
    "use vec_avg(datafieldA) - vec_avg(datafieldB) instead. "
    "similarly, vector type operators can only be used on vector type data."
)
|
|
25
|
+
|
|
21
26
|
|
|
22
27
|
def find_latest_idea_json(feature_implementation_dir: Path) -> Path:
|
|
23
28
|
data_root = feature_implementation_dir / "data"
|
|
@@ -259,6 +264,114 @@ def load_dataset_ids(dataset_csv: Path, max_rows: int = 200000) -> list[str]:
|
|
|
259
264
|
return ids
|
|
260
265
|
|
|
261
266
|
|
|
267
|
+
def parse_metadata_from_dataset_folder(dataset_folder: str) -> tuple[str, str, int]:
    """Extract ``(dataset_id, region, delay)`` from a dataset folder name.

    The expected layout is ``<dataset_id>_<region>_delay<delay>``. The dataset
    id may itself contain underscores; the region and delay are always the
    last two underscore-separated segments.

    Args:
        dataset_folder: Folder name, e.g. ``"fundamental72_GLB_delay1"``.
            Surrounding whitespace is ignored; ``None`` is treated as empty.

    Returns:
        A ``(dataset_id, region, delay)`` tuple with ``delay`` as an ``int``.

    Raises:
        ValueError: If the name has fewer than three segments, the last
            segment is not ``delay<digits>``, or the dataset id or region
            segment is empty.
    """
    name = (dataset_folder or "").strip()

    # Split from the right so underscores inside the dataset id are preserved.
    pieces = name.rsplit("_", 2)
    if len(pieces) != 3:
        raise ValueError(f"Invalid dataset folder name: {dataset_folder}")
    dataset_id, region, delay_part = pieces

    m = re.fullmatch(r"delay(\d+)", delay_part)
    if not m:
        raise ValueError(f"Invalid dataset folder name (missing delay suffix): {dataset_folder}")
    delay = int(m.group(1))

    if not dataset_id:
        raise ValueError(f"Invalid dataset folder name (missing dataset id): {dataset_folder}")
    # A name such as "ds__delay1" yields an empty region segment; reject it
    # here instead of returning an unusable region.
    if not region:
        raise ValueError(f"Invalid dataset folder name (missing region): {dataset_folder}")

    return dataset_id, region, delay
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def ensure_dataset_csv_data_type(
    feature_implementation_dir: Path,
    scripts_dir: Path,
    dataset_folder: str,
    data_type: str,
) -> None:
    """Ensure the dataset CSV corresponds to the requested data_type.

    For enhance flow, the goal is to constrain implement_idea.py placeholder matching.
    When data_type is VECTOR, the dataset CSV is refetched as VECTOR by running
    ``fetch_dataset.py`` from ``scripts_dir``; any other data_type is a no-op.

    Args:
        feature_implementation_dir: Directory whose ``data/<dataset_folder>/``
            sub-folder holds ``<dataset_folder>.csv``.
        scripts_dir: Directory containing ``fetch_dataset.py``; also used as the
            working directory of the subprocess.
        dataset_folder: Folder name in the form ``<dataset_id>_<region>_delay<delay>``.
        data_type: Requested data type; compared case-insensitively after
            stripping, with an empty/None value treated as ``MATRIX``.

    Raises:
        ValueError: If ``dataset_folder`` cannot be parsed.
        FileNotFoundError: If ``fetch_dataset.py`` is missing, or the CSV does not
            exist after a successful rebuild.
        RuntimeError: If the fetch subprocess exits with a non-zero status.
    """
    data_type = (data_type or "MATRIX").strip().upper()
    if data_type != "VECTOR":
        return

    dataset_id, region, delay = parse_metadata_from_dataset_folder(dataset_folder)
    fetch_script = scripts_dir / "fetch_dataset.py"
    if not fetch_script.exists():
        raise FileNotFoundError(f"fetch_dataset.py not found: {fetch_script}")

    # IMPORTANT: do NOT delete the whole dataset folder.
    # That folder may contain idea_*.json, enhanced_*.json and other artifacts.
    # We only need to ensure the CSV is VECTOR-only.
    data_dir = feature_implementation_dir / "data" / dataset_folder
    dataset_csv = data_dir / f"{dataset_folder}.csv"
    backup_csv: Path | None = None
    if dataset_csv.exists():
        backup_csv = dataset_csv.with_suffix(dataset_csv.suffix + f".bak_{int(time.time())}")
        try:
            print(f"DATA_TYPE=VECTOR => backing up existing CSV: {dataset_csv} -> {backup_csv}")
            shutil.copy2(dataset_csv, backup_csv)
        except Exception as exc:
            # The backup is best-effort: keep going, but say so, because a failed
            # rebuild can then no longer be rolled back.
            print(
                f"WARNING: could not back up {dataset_csv} ({exc}); continuing without a backup.",
                file=sys.stderr,
            )
            backup_csv = None

    # Keep defaults consistent with fetch_dataset.py unless explicitly overridden.
    universe = (os.environ.get("UNIVERSE") or "TOP3000").strip()
    instrument_type = (os.environ.get("INSTRUMENT_TYPE") or "EQUITY").strip()

    cmd = [
        sys.executable,
        str(fetch_script),
        "--datasetid",
        dataset_id,
        "--region",
        region,
        "--delay",
        str(delay),
        "--universe",
        universe,
        "--instrument-type",
        instrument_type,
        "--data-type",
        "VECTOR",
    ]
    print(f"Rebuilding dataset CSV as VECTOR via: {' '.join(cmd)}")

    result = subprocess.run(
        cmd,
        cwd=scripts_dir,
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        # Roll back CSV if we backed it up.
        restore_note = ""
        if backup_csv and backup_csv.exists():
            try:
                print("VECTOR rebuild failed; restoring previous CSV backup.")
                shutil.copy2(backup_csv, dataset_csv)
            except Exception as exc:
                # Surface the failed rollback in the error below instead of
                # hiding it; the backup file itself is left in place.
                restore_note = f"\nWARNING: restoring backup {backup_csv} failed: {exc}"
        raise RuntimeError(
            "VECTOR dataset rebuild failed: "
            + " ".join(cmd)
            + f"\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
            + restore_note
        )
    if result.stdout.strip():
        print(result.stdout)
    if result.stderr.strip():
        print(result.stderr, file=sys.stderr)
    if not dataset_csv.exists():
        raise FileNotFoundError(f"VECTOR dataset rebuild succeeded but CSV not found: {dataset_csv}")
|
|
373
|
+
|
|
374
|
+
|
|
262
375
|
def normalize_for_validator(expression: str) -> str:
|
|
263
376
|
"""Normalize expressions to satisfy validator rules (e.g., winsorize std=).
|
|
264
377
|
|
|
@@ -350,7 +463,6 @@ def main():
|
|
|
350
463
|
- IDEA_JSON: absolute/relative path to a specific idea_*.json
|
|
351
464
|
- MOONSHOT_API_KEY / MOONSHOT_BASE_URL / MOONSHOT_MODEL
|
|
352
465
|
- MAX_ENHANCED_TEMPLATES (default 60)
|
|
353
|
-
- NO_IMPLEMENT=1 to skip implement_idea.py
|
|
354
466
|
"""
|
|
355
467
|
|
|
356
468
|
idea_json_env = os.environ.get("IDEA_JSON", "").strip()
|
|
@@ -402,6 +514,18 @@ def main():
|
|
|
402
514
|
dataset_id_from_name, region_from_name, delay_from_name = parsed
|
|
403
515
|
dataset_folder = f"{dataset_id_from_name}_{region_from_name}_delay{delay_from_name}"
|
|
404
516
|
|
|
517
|
+
data_type = (os.environ.get("DATA_TYPE") or "MATRIX").strip()
|
|
518
|
+
if data_type not in ("MATRIX", "VECTOR"):
|
|
519
|
+
data_type = "MATRIX"
|
|
520
|
+
|
|
521
|
+
# Guarantee implement_idea sees only VECTOR ids by rebuilding the dataset CSV as VECTOR.
|
|
522
|
+
ensure_dataset_csv_data_type(
|
|
523
|
+
feature_implementation_dir=feature_implementation_dir,
|
|
524
|
+
scripts_dir=scripts_dir,
|
|
525
|
+
dataset_folder=dataset_folder,
|
|
526
|
+
data_type=data_type,
|
|
527
|
+
)
|
|
528
|
+
|
|
405
529
|
# Validate dataset CSV exists to ensure implement_idea can parse placeholders.
|
|
406
530
|
dataset_csv = feature_implementation_dir / "data" / dataset_folder / f"{dataset_folder}.csv"
|
|
407
531
|
if not dataset_csv.exists():
|
|
@@ -445,14 +569,14 @@ def main():
|
|
|
445
569
|
[
|
|
446
570
|
"An alpha template is a reusable recipe that captures an economic idea and leaves “slots” (data fields, operators, groups, decay, neutralization choices, etc.) to instantiate many candidate alphas. Typical structure: clean data (backfill, winsorize) → transform/compare across time or peers → rank/neutralize → (optionally) decay/turnover tune. Templates encourage systematic search, reuse, and diversification while keeping an explicit economic rationale.",
|
|
447
571
|
"",
|
|
448
|
-
"Some Example Templates and rationales",
|
|
572
|
+
"Some Example Templates and rationales to help you understand the format",
|
|
449
573
|
"",
|
|
450
574
|
"CAPM residual (market/sector-neutral return): ts_regression(returns, group_mean(returns, log(ts_mean(cap,21)), sector), 252, rettype=0) after backfill+winsorize. Rationale: strip market/sector beta to isolate idiosyncratic alpha; sector-weighted by smoothed log-cap to reduce large-cap dominance.",
|
|
451
575
|
"CAPM beta (slope) template: same regression with rettype=2; pre-clean target/market (ts_backfill(...,63) + winsorize(std=4)). Rationale: rank stocks by relative risk within sector; long low-β, short high-β, or study β dispersion across groups.",
|
|
452
|
-
"CAPM generalized to any feature: data = winsorize(ts_backfill(
|
|
453
|
-
"Actual vs estimate spread (analyst): group_zscore( group_zscore(
|
|
454
|
-
"Analyst term-structure (fp1 vs fy1/fp2/fy2): group_zscore( group_zscore(
|
|
455
|
-
"Option Greeks net spread: group_operator(
|
|
576
|
+
"CAPM generalized to any feature: data = winsorize(ts_backfill({data},63),std=4); data_gpm = group_mean(data, log(ts_mean(cap,21)), sector); resid = ts_regression(data, data_gpm, 252, rettype=0). Rationale: pull out the component unexplained by group average of same feature; reduces common-mode exposure.",
|
|
577
|
+
"Actual vs estimate spread (analyst): group_zscore( group_zscore({act}, industry) – group_zscore({est}, industry), industry ) or the abstracted group_compare(diff(group_compare(act,...), group_compare(est,...)), ...). Rationale: surprise/beat-miss signal within industry, normalized to peers to avoid level bias.",
|
|
578
|
+
"Analyst term-structure (fp1 vs fy1/fp2/fy2): group_zscore( group_zscore({mean_eps_period1}, industry) – group_zscore({mean_eps_period2}, industry), industry ) with operator/group slots. Rationale: cross-period expectation steepness; rising near-term vs long-term forecasts can flag momentum/inflection.",
|
|
579
|
+
"Option Greeks net spread: group_operator({put_greek} - {call_greek}, {grouping_data}) over industry/sector (Delta/Gamma/Vega/Theta). Rationale: options-implied sentiment/convexity skew vs peers; outlier net Greeks may precede spot moves; extend with multi-Greek composites or time-series deltas.",
|
|
456
580
|
"",
|
|
457
581
|
"based on the following guidance of how to make a data collation template into a signal, and guidance on how to utilize the best of operators.",
|
|
458
582
|
"",
|
|
@@ -465,6 +589,8 @@ def main():
|
|
|
465
589
|
guide2,
|
|
466
590
|
"--------------",
|
|
467
591
|
"",
|
|
592
|
+
VECTOR_DATA_TYPE_HINT if data_type == "VECTOR" else "",
|
|
593
|
+
"",
|
|
468
594
|
"Return ONLY valid JSON (no markdown / no code fences).",
|
|
469
595
|
]
|
|
470
596
|
)
|