PyPI - cnhkmcp - Versions diffs - 2.3.0__py3-none-any.whl → 2.3.2__py3-none-any.whl - Mend

cnhkmcp 2.3.0py3-none-any.whl → 2.3.2py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

cnhkmcp/__init__.py CHANGED Viewed

@@ -50,7 +50,7 @@ from .untracked.forum_functions import (
     read_full_forum_post
 )
-__version__ = "2.3.0"
+__version__ = "2.3.2"
 __author__ = "CNHK"
 __email__ = "cnhk@example.com"

cnhkmcp/untracked/APP/Tranformer/parsetab.py ADDED Viewed

@@ -0,0 +1,60 @@
+# parsetab.py
+# This file is automatically generated. Do not edit.
+# pylint: disable=W,C,R
+_tabversion = '3.10'
+_lr_method = 'LALR'
+_lr_signature = 'ASSIGN BOOLEAN CATEGORY COMMA DIVIDE EQUAL FIELD FUNCTION GREATER GREATEREQUAL IDENTIFIER LESS LESSEQUAL LPAREN MINUS NOTEQUAL NUMBER PLUS RPAREN STRING TIMESexpression : comparison\n| expression EQUAL comparison\n| expression NOTEQUAL comparison\n| expression GREATER comparison\n| expression LESS comparison\n| expression GREATEREQUAL comparison\n| expression LESSEQUAL comparisoncomparison : term\n| comparison PLUS term\n| comparison MINUS termterm : factor\n| term TIMES factor\n| term DIVIDE factorfactor : NUMBER\n| STRING\n| FIELD\n| CATEGORY\n| IDENTIFIER\n| BOOLEAN\n| MINUS factor\n| LPAREN expression RPAREN\n| function_callfunction_call : FUNCTION LPAREN args RPARENargs : arg_list\n| emptyarg_list : arg\n| arg_list COMMA argarg : expression\n| IDENTIFIER ASSIGN expressionempty :'
+_lr_action_items = {'NUMBER':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,]),'STRING':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,]),'FIELD':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,]),'CATEGORY':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,]),'IDENTIFIER':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[10,10,10,10,10,10,10,10,10,10,10,10,10,44,44,10,]),'BOOLEAN':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,]),'MINUS':([0,2,3,4,5,6,7,8,9,10,11,12,13,15,16,17,18,19,20,21,22,23,24,25,27,28,29,30,31,32,33,34,35,36,37,38,44,45,46,47,],[4,22,-8,4,-11,-14,-15,-16,-17,-18,-19,4,-22,4,4,4,4,4,4,4,4,4,4,-20,4,22,22,22,22,22,22,-9,-10,-12,-13,-21,-18,-23,4,4,]),'LPAREN':([0,4,12,14,15,16,17,18,19,20,21,22,23,24,27,46,47,],[12,12,12,27,12,12,12,12,12,12,12,12,12,12,12,12,12,]),'FUNCTION':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,]),'$end':([1,2,3,5,6,7,8,9,10,11,13,25,28,29,30,31,32,33,34,35,36,37,38,45,],[0,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,-23,]),'EQUAL':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[15,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,15,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,15,-18,-23,15,]),'NOTEQUAL':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[16,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,16,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,16,-18,-23,16,]),'GREATER':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[17,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,17,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,17,-18,-23,17,]),'LESS':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[18,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,18,-2,-3,-4,-5,-6,-7
,-9,-10,-12,-13,-21,18,-18,-23,18,]),'GREATEREQUAL':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[19,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,19,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,19,-18,-23,19,]),'LESSEQUAL':([1,2,3,5,6,7,8,9,10,11,13,25,26,28,29,30,31,32,33,34,35,36,37,38,43,44,45,49,],[20,-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,20,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,20,-18,-23,20,]),'RPAREN':([2,3,5,6,7,8,9,10,11,13,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,48,49,],[-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,38,-30,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,45,-24,-25,-26,-28,-18,-23,-27,-29,]),'COMMA':([2,3,5,6,7,8,9,10,11,13,25,28,29,30,31,32,33,34,35,36,37,38,40,42,43,44,45,48,49,],[-1,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,-2,-3,-4,-5,-6,-7,-9,-10,-12,-13,-21,46,-26,-28,-18,-23,-27,-29,]),'PLUS':([2,3,5,6,7,8,9,10,11,13,25,28,29,30,31,32,33,34,35,36,37,38,44,45,],[21,-8,-11,-14,-15,-16,-17,-18,-19,-22,-20,21,21,21,21,21,21,-9,-10,-12,-13,-21,-18,-23,]),'TIMES':([3,5,6,7,8,9,10,11,13,25,34,35,36,37,38,44,45,],[23,-11,-14,-15,-16,-17,-18,-19,-22,-20,23,23,-12,-13,-21,-18,-23,]),'DIVIDE':([3,5,6,7,8,9,10,11,13,25,34,35,36,37,38,44,45,],[24,-11,-14,-15,-16,-17,-18,-19,-22,-20,24,24,-12,-13,-21,-18,-23,]),'ASSIGN':([44,],[47,]),}
+_lr_action = {}
+for _k, _v in _lr_action_items.items():
+   for _x,_y in zip(_v[0],_v[1]):
+      if not _x in _lr_action:  _lr_action[_x] = {}
+      _lr_action[_x][_k] = _y
+del _lr_action_items
+_lr_goto_items = {'expression':([0,12,27,46,47,],[1,26,43,43,49,]),'comparison':([0,12,15,16,17,18,19,20,27,46,47,],[2,2,28,29,30,31,32,33,2,2,2,]),'term':([0,12,15,16,17,18,19,20,21,22,27,46,47,],[3,3,3,3,3,3,3,3,34,35,3,3,3,]),'factor':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[5,25,5,5,5,5,5,5,5,5,5,36,37,5,5,5,]),'function_call':([0,4,12,15,16,17,18,19,20,21,22,23,24,27,46,47,],[13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,]),'args':([27,],[39,]),'arg_list':([27,],[40,]),'empty':([27,],[41,]),'arg':([27,46,],[42,48,]),}
+_lr_goto = {}
+for _k, _v in _lr_goto_items.items():
+   for _x, _y in zip(_v[0], _v[1]):
+       if not _x in _lr_goto: _lr_goto[_x] = {}
+       _lr_goto[_x][_k] = _y
+del _lr_goto_items
+_lr_productions = [
+  ("S' -> expression","S'",1,None,None,None),
+  ('expression -> comparison','expression',1,'p_expression','validator.py',386),
+  ('expression -> expression EQUAL comparison','expression',3,'p_expression','validator.py',387),
+  ('expression -> expression NOTEQUAL comparison','expression',3,'p_expression','validator.py',388),
+  ('expression -> expression GREATER comparison','expression',3,'p_expression','validator.py',389),
+  ('expression -> expression LESS comparison','expression',3,'p_expression','validator.py',390),
+  ('expression -> expression GREATEREQUAL comparison','expression',3,'p_expression','validator.py',391),
+  ('expression -> expression LESSEQUAL comparison','expression',3,'p_expression','validator.py',392),
+  ('comparison -> term','comparison',1,'p_comparison','validator.py',399),
+  ('comparison -> comparison PLUS term','comparison',3,'p_comparison','validator.py',400),
+  ('comparison -> comparison MINUS term','comparison',3,'p_comparison','validator.py',401),
+  ('term -> factor','term',1,'p_term','validator.py',408),
+  ('term -> term TIMES factor','term',3,'p_term','validator.py',409),
+  ('term -> term DIVIDE factor','term',3,'p_term','validator.py',410),
+  ('factor -> NUMBER','factor',1,'p_factor','validator.py',417),
+  ('factor -> STRING','factor',1,'p_factor','validator.py',418),
+  ('factor -> FIELD','factor',1,'p_factor','validator.py',419),
+  ('factor -> CATEGORY','factor',1,'p_factor','validator.py',420),
+  ('factor -> IDENTIFIER','factor',1,'p_factor','validator.py',421),
+  ('factor -> BOOLEAN','factor',1,'p_factor','validator.py',422),
+  ('factor -> MINUS factor','factor',2,'p_factor','validator.py',423),
+  ('factor -> LPAREN expression RPAREN','factor',3,'p_factor','validator.py',424),
+  ('factor -> function_call','factor',1,'p_factor','validator.py',425),
+  ('function_call -> FUNCTION LPAREN args RPAREN','function_call',4,'p_function_call','validator.py',453),
+  ('args -> arg_list','args',1,'p_args','validator.py',457),
+  ('args -> empty','args',1,'p_args','validator.py',458),
+  ('arg_list -> arg','arg_list',1,'p_arg_list','validator.py',465),
+  ('arg_list -> arg_list COMMA arg','arg_list',3,'p_arg_list','validator.py',466),
+  ('arg -> expression','arg',1,'p_arg','validator.py',473),
+  ('arg -> IDENTIFIER ASSIGN expression','arg',3,'p_arg','validator.py',474),
+  ('empty -> <empty>','empty',0,'p_empty','validator.py',481),
+]

cnhkmcp/untracked/APP/Tranformer/validator.py CHANGED Viewed

@@ -170,7 +170,10 @@ supported_functions = {
     'scale_down': {'min_args': 2, 'max_args': 2, 'arg_types': ['expression', 'number'], 'param_names': ['x', 'constant']},
     # Arithmetic 类别函数
-    'add': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'expression', 'boolean']},  # add(x, y, filter=false)
+    # add(x, y, ..., filter=false)
+    # NOTE: add() is variadic (>=2 terms) with an optional boolean filter flag.
+    # We validate it with custom logic in validate_function().
+    'add': {'min_args': 2, 'max_args': 101, 'arg_types': ['expression'] * 101},
     'multiply': {'min_args': 2, 'max_args': 100, 'arg_types': ['expression'] * 99 + ['boolean'], 'param_names': ['x', 'y', 'filter']},  # multiply(x, y, ..., filter=false)
     'sign': {'min_args': 1, 'max_args': 1, 'arg_types': ['expression']},
     'subtract': {'min_args': 2, 'max_args': 3, 'arg_types': ['expression', 'expression', 'boolean']},  # subtract(x, y, filter=false)
@@ -501,6 +504,10 @@ class ExpressionValidator:
         if not function_info:
             return [f"未知函数: {function_name}"]
+        # Custom validation for variadic functions with optional flags
+        if function_name == 'add':
+            return self._validate_add(args, is_in_group_arg)
         errors = []
         # 检查参数数量
@@ -591,9 +598,9 @@ class ExpressionValidator:
             if arg.node_type != 'number':
                 errors.append(f"参数 {arg_index+1} 应该是一个数字，但得到 {arg.node_type}")
         elif expected_type == 'boolean':
-            # 布尔值可以是数字（0/1）
-            if arg.node_type != 'number':
-                errors.append(f"参数 {arg_index+1} 应该是一个布尔值（0/1），但得到 {arg.node_type}")
+            # 布尔值可以是 true/false 或数字（0/1）
+            if arg.node_type not in {'boolean', 'number'}:
+                errors.append(f"参数 {arg_index+1} 应该是一个布尔值（true/false 或 0/1），但得到 {arg.node_type}")
         elif expected_type == 'field':
             if arg.node_type != 'field' and arg.node_type != 'category':
                 # 允许field或category作为字段参数
@@ -610,6 +617,73 @@ class ExpressionValidator:
             # group函数的category参数可以是任何类型（field、category等），不进行类型校验
         return errors
+    def _validate_add(self, args: List[Any], is_in_group_arg: bool = False) -> List[str]:
+        """Validate add(x, y, ..., filter=false).
+        Rules:
+        - At least 2 positional expression terms.
+        - Optional filter flag can be provided as:
+          - named argument: filter=<boolean>
+          - last positional argument: <boolean> or 0/1
+        """
+        errors: List[str] = []
+        if len(args) < 2:
+            return [f"函数 add 需要至少 2 个参数，但只提供了 {len(args)}"]
+        named_filter_nodes: List[ASTNode] = []
+        positional_nodes: List[ASTNode] = []
+        for arg in args:
+            if isinstance(arg, dict) and arg.get('type') == 'named':
+                name = arg.get('name')
+                value = arg.get('value')
+                if name != 'filter':
+                    errors.append(f"函数 add 不存在参数 '{name}'")
+                    continue
+                if not hasattr(value, 'node_type'):
+                    errors.append("函数 add 的参数 filter 格式错误")
+                    continue
+                named_filter_nodes.append(value)
+            elif isinstance(arg, dict) and arg.get('type') == 'positional':
+                value = arg.get('value')
+                if hasattr(value, 'node_type'):
+                    positional_nodes.append(value)
+                else:
+                    errors.append("函数 add 的位置参数格式错误")
+            elif hasattr(arg, 'node_type'):
+                positional_nodes.append(arg)
+            else:
+                errors.append("函数 add 的参数格式错误")
+        if len(named_filter_nodes) > 1:
+            errors.append("函数 add 的参数 'filter' 只能出现一次")
+        positional_filter_node: Optional[ASTNode] = None
+        # Only infer a positional filter flag when:
+        # - no named filter is provided
+        # - there are at least 3 positional args (x, y, filter)
+        # - the last arg is boolean or numeric 0/1
+        if not named_filter_nodes and len(positional_nodes) >= 3:
+            last = positional_nodes[-1]
+            if last.node_type == 'boolean' or (last.node_type == 'number' and last.value in {0, 1}):
+                positional_filter_node = positional_nodes.pop()
+        if len(positional_nodes) < 2:
+            errors.append(f"函数 add 需要至少 2 个输入项（不含filter），但只提供了 {len(positional_nodes)}")
+        for idx, node in enumerate(positional_nodes):
+            errors.extend(self._validate_arg_type(node, 'expression', idx, 'add', is_in_group_arg))
+        if positional_filter_node is not None and named_filter_nodes:
+            errors.append("函数 add 的 filter 不能同时用位置参数和命名参数传递")
+        if positional_filter_node is not None:
+            errors.extend(self._validate_arg_type(positional_filter_node, 'boolean', len(positional_nodes), 'add', is_in_group_arg))
+        if named_filter_nodes:
+            errors.extend(self._validate_arg_type(named_filter_nodes[0], 'boolean', len(positional_nodes), 'add', is_in_group_arg))
+        return errors
     def validate_ast(self, ast: Optional[ASTNode], is_in_group_arg: bool = False) -> List[str]:
         """递归验证抽象语法树"""

cnhkmcp/untracked/APP/static/inspiration.js CHANGED Viewed

@@ -15,7 +15,8 @@ let inspirationState = {
     selectedDatasetCategory: null,
     pipelineTaskId: null,
     pipelineEventSource: null,
-    enhanceTaskId: null
+    enhanceTaskId: null,
+    enhanceDataType: 'MATRIX'
 };
 document.addEventListener('DOMContentLoaded', function() {
@@ -47,8 +48,7 @@ document.addEventListener('DOMContentLoaded', function() {
     const enhanceBtn = document.getElementById('inspire-enhance');
     if (enhanceBtn) {
         enhanceBtn.addEventListener('click', () => {
-            const input = document.getElementById('inspire-idea-file');
-            if (input) input.click();
+            openEnhanceDataTypeModal();
         });
     }
     const enhanceInput = document.getElementById('inspire-idea-file');
@@ -59,6 +59,14 @@ document.addEventListener('DOMContentLoaded', function() {
     if (enhanceDlBtn) {
         enhanceDlBtn.addEventListener('click', downloadEnhanceZip);
     }
+    // Enhance data type modal wiring
+    const dtypeClose = document.getElementById('enhance-datatype-close');
+    if (dtypeClose) dtypeClose.addEventListener('click', closeEnhanceDataTypeModal);
+    const dtypeCancel = document.getElementById('enhance-datatype-cancel');
+    if (dtypeCancel) dtypeCancel.addEventListener('click', closeEnhanceDataTypeModal);
+    const dtypeConfirm = document.getElementById('enhance-datatype-confirm');
+    if (dtypeConfirm) dtypeConfirm.addEventListener('click', confirmEnhanceDataType);
     // Initially disable generate button until tested
     const genBtn = document.getElementById('inspire-generate');
@@ -344,6 +352,7 @@ function generateAlphaTemplates() {
     const region = document.getElementById('inspire-region').value;
     const delay = document.getElementById('inspire-delay').value;
     const universe = document.getElementById('inspire-universe').value;
+    const dataType = (document.getElementById('inspire-data-type') || {}).value || 'MATRIX';
     const outputDiv = document.getElementById('inspire-output');
     outputDiv.innerHTML = '正在生成模板... 这可能需要几分钟...';
@@ -354,7 +363,8 @@ function generateAlphaTemplates() {
         body: JSON.stringify({
             apiKey, baseUrl, model,
             region, delay, universe,
-            datasetId: inspirationState.selectedDataset
+            datasetId: inspirationState.selectedDataset,
+            dataType
         })
     })
     .then(res => res.json())
@@ -393,6 +403,7 @@ function runDirectAlphaPipeline() {
     const region = document.getElementById('inspire-region').value;
     const delay = document.getElementById('inspire-delay').value;
     const universe = document.getElementById('inspire-universe').value;
+    const dataType = (document.getElementById('inspire-data-type') || {}).value || 'MATRIX';
     const dataCategory = inspirationState.selectedDatasetCategory;
     if (!region || !delay || !universe) {
@@ -440,6 +451,7 @@ function runDirectAlphaPipeline() {
             region,
             delay,
             universe,
+            dataType,
             apiKey,
             baseUrl,
             model
@@ -640,6 +652,7 @@ function handleEnhanceFile(event) {
     formData.append('apiKey', apiKey);
     formData.append('baseUrl', baseUrl);
     formData.append('model', model);
+    formData.append('dataType', inspirationState.enhanceDataType || 'MATRIX');
     fetch('/api/inspiration/enhance-template', {
         method: 'POST',
@@ -671,6 +684,35 @@ function handleEnhanceFile(event) {
     });
 }
+function openEnhanceDataTypeModal() {
+    const modal = document.getElementById('enhanceDataTypeModal');
+    if (!modal) {
+        // Fallback: if modal missing, proceed with default.
+        const input = document.getElementById('inspire-idea-file');
+        if (input) input.click();
+        return;
+    }
+    // Default to MATRIX each time unless user already picked.
+    const sel = document.getElementById('inspire-enhance-data-type');
+    if (sel) sel.value = inspirationState.enhanceDataType || 'MATRIX';
+    modal.style.display = 'block';
+}
+function closeEnhanceDataTypeModal() {
+    const modal = document.getElementById('enhanceDataTypeModal');
+    if (modal) modal.style.display = 'none';
+}
+function confirmEnhanceDataType() {
+    const sel = document.getElementById('inspire-enhance-data-type');
+    const dt = sel ? sel.value : 'MATRIX';
+    inspirationState.enhanceDataType = (dt === 'VECTOR') ? 'VECTOR' : 'MATRIX';
+    closeEnhanceDataTypeModal();
+    const input = document.getElementById('inspire-idea-file');
+    if (input) input.click();
+}
 function startEnhanceStream(taskId, totalFiles) {
     const outputDiv = document.getElementById('inspire-output');
     const streamEl = document.getElementById('inspire-enhance-stream');

cnhkmcp/untracked/APP/templates/index.html CHANGED Viewed

@@ -870,6 +870,13 @@
                                 <label style="display: block; margin-bottom: 5px; font-size: 0.9em; color: #666;">Delay</label>
                                 <select id="inspire-delay" class="form-input" style="width: 100%;"><option>Select Region First</option></select>
                             </div>
+                            <div style="margin-bottom: 10px;">
+                                <label style="display: block; margin-bottom: 5px; font-size: 0.9em; color: #666;">data type</label>
+                                <select id="inspire-data-type" class="form-input" style="width: 100%;">
+                                    <option value="MATRIX" selected>MATRIX</option>
+                                    <option value="VECTOR">VECTOR</option>
+                                </select>
+                            </div>
                         </div>
                     </div>
                 </div>
@@ -930,6 +937,32 @@
         </div>
     </div>
+    <!-- Inspiration Enhance Data Type Modal -->
+    <div id="enhanceDataTypeModal" class="modal" style="display: none; position: fixed; z-index: 1100; left: 0; top: 0; width: 100%; height: 100%; overflow: auto; background-color: rgba(0,0,0,0.4);">
+        <div class="modal-content" style="background-color: #fefefe; margin: 10% auto; padding: 0; border: 1px solid #888; width: 520px; max-width: 90%; border-radius: 8px;">
+            <div class="modal-header" style="display: flex; justify-content: space-between; align-items: center; border-bottom: 1px solid #eee; padding: 12px 16px;">
+                <h3 style="margin: 0;">选择数据类型</h3>
+                <span id="enhance-datatype-close" style="color: #aaa; font-size: 26px; font-weight: bold; cursor: pointer;">&times;</span>
+            </div>
+            <div class="modal-body" style="padding: 16px;">
+                <div style="margin-bottom: 10px; color: #666; font-size: 13px;">
+                    请选择你要增强的历史模板对应的数据类型（区分大小写）。
+                </div>
+                <div style="margin-bottom: 10px;">
+                    <label style="display: block; margin-bottom: 6px; font-size: 0.9em; color: #666;">data type</label>
+                    <select id="inspire-enhance-data-type" class="form-input" style="width: 100%;">
+                        <option value="MATRIX" selected>MATRIX</option>
+                        <option value="VECTOR">VECTOR</option>
+                    </select>
+                </div>
+            </div>
+            <div class="modal-footer" style="display: flex; justify-content: flex-end; gap: 10px; padding: 12px 16px; border-top: 1px solid #eee;">
+                <button id="enhance-datatype-cancel" class="btn btn-outline">取消</button>
+                <button id="enhance-datatype-confirm" class="btn btn-primary">继续选择文件</button>
+            </div>
+        </div>
+    </div>
     <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
     <script src="{{ url_for('static', filename='brain.js') }}"></script>
     <script src="{{ url_for('static', filename='script.js') }}"></script>

cnhkmcp/untracked/APP/trailSomeAlphas/enhance_template.py CHANGED Viewed

@@ -2,6 +2,7 @@ import json
 import csv
 import os
 import re
+import shutil
 import subprocess
 import sys
 import time
@@ -18,6 +19,10 @@ DEFAULT_FEATURE_IMPLEMENTATION_SCRIPTS = DEFAULT_FEATURE_IMPLEMENTATION_DIR / "s
 DEFAULT_MOONSHOT_MODEL = os.environ.get("MOONSHOT_MODEL", "kimi-k2.5")
 DEFAULT_MAX_ENHANCED_TEMPLATES = int(os.environ.get("MAX_ENHANCED_TEMPLATES", "60"))
+# Extra instruction text that main() adds to its instruction list only when
+# DATA_TYPE is "VECTOR" (an empty string is used otherwise).
+# NOTE(review): the example says vec_avg(datafieldA) - vec_avg(datafieldB) "cannot"
+# be used, which seems to contradict the preceding sentence (apply a vector operator
+# first); the last sentence also says operators apply "on the vector type operator"
+# where "vector type data" was presumably meant. Confirm the intended wording before
+# changing this runtime string.
+VECTOR_DATA_TYPE_HINT = (
+	"since the data is vector type data, the data cannot be directly use. before you do any process, you should choose a vector operator to generate its statistical feature to use (if the current template did not do so or you think you can have a better choice of another vector operator). for example, if datafieldA and datafieldB are vector type data, you cannot use vec_avg(datafieldA) -  vec_avg(datafieldB). similarly, vector type operator can only be used on the vector type operator."
+)
 def find_latest_idea_json(feature_implementation_dir: Path) -> Path:
 	data_root = feature_implementation_dir / "data"
@@ -259,6 +264,114 @@ def load_dataset_ids(dataset_csv: Path, max_rows: int = 200000) -> list[str]:
 	return ids
+def parse_metadata_from_dataset_folder(dataset_folder: str) -> tuple[str, str, int]:
+	"""Extract dataset_id, region, delay from folder name like:
+	<dataset_id>_<region>_delay<delay>
+	Returns (dataset_id, region, delay).
+	"""
+	name = (dataset_folder or "").strip()
+	parts = name.split("_")
+	if len(parts) < 3:
+		raise ValueError(f"Invalid dataset folder name: {dataset_folder}")
+	delay_part = parts[-1]
+	m = re.fullmatch(r"delay(\d+)", delay_part)
+	if not m:
+		raise ValueError(f"Invalid dataset folder name (missing delay suffix): {dataset_folder}")
+	delay = int(m.group(1))
+	region = parts[-2]
+	dataset_id = "_".join(parts[:-2])
+	if not dataset_id:
+		raise ValueError(f"Invalid dataset folder name (missing dataset id): {dataset_folder}")
+	return dataset_id, region, delay
+def ensure_dataset_csv_data_type(
+	feature_implementation_dir: Path,
+	scripts_dir: Path,
+	dataset_folder: str,
+	data_type: str,
+) -> None:
+	"""Ensure the dataset CSV corresponds to the requested data_type.
+	For enhance flow, the goal is to constrain implement_idea.py placeholder matching.
+	When data_type is VECTOR, rebuild the dataset folder by refetching CSV as VECTOR.
+	"""
+	data_type = (data_type or "MATRIX").strip().upper()
+	if data_type != "VECTOR":
+		return
+	dataset_id, region, delay = parse_metadata_from_dataset_folder(dataset_folder)
+	fetch_script = scripts_dir / "fetch_dataset.py"
+	if not fetch_script.exists():
+		raise FileNotFoundError(f"fetch_dataset.py not found: {fetch_script}")
+	# IMPORTANT: do NOT delete the whole dataset folder.
+	# That folder may contain idea_*.json, enhanced_*.json and other artifacts.
+	# We only need to ensure the CSV is VECTOR-only.
+	data_dir = feature_implementation_dir / "data" / dataset_folder
+	dataset_csv = data_dir / f"{dataset_folder}.csv"
+	backup_csv: Path | None = None
+	if dataset_csv.exists():
+		backup_csv = dataset_csv.with_suffix(dataset_csv.suffix + f".bak_{int(time.time())}")
+		try:
+			print(f"DATA_TYPE=VECTOR => backing up existing CSV: {dataset_csv} -> {backup_csv}")
+			shutil.copy2(dataset_csv, backup_csv)
+		except Exception:
+			backup_csv = None
+	# Keep defaults consistent with fetch_dataset.py unless explicitly overridden.
+	universe = (os.environ.get("UNIVERSE") or "TOP3000").strip()
+	instrument_type = (os.environ.get("INSTRUMENT_TYPE") or "EQUITY").strip()
+	cmd = [
+		sys.executable,
+		str(fetch_script),
+		"--datasetid",
+		dataset_id,
+		"--region",
+		region,
+		"--delay",
+		str(delay),
+		"--universe",
+		universe,
+		"--instrument-type",
+		instrument_type,
+		"--data-type",
+		"VECTOR",
+	]
+	print(f"Rebuilding dataset CSV as VECTOR via: {' '.join(cmd)}")
+	result = subprocess.run(
+		cmd,
+		cwd=scripts_dir,
+		capture_output=True,
+		text=True,
+	)
+	if result.returncode != 0:
+		# Roll back CSV if we backed it up.
+		if backup_csv and backup_csv.exists():
+			try:
+				print("VECTOR rebuild failed; restoring previous CSV backup.")
+				shutil.copy2(backup_csv, dataset_csv)
+			except Exception:
+				pass
+		raise RuntimeError(
+			"VECTOR dataset rebuild failed: "
+			+ " ".join(cmd)
+			+ f"\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
+		)
+	if result.stdout.strip():
+		print(result.stdout)
+	if result.stderr.strip():
+		print(result.stderr, file=sys.stderr)
+	if not dataset_csv.exists():
+		raise FileNotFoundError(f"VECTOR dataset rebuild succeeded but CSV not found: {dataset_csv}")
 def normalize_for_validator(expression: str) -> str:
 	"""Normalize expressions to satisfy validator rules (e.g., winsorize std=).
@@ -350,7 +463,6 @@ def main():
 	- IDEA_JSON: absolute/relative path to a specific idea_*.json
 	- MOONSHOT_API_KEY / MOONSHOT_BASE_URL / MOONSHOT_MODEL
 	- MAX_ENHANCED_TEMPLATES (default 60)
-	- NO_IMPLEMENT=1 to skip implement_idea.py
 	"""
 	idea_json_env = os.environ.get("IDEA_JSON", "").strip()
@@ -402,6 +514,18 @@ def main():
 		dataset_id_from_name, region_from_name, delay_from_name = parsed
 		dataset_folder = f"{dataset_id_from_name}_{region_from_name}_delay{delay_from_name}"
+	data_type = (os.environ.get("DATA_TYPE") or "MATRIX").strip()
+	if data_type not in ("MATRIX", "VECTOR"):
+		data_type = "MATRIX"
+	# Guarantee implement_idea sees only VECTOR ids by rebuilding the dataset CSV as VECTOR.
+	ensure_dataset_csv_data_type(
+		feature_implementation_dir=feature_implementation_dir,
+		scripts_dir=scripts_dir,
+		dataset_folder=dataset_folder,
+		data_type=data_type,
+	)
 	# Validate dataset CSV exists to ensure implement_idea can parse placeholders.
 	dataset_csv = feature_implementation_dir / "data" / dataset_folder / f"{dataset_folder}.csv"
 	if not dataset_csv.exists():
@@ -445,14 +569,14 @@ def main():
 		[
 			"An alpha template is a reusable recipe that captures an economic idea and leaves “slots” (data fields, operators, groups, decay, neutralization choices, etc.) to instantiate many candidate alphas. Typical structure: clean data (backfill, winsorize) → transform/compare across time or peers → rank/neutralize → (optionally) decay/turnover tune. Templates encourage systematic search, reuse, and diversification while keeping an explicit economic rationale.",
 			"",
-			"Some Example Templates and rationales",
+			"Some Example Templates and rationales to help you understand the format",
 			"",
 			"CAPM residual (market/sector-neutral return): ts_regression(returns, group_mean(returns, log(ts_mean(cap,21)), sector), 252, rettype=0) after backfill+winsorize. Rationale: strip market/sector beta to isolate idiosyncratic alpha; sector-weighted by smoothed log-cap to reduce large-cap dominance.",
 			"CAPM beta (slope) template: same regression with rettype=2; pre-clean target/market (ts_backfill(...,63) + winsorize(std=4)). Rationale: rank stocks by relative risk within sector; long low-β, short high-β, or study β dispersion across groups.",
-			"CAPM generalized to any feature: data = winsorize(ts_backfill(<data>,63),std=4); data_gpm = group_mean(data, log(ts_mean(cap,21)), sector); resid = ts_regression(data, data_gpm, 252, rettype=0). Rationale: pull out the component unexplained by group average of same feature; reduces common-mode exposure.",
-			"Actual vs estimate spread (analyst): group_zscore( group_zscore(<act>, industry) – group_zscore(<est>, industry), industry ) or the abstracted group_compare(diff(group_compare(act,...), group_compare(est,...)), ...). Rationale: surprise/beat-miss signal within industry, normalized to peers to avoid level bias.",
-			"Analyst term-structure (fp1 vs fy1/fp2/fy2): group_zscore( group_zscore(anl14_mean_eps_<period1>, industry) – group_zscore(anl14_mean_eps_<period2>, industry), industry ) with operator/group slots. Rationale: cross-period expectation steepness; rising near-term vs long-term forecasts can flag momentum/inflection.",
-			"Option Greeks net spread: group_operator(<put_greek> - <call_greek>, <grouping_data>) over industry/sector (Delta/Gamma/Vega/Theta). Rationale: options-implied sentiment/convexity skew vs peers; outlier net Greeks may precede spot moves; extend with multi-Greek composites or time-series deltas.",
+			"CAPM generalized to any feature: data = winsorize(ts_backfill({data},63),std=4); data_gpm = group_mean(data, log(ts_mean(cap,21)), sector); resid = ts_regression(data, data_gpm, 252, rettype=0). Rationale: pull out the component unexplained by group average of same feature; reduces common-mode exposure.",
+			"Actual vs estimate spread (analyst): group_zscore( group_zscore({act}, industry) – group_zscore({est}, industry), industry ) or the abstracted group_compare(diff(group_compare(act,...), group_compare(est,...)), ...). Rationale: surprise/beat-miss signal within industry, normalized to peers to avoid level bias.",
+			"Analyst term-structure (fp1 vs fy1/fp2/fy2): group_zscore( group_zscore({mean_eps_period1}, industry) – group_zscore({mean_eps_period2}, industry), industry ) with operator/group slots. Rationale: cross-period expectation steepness; rising near-term vs long-term forecasts can flag momentum/inflection.",
+			"Option Greeks net spread: group_operator({put_greek} - {call_greek}, {grouping_data}) over industry/sector (Delta/Gamma/Vega/Theta). Rationale: options-implied sentiment/convexity skew vs peers; outlier net Greeks may precede spot moves; extend with multi-Greek composites or time-series deltas.",
 			"",
 			"based on the following guidance of how to make a data collation template into a signal, and guidance on how to utilize the best of operators.",
 			"",
@@ -465,6 +589,8 @@ def main():
 			guide2,
 			"--------------",
 			"",
+			VECTOR_DATA_TYPE_HINT if data_type == "VECTOR" else "",
+			"",
 			"Return ONLY valid JSON (no markdown / no code fences).",
 		]
 	)

cnhkmcp 2.3.0__py3-none-any.whl → 2.3.2__py3-none-any.whl

cnhkmcp 2.3.0py3-none-any.whl → 2.3.2py3-none-any.whl