cnhkmcp 1.8.10__py3-none-any.whl → 2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cnhkmcp/untracked/APP/MODULAR_STRUCTURE.md +38 -49
- cnhkmcp/untracked/APP/Tranformer/Transformer.py +131 -1
- cnhkmcp/untracked/APP/Tranformer/output/Alpha_candidates.json +951 -2055
- cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_error.json +261 -1
- cnhkmcp/untracked/APP/Tranformer/output/Alpha_generated_expressions_success.json +168 -1362
- cnhkmcp/untracked/APP/Tranformer/template_summary.txt +57 -1
- cnhkmcp/untracked/APP/ace.log +26 -0
- cnhkmcp/untracked/APP/give_me_idea/BRAIN_Alpha_Template_Expert_SystemPrompt.md +400 -0
- cnhkmcp/untracked/APP/give_me_idea/ace_lib.py +1489 -0
- cnhkmcp/untracked/APP/give_me_idea/alpha_data_specific_template_master.py +247 -0
- cnhkmcp/untracked/APP/give_me_idea/helpful_functions.py +180 -0
- cnhkmcp/untracked/APP/give_me_idea/what_is_Alpha_template.md +11 -0
- cnhkmcp/untracked/APP/static/brain.js +13 -3
- cnhkmcp/untracked/APP/static/inspiration.js +434 -0
- cnhkmcp/untracked/APP/templates/index.html +126 -0
- cnhkmcp/untracked/APP/usage.md +29 -3
- cnhkmcp/untracked/APP/运行打开我.py +233 -1
- {cnhkmcp-1.8.10.dist-info → cnhkmcp-2.0.dist-info}/METADATA +1 -1
- {cnhkmcp-1.8.10.dist-info → cnhkmcp-2.0.dist-info}/RECORD +23 -17
- {cnhkmcp-1.8.10.dist-info → cnhkmcp-2.0.dist-info}/WHEEL +0 -0
- {cnhkmcp-1.8.10.dist-info → cnhkmcp-2.0.dist-info}/entry_points.txt +0 -0
- {cnhkmcp-1.8.10.dist-info → cnhkmcp-2.0.dist-info}/licenses/LICENSE +0 -0
- {cnhkmcp-1.8.10.dist-info → cnhkmcp-2.0.dist-info}/top_level.txt +0 -0
|
@@ -4,21 +4,30 @@ This document describes the modular structure implemented for the BRAIN Expressi
|
|
|
4
4
|
|
|
5
5
|
## Overview
|
|
6
6
|
|
|
7
|
-
The application has been refactored to use Flask blueprints for better code organization and maintainability. The paper analysis
|
|
7
|
+
The application has been refactored to use Flask blueprints for better code organization and maintainability. The application is divided into several functional modules including paper analysis, feature engineering, idea generation, and inspiration.
|
|
8
8
|
|
|
9
9
|
## Project Structure
|
|
10
10
|
|
|
11
11
|
```
|
|
12
12
|
BRAINProject/
|
|
13
|
-
├──
|
|
13
|
+
├── 运行打开我.py # Main Flask application entry point
|
|
14
14
|
├── blueprints/ # Blueprint modules
|
|
15
15
|
│ ├── __init__.py # Package initialization
|
|
16
|
+
│ ├── feature_engineering.py # Feature engineering blueprint
|
|
17
|
+
│ ├── idea_house.py # Idea house blueprint (Coze integration)
|
|
18
|
+
│ ├── inspiration_house.py # Inspiration house blueprint
|
|
16
19
|
│ └── paper_analysis.py # Paper analysis blueprint
|
|
17
20
|
├── templates/
|
|
18
21
|
│ ├── index.html # Main page template
|
|
22
|
+
│ ├── feature_engineering.html # Feature engineering page template
|
|
23
|
+
│ ├── idea_house.html # Idea house page template
|
|
24
|
+
│ ├── inspiration_house.html # Inspiration house page template
|
|
19
25
|
│ └── paper_analysis.html # Paper analysis page template
|
|
20
26
|
├── static/
|
|
21
27
|
│ ├── script.js # Main application JavaScript
|
|
28
|
+
│ ├── feature_engineering.js # Feature engineering JavaScript
|
|
29
|
+
│ ├── idea_house.js # Idea house JavaScript
|
|
30
|
+
│ ├── inspiration_house.js # Inspiration house JavaScript
|
|
22
31
|
│ ├── paper_analysis.js # Paper analysis JavaScript
|
|
23
32
|
│ ├── brain.js # BRAIN API functions
|
|
24
33
|
│ ├── decoder.js # Template decoder functions
|
|
@@ -29,34 +38,29 @@ BRAINProject/
|
|
|
29
38
|
## Blueprint Structure
|
|
30
39
|
|
|
31
40
|
### Paper Analysis Blueprint (`blueprints/paper_analysis.py`)
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
- Main application routes
|
|
54
|
-
|
|
55
|
-
#### Features:
|
|
56
|
-
- Auto-dependency installation
|
|
57
|
-
- BRAIN API integration
|
|
58
|
-
- Session management for multiple users
|
|
59
|
-
- Template decoding functionality
|
|
41
|
+
Handles paper analysis functionality.
|
|
42
|
+
- Routes: `/paper-analysis/`
|
|
43
|
+
- Features: File processing (PDF, DOCX, etc.), Deepseek API integration.
|
|
44
|
+
|
|
45
|
+
### Feature Engineering Blueprint (`blueprints/feature_engineering.py`)
|
|
46
|
+
Handles feature engineering tasks.
|
|
47
|
+
- Routes: `/feature-engineering/`
|
|
48
|
+
- Features: Deepseek/Kimi API integration for feature generation.
|
|
49
|
+
|
|
50
|
+
### Idea House Blueprint (`blueprints/idea_house.py`)
|
|
51
|
+
Handles idea generation using Coze API.
|
|
52
|
+
- Routes: `/idea-house/`
|
|
53
|
+
- Features: Coze API integration for processing data fields.
|
|
54
|
+
|
|
55
|
+
### Inspiration House Blueprint (`blueprints/inspiration_house.py`)
|
|
56
|
+
Handles inspiration generation.
|
|
57
|
+
- Routes: `/inspiration-house/`
|
|
58
|
+
- Features: Deepseek/Kimi API integration.
|
|
59
|
+
|
|
60
|
+
### Main Application (`运行打开我.py`)
|
|
61
|
+
The main application entry point.
|
|
62
|
+
- Routes: Main application routes, BRAIN API authentication.
|
|
63
|
+
- Features: Auto-dependency installation, Blueprint registration.
|
|
60
64
|
|
|
61
65
|
## Benefits of This Structure
|
|
62
66
|
|
|
@@ -66,17 +70,6 @@ The main application now focuses on:
|
|
|
66
70
|
4. **Separation of Concerns**: Each blueprint handles a specific domain
|
|
67
71
|
5. **Testability**: Individual modules can be tested independently
|
|
68
72
|
|
|
69
|
-
## URL Structure
|
|
70
|
-
|
|
71
|
-
### Before Refactoring:
|
|
72
|
-
- `/paper-analysis` - Paper analysis page
|
|
73
|
-
- `/api/test-deepseek` - Test Deepseek API
|
|
74
|
-
- `/api/analyze-paper` - Analyze paper
|
|
75
|
-
|
|
76
|
-
### After Refactoring:
|
|
77
|
-
- `/paper-analysis/` - Paper analysis page (blueprint)
|
|
78
|
-
- `/paper-analysis/api/test-deepseek` - Test Deepseek API (blueprint)
|
|
79
|
-
- `/paper-analysis/api/analyze-paper` - Analyze paper (blueprint)
|
|
80
73
|
|
|
81
74
|
## Dependencies
|
|
82
75
|
|
|
@@ -86,13 +79,9 @@ The main application now focuses on:
|
|
|
86
79
|
- requests
|
|
87
80
|
- pandas (for BRAIN API integration)
|
|
88
81
|
|
|
89
|
-
###
|
|
90
|
-
-
|
|
91
|
-
-
|
|
92
|
-
- python-docx (for Word documents)
|
|
93
|
-
- docx2txt (for legacy DOC files)
|
|
94
|
-
- striprtf (for RTF files)
|
|
95
|
-
- PyMuPDF (alternative PDF library)
|
|
82
|
+
### Module Specific Dependencies:
|
|
83
|
+
- **Paper Analysis**: PyPDF2, pdfplumber, python-docx, docx2txt, striprtf, PyMuPDF
|
|
84
|
+
- **Idea House**: cozepy (for Coze API)
|
|
96
85
|
|
|
97
86
|
## Adding New Blueprints
|
|
98
87
|
|
|
@@ -109,7 +98,7 @@ To add a new blueprint:
|
|
|
109
98
|
def index():
|
|
110
99
|
return render_template('new_feature.html')
|
|
111
100
|
```
|
|
112
|
-
3. Import and register the blueprint in `
|
|
101
|
+
3. Import and register the blueprint in the main application file (`运行打开我.py`):
|
|
113
102
|
```python
|
|
114
103
|
from blueprints.new_feature import new_blueprint
|
|
115
104
|
app.register_blueprint(new_blueprint)
|
|
@@ -5,6 +5,58 @@ import asyncio
|
|
|
5
5
|
import openai
|
|
6
6
|
import re
|
|
7
7
|
from typing import Optional, Union # Added this import
|
|
8
|
+
try:
|
|
9
|
+
from .validator_hooks import is_valid_template_expr, has_empty_datafield_candidates
|
|
10
|
+
except Exception:
|
|
11
|
+
# Fallback for direct script execution
|
|
12
|
+
try:
|
|
13
|
+
from validator_hooks import is_valid_template_expr, has_empty_datafield_candidates
|
|
14
|
+
except Exception:
|
|
15
|
+
is_valid_template_expr = None
|
|
16
|
+
has_empty_datafield_candidates = None
|
|
17
|
+
|
|
18
|
+
# --- Validation wrappers to integrate into the pipeline ---
|
|
19
|
+
def _filter_valid_templates(
    proposed_templates: dict,
    operators_meta,
    brain_session,
    settings: dict,
    parse_alpha_code_func,
):
    """Return a dict holding only the templates that pass validation.

    Args:
        proposed_templates: Mapping of template expression -> explanation.
        operators_meta: Forwarded unchanged to ``is_valid_template_expr``.
        brain_session: Forwarded unchanged to ``is_valid_template_expr``.
        settings: Forwarded unchanged to ``is_valid_template_expr``.
        parse_alpha_code_func: Parser callable forwarded to the validator.

    Returns:
        The input mapping untouched when the validation helper or the parser
        is unavailable (safe no-op); an empty dict when there is nothing to
        validate; otherwise a new dict with the entries for which the
        validator returned a truthy value. A template whose validation raises
        is dropped and the failure is logged.
    """
    # Function-scope import so the block does not depend on import order at
    # module level.
    import logging

    if not is_valid_template_expr or not parse_alpha_code_func:
        return proposed_templates
    if not proposed_templates:
        # Nothing to validate; also avoids calling .items() on None.
        return proposed_templates or {}

    log = logging.getLogger(__name__)
    filtered = {}
    for template_expr, template_expl in proposed_templates.items():
        try:
            is_valid = is_valid_template_expr(
                template_expr,
                operators_meta,
                brain_session,
                settings,
                parse_alpha_code_func,
            )
        except Exception as exc:
            # Be conservative: drop on exceptions, but leave a trace so a
            # broken validator is distinguishable from an invalid template.
            log.warning(
                "Dropping template %r: validation raised %s: %s",
                template_expr,
                type(exc).__name__,
                exc,
            )
            continue
        if is_valid:
            filtered[template_expr] = template_expl
    return filtered
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _should_skip_due_to_empty_candidates(populated_info: dict) -> bool:
    """Report whether ``has_empty_datafield_candidates`` flags this template.

    Args:
        populated_info: Populated-template information, passed unchanged to
            ``has_empty_datafield_candidates``.

    Returns:
        ``True`` when the helper returns a truthy value. ``False`` when the
        helper is unavailable (safe no-op fallback), returns a falsy value,
        or raises. The result is always a real ``bool``, matching the
        annotation, whatever type the helper returns.
    """
    # Function-scope import so the block does not depend on import order at
    # module level.
    import logging

    if not has_empty_datafield_candidates:
        return False
    try:
        return bool(has_empty_datafield_candidates(populated_info))
    except Exception as exc:
        # Best effort: a failing helper must not cause a template to be
        # skipped, but the failure should remain visible.
        logging.getLogger(__name__).warning(
            "Empty-candidate check failed (%s: %s); not skipping template",
            type(exc).__name__,
            exc,
        )
        return False
|
|
8
60
|
import logging
|
|
9
61
|
import pandas as pd
|
|
10
62
|
import os
|
|
@@ -36,7 +88,8 @@ DATA_CATEGORIES = None
|
|
|
36
88
|
|
|
37
89
|
template_summary = """# BRAIN论坛Alpha模板精华总结
|
|
38
90
|
|
|
39
|
-
本文档旨在系统性地整理和总结优秀Alpha
|
|
91
|
+
本文档旨在系统性地整理和总结优秀Alpha模板,它是一种可复用的标准化框架性表达式,它承载着特定的经济逻辑,并预留出若干 “配置项”(包括数据字段、算子、分组方式、衰减规则、中性化方案等),用于生成多个候选阿尔法因子。其典型流程为:数据清洗(数据回填、缩尾处理)→ 跨时间或跨标的维度进行转换 / 对比 → 排序 / 中性化处理 →(可选步骤)衰减调整 / 换手率优化。这种模板模式能够推动系统化的因子挖掘、复用与多元化配置,同时确保每一个因子都具备清晰可追溯的经济逻辑支撑。
|
|
92
|
+
以下每个模板都附有其核心思想、变量说明、适用场景及原帖链接,方便您理解、应用和进一步探索。
|
|
40
93
|
使用时请思考如何将下列模板与有的Alpha表达式结合,创造出新的模板来捕捉和发现市场规律,找到”好“公司和”坏“公司
|
|
41
94
|
**使用前请注意:**
|
|
42
95
|
* **过拟合风险**:部分模板可能存在过拟合风险,请谨慎使用,并结合IS-Ladder测试、多市场回测等方法进行验证。
|
|
@@ -387,6 +440,61 @@ template_summary = """# BRAIN论坛Alpha模板精华总结
|
|
|
387
440
|
* **优化方向**:
|
|
388
441
|
* **事件驱动**: 在财报日前后缩短 `ts_mean` 的窗口,提高灵敏度。
|
|
389
442
|
|
|
443
|
+
---
|
|
444
|
+
|
|
445
|
+
## 新增模板(CAPM與估值、分析師期限、期權、搜尋優化)
|
|
446
|
+
|
|
447
|
+
### 1. CAPM殘差模板(市場/行業中性收益)
|
|
448
|
+
* **表達式**: `ts_regression(returns, group_mean(returns, log(ts_mean(cap,21)), sector), 252, rettype=0)`。
|
|
449
|
+
* **核心思想**: 回歸剔除市場/行業暴露,保留超額收益殘差作為Alpha。
|
|
450
|
+
* **適用場景**: 通用起手式,回歸殘差可作後續動量或價值信號的底板。
|
|
451
|
+
* **優化**: 改`rettype=2`獲取beta斜率,用於風險排序或低/高beta組合;可加入`winsorize`、`ts_backfill`預處理。
|
|
452
|
+
|
|
453
|
+
### 2. CAPM廣義殘差(任意特徵)
|
|
454
|
+
* **表達式**: `data = winsorize(ts_backfill(<data>,63), std=4); gpm = group_mean(data, log(ts_mean(cap,21)), sector); resid = ts_regression(data, gpm, 252, rettype=0)`。
|
|
455
|
+
* **核心思想**: 將任意特徵去除組均值成分,提取行業相對的特異性部分。
|
|
456
|
+
* **適用場景**: 基本面、情緒、替代數據的組內殘差提純。
|
|
457
|
+
* **優化**: 先`group_zscore`再回歸;對`resid`再做`ts_zscore`或`ts_mean`平滑。
|
|
458
|
+
|
|
459
|
+
### 3. CAPM Beta排序模板
|
|
460
|
+
* **表達式**: `target_data = winsorize(ts_backfill(<target>,63), std=4); market_data = winsorize(ts_backfill(<market>,63), std=4); beta = ts_regression(target_data, group_mean(market_data, log(ts_mean(cap,21)), sector), 252, rettype=2)`。
|
|
461
|
+
* **核心思想**: 提取行業內相對beta,作為風險/防禦排序;低beta偏防禦,高beta偏進攻。
|
|
462
|
+
* **優化**: 行業或國家分組;可按beta分桶做長低/短高,或反向用於高波段套利。
|
|
463
|
+
|
|
464
|
+
### 4. 實際-預估差異模板(Analyst Surprise)
|
|
465
|
+
* **表達式**: `group_zscore(subtract(group_zscore(<act>, industry), group_zscore(<est>, industry)), industry)`。
|
|
466
|
+
* **核心思想**: 行業內標準化後的實際值與預估值差,捕捉超預期或低於預期的驚喜。
|
|
467
|
+
* **適用場景**: analyst7/analyst14/earnings估值類字段。
|
|
468
|
+
* **優化**: 對差分再做`ts_zscore`;門檻交易只在|z|>1.5時開倉。
|
|
469
|
+
|
|
470
|
+
### 5. 分析師期限結構模板(近遠期預估斜率)
|
|
471
|
+
* **表達式**: `group_zscore(subtract(group_zscore(anl14_mean_eps_<p1>, industry), group_zscore(anl14_mean_eps_<p2>, industry)), industry)`,`<p1>/<p2>`為fp1/fp2/fy1/fy2等。
|
|
472
|
+
* **核心思想**: 比較短期與長期預估的行業內斜率,捕捉預期加速或鈍化。
|
|
473
|
+
* **適用場景**: analyst14/15 期別字段;適用成長/拐點挖掘。
|
|
474
|
+
* **優化**: 擴展到多期間差分或`ts_delta`跟蹤斜率變化;對斜率做`rank`或`winsorize`。
|
|
475
|
+
|
|
476
|
+
### 6. 期權Greeks淨值模板
|
|
477
|
+
* **表達式**: `group_operator(<put_greek> - <call_greek>, <group>)`,Greek可選Delta/Gamma/Vega/Theta。
|
|
478
|
+
* **核心思想**: 同組內看多vs看空的期權敏感度差,反映隱含情緒或凸性差異。
|
|
479
|
+
* **適用場景**: Option數據集;行業或市值分組下的情緒/波動信號。
|
|
480
|
+
* **優化**: 多Greek加權組合;對淨值再`ts_mean`平滑;事件期(財報)可降權或過濾。
|
|
481
|
+
|
|
482
|
+
### 7. IV Skew動量擴展
|
|
483
|
+
* **表達式**: `ts_delta(implied_volatility_call_<w>, <p>) - ts_delta(implied_volatility_put_<w>, <p>)`。
|
|
484
|
+
* **核心思想**: Call與Put隱含波動變化差捕捉情緒轉折;可做多情緒改善、做空情緒惡化。
|
|
485
|
+
* **優化**: 加`trade_when(abs(skew)>thr)`門檻;財報前後縮窗;行業中性。
|
|
486
|
+
|
|
487
|
+
### 8. 殘差動量精簡版
|
|
488
|
+
* **表達式**: `res = regression_neut(returns, <common_factor_matrix>); ts_mean(res, <window>)`。
|
|
489
|
+
* **核心思想**: 先剝離市場/風格暴露,再對特異收益做動量;較原版多重回歸更輕量。
|
|
490
|
+
* **優化**: 使用`ts_decay_linear`增加近期權重;行業內`group_rank`提升截面穩定度。
|
|
491
|
+
|
|
492
|
+
### 9. 分紅/現金流組間殘差(簡版)
|
|
493
|
+
* **表達式**: `alpha = ts_zscore(ts_backfill(<cf_or_div_field>,90)); g = group_mean(alpha, <group>, <weight_opt>); resid = alpha - g; group_zscore(resid, <group>)`。
|
|
494
|
+
* **核心思想**: 先回填平滑,再對組均值做殘差,捕捉組內相對高/低分紅或現金流質量。
|
|
495
|
+
* **適用場景**: fnd8/fnd6/topdiv等分紅現金流字段;行業/國家分組。
|
|
496
|
+
* **優化**: 權重可用log(cap)或vol逆;對resid再做`ts_mean`平滑。
|
|
497
|
+
|
|
390
498
|
"""
|
|
391
499
|
|
|
392
500
|
class SingleSession(requests.Session):
|
|
@@ -1607,6 +1715,23 @@ async def generate_new_alphas(alpha_description, brain_session, template_summary
|
|
|
1607
1715
|
print("\n--- Proposed Alpha Templates (JSON) (建议的Alpha模板,多样性会受到模型和模板总结文档的影响) ---")
|
|
1608
1716
|
print(json.dumps(proposed_templates, indent=4))
|
|
1609
1717
|
|
|
1718
|
+
# --- Validation: Drop templates with suspicious literal identifiers ---
|
|
1719
|
+
try:
|
|
1720
|
+
operators_meta = get_brain_operators().get('operators', [])
|
|
1721
|
+
proposed_templates = _filter_valid_templates(
|
|
1722
|
+
proposed_templates,
|
|
1723
|
+
operators_meta,
|
|
1724
|
+
brain_session,
|
|
1725
|
+
details.get('settings', {}),
|
|
1726
|
+
parse_alpha_code,
|
|
1727
|
+
)
|
|
1728
|
+
except Exception as e:
|
|
1729
|
+
print(f"⚠ 模板校验步骤出现异常,跳过校验: {e}")
|
|
1730
|
+
|
|
1731
|
+
if not proposed_templates:
|
|
1732
|
+
print("❌ 所有模板在校验后被丢弃,无法继续。")
|
|
1733
|
+
sys.exit(1)
|
|
1734
|
+
|
|
1610
1735
|
# --- Step 5: Process all proposed templates and gather candidates ---
|
|
1611
1736
|
# --- Step 6: Prepare for Output ---
|
|
1612
1737
|
# Ensure the output directory exists next to this script
|
|
@@ -1625,6 +1750,11 @@ async def generate_new_alphas(alpha_description, brain_session, template_summary
|
|
|
1625
1750
|
print(f"\n--- Populating template (正在填充模板): '{template_expr}' ---")
|
|
1626
1751
|
try:
|
|
1627
1752
|
populated_info = await populate_template(brain_session, details, template_expr, template_expl, operator_summary, llm_client, top_n_datafield=top_n_datafield, user_region=user_region, user_universe=user_universe, user_delay=user_delay, user_category=user_category, user_data_type=user_data_type)
|
|
1753
|
+
|
|
1754
|
+
# Skip templates where any data_field placeholder has zero candidates
|
|
1755
|
+
if _should_skip_due_to_empty_candidates(populated_info):
|
|
1756
|
+
print("⚠ 该模板存在数据字段候选为空的占位符,跳过此模板。")
|
|
1757
|
+
continue
|
|
1628
1758
|
|
|
1629
1759
|
final_output[template_expr] = {
|
|
1630
1760
|
"template_explanation": template_expl,
|