quantnodes 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- QuantNodes/__init__.py +15 -0
- QuantNodes/__main__.py +14 -0
- QuantNodes/agent/__init__.py +158 -0
- QuantNodes/agent/agents/__init__.py +13 -0
- QuantNodes/agent/agents/definition.py +180 -0
- QuantNodes/agent/agents/manager.py +73 -0
- QuantNodes/agent/config/__init__.py +34 -0
- QuantNodes/agent/config/executor.py +958 -0
- QuantNodes/agent/config/loader.py +427 -0
- QuantNodes/agent/config/templates/bollinger_bands.yaml +84 -0
- QuantNodes/agent/config/templates/dual_ma.yaml +72 -0
- QuantNodes/agent/config/templates/empty.yaml +56 -0
- QuantNodes/agent/config/templates/mean_reversion.yaml +47 -0
- QuantNodes/agent/config/templates/mean_reversion_zscore.yaml +90 -0
- QuantNodes/agent/config/templates/momentum.yaml +81 -0
- QuantNodes/agent/config/templates/momentum_breakout.yaml +84 -0
- QuantNodes/agent/config/templates/rsi_strategy.yaml +72 -0
- QuantNodes/agent/config/templates/volume_price.yaml +86 -0
- QuantNodes/agent/config/types.py +156 -0
- QuantNodes/agent/config_mapper.py +293 -0
- QuantNodes/agent/core/__init__.py +19 -0
- QuantNodes/agent/core/dream.py +47 -0
- QuantNodes/agent/core/quant_dream.py +274 -0
- QuantNodes/agent/cron_jobs.py +314 -0
- QuantNodes/agent/nanobot_bridge.py +242 -0
- QuantNodes/agent/permission/__init__.py +30 -0
- QuantNodes/agent/permission/defaults.py +36 -0
- QuantNodes/agent/permission/evaluate.py +41 -0
- QuantNodes/agent/permission/models.py +59 -0
- QuantNodes/agent/permission/service.py +133 -0
- QuantNodes/agent/providers/__init__.py +11 -0
- QuantNodes/agent/providers/base.py +102 -0
- QuantNodes/agent/providers/quantnodes.py +610 -0
- QuantNodes/agent/providers/rate_limiter.py +326 -0
- QuantNodes/agent/providers/registry.py +163 -0
- QuantNodes/agent/skills/__init__.py +20 -0
- QuantNodes/agent/skills/base.py +118 -0
- QuantNodes/agent/skills/bridge.py +73 -0
- QuantNodes/agent/skills/factor/__init__.py +14 -0
- QuantNodes/agent/skills/factor/correlation.py +99 -0
- QuantNodes/agent/skills/factor/group_backtest.py +114 -0
- QuantNodes/agent/skills/factor/ic_analysis.py +106 -0
- QuantNodes/agent/skills/loader.py +107 -0
- QuantNodes/agent/skills/registry.py +105 -0
- QuantNodes/agent/skills/strategy/__init__.py +16 -0
- QuantNodes/agent/skills/strategy/bollinger.py +86 -0
- QuantNodes/agent/skills/strategy/dual_ma.py +82 -0
- QuantNodes/agent/skills/strategy/momentum.py +74 -0
- QuantNodes/agent/skills/strategy/rsi_reversal.py +99 -0
- QuantNodes/agent/skills_quant/__init__.py +14 -0
- QuantNodes/agent/skills_quant/backtest-analyze/SKILL.md +42 -0
- QuantNodes/agent/skills_quant/config-driven/SKILL.md +72 -0
- QuantNodes/agent/skills_quant/factor-research/SKILL.md +40 -0
- QuantNodes/agent/skills_quant/quant-dream/SKILL.md +55 -0
- QuantNodes/agent/skills_quant/risk-management/SKILL.md +45 -0
- QuantNodes/agent/skills_quant/strategy-design/SKILL.md +43 -0
- QuantNodes/agent/templates/__init__.py +4 -0
- QuantNodes/agent/tools/__init__.py +173 -0
- QuantNodes/agent/tools/_workspace.py +51 -0
- QuantNodes/agent/tools/alpha_backtest.py +328 -0
- QuantNodes/agent/tools/alpha_evaluate.py +493 -0
- QuantNodes/agent/tools/backtest.py +226 -0
- QuantNodes/agent/tools/base.py +133 -0
- QuantNodes/agent/tools/code_search.py +207 -0
- QuantNodes/agent/tools/config_backtest.py +401 -0
- QuantNodes/agent/tools/context.py +97 -0
- QuantNodes/agent/tools/dream_skill.py +77 -0
- QuantNodes/agent/tools/echo.py +38 -0
- QuantNodes/agent/tools/factor.py +231 -0
- QuantNodes/agent/tools/file_ops.py +201 -0
- QuantNodes/agent/tools/git_ops.py +190 -0
- QuantNodes/agent/tools/operator_lookup.py +218 -0
- QuantNodes/agent/tools/output_truncation.py +77 -0
- QuantNodes/agent/tools/path_check.py +43 -0
- QuantNodes/agent/tools/pipeline.py +62 -0
- QuantNodes/agent/tools/registry.py +150 -0
- QuantNodes/agent/tools/sandbox.py +62 -0
- QuantNodes/agent/tools/shell_safety.py +63 -0
- QuantNodes/agent/tools/strategy.py +106 -0
- QuantNodes/agent/tools/task.py +171 -0
- QuantNodes/agent/tools/web_fetch.py +142 -0
- QuantNodes/agent/tools/web_search.py +114 -0
- QuantNodes/agent/tools/wiki.py +370 -0
- QuantNodes/agent/utils/__init__.py +11 -0
- QuantNodes/agent/utils/helpers.py +43 -0
- QuantNodes/agent/utils/prompt_templates.py +30 -0
- QuantNodes/agent/workflows/__init__.py +20 -0
- QuantNodes/agent/workflows/implementations/__init__.py +8 -0
- QuantNodes/agent/workflows/implementations/alpha_gpt.py +508 -0
- QuantNodes/agent/workflows/implementations/mcts.py +442 -0
- QuantNodes/agent/workflows/parsers.py +44 -0
- QuantNodes/agent/workflows/registry.py +119 -0
- QuantNodes/agent/workflows/step_agent.py +219 -0
- QuantNodes/agent/workflows/tool.py +198 -0
- QuantNodes/ai/__init__.py +93 -0
- QuantNodes/ai/llm/__init__.py +75 -0
- QuantNodes/ai/llm/base.py +233 -0
- QuantNodes/ai/llm/decorators.py +281 -0
- QuantNodes/ai/llm/gateway.py +571 -0
- QuantNodes/ai/llm/null.py +76 -0
- QuantNodes/ai/llm/openai.py +435 -0
- QuantNodes/ai/optimizer.py +405 -0
- QuantNodes/ai/prompts/__init__.py +229 -0
- QuantNodes/ai/sandbox.py +371 -0
- QuantNodes/ai/sandbox_pandas_bridge.py +150 -0
- QuantNodes/ai/strategy_gen.py +396 -0
- QuantNodes/backtest/__init__.py +64 -0
- QuantNodes/backtest/backtest_node.py +188 -0
- QuantNodes/backtest/broker_node.py +378 -0
- QuantNodes/backtest/config_runner.py +397 -0
- QuantNodes/backtest/config_strategy.py +64 -0
- QuantNodes/backtest/risk_node.py +360 -0
- QuantNodes/backtest/strategy_node.py +268 -0
- QuantNodes/cache_node/__init__.py +19 -0
- QuantNodes/cache_node/base.py +244 -0
- QuantNodes/cache_node/cache_store.py +99 -0
- QuantNodes/cache_node/metadata.py +100 -0
- QuantNodes/cli/__init__.py +109 -0
- QuantNodes/cli/_helpers.py +511 -0
- QuantNodes/cli/command.py +110 -0
- QuantNodes/cli/commands/__init__.py +69 -0
- QuantNodes/cli/commands/agent.py +158 -0
- QuantNodes/cli/commands/alpha.py +951 -0
- QuantNodes/cli/commands/chat.py +38 -0
- QuantNodes/cli/commands/evolve.py +120 -0
- QuantNodes/cli/commands/factor.py +569 -0
- QuantNodes/cli/commands/init.py +190 -0
- QuantNodes/cli/commands/run.py +259 -0
- QuantNodes/cli/commands/serve.py +398 -0
- QuantNodes/cli/commands/version.py +120 -0
- QuantNodes/cli/enhanced.py +146 -0
- QuantNodes/conf_node/__init__.py +37 -0
- QuantNodes/conf_node/base.py +120 -0
- QuantNodes/conf_node/env_config.py +132 -0
- QuantNodes/conf_node/ini_config.py +70 -0
- QuantNodes/conf_node/json_config.py +69 -0
- QuantNodes/conf_node/yaml_config.py +78 -0
- QuantNodes/constants.py +17 -0
- QuantNodes/core/__init__.py +196 -0
- QuantNodes/core/_lookback_helpers.py +49 -0
- QuantNodes/core/ast_parser.py +198 -0
- QuantNodes/core/base.py +61 -0
- QuantNodes/core/cache_manager.py +344 -0
- QuantNodes/core/cache_utils.py +150 -0
- QuantNodes/core/cond_builder.py +53 -0
- QuantNodes/core/config.py +170 -0
- QuantNodes/core/constants.py +48 -0
- QuantNodes/core/control.py +412 -0
- QuantNodes/core/data_preprocessing.py +453 -0
- QuantNodes/core/data_source.py +46 -0
- QuantNodes/core/events.py +178 -0
- QuantNodes/core/evolution/__init__.py +22 -0
- QuantNodes/core/evolution/loop.py +583 -0
- QuantNodes/core/evolution/operators.py +289 -0
- QuantNodes/core/evolution/settings.py +44 -0
- QuantNodes/core/expression.py +841 -0
- QuantNodes/core/feedback/__init__.py +38 -0
- QuantNodes/core/feedback/channels.py +182 -0
- QuantNodes/core/feedback/collector.py +91 -0
- QuantNodes/core/feedback/dataclass.py +239 -0
- QuantNodes/core/feedback/llm_judge.py +138 -0
- QuantNodes/core/knowledge/__init__.py +69 -0
- QuantNodes/core/knowledge/knowledge_base.py +217 -0
- QuantNodes/core/knowledge/lineage_compress.py +196 -0
- QuantNodes/core/knowledge/lineage_expand.py +123 -0
- QuantNodes/core/knowledge/metrics/__init__.py +43 -0
- QuantNodes/core/knowledge/metrics/evaluator.py +176 -0
- QuantNodes/core/knowledge/metrics/metrics.py +220 -0
- QuantNodes/core/knowledge/rag_prompt.py +196 -0
- QuantNodes/core/knowledge/retriever.py +209 -0
- QuantNodes/core/lambda_node.py +81 -0
- QuantNodes/core/monitoring/__init__.py +22 -0
- QuantNodes/core/monitoring/collector.py +292 -0
- QuantNodes/core/monitoring/dashboard.py +365 -0
- QuantNodes/core/node.py +375 -0
- QuantNodes/core/pandas_utils.py +504 -0
- QuantNodes/core/parallel/__init__.py +15 -0
- QuantNodes/core/parallel/worker.py +140 -0
- QuantNodes/core/parallel/worker_process.py +265 -0
- QuantNodes/core/path_utils.py +73 -0
- QuantNodes/core/pipeline.py +328 -0
- QuantNodes/core/plugin.py +135 -0
- QuantNodes/core/quality_gate/__init__.py +32 -0
- QuantNodes/core/quality_gate/complexity.py +94 -0
- QuantNodes/core/quality_gate/consistency.py +26 -0
- QuantNodes/core/quality_gate/node.py +97 -0
- QuantNodes/core/quality_gate/redundancy.py +51 -0
- QuantNodes/core/quality_gate/settings.py +43 -0
- QuantNodes/core/quality_gate/zoo.py +98 -0
- QuantNodes/core/serializable.py +116 -0
- QuantNodes/core/serialization.py +673 -0
- QuantNodes/core/tools.py +333 -0
- QuantNodes/core/trajectory/__init__.py +25 -0
- QuantNodes/core/trajectory/entry.py +116 -0
- QuantNodes/core/trajectory/lineage.py +67 -0
- QuantNodes/core/trajectory/pool.py +211 -0
- QuantNodes/core/trajectory/selector.py +140 -0
- QuantNodes/core/visualization/__init__.py +33 -0
- QuantNodes/core/visualization/builder.py +233 -0
- QuantNodes/core/visualization/gate_breakdown.py +140 -0
- QuantNodes/core/visualization/lineage_dag.py +203 -0
- QuantNodes/core/visualization/metric_distribution.py +125 -0
- QuantNodes/core/visualization/report.py +68 -0
- QuantNodes/database_node/__init__.py +69 -0
- QuantNodes/database_node/base.py +135 -0
- QuantNodes/database_node/clickhouse_node.py +272 -0
- QuantNodes/database_node/csv_node.py +83 -0
- QuantNodes/database_node/duckdb_node.py +86 -0
- QuantNodes/database_node/factory.py +83 -0
- QuantNodes/database_node/mysql_node.py +100 -0
- QuantNodes/database_node/parquet_node.py +75 -0
- QuantNodes/database_node/sqlite_node.py +67 -0
- QuantNodes/factor_node/__init__.py +50 -0
- QuantNodes/factor_node/factor.py +563 -0
- QuantNodes/factor_node/factor_db.py +421 -0
- QuantNodes/factor_node/factor_functions/__init__.py +252 -0
- QuantNodes/factor_node/factor_functions/_helpers.py +358 -0
- QuantNodes/factor_node/factor_functions/_helpers_debug.py +317 -0
- QuantNodes/factor_node/factor_functions/composite_ops.py +136 -0
- QuantNodes/factor_node/factor_functions/math_ops.py +433 -0
- QuantNodes/factor_node/factor_functions/section_ops.py +290 -0
- QuantNodes/factor_node/factor_functions/talib_ops.py +1293 -0
- QuantNodes/factor_node/factor_functions/time_ops.py +535 -0
- QuantNodes/factor_node/factor_operation.py +1115 -0
- QuantNodes/factor_node/factor_table.py +1073 -0
- QuantNodes/factor_node/quant_nodes_object.py +60 -0
- QuantNodes/mcp_server/__init__.py +27 -0
- QuantNodes/mcp_server/__main__.py +4 -0
- QuantNodes/mcp_server/server.py +272 -0
- QuantNodes/methods/__init__.py +28 -0
- QuantNodes/methods/pipeline.py +100 -0
- QuantNodes/methods/sandbox.py +102 -0
- QuantNodes/monitor/__init__.py +27 -0
- QuantNodes/monitor/agent_tools/__init__.py +5 -0
- QuantNodes/monitor/agent_tools/monitor_tool.py +98 -0
- QuantNodes/monitor/agent_tools/schedule_tool.py +98 -0
- QuantNodes/monitor/agent_tools/version_tool.py +133 -0
- QuantNodes/monitor/monitor/__init__.py +6 -0
- QuantNodes/monitor/monitor/alerter.py +60 -0
- QuantNodes/monitor/monitor/collector.py +164 -0
- QuantNodes/monitor/monitor/dashboard.py +115 -0
- QuantNodes/monitor/monitor/drift.py +190 -0
- QuantNodes/monitor/scheduler/__init__.py +4 -0
- QuantNodes/monitor/scheduler/runner.py +133 -0
- QuantNodes/monitor/scheduler/scheduler.py +184 -0
- QuantNodes/monitor/storage/__init__.py +16 -0
- QuantNodes/monitor/storage/models.py +70 -0
- QuantNodes/monitor/storage/repository.py +407 -0
- QuantNodes/monitor/version/__init__.py +4 -0
- QuantNodes/monitor/version/diff.py +81 -0
- QuantNodes/monitor/version/version_manager.py +182 -0
- QuantNodes/operator_node/__init__.py +28 -0
- QuantNodes/operator_node/base.py +97 -0
- QuantNodes/operator_node/query_node.py +129 -0
- QuantNodes/operator_node/sql_builder.py +125 -0
- QuantNodes/operator_node/sql_utils.py +172 -0
- QuantNodes/operator_node/transform.py +130 -0
- QuantNodes/operators/__init__.py +90 -0
- QuantNodes/operators/_engine.py +108 -0
- QuantNodes/operators/composite.py +161 -0
- QuantNodes/operators/composite_dag.py +667 -0
- QuantNodes/operators/composite_dag_ops.py +343 -0
- QuantNodes/operators/composite_dag_pandas_ops.py +382 -0
- QuantNodes/operators/custom.py +408 -0
- QuantNodes/operators/facade.py +164 -0
- QuantNodes/operators/math.py +163 -0
- QuantNodes/operators/proxy.py +29 -0
- QuantNodes/operators/registry.py +144 -0
- QuantNodes/operators/section.py +99 -0
- QuantNodes/operators/talib.py +757 -0
- QuantNodes/operators/templates.py +95 -0
- QuantNodes/operators/time_series.py +136 -0
- QuantNodes/prompts/__init__.py +20 -0
- QuantNodes/prompts/backtest/__init__.py +12 -0
- QuantNodes/prompts/backtest/factor_based.py +86 -0
- QuantNodes/prompts/backtest/standard.py +73 -0
- QuantNodes/prompts/factor/__init__.py +14 -0
- QuantNodes/prompts/factor/correlation.py +77 -0
- QuantNodes/prompts/factor/group_backtest.py +86 -0
- QuantNodes/prompts/factor/ic_analysis.py +91 -0
- QuantNodes/prompts/strategy/__init__.py +18 -0
- QuantNodes/prompts/strategy/market_neutral.py +96 -0
- QuantNodes/prompts/strategy/mean_reversion.py +107 -0
- QuantNodes/prompts/strategy/momentum.py +160 -0
- QuantNodes/prompts/strategy/pairs_trading.py +107 -0
- QuantNodes/prompts/strategy/trend_following.py +96 -0
- QuantNodes/research/README.md +106 -0
- QuantNodes/research/__init__.py +154 -0
- QuantNodes/research/_legacy_3c/__init__.py +61 -0
- QuantNodes/research/_legacy_3c/auto_researcher.py +289 -0
- QuantNodes/research/_legacy_3c/factor_evaluator.py +560 -0
- QuantNodes/research/_legacy_3c/factor_miner.py +318 -0
- QuantNodes/research/_legacy_3c/mcts_search.py +324 -0
- QuantNodes/research/factor_test/__init__.py +25 -0
- QuantNodes/research/factor_test/config.py +184 -0
- QuantNodes/research/factor_test/config_builder.py +276 -0
- QuantNodes/research/factor_test/e2e/data_prep.py +163 -0
- QuantNodes/research/factor_test/e2e/run_evolution_e2e.py +309 -0
- QuantNodes/research/factor_test/evolution_adapter.py +231 -0
- QuantNodes/research/factor_test/feedback_wrapper.py +102 -0
- QuantNodes/research/factor_test/ifind_db/__init__.py +7 -0
- QuantNodes/research/factor_test/ifind_db/fetcher.py +224 -0
- QuantNodes/research/factor_test/ifind_db/ifind_database.py +689 -0
- QuantNodes/research/factor_test/nodes/__init__.py +1 -0
- QuantNodes/research/factor_test/nodes/_base.py +91 -0
- QuantNodes/research/factor_test/nodes/adjust_date_node.py +48 -0
- QuantNodes/research/factor_test/nodes/configs.py +240 -0
- QuantNodes/research/factor_test/nodes/factor_neutralize_node.py +87 -0
- QuantNodes/research/factor_test/nodes/factor_preprocess_node.py +222 -0
- QuantNodes/research/factor_test/nodes/factor_score_node.py +141 -0
- QuantNodes/research/factor_test/nodes/factor_test_report_node.py +153 -0
- QuantNodes/research/factor_test/nodes/group_analyzer_node.py +317 -0
- QuantNodes/research/factor_test/nodes/ic_analyzer_node.py +112 -0
- QuantNodes/research/factor_test/nodes/load_data_node.py +100 -0
- QuantNodes/research/factor_test/nodes/long_short_node.py +93 -0
- QuantNodes/research/factor_test/nodes/neutralizers.py +222 -0
- QuantNodes/research/factor_test/nodes/preprocess_strategies.py +277 -0
- QuantNodes/research/factor_test/nodes/risk_correlation_node.py +112 -0
- QuantNodes/research/factor_test/nodes/sample_pool_filter_node.py +110 -0
- QuantNodes/research/factor_test/nodes/tradability_filter_node.py +92 -0
- QuantNodes/research/factor_test/pipeline_runner.py +305 -0
- QuantNodes/research/factor_test/pipeline_spec.py +216 -0
- QuantNodes/research/factor_test/utils/__init__.py +26 -0
- QuantNodes/research/factor_test/utils/constants.py +86 -0
- QuantNodes/research/factor_test/utils/data_loader.py +141 -0
- QuantNodes/research/factor_test/utils/date_utils.py +232 -0
- QuantNodes/research/factor_test/utils/file_loaders.py +150 -0
- QuantNodes/research/factor_test/utils/labels.py +37 -0
- QuantNodes/research/factor_test/utils/metrics_extractor.py +55 -0
- QuantNodes/research/factor_test/utils/performance_metrics.py +175 -0
- QuantNodes/research/factor_test/utils/safe_load.py +106 -0
- QuantNodes/research/quant_alpha/CHANGELOG.md +80 -0
- QuantNodes/research/quant_alpha/README.md +142 -0
- QuantNodes/research/quant_alpha/__init__.py +45 -0
- QuantNodes/research/quant_alpha/adapters/__init__.py +99 -0
- QuantNodes/research/quant_alpha/adapters/calculator.py +503 -0
- QuantNodes/research/quant_alpha/adapters/expression.py +387 -0
- QuantNodes/research/quant_alpha/alpha101_design/__init__.py +50 -0
- QuantNodes/research/quant_alpha/alpha101_design/few_shot_examples.py +243 -0
- QuantNodes/research/quant_alpha/alpha101_design/philosophy.py +474 -0
- QuantNodes/research/quant_alpha/alpha158_design/__init__.py +63 -0
- QuantNodes/research/quant_alpha/alpha158_design/few_shot_examples.py +219 -0
- QuantNodes/research/quant_alpha/alpha158_design/philosophy.py +240 -0
- QuantNodes/research/quant_alpha/evaluation/__init__.py +47 -0
- QuantNodes/research/quant_alpha/evaluation/baselines/__init__.py +8 -0
- QuantNodes/research/quant_alpha/evaluation/baselines/g1_handcrafted.py +135 -0
- QuantNodes/research/quant_alpha/evaluation/baselines/g2_llm_only.py +269 -0
- QuantNodes/research/quant_alpha/evaluation/baselines/g3_alpha_gpt.py +152 -0
- QuantNodes/research/quant_alpha/evaluation/clickhouse_data_loader.py +227 -0
- QuantNodes/research/quant_alpha/evaluation/contracts.py +376 -0
- QuantNodes/research/quant_alpha/evaluation/evaluators/__init__.py +6 -0
- QuantNodes/research/quant_alpha/evaluation/evaluators/polars_evaluator.py +545 -0
- QuantNodes/research/quant_alpha/evaluation/mock_data_loader.py +226 -0
- QuantNodes/research/quant_alpha/evaluation/runner.py +243 -0
- QuantNodes/research/quant_alpha/llm/__init__.py +38 -0
- QuantNodes/research/quant_alpha/llm/parser.py +681 -0
- QuantNodes/research/quant_alpha/logic_driven_pipeline.py +411 -0
- QuantNodes/research/quant_alpha/logic_mining/__init__.py +74 -0
- QuantNodes/research/quant_alpha/logic_mining/compiler.py +457 -0
- QuantNodes/research/quant_alpha/logic_mining/generator.py +366 -0
- QuantNodes/research/quant_alpha/logic_mining/models.py +252 -0
- QuantNodes/research/quant_alpha/logic_mining/parser.py +287 -0
- QuantNodes/research/quant_alpha/logic_mining/pipelines.py +297 -0
- QuantNodes/research/quant_alpha/logic_mining/sources.py +149 -0
- QuantNodes/research/quant_alpha/mcts/__init__.py +66 -0
- QuantNodes/research/quant_alpha/mcts/cache.py +262 -0
- QuantNodes/research/quant_alpha/mcts/extension_ops.py +320 -0
- QuantNodes/research/quant_alpha/mcts/feedback.py +825 -0
- QuantNodes/research/quant_alpha/mcts/op_prior.py +180 -0
- QuantNodes/research/quant_alpha/mcts/search.py +540 -0
- QuantNodes/research/quant_alpha/mcts/tree.py +201 -0
- QuantNodes/research/quant_alpha/operator_vocab/__init__.py +50 -0
- QuantNodes/research/quant_alpha/operator_vocab/config.py +54 -0
- QuantNodes/research/quant_alpha/operator_vocab/metadata.py +263 -0
- QuantNodes/research/quant_alpha/operator_vocab/vocabulary.py +481 -0
- QuantNodes/research/quant_alpha/pipeline.py +1027 -0
- QuantNodes/research/quant_alpha/types/__init__.py +27 -0
- QuantNodes/research/quant_alpha/types/constants.py +28 -0
- QuantNodes/research/quant_alpha/types/state.py +205 -0
- QuantNodes/research/quant_alpha/workflow/__init__.py +32 -0
- QuantNodes/research/quant_alpha/workflow/alpha_gpt.py +911 -0
- QuantNodes/research/quant_alpha/workflow/alpha_logics.py +416 -0
- QuantNodes/research/quant_alpha/workflow/state.py +27 -0
- QuantNodes/research/report_reproducer.py +485 -0
- QuantNodes/research/wiki.py +1155 -0
- QuantNodes/symbolic/__init__.py +51 -0
- QuantNodes/symbolic/compiler.py +113 -0
- QuantNodes/symbolic/dialect.py +260 -0
- QuantNodes/symbolic/executor.py +147 -0
- QuantNodes/symbolic/expression.py +234 -0
- QuantNodes/symbolic/functions.py +433 -0
- QuantNodes/symbolic/optimizer.py +165 -0
- QuantNodes/ui_node/__init__.py +30 -0
- QuantNodes/ui_node/base.py +222 -0
- quantnodes-3.0.0.dist-info/METADATA +463 -0
- quantnodes-3.0.0.dist-info/RECORD +399 -0
- quantnodes-3.0.0.dist-info/WHEEL +5 -0
- quantnodes-3.0.0.dist-info/entry_points.txt +24 -0
- quantnodes-3.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"""RAGEvaluator — 汇总 5 个指标, 输出统一报告。"""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
from dataclasses import asdict, dataclass, field
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import pandas as pd
|
|
10
|
+
|
|
11
|
+
from QuantNodes.core.path_utils import ensure_parent
|
|
12
|
+
|
|
13
|
+
from .metrics import (
|
|
14
|
+
hit_rate_at_k,
|
|
15
|
+
intra_list_diversity,
|
|
16
|
+
jaccard_similarity,
|
|
17
|
+
lineage_coverage,
|
|
18
|
+
mean_hit_rate_at_k,
|
|
19
|
+
mean_lineage_coverage,
|
|
20
|
+
mean_ndcg_at_k,
|
|
21
|
+
mean_reciprocal_rank,
|
|
22
|
+
ndcg_at_k,
|
|
23
|
+
reciprocal_rank,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class QueryResult:
    """Evaluation result for a single query.

    Instances are built by ``RAGEvaluator.evaluate``; every metric field
    defaults to ``0.0`` until the evaluator fills it in.
    """
    # Query text.
    query: str
    # Entry ids returned by the retriever (copied from the caller's list).
    retrieved_ids: list[str]
    # Entry ids regarded as relevant for this query.
    relevant_ids: list[str]
    # doc_id -> relevance mapping used for the NDCG fields.
    relevance_scores: dict[str, float] = field(default_factory=dict)
    # Ground-truth lineage entry ids used for ``lineage_cov``.
    lineage_ids: list[str] = field(default_factory=list)
    # HitRate at the evaluator's first cutoff (5 unless ``k_values`` overrides it).
    hit_at_5: float = 0.0
    # HitRate at the evaluator's second cutoff (10 unless overridden).
    hit_at_10: float = 0.0
    # NDCG at the evaluator's first cutoff.
    ndcg_at_5: float = 0.0
    # NDCG at the evaluator's second cutoff.
    ndcg_at_10: float = 0.0
    # Reciprocal rank of the first relevant entry for this query.
    mrr: float = 0.0
    # Lineage coverage of the retrieved ids for this query.
    lineage_cov: float = 0.0
    # 1 - mean pairwise Jaccard similarity of the query's token lists.
    diversity: float = 0.0
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class EvalReport:
    """Aggregate evaluation report across all queries."""
    n_queries: int
    hit_at_5: float
    hit_at_10: float
    ndcg_at_5: float
    ndcg_at_10: float
    mrr: float
    lineage_coverage: float
    diversity: float
    per_query: list[QueryResult] = field(default_factory=list)
    timestamp: datetime = field(default_factory=datetime.now)

    def to_dict(self) -> dict:
        """Return the report as a plain dict.

        The timestamp is rendered with ``isoformat()`` and every per-query
        entry is expanded with ``dataclasses.asdict``.
        """
        # Scalar fields are copied verbatim, in the order they are emitted.
        scalar_fields = (
            "n_queries",
            "hit_at_5",
            "hit_at_10",
            "ndcg_at_5",
            "ndcg_at_10",
            "mrr",
            "lineage_coverage",
            "diversity",
        )
        payload: dict = {name: getattr(self, name) for name in scalar_fields}
        payload["timestamp"] = self.timestamp.isoformat()
        payload["per_query"] = [asdict(entry) for entry in self.per_query]
        return payload
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class RAGEvaluator:
    """RAG evaluator: scores multi-query retrieval results on five metrics.

    The metrics are HitRate@K, NDCG@K, MRR, lineage coverage and intra-list
    diversity.

    Args:
        k_values: Cutoffs for HitRate/NDCG (defaults to ``[5, 10]``). Only the
            first two entries are used. They populate the ``*_at_5`` and
            ``*_at_10`` report fields respectively, whatever their actual
            value.
    """

    def __init__(self, k_values: list[int] | None = None):
        self.k_values = k_values or [5, 10]

    def _cutoffs(self) -> tuple[int, int]:
        """Return the two cutoffs in use, falling back to 5 and 10."""
        ks = self.k_values
        first = ks[0] if len(ks) > 0 else 5
        second = ks[1] if len(ks) > 1 else 10
        return first, second

    def evaluate(
        self,
        queries: list[str],
        retrieved: list[list[str]],
        relevant: list[list[str]],
        relevance_scores: list[dict[str, float]] | None = None,
        lineage_ids: list[list[str]] | None = None,
        token_lists: list[list[list[str]]] | None = None,
    ) -> EvalReport:
        """Evaluate the results of several queries.

        All list arguments are aligned by position. Per-query results are
        built with ``zip``, so they stop at the shortest input.

        Args:
            queries: Query texts.
            retrieved: Ordered retrieved entry ids for each query.
            relevant: Relevant entry ids for each query.
            relevance_scores: Per-query ``doc_id -> relevance`` mapping used
                for NDCG (optional; defaults to empty mappings).
            lineage_ids: Per-query ground-truth lineage entry ids (optional).
            token_lists: Per-query token lists of the retrieved entries, used
                for diversity (optional).

        Returns:
            EvalReport holding the aggregated metrics and per-query results.
        """
        n = len(queries)
        relevance_scores = relevance_scores or [{} for _ in range(n)]
        lineage_ids = lineage_ids or [[] for _ in range(n)]
        token_lists = token_lists or [[] for _ in range(n)]

        # The cutoffs do not depend on the query, so resolve them once and
        # share them between the per-query and the aggregate computations.
        k5, k10 = self._cutoffs()

        # Per-query metrics.
        per_query: list[QueryResult] = []
        for q, ret, rel, scores, lin, toks in zip(
            queries, retrieved, relevant, relevance_scores, lineage_ids, token_lists
        ):
            h5 = hit_rate_at_k(ret, rel, k=k5)
            h10 = hit_rate_at_k(ret, rel, k=k10)
            n5 = ndcg_at_k(ret, scores, k=k5)
            n10 = ndcg_at_k(ret, scores, k=k10)
            mrr = reciprocal_rank(ret, rel)
            lin_cov = lineage_coverage(ret, lin)
            # Diversity: 1 - average pairwise Jaccard similarity of the
            # token lists; with fewer than two lists it is defined as 1.0.
            if len(toks) >= 2:
                sims = [
                    jaccard_similarity(toks[i], toks[j])
                    for i in range(len(toks))
                    for j in range(i + 1, len(toks))
                ]
                div = 1.0 - (sum(sims) / len(sims))
            else:
                div = 1.0
            per_query.append(QueryResult(
                query=q, retrieved_ids=list(ret), relevant_ids=list(rel),
                relevance_scores=dict(scores), lineage_ids=list(lin),
                hit_at_5=h5, hit_at_10=h10, ndcg_at_5=n5, ndcg_at_10=n10,
                mrr=mrr, lineage_cov=lin_cov, diversity=div,
            ))

        # Aggregate metrics.
        h5_mean = mean_hit_rate_at_k(retrieved, relevant, k=k5)
        h10_mean = mean_hit_rate_at_k(retrieved, relevant, k=k10)
        n5_mean = mean_ndcg_at_k(retrieved, relevance_scores, k=k5)
        n10_mean = mean_ndcg_at_k(retrieved, relevance_scores, k=k10)
        mrr_mean = mean_reciprocal_rank(retrieved, relevant)
        lin_mean = mean_lineage_coverage(retrieved, lineage_ids)
        # NOTE(review): intra_list_diversity is not visible in this chunk; it
        # receives the full per-query nested token_lists. Confirm it expects
        # that shape.
        div_mean = intra_list_diversity(token_lists)

        return EvalReport(
            n_queries=n,
            hit_at_5=h5_mean, hit_at_10=h10_mean,
            ndcg_at_5=n5_mean, ndcg_at_10=n10_mean,
            mrr=mrr_mean,
            lineage_coverage=lin_mean,
            diversity=div_mean,
            per_query=per_query,
        )

    def save(self, report: EvalReport, path: Path | str) -> None:
        """Write the report to ``path`` as UTF-8 JSON (indent 2, non-ASCII kept)."""
        path = Path(path)
        # Presumably creates the parent directory; verify against
        # QuantNodes.core.path_utils.ensure_parent.
        ensure_parent(path)
        with path.open("w", encoding="utf-8") as f:
            json.dump(report.to_dict(), f, ensure_ascii=False, indent=2)

    def save_csv(self, report: EvalReport, path: Path | str) -> None:
        """Write the per-query results to ``path`` as CSV, one row per query."""
        path = Path(path)
        ensure_parent(path)
        rows = [asdict(q) for q in report.per_query]
        df = pd.DataFrame(rows)
        df.to_csv(path, index=False)
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""RAG 评估指标 — 5 个核心指标。
|
|
2
|
+
|
|
3
|
+
- HitRate@K: 检索 Top-K 是否命中相关 entry
|
|
4
|
+
- NDCG@K: Normalized Discounted Cumulative Gain (位置权重)
|
|
5
|
+
- MRR: Mean Reciprocal Rank (首个相关 entry 的倒数排名)
|
|
6
|
+
- LineageCoverage: 检索结果覆盖的谱系比例 (vs. ground truth 谱系)
|
|
7
|
+
- IntraListDiversity: 检索结果内部多样性 (1 - 平均 pairwise similarity)
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import math
|
|
12
|
+
from typing import Iterable, Mapping, Sequence
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# ============================================================================
|
|
16
|
+
# 1. HitRate@K
|
|
17
|
+
# ============================================================================
|
|
18
|
+
|
|
19
|
+
def hit_rate_at_k(
    retrieved_ids: Sequence[str],
    relevant_ids: Iterable[str],
    k: int = 5,
) -> float:
    """Report whether the top-K retrieved entries contain a relevant entry.

    Args:
        retrieved_ids: Ordered entry ids returned by the retriever.
        relevant_ids: Entry ids that are truly relevant.
        k: Cutoff; only ``retrieved_ids[:k]`` is inspected.

    Returns:
        float: 1.0 on a hit, 0.0 otherwise.
    """
    wanted = set(relevant_ids)
    # The sets are disjoint exactly when no top-K id is relevant.
    if wanted.isdisjoint(retrieved_ids[:k]):
        return 0.0
    return 1.0
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def mean_hit_rate_at_k(
    queries_retrieved: Sequence[Sequence[str]],
    queries_relevant: Sequence[Iterable[str]],
    k: int = 5,
) -> float:
    """Return the mean HitRate@K over several queries.

    Queries are paired by position with ``zip``, so only the pairs present in
    both inputs are scored and averaged.

    Args:
        queries_retrieved: Ordered retrieved entry ids for each query.
        queries_relevant: Relevant entry ids for each query.
        k: Cutoff passed on to ``hit_rate_at_k``.

    Returns:
        float: Mean hit rate, or 0.0 when there is nothing to score.
    """
    scores = [
        hit_rate_at_k(ret, rel, k=k)
        for ret, rel in zip(queries_retrieved, queries_relevant)
    ]
    # Guard on the scored pairs rather than on queries_retrieved alone: an
    # empty queries_relevant would otherwise divide by zero.
    if not scores:
        return 0.0
    return sum(scores) / len(scores)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# ============================================================================
|
|
55
|
+
# 2. NDCG@K
|
|
56
|
+
# ============================================================================
|
|
57
|
+
|
|
58
|
+
def dcg_at_k(
    retrieved_ids: Sequence[str],
    relevance_scores: Mapping[str, float],
    k: int = 5,
) -> float:
    """Compute the Discounted Cumulative Gain of the top-K results.

    Args:
        retrieved_ids: Ordered entry ids returned by the retriever.
        relevance_scores: ``doc_id -> relevance`` (usually 0/1, may be
            higher); ids missing from the mapping count as 0.0.
        k: Cutoff; only ``retrieved_ids[:k]`` contributes.

    Returns:
        float: Sum of ``rel / log2(rank + 1)`` over 1-based ranks.
    """
    total = 0.0
    # Enumerating from 2 gives the log argument directly: the first result
    # is divided by log2(2) == 1.
    for log_arg, doc_id in enumerate(retrieved_ids[:k], start=2):
        gain = relevance_scores.get(doc_id, 0.0)
        total += gain / math.log2(log_arg)
    return total
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def ndcg_at_k(
    retrieved_ids: Sequence[str],
    relevance_scores: Mapping[str, float],
    k: int = 5,
) -> float:
    """Return the normalized DCG of the top-K results.

    The gain uses the same ``rel / log2(rank + 1)`` discount as ``dcg_at_k``,
    but an id that appears more than once in the top K is credited only at
    its first rank. Without that, a repeated relevant id would be counted
    several times in the numerator but once in the ideal ranking, and the
    result could exceed 1.0. For duplicate-free input the result is the same
    as dividing ``dcg_at_k`` by the ideal DCG.

    Args:
        retrieved_ids: Ordered entry ids returned by the retriever.
        relevance_scores: ``doc_id -> relevance``; ids missing from the
            mapping count as 0.0.
        k: Cutoff applied to both the actual and the ideal ranking.

    Returns:
        float: ``actual / ideal``, or 0.0 when the ideal DCG is 0.
    """
    actual = 0.0
    seen = set()
    for i, rid in enumerate(retrieved_ids[:k]):
        if rid in seen:
            # Repeated id: it keeps its slot but earns no further gain.
            continue
        seen.add(rid)
        # Standard discount: rel_i / log2(i + 2), so i == 0 divides by 1.
        actual += relevance_scores.get(rid, 0.0) / math.log2(i + 2)
    # Ideal ranking: the K largest relevance values in descending order.
    ideal_rels = sorted(relevance_scores.values(), reverse=True)[:k]
    ideal = sum(rel / math.log2(i + 2) for i, rel in enumerate(ideal_rels))
    if ideal == 0:
        return 0.0
    return actual / ideal
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def mean_ndcg_at_k(
    queries_retrieved: Sequence[Sequence[str]],
    queries_relevance: Sequence[Mapping[str, float]],
    k: int = 5,
) -> float:
    """Return the mean NDCG@K over several queries.

    Queries are paired by position with ``zip``, so only the pairs present in
    both inputs are scored and averaged.

    Args:
        queries_retrieved: Ordered retrieved entry ids for each query.
        queries_relevance: ``doc_id -> relevance`` mapping for each query.
        k: Cutoff passed on to ``ndcg_at_k``.

    Returns:
        float: Mean NDCG, or 0.0 when there is nothing to score.
    """
    scores = [
        ndcg_at_k(ret, rel, k=k)
        for ret, rel in zip(queries_retrieved, queries_relevance)
    ]
    # Guard on the scored pairs rather than on queries_retrieved alone: an
    # empty queries_relevance would otherwise divide by zero.
    if not scores:
        return 0.0
    return sum(scores) / len(scores)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# ============================================================================
|
|
106
|
+
# 3. MRR (Mean Reciprocal Rank)
|
|
107
|
+
# ============================================================================
|
|
108
|
+
|
|
109
|
+
def reciprocal_rank(
    retrieved_ids: Sequence[str],
    relevant_ids: Iterable[str],
) -> float:
    """Return the reciprocal of the 1-based rank of the first relevant entry.

    Returns 0.0 when no retrieved entry is relevant.
    """
    targets = set(relevant_ids)
    # The generator yields 1/rank for each hit in order; only the first is used.
    hits = (
        1.0 / rank
        for rank, doc_id in enumerate(retrieved_ids, start=1)
        if doc_id in targets
    )
    return next(hits, 0.0)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def mean_reciprocal_rank(
    queries_retrieved: Sequence[Sequence[str]],
    queries_relevant: Sequence[Iterable[str]],
) -> float:
    """Return the reciprocal rank averaged over several queries.

    Queries are paired positionally with ``zip``; the sum of the per-query
    reciprocal ranks is divided by ``len(queries_retrieved)``.

    Args:
        queries_retrieved: One ranked id list per query.
        queries_relevant: One collection of relevant ids per query.

    Returns:
        The averaged reciprocal rank, or ``0.0`` when ``queries_retrieved``
        is empty.
    """
    if not queries_retrieved:
        return 0.0
    per_query = [
        reciprocal_rank(ranking, relevant)
        for ranking, relevant in zip(queries_retrieved, queries_relevant)
    ]
    return sum(per_query) / len(queries_retrieved)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
# ============================================================================
|
|
135
|
+
# 4. Lineage Coverage (谱系覆盖率)
|
|
136
|
+
# ============================================================================
|
|
137
|
+
|
|
138
|
+
def lineage_coverage(
    retrieved_ids: Iterable[str],
    lineage_ids: Iterable[str],
) -> float:
    """Return the fraction of ground-truth lineage ids found in the results.

    Both inputs are reduced to sets, so duplicates do not influence the value.

    Args:
        retrieved_ids: Entry ids returned by the retriever.
        lineage_ids: Ground-truth lineage ids (e.g. the ancestors and
            descendants produced by ``expand_lineage``).

    Returns:
        A value in ``[0, 1]`` where ``1.0`` means every lineage id was
        retrieved; ``0.0`` when ``lineage_ids`` is empty.
    """
    retrieved, truth = set(retrieved_ids), set(lineage_ids)
    if not truth:
        return 0.0
    return len(truth & retrieved) / len(truth)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def mean_lineage_coverage(
    queries_retrieved: Sequence[Iterable[str]],
    queries_lineage: Sequence[Iterable[str]],
) -> float:
    """Return the lineage coverage averaged over several queries.

    Queries are paired positionally with ``zip``; the sum of the per-query
    coverages is divided by ``len(queries_retrieved)``.

    Args:
        queries_retrieved: One collection of retrieved ids per query.
        queries_lineage: One collection of ground-truth lineage ids per query.

    Returns:
        The averaged coverage, or ``0.0`` when ``queries_retrieved`` is empty.
    """
    if not queries_retrieved:
        return 0.0
    per_query = [
        lineage_coverage(found, truth)
        for found, truth in zip(queries_retrieved, queries_lineage)
    ]
    return sum(per_query) / len(queries_retrieved)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
# ============================================================================
|
|
173
|
+
# 5. Intra-List Diversity
|
|
174
|
+
# ============================================================================
|
|
175
|
+
|
|
176
|
+
def intra_list_diversity(
    items: Sequence[Sequence[Sequence[str]]],
    similarity_fn=None,
) -> float:
    """Return the mean intra-list diversity: 1 - average pairwise similarity.

    Args:
        items: Tokenized results for several queries, nested three levels:
            - outer: one element per query
            - middle: the top-k entries of that query
            - inner: the token list of one entry
            Example: ``[[['momentum', 'close'], ['reversal', 'open']], ...]``
        similarity_fn: Callable taking two token lists and returning their
            similarity; ``jaccard_similarity`` is used when omitted.

    Returns:
        The per-query diversities averaged over all queries (``1.0`` means
        fully diverse for the default similarity), or ``0.0`` when ``items``
        is empty.
    """
    if not items:
        return 0.0
    measure = jaccard_similarity if similarity_fn is None else similarity_fn

    per_query: list[float] = []
    for ranked in items:
        size = len(ranked)
        if size < 2:
            # A list with fewer than two entries counts as maximally diverse.
            per_query.append(1.0)
            continue
        # Every unordered pair (first < second) is compared exactly once.
        pair_scores = [
            measure(ranked[first], ranked[second])
            for first in range(size)
            for second in range(first + 1, size)
        ]
        per_query.append(1.0 - sum(pair_scores) / len(pair_scores))
    return sum(per_query) / len(per_query)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def jaccard_similarity(a: Sequence[str], b: Sequence[str]) -> float:
    """Return the Jaccard similarity ``|A ∩ B| / |A ∪ B|`` of two token lists.

    Both inputs are converted to sets first. When both are empty the union is
    empty as well and ``0.0`` is returned.
    """
    left, right = set(a), set(b)
    combined = left | right
    if not combined:
        return 0.0
    return len(left & right) / len(combined)
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""RAG Prompt — 从检索结果构造带上下文的 prompt。
|
|
2
|
+
|
|
3
|
+
用途: Hypothesizer / Mutator 在生成新因子前, 检索 top-k 历史因子作为 in-context 示例。
|
|
4
|
+
|
|
5
|
+
Week 8 升级: 谱系 RAG — 每个示例附 ancestors/descendants 上下文, 树状展开。
|
|
6
|
+
Week 9 升级: 谱系压缩 — use_compress=True 时, 祖先/后裔段先用 LLM/启发式
|
|
7
|
+
总结为 1 段简短描述, 减少 token 消耗。
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from typing import Optional
|
|
12
|
+
|
|
13
|
+
from ..trajectory import TrajectoryEntry, TrajectoryPool
|
|
14
|
+
from .knowledge_base import KnowledgeBase
|
|
15
|
+
from .lineage_compress import Compressor
|
|
16
|
+
from .lineage_expand import expand_lineage
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Preamble of the retrieval section of the prompt. ``n_examples`` is filled
# with the number of retrieved examples that are listed after it.
_RAG_HEADER = """你是一个量化研究员, 负责基于历史经验和当前研究假设生成新 alpha 因子。
下方是历史表现良好的 {n_examples} 个因子作为参考, 含完整演化谱系 (ancestors / descendants):
"""

# One retrieved example: its index, factor name, expression, description and
# the sharpe / arr / ic_mean metrics.
_RAG_EXAMPLE_TEMPLATE = """---
示例 {idx}: {name}
表达式: {expression}
描述: {description}
指标: sharpe={sharpe}, arr={arr}, ic_mean={ic_mean}
"""

# One line per lineage relative in the expanded (multi-line) lineage format.
_LINEAGE_RELATION_TEMPLATE = (
    "{relation} (depth={depth}): {name} | sharpe={sharpe} | {expression}"
)

# One line per lineage side (ancestors / descendants) in the compressed format.
_LINEAGE_COMPRESSED_TEMPLATE = """{relation} ({n} entries): {summary}"""

# Task instruction appended after the retrieval section. The doubled braces
# are emitted as literal braces by ``str.format``.
_RAG_TASK_TEMPLATE = """
现在, 请基于以下研究假设生成新因子:
研究假设: {hypothesis}
补充描述: {description}

请综合参考示例的设计思路, 生成一个与历史不同但经济意义清晰的因子。
返回 JSON: {{"name": "因子名", "expression": "代码表达式", "description": "因子描述"}}
"""
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def build_rag_prompt(
    direction: str,
    description: str,
    kb: Optional[KnowledgeBase] = None,
    top_k: int = 3,
    min_score: float = 0.01,
    include_lineage: bool = True,
    max_ancestor_depth: int = 2,
    max_descendant_depth: int = 2,
    use_compress: bool = False,
    compressor: Optional[Compressor] = None,
) -> str:
    """Build a generation prompt, optionally prefixed with retrieved examples.

    When a non-empty knowledge base is supplied it is queried with
    ``"{direction} {description}"``. Every hit is rendered as an example and,
    if requested, followed by its lineage context. The task instruction is
    always appended last.

    Args:
        direction: Research direction, used as the hypothesis in the prompt.
        description: Supplementary description.
        kb: Knowledge base to query; ``None`` or an empty one yields a prompt
            without retrieval context.
        top_k: Number of entries requested from the knowledge base.
        min_score: Minimum similarity score passed to the query.
        include_lineage: Whether to append ancestor/descendant context to
            each example (needs ``kb.pool`` and a non-``None`` entry).
        max_ancestor_depth: Depth limit for ancestor expansion.
        max_descendant_depth: Depth limit for descendant expansion.
        use_compress: Whether the lineage context is rendered in compressed
            form instead of the multi-line form.
        compressor: Compressor forwarded to ``_format_lineage``; may be left
            as ``None``.

    Returns:
        The complete prompt text.
    """
    context = ""
    hits = None
    if kb is not None and len(kb) > 0:
        hits = kb.query(
            f"{direction} {description}", top_k=top_k, min_score=min_score,
        )

    if hits:
        blocks = []
        for rank, (entry, score) in enumerate(hits, start=1):
            blocks.append(_format_example(rank, entry, score))
            # Lineage needs the switch on, a trajectory pool and a concrete entry.
            if not (include_lineage and kb.pool is not None and entry is not None):
                continue
            lineage_text = _format_lineage(
                kb.pool,
                entry.entry_id,
                max_ancestor_depth=max_ancestor_depth,
                max_descendant_depth=max_descendant_depth,
                use_compress=use_compress,
                compressor=compressor,
            )
            if lineage_text:
                blocks.append(lineage_text)
        context = _RAG_HEADER.format(n_examples=len(hits)) + "\n".join(blocks)

    task = _RAG_TASK_TEMPLATE.format(hypothesis=direction, description=description)
    return context + task
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _format_example(idx: int, entry: TrajectoryEntry | None, score: float) -> str:
    """Render one retrieved entry with ``_RAG_EXAMPLE_TEMPLATE``.

    Args:
        idx: Index shown in the example heading.
        entry: Retrieved entry; ``None`` produces a placeholder example.
        score: Retrieval score; accepted but not used in the rendered text.

    Returns:
        The formatted example text.
    """
    if entry is None:
        fields = {
            "name": "<unknown>",
            "expression": "",
            "description": "",
            "sharpe": "?",
            "arr": "?",
            "ic_mean": "?",
        }
    else:
        snapshot = entry.config_snapshot or {}
        # A snapshot that is not a dict is treated as having no factor section.
        factor = snapshot.get("factor", {}) if isinstance(snapshot, dict) else {}
        metrics = entry.metrics or {}
        fields = {
            # The first 8 characters of the entry id stand in for a missing name.
            "name": factor.get("name", entry.entry_id[:8]),
            "expression": factor.get("expression", ""),
            "description": factor.get("description", ""),
            "sharpe": metrics.get("sharpe", "?"),
            "arr": metrics.get("arr", "?"),
            "ic_mean": metrics.get("ic_mean", "?"),
        }
    return _RAG_EXAMPLE_TEMPLATE.format(idx=idx, **fields)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _format_lineage(
    pool: TrajectoryPool,
    root_id: str,
    max_ancestor_depth: int = 2,
    max_descendant_depth: int = 2,
    use_compress: bool = False,
    compressor: Optional[Compressor] = None,
) -> str:
    """Render the lineage context that is placed after an example.

    Args:
        pool: Trajectory pool handed to ``expand_lineage``.
        root_id: Id of the entry whose lineage is expanded.
        max_ancestor_depth: Depth limit for ancestor expansion.
        max_descendant_depth: Depth limit for descendant expansion.
        use_compress: Select the compressed format instead of the multi-line
            one.
        compressor: Compressor used in compressed mode; when falsy a
            ``Compressor(model="mock")`` is created.

    Returns:
        The lineage text, or ``""`` when there are neither ancestors nor
        descendants.
    """
    lineage = expand_lineage(
        pool,
        root_id,
        max_ancestor_depth=max_ancestor_depth,
        max_descendant_depth=max_descendant_depth,
    )
    ancestors, descendants = lineage["ancestors"], lineage["descendants"]

    if not (ancestors or descendants):
        return ""

    if not use_compress:
        # Multi-line expanded mode.
        return _format_lineage_expanded(ancestors, descendants)

    # Compressed mode.
    active_compressor = compressor or Compressor(model="mock")
    return _format_lineage_compressed(ancestors, descendants, active_compressor)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _format_lineage_expanded(ancestors, descendants) -> str:
|
|
154
|
+
"""Week 8 风格: 多行展开。"""
|
|
155
|
+
parts: list[str] = [" 谱系上下文:"]
|
|
156
|
+
for depth, entry in ancestors:
|
|
157
|
+
cfg = entry.config_snapshot or {}
|
|
158
|
+
factor_cfg = cfg.get("factor", {}) if isinstance(cfg, dict) else {}
|
|
159
|
+
parts.append(" " + _LINEAGE_RELATION_TEMPLATE.format(
|
|
160
|
+
relation="↑ ancestor",
|
|
161
|
+
depth=depth,
|
|
162
|
+
name=factor_cfg.get("name", entry.entry_id[:8]),
|
|
163
|
+
sharpe=(entry.metrics or {}).get("sharpe", "?"),
|
|
164
|
+
expression=factor_cfg.get("expression", "")[:40],
|
|
165
|
+
))
|
|
166
|
+
for depth, entry in descendants:
|
|
167
|
+
cfg = entry.config_snapshot or {}
|
|
168
|
+
factor_cfg = cfg.get("factor", {}) if isinstance(cfg, dict) else {}
|
|
169
|
+
parts.append(" " + _LINEAGE_RELATION_TEMPLATE.format(
|
|
170
|
+
relation="↓ descendant",
|
|
171
|
+
depth=depth,
|
|
172
|
+
name=factor_cfg.get("name", entry.entry_id[:8]),
|
|
173
|
+
sharpe=(entry.metrics or {}).get("sharpe", "?"),
|
|
174
|
+
expression=factor_cfg.get("expression", "")[:40],
|
|
175
|
+
))
|
|
176
|
+
return "\n".join(parts)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _format_lineage_compressed(ancestors, descendants, compressor: Compressor) -> str:
|
|
180
|
+
"""Week 9 风格: 1 行总结 ancestors + 1 行总结 descendants。"""
|
|
181
|
+
parts: list[str] = [" 谱系上下文 (压缩):"]
|
|
182
|
+
if ancestors:
|
|
183
|
+
c_anc = compressor.compress(ancestors, relation="ancestors")
|
|
184
|
+
parts.append(" " + _LINEAGE_COMPRESSED_TEMPLATE.format(
|
|
185
|
+
relation="↑ ancestors",
|
|
186
|
+
n=c_anc.original_count,
|
|
187
|
+
summary=c_anc.summary,
|
|
188
|
+
))
|
|
189
|
+
if descendants:
|
|
190
|
+
c_desc = compressor.compress(descendants, relation="descendants")
|
|
191
|
+
parts.append(" " + _LINEAGE_COMPRESSED_TEMPLATE.format(
|
|
192
|
+
relation="↓ descendants",
|
|
193
|
+
n=c_desc.original_count,
|
|
194
|
+
summary=c_desc.summary,
|
|
195
|
+
))
|
|
196
|
+
return "\n".join(parts)
|