quantnodes 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- QuantNodes/__init__.py +15 -0
- QuantNodes/__main__.py +14 -0
- QuantNodes/agent/__init__.py +158 -0
- QuantNodes/agent/agents/__init__.py +13 -0
- QuantNodes/agent/agents/definition.py +180 -0
- QuantNodes/agent/agents/manager.py +73 -0
- QuantNodes/agent/config/__init__.py +34 -0
- QuantNodes/agent/config/executor.py +958 -0
- QuantNodes/agent/config/loader.py +427 -0
- QuantNodes/agent/config/templates/bollinger_bands.yaml +84 -0
- QuantNodes/agent/config/templates/dual_ma.yaml +72 -0
- QuantNodes/agent/config/templates/empty.yaml +56 -0
- QuantNodes/agent/config/templates/mean_reversion.yaml +47 -0
- QuantNodes/agent/config/templates/mean_reversion_zscore.yaml +90 -0
- QuantNodes/agent/config/templates/momentum.yaml +81 -0
- QuantNodes/agent/config/templates/momentum_breakout.yaml +84 -0
- QuantNodes/agent/config/templates/rsi_strategy.yaml +72 -0
- QuantNodes/agent/config/templates/volume_price.yaml +86 -0
- QuantNodes/agent/config/types.py +156 -0
- QuantNodes/agent/config_mapper.py +293 -0
- QuantNodes/agent/core/__init__.py +19 -0
- QuantNodes/agent/core/dream.py +47 -0
- QuantNodes/agent/core/quant_dream.py +274 -0
- QuantNodes/agent/cron_jobs.py +314 -0
- QuantNodes/agent/nanobot_bridge.py +242 -0
- QuantNodes/agent/permission/__init__.py +30 -0
- QuantNodes/agent/permission/defaults.py +36 -0
- QuantNodes/agent/permission/evaluate.py +41 -0
- QuantNodes/agent/permission/models.py +59 -0
- QuantNodes/agent/permission/service.py +133 -0
- QuantNodes/agent/providers/__init__.py +11 -0
- QuantNodes/agent/providers/base.py +102 -0
- QuantNodes/agent/providers/quantnodes.py +610 -0
- QuantNodes/agent/providers/rate_limiter.py +326 -0
- QuantNodes/agent/providers/registry.py +163 -0
- QuantNodes/agent/skills/__init__.py +20 -0
- QuantNodes/agent/skills/base.py +118 -0
- QuantNodes/agent/skills/bridge.py +73 -0
- QuantNodes/agent/skills/factor/__init__.py +14 -0
- QuantNodes/agent/skills/factor/correlation.py +99 -0
- QuantNodes/agent/skills/factor/group_backtest.py +114 -0
- QuantNodes/agent/skills/factor/ic_analysis.py +106 -0
- QuantNodes/agent/skills/loader.py +107 -0
- QuantNodes/agent/skills/registry.py +105 -0
- QuantNodes/agent/skills/strategy/__init__.py +16 -0
- QuantNodes/agent/skills/strategy/bollinger.py +86 -0
- QuantNodes/agent/skills/strategy/dual_ma.py +82 -0
- QuantNodes/agent/skills/strategy/momentum.py +74 -0
- QuantNodes/agent/skills/strategy/rsi_reversal.py +99 -0
- QuantNodes/agent/skills_quant/__init__.py +14 -0
- QuantNodes/agent/skills_quant/backtest-analyze/SKILL.md +42 -0
- QuantNodes/agent/skills_quant/config-driven/SKILL.md +72 -0
- QuantNodes/agent/skills_quant/factor-research/SKILL.md +40 -0
- QuantNodes/agent/skills_quant/quant-dream/SKILL.md +55 -0
- QuantNodes/agent/skills_quant/risk-management/SKILL.md +45 -0
- QuantNodes/agent/skills_quant/strategy-design/SKILL.md +43 -0
- QuantNodes/agent/templates/__init__.py +4 -0
- QuantNodes/agent/tools/__init__.py +173 -0
- QuantNodes/agent/tools/_workspace.py +51 -0
- QuantNodes/agent/tools/alpha_backtest.py +328 -0
- QuantNodes/agent/tools/alpha_evaluate.py +493 -0
- QuantNodes/agent/tools/backtest.py +226 -0
- QuantNodes/agent/tools/base.py +133 -0
- QuantNodes/agent/tools/code_search.py +207 -0
- QuantNodes/agent/tools/config_backtest.py +401 -0
- QuantNodes/agent/tools/context.py +97 -0
- QuantNodes/agent/tools/dream_skill.py +77 -0
- QuantNodes/agent/tools/echo.py +38 -0
- QuantNodes/agent/tools/factor.py +231 -0
- QuantNodes/agent/tools/file_ops.py +201 -0
- QuantNodes/agent/tools/git_ops.py +190 -0
- QuantNodes/agent/tools/operator_lookup.py +218 -0
- QuantNodes/agent/tools/output_truncation.py +77 -0
- QuantNodes/agent/tools/path_check.py +43 -0
- QuantNodes/agent/tools/pipeline.py +62 -0
- QuantNodes/agent/tools/registry.py +150 -0
- QuantNodes/agent/tools/sandbox.py +62 -0
- QuantNodes/agent/tools/shell_safety.py +63 -0
- QuantNodes/agent/tools/strategy.py +106 -0
- QuantNodes/agent/tools/task.py +171 -0
- QuantNodes/agent/tools/web_fetch.py +142 -0
- QuantNodes/agent/tools/web_search.py +114 -0
- QuantNodes/agent/tools/wiki.py +370 -0
- QuantNodes/agent/utils/__init__.py +11 -0
- QuantNodes/agent/utils/helpers.py +43 -0
- QuantNodes/agent/utils/prompt_templates.py +30 -0
- QuantNodes/agent/workflows/__init__.py +20 -0
- QuantNodes/agent/workflows/implementations/__init__.py +8 -0
- QuantNodes/agent/workflows/implementations/alpha_gpt.py +508 -0
- QuantNodes/agent/workflows/implementations/mcts.py +442 -0
- QuantNodes/agent/workflows/parsers.py +44 -0
- QuantNodes/agent/workflows/registry.py +119 -0
- QuantNodes/agent/workflows/step_agent.py +219 -0
- QuantNodes/agent/workflows/tool.py +198 -0
- QuantNodes/ai/__init__.py +93 -0
- QuantNodes/ai/llm/__init__.py +75 -0
- QuantNodes/ai/llm/base.py +233 -0
- QuantNodes/ai/llm/decorators.py +281 -0
- QuantNodes/ai/llm/gateway.py +571 -0
- QuantNodes/ai/llm/null.py +76 -0
- QuantNodes/ai/llm/openai.py +435 -0
- QuantNodes/ai/optimizer.py +405 -0
- QuantNodes/ai/prompts/__init__.py +229 -0
- QuantNodes/ai/sandbox.py +371 -0
- QuantNodes/ai/sandbox_pandas_bridge.py +150 -0
- QuantNodes/ai/strategy_gen.py +396 -0
- QuantNodes/backtest/__init__.py +64 -0
- QuantNodes/backtest/backtest_node.py +188 -0
- QuantNodes/backtest/broker_node.py +378 -0
- QuantNodes/backtest/config_runner.py +397 -0
- QuantNodes/backtest/config_strategy.py +64 -0
- QuantNodes/backtest/risk_node.py +360 -0
- QuantNodes/backtest/strategy_node.py +268 -0
- QuantNodes/cache_node/__init__.py +19 -0
- QuantNodes/cache_node/base.py +244 -0
- QuantNodes/cache_node/cache_store.py +99 -0
- QuantNodes/cache_node/metadata.py +100 -0
- QuantNodes/cli/__init__.py +109 -0
- QuantNodes/cli/_helpers.py +511 -0
- QuantNodes/cli/command.py +110 -0
- QuantNodes/cli/commands/__init__.py +69 -0
- QuantNodes/cli/commands/agent.py +158 -0
- QuantNodes/cli/commands/alpha.py +951 -0
- QuantNodes/cli/commands/chat.py +38 -0
- QuantNodes/cli/commands/evolve.py +120 -0
- QuantNodes/cli/commands/factor.py +569 -0
- QuantNodes/cli/commands/init.py +190 -0
- QuantNodes/cli/commands/run.py +259 -0
- QuantNodes/cli/commands/serve.py +398 -0
- QuantNodes/cli/commands/version.py +120 -0
- QuantNodes/cli/enhanced.py +146 -0
- QuantNodes/conf_node/__init__.py +37 -0
- QuantNodes/conf_node/base.py +120 -0
- QuantNodes/conf_node/env_config.py +132 -0
- QuantNodes/conf_node/ini_config.py +70 -0
- QuantNodes/conf_node/json_config.py +69 -0
- QuantNodes/conf_node/yaml_config.py +78 -0
- QuantNodes/constants.py +17 -0
- QuantNodes/core/__init__.py +196 -0
- QuantNodes/core/_lookback_helpers.py +49 -0
- QuantNodes/core/ast_parser.py +198 -0
- QuantNodes/core/base.py +61 -0
- QuantNodes/core/cache_manager.py +344 -0
- QuantNodes/core/cache_utils.py +150 -0
- QuantNodes/core/cond_builder.py +53 -0
- QuantNodes/core/config.py +170 -0
- QuantNodes/core/constants.py +48 -0
- QuantNodes/core/control.py +412 -0
- QuantNodes/core/data_preprocessing.py +453 -0
- QuantNodes/core/data_source.py +46 -0
- QuantNodes/core/events.py +178 -0
- QuantNodes/core/evolution/__init__.py +22 -0
- QuantNodes/core/evolution/loop.py +583 -0
- QuantNodes/core/evolution/operators.py +289 -0
- QuantNodes/core/evolution/settings.py +44 -0
- QuantNodes/core/expression.py +841 -0
- QuantNodes/core/feedback/__init__.py +38 -0
- QuantNodes/core/feedback/channels.py +182 -0
- QuantNodes/core/feedback/collector.py +91 -0
- QuantNodes/core/feedback/dataclass.py +239 -0
- QuantNodes/core/feedback/llm_judge.py +138 -0
- QuantNodes/core/knowledge/__init__.py +69 -0
- QuantNodes/core/knowledge/knowledge_base.py +217 -0
- QuantNodes/core/knowledge/lineage_compress.py +196 -0
- QuantNodes/core/knowledge/lineage_expand.py +123 -0
- QuantNodes/core/knowledge/metrics/__init__.py +43 -0
- QuantNodes/core/knowledge/metrics/evaluator.py +176 -0
- QuantNodes/core/knowledge/metrics/metrics.py +220 -0
- QuantNodes/core/knowledge/rag_prompt.py +196 -0
- QuantNodes/core/knowledge/retriever.py +209 -0
- QuantNodes/core/lambda_node.py +81 -0
- QuantNodes/core/monitoring/__init__.py +22 -0
- QuantNodes/core/monitoring/collector.py +292 -0
- QuantNodes/core/monitoring/dashboard.py +365 -0
- QuantNodes/core/node.py +375 -0
- QuantNodes/core/pandas_utils.py +504 -0
- QuantNodes/core/parallel/__init__.py +15 -0
- QuantNodes/core/parallel/worker.py +140 -0
- QuantNodes/core/parallel/worker_process.py +265 -0
- QuantNodes/core/path_utils.py +73 -0
- QuantNodes/core/pipeline.py +328 -0
- QuantNodes/core/plugin.py +135 -0
- QuantNodes/core/quality_gate/__init__.py +32 -0
- QuantNodes/core/quality_gate/complexity.py +94 -0
- QuantNodes/core/quality_gate/consistency.py +26 -0
- QuantNodes/core/quality_gate/node.py +97 -0
- QuantNodes/core/quality_gate/redundancy.py +51 -0
- QuantNodes/core/quality_gate/settings.py +43 -0
- QuantNodes/core/quality_gate/zoo.py +98 -0
- QuantNodes/core/serializable.py +116 -0
- QuantNodes/core/serialization.py +673 -0
- QuantNodes/core/tools.py +333 -0
- QuantNodes/core/trajectory/__init__.py +25 -0
- QuantNodes/core/trajectory/entry.py +116 -0
- QuantNodes/core/trajectory/lineage.py +67 -0
- QuantNodes/core/trajectory/pool.py +211 -0
- QuantNodes/core/trajectory/selector.py +140 -0
- QuantNodes/core/visualization/__init__.py +33 -0
- QuantNodes/core/visualization/builder.py +233 -0
- QuantNodes/core/visualization/gate_breakdown.py +140 -0
- QuantNodes/core/visualization/lineage_dag.py +203 -0
- QuantNodes/core/visualization/metric_distribution.py +125 -0
- QuantNodes/core/visualization/report.py +68 -0
- QuantNodes/database_node/__init__.py +69 -0
- QuantNodes/database_node/base.py +135 -0
- QuantNodes/database_node/clickhouse_node.py +272 -0
- QuantNodes/database_node/csv_node.py +83 -0
- QuantNodes/database_node/duckdb_node.py +86 -0
- QuantNodes/database_node/factory.py +83 -0
- QuantNodes/database_node/mysql_node.py +100 -0
- QuantNodes/database_node/parquet_node.py +75 -0
- QuantNodes/database_node/sqlite_node.py +67 -0
- QuantNodes/factor_node/__init__.py +50 -0
- QuantNodes/factor_node/factor.py +563 -0
- QuantNodes/factor_node/factor_db.py +421 -0
- QuantNodes/factor_node/factor_functions/__init__.py +252 -0
- QuantNodes/factor_node/factor_functions/_helpers.py +358 -0
- QuantNodes/factor_node/factor_functions/_helpers_debug.py +317 -0
- QuantNodes/factor_node/factor_functions/composite_ops.py +136 -0
- QuantNodes/factor_node/factor_functions/math_ops.py +433 -0
- QuantNodes/factor_node/factor_functions/section_ops.py +290 -0
- QuantNodes/factor_node/factor_functions/talib_ops.py +1293 -0
- QuantNodes/factor_node/factor_functions/time_ops.py +535 -0
- QuantNodes/factor_node/factor_operation.py +1115 -0
- QuantNodes/factor_node/factor_table.py +1073 -0
- QuantNodes/factor_node/quant_nodes_object.py +60 -0
- QuantNodes/mcp_server/__init__.py +27 -0
- QuantNodes/mcp_server/__main__.py +4 -0
- QuantNodes/mcp_server/server.py +272 -0
- QuantNodes/methods/__init__.py +28 -0
- QuantNodes/methods/pipeline.py +100 -0
- QuantNodes/methods/sandbox.py +102 -0
- QuantNodes/monitor/__init__.py +27 -0
- QuantNodes/monitor/agent_tools/__init__.py +5 -0
- QuantNodes/monitor/agent_tools/monitor_tool.py +98 -0
- QuantNodes/monitor/agent_tools/schedule_tool.py +98 -0
- QuantNodes/monitor/agent_tools/version_tool.py +133 -0
- QuantNodes/monitor/monitor/__init__.py +6 -0
- QuantNodes/monitor/monitor/alerter.py +60 -0
- QuantNodes/monitor/monitor/collector.py +164 -0
- QuantNodes/monitor/monitor/dashboard.py +115 -0
- QuantNodes/monitor/monitor/drift.py +190 -0
- QuantNodes/monitor/scheduler/__init__.py +4 -0
- QuantNodes/monitor/scheduler/runner.py +133 -0
- QuantNodes/monitor/scheduler/scheduler.py +184 -0
- QuantNodes/monitor/storage/__init__.py +16 -0
- QuantNodes/monitor/storage/models.py +70 -0
- QuantNodes/monitor/storage/repository.py +407 -0
- QuantNodes/monitor/version/__init__.py +4 -0
- QuantNodes/monitor/version/diff.py +81 -0
- QuantNodes/monitor/version/version_manager.py +182 -0
- QuantNodes/operator_node/__init__.py +28 -0
- QuantNodes/operator_node/base.py +97 -0
- QuantNodes/operator_node/query_node.py +129 -0
- QuantNodes/operator_node/sql_builder.py +125 -0
- QuantNodes/operator_node/sql_utils.py +172 -0
- QuantNodes/operator_node/transform.py +130 -0
- QuantNodes/operators/__init__.py +90 -0
- QuantNodes/operators/_engine.py +108 -0
- QuantNodes/operators/composite.py +161 -0
- QuantNodes/operators/composite_dag.py +667 -0
- QuantNodes/operators/composite_dag_ops.py +343 -0
- QuantNodes/operators/composite_dag_pandas_ops.py +382 -0
- QuantNodes/operators/custom.py +408 -0
- QuantNodes/operators/facade.py +164 -0
- QuantNodes/operators/math.py +163 -0
- QuantNodes/operators/proxy.py +29 -0
- QuantNodes/operators/registry.py +144 -0
- QuantNodes/operators/section.py +99 -0
- QuantNodes/operators/talib.py +757 -0
- QuantNodes/operators/templates.py +95 -0
- QuantNodes/operators/time_series.py +136 -0
- QuantNodes/prompts/__init__.py +20 -0
- QuantNodes/prompts/backtest/__init__.py +12 -0
- QuantNodes/prompts/backtest/factor_based.py +86 -0
- QuantNodes/prompts/backtest/standard.py +73 -0
- QuantNodes/prompts/factor/__init__.py +14 -0
- QuantNodes/prompts/factor/correlation.py +77 -0
- QuantNodes/prompts/factor/group_backtest.py +86 -0
- QuantNodes/prompts/factor/ic_analysis.py +91 -0
- QuantNodes/prompts/strategy/__init__.py +18 -0
- QuantNodes/prompts/strategy/market_neutral.py +96 -0
- QuantNodes/prompts/strategy/mean_reversion.py +107 -0
- QuantNodes/prompts/strategy/momentum.py +160 -0
- QuantNodes/prompts/strategy/pairs_trading.py +107 -0
- QuantNodes/prompts/strategy/trend_following.py +96 -0
- QuantNodes/research/README.md +106 -0
- QuantNodes/research/__init__.py +154 -0
- QuantNodes/research/_legacy_3c/__init__.py +61 -0
- QuantNodes/research/_legacy_3c/auto_researcher.py +289 -0
- QuantNodes/research/_legacy_3c/factor_evaluator.py +560 -0
- QuantNodes/research/_legacy_3c/factor_miner.py +318 -0
- QuantNodes/research/_legacy_3c/mcts_search.py +324 -0
- QuantNodes/research/factor_test/__init__.py +25 -0
- QuantNodes/research/factor_test/config.py +184 -0
- QuantNodes/research/factor_test/config_builder.py +276 -0
- QuantNodes/research/factor_test/e2e/data_prep.py +163 -0
- QuantNodes/research/factor_test/e2e/run_evolution_e2e.py +309 -0
- QuantNodes/research/factor_test/evolution_adapter.py +231 -0
- QuantNodes/research/factor_test/feedback_wrapper.py +102 -0
- QuantNodes/research/factor_test/ifind_db/__init__.py +7 -0
- QuantNodes/research/factor_test/ifind_db/fetcher.py +224 -0
- QuantNodes/research/factor_test/ifind_db/ifind_database.py +689 -0
- QuantNodes/research/factor_test/nodes/__init__.py +1 -0
- QuantNodes/research/factor_test/nodes/_base.py +91 -0
- QuantNodes/research/factor_test/nodes/adjust_date_node.py +48 -0
- QuantNodes/research/factor_test/nodes/configs.py +240 -0
- QuantNodes/research/factor_test/nodes/factor_neutralize_node.py +87 -0
- QuantNodes/research/factor_test/nodes/factor_preprocess_node.py +222 -0
- QuantNodes/research/factor_test/nodes/factor_score_node.py +141 -0
- QuantNodes/research/factor_test/nodes/factor_test_report_node.py +153 -0
- QuantNodes/research/factor_test/nodes/group_analyzer_node.py +317 -0
- QuantNodes/research/factor_test/nodes/ic_analyzer_node.py +112 -0
- QuantNodes/research/factor_test/nodes/load_data_node.py +100 -0
- QuantNodes/research/factor_test/nodes/long_short_node.py +93 -0
- QuantNodes/research/factor_test/nodes/neutralizers.py +222 -0
- QuantNodes/research/factor_test/nodes/preprocess_strategies.py +277 -0
- QuantNodes/research/factor_test/nodes/risk_correlation_node.py +112 -0
- QuantNodes/research/factor_test/nodes/sample_pool_filter_node.py +110 -0
- QuantNodes/research/factor_test/nodes/tradability_filter_node.py +92 -0
- QuantNodes/research/factor_test/pipeline_runner.py +305 -0
- QuantNodes/research/factor_test/pipeline_spec.py +216 -0
- QuantNodes/research/factor_test/utils/__init__.py +26 -0
- QuantNodes/research/factor_test/utils/constants.py +86 -0
- QuantNodes/research/factor_test/utils/data_loader.py +141 -0
- QuantNodes/research/factor_test/utils/date_utils.py +232 -0
- QuantNodes/research/factor_test/utils/file_loaders.py +150 -0
- QuantNodes/research/factor_test/utils/labels.py +37 -0
- QuantNodes/research/factor_test/utils/metrics_extractor.py +55 -0
- QuantNodes/research/factor_test/utils/performance_metrics.py +175 -0
- QuantNodes/research/factor_test/utils/safe_load.py +106 -0
- QuantNodes/research/quant_alpha/CHANGELOG.md +80 -0
- QuantNodes/research/quant_alpha/README.md +142 -0
- QuantNodes/research/quant_alpha/__init__.py +45 -0
- QuantNodes/research/quant_alpha/adapters/__init__.py +99 -0
- QuantNodes/research/quant_alpha/adapters/calculator.py +503 -0
- QuantNodes/research/quant_alpha/adapters/expression.py +387 -0
- QuantNodes/research/quant_alpha/alpha101_design/__init__.py +50 -0
- QuantNodes/research/quant_alpha/alpha101_design/few_shot_examples.py +243 -0
- QuantNodes/research/quant_alpha/alpha101_design/philosophy.py +474 -0
- QuantNodes/research/quant_alpha/alpha158_design/__init__.py +63 -0
- QuantNodes/research/quant_alpha/alpha158_design/few_shot_examples.py +219 -0
- QuantNodes/research/quant_alpha/alpha158_design/philosophy.py +240 -0
- QuantNodes/research/quant_alpha/evaluation/__init__.py +47 -0
- QuantNodes/research/quant_alpha/evaluation/baselines/__init__.py +8 -0
- QuantNodes/research/quant_alpha/evaluation/baselines/g1_handcrafted.py +135 -0
- QuantNodes/research/quant_alpha/evaluation/baselines/g2_llm_only.py +269 -0
- QuantNodes/research/quant_alpha/evaluation/baselines/g3_alpha_gpt.py +152 -0
- QuantNodes/research/quant_alpha/evaluation/clickhouse_data_loader.py +227 -0
- QuantNodes/research/quant_alpha/evaluation/contracts.py +376 -0
- QuantNodes/research/quant_alpha/evaluation/evaluators/__init__.py +6 -0
- QuantNodes/research/quant_alpha/evaluation/evaluators/polars_evaluator.py +545 -0
- QuantNodes/research/quant_alpha/evaluation/mock_data_loader.py +226 -0
- QuantNodes/research/quant_alpha/evaluation/runner.py +243 -0
- QuantNodes/research/quant_alpha/llm/__init__.py +38 -0
- QuantNodes/research/quant_alpha/llm/parser.py +681 -0
- QuantNodes/research/quant_alpha/logic_driven_pipeline.py +411 -0
- QuantNodes/research/quant_alpha/logic_mining/__init__.py +74 -0
- QuantNodes/research/quant_alpha/logic_mining/compiler.py +457 -0
- QuantNodes/research/quant_alpha/logic_mining/generator.py +366 -0
- QuantNodes/research/quant_alpha/logic_mining/models.py +252 -0
- QuantNodes/research/quant_alpha/logic_mining/parser.py +287 -0
- QuantNodes/research/quant_alpha/logic_mining/pipelines.py +297 -0
- QuantNodes/research/quant_alpha/logic_mining/sources.py +149 -0
- QuantNodes/research/quant_alpha/mcts/__init__.py +66 -0
- QuantNodes/research/quant_alpha/mcts/cache.py +262 -0
- QuantNodes/research/quant_alpha/mcts/extension_ops.py +320 -0
- QuantNodes/research/quant_alpha/mcts/feedback.py +825 -0
- QuantNodes/research/quant_alpha/mcts/op_prior.py +180 -0
- QuantNodes/research/quant_alpha/mcts/search.py +540 -0
- QuantNodes/research/quant_alpha/mcts/tree.py +201 -0
- QuantNodes/research/quant_alpha/operator_vocab/__init__.py +50 -0
- QuantNodes/research/quant_alpha/operator_vocab/config.py +54 -0
- QuantNodes/research/quant_alpha/operator_vocab/metadata.py +263 -0
- QuantNodes/research/quant_alpha/operator_vocab/vocabulary.py +481 -0
- QuantNodes/research/quant_alpha/pipeline.py +1027 -0
- QuantNodes/research/quant_alpha/types/__init__.py +27 -0
- QuantNodes/research/quant_alpha/types/constants.py +28 -0
- QuantNodes/research/quant_alpha/types/state.py +205 -0
- QuantNodes/research/quant_alpha/workflow/__init__.py +32 -0
- QuantNodes/research/quant_alpha/workflow/alpha_gpt.py +911 -0
- QuantNodes/research/quant_alpha/workflow/alpha_logics.py +416 -0
- QuantNodes/research/quant_alpha/workflow/state.py +27 -0
- QuantNodes/research/report_reproducer.py +485 -0
- QuantNodes/research/wiki.py +1155 -0
- QuantNodes/symbolic/__init__.py +51 -0
- QuantNodes/symbolic/compiler.py +113 -0
- QuantNodes/symbolic/dialect.py +260 -0
- QuantNodes/symbolic/executor.py +147 -0
- QuantNodes/symbolic/expression.py +234 -0
- QuantNodes/symbolic/functions.py +433 -0
- QuantNodes/symbolic/optimizer.py +165 -0
- QuantNodes/ui_node/__init__.py +30 -0
- QuantNodes/ui_node/base.py +222 -0
- quantnodes-3.0.0.dist-info/METADATA +463 -0
- quantnodes-3.0.0.dist-info/RECORD +399 -0
- quantnodes-3.0.0.dist-info/WHEEL +5 -0
- quantnodes-3.0.0.dist-info/entry_points.txt +24 -0
- quantnodes-3.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,681 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
"""
|
|
3
|
+
parser.py - LLM 输出 JSON 三层降级解析器(Alpha-GPT M5)
|
|
4
|
+
|
|
5
|
+
Alpha-GPT 工作流的【所有 5 阶段】输出都依赖 LLM JSON 解析。
|
|
6
|
+
LLM 输出不稳定(多余文本 / markdown 包裹 / 截断),需要
|
|
7
|
+
3 层降级:JSON Schema → 正则提取 → 重试 LLM。
|
|
8
|
+
|
|
9
|
+
零新依赖(不引 instructor / outlines)。
|
|
10
|
+
|
|
11
|
+
Usage::
|
|
12
|
+
|
|
13
|
+
from QuantNodes.research.quant_alpha.llm.parser import (
|
|
14
|
+
FormulaParser, parse_idea_generator_output,
|
|
15
|
+
parse_formula_translator_output, parse_evaluator_output,
|
|
16
|
+
parse_reflector_output, parse_critic_output,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
result = parse_idea_generator_output(llm_output)
|
|
20
|
+
if result.ok:
|
|
21
|
+
ideas = result.data["ideas"]
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import json
|
|
27
|
+
import logging
|
|
28
|
+
import re
|
|
29
|
+
from dataclasses import dataclass, field
|
|
30
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple, Tuple
|
|
31
|
+
|
|
32
|
+
logger = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# ==============================================================================
|
|
36
|
+
# 通用 Result
|
|
37
|
+
# ==============================================================================
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class ParseResult:
    """Outcome of one attempt to turn LLM output into a JSON object."""

    # True when a usable dict was obtained.
    ok: bool
    # The parsed payload; stays None when nothing could be parsed.
    data: Optional[Dict[str, Any]] = None
    # Human-readable failure reason; None unless the producer sets one.
    error: Optional[str] = None
    # Label of the strategy that produced ``data``. parse_json_3layer sets
    # "schema", "regex", "last_valid" or "truncated"; it stays "" on failure.
    # NOTE(review): the original note also listed "retry" and "default" --
    # confirm whether any other producer still uses those labels.
    layer: str = ""
    # The text that was handed to the parser.
    raw: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return ``ok``, ``layer``, ``error`` and ``data`` as a plain dict.

        ``raw`` is not included in the exported mapping.
        """
        exported = ("ok", "layer", "error", "data")
        return {name: getattr(self, name) for name in exported}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# ==============================================================================
|
|
60
|
+
# 通用 3 层降级
|
|
61
|
+
# ==============================================================================
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def parse_json_3layer(
    raw: str,
    schema_validator: Optional[Callable[[Dict[str, Any]], Optional[str]]] = None,
) -> ParseResult:
    """Parse LLM output into a JSON object through a chain of fallbacks.

    Despite the function name, four strategies are tried, in this order:

    1. ``"schema"``: ``json.loads`` on the whole text plus schema check.
    2. ``"regex"``: greedy match from the first ``{`` to the last ``}``,
       parsed and schema-checked again.
    3. ``"last_valid"``: the last JSON object in the text that satisfies
       the schema (covers "truncated JSON + commentary + rewritten JSON").
    4. ``"truncated"``: recovery of the complete inner objects of a
       truncated document.

    Strategies 3 and 4 only run when neither of the first two managed to
    decode a JSON object at all. If an object was decoded but rejected by
    the schema, the function reports failure without attempting recovery.

    Args:
        raw: Text produced by the LLM.
        schema_validator: Optional callable that receives the decoded dict
            and returns ``None`` when it is acceptable, or a string giving
            the reason it is not. An exception raised by the validator is
            treated as a rejection instead of propagating.

    Returns:
        A ParseResult. On success ``layer`` names the strategy that
        produced ``data``. On failure ``error`` describes the problem and
        includes the last schema rejection reason when there was one.
    """
    if raw is None or not raw.strip():
        return ParseResult(ok=False, error="empty input", raw=raw)

    json_ok = False  # becomes True once any attempt decodes a JSON object
    schema_error: Optional[str] = None  # last reason a decoded object was rejected

    def _try(s: str) -> Optional[Dict[str, Any]]:
        """Decode ``s`` and return the dict only if it passes the schema."""
        nonlocal json_ok, schema_error
        try:
            obj = json.loads(s)
        except (json.JSONDecodeError, TypeError):
            return None
        if not isinstance(obj, dict):
            return None
        json_ok = True
        if schema_validator is not None:
            # Validators index into untrusted LLM output and may raise on
            # unexpected shapes; treat that as a rejection, matching the
            # guard used by _find_last_valid_json.
            try:
                err = schema_validator(obj)
            except Exception as exc:
                err = f"validator exception: {exc!r}"
            if err is not None:
                schema_error = err
                return None
        return obj

    obj = _try(raw)
    if obj is not None:
        return ParseResult(ok=True, data=obj, layer="schema", raw=raw)

    m = re.search(r"\{[\s\S]*\}", raw)
    if m:
        obj = _try(m.group(0))
        if obj is not None:
            return ParseResult(ok=True, data=obj, layer="regex", raw=raw)

    if not json_ok:
        # Tried first: the last complete JSON object that satisfies the
        # schema ("truncated JSON + commentary + rewritten JSON" outputs).
        last = _find_last_valid_json(raw, schema_validator)
        if last is not None:
            return ParseResult(ok=True, data=last, layer="last_valid", raw=raw)

        # Fallback: salvage the complete inner objects of a truncated output.
        truncated = _recover_truncated_json(raw, schema_validator)
        if truncated is not None:
            return ParseResult(
                ok=True, data=truncated, layer="truncated", raw=raw
            )

    error = "Cannot parse JSON after 4 layers (full raw in ParseResult.raw)"
    if schema_error is not None:
        # The JSON decoded but was rejected; surface why instead of hiding it.
        error += f"; schema validation failed: {schema_error}"
    return ParseResult(ok=False, error=error, raw=raw)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _find_last_valid_json(
|
|
136
|
+
raw: str,
|
|
137
|
+
schema_validator: Optional[Callable[[Dict[str, Any]], Optional[str]]] = None,
|
|
138
|
+
) -> Optional[Dict[str, Any]]:
|
|
139
|
+
"""扫描所有 JSON 候选对象,返回最后一个满足 schema 的。
|
|
140
|
+
|
|
141
|
+
处理 LLM "截断 JSON + thinking + 重写 JSON" 模式:
|
|
142
|
+
```
|
|
143
|
+
{
|
|
144
|
+
"round": 1, ... ← 第一次输出,被 max_tokens 截断
|
|
145
|
+
}
|
|
146
|
+
Actually, ...
|
|
147
|
+
```json
|
|
148
|
+
{"round": 1, "formulas": [...]} ← 第二次完整输出
|
|
149
|
+
```
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
Greedy regex `\{[\s\S]*\}` 会匹配从第一个 `{` 到最后一个 `}`,跨过两个
|
|
153
|
+
JSON,导致解析失败。本函数扫描所有可能的 JSON 起始位置,收集所有
|
|
154
|
+
可解的 dict,然后选**最后一个**(最可能是 LLM 重写的完整版本)。
|
|
155
|
+
|
|
156
|
+
过滤规则:
|
|
157
|
+
1. 必须是 dict(非 list / 单值)
|
|
158
|
+
2. 至少 2 个 key(排除只含元数据如 `{"round": 1}` 的 dict)
|
|
159
|
+
3. 通过 schema_validator(如果有)
|
|
160
|
+
|
|
161
|
+
Args:
|
|
162
|
+
raw: LLM 输出文本
|
|
163
|
+
schema_validator: 可选 schema 校验函数
|
|
164
|
+
|
|
165
|
+
Returns:
|
|
166
|
+
满足条件的最后一个 dict,或 None
|
|
167
|
+
"""
|
|
168
|
+
if not raw:
|
|
169
|
+
return None
|
|
170
|
+
decoder = json.JSONDecoder()
|
|
171
|
+
candidates: List[Dict[str, Any]] = []
|
|
172
|
+
n = len(raw)
|
|
173
|
+
|
|
174
|
+
for i, ch in enumerate(raw):
|
|
175
|
+
if ch != "{":
|
|
176
|
+
continue
|
|
177
|
+
# 快速排除:前一个字符是字母/数字说明这是内嵌的 dict(不是顶层)
|
|
178
|
+
# 顶层 JSON 起始位置前通常是空白/换行/`{`/`[`/`,` 等
|
|
179
|
+
if i > 0:
|
|
180
|
+
prev = raw[i - 1]
|
|
181
|
+
if prev.isalnum() or prev == '"':
|
|
182
|
+
continue
|
|
183
|
+
try:
|
|
184
|
+
obj, end = decoder.raw_decode(raw, i)
|
|
185
|
+
except json.JSONDecodeError:
|
|
186
|
+
continue
|
|
187
|
+
if not isinstance(obj, dict):
|
|
188
|
+
continue
|
|
189
|
+
if len(obj) < 2:
|
|
190
|
+
# 排除只含元数据的 dict (如 {"round": 1})
|
|
191
|
+
continue
|
|
192
|
+
if schema_validator is not None:
|
|
193
|
+
try:
|
|
194
|
+
err = schema_validator(obj)
|
|
195
|
+
except Exception:
|
|
196
|
+
err = "validator exception"
|
|
197
|
+
if err is not None:
|
|
198
|
+
continue
|
|
199
|
+
candidates.append(obj)
|
|
200
|
+
|
|
201
|
+
return candidates[-1] if candidates else None
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _recover_truncated_json(
|
|
205
|
+
raw: str,
|
|
206
|
+
schema_validator: Optional[Callable[[Dict[str, Any]], Optional[str]]] = None,
|
|
207
|
+
) -> Optional[Dict[str, Any]]:
|
|
208
|
+
"""截断恢复:扫描 LLM 输出,提取所有已完整闭合的子对象。
|
|
209
|
+
|
|
210
|
+
处理 LLM 输出因 max_tokens 截断的场景:
|
|
211
|
+
- LLM 输出 `{"ideas": [完整对象1, 完整对象2, {不完整对象`
|
|
212
|
+
- 简单 json.loads 失败
|
|
213
|
+
- 但内层 [完整对象1, 完整对象2] 仍可解析
|
|
214
|
+
|
|
215
|
+
策略:用 json.JSONDecoder().raw_decode() 反复解码,每次从下一个 [ 或 { 开始。
|
|
216
|
+
- 收集可解的 list(直接收)
|
|
217
|
+
- 收集可解的 dict(按"位置相邻"组成 list,因为数组虽然外层 [] 截断,但
|
|
218
|
+
内部 {item1}, {item2} 都可解,它们之间距离通常很近)
|
|
219
|
+
|
|
220
|
+
Returns:
|
|
221
|
+
恢复后的 dict(如 {"items": [item1, item2], "_truncated": True}),
|
|
222
|
+
或 None 表示无法恢复
|
|
223
|
+
"""
|
|
224
|
+
if not raw or not raw.strip():
|
|
225
|
+
return None
|
|
226
|
+
|
|
227
|
+
decoder = json.JSONDecoder()
|
|
228
|
+
text = raw.strip()
|
|
229
|
+
|
|
230
|
+
# 收集可解的对象和它们的终止位置
|
|
231
|
+
decoded_objects: List[Dict[str, Any]] = []
|
|
232
|
+
pos = 0
|
|
233
|
+
n = len(text)
|
|
234
|
+
openers_set = set("[{")
|
|
235
|
+
|
|
236
|
+
while pos < n:
|
|
237
|
+
next_pos = -1
|
|
238
|
+
for k in range(pos, n):
|
|
239
|
+
if text[k] in openers_set:
|
|
240
|
+
next_pos = k
|
|
241
|
+
break
|
|
242
|
+
if next_pos == -1:
|
|
243
|
+
break
|
|
244
|
+
try:
|
|
245
|
+
obj, end = decoder.raw_decode(text[next_pos:])
|
|
246
|
+
except json.JSONDecodeError:
|
|
247
|
+
pos = next_pos + 1
|
|
248
|
+
continue
|
|
249
|
+
if isinstance(obj, list) and len(obj) > 0:
|
|
250
|
+
return {
|
|
251
|
+
"_truncated": True,
|
|
252
|
+
"_recovered_count": len(obj),
|
|
253
|
+
"items": obj,
|
|
254
|
+
}
|
|
255
|
+
if isinstance(obj, dict) and len(obj) > 0:
|
|
256
|
+
# 跳过只含顶层元数据的 dict (如 {round: 1})
|
|
257
|
+
if not (set(obj.keys()) <= {"round"}):
|
|
258
|
+
decoded_objects.append(obj)
|
|
259
|
+
pos = next_pos + end
|
|
260
|
+
|
|
261
|
+
if decoded_objects:
|
|
262
|
+
return {
|
|
263
|
+
"_truncated": True,
|
|
264
|
+
"_recovered_count": len(decoded_objects),
|
|
265
|
+
"items": decoded_objects,
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
return None
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
# ==============================================================================
|
|
272
|
+
# 5 阶段 schema 校验
|
|
273
|
+
# ==============================================================================
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
# ==============================================================================
|
|
277
|
+
# 5 阶段 schema 校验
|
|
278
|
+
# ==============================================================================
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def _validate_idea_generator(obj: Dict[str, Any]) -> Optional[str]:
|
|
282
|
+
"""IdeaGenerator 输出 schema"""
|
|
283
|
+
if "ideas" not in obj:
|
|
284
|
+
return "missing 'ideas'"
|
|
285
|
+
ideas = obj["ideas"]
|
|
286
|
+
if not isinstance(ideas, list):
|
|
287
|
+
return "'ideas' must be list"
|
|
288
|
+
if len(ideas) == 0:
|
|
289
|
+
return "'ideas' empty"
|
|
290
|
+
for i, idea in enumerate(ideas):
|
|
291
|
+
if not isinstance(idea, dict):
|
|
292
|
+
return f"ideas[{i}] not dict"
|
|
293
|
+
if "id" not in idea or "name" not in idea:
|
|
294
|
+
return f"ideas[{i}] missing id/name"
|
|
295
|
+
if "category" not in idea:
|
|
296
|
+
return f"ideas[{i}] missing category"
|
|
297
|
+
return None
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def _validate_formula_translator(obj: Dict[str, Any]) -> Optional[str]:
|
|
301
|
+
"""FormulaTranslator 输出 schema
|
|
302
|
+
|
|
303
|
+
P2 升级 (refactor/smart-p2): 智能 explanation 处理 (3 档)
|
|
304
|
+
- 档 1: 含结构化标记 → 拆分为 summary (explanation) + detail (explanation_detail)
|
|
305
|
+
- 档 2: 超长但无结构化 → 截断到 200 chars
|
|
306
|
+
- 档 3: 短小干净 → 保留原样
|
|
307
|
+
|
|
308
|
+
其他强化:
|
|
309
|
+
- idea_id 改为 optional(缺失时 fallback 空串)
|
|
310
|
+
- formula 字段缺失直接 fail
|
|
311
|
+
"""
|
|
312
|
+
if "formulas" not in obj:
|
|
313
|
+
return "missing 'formulas'"
|
|
314
|
+
formulas = obj["formulas"]
|
|
315
|
+
if not isinstance(formulas, list):
|
|
316
|
+
return "'formulas' must be list"
|
|
317
|
+
if len(formulas) == 0:
|
|
318
|
+
return "'formulas' empty"
|
|
319
|
+
for i, f in enumerate(formulas):
|
|
320
|
+
if not isinstance(f, dict):
|
|
321
|
+
return f"formulas[{i}] not dict"
|
|
322
|
+
if "formula" not in f:
|
|
323
|
+
return f"formulas[{i}] missing formula"
|
|
324
|
+
# 容忍缺失 idea_id(fallback 空串)
|
|
325
|
+
f.setdefault("idea_id", "")
|
|
326
|
+
# P2 升级: 智能 explanation 处理 (3 档)
|
|
327
|
+
if "explanation" in f and isinstance(f["explanation"], str):
|
|
328
|
+
expl = f["explanation"]
|
|
329
|
+
match = _STRUCTURED_MARKERS_RE.search(expl)
|
|
330
|
+
if match:
|
|
331
|
+
# 档 1: 含结构化标记 → 拆分为 summary + detail
|
|
332
|
+
split_pos = match.start()
|
|
333
|
+
f["explanation"] = expl[:split_pos].strip()
|
|
334
|
+
f["explanation_detail"] = expl[split_pos:]
|
|
335
|
+
elif len(expl) > _MAX_EXPLANATION_LEN:
|
|
336
|
+
# 档 2: 普通超长 → 截断
|
|
337
|
+
f["explanation"] = expl[:_MAX_EXPLANATION_LEN - 3] + "..."
|
|
338
|
+
# 档 3: 短小干净 → 保留(无操作)
|
|
339
|
+
return None
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def _validate_evaluator(obj: Dict[str, Any]) -> Optional[str]:
    """Validate the Evaluator output schema.

    Requires an ``evaluations`` list whose entries are dicts carrying both
    ``formula_id`` and ``status``.

    Returns:
        None when the payload is valid, otherwise an error message.
    """
    if "evaluations" not in obj:
        return "missing 'evaluations'"
    records = obj["evaluations"]
    if not isinstance(records, list):
        return "'evaluations' must be list"

    required = ("formula_id", "status")
    for pos, record in enumerate(records):
        if not isinstance(record, dict):
            return f"evaluations[{pos}] not dict"
        if any(key not in record for key in required):
            return f"evaluations[{pos}] missing formula_id/status"
    return None
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def _validate_reflector(obj: Dict[str, Any]) -> Optional[str]:
    """Validate the Reflector output schema.

    Two formats are accepted:
    1. Standard format: contains a ``formula_feedback`` list.
    2. Analysis format: contains an ``analysis`` field
       (``formula_feedback`` is optional).

    When ``formula_feedback`` is present it is always validated, even if
    ``analysis`` is present as well.

    Returns:
        None when the payload is valid, otherwise an error message.
    """
    allowed_verdicts = ("keep", "mutate", "drop", "merge")

    # Standard format: formula_feedback must be a list of dicts with a verdict.
    if "formula_feedback" in obj:
        feedback = obj["formula_feedback"]
        if not isinstance(feedback, list):
            return "'formula_feedback' must be list"
        for i, fb in enumerate(feedback):
            # Guard against non-dict entries: without it a string entry would
            # be substring-tested and other types would raise TypeError.
            if not isinstance(fb, dict):
                return f"formula_feedback[{i}] not dict"
            if "verdict" not in fb:
                return f"formula_feedback[{i}] missing verdict"
            verdict = fb["verdict"]
            # The isinstance check keeps unhashable values (list/dict) from
            # raising instead of being reported as a bad verdict.
            if not isinstance(verdict, str) or verdict not in allowed_verdicts:
                return f"formula_feedback[{i}] bad verdict"
        return None

    # Analysis format: the presence of 'analysis' is sufficient.
    if "analysis" in obj:
        return None

    return "missing 'formula_feedback' or 'analysis'"
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def _validate_critic(obj: Dict[str, Any]) -> Optional[str]:
    """Validate the Critic output schema.

    Requires a ``final_pool`` list whose entries are dicts carrying a
    ``formula`` key.

    Returns:
        None when the payload is valid, otherwise an error message.
    """
    if "final_pool" not in obj:
        return "missing 'final_pool'"
    pool = obj["final_pool"]
    if not isinstance(pool, list):
        return "'final_pool' must be list"
    for i, item in enumerate(pool):
        # Guard against non-dict entries: a string containing "formula" would
        # otherwise pass the membership test, and an int would raise TypeError.
        if not isinstance(item, dict):
            return f"final_pool[{i}] not dict"
        if "formula" not in item:
            return f"final_pool[{i}] missing formula"
    return None
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
# ==============================================================================
|
|
397
|
+
# 截断恢复后的字段映射
|
|
398
|
+
# ==============================================================================
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
# P2 upgrade (refactor/smart-p2): module-level constants for the smart
# explanation handling.
# Matches a structured section label followed by a colon, case-insensitively.
_STRUCTURED_MARKERS_RE = re.compile(
    r"\b(HYPOTHESIS|MECHANISM|OPERATOR_RATIONALE|PARAMETER_RATIONALE|"
    r"RISK|SUGGESTED_OPS|FORMULA|HYPOTHESES)\s*:",
    flags=re.IGNORECASE,
)
# Length threshold above which a marker-free explanation is truncated.
_MAX_EXPLANATION_LEN = 200
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
# During truncation recovery the generic "items" key is renamed to the
# field name of the corresponding stage (stage name -> payload field).
_TRUNCATED_KEY_MAP: Dict[str, str] = {
    "idea_generator": "ideas",
    "formula_translator": "formulas",
    "evaluator": "evaluations",
    "reflector": "formula_feedback",
    "critic": "final_pool",
}
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
# Stage name -> schema validator applied to that stage's payload.
# Each validator returns None on success or an error message string.
_STAGE_VALIDATORS = {
    "idea_generator": _validate_idea_generator,
    "formula_translator": _validate_formula_translator,
    "evaluator": _validate_evaluator,
    "reflector": _validate_reflector,
    "critic": _validate_critic,
}
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def _apply_truncation_mapping(result: ParseResult, stage: str) -> ParseResult:
    """Rename the ``items`` field of a truncation-recovered result for *stage*.

    For example, a truncated idea_generator result ``{items: [idea1, idea2]}``
    becomes ``{ideas: [idea1, idea2]}`` so downstream code can process it.
    The mapped payload is checked with the stage's schema validator before
    the rename is committed.

    Args:
        result: Parse result to post-process. Only results whose ``layer`` is
            ``"truncated"`` and whose ``data`` is non-empty are touched.
        stage: Stage name used to look up the target key and the validator.

    Returns:
        The same ``result`` object (with ``data`` updated in place when a
        mapping was applied), or a new failed ``ParseResult`` when the mapped
        payload does not pass schema validation.
    """
    if result.layer != "truncated" or not result.data:
        return result

    target_key = _TRUNCATED_KEY_MAP.get(stage)
    items = result.data.get("items")
    if not target_key or items is None:
        # Nothing to map: leave the recovered payload untouched instead of
        # discarding "items" / "_recovered_count".
        return result

    # Schema-check the mapped payload before mutating result.data.
    validator = _STAGE_VALIDATORS.get(stage)
    if validator is not None:
        err = validator({target_key: items, "round": 1})
        if err is not None:
            # Validation failed: report failure, original data left intact.
            return ParseResult(
                ok=False,
                error=f"truncated recovery mapped schema failed: {err}",
                raw=result.raw,
            )

    # Commit the rename; pop-and-reinsert keeps the resulting key order
    # (target key, _recovered_count, round) at the end of the dict.
    recovered_count = result.data.pop("_recovered_count", 0)
    del result.data["items"]
    result.data[target_key] = items
    result.data["_recovered_count"] = recovered_count
    result.data["round"] = 1
    return result
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
# ==============================================================================
|
|
461
|
+
# 5 阶段 parse 函数
|
|
462
|
+
# ==============================================================================
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
def parse_idea_generator_output(raw: str) -> ParseResult:
    """Parse IdeaGenerator output (with truncation recovery).

    Runs ``parse_json_3layer`` with the IdeaGenerator schema validator and
    then applies the truncation mapping for the ``idea_generator`` stage.
    """
    return _apply_truncation_mapping(
        parse_json_3layer(raw, _validate_idea_generator),
        "idea_generator",
    )
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def parse_formula_translator_output(raw: str) -> ParseResult:
    """Parse FormulaTranslator output (with truncation recovery).

    Runs ``parse_json_3layer`` with the FormulaTranslator schema validator and
    then applies the truncation mapping for the ``formula_translator`` stage.
    """
    return _apply_truncation_mapping(
        parse_json_3layer(raw, _validate_formula_translator),
        "formula_translator",
    )
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
def parse_evaluator_output(raw: str) -> ParseResult:
    """Parse Evaluator output (with truncation recovery).

    Runs ``parse_json_3layer`` with the Evaluator schema validator and then
    applies the truncation mapping for the ``evaluator`` stage.
    """
    return _apply_truncation_mapping(
        parse_json_3layer(raw, _validate_evaluator),
        "evaluator",
    )
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
def parse_reflector_output(raw: str) -> ParseResult:
    """Parse Reflector output (with truncation recovery).

    Runs ``parse_json_3layer`` with the Reflector schema validator and then
    applies the truncation mapping for the ``reflector`` stage.
    """
    return _apply_truncation_mapping(
        parse_json_3layer(raw, _validate_reflector),
        "reflector",
    )
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
def parse_critic_output(raw: str) -> ParseResult:
    """Parse Critic output (with truncation recovery).

    Runs ``parse_json_3layer`` with the Critic schema validator and then
    applies the truncation mapping for the ``critic`` stage, like the other
    four stage parsers.

    NOTE: this function used to be defined twice; the later definition
    rebound the name and skipped the truncation mapping, so a truncated
    Critic result kept its generic ``items`` key. Only this definition
    remains.
    """
    result = parse_json_3layer(raw, _validate_critic)
    return _apply_truncation_mapping(result, "critic")
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
# ==============================================================================
|
|
501
|
+
# 公式白名单校验(FormulaTranslator 专用)
|
|
502
|
+
# ==============================================================================
|
|
503
|
+
|
|
504
|
+
# ALLOWED_OPERATORS 实际定义在 types/constants.py(叶子包),
|
|
505
|
+
# 此处 re-export 保持向后兼容。
|
|
506
|
+
from QuantNodes.research.quant_alpha.types.constants import ALLOWED_OPERATORS
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
def extract_operators(formula: str) -> List[str]:
    """Extract all operator names from a formula string (lexical scan).

    An operator name is an identifier followed, after optional whitespace,
    by an opening parenthesis. Names are returned in order of appearance,
    duplicates included.
    """
    call_sites = re.finditer(r"\b([a-zA-Z_][a-zA-Z0-9_]*)\s*\(", formula)
    return [site.group(1) for site in call_sites]
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
def validate_formula_operators(formula: str) -> Optional[str]:
    """Check that every operator used in *formula* is on the whitelist.

    Returns:
        None if all operators are in ``ALLOWED_OPERATORS``; otherwise an
        error message naming the first operator that is not.
    """
    unknown = next(
        (
            name
            for name in extract_operators(formula)
            if name not in ALLOWED_OPERATORS
        ),
        None,
    )
    if unknown is None:
        return None
    return f"Unknown operator: {unknown!r}"
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
# Public API of this module.
__all__ = [
    # Generic parsing
    "ParseResult",
    "parse_json_3layer",
    # Per-stage output parsers
    "parse_idea_generator_output",
    "parse_formula_translator_output",
    "parse_evaluator_output",
    "parse_reflector_output",
    "parse_critic_output",
    # Formula operator whitelist
    "validate_formula_operators",
    "extract_operators",
    "ALLOWED_OPERATORS",
    # Thinking-chain parsing
    "ThinkingRecord",
    "parse_thinking_block",
]
|
|
541
|
+
|
|
542
|
+
|
|
543
|
+
# ==============================================================================
|
|
544
|
+
# 思维链结构化解析(Tier 1+2:feature/thinking-chain)
|
|
545
|
+
# ==============================================================================
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
@dataclass
class ThinkingRecord:
    """Structured reasoning fields extracted from an LLM <think> block.

    Attributes:
        raw: Original thinking text.
        hypothesis: Economic hypothesis (one sentence).
        mechanism: Economic mechanism (why it works).
        operator_rationale: Rationale for the operator choice.
        parameter_rationale: Rationale for the parameter choice.
        risk: Risk factors.
        suggested_ops: Operators explicitly suggested by the LLM
            (SUGGESTED_OPS field).
        mentioned_ops: Operators mentioned in the thinking text that belong
            to op_vocab.
        key_insights: Core insights emitted by the reflector.
        next_round_focus: Focus the reflector suggests for the next round.
        risk_patterns: Failure patterns found by the reflector.
        selection_criteria: Critic's selection criteria.
        diversity: Critic's diversity considerations.
        risk_filters: Critic's risk filters.
    """

    raw: str = ""
    hypothesis: str = ""
    mechanism: str = ""
    operator_rationale: str = ""
    parameter_rationale: str = ""
    risk: str = ""
    suggested_ops: List[str] = field(default_factory=list)
    mentioned_ops: List[str] = field(default_factory=list)
    # Reflector-specific
    key_insights: List[str] = field(default_factory=list)
    next_round_focus: str = ""
    risk_patterns: str = ""
    # Critic-specific
    selection_criteria: str = ""
    diversity: str = ""
    risk_filters: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a plain dict.

        List values are the record's own list objects, not copies. The three
        critic fields are included so that no parsed field is lost on export.
        """
        return {
            "raw": self.raw,
            "hypothesis": self.hypothesis,
            "mechanism": self.mechanism,
            "operator_rationale": self.operator_rationale,
            "parameter_rationale": self.parameter_rationale,
            "risk": self.risk,
            "suggested_ops": self.suggested_ops,
            "mentioned_ops": self.mentioned_ops,
            "key_insights": self.key_insights,
            "next_round_focus": self.next_round_focus,
            "risk_patterns": self.risk_patterns,
            "selection_criteria": self.selection_criteria,
            "diversity": self.diversity,
            "risk_filters": self.risk_filters,
        }
|
|
600
|
+
|
|
601
|
+
|
|
602
|
+
def parse_thinking_block(
    thinking_text: Optional[str],
    op_vocab: Optional[set] = None,
) -> ThinkingRecord:
    """Extract structured fields from an LLM <think> block.

    Tier 1+2 implementation: parses the fields the LLM writes into its
    thinking block as ``KEY: value`` lines (optionally prefixed by a ``-`` or
    ``*`` bullet). Every field is optional; a missing field stays an empty
    string / empty list, which keeps the result backward compatible.

    Args:
        thinking_text: Text of the <think> block (without the tags).
        op_vocab: Operator vocabulary used to filter ``mentioned_ops``.
            When empty or None, ``mentioned_ops`` is left empty.

    Returns:
        ThinkingRecord populated with whatever fields were found.
    """
    if not thinking_text:
        return ThinkingRecord(raw="")

    result = ThinkingRecord(raw=thinking_text)

    field_names = (
        r"HYPOTHESIS|MECHANISM|OPERATOR_RATIONALE|"
        r"PARAMETER_RATIONALE|RISK|"
        r"KEY_INSIGHTS|NEXT_ROUND_FOCUS|RISK_PATTERNS|"
        r"SELECTION_CRITERIA|DIVERSITY|RISK_FILTERS"
    )
    field_pattern = (
        r"(?:^|\n)\s*[-*]?\s*"
        r"(" + field_names + r")\s*:\s*"
        r"(.+?)"
        # A value ends before the next line that looks like "LABEL:" ...
        r"(?=\n\s*[-*]?[A-Z_]+:"
        # ... or before a known field written the way the start pattern
        # accepts it ("- KEY:", "KEY :"). Without this alternative the first
        # field of a bulleted list swallowed every following field.
        r"|\n\s*[-*]?\s*(?:" + field_names + r"|SUGGESTED_OPS)\s*:"
        r"|$)"
    )

    # Plain string fields: label -> ThinkingRecord attribute.
    scalar_attrs = {
        "HYPOTHESIS": "hypothesis",
        "MECHANISM": "mechanism",
        "OPERATOR_RATIONALE": "operator_rationale",
        "PARAMETER_RATIONALE": "parameter_rationale",
        "RISK": "risk",
        "NEXT_ROUND_FOCUS": "next_round_focus",
        "RISK_PATTERNS": "risk_patterns",
        "SELECTION_CRITERIA": "selection_criteria",
        "DIVERSITY": "diversity",
        "RISK_FILTERS": "risk_filters",
    }
    for key, value in re.findall(field_pattern, thinking_text, re.DOTALL):
        value = value.strip()
        if key == "KEY_INSIGHTS":
            # Reflector-specific: stored as a list, one entry per non-empty
            # line with bullet characters removed.
            result.key_insights = [
                line.strip().lstrip("-*").strip()
                for line in value.split("\n")
                if line.strip() and not line.strip().startswith("NEXT")
            ]
        else:
            setattr(result, scalar_attrs[key], value)

    # SUGGESTED_OPS is a single-line field: only horizontal whitespace may
    # follow the colon, so an empty value cannot capture the next line.
    ops_match = re.search(r"SUGGESTED_OPS:[^\S\n]*([^\n]+)", thinking_text)
    if ops_match:
        result.suggested_ops = [
            s.strip() for s in ops_match.group(1).split(",") if s.strip()
        ]

    if op_vocab:
        # Every identifier-like word in the text is a candidate mention; this
        # also covers call syntax such as "rank(", since the called name is
        # itself a standalone word.
        words = set(re.findall(r"\b([a-zA-Z_]\w*)\b", thinking_text))
        result.mentioned_ops = sorted(op for op in words if op in op_vocab)

    return result
|