quantnodes 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- QuantNodes/__init__.py +15 -0
- QuantNodes/__main__.py +14 -0
- QuantNodes/agent/__init__.py +158 -0
- QuantNodes/agent/agents/__init__.py +13 -0
- QuantNodes/agent/agents/definition.py +180 -0
- QuantNodes/agent/agents/manager.py +73 -0
- QuantNodes/agent/config/__init__.py +34 -0
- QuantNodes/agent/config/executor.py +958 -0
- QuantNodes/agent/config/loader.py +427 -0
- QuantNodes/agent/config/templates/bollinger_bands.yaml +84 -0
- QuantNodes/agent/config/templates/dual_ma.yaml +72 -0
- QuantNodes/agent/config/templates/empty.yaml +56 -0
- QuantNodes/agent/config/templates/mean_reversion.yaml +47 -0
- QuantNodes/agent/config/templates/mean_reversion_zscore.yaml +90 -0
- QuantNodes/agent/config/templates/momentum.yaml +81 -0
- QuantNodes/agent/config/templates/momentum_breakout.yaml +84 -0
- QuantNodes/agent/config/templates/rsi_strategy.yaml +72 -0
- QuantNodes/agent/config/templates/volume_price.yaml +86 -0
- QuantNodes/agent/config/types.py +156 -0
- QuantNodes/agent/config_mapper.py +293 -0
- QuantNodes/agent/core/__init__.py +19 -0
- QuantNodes/agent/core/dream.py +47 -0
- QuantNodes/agent/core/quant_dream.py +274 -0
- QuantNodes/agent/cron_jobs.py +314 -0
- QuantNodes/agent/nanobot_bridge.py +242 -0
- QuantNodes/agent/permission/__init__.py +30 -0
- QuantNodes/agent/permission/defaults.py +36 -0
- QuantNodes/agent/permission/evaluate.py +41 -0
- QuantNodes/agent/permission/models.py +59 -0
- QuantNodes/agent/permission/service.py +133 -0
- QuantNodes/agent/providers/__init__.py +11 -0
- QuantNodes/agent/providers/base.py +102 -0
- QuantNodes/agent/providers/quantnodes.py +610 -0
- QuantNodes/agent/providers/rate_limiter.py +326 -0
- QuantNodes/agent/providers/registry.py +163 -0
- QuantNodes/agent/skills/__init__.py +20 -0
- QuantNodes/agent/skills/base.py +118 -0
- QuantNodes/agent/skills/bridge.py +73 -0
- QuantNodes/agent/skills/factor/__init__.py +14 -0
- QuantNodes/agent/skills/factor/correlation.py +99 -0
- QuantNodes/agent/skills/factor/group_backtest.py +114 -0
- QuantNodes/agent/skills/factor/ic_analysis.py +106 -0
- QuantNodes/agent/skills/loader.py +107 -0
- QuantNodes/agent/skills/registry.py +105 -0
- QuantNodes/agent/skills/strategy/__init__.py +16 -0
- QuantNodes/agent/skills/strategy/bollinger.py +86 -0
- QuantNodes/agent/skills/strategy/dual_ma.py +82 -0
- QuantNodes/agent/skills/strategy/momentum.py +74 -0
- QuantNodes/agent/skills/strategy/rsi_reversal.py +99 -0
- QuantNodes/agent/skills_quant/__init__.py +14 -0
- QuantNodes/agent/skills_quant/backtest-analyze/SKILL.md +42 -0
- QuantNodes/agent/skills_quant/config-driven/SKILL.md +72 -0
- QuantNodes/agent/skills_quant/factor-research/SKILL.md +40 -0
- QuantNodes/agent/skills_quant/quant-dream/SKILL.md +55 -0
- QuantNodes/agent/skills_quant/risk-management/SKILL.md +45 -0
- QuantNodes/agent/skills_quant/strategy-design/SKILL.md +43 -0
- QuantNodes/agent/templates/__init__.py +4 -0
- QuantNodes/agent/tools/__init__.py +173 -0
- QuantNodes/agent/tools/_workspace.py +51 -0
- QuantNodes/agent/tools/alpha_backtest.py +328 -0
- QuantNodes/agent/tools/alpha_evaluate.py +493 -0
- QuantNodes/agent/tools/backtest.py +226 -0
- QuantNodes/agent/tools/base.py +133 -0
- QuantNodes/agent/tools/code_search.py +207 -0
- QuantNodes/agent/tools/config_backtest.py +401 -0
- QuantNodes/agent/tools/context.py +97 -0
- QuantNodes/agent/tools/dream_skill.py +77 -0
- QuantNodes/agent/tools/echo.py +38 -0
- QuantNodes/agent/tools/factor.py +231 -0
- QuantNodes/agent/tools/file_ops.py +201 -0
- QuantNodes/agent/tools/git_ops.py +190 -0
- QuantNodes/agent/tools/operator_lookup.py +218 -0
- QuantNodes/agent/tools/output_truncation.py +77 -0
- QuantNodes/agent/tools/path_check.py +43 -0
- QuantNodes/agent/tools/pipeline.py +62 -0
- QuantNodes/agent/tools/registry.py +150 -0
- QuantNodes/agent/tools/sandbox.py +62 -0
- QuantNodes/agent/tools/shell_safety.py +63 -0
- QuantNodes/agent/tools/strategy.py +106 -0
- QuantNodes/agent/tools/task.py +171 -0
- QuantNodes/agent/tools/web_fetch.py +142 -0
- QuantNodes/agent/tools/web_search.py +114 -0
- QuantNodes/agent/tools/wiki.py +370 -0
- QuantNodes/agent/utils/__init__.py +11 -0
- QuantNodes/agent/utils/helpers.py +43 -0
- QuantNodes/agent/utils/prompt_templates.py +30 -0
- QuantNodes/agent/workflows/__init__.py +20 -0
- QuantNodes/agent/workflows/implementations/__init__.py +8 -0
- QuantNodes/agent/workflows/implementations/alpha_gpt.py +508 -0
- QuantNodes/agent/workflows/implementations/mcts.py +442 -0
- QuantNodes/agent/workflows/parsers.py +44 -0
- QuantNodes/agent/workflows/registry.py +119 -0
- QuantNodes/agent/workflows/step_agent.py +219 -0
- QuantNodes/agent/workflows/tool.py +198 -0
- QuantNodes/ai/__init__.py +93 -0
- QuantNodes/ai/llm/__init__.py +75 -0
- QuantNodes/ai/llm/base.py +233 -0
- QuantNodes/ai/llm/decorators.py +281 -0
- QuantNodes/ai/llm/gateway.py +571 -0
- QuantNodes/ai/llm/null.py +76 -0
- QuantNodes/ai/llm/openai.py +435 -0
- QuantNodes/ai/optimizer.py +405 -0
- QuantNodes/ai/prompts/__init__.py +229 -0
- QuantNodes/ai/sandbox.py +371 -0
- QuantNodes/ai/sandbox_pandas_bridge.py +150 -0
- QuantNodes/ai/strategy_gen.py +396 -0
- QuantNodes/backtest/__init__.py +64 -0
- QuantNodes/backtest/backtest_node.py +188 -0
- QuantNodes/backtest/broker_node.py +378 -0
- QuantNodes/backtest/config_runner.py +397 -0
- QuantNodes/backtest/config_strategy.py +64 -0
- QuantNodes/backtest/risk_node.py +360 -0
- QuantNodes/backtest/strategy_node.py +268 -0
- QuantNodes/cache_node/__init__.py +19 -0
- QuantNodes/cache_node/base.py +244 -0
- QuantNodes/cache_node/cache_store.py +99 -0
- QuantNodes/cache_node/metadata.py +100 -0
- QuantNodes/cli/__init__.py +109 -0
- QuantNodes/cli/_helpers.py +511 -0
- QuantNodes/cli/command.py +110 -0
- QuantNodes/cli/commands/__init__.py +69 -0
- QuantNodes/cli/commands/agent.py +158 -0
- QuantNodes/cli/commands/alpha.py +951 -0
- QuantNodes/cli/commands/chat.py +38 -0
- QuantNodes/cli/commands/evolve.py +120 -0
- QuantNodes/cli/commands/factor.py +569 -0
- QuantNodes/cli/commands/init.py +190 -0
- QuantNodes/cli/commands/run.py +259 -0
- QuantNodes/cli/commands/serve.py +398 -0
- QuantNodes/cli/commands/version.py +120 -0
- QuantNodes/cli/enhanced.py +146 -0
- QuantNodes/conf_node/__init__.py +37 -0
- QuantNodes/conf_node/base.py +120 -0
- QuantNodes/conf_node/env_config.py +132 -0
- QuantNodes/conf_node/ini_config.py +70 -0
- QuantNodes/conf_node/json_config.py +69 -0
- QuantNodes/conf_node/yaml_config.py +78 -0
- QuantNodes/constants.py +17 -0
- QuantNodes/core/__init__.py +196 -0
- QuantNodes/core/_lookback_helpers.py +49 -0
- QuantNodes/core/ast_parser.py +198 -0
- QuantNodes/core/base.py +61 -0
- QuantNodes/core/cache_manager.py +344 -0
- QuantNodes/core/cache_utils.py +150 -0
- QuantNodes/core/cond_builder.py +53 -0
- QuantNodes/core/config.py +170 -0
- QuantNodes/core/constants.py +48 -0
- QuantNodes/core/control.py +412 -0
- QuantNodes/core/data_preprocessing.py +453 -0
- QuantNodes/core/data_source.py +46 -0
- QuantNodes/core/events.py +178 -0
- QuantNodes/core/evolution/__init__.py +22 -0
- QuantNodes/core/evolution/loop.py +583 -0
- QuantNodes/core/evolution/operators.py +289 -0
- QuantNodes/core/evolution/settings.py +44 -0
- QuantNodes/core/expression.py +841 -0
- QuantNodes/core/feedback/__init__.py +38 -0
- QuantNodes/core/feedback/channels.py +182 -0
- QuantNodes/core/feedback/collector.py +91 -0
- QuantNodes/core/feedback/dataclass.py +239 -0
- QuantNodes/core/feedback/llm_judge.py +138 -0
- QuantNodes/core/knowledge/__init__.py +69 -0
- QuantNodes/core/knowledge/knowledge_base.py +217 -0
- QuantNodes/core/knowledge/lineage_compress.py +196 -0
- QuantNodes/core/knowledge/lineage_expand.py +123 -0
- QuantNodes/core/knowledge/metrics/__init__.py +43 -0
- QuantNodes/core/knowledge/metrics/evaluator.py +176 -0
- QuantNodes/core/knowledge/metrics/metrics.py +220 -0
- QuantNodes/core/knowledge/rag_prompt.py +196 -0
- QuantNodes/core/knowledge/retriever.py +209 -0
- QuantNodes/core/lambda_node.py +81 -0
- QuantNodes/core/monitoring/__init__.py +22 -0
- QuantNodes/core/monitoring/collector.py +292 -0
- QuantNodes/core/monitoring/dashboard.py +365 -0
- QuantNodes/core/node.py +375 -0
- QuantNodes/core/pandas_utils.py +504 -0
- QuantNodes/core/parallel/__init__.py +15 -0
- QuantNodes/core/parallel/worker.py +140 -0
- QuantNodes/core/parallel/worker_process.py +265 -0
- QuantNodes/core/path_utils.py +73 -0
- QuantNodes/core/pipeline.py +328 -0
- QuantNodes/core/plugin.py +135 -0
- QuantNodes/core/quality_gate/__init__.py +32 -0
- QuantNodes/core/quality_gate/complexity.py +94 -0
- QuantNodes/core/quality_gate/consistency.py +26 -0
- QuantNodes/core/quality_gate/node.py +97 -0
- QuantNodes/core/quality_gate/redundancy.py +51 -0
- QuantNodes/core/quality_gate/settings.py +43 -0
- QuantNodes/core/quality_gate/zoo.py +98 -0
- QuantNodes/core/serializable.py +116 -0
- QuantNodes/core/serialization.py +673 -0
- QuantNodes/core/tools.py +333 -0
- QuantNodes/core/trajectory/__init__.py +25 -0
- QuantNodes/core/trajectory/entry.py +116 -0
- QuantNodes/core/trajectory/lineage.py +67 -0
- QuantNodes/core/trajectory/pool.py +211 -0
- QuantNodes/core/trajectory/selector.py +140 -0
- QuantNodes/core/visualization/__init__.py +33 -0
- QuantNodes/core/visualization/builder.py +233 -0
- QuantNodes/core/visualization/gate_breakdown.py +140 -0
- QuantNodes/core/visualization/lineage_dag.py +203 -0
- QuantNodes/core/visualization/metric_distribution.py +125 -0
- QuantNodes/core/visualization/report.py +68 -0
- QuantNodes/database_node/__init__.py +69 -0
- QuantNodes/database_node/base.py +135 -0
- QuantNodes/database_node/clickhouse_node.py +272 -0
- QuantNodes/database_node/csv_node.py +83 -0
- QuantNodes/database_node/duckdb_node.py +86 -0
- QuantNodes/database_node/factory.py +83 -0
- QuantNodes/database_node/mysql_node.py +100 -0
- QuantNodes/database_node/parquet_node.py +75 -0
- QuantNodes/database_node/sqlite_node.py +67 -0
- QuantNodes/factor_node/__init__.py +50 -0
- QuantNodes/factor_node/factor.py +563 -0
- QuantNodes/factor_node/factor_db.py +421 -0
- QuantNodes/factor_node/factor_functions/__init__.py +252 -0
- QuantNodes/factor_node/factor_functions/_helpers.py +358 -0
- QuantNodes/factor_node/factor_functions/_helpers_debug.py +317 -0
- QuantNodes/factor_node/factor_functions/composite_ops.py +136 -0
- QuantNodes/factor_node/factor_functions/math_ops.py +433 -0
- QuantNodes/factor_node/factor_functions/section_ops.py +290 -0
- QuantNodes/factor_node/factor_functions/talib_ops.py +1293 -0
- QuantNodes/factor_node/factor_functions/time_ops.py +535 -0
- QuantNodes/factor_node/factor_operation.py +1115 -0
- QuantNodes/factor_node/factor_table.py +1073 -0
- QuantNodes/factor_node/quant_nodes_object.py +60 -0
- QuantNodes/mcp_server/__init__.py +27 -0
- QuantNodes/mcp_server/__main__.py +4 -0
- QuantNodes/mcp_server/server.py +272 -0
- QuantNodes/methods/__init__.py +28 -0
- QuantNodes/methods/pipeline.py +100 -0
- QuantNodes/methods/sandbox.py +102 -0
- QuantNodes/monitor/__init__.py +27 -0
- QuantNodes/monitor/agent_tools/__init__.py +5 -0
- QuantNodes/monitor/agent_tools/monitor_tool.py +98 -0
- QuantNodes/monitor/agent_tools/schedule_tool.py +98 -0
- QuantNodes/monitor/agent_tools/version_tool.py +133 -0
- QuantNodes/monitor/monitor/__init__.py +6 -0
- QuantNodes/monitor/monitor/alerter.py +60 -0
- QuantNodes/monitor/monitor/collector.py +164 -0
- QuantNodes/monitor/monitor/dashboard.py +115 -0
- QuantNodes/monitor/monitor/drift.py +190 -0
- QuantNodes/monitor/scheduler/__init__.py +4 -0
- QuantNodes/monitor/scheduler/runner.py +133 -0
- QuantNodes/monitor/scheduler/scheduler.py +184 -0
- QuantNodes/monitor/storage/__init__.py +16 -0
- QuantNodes/monitor/storage/models.py +70 -0
- QuantNodes/monitor/storage/repository.py +407 -0
- QuantNodes/monitor/version/__init__.py +4 -0
- QuantNodes/monitor/version/diff.py +81 -0
- QuantNodes/monitor/version/version_manager.py +182 -0
- QuantNodes/operator_node/__init__.py +28 -0
- QuantNodes/operator_node/base.py +97 -0
- QuantNodes/operator_node/query_node.py +129 -0
- QuantNodes/operator_node/sql_builder.py +125 -0
- QuantNodes/operator_node/sql_utils.py +172 -0
- QuantNodes/operator_node/transform.py +130 -0
- QuantNodes/operators/__init__.py +90 -0
- QuantNodes/operators/_engine.py +108 -0
- QuantNodes/operators/composite.py +161 -0
- QuantNodes/operators/composite_dag.py +667 -0
- QuantNodes/operators/composite_dag_ops.py +343 -0
- QuantNodes/operators/composite_dag_pandas_ops.py +382 -0
- QuantNodes/operators/custom.py +408 -0
- QuantNodes/operators/facade.py +164 -0
- QuantNodes/operators/math.py +163 -0
- QuantNodes/operators/proxy.py +29 -0
- QuantNodes/operators/registry.py +144 -0
- QuantNodes/operators/section.py +99 -0
- QuantNodes/operators/talib.py +757 -0
- QuantNodes/operators/templates.py +95 -0
- QuantNodes/operators/time_series.py +136 -0
- QuantNodes/prompts/__init__.py +20 -0
- QuantNodes/prompts/backtest/__init__.py +12 -0
- QuantNodes/prompts/backtest/factor_based.py +86 -0
- QuantNodes/prompts/backtest/standard.py +73 -0
- QuantNodes/prompts/factor/__init__.py +14 -0
- QuantNodes/prompts/factor/correlation.py +77 -0
- QuantNodes/prompts/factor/group_backtest.py +86 -0
- QuantNodes/prompts/factor/ic_analysis.py +91 -0
- QuantNodes/prompts/strategy/__init__.py +18 -0
- QuantNodes/prompts/strategy/market_neutral.py +96 -0
- QuantNodes/prompts/strategy/mean_reversion.py +107 -0
- QuantNodes/prompts/strategy/momentum.py +160 -0
- QuantNodes/prompts/strategy/pairs_trading.py +107 -0
- QuantNodes/prompts/strategy/trend_following.py +96 -0
- QuantNodes/research/README.md +106 -0
- QuantNodes/research/__init__.py +154 -0
- QuantNodes/research/_legacy_3c/__init__.py +61 -0
- QuantNodes/research/_legacy_3c/auto_researcher.py +289 -0
- QuantNodes/research/_legacy_3c/factor_evaluator.py +560 -0
- QuantNodes/research/_legacy_3c/factor_miner.py +318 -0
- QuantNodes/research/_legacy_3c/mcts_search.py +324 -0
- QuantNodes/research/factor_test/__init__.py +25 -0
- QuantNodes/research/factor_test/config.py +184 -0
- QuantNodes/research/factor_test/config_builder.py +276 -0
- QuantNodes/research/factor_test/e2e/data_prep.py +163 -0
- QuantNodes/research/factor_test/e2e/run_evolution_e2e.py +309 -0
- QuantNodes/research/factor_test/evolution_adapter.py +231 -0
- QuantNodes/research/factor_test/feedback_wrapper.py +102 -0
- QuantNodes/research/factor_test/ifind_db/__init__.py +7 -0
- QuantNodes/research/factor_test/ifind_db/fetcher.py +224 -0
- QuantNodes/research/factor_test/ifind_db/ifind_database.py +689 -0
- QuantNodes/research/factor_test/nodes/__init__.py +1 -0
- QuantNodes/research/factor_test/nodes/_base.py +91 -0
- QuantNodes/research/factor_test/nodes/adjust_date_node.py +48 -0
- QuantNodes/research/factor_test/nodes/configs.py +240 -0
- QuantNodes/research/factor_test/nodes/factor_neutralize_node.py +87 -0
- QuantNodes/research/factor_test/nodes/factor_preprocess_node.py +222 -0
- QuantNodes/research/factor_test/nodes/factor_score_node.py +141 -0
- QuantNodes/research/factor_test/nodes/factor_test_report_node.py +153 -0
- QuantNodes/research/factor_test/nodes/group_analyzer_node.py +317 -0
- QuantNodes/research/factor_test/nodes/ic_analyzer_node.py +112 -0
- QuantNodes/research/factor_test/nodes/load_data_node.py +100 -0
- QuantNodes/research/factor_test/nodes/long_short_node.py +93 -0
- QuantNodes/research/factor_test/nodes/neutralizers.py +222 -0
- QuantNodes/research/factor_test/nodes/preprocess_strategies.py +277 -0
- QuantNodes/research/factor_test/nodes/risk_correlation_node.py +112 -0
- QuantNodes/research/factor_test/nodes/sample_pool_filter_node.py +110 -0
- QuantNodes/research/factor_test/nodes/tradability_filter_node.py +92 -0
- QuantNodes/research/factor_test/pipeline_runner.py +305 -0
- QuantNodes/research/factor_test/pipeline_spec.py +216 -0
- QuantNodes/research/factor_test/utils/__init__.py +26 -0
- QuantNodes/research/factor_test/utils/constants.py +86 -0
- QuantNodes/research/factor_test/utils/data_loader.py +141 -0
- QuantNodes/research/factor_test/utils/date_utils.py +232 -0
- QuantNodes/research/factor_test/utils/file_loaders.py +150 -0
- QuantNodes/research/factor_test/utils/labels.py +37 -0
- QuantNodes/research/factor_test/utils/metrics_extractor.py +55 -0
- QuantNodes/research/factor_test/utils/performance_metrics.py +175 -0
- QuantNodes/research/factor_test/utils/safe_load.py +106 -0
- QuantNodes/research/quant_alpha/CHANGELOG.md +80 -0
- QuantNodes/research/quant_alpha/README.md +142 -0
- QuantNodes/research/quant_alpha/__init__.py +45 -0
- QuantNodes/research/quant_alpha/adapters/__init__.py +99 -0
- QuantNodes/research/quant_alpha/adapters/calculator.py +503 -0
- QuantNodes/research/quant_alpha/adapters/expression.py +387 -0
- QuantNodes/research/quant_alpha/alpha101_design/__init__.py +50 -0
- QuantNodes/research/quant_alpha/alpha101_design/few_shot_examples.py +243 -0
- QuantNodes/research/quant_alpha/alpha101_design/philosophy.py +474 -0
- QuantNodes/research/quant_alpha/alpha158_design/__init__.py +63 -0
- QuantNodes/research/quant_alpha/alpha158_design/few_shot_examples.py +219 -0
- QuantNodes/research/quant_alpha/alpha158_design/philosophy.py +240 -0
- QuantNodes/research/quant_alpha/evaluation/__init__.py +47 -0
- QuantNodes/research/quant_alpha/evaluation/baselines/__init__.py +8 -0
- QuantNodes/research/quant_alpha/evaluation/baselines/g1_handcrafted.py +135 -0
- QuantNodes/research/quant_alpha/evaluation/baselines/g2_llm_only.py +269 -0
- QuantNodes/research/quant_alpha/evaluation/baselines/g3_alpha_gpt.py +152 -0
- QuantNodes/research/quant_alpha/evaluation/clickhouse_data_loader.py +227 -0
- QuantNodes/research/quant_alpha/evaluation/contracts.py +376 -0
- QuantNodes/research/quant_alpha/evaluation/evaluators/__init__.py +6 -0
- QuantNodes/research/quant_alpha/evaluation/evaluators/polars_evaluator.py +545 -0
- QuantNodes/research/quant_alpha/evaluation/mock_data_loader.py +226 -0
- QuantNodes/research/quant_alpha/evaluation/runner.py +243 -0
- QuantNodes/research/quant_alpha/llm/__init__.py +38 -0
- QuantNodes/research/quant_alpha/llm/parser.py +681 -0
- QuantNodes/research/quant_alpha/logic_driven_pipeline.py +411 -0
- QuantNodes/research/quant_alpha/logic_mining/__init__.py +74 -0
- QuantNodes/research/quant_alpha/logic_mining/compiler.py +457 -0
- QuantNodes/research/quant_alpha/logic_mining/generator.py +366 -0
- QuantNodes/research/quant_alpha/logic_mining/models.py +252 -0
- QuantNodes/research/quant_alpha/logic_mining/parser.py +287 -0
- QuantNodes/research/quant_alpha/logic_mining/pipelines.py +297 -0
- QuantNodes/research/quant_alpha/logic_mining/sources.py +149 -0
- QuantNodes/research/quant_alpha/mcts/__init__.py +66 -0
- QuantNodes/research/quant_alpha/mcts/cache.py +262 -0
- QuantNodes/research/quant_alpha/mcts/extension_ops.py +320 -0
- QuantNodes/research/quant_alpha/mcts/feedback.py +825 -0
- QuantNodes/research/quant_alpha/mcts/op_prior.py +180 -0
- QuantNodes/research/quant_alpha/mcts/search.py +540 -0
- QuantNodes/research/quant_alpha/mcts/tree.py +201 -0
- QuantNodes/research/quant_alpha/operator_vocab/__init__.py +50 -0
- QuantNodes/research/quant_alpha/operator_vocab/config.py +54 -0
- QuantNodes/research/quant_alpha/operator_vocab/metadata.py +263 -0
- QuantNodes/research/quant_alpha/operator_vocab/vocabulary.py +481 -0
- QuantNodes/research/quant_alpha/pipeline.py +1027 -0
- QuantNodes/research/quant_alpha/types/__init__.py +27 -0
- QuantNodes/research/quant_alpha/types/constants.py +28 -0
- QuantNodes/research/quant_alpha/types/state.py +205 -0
- QuantNodes/research/quant_alpha/workflow/__init__.py +32 -0
- QuantNodes/research/quant_alpha/workflow/alpha_gpt.py +911 -0
- QuantNodes/research/quant_alpha/workflow/alpha_logics.py +416 -0
- QuantNodes/research/quant_alpha/workflow/state.py +27 -0
- QuantNodes/research/report_reproducer.py +485 -0
- QuantNodes/research/wiki.py +1155 -0
- QuantNodes/symbolic/__init__.py +51 -0
- QuantNodes/symbolic/compiler.py +113 -0
- QuantNodes/symbolic/dialect.py +260 -0
- QuantNodes/symbolic/executor.py +147 -0
- QuantNodes/symbolic/expression.py +234 -0
- QuantNodes/symbolic/functions.py +433 -0
- QuantNodes/symbolic/optimizer.py +165 -0
- QuantNodes/ui_node/__init__.py +30 -0
- QuantNodes/ui_node/base.py +222 -0
- quantnodes-3.0.0.dist-info/METADATA +463 -0
- quantnodes-3.0.0.dist-info/RECORD +399 -0
- quantnodes-3.0.0.dist-info/WHEEL +5 -0
- quantnodes-3.0.0.dist-info/entry_points.txt +24 -0
- quantnodes-3.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
"""
|
|
3
|
+
g2_llm_only.py - G2 baseline:mock LLM 直接生成 50 个公式字符串
|
|
4
|
+
|
|
5
|
+
G2 = "LLM-Only",代表「直接问 LLM 生成 alpha 公式」的最简 baseline。
|
|
6
|
+
无 5 智能体编排、无反思循环、无 evaluator 反馈 — 仅 LLM 一次输出。
|
|
7
|
+
|
|
8
|
+
Stage 1 用 mock 模拟 LLM 输出(valid + invalid 混合);
|
|
9
|
+
Stage 2 用真实 LLM(MiniMax)替换。
|
|
10
|
+
|
|
11
|
+
模拟策略(mock 阶段):
|
|
12
|
+
- 60% 公式:valid(LLM 已知基础语法)
|
|
13
|
+
- 25% 公式:复杂 valid(LLM 写出更复杂的算子组合)
|
|
14
|
+
- 15% 公式:invalid(模拟 LLM 错误 / 不支持的算子)
|
|
15
|
+
|
|
16
|
+
复用:
|
|
17
|
+
- contracts.Baseline:generate_factors() 接口
|
|
18
|
+
- g1_handcrafted._gen_formula:共享公式生成逻辑(DRY)
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import logging
|
|
24
|
+
import random
|
|
25
|
+
from typing import List, Optional
|
|
26
|
+
|
|
27
|
+
from ..contracts import Baseline, FactorSpec
|
|
28
|
+
from .g1_handcrafted import _gen_formula
|
|
29
|
+
|
|
30
|
+
logger = logging.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
__all__ = ["G2LlmOnly"]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Pool used for the ~15% of mock formulas that simulate LLM mistakes:
# each entry uses an operator or syntax the formula parser does not support.
INVALID_LLM_TOKENS = [
    "rank(close)",                     # cross-sectional rank; not supported by the tool
    "IndNeutralize(close, industry)",  # not supported by the tool
    "ts_zscore(close, 20)",            # no rolling mapping available
    "log(vol)",                        # would need dividing by the mean first
    "close - ts_mean(close, 5)",       # infix syntax
    "correlation(close, vol, 20)",     # not supported by the tool
    "quantile(close, 0.5)",            # not supported by the tool
]
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class G2LlmOnly(Baseline):
    """G2 "LLM-only" baseline.

    Models the simplest possible baseline, "ask an LLM directly for alpha
    formulas":

    - does not call AlphaGptWorkflow
    - no reflection / critic / evaluator feedback
    - emits N formulas in one shot (valid and invalid ones mixed)

    Stage 1: mock formula generator (valid + invalid mix).
    Stage 2: real LLM (LLMGateway -> MiniMax), enabled by passing ``llm_client``.
    """

    def __init__(self, n: int = 50, seed: int = 7, llm_client=None) -> None:
        """Store the configuration.

        Args:
            n: Default number of factors to generate.
            seed: Seed for the mock generator's private ``random.Random``.
            llm_client: When not ``None``, ``generate_factors`` takes the
                LLM path instead of the mock path.
        """
        self.n = n
        self.seed = seed
        self._llm_client = llm_client

    @property
    def group_name(self) -> str:
        """Identifier of this baseline group."""
        return "G2_LlmOnly"

    def generate_factors(self, n: Optional[int] = None) -> List[FactorSpec]:
        """Generate ``n`` factors.

        Stage 2 (``llm_client`` set): formulas come from the real LLM.
        Stage 1 (no ``llm_client``): formulas come from the mock generator.

        Args:
            n: Number of factors; a falsy value (``None`` or ``0``) falls
                back to ``self.n``.

        Returns:
            List of ``FactorSpec``.
        """
        n = n or self.n

        if self._llm_client is not None:
            return self._generate_with_llm(n)
        return self._generate_mock(n)

    def _build_spec(self, index: int, formula: str, meta: dict) -> FactorSpec:
        """Build a ``FactorSpec`` with the id/source/category/complexity scheme shared by all G2 factors."""
        return FactorSpec(
            formula_id=f"G2_{index:03d}",
            formula=formula,
            source="g2_llm_only",
            category=self._infer_category(formula),
            # Number of opening parentheses is used as the complexity score.
            complexity=formula.count("("),
            meta=meta,
        )

    def _mock_meta(self, formula: str) -> dict:
        """Return the meta dict for a mock formula.

        A formula is flagged invalid when it is one of the deliberately
        unsupported ``INVALID_LLM_TOKENS`` or contains ``rank``.
        """
        is_valid = formula not in INVALID_LLM_TOKENS and "rank" not in formula
        return {"seed": self.seed, "valid": is_valid}

    def _generate_with_llm(self, n: int) -> List[FactorSpec]:
        """Stage 2: generate formulas with a real LLM plus the operator_lookup tool.

        The prompt is run through a nanobot ``Agent``; the reply is parsed,
        de-duplicated and validated. Any exception while calling the agent or
        parsing falls back to the mock generator. If fewer than ``n`` formulas
        validate, the remainder is filled with mock factors, which are marked
        ``meta["llm_generated"] = False``.

        Args:
            n: Number of factors wanted.

        Returns:
            Up to ``n`` ``FactorSpec`` objects with sequential ``G2_xxx`` ids.
        """
        import asyncio

        prompt = (
            f"你是一个量化研究员,负责生成 alpha 因子公式。\n\n"
            f"任务:生成 {n} 个唯一的 alpha 因子公式。\n\n"
            f"步骤:\n"
            f"1. 先调用 operator_lookup(action='list_operators') 获取可用算子列表\n"
            f"2. 调用 operator_lookup(action='get_operator_info', name='xxx') 获取算子详情\n"
            f"3. 生成公式,确保使用可用算子\n"
            f"4. 调用 operator_lookup(action='validate_formula', formula='xxx') 验证每个公式\n\n"
            f"基础特征:open, high, low, close, vol, amount\n"
            f"公式语法:Python 函数调用,如 rank(ts_mean(close, 20))\n\n"
            f"最终输出:仅返回 JSON 数组,如 [\"rank(ts_mean(close, 20))\", \"ts_std(close, 10)\"]\n"
            f"不要解释,不要 markdown,只要 JSON 数组。"
        )

        try:
            # Use the nanobot agent (it has tool access).
            # NOTE(review): self._llm_client is not handed to the agent; it
            # only selects this code path. Confirm that is intended.
            from QuantNodes.agent.nanobot_bridge import Agent

            agent = Agent(workspace=".agent")
            # asyncio.run raises RuntimeError when an event loop is already
            # running in this thread; that case also lands in the fallback.
            response = asyncio.run(agent.run(prompt, session_id="g2-formula-gen"))
            candidates = self._parse_llm_formulas(response, n)
        except Exception as e:
            logger.warning("[G2] LLM generation failed: %s, falling back to mock", e)
            return self._generate_mock(n)

        # Validate candidates in reply order; dict.fromkeys drops repeated
        # formulas while preserving that order.
        llm_formulas: List[str] = []
        for candidate in dict.fromkeys(candidates):
            if len(llm_formulas) >= n:
                break
            if self._validate_formula(candidate):
                llm_formulas.append(candidate)

        factors: List[FactorSpec] = [
            self._build_spec(idx, formula, {"llm_generated": True})
            for idx, formula in enumerate(llm_formulas)
        ]

        # Not enough valid formulas: top up from the mock generator.
        if len(factors) < n:
            logger.warning(
                "[G2] Only %d/%d valid formulas, supplementing with mock",
                len(llm_formulas), n,
            )
            for mock_factor in self._generate_mock(n - len(factors)):
                # Rebuild the spec so ids continue the sequence and the
                # factor is not mislabelled as LLM output.
                meta = self._mock_meta(mock_factor.formula)
                meta["llm_generated"] = False
                factors.append(
                    self._build_spec(len(factors), mock_factor.formula, meta)
                )

        logger.info("[G2] LLM generated %d factors (%d valid)", len(factors), len(llm_formulas))
        return factors

    @staticmethod
    def _validate_formula(formula: str) -> bool:
        """Check whether a formula evaluates on a tiny synthetic panel.

        The formula is accepted when ``OperatorVocab.default().evaluate``
        returns a non-``None`` result whose length equals the number of input
        rows. Any exception raised during evaluation counts as invalid.

        Args:
            formula: Formula string to check.

        Returns:
            ``True`` if the formula evaluated as described, else ``False``.
        """
        from QuantNodes.research.quant_alpha.operator_vocab import OperatorVocab
        import polars as pl

        # 3 codes x 3 dates, 9 rows in total.
        test_data = pl.DataFrame({
            "date": ["2020-01-01", "2020-01-02", "2020-01-03"] * 3,
            "code": ["A"] * 3 + ["B"] * 3 + ["C"] * 3,
            "open": [10.0] * 9,
            "high": [10.5] * 9,
            "low": [9.5] * 9,
            "close": [10.0, 10.1, 10.2, 11.0, 11.1, 11.2, 12.0, 12.1, 12.2],
            "vol": [1000.0] * 9,
            "amount": [10000.0] * 9,
        })

        try:
            vocab = OperatorVocab.default()
            result = vocab.evaluate(
                formula=formula,
                data=test_data,
                date_column="date",
                code_column="code",
            )
            return result is not None and len(result) == len(test_data)
        except Exception as exc:
            # Deliberate best-effort: a failing formula is simply invalid,
            # but leave a trace so rejections can be diagnosed.
            logger.debug("[G2] formula rejected by validator: %s (%s)", formula, exc)
            return False

    @staticmethod
    def _parse_llm_formulas(response: str, expected: int) -> List[str]:
        """Extract a list of formula strings from an LLM reply.

        Tries, in order: the whole reply as a JSON array; a JSON array inside
        a markdown code fence; any double-quoted substring that contains a
        parenthesised call.

        Args:
            response: Raw reply text.
            expected: Number of formulas requested (currently unused; kept
                for interface compatibility).

        Returns:
            Formula strings in reply order, or ``[]`` when nothing parses.
        """
        import json
        import re

        # 1) The whole reply is a JSON array.
        try:
            parsed = json.loads(response)
            if isinstance(parsed, list):
                return [str(item) for item in parsed]
        except json.JSONDecodeError:
            pass

        # 2) A JSON array wrapped in a markdown code fence.
        fenced = re.search(r'```(?:json)?\s*(\[.*?\])\s*```', response, re.DOTALL)
        if fenced:
            try:
                parsed = json.loads(fenced.group(1))
                if isinstance(parsed, list):
                    return [str(item) for item in parsed]
            except json.JSONDecodeError:
                pass

        # 3) Anything quoted that looks like a function call.
        formulas = re.findall(r'"([^"]+\([^"]+\)[^"]*)"', response)
        if formulas:
            return formulas

        logger.warning("[G2] Failed to parse LLM response, using fallback")
        return []

    def _generate_mock(self, n: int) -> List[FactorSpec]:
        """Stage 1: mock formula generation.

        Produces simple formulas first (``_gen_formula`` with ``max_depth=1``),
        then complex ones (``max_depth=2``), then entries drawn from
        ``INVALID_LLM_TOKENS``, skipping duplicates. The target split is
        roughly 60% / 25% / 15%, with at least one complex and one invalid
        formula when ``n`` allows. The RNG is re-seeded from ``self.seed`` on
        every call, so the output is deterministic for a given ``n``.

        Args:
            n: Number of factors wanted.

        Returns:
            Up to ``n`` ``FactorSpec`` objects; fewer only if the attempt
            budget (``n * 10`` draws) runs out before ``n`` unique formulas
            are found.
        """
        rng = random.Random(self.seed)
        n = max(n, 0)

        # Quotas are clamped so they never exceed n (no negative n_simple for
        # tiny n) and never ask for more invalid formulas than there are
        # unique tokens, which would otherwise burn the attempt budget and
        # return a short list.
        n_invalid = min(n, len(INVALID_LLM_TOKENS), max(1, int(n * 0.15)))
        n_complex = min(n - n_invalid, max(1, int(n * 0.25)))
        n_simple = n - n_invalid - n_complex

        factors: List[FactorSpec] = []
        seen: set = set()
        produced = {"simple": 0, "complex": 0, "invalid": 0}

        attempts = 0
        max_attempts = n * 10
        while len(factors) < n and attempts < max_attempts:
            attempts += 1
            filled = len(factors)
            if filled < n_simple:
                tier = "simple"
                formula = _gen_formula(rng, max_depth=1)
            elif filled < n_simple + n_complex:
                tier = "complex"
                formula = _gen_formula(rng, max_depth=2)
            else:
                tier = "invalid"
                formula = rng.choice(INVALID_LLM_TOKENS)

            if formula in seen:
                continue
            seen.add(formula)
            produced[tier] += 1

            factors.append(self._build_spec(filled, formula, self._mock_meta(formula)))

        if len(factors) < n:
            logger.warning(
                "[G2] mock generator produced only %d/%d unique formulas",
                len(factors), n,
            )

        logger.info(
            "[G2] generated %d factors (n_simple=%d, n_complex=%d, n_invalid=%d)",
            len(factors),
            produced["simple"],
            produced["complex"],
            produced["invalid"],
        )
        return factors

    @staticmethod
    def _infer_category(formula: str) -> str:
        """Guess a coarse category from operator substrings in the formula.

        Checks are ordered; the first match wins:
        ``ts_mean`` or ``delta`` -> "momentum", ``ts_std`` -> "volatility",
        ``vol`` -> "volume", ``abs`` -> "reversal", otherwise "value".
        """
        if "ts_mean" in formula or "delta" in formula:
            return "momentum"
        if "ts_std" in formula:
            return "volatility"
        if "vol" in formula:
            return "volume"
        if "abs" in formula:
            return "reversal"
        return "value"
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
"""
|
|
3
|
+
g3_alpha_gpt.py - G3 baseline:包 AlphaGptWorkflow(M5)
|
|
4
|
+
|
|
5
|
+
G3 = "Alpha-GPT",代表完整 5 智能体编排:idea-generator → formula-translator →
|
|
6
|
+
evaluator → reflector → critic,5 轮迭代。
|
|
7
|
+
|
|
8
|
+
Stage 1:用 mock LLM(_mock_llm_response)跑通 workflow
|
|
9
|
+
Stage 2:注入 NanobotLLMWrapper(MiniMaxClient) 替换 mock
|
|
10
|
+
|
|
11
|
+
复用:
|
|
12
|
+
- QuantNodes.research.quant_alpha.workflow.AlphaGptWorkflow:完整 5 智能体编排
|
|
13
|
+
- QuantNodes.research.quant_alpha.workflow.AlphaGptConfig:config dataclass
|
|
14
|
+
- contracts.Baseline:generate_factors() 接口
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import logging
|
|
20
|
+
from typing import List, Optional
|
|
21
|
+
|
|
22
|
+
from ..contracts import Baseline, FactorSpec
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
__all__ = ["G3AlphaGpt"]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class G3AlphaGpt(Baseline):
    """G3 "Alpha-GPT" baseline.

    Wraps ``AlphaGptWorkflow`` and converts the formulas in its
    ``final_pool`` into ``FactorSpec`` entries. ``llm_client`` is forwarded
    to the workflow unchanged; according to the original notes, ``None``
    makes the workflow use its mock LLM (stage 1) and a real client is
    injected for stage 2.
    """

    # Cap on fallback draws per missing factor, so padding cannot spin forever
    # when the fallback generator cannot produce enough distinct formulas.
    _FALLBACK_ATTEMPTS_PER_FACTOR = 1000

    def __init__(
        self,
        n: int = 30,
        objective: str = "maximize IC and IR for 1-day forward return",
        iterations: int = 3,
        pool_size: int = 10,
        seed: int = 11,
        llm_client=None,
    ) -> None:
        """Store the baseline configuration.

        Args:
            n: Default number of factors to return.
            objective: Objective text passed to ``AlphaGptConfig``.
            iterations: Iteration count passed to ``AlphaGptConfig``.
            pool_size: Pool size passed to ``AlphaGptConfig``.
            seed: Seed for the fallback formula generator (used as
                ``seed + 100``).
            llm_client: Optional client forwarded to ``AlphaGptWorkflow``.
        """
        self.n = n
        self.objective = objective
        self.iterations = iterations
        self.pool_size = pool_size
        self.seed = seed
        self._llm_client = llm_client
        self._last_workflow_result = None  # exposed to the runner for debugging

    @property
    def group_name(self) -> str:
        """Identifier of this baseline group."""
        return "G3_AlphaGpt"

    def generate_factors(self, n: Optional[int] = None) -> List[FactorSpec]:
        """Run ``AlphaGptWorkflow`` and return up to ``n`` factors.

        The first ``n`` entries of the workflow's ``final_pool`` are converted
        to ``FactorSpec``. If the workflow fails or yields fewer than ``n``
        formulas, the list is padded with formulas from the G1 generator so
        the baseline size stays stable.

        Args:
            n: Number of factors wanted; a falsy value (``None`` or ``0``)
                falls back to ``self.n``.

        Returns:
            List of ``FactorSpec`` (formula plus metadata). It can be shorter
            than ``n`` only if the fallback generator runs out of distinct
            formulas within its attempt budget.
        """
        n = n or self.n

        result = self._run_workflow(n)
        factors: List[FactorSpec] = []
        if result is not None:
            self._last_workflow_result = result
            factors = [
                FactorSpec(
                    formula_id=f"G3_{i:03d}",
                    formula=formula_rec.formula,
                    source="g3_alpha_gpt",
                    category=formula_rec.category or "unknown",
                    complexity=formula_rec.formula.count("("),
                    meta={
                        "rank": formula_rec.rank,
                        "selection_reason": formula_rec.selection_reason,
                        "round_discovered": formula_rec.round_discovered,
                    },
                )
                for i, formula_rec in enumerate(result.final_pool[:n])
            ]
        # Remember how many factors really came from the workflow so the
        # summary log below does not count fallback padding as workflow output.
        n_from_workflow = len(factors)

        # Stage-1 mock compatibility: pad when the workflow failed, returned
        # nothing, or returned too few formulas.
        if len(factors) < n:
            logger.warning(
                "[G3] workflow returned %d factors (期望 %d), mock 兜底补充",
                len(factors),
                n,
            )
            self._pad_with_fallback(factors, n)

        if result is not None:
            logger.info(
                "[G3] AlphaGptWorkflow returned %d factors (total=%d, elapsed=%.2fs)",
                n_from_workflow,
                result.total_formulas,
                result.elapsed_seconds,
            )

        return factors

    def _run_workflow(self, n: int):
        """Build and run the workflow; return its result, or ``None`` on failure."""
        from QuantNodes.research.quant_alpha.workflow.alpha_gpt import (
            AlphaGptConfig,
            AlphaGptWorkflow,
        )

        config = AlphaGptConfig(
            objective=self.objective,
            iterations=self.iterations,
            pool_size=self.pool_size,
            top_k=n,
        )

        logger.info(
            "[G3] starting AlphaGptWorkflow (iterations=%d, pool_size=%d)",
            self.iterations,
            self.pool_size,
        )

        try:
            workflow = AlphaGptWorkflow(config=config, llm_client=self._llm_client)
            return workflow.run()
        except Exception as e:
            # Best-effort by design: the caller pads with fallback formulas.
            # Keep the traceback so the failure can still be diagnosed.
            logger.error(
                "[G3] AlphaGptWorkflow failed: %s, using fallback", e, exc_info=True
            )
            return None

    def _pad_with_fallback(self, factors: List[FactorSpec], n: int) -> None:
        """Append distinct G1-style formulas to ``factors`` in place up to ``n``."""
        from .g1_handcrafted import _gen_formula
        import random

        rng = random.Random(self.seed + 100)
        seen = {f.formula for f in factors}
        attempts_left = (n - len(factors)) * self._FALLBACK_ATTEMPTS_PER_FACTOR

        while len(factors) < n and attempts_left > 0:
            attempts_left -= 1
            formula = _gen_formula(rng)
            if formula in seen:
                continue
            seen.add(formula)
            factors.append(
                FactorSpec(
                    formula_id=f"G3_{len(factors):03d}",
                    formula=formula,
                    source="g3_alpha_gpt",
                    category="g3_fallback_mock",
                    complexity=formula.count("("),
                    meta={"fallback": True, "seed": self.seed},
                )
            )

        if len(factors) < n:
            logger.warning(
                "[G3] fallback generator exhausted its attempt budget: %d/%d factors",
                len(factors),
                n,
            )
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
"""
|
|
3
|
+
clickhouse_data_loader.py - Stage 2 ClickHouse 数据加载器
|
|
4
|
+
|
|
5
|
+
从 ClickHouse 加载全 A 股日线数据,支持本地 parquet 缓存。
|
|
6
|
+
复用 database_node/clickhouse_node.py 的 CHBase 客户端。
|
|
7
|
+
|
|
8
|
+
ClickHouse 表 schema (quote.stock_quote):
|
|
9
|
+
ts_code LowCardinality(String) → code
|
|
10
|
+
trade_date DateTime → date (cast Date)
|
|
11
|
+
open Float64 → open
|
|
12
|
+
high Float64 → high
|
|
13
|
+
low Float64 → low
|
|
14
|
+
close Float64 → close
|
|
15
|
+
vol Float64 → vol
|
|
16
|
+
amount Float64 → amount
|
|
17
|
+
|
|
18
|
+
Stage 2 替代 MockDataLoader,接口完全兼容 DataLoader ABC。
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import logging
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
from typing import Optional
|
|
26
|
+
|
|
27
|
+
import polars as pl
|
|
28
|
+
|
|
29
|
+
from .contracts import DataLoader
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
__all__ = ["ClickHouseDataLoader"]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class ClickHouseDataLoader(DataLoader):
    """Stage 2 data loader backed by ClickHouse.

    Queries daily bar data from a ClickHouse table and can cache the cleaned
    result in a local parquet file (written after the first successful query).

    Usage::

        loader = ClickHouseDataLoader(
            table="quote.stock_quote",
            start_date="2019-01-01",
            end_date="2024-12-31",
        )
        df = loader.load()  # polars.DataFrame

    NOTE(review): the cache is keyed only by its path. ``load()`` returns an
    existing cache file without checking it against ``table``, the date range
    or ``min_amount_percentile``; use a different ``cache_parquet`` (or
    ``None``) when those change.
    """

    # ClickHouse column name -> output column name
    FIELD_MAP = {
        "ts_code": "code",
        "open": "open",
        "high": "high",
        "low": "low",
        "close": "close",
        "vol": "vol",
        "amount": "amount",
    }

    def __init__(
        self,
        table: str = "quote.stock_quote",
        host: str = "localhost",
        port: int = 8123,
        user: str = "data",
        password: str = "123456",
        database: str = "quote",
        start_date: str = "2019-01-01",
        end_date: str = "2024-12-31",
        min_amount_percentile: float = 0.0,
        cache_parquet: Optional[str] = "data/cache/full_a_2019_2024.parquet",
    ) -> None:
        """Store connection, query and cache settings.

        Args:
            table: Table to query (interpolated into the SQL text as-is).
            host: ClickHouse host.
            port: ClickHouse port.
            user: ClickHouse user.
            password: ClickHouse password.
            database: Stored on the instance; see the note in ``_make_node``.
            start_date: Inclusive lower bound on ``trade_date``.
            end_date: Upper bound on ``trade_date`` (``<=`` comparison).
            min_amount_percentile: If > 0, rows whose ``amount`` is below this
                quantile are dropped during cleaning.
            cache_parquet: Cache file path, or ``None`` to disable caching.
        """
        self.table = table
        self.host = host
        self.port = port
        self.user = user
        self.password = password
        self.database = database
        self.start_date = start_date
        self.end_date = end_date
        self.min_amount_percentile = min_amount_percentile
        self.cache_parquet = Path(cache_parquet) if cache_parquet else None

    def load(self) -> pl.DataFrame:
        """Return the data, preferring the parquet cache over a fresh query.

        Returns:
            The cached frame if the cache file exists; otherwise the cleaned
            query result, which is also written to the cache when enabled.

        Raises:
            RuntimeError: If the ClickHouse query fails or returns no rows.
        """
        cache_path = self.cache_parquet
        if cache_path and cache_path.exists():
            logger.info("[ClickHouseDataLoader] 读取缓存: %s", cache_path)
            df = pl.read_parquet(cache_path)
            logger.info("[ClickHouseDataLoader] 缓存加载完成: %s rows", df.height)
            return df

        logger.info(
            "[ClickHouseDataLoader] 从 ClickHouse 查询: %s (%s ~ %s)",
            self.table, self.start_date, self.end_date,
        )
        df = self._clean(self._query_clickhouse())

        if cache_path:
            self._write_cache(df, cache_path)
            logger.info(
                "[ClickHouseDataLoader] 缓存已保存: %s (%s rows)",
                cache_path, df.height,
            )

        return df

    @staticmethod
    def _write_cache(df: pl.DataFrame, cache_path: Path) -> None:
        """Write ``df`` to ``cache_path`` through a temp file and a rename.

        Writing in place would leave a truncated parquet file behind if the
        process died mid-write, and every later ``load()`` would then try to
        read that corrupt cache.
        """
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        tmp_path = cache_path.with_name(cache_path.name + ".tmp")
        try:
            df.write_parquet(tmp_path)
            tmp_path.replace(cache_path)
        finally:
            # Only still present if the write or the rename failed.
            if tmp_path.exists():
                tmp_path.unlink()

    def _make_node(self):
        """Create the ``ClickHouseNode`` client used by the query methods."""
        from QuantNodes.database_node.clickhouse_node import ClickHouseNode

        # NOTE(review): the connection database is hard-coded to "default" and
        # ``self.database`` is not used; presumably fine because ``table`` is
        # database-qualified -- confirm before changing.
        return ClickHouseNode(
            host=self.host,
            port=self.port,
            user=self.user,
            passwd=self.password,
            database="default",
        )

    def _query_clickhouse(self) -> pl.DataFrame:
        """Query the configured date range and return a polars DataFrame.

        The query goes through ``ClickHouseNode`` (database_node) rather than
        raw HTTP, and the returned pandas frame is converted to polars.

        Raises:
            RuntimeError: If the query raises or the result is empty.
        """
        fields = ", ".join(
            f"{ch_name} AS {pl_name}" for ch_name, pl_name in self.FIELD_MAP.items()
        )
        # NOTE(review): table and dates are interpolated directly into the SQL
        # text; they must come from trusted configuration.
        sql = (
            f"SELECT {fields}, CAST(trade_date AS Date) AS date "
            f"FROM {self.table} "
            f"WHERE trade_date >= '{self.start_date}' "
            f"AND trade_date <= '{self.end_date}' "
            "ORDER BY date, code"
        )

        logger.info("[ClickHouseDataLoader] SQL: %s", sql[:200])

        node = self._make_node()

        try:
            pd_df = node.query(sql)
        except Exception as e:
            raise RuntimeError(f"ClickHouse query failed: {e}") from e

        if pd_df is None or len(pd_df) == 0:
            raise RuntimeError("ClickHouse returned empty result")

        # The downstream pipeline works on polars frames.
        df = pl.from_pandas(pd_df)
        logger.info("[ClickHouseDataLoader] 查询完成: %s rows", df.height)
        return df

    def _clean(self, df: pl.DataFrame) -> pl.DataFrame:
        """Normalise dtypes and drop suspended / low-liquidity rows.

        Steps: make ``date`` a ``Date`` column, cast the price/volume columns
        that are present to ``Float64``, keep rows with ``vol > 0``, and, when
        ``min_amount_percentile > 0``, keep rows whose ``amount`` is at or
        above that quantile.
        """
        # Make sure the date column is a Date.
        date_dtype = df["date"].dtype
        if date_dtype == pl.Utf8:
            df = df.with_columns(pl.col("date").str.to_date())
        elif date_dtype == pl.Datetime:
            df = df.with_columns(pl.col("date").cast(pl.Date))

        # Cast all numeric columns in one pass instead of one frame per column.
        numeric_cols = [
            col
            for col in ("open", "high", "low", "close", "vol", "amount")
            if col in df.columns
        ]
        if numeric_cols:
            df = df.with_columns([pl.col(col).cast(pl.Float64) for col in numeric_cols])

        # Drop suspended rows (vol == 0).
        before = df.height
        df = df.filter(pl.col("vol") > 0)
        if df.height < before:
            logger.info(
                "[ClickHouseDataLoader] 过滤停牌: %d → %d rows",
                before, df.height,
            )

        # Optional low-liquidity filter.
        if self.min_amount_percentile > 0:
            threshold = df["amount"].quantile(self.min_amount_percentile)
            if threshold is None:
                # quantile() yields None for an empty or all-null column;
                # there is nothing meaningful to compare against.
                logger.warning(
                    "[ClickHouseDataLoader] amount quantile unavailable "
                    "(empty or all-null column); skipping liquidity filter"
                )
            else:
                before = df.height
                df = df.filter(pl.col("amount") >= threshold)
                logger.info(
                    "[ClickHouseDataLoader] 过滤低流动性 (< %.0f): %d → %d rows",
                    threshold, before, df.height,
                )

        return df

    def load_summary(self) -> dict:
        """Return aggregate stats for the date range without loading all rows.

        Returns:
            The first result row as a dict with keys ``min_date``,
            ``max_date``, ``total_rows`` and ``n_stocks``; values exposing
            ``isoformat`` are converted to strings. On query failure or an
            empty result, ``{"error": "query failed"}``.
        """
        sql = (
            "SELECT min(trade_date) as min_date, max(trade_date) as max_date, "
            "count() as total_rows, count(distinct ts_code) as n_stocks "
            f"FROM {self.table} "
            f"WHERE trade_date >= '{self.start_date}' "
            f"AND trade_date <= '{self.end_date}'"
        )

        node = self._make_node()

        try:
            pd_df = node.query(sql)
        except Exception as e:
            logger.warning("[ClickHouseDataLoader] summary query failed: %s", e)
            return {"error": "query failed"}

        if pd_df is None or len(pd_df) == 0:
            return {"error": "query failed"}

        row = pd_df.iloc[0].to_dict()
        # Convert timestamp-like values to strings so the dict can be
        # JSON-serialised. Reassigning existing keys during iteration is safe.
        for k, v in row.items():
            if hasattr(v, "isoformat"):
                row[k] = v.isoformat()
        return row
|