quantnodes 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (399) hide show
  1. QuantNodes/__init__.py +15 -0
  2. QuantNodes/__main__.py +14 -0
  3. QuantNodes/agent/__init__.py +158 -0
  4. QuantNodes/agent/agents/__init__.py +13 -0
  5. QuantNodes/agent/agents/definition.py +180 -0
  6. QuantNodes/agent/agents/manager.py +73 -0
  7. QuantNodes/agent/config/__init__.py +34 -0
  8. QuantNodes/agent/config/executor.py +958 -0
  9. QuantNodes/agent/config/loader.py +427 -0
  10. QuantNodes/agent/config/templates/bollinger_bands.yaml +84 -0
  11. QuantNodes/agent/config/templates/dual_ma.yaml +72 -0
  12. QuantNodes/agent/config/templates/empty.yaml +56 -0
  13. QuantNodes/agent/config/templates/mean_reversion.yaml +47 -0
  14. QuantNodes/agent/config/templates/mean_reversion_zscore.yaml +90 -0
  15. QuantNodes/agent/config/templates/momentum.yaml +81 -0
  16. QuantNodes/agent/config/templates/momentum_breakout.yaml +84 -0
  17. QuantNodes/agent/config/templates/rsi_strategy.yaml +72 -0
  18. QuantNodes/agent/config/templates/volume_price.yaml +86 -0
  19. QuantNodes/agent/config/types.py +156 -0
  20. QuantNodes/agent/config_mapper.py +293 -0
  21. QuantNodes/agent/core/__init__.py +19 -0
  22. QuantNodes/agent/core/dream.py +47 -0
  23. QuantNodes/agent/core/quant_dream.py +274 -0
  24. QuantNodes/agent/cron_jobs.py +314 -0
  25. QuantNodes/agent/nanobot_bridge.py +242 -0
  26. QuantNodes/agent/permission/__init__.py +30 -0
  27. QuantNodes/agent/permission/defaults.py +36 -0
  28. QuantNodes/agent/permission/evaluate.py +41 -0
  29. QuantNodes/agent/permission/models.py +59 -0
  30. QuantNodes/agent/permission/service.py +133 -0
  31. QuantNodes/agent/providers/__init__.py +11 -0
  32. QuantNodes/agent/providers/base.py +102 -0
  33. QuantNodes/agent/providers/quantnodes.py +610 -0
  34. QuantNodes/agent/providers/rate_limiter.py +326 -0
  35. QuantNodes/agent/providers/registry.py +163 -0
  36. QuantNodes/agent/skills/__init__.py +20 -0
  37. QuantNodes/agent/skills/base.py +118 -0
  38. QuantNodes/agent/skills/bridge.py +73 -0
  39. QuantNodes/agent/skills/factor/__init__.py +14 -0
  40. QuantNodes/agent/skills/factor/correlation.py +99 -0
  41. QuantNodes/agent/skills/factor/group_backtest.py +114 -0
  42. QuantNodes/agent/skills/factor/ic_analysis.py +106 -0
  43. QuantNodes/agent/skills/loader.py +107 -0
  44. QuantNodes/agent/skills/registry.py +105 -0
  45. QuantNodes/agent/skills/strategy/__init__.py +16 -0
  46. QuantNodes/agent/skills/strategy/bollinger.py +86 -0
  47. QuantNodes/agent/skills/strategy/dual_ma.py +82 -0
  48. QuantNodes/agent/skills/strategy/momentum.py +74 -0
  49. QuantNodes/agent/skills/strategy/rsi_reversal.py +99 -0
  50. QuantNodes/agent/skills_quant/__init__.py +14 -0
  51. QuantNodes/agent/skills_quant/backtest-analyze/SKILL.md +42 -0
  52. QuantNodes/agent/skills_quant/config-driven/SKILL.md +72 -0
  53. QuantNodes/agent/skills_quant/factor-research/SKILL.md +40 -0
  54. QuantNodes/agent/skills_quant/quant-dream/SKILL.md +55 -0
  55. QuantNodes/agent/skills_quant/risk-management/SKILL.md +45 -0
  56. QuantNodes/agent/skills_quant/strategy-design/SKILL.md +43 -0
  57. QuantNodes/agent/templates/__init__.py +4 -0
  58. QuantNodes/agent/tools/__init__.py +173 -0
  59. QuantNodes/agent/tools/_workspace.py +51 -0
  60. QuantNodes/agent/tools/alpha_backtest.py +328 -0
  61. QuantNodes/agent/tools/alpha_evaluate.py +493 -0
  62. QuantNodes/agent/tools/backtest.py +226 -0
  63. QuantNodes/agent/tools/base.py +133 -0
  64. QuantNodes/agent/tools/code_search.py +207 -0
  65. QuantNodes/agent/tools/config_backtest.py +401 -0
  66. QuantNodes/agent/tools/context.py +97 -0
  67. QuantNodes/agent/tools/dream_skill.py +77 -0
  68. QuantNodes/agent/tools/echo.py +38 -0
  69. QuantNodes/agent/tools/factor.py +231 -0
  70. QuantNodes/agent/tools/file_ops.py +201 -0
  71. QuantNodes/agent/tools/git_ops.py +190 -0
  72. QuantNodes/agent/tools/operator_lookup.py +218 -0
  73. QuantNodes/agent/tools/output_truncation.py +77 -0
  74. QuantNodes/agent/tools/path_check.py +43 -0
  75. QuantNodes/agent/tools/pipeline.py +62 -0
  76. QuantNodes/agent/tools/registry.py +150 -0
  77. QuantNodes/agent/tools/sandbox.py +62 -0
  78. QuantNodes/agent/tools/shell_safety.py +63 -0
  79. QuantNodes/agent/tools/strategy.py +106 -0
  80. QuantNodes/agent/tools/task.py +171 -0
  81. QuantNodes/agent/tools/web_fetch.py +142 -0
  82. QuantNodes/agent/tools/web_search.py +114 -0
  83. QuantNodes/agent/tools/wiki.py +370 -0
  84. QuantNodes/agent/utils/__init__.py +11 -0
  85. QuantNodes/agent/utils/helpers.py +43 -0
  86. QuantNodes/agent/utils/prompt_templates.py +30 -0
  87. QuantNodes/agent/workflows/__init__.py +20 -0
  88. QuantNodes/agent/workflows/implementations/__init__.py +8 -0
  89. QuantNodes/agent/workflows/implementations/alpha_gpt.py +508 -0
  90. QuantNodes/agent/workflows/implementations/mcts.py +442 -0
  91. QuantNodes/agent/workflows/parsers.py +44 -0
  92. QuantNodes/agent/workflows/registry.py +119 -0
  93. QuantNodes/agent/workflows/step_agent.py +219 -0
  94. QuantNodes/agent/workflows/tool.py +198 -0
  95. QuantNodes/ai/__init__.py +93 -0
  96. QuantNodes/ai/llm/__init__.py +75 -0
  97. QuantNodes/ai/llm/base.py +233 -0
  98. QuantNodes/ai/llm/decorators.py +281 -0
  99. QuantNodes/ai/llm/gateway.py +571 -0
  100. QuantNodes/ai/llm/null.py +76 -0
  101. QuantNodes/ai/llm/openai.py +435 -0
  102. QuantNodes/ai/optimizer.py +405 -0
  103. QuantNodes/ai/prompts/__init__.py +229 -0
  104. QuantNodes/ai/sandbox.py +371 -0
  105. QuantNodes/ai/sandbox_pandas_bridge.py +150 -0
  106. QuantNodes/ai/strategy_gen.py +396 -0
  107. QuantNodes/backtest/__init__.py +64 -0
  108. QuantNodes/backtest/backtest_node.py +188 -0
  109. QuantNodes/backtest/broker_node.py +378 -0
  110. QuantNodes/backtest/config_runner.py +397 -0
  111. QuantNodes/backtest/config_strategy.py +64 -0
  112. QuantNodes/backtest/risk_node.py +360 -0
  113. QuantNodes/backtest/strategy_node.py +268 -0
  114. QuantNodes/cache_node/__init__.py +19 -0
  115. QuantNodes/cache_node/base.py +244 -0
  116. QuantNodes/cache_node/cache_store.py +99 -0
  117. QuantNodes/cache_node/metadata.py +100 -0
  118. QuantNodes/cli/__init__.py +109 -0
  119. QuantNodes/cli/_helpers.py +511 -0
  120. QuantNodes/cli/command.py +110 -0
  121. QuantNodes/cli/commands/__init__.py +69 -0
  122. QuantNodes/cli/commands/agent.py +158 -0
  123. QuantNodes/cli/commands/alpha.py +951 -0
  124. QuantNodes/cli/commands/chat.py +38 -0
  125. QuantNodes/cli/commands/evolve.py +120 -0
  126. QuantNodes/cli/commands/factor.py +569 -0
  127. QuantNodes/cli/commands/init.py +190 -0
  128. QuantNodes/cli/commands/run.py +259 -0
  129. QuantNodes/cli/commands/serve.py +398 -0
  130. QuantNodes/cli/commands/version.py +120 -0
  131. QuantNodes/cli/enhanced.py +146 -0
  132. QuantNodes/conf_node/__init__.py +37 -0
  133. QuantNodes/conf_node/base.py +120 -0
  134. QuantNodes/conf_node/env_config.py +132 -0
  135. QuantNodes/conf_node/ini_config.py +70 -0
  136. QuantNodes/conf_node/json_config.py +69 -0
  137. QuantNodes/conf_node/yaml_config.py +78 -0
  138. QuantNodes/constants.py +17 -0
  139. QuantNodes/core/__init__.py +196 -0
  140. QuantNodes/core/_lookback_helpers.py +49 -0
  141. QuantNodes/core/ast_parser.py +198 -0
  142. QuantNodes/core/base.py +61 -0
  143. QuantNodes/core/cache_manager.py +344 -0
  144. QuantNodes/core/cache_utils.py +150 -0
  145. QuantNodes/core/cond_builder.py +53 -0
  146. QuantNodes/core/config.py +170 -0
  147. QuantNodes/core/constants.py +48 -0
  148. QuantNodes/core/control.py +412 -0
  149. QuantNodes/core/data_preprocessing.py +453 -0
  150. QuantNodes/core/data_source.py +46 -0
  151. QuantNodes/core/events.py +178 -0
  152. QuantNodes/core/evolution/__init__.py +22 -0
  153. QuantNodes/core/evolution/loop.py +583 -0
  154. QuantNodes/core/evolution/operators.py +289 -0
  155. QuantNodes/core/evolution/settings.py +44 -0
  156. QuantNodes/core/expression.py +841 -0
  157. QuantNodes/core/feedback/__init__.py +38 -0
  158. QuantNodes/core/feedback/channels.py +182 -0
  159. QuantNodes/core/feedback/collector.py +91 -0
  160. QuantNodes/core/feedback/dataclass.py +239 -0
  161. QuantNodes/core/feedback/llm_judge.py +138 -0
  162. QuantNodes/core/knowledge/__init__.py +69 -0
  163. QuantNodes/core/knowledge/knowledge_base.py +217 -0
  164. QuantNodes/core/knowledge/lineage_compress.py +196 -0
  165. QuantNodes/core/knowledge/lineage_expand.py +123 -0
  166. QuantNodes/core/knowledge/metrics/__init__.py +43 -0
  167. QuantNodes/core/knowledge/metrics/evaluator.py +176 -0
  168. QuantNodes/core/knowledge/metrics/metrics.py +220 -0
  169. QuantNodes/core/knowledge/rag_prompt.py +196 -0
  170. QuantNodes/core/knowledge/retriever.py +209 -0
  171. QuantNodes/core/lambda_node.py +81 -0
  172. QuantNodes/core/monitoring/__init__.py +22 -0
  173. QuantNodes/core/monitoring/collector.py +292 -0
  174. QuantNodes/core/monitoring/dashboard.py +365 -0
  175. QuantNodes/core/node.py +375 -0
  176. QuantNodes/core/pandas_utils.py +504 -0
  177. QuantNodes/core/parallel/__init__.py +15 -0
  178. QuantNodes/core/parallel/worker.py +140 -0
  179. QuantNodes/core/parallel/worker_process.py +265 -0
  180. QuantNodes/core/path_utils.py +73 -0
  181. QuantNodes/core/pipeline.py +328 -0
  182. QuantNodes/core/plugin.py +135 -0
  183. QuantNodes/core/quality_gate/__init__.py +32 -0
  184. QuantNodes/core/quality_gate/complexity.py +94 -0
  185. QuantNodes/core/quality_gate/consistency.py +26 -0
  186. QuantNodes/core/quality_gate/node.py +97 -0
  187. QuantNodes/core/quality_gate/redundancy.py +51 -0
  188. QuantNodes/core/quality_gate/settings.py +43 -0
  189. QuantNodes/core/quality_gate/zoo.py +98 -0
  190. QuantNodes/core/serializable.py +116 -0
  191. QuantNodes/core/serialization.py +673 -0
  192. QuantNodes/core/tools.py +333 -0
  193. QuantNodes/core/trajectory/__init__.py +25 -0
  194. QuantNodes/core/trajectory/entry.py +116 -0
  195. QuantNodes/core/trajectory/lineage.py +67 -0
  196. QuantNodes/core/trajectory/pool.py +211 -0
  197. QuantNodes/core/trajectory/selector.py +140 -0
  198. QuantNodes/core/visualization/__init__.py +33 -0
  199. QuantNodes/core/visualization/builder.py +233 -0
  200. QuantNodes/core/visualization/gate_breakdown.py +140 -0
  201. QuantNodes/core/visualization/lineage_dag.py +203 -0
  202. QuantNodes/core/visualization/metric_distribution.py +125 -0
  203. QuantNodes/core/visualization/report.py +68 -0
  204. QuantNodes/database_node/__init__.py +69 -0
  205. QuantNodes/database_node/base.py +135 -0
  206. QuantNodes/database_node/clickhouse_node.py +272 -0
  207. QuantNodes/database_node/csv_node.py +83 -0
  208. QuantNodes/database_node/duckdb_node.py +86 -0
  209. QuantNodes/database_node/factory.py +83 -0
  210. QuantNodes/database_node/mysql_node.py +100 -0
  211. QuantNodes/database_node/parquet_node.py +75 -0
  212. QuantNodes/database_node/sqlite_node.py +67 -0
  213. QuantNodes/factor_node/__init__.py +50 -0
  214. QuantNodes/factor_node/factor.py +563 -0
  215. QuantNodes/factor_node/factor_db.py +421 -0
  216. QuantNodes/factor_node/factor_functions/__init__.py +252 -0
  217. QuantNodes/factor_node/factor_functions/_helpers.py +358 -0
  218. QuantNodes/factor_node/factor_functions/_helpers_debug.py +317 -0
  219. QuantNodes/factor_node/factor_functions/composite_ops.py +136 -0
  220. QuantNodes/factor_node/factor_functions/math_ops.py +433 -0
  221. QuantNodes/factor_node/factor_functions/section_ops.py +290 -0
  222. QuantNodes/factor_node/factor_functions/talib_ops.py +1293 -0
  223. QuantNodes/factor_node/factor_functions/time_ops.py +535 -0
  224. QuantNodes/factor_node/factor_operation.py +1115 -0
  225. QuantNodes/factor_node/factor_table.py +1073 -0
  226. QuantNodes/factor_node/quant_nodes_object.py +60 -0
  227. QuantNodes/mcp_server/__init__.py +27 -0
  228. QuantNodes/mcp_server/__main__.py +4 -0
  229. QuantNodes/mcp_server/server.py +272 -0
  230. QuantNodes/methods/__init__.py +28 -0
  231. QuantNodes/methods/pipeline.py +100 -0
  232. QuantNodes/methods/sandbox.py +102 -0
  233. QuantNodes/monitor/__init__.py +27 -0
  234. QuantNodes/monitor/agent_tools/__init__.py +5 -0
  235. QuantNodes/monitor/agent_tools/monitor_tool.py +98 -0
  236. QuantNodes/monitor/agent_tools/schedule_tool.py +98 -0
  237. QuantNodes/monitor/agent_tools/version_tool.py +133 -0
  238. QuantNodes/monitor/monitor/__init__.py +6 -0
  239. QuantNodes/monitor/monitor/alerter.py +60 -0
  240. QuantNodes/monitor/monitor/collector.py +164 -0
  241. QuantNodes/monitor/monitor/dashboard.py +115 -0
  242. QuantNodes/monitor/monitor/drift.py +190 -0
  243. QuantNodes/monitor/scheduler/__init__.py +4 -0
  244. QuantNodes/monitor/scheduler/runner.py +133 -0
  245. QuantNodes/monitor/scheduler/scheduler.py +184 -0
  246. QuantNodes/monitor/storage/__init__.py +16 -0
  247. QuantNodes/monitor/storage/models.py +70 -0
  248. QuantNodes/monitor/storage/repository.py +407 -0
  249. QuantNodes/monitor/version/__init__.py +4 -0
  250. QuantNodes/monitor/version/diff.py +81 -0
  251. QuantNodes/monitor/version/version_manager.py +182 -0
  252. QuantNodes/operator_node/__init__.py +28 -0
  253. QuantNodes/operator_node/base.py +97 -0
  254. QuantNodes/operator_node/query_node.py +129 -0
  255. QuantNodes/operator_node/sql_builder.py +125 -0
  256. QuantNodes/operator_node/sql_utils.py +172 -0
  257. QuantNodes/operator_node/transform.py +130 -0
  258. QuantNodes/operators/__init__.py +90 -0
  259. QuantNodes/operators/_engine.py +108 -0
  260. QuantNodes/operators/composite.py +161 -0
  261. QuantNodes/operators/composite_dag.py +667 -0
  262. QuantNodes/operators/composite_dag_ops.py +343 -0
  263. QuantNodes/operators/composite_dag_pandas_ops.py +382 -0
  264. QuantNodes/operators/custom.py +408 -0
  265. QuantNodes/operators/facade.py +164 -0
  266. QuantNodes/operators/math.py +163 -0
  267. QuantNodes/operators/proxy.py +29 -0
  268. QuantNodes/operators/registry.py +144 -0
  269. QuantNodes/operators/section.py +99 -0
  270. QuantNodes/operators/talib.py +757 -0
  271. QuantNodes/operators/templates.py +95 -0
  272. QuantNodes/operators/time_series.py +136 -0
  273. QuantNodes/prompts/__init__.py +20 -0
  274. QuantNodes/prompts/backtest/__init__.py +12 -0
  275. QuantNodes/prompts/backtest/factor_based.py +86 -0
  276. QuantNodes/prompts/backtest/standard.py +73 -0
  277. QuantNodes/prompts/factor/__init__.py +14 -0
  278. QuantNodes/prompts/factor/correlation.py +77 -0
  279. QuantNodes/prompts/factor/group_backtest.py +86 -0
  280. QuantNodes/prompts/factor/ic_analysis.py +91 -0
  281. QuantNodes/prompts/strategy/__init__.py +18 -0
  282. QuantNodes/prompts/strategy/market_neutral.py +96 -0
  283. QuantNodes/prompts/strategy/mean_reversion.py +107 -0
  284. QuantNodes/prompts/strategy/momentum.py +160 -0
  285. QuantNodes/prompts/strategy/pairs_trading.py +107 -0
  286. QuantNodes/prompts/strategy/trend_following.py +96 -0
  287. QuantNodes/research/README.md +106 -0
  288. QuantNodes/research/__init__.py +154 -0
  289. QuantNodes/research/_legacy_3c/__init__.py +61 -0
  290. QuantNodes/research/_legacy_3c/auto_researcher.py +289 -0
  291. QuantNodes/research/_legacy_3c/factor_evaluator.py +560 -0
  292. QuantNodes/research/_legacy_3c/factor_miner.py +318 -0
  293. QuantNodes/research/_legacy_3c/mcts_search.py +324 -0
  294. QuantNodes/research/factor_test/__init__.py +25 -0
  295. QuantNodes/research/factor_test/config.py +184 -0
  296. QuantNodes/research/factor_test/config_builder.py +276 -0
  297. QuantNodes/research/factor_test/e2e/data_prep.py +163 -0
  298. QuantNodes/research/factor_test/e2e/run_evolution_e2e.py +309 -0
  299. QuantNodes/research/factor_test/evolution_adapter.py +231 -0
  300. QuantNodes/research/factor_test/feedback_wrapper.py +102 -0
  301. QuantNodes/research/factor_test/ifind_db/__init__.py +7 -0
  302. QuantNodes/research/factor_test/ifind_db/fetcher.py +224 -0
  303. QuantNodes/research/factor_test/ifind_db/ifind_database.py +689 -0
  304. QuantNodes/research/factor_test/nodes/__init__.py +1 -0
  305. QuantNodes/research/factor_test/nodes/_base.py +91 -0
  306. QuantNodes/research/factor_test/nodes/adjust_date_node.py +48 -0
  307. QuantNodes/research/factor_test/nodes/configs.py +240 -0
  308. QuantNodes/research/factor_test/nodes/factor_neutralize_node.py +87 -0
  309. QuantNodes/research/factor_test/nodes/factor_preprocess_node.py +222 -0
  310. QuantNodes/research/factor_test/nodes/factor_score_node.py +141 -0
  311. QuantNodes/research/factor_test/nodes/factor_test_report_node.py +153 -0
  312. QuantNodes/research/factor_test/nodes/group_analyzer_node.py +317 -0
  313. QuantNodes/research/factor_test/nodes/ic_analyzer_node.py +112 -0
  314. QuantNodes/research/factor_test/nodes/load_data_node.py +100 -0
  315. QuantNodes/research/factor_test/nodes/long_short_node.py +93 -0
  316. QuantNodes/research/factor_test/nodes/neutralizers.py +222 -0
  317. QuantNodes/research/factor_test/nodes/preprocess_strategies.py +277 -0
  318. QuantNodes/research/factor_test/nodes/risk_correlation_node.py +112 -0
  319. QuantNodes/research/factor_test/nodes/sample_pool_filter_node.py +110 -0
  320. QuantNodes/research/factor_test/nodes/tradability_filter_node.py +92 -0
  321. QuantNodes/research/factor_test/pipeline_runner.py +305 -0
  322. QuantNodes/research/factor_test/pipeline_spec.py +216 -0
  323. QuantNodes/research/factor_test/utils/__init__.py +26 -0
  324. QuantNodes/research/factor_test/utils/constants.py +86 -0
  325. QuantNodes/research/factor_test/utils/data_loader.py +141 -0
  326. QuantNodes/research/factor_test/utils/date_utils.py +232 -0
  327. QuantNodes/research/factor_test/utils/file_loaders.py +150 -0
  328. QuantNodes/research/factor_test/utils/labels.py +37 -0
  329. QuantNodes/research/factor_test/utils/metrics_extractor.py +55 -0
  330. QuantNodes/research/factor_test/utils/performance_metrics.py +175 -0
  331. QuantNodes/research/factor_test/utils/safe_load.py +106 -0
  332. QuantNodes/research/quant_alpha/CHANGELOG.md +80 -0
  333. QuantNodes/research/quant_alpha/README.md +142 -0
  334. QuantNodes/research/quant_alpha/__init__.py +45 -0
  335. QuantNodes/research/quant_alpha/adapters/__init__.py +99 -0
  336. QuantNodes/research/quant_alpha/adapters/calculator.py +503 -0
  337. QuantNodes/research/quant_alpha/adapters/expression.py +387 -0
  338. QuantNodes/research/quant_alpha/alpha101_design/__init__.py +50 -0
  339. QuantNodes/research/quant_alpha/alpha101_design/few_shot_examples.py +243 -0
  340. QuantNodes/research/quant_alpha/alpha101_design/philosophy.py +474 -0
  341. QuantNodes/research/quant_alpha/alpha158_design/__init__.py +63 -0
  342. QuantNodes/research/quant_alpha/alpha158_design/few_shot_examples.py +219 -0
  343. QuantNodes/research/quant_alpha/alpha158_design/philosophy.py +240 -0
  344. QuantNodes/research/quant_alpha/evaluation/__init__.py +47 -0
  345. QuantNodes/research/quant_alpha/evaluation/baselines/__init__.py +8 -0
  346. QuantNodes/research/quant_alpha/evaluation/baselines/g1_handcrafted.py +135 -0
  347. QuantNodes/research/quant_alpha/evaluation/baselines/g2_llm_only.py +269 -0
  348. QuantNodes/research/quant_alpha/evaluation/baselines/g3_alpha_gpt.py +152 -0
  349. QuantNodes/research/quant_alpha/evaluation/clickhouse_data_loader.py +227 -0
  350. QuantNodes/research/quant_alpha/evaluation/contracts.py +376 -0
  351. QuantNodes/research/quant_alpha/evaluation/evaluators/__init__.py +6 -0
  352. QuantNodes/research/quant_alpha/evaluation/evaluators/polars_evaluator.py +545 -0
  353. QuantNodes/research/quant_alpha/evaluation/mock_data_loader.py +226 -0
  354. QuantNodes/research/quant_alpha/evaluation/runner.py +243 -0
  355. QuantNodes/research/quant_alpha/llm/__init__.py +38 -0
  356. QuantNodes/research/quant_alpha/llm/parser.py +681 -0
  357. QuantNodes/research/quant_alpha/logic_driven_pipeline.py +411 -0
  358. QuantNodes/research/quant_alpha/logic_mining/__init__.py +74 -0
  359. QuantNodes/research/quant_alpha/logic_mining/compiler.py +457 -0
  360. QuantNodes/research/quant_alpha/logic_mining/generator.py +366 -0
  361. QuantNodes/research/quant_alpha/logic_mining/models.py +252 -0
  362. QuantNodes/research/quant_alpha/logic_mining/parser.py +287 -0
  363. QuantNodes/research/quant_alpha/logic_mining/pipelines.py +297 -0
  364. QuantNodes/research/quant_alpha/logic_mining/sources.py +149 -0
  365. QuantNodes/research/quant_alpha/mcts/__init__.py +66 -0
  366. QuantNodes/research/quant_alpha/mcts/cache.py +262 -0
  367. QuantNodes/research/quant_alpha/mcts/extension_ops.py +320 -0
  368. QuantNodes/research/quant_alpha/mcts/feedback.py +825 -0
  369. QuantNodes/research/quant_alpha/mcts/op_prior.py +180 -0
  370. QuantNodes/research/quant_alpha/mcts/search.py +540 -0
  371. QuantNodes/research/quant_alpha/mcts/tree.py +201 -0
  372. QuantNodes/research/quant_alpha/operator_vocab/__init__.py +50 -0
  373. QuantNodes/research/quant_alpha/operator_vocab/config.py +54 -0
  374. QuantNodes/research/quant_alpha/operator_vocab/metadata.py +263 -0
  375. QuantNodes/research/quant_alpha/operator_vocab/vocabulary.py +481 -0
  376. QuantNodes/research/quant_alpha/pipeline.py +1027 -0
  377. QuantNodes/research/quant_alpha/types/__init__.py +27 -0
  378. QuantNodes/research/quant_alpha/types/constants.py +28 -0
  379. QuantNodes/research/quant_alpha/types/state.py +205 -0
  380. QuantNodes/research/quant_alpha/workflow/__init__.py +32 -0
  381. QuantNodes/research/quant_alpha/workflow/alpha_gpt.py +911 -0
  382. QuantNodes/research/quant_alpha/workflow/alpha_logics.py +416 -0
  383. QuantNodes/research/quant_alpha/workflow/state.py +27 -0
  384. QuantNodes/research/report_reproducer.py +485 -0
  385. QuantNodes/research/wiki.py +1155 -0
  386. QuantNodes/symbolic/__init__.py +51 -0
  387. QuantNodes/symbolic/compiler.py +113 -0
  388. QuantNodes/symbolic/dialect.py +260 -0
  389. QuantNodes/symbolic/executor.py +147 -0
  390. QuantNodes/symbolic/expression.py +234 -0
  391. QuantNodes/symbolic/functions.py +433 -0
  392. QuantNodes/symbolic/optimizer.py +165 -0
  393. QuantNodes/ui_node/__init__.py +30 -0
  394. QuantNodes/ui_node/base.py +222 -0
  395. quantnodes-3.0.0.dist-info/METADATA +463 -0
  396. quantnodes-3.0.0.dist-info/RECORD +399 -0
  397. quantnodes-3.0.0.dist-info/WHEEL +5 -0
  398. quantnodes-3.0.0.dist-info/entry_points.txt +24 -0
  399. quantnodes-3.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1073 @@
1
+ # coding=utf-8
2
+ """因子表
3
+
4
+ 包含 FactorTable(因子表接口)和 CustomFT(自定义因子表)
5
+ 以及相关的遍历模式、运算模式。
6
+ v2.0: 移除 traits 和 multiprocessing,使用 dataclass + concurrent.futures
7
+ """
8
+ import datetime as dt
9
+ import gc
10
+ import mmap
11
+ import os
12
+ import pickle
13
+ import shelve
14
+ import tempfile
15
+ import time
16
+ import uuid
17
+ from collections import OrderedDict
18
+ from dataclasses import dataclass, field
19
+ from enum import Enum
20
+ from multiprocessing import Queue, Process, Lock
21
+ from os import cpu_count
22
+ from typing import Dict, List
23
+
24
+ import numpy as np
25
+ import pandas as pd
26
+ from progressbar import ProgressBar
27
+
28
+ from QuantNodes.factor_node.quant_nodes_object import QuantNodesObject
29
+ from QuantNodes.core.base import FactorError
30
+ from QuantNodes.core.tools import (
31
+ compile_id_filter_str,
32
+ gen_available_name,
33
+ partition_list_moving_sampling,
34
+ start_multi_process,
35
+ )
36
+
37
+
38
class ErgodicModeType(Enum):
    """Cache orientation used while traversing a factor table.

    The member values are the persisted/display strings and must not change.
    """

    # Cache is organised per factor.
    FACTOR = "因子"
    # Cache is organised per ID.
    ID = "ID"
42
+
43
+
44
@dataclass
class _ErgodicMode(QuantNodesObject):
    """Configuration and runtime flags for the traversal (ergodic) mode.

    The explicit ``__init__`` below takes precedence over the one that
    ``@dataclass`` would otherwise generate, so the field defaults are only
    class-level defaults unless ``QuantNodesObject`` applies them.

    NOTE(review): other code in this module reads CamelCase attributes such as
    ``ForwardPeriod`` and ``CacheSize`` on this object; presumably the base
    class maps them onto these snake_case fields — confirm.
    """

    # Window sizes around the current position (units presumably "number of
    # time points" — TODO confirm against the callers).
    forward_period: int = 600
    backward_period: int = 1
    # Which axis the cache is organised by.
    cache_mode: ErgodicModeType = ErgodicModeType.FACTOR
    # Upper bounds on the number of cached factors / IDs.
    max_factor_cache_num: int = 60
    max_id_cache_num: int = 10000
    # Cache size; the worker functions scale this value by 2 ** 20.
    cache_size: int = 300
    # Time points and IDs to traverse.
    ergodic_dts: List = field(default_factory=list)
    ergodic_ids: List = field(default_factory=list)

    def __init__(self, sys_args: Dict = None, **kwargs):
        """Initialise via the base class, then reset the runtime flags.

        Args:
            sys_args: Optional settings mapping forwarded to the base class.
            **kwargs: Extra keyword arguments forwarded to the base class.
        """
        super().__init__(sys_args=sys_args, **kwargs)
        # Runtime state: no current time point yet, traversal not started.
        self._CurDT = None
        self._isStarted = False
60
+
61
+
62
def _prepareMMAPFactorCacheData(ft, mmap_cache):
    """Worker loop that keeps a per-factor cache and ships it through mmap.

    Tasks are read from ``ft.ErgodicMode._Queue2SubProcess``:

    * ``None``: stop the loop.
    * ``(None, None)``: pickle the cache and send it to the main process in
      chunks of at most ``CacheSize`` bytes. Each chunk is written at offset 0
      of the mmap, its length is put on ``_Queue2MainProcess``, and the worker
      then waits for one item on ``_Queue2SubProcess`` before sending the next
      chunk. A final ``0`` marks the end of the transfer.
    * ``(None, (new_factors, pop_factors))``: drop and add cached factors.
    * ``(index, ...)``: slide the cached date window to the one derived from
      ``index`` and compute data only for the dates that are new.

    Args:
        ft: Factor table exposing ``ErgodicMode``, ``__QN_prepare_raw_data__``
            and ``__QN_calc_data__``.
        mmap_cache: Shared mmap buffer (replaced by a tagged mmap on Windows).

    Returns:
        ``0`` once the stop task has been received.
    """
    CacheData, CacheDTs, MMAPCacheData, DTNum = {}, [], mmap_cache, len(ft.ErgodicMode._DateTimes)
    CacheSize = int(ft.ErgodicMode.CacheSize * 2 ** 20)
    if os.name == 'nt':
        # On Windows the shared buffer is re-opened by tag name.
        MMAPCacheData = mmap.mmap(-1, CacheSize, tagname=ft.ErgodicMode._TagName)
    while True:
        Task = ft.ErgodicMode._Queue2SubProcess.get()
        if Task is None:
            break
        if (Task[0] is None) and (Task[1] is None):
            # Transfer the whole cache to the main process, chunk by chunk.
            CacheDataByte = pickle.dumps(CacheData)
            DataLen = len(CacheDataByte)
            # Integer stepping avoids the float rounding of int(DataLen / CacheSize).
            for iStartInd in range(0, DataLen, CacheSize):
                iEndInd = min(iStartInd + CacheSize, DataLen)
                MMAPCacheData.seek(0)
                MMAPCacheData.write(CacheDataByte[iStartInd:iEndInd])
                ft.ErgodicMode._Queue2MainProcess.put(iEndInd - iStartInd)
                # Wait for the main process to acknowledge the chunk.
                ft.ErgodicMode._Queue2SubProcess.get()
            ft.ErgodicMode._Queue2MainProcess.put(0)
            del CacheDataByte
            gc.collect()
        elif Task[0] is None:
            # Adjust which factors are cached.
            NewFactors, PopFactors = Task[1]
            for iFactorName in PopFactors:
                CacheData.pop(iFactorName)
            if NewFactors:
                if CacheDTs:
                    CacheData.update(dict(ft.__QN_calc_data__(
                        raw_data=ft.__QN_prepare_raw_data__(factor_names=NewFactors, ids=ft.ErgodicMode._IDs,
                                                            dts=CacheDTs), factor_names=NewFactors,
                        ids=ft.ErgodicMode._IDs, dts=CacheDTs)))
                else:
                    # No window yet: register empty placeholders.
                    CacheData.update(
                        {iFactorName: pd.DataFrame(index=CacheDTs, columns=ft.ErgodicMode._IDs) for iFactorName in
                         NewFactors})
        else:
            # Slide the cached date window forward.
            CurInd = Task[0] + ft.ErgodicMode.ForwardPeriod + 1
            if CurInd < DTNum:
                OldCacheDTs = set(CacheDTs)
                CacheDTs = ft.ErgodicMode._DateTimes[max((0, CurInd - ft.ErgodicMode.BackwardPeriod)):min(
                    (DTNum, CurInd + ft.ErgodicMode.ForwardPeriod + 1))].tolist()
                NewCacheDTs = sorted(set(CacheDTs).difference(OldCacheDTs))
                if CacheData:
                    isDisjoint = OldCacheDTs.isdisjoint(CacheDTs)
                    CacheFactorNames = list(CacheData.keys())
                    if NewCacheDTs:
                        NewCacheData = ft.__QN_calc_data__(
                            raw_data=ft.__QN_prepare_raw_data__(factor_names=CacheFactorNames,
                                                                ids=ft.ErgodicMode._IDs,
                                                                dts=NewCacheDTs), factor_names=CacheFactorNames,
                            ids=ft.ErgodicMode._IDs, dts=NewCacheDTs)
                    else:
                        NewCacheData = {name: pd.DataFrame(index=NewCacheDTs, columns=ft.ErgodicMode._IDs)
                                        for name in CacheFactorNames}
                    for iFactorName in CacheData:
                        if isDisjoint:
                            CacheData[iFactorName] = NewCacheData[iFactorName]
                        else:
                            # reindex (not .loc) because the new window contains
                            # labels that are not in the old index yet; .loc with
                            # missing labels raises KeyError on pandas >= 1.0.
                            CacheData[iFactorName] = CacheData[iFactorName].reindex(index=CacheDTs)
                            if NewCacheDTs:
                                CacheData[iFactorName].loc[NewCacheDTs, :] = NewCacheData[iFactorName]
                    NewCacheData = None
    return 0
126
+
127
+
128
def _prepareMMAPIDCacheData(ft, mmap_cache):
    """Worker loop that keeps a per-ID cache and ships it through mmap.

    Tasks are read from ``ft.ErgodicMode._Queue2SubProcess``:

    * ``None``: stop the loop.
    * ``(None, None)``: pickle the cache and send it to the main process in
      chunks of at most ``CacheSize`` bytes. Each chunk is written at offset 0
      of the mmap, its length is put on ``_Queue2MainProcess``, and the worker
      then waits for one item on ``_Queue2SubProcess`` before sending the next
      chunk. A final ``0`` marks the end of the transfer.
    * ``(None, (new_id, pop_id))``: drop and/or add one cached ID.
    * ``(index, ...)``: slide the cached date window to the one derived from
      ``index`` and compute data only for the dates that are new.

    NOTE(review): the result of ``ft.__QN_calc_data__`` is indexed here with
    three axes (``.iloc[:, :, 0]`` / ``.loc[:, :, id]``); it is assumed to be a
    panel-like object whose third axis is the ID — confirm against the
    implementation.

    Args:
        ft: Factor table exposing ``ErgodicMode``, ``FactorNames``,
            ``__QN_prepare_raw_data__`` and ``__QN_calc_data__``.
        mmap_cache: Shared mmap buffer (replaced by a tagged mmap on Windows).

    Returns:
        ``0`` once the stop task has been received.
    """
    CacheData, CacheDTs, MMAPCacheData, DTNum = {}, [], mmap_cache, len(ft.ErgodicMode._DateTimes)
    CacheSize = int(ft.ErgodicMode.CacheSize * 2 ** 20)
    if os.name == 'nt':
        # On Windows the shared buffer is re-opened by tag name.
        MMAPCacheData = mmap.mmap(-1, CacheSize, tagname=ft.ErgodicMode._TagName)
    while True:
        Task = ft.ErgodicMode._Queue2SubProcess.get()
        if Task is None:
            break
        if (Task[0] is None) and (Task[1] is None):
            # Transfer the whole cache to the main process, chunk by chunk.
            CacheDataByte = pickle.dumps(CacheData)
            DataLen = len(CacheDataByte)
            # Integer stepping avoids the float rounding of int(DataLen / CacheSize).
            for iStartInd in range(0, DataLen, CacheSize):
                iEndInd = min(iStartInd + CacheSize, DataLen)
                MMAPCacheData.seek(0)
                MMAPCacheData.write(CacheDataByte[iStartInd:iEndInd])
                ft.ErgodicMode._Queue2MainProcess.put(iEndInd - iStartInd)
                # Wait for the main process to acknowledge the chunk.
                ft.ErgodicMode._Queue2SubProcess.get()
            ft.ErgodicMode._Queue2MainProcess.put(0)
            del CacheDataByte
            gc.collect()
        elif Task[0] is None:
            # Adjust which IDs are cached.
            NewID, PopID = Task[1]
            if PopID:
                CacheData.pop(PopID)
            if NewID:
                if CacheDTs:
                    CacheData[NewID] = ft.__QN_calc_data__(
                        raw_data=ft.__QN_prepare_raw_data__(factor_names=ft.FactorNames, ids=[NewID], dts=CacheDTs),
                        factor_names=ft.FactorNames, ids=[NewID], dts=CacheDTs).iloc[:, :, 0]
                else:
                    # No window yet: register an empty placeholder.
                    CacheData[NewID] = pd.DataFrame(index=CacheDTs, columns=ft.FactorNames)
        else:
            # Slide the cached date window forward.
            CurInd = Task[0] + ft.ErgodicMode.ForwardPeriod + 1
            if CurInd < DTNum:
                OldCacheDTs = set(CacheDTs)
                CacheDTs = ft.ErgodicMode._DateTimes[max((0, CurInd - ft.ErgodicMode.BackwardPeriod)):min(
                    (DTNum, CurInd + ft.ErgodicMode.ForwardPeriod + 1))].tolist()
                NewCacheDTs = sorted(set(CacheDTs).difference(OldCacheDTs))
                if CacheData:
                    isDisjoint = OldCacheDTs.isdisjoint(CacheDTs)
                    CacheIDs = list(CacheData.keys())
                    if NewCacheDTs:
                        NewCacheData = ft.__QN_calc_data__(
                            raw_data=ft.__QN_prepare_raw_data__(factor_names=ft.FactorNames, ids=CacheIDs,
                                                                dts=NewCacheDTs), factor_names=ft.FactorNames,
                            ids=CacheIDs, dts=NewCacheDTs)
                    else:
                        # Nothing new to compute. The previous fallback was a
                        # plain dict, which has no ``.loc`` and crashed below.
                        NewCacheData = None
                    for iID in CacheData:
                        if NewCacheData is not None:
                            iNewData = NewCacheData.loc[:, :, iID]
                        else:
                            # Same layout as the empty placeholder used when an
                            # ID is added before any window exists.
                            iNewData = pd.DataFrame(index=NewCacheDTs, columns=ft.FactorNames)
                        if isDisjoint:
                            CacheData[iID] = iNewData
                        else:
                            # reindex (not .loc) because the new window contains
                            # labels that are not in the old index yet; .loc with
                            # missing labels raises KeyError on pandas >= 1.0.
                            CacheData[iID] = CacheData[iID].reindex(index=CacheDTs)
                            if NewCacheDTs:
                                CacheData[iID].loc[NewCacheDTs, :] = iNewData
                    NewCacheData = None
    return 0
189
+
190
+
191
@dataclass
class _OperationMode(QuantNodesObject):
    """Settings and bookkeeping for the batch operation mode of a factor table.

    The explicit ``__init__`` below takes precedence over the one that
    ``@dataclass`` would otherwise generate.
    """

    # Time points, IDs and factor names covered by the operation.
    date_times: List = field(default_factory=list)
    ids: List = field(default_factory=list)
    factor_names: List = field(default_factory=list)
    # Number of sub-processes (0 presumably means "run in-process" — TODO confirm).
    sub_process_num: int = 0
    # Reference time axis.
    dt_ruler: List = field(default_factory=list)

    def __init__(self, ft, sys_args: Dict = None, config_file: str = None, **kwargs):
        """Set up the private bookkeeping state, then run the base initialiser.

        Args:
            ft: The factor table that owns this operation mode.
            sys_args: Optional settings mapping forwarded to the base class.
            config_file: Optional configuration file forwarded to the base class.
            **kwargs: Extra keyword arguments forwarded to the base class.
        """
        self._FT = ft
        self._isStarted = False
        # Factor bookkeeping.
        self._Factors = []
        self._FactorDict = {}
        self._FactorID = {}
        self._FactorStartDT = {}
        self._FactorPrepareIDs = {}
        # Process bookkeeping.
        self._iPID = "0"
        self._PIDs = []
        self._PID_IDs = {}
        self._PID_Lock = {}
        # Cache locations.
        self._CacheDir = None
        self._RawDataDir = ""
        self._CacheDataDir = ""
        self._Event = {}
        # Imported here, as in the original, rather than at module import time.
        from QuantNodes.core.tools import get_shelve_file_suffix
        suffix = get_shelve_file_suffix()
        # A non-empty suffix is stored with a leading dot; a falsy one is kept as is.
        self._FileSuffix = "." + suffix if suffix else suffix
        super().__init__(sys_args=sys_args, config_file=config_file, **kwargs)

    def __getstate__(self):
        """Return the pickling state, with ``_CacheDir`` replaced by its ``name``."""
        state = dict(self.__dict__)
        cache_dir = self._CacheDir
        if cache_dir is not None:
            state["_CacheDir"] = cache_dir.name
        return state
227
+
228
+
229
def _prepareRawData(args):
    """Prepare and save raw data for every factor group listed in ``args``.

    Progress is reported in one of two ways: if ``args`` has a
    ``"Sub2MainQueue"`` entry, ``(args["PID"], 1, None)`` is put on that queue
    after each group; otherwise a ``ProgressBar`` is advanced.

    Args:
        args: Mapping with ``"GroupInfo"`` (tuples of
            ``(ft, factor_names, raw_factor_names, dts, extra_args)``),
            ``"PrepareIDs"``, ``"PID_PrepareIDs"``, ``"RawDataFileNames"`` and
            ``"FT"``; optionally ``"Sub2MainQueue"`` and ``"PID"``.

    Returns:
        ``0`` after all groups have been processed.
    """

    def _prepare_group(i):
        # Prepare and save the raw data of group ``i``.
        iFT, _iFactorNames, iRawFactorNames, iDTs, iArgs = args['GroupInfo'][i]
        iPrepareIDs = args["PrepareIDs"][i]
        if iPrepareIDs is None:
            # Fall back to the IDs of the owning factor table.
            iPrepareIDs = args["FT"].OperationMode.IDs
        iPID_PrepareIDs = args["PID_PrepareIDs"][i]
        if iPID_PrepareIDs is None:
            iPID_PrepareIDs = args["FT"].OperationMode._PID_IDs
        iRawData = iFT.__QN_prepare_raw_data__(iRawFactorNames, iPrepareIDs, iDTs, iArgs)
        iFT.__QN_save_raw_data__(iRawData, iRawFactorNames, args["FT"].OperationMode._RawDataDir,
                                 iPID_PrepareIDs, args["RawDataFileNames"][i],
                                 args["FT"].OperationMode._PID_Lock)

    nGroup = len(args['GroupInfo'])
    if "Sub2MainQueue" not in args:
        with ProgressBar(max_value=nGroup) as ProgBar:
            for i in range(nGroup):
                _prepare_group(i)
                ProgBar.update(i + 1)
    else:
        for i in range(nGroup):
            _prepare_group(i)
            # Report one finished group to the main process.
            args['Sub2MainQueue'].put((args["PID"], 1, None))
    return 0
260
+
261
+
262
+ def _build_task_dispatch(FT, TDB, TableName, SpecificTarget):
263
+ """构建任务分发字典"""
264
+ if SpecificTarget:
265
+ TaskDispatched = OrderedDict()
266
+ for iFactorName in FT.OperationMode.FactorNames:
267
+ iDB, iTableName, iTargetFactorName = SpecificTarget.get(iFactorName, (None, None, None))
268
+ if iDB is None:
269
+ iDB = TDB
270
+ if iTableName is None:
271
+ iTableName = TableName
272
+ if iTargetFactorName is None:
273
+ iTargetFactorName = iFactorName
274
+ iDBTable = (id(iDB), iTableName)
275
+ if iDBTable in TaskDispatched:
276
+ TaskDispatched[iDBTable][1].append(FT.OperationMode._FactorDict[iFactorName])
277
+ TaskDispatched[iDBTable][2].append(iTargetFactorName)
278
+ else:
279
+ TaskDispatched[iDBTable] = (iDB, [FT.OperationMode._FactorDict[iFactorName]], [iTargetFactorName])
280
+ else:
281
+ TaskDispatched = {(id(TDB), TableName): (TDB, FT.OperationMode._Factors, list(FT.OperationMode.FactorNames))}
282
+ return TaskDispatched
283
+
284
+
285
+ def _write_factor_data_batch(iDB, iTableName, iFactors, iTargetFactorNames, FT, PID, ProgBar, TaskCount, if_exists):
286
+ """单进程写入因子数据 (writeFactorData 路径)"""
287
+ for j, jFactor in enumerate(iFactors):
288
+ jData = jFactor._QN_get_data(dts=FT.OperationMode.DateTimes, pids=[PID])
289
+ if FT.OperationMode._FactorPrepareIDs[jFactor.Name] is not None:
290
+ jData = jData.loc[:, FT.OperationMode.IDs]
291
+ iDB.writeFactorData(jData, iTableName, iTargetFactorNames[j], if_exists=if_exists,
292
+ data_type=jFactor.getMetaData(key="DataType"))
293
+ jData = None
294
+ TaskCount += 1
295
+ ProgBar.update(TaskCount)
296
+ return TaskCount
297
+
298
+
299
+ def _write_panel_batch(iDB, iTableName, iFactors, iTargetFactorNames, FT, PID, nDT, ProgBar, TaskCount, if_exists):
300
+ """单进程写入面板数据 (writeData 路径)"""
301
+ iFactoNum = len(iFactors)
302
+ iDTLen = int(np.ceil(nDT / iFactoNum))
303
+ iDataTypes = {iTargetFactorNames[j]: jFactor.getMetaData(key="DataType") for j, jFactor in enumerate(iFactors)}
304
+ for j in range(iFactoNum):
305
+ jDTs = list(FT.OperationMode.DateTimes[j * iDTLen:(j + 1) * iDTLen])
306
+ if jDTs:
307
+ jData = {}
308
+ for k, kFactor in enumerate(iFactors):
309
+ ijkData = kFactor._QN_get_data(dts=jDTs, pids=[PID])
310
+ if FT.OperationMode._FactorPrepareIDs[kFactor.Name] is not None:
311
+ ijkData = ijkData.loc[:, FT.OperationMode.IDs]
312
+ jData[iTargetFactorNames[k]] = ijkData
313
+ if j == 0:
314
+ TaskCount += 0.5
315
+ ProgBar.update(TaskCount)
316
+ jData = {name: jData[name] for name in iTargetFactorNames if name in jData}
317
+ iDB.writeData(jData, iTableName, if_exists=if_exists, data_type=iDataTypes)
318
+ jData = None
319
+ TaskCount += 0.5
320
+ ProgBar.update(TaskCount)
321
+ return TaskCount
322
+
323
+
324
+ def _write_factor_data_single(iDB, iTableName, iFactors, iTargetFactorNames, FT, args):
325
+ """多进程写入因子数据 (writeFactorData 路径)"""
326
+ for j, jFactor in enumerate(iFactors):
327
+ if FT.OperationMode._FactorPrepareIDs[jFactor.Name] is not None:
328
+ jData = jFactor._QN_get_data(dts=FT.OperationMode.DateTimes, pids=None)
329
+ jData = jData.loc[:, FT.OperationMode._PID_IDs[args["PID"]]]
330
+ else:
331
+ jData = jFactor._QN_get_data(dts=FT.OperationMode.DateTimes, pids=[args["PID"]])
332
+ iDB.writeFactorData(jData, iTableName, iTargetFactorNames[j], if_exists=args["if_exists"],
333
+ data_type=jFactor.getMetaData(key="DataType"))
334
+ jData = None
335
+ args["Sub2MainQueue"].put((args["PID"], 1, None))
336
+
337
+
338
+ def _write_panel_single(iDB, iTableName, iFactors, iTargetFactorNames, FT, args, nDT):
339
+ """多进程写入面板数据 (writeData 路径)"""
340
+ iFactoNum = len(iFactors)
341
+ iDTLen = int(np.ceil(nDT / iFactoNum))
342
+ iDataTypes = {iTargetFactorNames[j]: jFactor.getMetaData(key="DataType") for j, jFactor in enumerate(iFactors)}
343
+ for j in range(iFactoNum):
344
+ jDTs = list(FT.OperationMode.DateTimes[j * iDTLen:(j + 1) * iDTLen])
345
+ if jDTs:
346
+ jData = {}
347
+ for k, kFactor in enumerate(iFactors):
348
+ ijkData = kFactor._QN_get_data(dts=jDTs, pids=[args["PID"]])
349
+ if FT.OperationMode._FactorPrepareIDs[kFactor.Name] is not None:
350
+ ijkData = ijkData.loc[:, FT.OperationMode.IDs]
351
+ jData[iTargetFactorNames[k]] = ijkData
352
+ if j == 0:
353
+ args["Sub2MainQueue"].put((args["PID"], 0.5, None))
354
+ jData = {name: jData[name] for name in iTargetFactorNames if name in jData}
355
+ iDB.writeData(jData, iTableName, if_exists=args["if_exists"], data_type=iDataTypes)
356
+ jData = None
357
+ args["Sub2MainQueue"].put((args["PID"], 0.5, None))
358
+
359
+
360
def _calculate_single_process(FT, TaskDispatched, TableName, args, nDT):
    """Run every dispatched write task in the current process.

    One progress bar sized by the number of operation factors is shared by all
    tasks. Databases that have ``writeFactorData`` are written factor by
    factor; all others go through the chunked ``writeData`` path.
    ``TableName`` is accepted for signature parity but not read here; the
    table name comes from each task key.
    """
    total = len(FT.OperationMode.FactorNames)
    finished = 0
    with ProgressBar(max_value=total) as bar:
        for task_key, task in TaskDispatched.items():
            db, factors, target_names = task
            table = task_key[1]
            if not hasattr(db, "writeFactorData"):
                finished = _write_panel_batch(db, table, factors, target_names,
                                              FT, args["PID"], nDT, bar, finished, args["if_exists"])
            else:
                finished = _write_factor_data_batch(db, table, factors, target_names,
                                                    FT, args["PID"], bar, finished, args["if_exists"])
374
+
375
+
376
def _calculate_multi_process(FT, TaskDispatched, TableName, args, nDT):
    """Run every dispatched write task inside a worker process.

    Databases that have ``writeFactorData`` are written factor by factor; all
    others go through the chunked ``writeData`` path. Progress is reported by
    the called writers through ``args``. ``TableName`` is accepted for
    signature parity but not read here.
    """
    for task_key, task in TaskDispatched.items():
        db, factors, target_names = task
        table = task_key[1]
        if not hasattr(db, "writeFactorData"):
            _write_panel_single(db, table, factors, target_names, FT, args, nDT)
        else:
            _write_factor_data_single(db, table, factors, target_names, FT, args)
385
+
386
+
387
def _calculate(args):
    """Factor-table calculation entry point (also used as sub-process target).

    Reads ``FT``, ``PID``, ``FactorDB``, ``TableName`` and ``specific_target``
    from ``args``, records the process id on the table's operation mode, builds
    the task dispatch and runs it in single- or multi-process fashion depending
    on ``OperationMode.SubProcessNum``.

    Returns:
        Always ``0``.
    """
    table = args["FT"]
    table.OperationMode._iPID = args["PID"]
    target_db = args["FactorDB"]
    table_name = args["TableName"]
    specific_target = args["specific_target"]
    dispatched = _build_task_dispatch(table, target_db, table_name, specific_target)
    n_dts = len(table.OperationMode.DateTimes)
    if table.OperationMode.SubProcessNum != 0:
        _calculate_multi_process(table, dispatched, table_name, args, n_dts)
    else:
        _calculate_single_process(table, dispatched, table_name, args, n_dts)
    return 0
399
+
400
+
401
+ class FactorTable(QuantNodesObject):
402
+ """因子表(接口类)
403
+
404
+ 因子表可看做一个独立的数据集或命名空间,
405
+ 可看做 Panel(items=[因子], major_axis=[时间点], minor_axis=[ID])。
406
+ """
407
+ ergodic_mode: _ErgodicMode = None
408
+ operation_mode: _OperationMode = None
409
+
410
def __init__(self, name, fdb=None, sys_args={}, config_file=None, **kwargs):
    """Create a factor table.

    Args:
        name: Table name, exposed through ``Name``.
        fdb: Owning factor database (may be ``None``), exposed through ``FactorDB``.
        sys_args, config_file, **kwargs: Forwarded to the parent initialiser.
    """
    self._Name, self._FactorDB = name, fdb
    # Fresh mode objects per table; the operation mode keeps a back-reference.
    self.ergodic_mode = _ErgodicMode()
    self.operation_mode = _OperationMode(ft=self)
    return super().__init__(sys_args=sys_args, config_file=config_file, **kwargs)
416
+
417
@property
def Name(self):
    """Name given to this factor table at construction time (read-only)."""
    table_name = self._Name
    return table_name
420
+
421
@property
def FactorDB(self):
    """Factor database passed to the constructor, possibly ``None`` (read-only)."""
    database = self._FactorDB
    return database
424
+
425
def getMetaData(self, key=None, args={}):
    """Return table-level metadata; this base implementation has none.

    Returns:
        An empty dict when ``key`` is ``None``, otherwise ``None``.
    """
    return {} if key is None else None
429
+
430
@property
def FactorNames(self):
    """Names of the factors in this table; the base implementation has none."""
    return list()
433
+
434
def getFactor(self, ifactor_name, args={}, new_name=None):
    """Create a ``Factor`` bound to this table.

    Args:
        ifactor_name: Name of the factor inside this table.
        args: Not used by this base implementation.
        new_name: Optional replacement name; a falsy value keeps ``ifactor_name``.

    Returns:
        The newly created ``Factor``.
    """
    from QuantNodes.factor_node.factor import Factor
    factor = Factor(name=ifactor_name, ft=self)
    factor.name = new_name if new_name else ifactor_name
    return factor
439
+
440
def getFactorMetaData(self, factor_names, key=None, args={}):
    """Return placeholder factor metadata; this base implementation has none.

    Returns:
        With ``key is None`` an object-dtype DataFrame indexed by
        ``factor_names`` with no columns; otherwise an object-dtype Series of
        ``None`` indexed by ``factor_names``.
    """
    object_dtype = np.dtype("O")
    if key is not None:
        return pd.Series([None] * len(factor_names), index=factor_names, dtype=object_dtype)
    return pd.DataFrame(index=factor_names, dtype=object_dtype)
445
+
446
def getID(self, ifactor_name=None, idt=None, args={}):
    """Return the IDs available in this table; the base implementation has none."""
    return list()
448
+
449
def getIDMask(self, idt, ids=None, id_filter_str=None, args={}):
    """Evaluate an ID filter expression at one date-time.

    Args:
        idt: Date-time at which the filter is evaluated.
        ids: IDs to test; defaults to ``self.getID(idt=idt, args=args)``.
        id_filter_str: Filter expression; a falsy value selects every ID.
        args: Forwarded to ``getID`` and ``readData``.

    Returns:
        With no filter, a Series of ``True`` indexed by ``ids``; otherwise the
        value of the compiled filter expression.

    Raises:
        FactorError: If the filter string cannot be compiled.
    """
    if ids is None:
        ids = self.getID(idt=idt, args=args)
    if not id_filter_str:
        return pd.Series(True, index=ids)
    compiled, filter_factors = compile_id_filter_str(id_filter_str, self.FactorNames)
    if compiled is None:
        raise FactorError("过滤条件字符串有误!")
    section = self.readData(factor_names=filter_factors, ids=ids, dts=[idt], args=args).loc[:, idt, :]
    # NOTE(security): the compiled filter is evaluated with eval(); only pass
    # filter strings from trusted sources. The expression sees the data as "temp".
    return eval(compiled, {}, {"temp": section})
460
+
461
def getFilteredID(self, idt, ids=None, id_filter_str=None, args={}):
    """Return the IDs that satisfy a filter expression at one date-time.

    Args:
        idt: Date-time at which the filter is evaluated.
        ids: Candidate IDs; defaults to ``self.getID(idt=idt, args=args)``.
            Not consulted when no filter is given.
        id_filter_str: Filter expression; with a falsy value the result of
            ``self.getID(idt=idt, args=args)`` is returned directly.
        args: Forwarded to ``getID`` and ``readData``.

    Raises:
        FactorError: If the filter string cannot be compiled.
    """
    if not id_filter_str:
        return self.getID(idt=idt, args=args)
    if ids is None:
        ids = self.getID(idt=idt, args=args)
    compiled, filter_factors = compile_id_filter_str(id_filter_str, self.FactorNames)
    if compiled is None:
        raise FactorError("过滤条件字符串有误!")
    # The local must be called "temp": the evaluated expression refers to it by name.
    temp = self.readData(factor_names=filter_factors, ids=ids, dts=[idt], args=args).loc[:, idt, :]  # noqa: F841
    # NOTE(security): eval() on the compiled filter; use trusted filter strings only.
    expression = "temp[" + compiled + "].index.tolist()"
    return eval(expression)
471
+
472
def getDateTime(self, ifactor_name=None, iid=None, start_dt=None, end_dt=None, args={}):
    """Return the date-times available in this table; the base implementation has none."""
    return list()
474
+
475
+ def __QN_prepare_raw_data__(self, factor_names, ids, dts, args={}):
476
+ return None
477
+
478
+ def __QN_calc_data__(self, raw_data, factor_names, ids, dts, args={}):
479
+ return None
480
+
481
def readData(self, factor_names, ids, dts, args={}):
    """Read factor data for the given factors, IDs and date-times.

    While ergodic mode is started the request is served by
    ``_readData_ErgodicMode``; otherwise raw data is prepared and calculated
    directly through the two ``__QN_*`` hooks.
    """
    if not self.ErgodicMode._isStarted:
        raw = self.__QN_prepare_raw_data__(factor_names=factor_names, ids=ids, dts=dts, args=args)
        return self.__QN_calc_data__(raw_data=raw, factor_names=factor_names, ids=ids, dts=dts, args=args)
    return self._readData_ErgodicMode(factor_names=factor_names, ids=ids, dts=dts, args=args)
487
+
488
+ def _readData_FactorCacheMode(self, factor_names, ids, dts, args={}):
489
+ self.ErgodicMode._FactorReadNum[factor_names] += 1
490
+ if (self.ErgodicMode.MaxFactorCacheNum <= 0) or (not self.ErgodicMode._CacheDTs) or (
491
+ dts[0] < self.ErgodicMode._CacheDTs[0]) or (dts[-1] > self.ErgodicMode._CacheDTs[-1]):
492
+ return self.__QN_calc_data__(
493
+ raw_data=self.__QN_prepare_raw_data__(factor_names=factor_names, ids=ids, dts=dts, args=args),
494
+ factor_names=factor_names, ids=ids, dts=dts, args=args)
495
+ Data = {}
496
+ DataFactorNames = []
497
+ CacheFactorNames = set()
498
+ PopFactorNames = []
499
+ for iFactorName in factor_names:
500
+ iFactorData = self.ErgodicMode._CacheData.get(iFactorName)
501
+ if iFactorData is None:
502
+ if self.ErgodicMode._CacheFactorNum < self.ErgodicMode.MaxFactorCacheNum:
503
+ self.ErgodicMode._CacheFactorNum += 1
504
+ CacheFactorNames.add(iFactorName)
505
+ else:
506
+ CacheFactorReadNum = self.ErgodicMode._FactorReadNum[self.ErgodicMode._CacheData.keys()]
507
+ MinReadNumInd = CacheFactorReadNum.argmin()
508
+ if CacheFactorReadNum.loc[MinReadNumInd] < self.ErgodicMode._FactorReadNum[iFactorName]:
509
+ CacheFactorNames.add(iFactorName)
510
+ PopFactor = MinReadNumInd
511
+ self.ErgodicMode._CacheData.pop(PopFactor)
512
+ PopFactorNames.append(PopFactor)
513
+ else:
514
+ DataFactorNames.append(iFactorName)
515
+ else:
516
+ Data[iFactorName] = iFactorData
517
+ CacheFactorNames = list(CacheFactorNames)
518
+ if CacheFactorNames:
519
+ iData = dict(self.__QN_calc_data__(
520
+ raw_data=self.__QN_prepare_raw_data__(factor_names=CacheFactorNames, ids=self.ErgodicMode._IDs,
521
+ dts=self.ErgodicMode._CacheDTs, args=args),
522
+ factor_names=CacheFactorNames, ids=self.ErgodicMode._IDs, dts=self.ErgodicMode._CacheDTs, args=args))
523
+ Data.update(iData)
524
+ self.ErgodicMode._CacheData.update(iData)
525
+ self.ErgodicMode._Queue2SubProcess.put((None, (CacheFactorNames, PopFactorNames)))
526
+ if len(Data) > 0:
527
+ Data = {name: df.loc[dts, ids] for name, df in Data.items() if isinstance(df, pd.DataFrame)}
528
+ if not DataFactorNames:
529
+ return {name: Data[name] for name in factor_names if name in Data}
530
+ return self.__QN_calc_data__(
531
+ raw_data=self.__QN_prepare_raw_data__(factor_names=DataFactorNames, ids=ids, dts=dts, args=args),
532
+ factor_names=DataFactorNames, ids=ids, dts=dts, args=args)
533
+
534
+ def _readIDData(self, iid, factor_names, dts, args={}):
535
+ self.ErgodicMode._IDReadNum[iid] = self.ErgodicMode._IDReadNum.get(iid, 0) + 1
536
+ if (self.ErgodicMode.MaxIDCacheNum <= 0) or (not self.ErgodicMode._CacheDTs) or (
537
+ dts[0] < self.ErgodicMode._CacheDTs[0]) or (dts[-1] > self.ErgodicMode._CacheDTs[-1]):
538
+ return self.__QN_calc_data__(
539
+ raw_data=self.__QN_prepare_raw_data__(factor_names=factor_names, ids=[iid], dts=dts, args=args),
540
+ factor_names=factor_names, ids=[iid], dts=dts, args=args).iloc[:, :, 0]
541
+ IDData = self.ErgodicMode._CacheData.get(iid)
542
+ if IDData is None:
543
+ if self.ErgodicMode._CacheIDNum < self.ErgodicMode.MaxIDCacheNum:
544
+ self.ErgodicMode._CacheIDNum += 1
545
+ IDData = self.__QN_calc_data__(
546
+ raw_data=self.__QN_prepare_raw_data__(factor_names=self.FactorNames, ids=[iid],
547
+ dts=self.ErgodicMode._CacheDTs, args=args),
548
+ factor_names=self.FactorNames, ids=[iid], dts=self.ErgodicMode._CacheDTs, args=args).iloc[:, :, 0]
549
+ self.ErgodicMode._CacheData[iid] = IDData
550
+ self.ErgodicMode._Queue2SubProcess.put((None, (iid, None)))
551
+ else:
552
+ CacheIDReadNum = self.ErgodicMode._IDReadNum[self.ErgodicMode._CacheData.keys()]
553
+ MinReadNumInd = CacheIDReadNum.argmin()
554
+ if CacheIDReadNum.loc[MinReadNumInd] < self.ErgodicMode._IDReadNum[iid]:
555
+ IDData = self.__QN_calc_data__(
556
+ raw_data=self.__QN_prepare_raw_data__(factor_names=self.FactorNames, ids=[iid],
557
+ dts=self.ErgodicMode._CacheDTs, args=args),
558
+ factor_names=self.FactorNames, ids=[iid], dts=self.ErgodicMode._CacheDTs, args=args).iloc[:, :,
559
+ 0]
560
+ PopID = MinReadNumInd
561
+ self.ErgodicMode._CacheData.pop(PopID)
562
+ self.ErgodicMode._CacheData[iid] = IDData
563
+ self.ErgodicMode._Queue2SubProcess.put((None, (iid, PopID)))
564
+ else:
565
+ return self.__QN_calc_data__(
566
+ raw_data=self.__QN_prepare_raw_data__(factor_names=factor_names, ids=[iid], dts=dts, args=args),
567
+ factor_names=factor_names, ids=[iid], dts=dts, args=args).iloc[:, :, 0]
568
+ return IDData.loc[dts, factor_names]
569
+
570
+ def _readData_ErgodicMode(self, factor_names, ids, dts, args={}):
571
+ if self.ErgodicMode.CacheMode == "因子":
572
+ return self._readData_FactorCacheMode(factor_names=factor_names, ids=ids, dts=dts, args=args)
573
+ # pd.Panel removed - return dict of DataFrames keyed by ID
574
+ return {iID: self._readIDData(iID, factor_names=factor_names, dts=dts, args=args) for iID in ids}
575
+
576
def start(self, dts, **kwargs):
    """Enter ergodic (traversal) mode and, if configured, start the cache process.

    The date-time axis is ``ErgodicMode.ErgodicDTs`` when set, else
    ``self.getDateTime()``; the ID axis is ``ErgodicMode.ErgodicIDs`` when set,
    else ``self.getID()``. The ``dts`` argument is not read by this
    implementation. With ``CacheSize > 0`` a daemon process is started that
    shares an anonymous memory map (a tagged map on Windows) with this process.

    Returns:
        Always ``0``; a second call while already started is a no-op.

    Raises:
        FactorError: If the date-time axis or the ID axis is empty.
    """
    mode = self.ErgodicMode
    if mode._isStarted:
        return 0
    ergodic_dts = mode.ErgodicDTs if mode.ErgodicDTs else self.getDateTime()
    mode._DateTimes = np.array(ergodic_dts, dtype="O")
    if mode._DateTimes.shape[0] == 0:
        raise FactorError("因子表: '%s' 的默认时间序列为空, 请设置参数 '遍历模式-遍历时点' !" % self.Name)
    mode._IDs = list(mode.ErgodicIDs) if mode.ErgodicIDs else self.getID()
    if not mode._IDs:
        raise FactorError("因子表: '%s' 的默认 ID 序列为空, 请设置参数 '遍历模式-遍历ID' !" % self.Name)
    # Traversal cursor and cache bookkeeping start from scratch.
    mode._CurInd = -1
    mode._DTNum = mode._DateTimes.shape[0]
    mode._CacheDTs = []
    mode._CacheData = {}
    mode._CacheFactorNum = 0
    mode._CacheIDNum = 0
    mode._FactorReadNum = pd.Series(0, index=self.FactorNames)
    mode._IDReadNum = pd.Series()
    mode._Queue2SubProcess = Queue()
    mode._Queue2MainProcess = Queue()
    if mode.CacheSize > 0:
        map_size = int(mode.CacheSize * 2 ** 20)
        on_windows = (os.name == "nt")
        if on_windows:
            # On Windows the map is opened by tag name after the child has started.
            mode._TagName = str(uuid.uuid1())
            self._MMAPCacheData = None
        else:
            mode._TagName = None
            self._MMAPCacheData = mmap.mmap(-1, map_size)
        worker = _prepareMMAPFactorCacheData if mode.CacheMode == "因子" else _prepareMMAPIDCacheData
        mode._CacheDataProcess = Process(target=worker, args=(self, self._MMAPCacheData), daemon=True)
        mode._CacheDataProcess.start()
        if on_windows:
            self._MMAPCacheData = mmap.mmap(-1, map_size, tagname=mode._TagName)
    mode._isStarted = True
    return 0
615
+
616
def move(self, idt, **kwargs):
    """Advance the ergodic cursor to ``idt`` and refresh the cache if needed.

    The cursor moves forward by the number of remaining date-times that are
    ``<= idt``. When caching is enabled and the new position lies beyond the
    cached window (or nothing is cached yet), the cached data is pulled from
    the cache sub-process through the shared memory map, and the sub-process
    is told where the next window starts.

    Returns:
        Always ``0``; calling again with the current date-time is a no-op.
    """
    mode = self.ErgodicMode
    if idt == mode._CurDT:
        return 0
    mode._CurDT = idt
    prev_ind = mode._CurInd
    mode._CurInd = prev_ind + np.sum(mode._DateTimes[prev_ind + 1:] <= idt)
    if not ((mode.CacheSize > 0) and (mode._CurInd > -1)):
        return 0
    if mode._CacheDTs and not (mode._DateTimes[mode._CurInd] > mode._CacheDTs[-1]):
        # Still inside the cached window.
        return 0
    # Ask the sub-process for its cache; it answers with chunk lengths until 0.
    mode._Queue2SubProcess.put((None, None))
    chunk_len = mode._Queue2MainProcess.get()
    payload = b""
    while chunk_len > 0:
        self._MMAPCacheData.seek(0)
        payload += self._MMAPCacheData.read(chunk_len)
        mode._Queue2SubProcess.put(chunk_len)  # acknowledge the chunk
        chunk_len = mode._Queue2MainProcess.get()
    mode._CacheData = pickle.loads(payload)
    if mode._CurInd == prev_ind + 1:
        # Contiguous step: the next window is anchored at the cursor.
        anchor = mode._CurInd
    else:
        # Jump: continue right after the last cached date-time, if any.
        last_cached = (mode._DateTimes.searchsorted(mode._CacheDTs[-1])
                       if mode._CacheDTs else mode._CurInd - 1)
        anchor = last_cached + 1
    mode._Queue2SubProcess.put((anchor, None))
    window_start = max((0, anchor - mode.BackwardPeriod))
    window_stop = min((mode._DTNum, anchor + mode.ForwardPeriod + 1))
    mode._CacheDTs = mode._DateTimes[window_start:window_stop].tolist()
    return 0
649
+
650
+ def __QN_on_backtest_move_event__(self, event):
651
+ return self.move(**event.Data)
652
+
653
def end(self):
    """Leave ergodic mode and drop all cache state.

    With ``CacheSize > 0`` a ``None`` message is put on the queue to the cache
    sub-process before the queue and process references are cleared.

    Returns:
        Always ``0``; a call while not started is a no-op.
    """
    mode = self.ErgodicMode
    if not mode._isStarted:
        return 0
    mode._CacheData = None
    mode._FactorReadNum = None
    mode._IDReadNum = None
    if mode.CacheSize > 0:
        mode._Queue2SubProcess.put(None)
    mode._Queue2SubProcess = None
    mode._Queue2MainProcess = None
    mode._CacheDataProcess = None
    mode._isStarted = False
    mode._CurDT = None
    self._MMAPCacheData = None
    return 0
664
+
665
+ def __QN_on_backtest_end_event__(self, event):
666
+ return self.end()
667
+
668
+ def __QN_gen_group_info__(self, factors, operation_mode):
669
+ StartDT = dt.datetime.now()
670
+ FactorNames, RawFactorNames = [], set()
671
+ for iFactor in factors:
672
+ FactorNames.append(iFactor.Name)
673
+ RawFactorNames.add(iFactor._NameInFT)
674
+ StartDT = min((StartDT, operation_mode._FactorStartDT[iFactor.Name]))
675
+ EndDT = operation_mode.DateTimes[-1]
676
+ StartInd, EndInd = operation_mode.DTRuler.index(StartDT), operation_mode.DTRuler.index(EndDT)
677
+ return [(self, FactorNames, list(RawFactorNames), operation_mode.DTRuler[StartInd:EndInd + 1], {})]
678
+
679
+ def __QN_save_raw_data__(self, raw_data, factor_names, raw_data_dir, pid_ids, file_name, pid_lock, **kwargs):
680
+ if raw_data is None:
681
+ return 0
682
+ if isinstance(raw_data, pd.DataFrame) and ("ID" in raw_data):
683
+ raw_data = raw_data.set_index(["ID"])
684
+ CommonCols = raw_data.columns.difference(factor_names).tolist()
685
+ AllIDs = set(raw_data.index)
686
+ for iPID, iIDs in pid_ids.items():
687
+ with shelve.open(raw_data_dir + os.sep + iPID + os.sep + file_name) as iFile:
688
+ iInterIDs = sorted(AllIDs.intersection(iIDs))
689
+ iData = raw_data.loc[iInterIDs]
690
+ if factor_names:
691
+ for jFactorName in factor_names:
692
+ iFile[jFactorName] = iData[CommonCols + [jFactorName]].reset_index()
693
+ else:
694
+ iFile["RawData"] = iData[CommonCols].reset_index()
695
+ iFile["_QN_IDs"] = iIDs
696
+ else:
697
+ for iPID, iIDs in pid_ids.items():
698
+ with shelve.open(raw_data_dir + os.sep + iPID + os.sep + file_name) as iFile:
699
+ iFile["RawData"] = raw_data
700
+ iFile["_QN_IDs"] = iIDs
701
+ return 0
702
+
703
+ def _genFactorDict(self, factors, factor_dict={}):
704
+ for iFactor in factors:
705
+ iFactor._OperationMode = self.OperationMode
706
+ if (not isinstance(iFactor.Name, str)) or (iFactor.Name == "") or (
707
+ iFactor is not factor_dict.get(iFactor.Name, iFactor)):
708
+ iFactor.Name = gen_available_name("TempFactor", factor_dict)
709
+ factor_dict[iFactor.Name] = iFactor
710
+ self.OperationMode._FactorID[iFactor.Name] = len(factor_dict)
711
+ factor_dict.update(self._genFactorDict(iFactor.Descriptors, factor_dict))
712
+ return factor_dict
713
+
714
def _initOperation(self):
    """Validate the operation settings and build all per-run state.

    Steps: check that date-times and IDs are set and that the date-times are a
    contiguous run on the date-time ruler; resolve the operation factors and
    their descriptors; create a temporary cache directory with ``RawData`` and
    ``CacheData`` sub-directories; assign IDs, directories and locks to the
    process ids (``"0"`` for single-process runs, ``"0-<i>"`` otherwise); and
    finally let every operation factor initialise itself.

    Raises:
        FactorError: If date-times or IDs are empty, if a date-time is not on
            the ruler, or if the date-times skip ruler entries.
    """
    mode = self.OperationMode
    if not mode.DateTimes:
        raise FactorError("运算时点序列不能为空!")
    if not mode.IDs:
        raise FactorError("运算 ID 序列不能为空!")
    try:
        ruler_positions = pd.Series(np.arange(0, len(mode.DTRuler)), index=list(mode.DTRuler))
        DTs = ruler_positions.loc[list(mode.DateTimes)]
    except (KeyError, IndexError):
        raise FactorError("运算时点序列超出了时点标尺!")
    if pd.isnull(DTs).sum() > 0:
        raise FactorError("运算时点序列超出了时点标尺!")
    elif (DTs.diff().iloc[1:] != 1).sum() > 0:
        # Consecutive operation date-times must be neighbours on the ruler.
        raise FactorError("运算时点序列的频率与时点标尺不一致!")
    if not mode.FactorNames:
        mode.FactorNames = self.FactorNames
    mode._Factors = []
    mode._FactorDict = {}
    mode._FactorID = {}
    for position, factor_name in enumerate(mode.FactorNames):
        factor = self.getFactor(factor_name)
        factor._OperationMode = mode
        mode._Factors.append(factor)
        mode._FactorDict[factor_name] = factor
        mode._FactorID[factor_name] = position
    mode._FactorDict = self._genFactorDict(mode._Factors, mode._FactorDict)
    mode._Event = {}
    mode._CacheDir = tempfile.TemporaryDirectory()
    cache_root = mode._CacheDir.name
    mode._RawDataDir = cache_root + os.sep + "RawData"
    mode._CacheDataDir = cache_root + os.sep + "CacheData"
    os.mkdir(mode._RawDataDir)
    os.mkdir(mode._CacheDataDir)
    if mode.SubProcessNum == 0:
        mode._PIDs = ["0"]
        mode._PID_IDs = {"0": list(mode.IDs)}
        os.mkdir(mode._RawDataDir + os.sep + "0")
        os.mkdir(mode._CacheDataDir + os.sep + "0")
        mode._PID_Lock = {"0": Lock()}
    else:
        mode._PIDs = []
        mode._PID_IDs = {}
        # Never more workers than IDs.
        worker_count = min((mode.SubProcessNum, len(mode.IDs)))
        id_parts = partition_list_moving_sampling(list(mode.IDs), worker_count)
        mode._PID_Lock = {}
        for worker in range(worker_count):
            pid = "0-" + str(worker)
            mode._PIDs.append(pid)
            mode._PID_IDs[pid] = id_parts[worker]
            os.mkdir(mode._RawDataDir + os.sep + pid)
            os.mkdir(mode._CacheDataDir + os.sep + pid)
            mode._PID_Lock[pid] = Lock()
    mode._FactorStartDT = {}
    mode._FactorPrepareIDs = {}
    for factor in mode._Factors:
        factor._QN_init_operation(mode.DateTimes[0], mode._FactorStartDT,
                                  mode.SectionIDs, mode._FactorPrepareIDs)
771
+
772
def _prepare(self, factor_names, ids, dts):
    """Prepare the raw data of every factor needed by an operation.

    After ``_initOperation`` the factors that belong to a factor table are
    grouped by table and by their prepare IDs. Each table turns its groups
    into raw-data group descriptions; every description gets a raw-data file
    name ``<table name>-<id(table)>-<running index>``, which is also recorded
    on the factors it covers. The raw data is then prepared in this process
    or, with sub-processes enabled, by ``_prepareRawData`` workers whose
    progress is collected from a queue.

    Returns:
        Always ``0``.

    Raises:
        FactorError: If a worker reports anything but a progress value of 1.
    """
    mode = self.OperationMode
    mode.FactorNames = factor_names
    mode.DateTimes = dts
    mode.IDs = ids
    self._initOperation()
    # id(table) -> [(table, [factors], prepare_ids), ...]
    groups_by_table = {}
    for factor in mode._FactorDict.values():
        if factor.FactorTable is None:
            continue
        table_key = id(factor.FactorTable)
        prepare_ids = mode._FactorPrepareIDs[factor.Name]
        table_groups = groups_by_table.setdefault(table_key, [])
        for group in table_groups:
            if prepare_ids == group[2]:
                group[1].append(factor)
                break
        else:
            table_groups.append((factor.FactorTable, [factor], prepare_ids))
    GroupInfo, RawDataFileNames, PrepareIDs, PID_PrepareIDs = [], [], [], []
    for table_key, table_groups in groups_by_table.items():
        table_group_info = []
        file_index = 0
        for table, group_factors, prepare_ids in table_groups:
            infos = table.__QN_gen_group_info__(group_factors, mode)
            table_group_info.extend(infos)
            info_count = len(infos)
            for k in range(info_count):
                raw_file_name = table.Name + "-" + str(table_key) + "-" + str(file_index + k)
                for covered_name in infos[k][1]:
                    mode._FactorDict[covered_name]._RawDataFile = raw_file_name
                RawDataFileNames.append(raw_file_name)
            file_index += info_count
            PrepareIDs += [prepare_ids] * info_count
            if prepare_ids is not None:
                # Spread the prepare IDs over the process ids, like the operation IDs.
                id_parts = partition_list_moving_sampling(prepare_ids, len(mode._PIDs))
                PID_PrepareIDs += [{mode._PIDs[i]: part for i, part in enumerate(id_parts)}] * info_count
            else:
                PID_PrepareIDs += [None] * info_count
        GroupInfo.extend(table_group_info)
    args = {"GroupInfo": GroupInfo, "FT": self, "RawDataFileNames": RawDataFileNames, "PrepareIDs": PrepareIDs,
            "PID_PrepareIDs": PID_PrepareIDs}
    if mode.SubProcessNum == 0:
        Error = _prepareRawData(args)
    else:
        nPrcs = min((mode.SubProcessNum, len(args["GroupInfo"])))
        Procs, Main2SubQueue, Sub2MainQueue = start_multi_process(
            pid="0", n_prc=nPrcs, target_fun=_prepareRawData, arg=args,
            partition_arg=["GroupInfo", "RawDataFileNames", "PrepareIDs", "PID_PrepareIDs"],
            n_partition_head=0, n_partition_tail=0,
            main2sub_queue="None", sub2main_queue="Single")
        nGroup = len(GroupInfo)
        with ProgressBar(max_value=nGroup) as ProgBar:
            for received in range(nGroup):
                iPID, Error, iMsg = Sub2MainQueue.get()
                if Error != 1:
                    # A worker failed: stop the remaining ones before raising.
                    for iPID, iProc in Procs.items():
                        if iProc.is_alive():
                            iProc.terminate()
                    raise FactorError(iMsg)
                ProgBar.update(received + 1)
        for worker in Procs.values():
            worker.join()
    mode._isStarted = True
    return 0
841
+
842
+ def _exit(self):
843
+ self.OperationMode._CacheDir = None
844
+ self.OperationMode._isStarted = False
845
+ for iFactorName, iFactor in self.OperationMode._FactorDict.items():
846
+ iFactor._exit()
847
+ return 0
848
+
849
def write2FDB(self, factor_names, ids, dts, factor_db, table_name, if_exists="update",
              subprocess_num=cpu_count() - 1, dt_ruler=None, section_ids=None, specific_target={}, **kwargs):
    """Compute factors and write them into a writable factor database.

    The run has three printed phases: raw-data preparation, factor
    calculation (in this process when ``subprocess_num`` is 0, otherwise in
    worker processes whose progress and factor events are serviced here), and
    cache clean-up.

    Args:
        factor_names, ids, dts: What to compute.
        factor_db: Target database; must be a ``WritableFactorDB``.
        table_name: Default target table.
        if_exists: Passed through to the database write calls.
        subprocess_num: Number of worker processes; 0 computes in-process.
        dt_ruler: Date-time ruler; defaults to ``dts``.
        section_ids: Stored as ``OperationMode.SectionIDs``.
        specific_target: Optional per-factor ``(db, table, target_name)`` overrides.

    Returns:
        Always ``0``.

    Raises:
        FactorError: If ``factor_db`` is not writable.
    """
    from QuantNodes.factor_node.factor_db import WritableFactorDB
    if not isinstance(factor_db, WritableFactorDB):
        raise FactorError("因子数据库: %s 不可写入!" % factor_db.Name)
    mode = self.OperationMode
    print("==========因子运算==========", "1. 原始数据准备", sep="\n", end="\n")
    TotalStartT = time.perf_counter()
    mode.SubProcessNum = subprocess_num
    mode.DTRuler = (dt_ruler if dt_ruler is not None else dts)
    mode.SectionIDs = section_ids
    self._prepare(factor_names, ids, dts)
    print(("耗时 : %.2f" % (time.perf_counter() - TotalStartT,)), "2. 因子数据计算", end="\n", sep="\n")
    StartT = time.perf_counter()
    Args = {"FT": self, "PID": "0", "FactorDB": factor_db, "TableName": table_name, "if_exists": if_exists,
            "specific_target": specific_target}
    if mode.SubProcessNum == 0:
        _calculate(Args)
    else:
        nPrcs = len(mode._PIDs)
        nTask = len(mode._Factors) * nPrcs
        # Per-factor count of workers that have signalled the factor's event queue.
        EventState = {name: 0 for name in mode._Event}
        Procs, Main2SubQueue, Sub2MainQueue = start_multi_process(pid="0", n_prc=nPrcs, target_fun=_calculate,
                                                                  arg=Args,
                                                                  main2sub_queue="None", sub2main_queue="Single")
        iProg = 0
        with ProgressBar(max_value=nTask) as ProgBar:
            while True:
                nEvent = len(EventState)
                if nEvent > 0:
                    for name in tuple(EventState.keys()):
                        event_queue = mode._Event[name][0]
                        while not event_queue.empty():
                            EventState[name] += event_queue.get()
                        if EventState[name] >= nPrcs:
                            # Every worker has reported: release the factor's event.
                            mode._Event[name][1].set()
                            EventState.pop(name)
                # Block on progress only when no events are left to service.
                while ((not Sub2MainQueue.empty()) or (nEvent == 0)) and (iProg < nTask):
                    iPID, iSubProg, iMsg = Sub2MainQueue.get()
                    iProg += iSubProg
                    ProgBar.update(iProg)
                if iProg >= nTask:
                    break
        for iPID, iPrcs in Procs.items():
            iPrcs.join()
    print(("耗时 : %.2f" % (time.perf_counter() - StartT,)), "3. 清理缓存", end="\n", sep="\n")
    StartT = time.perf_counter()
    factor_db.connect()
    self._exit()
    print(('耗时 : %.2f' % (time.perf_counter() - StartT,)), ("总耗时 : %.2f" % (time.perf_counter() - TotalStartT,)),
          "=" * 28, sep="\n", end="\n")
    return 0
902
+
903
+
904
+ class CustomFT(FactorTable):
905
+ """自定义因子表"""
906
+
907
def __init__(self, name, sys_args={}, config_file=None, **kwargs):
    """Create an empty custom factor table (no factors, IDs or date-times yet).

    Args:
        name: Table name.
        sys_args, config_file, **kwargs: Forwarded to the parent initialiser;
            the parent is always given ``fdb=None``.
    """
    self._DateTimes, self._IDs = [], []
    self._Factors = {}
    self._FactorDict = pd.DataFrame(columns=["FTID", "ArgIndex", "NameInFT", "DataType"],
                                    dtype=np.dtype("O"))
    self._TableArgDict = {}
    # ID-filter state: active filter string and cache of compiled filters.
    self._IDFilterStr = None
    self._CompiledIDFilter = {}
    self._isStarted = False
    return super().__init__(name=name, fdb=None, sys_args=sys_args, config_file=config_file, **kwargs)
917
+
918
@property
def FactorNames(self):
    """Names of the registered factors, in sorted order."""
    names = list(self._Factors)
    names.sort()
    return names
921
+
922
def getFactorMetaData(self, factor_names=None, key=None, args={}):
    """Collect metadata from the registered factors.

    Args:
        factor_names: Factors to query; defaults to all ``FactorNames``.
        key: Metadata key passed to each factor's ``getMetaData``.
        args: Not used by this implementation.

    Returns:
        With a ``key``, a Series of that entry indexed by factor name; without
        one, a DataFrame with one row per factor.
    """
    if factor_names is None:
        factor_names = self.FactorNames
    meta = {name: self._Factors[name].getMetaData(key) for name in factor_names}
    if key is None:
        return pd.DataFrame(meta).T
    return pd.Series(meta)
930
+
931
def getFactor(self, ifactor_name, args={}, new_name=None):
    """Return the registered factor called ``ifactor_name``.

    The stored factor object itself is returned; when ``new_name`` is given
    (not ``None``) its ``Name`` attribute is overwritten first. ``args`` is
    not used.

    Raises:
        KeyError: If no factor with that name is registered.
    """
    factor = self._Factors[ifactor_name]
    if new_name is None:
        return factor
    factor.Name = new_name
    return factor
936
+
937
def getDateTime(self, ifactor_name=None, iid=None, start_dt=None, end_dt=None, args={}):
    """Return the table's time points, optionally clipped to a closed range.

    Args:
        ifactor_name: Accepted for interface compatibility; not used here.
        iid: Accepted for interface compatibility; not used here.
        start_dt: Inclusive lower bound, or ``None`` for no lower bound.
        end_dt: Inclusive upper bound, or ``None`` for no upper bound.
        args: Accepted for interface compatibility; not used here.

    Returns:
        The internal time-point list itself when no bound is given,
        otherwise a new list holding the time points within the bounds.
    """
    if (start_dt is None) and (end_dt is None):
        return self._DateTimes
    # An object array keeps the original elements while allowing mask indexing.
    dt_array = np.array(self._DateTimes, dtype="O")
    if start_dt is not None:
        dt_array = dt_array[dt_array >= start_dt]
    if end_dt is not None:
        dt_array = dt_array[dt_array <= end_dt]
    return dt_array.tolist()
947
+
948
def getID(self, ifactor_name=None, idt=None, args={}):
    """Return the table's ID list.

    All parameters are accepted for interface compatibility and ignored;
    the internal list itself (not a copy) is returned.
    """
    table_ids = self._IDs
    return table_ids
950
+
951
def getIDMask(self, idt, ids=None, id_filter_str=None, args={}):
    """Evaluate an ID filter at one time point and return the result per ID.

    The filter given by ``id_filter_str`` is installed only for the duration
    of this call; the previously active filter is always put back, even when
    reading the filter factors fails.

    Args:
        idt: Time point at which the filter is evaluated.
        ids: IDs to evaluate; defaults to ``self.getID(idt=idt, args=args)``.
        id_filter_str: Filter expression. A falsy value means "no filter".
        args: Forwarded to ``getID`` and ``readData``.

    Returns:
        ``pd.Series(True, index=ids)`` when no filter is given, otherwise
        whatever the compiled filter expression evaluates to (presumably a
        boolean Series indexed by ID -- confirm against
        ``compile_id_filter_str``).
    """
    if ids is None:
        ids = self.getID(idt=idt, args=args)
    OldIDFilterStr = self.setIDFilter(id_filter_str)
    try:
        if self._IDFilterStr is None:
            return pd.Series(True, index=ids)
        CompiledFilterStr, IDFilterFactors = self._CompiledIDFilter[self._IDFilterStr]
        # ``temp`` must keep this exact name: the compiled expression refers to it.
        temp = self.readData(factor_names=IDFilterFactors, ids=ids, dts=[idt], args=args).loc[:, idt, :]  # noqa: F841 (used in eval below)
    finally:
        # Restore the caller's filter on every exit path, including errors.
        self._IDFilterStr = OldIDFilterStr
    # NOTE(security): the filter expression is executed with eval(); filter
    # strings must only come from trusted sources.
    return eval(CompiledFilterStr)
962
+
963
def getFilteredID(self, idt, ids=None, id_filter_str=None, args={}):
    """Return the IDs that satisfy an ID filter at one time point.

    The filter given by ``id_filter_str`` is installed only for the duration
    of this call; the previously active filter is always put back, even when
    looking up the IDs or reading the filter factors fails.

    Args:
        idt: Time point at which the filter is evaluated.
        ids: Candidate IDs; defaults to ``self.getID(idt=idt, args=args)``.
        id_filter_str: Filter expression. A falsy value means "no filter".
        args: Forwarded to ``getID`` and ``readData``.

    Returns:
        ``ids`` unchanged when no filter is given, otherwise the index labels
        of the rows selected by the compiled filter expression, as a list.

    Raises:
        FactorError: If the cached compiled filter expression is ``None``.
    """
    OldIDFilterStr = self.setIDFilter(id_filter_str)
    try:
        if ids is None:
            ids = self.getID(idt=idt, args=args)
        if self._IDFilterStr is None:
            return ids
        CompiledFilterStr, IDFilterFactors = self._CompiledIDFilter[self._IDFilterStr]
        if CompiledFilterStr is None:
            raise FactorError("过滤条件字符串有误!")
        # ``temp`` must keep this exact name: the compiled expression refers to it.
        temp = self.readData(factor_names=IDFilterFactors, ids=ids, dts=[idt], args=args).loc[:, idt, :]  # noqa: F841 (used in eval below)
    finally:
        # Restore the caller's filter on every exit path, including errors.
        self._IDFilterStr = OldIDFilterStr
    # NOTE(security): the filter expression is executed with eval(); filter
    # strings must only come from trusted sources.
    return eval("temp[" + CompiledFilterStr + "].index.tolist()")
976
+
977
+ def __QN_calc_data__(self, raw_data, factor_names, ids, dts, args={}):
978
+ return {iFactorName: self._Factors[iFactorName].readData(ids=ids, dts=dts, dt_ruler=self._DateTimes,
979
+ section_ids=self._IDs) for iFactorName in
980
+ factor_names}
981
+
982
def write2FDB(self, factor_names, ids, dts, factor_db, table_name, if_exists="update",
              subprocess_num=cpu_count() - 1, dt_ruler=None, section_ids=None, specific_target={}, **kwargs):
    """Resolve the default ruler and section IDs, then delegate to the parent.

    ``dt_ruler`` defaults to this table's time points and ``section_ids`` to
    this table's IDs. An empty ruler is passed on as ``None``; section IDs
    are passed on as ``None`` when they are empty or equal to ``ids``.
    Every other argument is forwarded unchanged.

    Returns:
        Whatever the parent class's ``write2FDB`` returns.
    """
    ruler = self._DateTimes if dt_ruler is None else dt_ruler
    if not ruler:
        ruler = None
    sections = self._IDs if section_ids is None else section_ids
    if (not sections) or (sections == ids):
        sections = None
    return super().write2FDB(
        factor_names, ids, dts, factor_db, table_name, if_exists, subprocess_num,
        dt_ruler=ruler, section_ids=sections, specific_target=specific_target, **kwargs
    )
994
+
995
def addFactors(self, factor_list=[], factor_table=None, factor_names=None, args={}):
    """Register factors given directly and/or taken from another table.

    Args:
        factor_list: Factor objects, registered under their ``Name``.
        factor_table: Optional table to pull further factors from.
        factor_names: Names to pull from ``factor_table``; defaults to all
            of its ``FactorNames``.
        args: Forwarded to ``factor_table.getFactor``.

    Returns:
        0.

    Raises:
        FactorError: If a name is already registered. Factors added before
            the clash stay registered.
    """
    for factor in factor_list:
        if factor.Name in self._Factors:
            raise FactorError("因子: '%s' 有重名!" % factor.Name)
        self._Factors[factor.Name] = factor
    if factor_table is not None:
        names = factor_table.FactorNames if factor_names is None else factor_names
        for name in names:
            if name in self._Factors:
                raise FactorError("因子: '%s' 有重名!" % name)
            pulled = factor_table.getFactor(name, args=args)
            self._Factors[pulled.Name] = pulled
    return 0
1011
+
1012
def deleteFactors(self, factor_names=None):
    """Remove factors from the registry.

    Args:
        factor_names: Names to remove; defaults to every registered factor.
            Names that are not registered are ignored.

    Returns:
        0.
    """
    names = self.FactorNames if factor_names is None else factor_names
    for name in names:
        # pop() with a default quietly skips names that are not registered.
        self._Factors.pop(name, None)
    return 0
1021
+
1022
def renameFactor(self, factor_name, new_factor_name):
    """Move a registered factor to a new key in the registry.

    Only the registry key changes; the factor object itself is not touched.

    Returns:
        0.

    Raises:
        FactorError: If ``factor_name`` is not registered, or if
            ``new_factor_name`` belongs to a different registered factor.
    """
    if factor_name not in self._Factors:
        raise FactorError("因子: '%s' 不存在!" % factor_name)
    is_new_key = new_factor_name != factor_name
    if is_new_key and (new_factor_name in self._Factors):
        raise FactorError("因子: '%s' 有重名!" % new_factor_name)
    moved = self._Factors.pop(factor_name)
    self._Factors[new_factor_name] = moved
    return 0
1030
+
1031
def setDateTime(self, dts):
    """Store a sorted copy of ``dts`` as the table's time-point sequence."""
    ordered = list(dts)
    ordered.sort()
    self._DateTimes = ordered
1034
+
1035
def setID(self, ids):
    """Store a sorted copy of ``ids`` as the table's ID sequence."""
    ordered = list(ids)
    ordered.sort()
    self._IDs = ordered
1038
+
1039
@property
def IDFilterStr(self):
    """Return the currently active ID filter string, or ``None`` if unset."""
    active_filter = self._IDFilterStr
    return active_filter
1043
+
1044
def setIDFilter(self, id_filter_str):
    """Set the active ID filter and return the filter that was active before.

    A falsy ``id_filter_str`` clears the filter. A filter string that has not
    been seen before is compiled with ``compile_id_filter_str`` against the
    current factor names and cached, so later uses skip compilation.

    Returns:
        The previously active filter string (possibly ``None``).

    Raises:
        FactorError: If ``id_filter_str`` is truthy but not a string, or if
            it cannot be compiled.
    """
    previous = self._IDFilterStr
    if not id_filter_str:
        self._IDFilterStr = None
        return previous
    if not isinstance(id_filter_str, str):
        raise FactorError("条件字符串必须为字符串或者为 None!")
    if self._CompiledIDFilter.get(id_filter_str, None) is None:
        # Cache miss: compile once and remember the result.
        compiled_str, filter_factors = compile_id_filter_str(id_filter_str, self.FactorNames)
        if compiled_str is None:
            raise FactorError("条件字符串有误!")
        self._CompiledIDFilter[id_filter_str] = (compiled_str, filter_factors)
    self._IDFilterStr = id_filter_str
    return previous
1062
+
1063
def start(self, dts, **kwargs):
    """Start the table, then start every registered factor.

    The parent class's ``start`` runs first; each factor's ``start`` is then
    called with the same ``dts`` and keyword arguments.

    Returns:
        0.
    """
    super().start(dts=dts, **kwargs)
    for factor in self._Factors.values():
        factor.start(dts=dts, **kwargs)
    return 0
1068
+
1069
def end(self):
    """End the table, then end every registered factor.

    The parent class's ``end`` runs first; each factor's ``end`` is called
    afterwards.

    Returns:
        0.
    """
    super().end()
    for factor in self._Factors.values():
        factor.end()
    return 0