itp-interface 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (485):
  1. itp_interface/__init__.py +0 -0
  2. itp_interface/agent/__init__.py +0 -0
  3. itp_interface/agent/simple_proof_agent.py +100 -0
  4. itp_interface/coq_ser_api/__init__.py +165 -0
  5. itp_interface/coq_ser_api/contexts.py +283 -0
  6. itp_interface/coq_ser_api/coq_agent.py +459 -0
  7. itp_interface/coq_ser_api/coq_backend.py +135 -0
  8. itp_interface/coq_ser_api/coq_util.py +839 -0
  9. itp_interface/coq_ser_api/example.py +67 -0
  10. itp_interface/coq_ser_api/lsp_backend.py +375 -0
  11. itp_interface/coq_ser_api/py.typed +0 -0
  12. itp_interface/coq_ser_api/serapi_backend.py +841 -0
  13. itp_interface/coq_ser_api/util.py +145 -0
  14. itp_interface/coq_ser_api_old/__init__.py +2583 -0
  15. itp_interface/coq_ser_api_old/contexts.py +172 -0
  16. itp_interface/coq_ser_api_old/util.py +146 -0
  17. itp_interface/lean_server/__init__.py +0 -0
  18. itp_interface/lean_server/commands.py +484 -0
  19. itp_interface/lean_server/lean3_search_tool.py +358 -0
  20. itp_interface/lean_server/lean4_repl_interface.py +151 -0
  21. itp_interface/lean_server/lean4_utils.py +255 -0
  22. itp_interface/lean_server/lean_cmd_server.py +111 -0
  23. itp_interface/lean_server/lean_context.py +60 -0
  24. itp_interface/lean_server/lean_sync_server.py +174 -0
  25. itp_interface/lean_server/lean_utils.py +199 -0
  26. itp_interface/lean_server/py.typed +1 -0
  27. itp_interface/main/__init__.py +0 -0
  28. itp_interface/main/config/afp_data_gen.yaml +14 -0
  29. itp_interface/main/config/benchmark/CompCert.yaml +366 -0
  30. itp_interface/main/config/benchmark/GeoCoq.yaml +930 -0
  31. itp_interface/main/config/benchmark/UniMath.yaml +2690 -0
  32. itp_interface/main/config/benchmark/afp_isabelle.yaml +29200 -0
  33. itp_interface/main/config/benchmark/agent_proverbot_hard.yaml +247 -0
  34. itp_interface/main/config/benchmark/category-theory.yaml +470 -0
  35. itp_interface/main/config/benchmark/compcert_118_subset.yaml +148 -0
  36. itp_interface/main/config/benchmark/compcert_benchmark.yaml +36 -0
  37. itp_interface/main/config/benchmark/compcert_benchmark_hard.yaml +498 -0
  38. itp_interface/main/config/benchmark/compcert_benchmark_hard_1.yaml +55 -0
  39. itp_interface/main/config/benchmark/compcert_benchmark_hard_2.yaml +24 -0
  40. itp_interface/main/config/benchmark/compcert_benchmark_hard_3.yaml +95 -0
  41. itp_interface/main/config/benchmark/compcert_benchmark_hard_7_per_cent.yaml +78 -0
  42. itp_interface/main/config/benchmark/compcert_benchmark_test.yaml +38 -0
  43. itp_interface/main/config/benchmark/compcert_benchmark_train.yaml +340 -0
  44. itp_interface/main/config/benchmark/leandojo_novel_premises_test.yaml +2908 -0
  45. itp_interface/main/config/benchmark/leandojo_novel_premises_train.yaml +98645 -0
  46. itp_interface/main/config/benchmark/leandojo_novel_premises_val.yaml +2912 -0
  47. itp_interface/main/config/benchmark/leandojo_random.yaml +2889 -0
  48. itp_interface/main/config/benchmark/leandojo_random_test.yaml +2421 -0
  49. itp_interface/main/config/benchmark/leandojo_random_train.yaml +62729 -0
  50. itp_interface/main/config/benchmark/leandojo_random_val.yaml +2504 -0
  51. itp_interface/main/config/benchmark/math-comp.yaml +200 -0
  52. itp_interface/main/config/benchmark/miniF2F_test.yaml +12 -0
  53. itp_interface/main/config/benchmark/miniF2F_test_aime.yaml +27 -0
  54. itp_interface/main/config/benchmark/miniF2F_test_algebra.yaml +30 -0
  55. itp_interface/main/config/benchmark/miniF2F_test_amc12.yaml +57 -0
  56. itp_interface/main/config/benchmark/miniF2F_test_few_shot_hard.yaml +231 -0
  57. itp_interface/main/config/benchmark/miniF2F_test_imo.yaml +32 -0
  58. itp_interface/main/config/benchmark/miniF2F_test_induction.yaml +20 -0
  59. itp_interface/main/config/benchmark/miniF2F_test_mathd_algebra.yaml +82 -0
  60. itp_interface/main/config/benchmark/miniF2F_test_mathd_algebra_hard.yaml +72 -0
  61. itp_interface/main/config/benchmark/miniF2F_test_mathd_numbertheory.yaml +72 -0
  62. itp_interface/main/config/benchmark/miniF2F_test_numbertheory.yaml +20 -0
  63. itp_interface/main/config/benchmark/minicompcert_benchmark_1.yaml +14 -0
  64. itp_interface/main/config/benchmark/proverbot_hard.yaml +104 -0
  65. itp_interface/main/config/benchmark/re_prover.yaml +66 -0
  66. itp_interface/main/config/benchmark/re_prover_hard.yaml +41 -0
  67. itp_interface/main/config/benchmark/re_prover_very_hard.yaml +22 -0
  68. itp_interface/main/config/benchmark/reprover_with_retrieval.yaml +73 -0
  69. itp_interface/main/config/benchmark/reprover_with_retrieval_hard.yaml +30 -0
  70. itp_interface/main/config/benchmark/reprover_with_retrieval_neg.yaml +195 -0
  71. itp_interface/main/config/benchmark/simple_benchmark_1.yaml +24 -0
  72. itp_interface/main/config/benchmark/simple_benchmark_8.yaml +50 -0
  73. itp_interface/main/config/benchmark/simple_benchmark_9.yaml +65 -0
  74. itp_interface/main/config/benchmark/simple_benchmark_isabelle.yaml +18 -0
  75. itp_interface/main/config/benchmark/simple_benchmark_lean.yaml +12 -0
  76. itp_interface/main/config/benchmark/simple_benchmark_lean_training_data.yaml +12 -0
  77. itp_interface/main/config/benchmark/simple_rl_benchmark_lean.yaml +14 -0
  78. itp_interface/main/config/benchmark/stack_machine.yaml +13 -0
  79. itp_interface/main/config/benchmark/stack_machine_hard.yaml +15 -0
  80. itp_interface/main/config/category_theory_data_gen.yaml +14 -0
  81. itp_interface/main/config/category_theory_data_gen_random.yaml +16 -0
  82. itp_interface/main/config/compcert_data_gen_test.yaml +10 -0
  83. itp_interface/main/config/compcert_data_gen_train.yaml +7 -0
  84. itp_interface/main/config/env_settings/bm25_retrieval.yaml +2 -0
  85. itp_interface/main/config/env_settings/bm25_retrieval_no_dfns.yaml +2 -0
  86. itp_interface/main/config/env_settings/bm25_retrieval_only_local_no_dfns.yaml +2 -0
  87. itp_interface/main/config/env_settings/bm25_retrieval_with_print.yaml +2 -0
  88. itp_interface/main/config/env_settings/bm25_retrieval_with_print_only_local.yaml +2 -0
  89. itp_interface/main/config/env_settings/bm25_retrieval_with_print_only_local_no_dfns.yaml +2 -0
  90. itp_interface/main/config/env_settings/no_retrieval.yaml +2 -0
  91. itp_interface/main/config/experiments.yaml +12 -0
  92. itp_interface/main/config/geo_coq_data_gen.yaml +14 -0
  93. itp_interface/main/config/geo_coq_data_gen_random.yaml +16 -0
  94. itp_interface/main/config/leandojo_random_data_gen.yaml +16 -0
  95. itp_interface/main/config/math_comp_data_gen.yaml +14 -0
  96. itp_interface/main/config/math_comp_data_gen_random.yaml +16 -0
  97. itp_interface/main/config/mathlib_data_gen.yaml +14 -0
  98. itp_interface/main/config/repo/coq_repos.yaml +191 -0
  99. itp_interface/main/config/run_settings/default_coq_data_generation_transforms.yaml +24 -0
  100. itp_interface/main/config/run_settings/default_isabelle_data_generation_transforms.yaml +24 -0
  101. itp_interface/main/config/run_settings/default_lean4_data_generation_transforms.yaml +24 -0
  102. itp_interface/main/config/run_settings/default_lean_data_generation_transforms.yaml +24 -0
  103. itp_interface/main/config/simple_coq_data_gen.yaml +12 -0
  104. itp_interface/main/config/simple_coq_data_gen_random.yaml +17 -0
  105. itp_interface/main/config/simple_lean_data_gen.yaml +12 -0
  106. itp_interface/main/config/simple_rl_lean_data_gen.yaml +12 -0
  107. itp_interface/main/config/uni_math_data_gen.yaml +14 -0
  108. itp_interface/main/config.py +192 -0
  109. itp_interface/main/extract_benchmark_dataset.py +106 -0
  110. itp_interface/main/filter_dataset.py +107 -0
  111. itp_interface/main/install.py +92 -0
  112. itp_interface/main/merge_dataset.py +96 -0
  113. itp_interface/main/run_tool.py +444 -0
  114. itp_interface/pisa/.git +1 -0
  115. itp_interface/pisa/.gitignore +125 -0
  116. itp_interface/pisa/.idea/.gitignore +8 -0
  117. itp_interface/pisa/.idea/ClojureProjectResolveSettings.xml +6 -0
  118. itp_interface/pisa/.idea/codeStyles/Project.xml +7 -0
  119. itp_interface/pisa/.idea/codeStyles/codeStyleConfig.xml +5 -0
  120. itp_interface/pisa/.idea/inspectionProfiles/Project_Default.xml +16 -0
  121. itp_interface/pisa/.idea/libraries/sbt__com_google_android_annotations_4_1_1_4_jar.xml +13 -0
  122. itp_interface/pisa/.idea/libraries/sbt__com_google_api_grpc_proto_google_common_protos_1_17_0_jar.xml +13 -0
  123. itp_interface/pisa/.idea/libraries/sbt__com_google_code_findbugs_jsr305_3_0_2_jar.xml +13 -0
  124. itp_interface/pisa/.idea/libraries/sbt__com_google_code_gson_gson_2_8_6_jar.xml +13 -0
  125. itp_interface/pisa/.idea/libraries/sbt__com_google_errorprone_error_prone_annotations_2_3_4_jar.xml +13 -0
  126. itp_interface/pisa/.idea/libraries/sbt__com_google_guava_failureaccess_1_0_1_jar.xml +13 -0
  127. itp_interface/pisa/.idea/libraries/sbt__com_google_guava_guava_30_0_jre_jar.xml +13 -0
  128. itp_interface/pisa/.idea/libraries/sbt__com_google_guava_listenablefuture_9999_0_empty_to_avoid_conflict_with_guava_jar.xml +9 -0
  129. itp_interface/pisa/.idea/libraries/sbt__com_google_j2objc_j2objc_annotations_1_3_jar.xml +13 -0
  130. itp_interface/pisa/.idea/libraries/sbt__com_google_protobuf_protobuf_java_3_12_0_jar.xml +13 -0
  131. itp_interface/pisa/.idea/libraries/sbt__com_google_protobuf_protobuf_java_util_3_12_0_jar.xml +13 -0
  132. itp_interface/pisa/.idea/libraries/sbt__com_lihaoyi_fastparse_2_13_2_3_0_jar.xml +13 -0
  133. itp_interface/pisa/.idea/libraries/sbt__com_lihaoyi_geny_2_13_0_6_0_jar.xml +13 -0
  134. itp_interface/pisa/.idea/libraries/sbt__com_lihaoyi_sourcecode_2_13_0_2_1_jar.xml +13 -0
  135. itp_interface/pisa/.idea/libraries/sbt__com_thesamet_scalapb_lenses_2_13_0_10_9_jar.xml +13 -0
  136. itp_interface/pisa/.idea/libraries/sbt__com_thesamet_scalapb_scalapb_runtime_2_13_0_10_9_jar.xml +13 -0
  137. itp_interface/pisa/.idea/libraries/sbt__com_thesamet_scalapb_scalapb_runtime_grpc_2_13_0_10_9_jar.xml +13 -0
  138. itp_interface/pisa/.idea/libraries/sbt__com_thesamet_scalapb_zio_grpc_zio_grpc_core_2_13_0_4_2_jar.xml +13 -0
  139. itp_interface/pisa/.idea/libraries/sbt__com_thoughtworks_paranamer_paranamer_2_8_jar.xml +13 -0
  140. itp_interface/pisa/.idea/libraries/sbt__commons_io_commons_io_2_8_0_jar.xml +13 -0
  141. itp_interface/pisa/.idea/libraries/sbt__de_unruh_java_patterns_0_1_0_jar.xml +13 -0
  142. itp_interface/pisa/.idea/libraries/sbt__de_unruh_scala_isabelle_2_13_master_SNAPSHOT_jar.xml +13 -0
  143. itp_interface/pisa/.idea/libraries/sbt__dev_zio_izumi_reflect_2_13_1_0_0_M9_jar.xml +13 -0
  144. itp_interface/pisa/.idea/libraries/sbt__dev_zio_izumi_reflect_thirdparty_boopickle_shaded_2_13_1_0_0_M9_jar.xml +13 -0
  145. itp_interface/pisa/.idea/libraries/sbt__dev_zio_zio_2_13_1_0_3_jar.xml +13 -0
  146. itp_interface/pisa/.idea/libraries/sbt__dev_zio_zio_stacktracer_2_13_1_0_3_jar.xml +13 -0
  147. itp_interface/pisa/.idea/libraries/sbt__dev_zio_zio_streams_2_13_1_0_3_jar.xml +13 -0
  148. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_api_1_34_0_jar.xml +13 -0
  149. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_context_1_34_0_jar.xml +13 -0
  150. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_core_1_34_0_jar.xml +13 -0
  151. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_netty_1_34_0_jar.xml +13 -0
  152. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_protobuf_1_34_0_jar.xml +13 -0
  153. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_protobuf_lite_1_34_0_jar.xml +13 -0
  154. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_services_1_34_0_jar.xml +13 -0
  155. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_stub_1_34_0_jar.xml +13 -0
  156. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_buffer_4_1_51_Final_jar.xml +13 -0
  157. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_codec_4_1_51_Final_jar.xml +13 -0
  158. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_codec_http2_4_1_51_Final_jar.xml +13 -0
  159. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_codec_http_4_1_51_Final_jar.xml +13 -0
  160. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_codec_socks_4_1_51_Final_jar.xml +13 -0
  161. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_common_4_1_51_Final_jar.xml +13 -0
  162. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_handler_4_1_51_Final_jar.xml +13 -0
  163. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_handler_proxy_4_1_51_Final_jar.xml +13 -0
  164. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_resolver_4_1_51_Final_jar.xml +13 -0
  165. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_transport_4_1_51_Final_jar.xml +13 -0
  166. itp_interface/pisa/.idea/libraries/sbt__io_perfmark_perfmark_api_0_19_0_jar.xml +13 -0
  167. itp_interface/pisa/.idea/libraries/sbt__net_java_dev_jna_jna_5_3_1_jar.xml +13 -0
  168. itp_interface/pisa/.idea/libraries/sbt__net_liftweb_lift_json_2_13_3_4_3_jar.xml +13 -0
  169. itp_interface/pisa/.idea/libraries/sbt__org_apache_commons_commons_lang3_3_11_jar.xml +13 -0
  170. itp_interface/pisa/.idea/libraries/sbt__org_apache_commons_commons_text_1_9_jar.xml +13 -0
  171. itp_interface/pisa/.idea/libraries/sbt__org_checkerframework_checker_qual_3_5_0_jar.xml +13 -0
  172. itp_interface/pisa/.idea/libraries/sbt__org_codehaus_mojo_animal_sniffer_annotations_1_18_jar.xml +13 -0
  173. itp_interface/pisa/.idea/libraries/sbt__org_jetbrains_annotations_20_1_0_jar.xml +13 -0
  174. itp_interface/pisa/.idea/libraries/sbt__org_jline_jline_3_16_0_jar.xml +13 -0
  175. itp_interface/pisa/.idea/libraries/sbt__org_log4s_log4s_2_13_1_9_0_jar.xml +13 -0
  176. itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_modules_scala_collection_compat_2_13_2_1_6_jar.xml +13 -0
  177. itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_modules_scala_xml_2_13_1_3_0_jar.xml +13 -0
  178. itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_scala_compiler_2_13_4_jar.xml +13 -0
  179. itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_scala_library_2_13_4_jar.xml +23 -0
  180. itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_scala_reflect_2_13_4_jar.xml +13 -0
  181. itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_scalap_2_13_4_jar.xml +13 -0
  182. itp_interface/pisa/.idea/libraries/sbt__org_scalaz_scalaz_core_2_13_7_3_2_jar.xml +13 -0
  183. itp_interface/pisa/.idea/libraries/sbt__org_slf4j_slf4j_api_1_7_30_jar.xml +13 -0
  184. itp_interface/pisa/.idea/libraries/sbt__org_slf4j_slf4j_simple_1_7_30_jar.xml +13 -0
  185. itp_interface/pisa/.idea/misc.xml +7 -0
  186. itp_interface/pisa/.idea/modules/PISA-build.iml +127 -0
  187. itp_interface/pisa/.idea/modules/PISA.iml +94 -0
  188. itp_interface/pisa/.idea/modules.xml +9 -0
  189. itp_interface/pisa/.idea/other.xml +6 -0
  190. itp_interface/pisa/.idea/sbt.xml +20 -0
  191. itp_interface/pisa/.idea/scala_compiler.xml +6 -0
  192. itp_interface/pisa/.idea/uiDesigner.xml +124 -0
  193. itp_interface/pisa/.idea/vcs.xml +6 -0
  194. itp_interface/pisa/.scalafmt.conf +2 -0
  195. itp_interface/pisa/LICENSE +29 -0
  196. itp_interface/pisa/README.md +262 -0
  197. itp_interface/pisa/build.sbt +49 -0
  198. itp_interface/pisa/build.sh +26 -0
  199. itp_interface/pisa/command_generation/close_gaps.py +44 -0
  200. itp_interface/pisa/command_generation/conjecture_normal_order.py +62 -0
  201. itp_interface/pisa/command_generation/conjecturer_command_generator.py +36 -0
  202. itp_interface/pisa/command_generation/create_dirs.py +11 -0
  203. itp_interface/pisa/command_generation/find_std.py +67 -0
  204. itp_interface/pisa/command_generation/generate_build_commands_afp.py +15 -0
  205. itp_interface/pisa/command_generation/generate_build_commands_std.py +15 -0
  206. itp_interface/pisa/command_generation/generate_commands_afp.py +103 -0
  207. itp_interface/pisa/command_generation/generate_commands_mini.py +73 -0
  208. itp_interface/pisa/command_generation/generate_commands_std.py +69 -0
  209. itp_interface/pisa/command_generation/generate_hammer_extraction_text.py +5 -0
  210. itp_interface/pisa/command_generation/hammer_command_generator.py +40 -0
  211. itp_interface/pisa/command_generation/hp_search_command_generator.py +63 -0
  212. itp_interface/pisa/command_generation/oracle_command_generator.py +56 -0
  213. itp_interface/pisa/command_generation/search_command_generator.py +69 -0
  214. itp_interface/pisa/command_generation/summarise_problem_names.py +45 -0
  215. itp_interface/pisa/command_generation/tpu_hp_search.py +75 -0
  216. itp_interface/pisa/docker/Dockerfile +34 -0
  217. itp_interface/pisa/docker/docker_tutorial.md +64 -0
  218. itp_interface/pisa/eval_setup/copy_isabelle.py +42 -0
  219. itp_interface/pisa/eval_setup/copy_pisa_jars.py +18 -0
  220. itp_interface/pisa/mesh_transformer_utils/tokenization.py +86 -0
  221. itp_interface/pisa/project/build.properties +1 -0
  222. itp_interface/pisa/project/plugins.sbt +5 -0
  223. itp_interface/pisa/requirements.txt +4 -0
  224. itp_interface/pisa/scripts/extract_last_k_steps.py +28 -0
  225. itp_interface/pisa/scripts/extract_proof_corpus.py +26 -0
  226. itp_interface/pisa/scripts/gather_hammer_results.py +27 -0
  227. itp_interface/pisa/scripts/length_in_char_stats.py +20 -0
  228. itp_interface/pisa/scripts/mix.py +127 -0
  229. itp_interface/pisa/scripts/results_stat.py +52 -0
  230. itp_interface/pisa/scripts/test_array_job.sh +34 -0
  231. itp_interface/pisa/setup.sh +25 -0
  232. itp_interface/pisa/src/main/protobuf/server.proto +60 -0
  233. itp_interface/pisa/src/main/python/.idea/.gitignore +8 -0
  234. itp_interface/pisa/src/main/python/.idea/inspectionProfiles/Project_Default.xml +18 -0
  235. itp_interface/pisa/src/main/python/.idea/inspectionProfiles/profiles_settings.xml +6 -0
  236. itp_interface/pisa/src/main/python/.idea/misc.xml +4 -0
  237. itp_interface/pisa/src/main/python/.idea/modules.xml +8 -0
  238. itp_interface/pisa/src/main/python/.idea/python.iml +12 -0
  239. itp_interface/pisa/src/main/python/.idea/vcs.xml +6 -0
  240. itp_interface/pisa/src/main/python/conjecturing_parsing/conjecturer_postprocessing.py +59 -0
  241. itp_interface/pisa/src/main/python/data_extraction/extract_data.py +184 -0
  242. itp_interface/pisa/src/main/python/data_extraction/find_premises.py +221 -0
  243. itp_interface/pisa/src/main/python/data_extraction/process_data.py +129 -0
  244. itp_interface/pisa/src/main/python/legacy/PisaFlexibleClient.py +167 -0
  245. itp_interface/pisa/src/main/python/legacy/autof_test.py +74 -0
  246. itp_interface/pisa/src/main/python/legacy/cmd_client.py +23 -0
  247. itp_interface/pisa/src/main/python/legacy/convert_scala_dump_to_test_name_jsons.py +14 -0
  248. itp_interface/pisa/src/main/python/legacy/create_data_txt.py +72 -0
  249. itp_interface/pisa/src/main/python/legacy/create_finetune_tfrecords.py +311 -0
  250. itp_interface/pisa/src/main/python/legacy/demo.py +49 -0
  251. itp_interface/pisa/src/main/python/legacy/evaluate.py +108 -0
  252. itp_interface/pisa/src/main/python/legacy/extract_first_step.py +25 -0
  253. itp_interface/pisa/src/main/python/legacy/get_global_facts.py +35 -0
  254. itp_interface/pisa/src/main/python/legacy/mix_data.py +19 -0
  255. itp_interface/pisa/src/main/python/legacy/one_stage_extraction.py +111 -0
  256. itp_interface/pisa/src/main/python/legacy/prepare_episodic_transitions.py +137 -0
  257. itp_interface/pisa/src/main/python/legacy/prepare_translation_pairs.py +277 -0
  258. itp_interface/pisa/src/main/python/pisa_client.py +322 -0
  259. itp_interface/pisa/src/main/python/server_pb2.py +394 -0
  260. itp_interface/pisa/src/main/python/server_pb2_grpc.py +230 -0
  261. itp_interface/pisa/src/main/python/test_client.py +17 -0
  262. itp_interface/pisa/src/main/python/test_client2.py +79 -0
  263. itp_interface/pisa/src/main/python/utils/filters.py +59 -0
  264. itp_interface/pisa/src/main/python/utils/pisa_server_control.py +29 -0
  265. itp_interface/pisa/src/main/scala/pisa/agent/CheckSyntax.scala +257 -0
  266. itp_interface/pisa/src/main/scala/pisa/agent/DepThms.scala +29 -0
  267. itp_interface/pisa/src/main/scala/pisa/agent/PisaStat.scala +46 -0
  268. itp_interface/pisa/src/main/scala/pisa/agent/RefactorTest.scala +40 -0
  269. itp_interface/pisa/src/main/scala/pisa/agent/RepHammer.scala +95 -0
  270. itp_interface/pisa/src/main/scala/pisa/server/HammFacts.scala +63 -0
  271. itp_interface/pisa/src/main/scala/pisa/server/PisaOS.scala +881 -0
  272. itp_interface/pisa/src/main/scala/pisa/server/PisaOneStage.scala +540 -0
  273. itp_interface/pisa/src/main/scala/pisa/server/PisaOneStageServers.scala +1048 -0
  274. itp_interface/pisa/src/main/scala/pisa/utils/TheoryManager.scala +95 -0
  275. itp_interface/pisa/src/test/python/analyse_debug.py +33 -0
  276. itp_interface/pisa/src/test/python/extract_test_seq2seq.py +53 -0
  277. itp_interface/pisa/src/test/python/extract_test_theorem_ground_truth_indices.py +31 -0
  278. itp_interface/pisa/src/test/python/proof_originality.py +24 -0
  279. itp_interface/pisa/src/test/python/test_command_generator.py +25 -0
  280. itp_interface/pisa/src/test/python/test_model_sequence_accuracy.py +70 -0
  281. itp_interface/pisa/src/test/scala/pisa/Easy.scala +26 -0
  282. itp_interface/pisa/src/test/scala/pisa/TestCurl.scala +82 -0
  283. itp_interface/pisa/src/test/scala/pisa/TestIsa.scala +27 -0
  284. itp_interface/pisa/test.sh +19 -0
  285. itp_interface/pisa/universal_test_theorems.tar.gz +0 -0
  286. itp_interface/repo/build.py +78 -0
  287. itp_interface/repo/clone.py +79 -0
  288. itp_interface/repo/dataset_discovery.py +99 -0
  289. itp_interface/retrieval/__init__.py +0 -0
  290. itp_interface/retrieval/abstraction.py +35 -0
  291. itp_interface/retrieval/coq_bm25_reranker.py +153 -0
  292. itp_interface/retrieval/isabelle_bm25_reranker.py +86 -0
  293. itp_interface/retrieval/lean3_bm25_reranker.py +86 -0
  294. itp_interface/rl/__init__.py +0 -0
  295. itp_interface/rl/abstraction.py +168 -0
  296. itp_interface/rl/proof_action.py +172 -0
  297. itp_interface/rl/proof_state.py +149 -0
  298. itp_interface/rl/proof_tree.py +109 -0
  299. itp_interface/rl/simpl_proof_env_pool.py +16 -0
  300. itp_interface/rl/simple_proof_env.py +713 -0
  301. itp_interface/rl/simple_proof_env_pool.py +591 -0
  302. itp_interface/scripts/setup.sh +228 -0
  303. itp_interface/tools/__init__.py +0 -0
  304. itp_interface/tools/basic_utils.py +172 -0
  305. itp_interface/tools/bin_packing.py +61 -0
  306. itp_interface/tools/cache.py +93 -0
  307. itp_interface/tools/coq_build_spec.py +31 -0
  308. itp_interface/tools/coq_build_tool.py +319 -0
  309. itp_interface/tools/coq_context_helper.py +354 -0
  310. itp_interface/tools/coq_executor.py +508 -0
  311. itp_interface/tools/coq_local_data_generation_transform.py +158 -0
  312. itp_interface/tools/coq_parse_utils.py +154 -0
  313. itp_interface/tools/coq_raw_proofs.py +193 -0
  314. itp_interface/tools/coq_theorem_proof_pair_generation_transform.py +146 -0
  315. itp_interface/tools/coq_training_data_generator.py +76 -0
  316. itp_interface/tools/dynamic_coq_proof_exec.py +220 -0
  317. itp_interface/tools/dynamic_isabelle_proof_exec.py +229 -0
  318. itp_interface/tools/dynamic_lean4_proof_exec.py +236 -0
  319. itp_interface/tools/dynamic_lean_proof_exec.py +228 -0
  320. itp_interface/tools/isabelle_context_helper.py +66 -0
  321. itp_interface/tools/isabelle_executor.py +862 -0
  322. itp_interface/tools/isabelle_local_data_generation_transform.py +149 -0
  323. itp_interface/tools/isabelle_parse_utils.py +131 -0
  324. itp_interface/tools/isabelle_server.py +106 -0
  325. itp_interface/tools/lean4_context_helper.py +72 -0
  326. itp_interface/tools/lean4_local_data_generation_transform.py +122 -0
  327. itp_interface/tools/lean4_sync_executor.py +1193 -0
  328. itp_interface/tools/lean_cmd_executor.py +804 -0
  329. itp_interface/tools/lean_context_helper.py +327 -0
  330. itp_interface/tools/lean_dojo_data_generation_transform.py +206 -0
  331. itp_interface/tools/lean_executor.py +687 -0
  332. itp_interface/tools/lean_local_data_generation_transform.py +136 -0
  333. itp_interface/tools/lean_parse_utils.py +32 -0
  334. itp_interface/tools/log_utils.py +20 -0
  335. itp_interface/tools/proof_exec_callback.py +76 -0
  336. itp_interface/tools/ray_utils.py +265 -0
  337. itp_interface/tools/repl/.git +1 -0
  338. itp_interface/tools/repl/.github/workflows/ci.yml +24 -0
  339. itp_interface/tools/repl/.gitignore +7 -0
  340. itp_interface/tools/repl/.vscode/copyright.code-snippets +13 -0
  341. itp_interface/tools/repl/.vscode/extensions.json +13 -0
  342. itp_interface/tools/repl/.vscode/module-docstring.code-snippets +35 -0
  343. itp_interface/tools/repl/.vscode/settings.json +11 -0
  344. itp_interface/tools/repl/README.md +174 -0
  345. itp_interface/tools/repl/REPL/Frontend.lean +47 -0
  346. itp_interface/tools/repl/REPL/JSON.lean +186 -0
  347. itp_interface/tools/repl/REPL/Lean/ContextInfo.lean +9 -0
  348. itp_interface/tools/repl/REPL/Lean/Environment.lean +31 -0
  349. itp_interface/tools/repl/REPL/Lean/InfoTree/ToJson.lean +114 -0
  350. itp_interface/tools/repl/REPL/Lean/InfoTree.lean +272 -0
  351. itp_interface/tools/repl/REPL/Main.lean +323 -0
  352. itp_interface/tools/repl/REPL/Snapshots.lean +306 -0
  353. itp_interface/tools/repl/REPL/Util/Path.lean +36 -0
  354. itp_interface/tools/repl/REPL/Util/Pickle.lean +44 -0
  355. itp_interface/tools/repl/REPL.lean +4 -0
  356. itp_interface/tools/repl/lake-manifest.json +5 -0
  357. itp_interface/tools/repl/lakefile.lean +15 -0
  358. itp_interface/tools/repl/lean-toolchain +1 -0
  359. itp_interface/tools/repl/test/Mathlib/.gitignore +5 -0
  360. itp_interface/tools/repl/test/Mathlib/H20231110.sh +2 -0
  361. itp_interface/tools/repl/test/Mathlib/ReplMathlibTests.lean +1 -0
  362. itp_interface/tools/repl/test/Mathlib/lake-manifest.json +68 -0
  363. itp_interface/tools/repl/test/Mathlib/lakefile.lean +11 -0
  364. itp_interface/tools/repl/test/Mathlib/lean-toolchain +1 -0
  365. itp_interface/tools/repl/test/Mathlib/test/20240209.expected.out +20 -0
  366. itp_interface/tools/repl/test/Mathlib/test/20240209.in +3 -0
  367. itp_interface/tools/repl/test/Mathlib/test/20240209.lean +4 -0
  368. itp_interface/tools/repl/test/Mathlib/test/H20231020.expected.out +8 -0
  369. itp_interface/tools/repl/test/Mathlib/test/H20231020.in +8 -0
  370. itp_interface/tools/repl/test/Mathlib/test/H20231020.lean +22 -0
  371. itp_interface/tools/repl/test/Mathlib/test/H20231110.expected.out +4 -0
  372. itp_interface/tools/repl/test/Mathlib/test/H20231110.in +4 -0
  373. itp_interface/tools/repl/test/Mathlib/test/H20231115.expected.out +19 -0
  374. itp_interface/tools/repl/test/Mathlib/test/H20231115.in +5 -0
  375. itp_interface/tools/repl/test/Mathlib/test/H20231115_2.expected.out +18 -0
  376. itp_interface/tools/repl/test/Mathlib/test/H20231115_2.in +4 -0
  377. itp_interface/tools/repl/test/Mathlib/test/H20231115_3.expected.out +10 -0
  378. itp_interface/tools/repl/test/Mathlib/test/H20231115_3.in +4 -0
  379. itp_interface/tools/repl/test/Mathlib/test/H20231214.in +9 -0
  380. itp_interface/tools/repl/test/Mathlib/test/H20231214.lean +30 -0
  381. itp_interface/tools/repl/test/Mathlib/test/H20231215.expected.out +4 -0
  382. itp_interface/tools/repl/test/Mathlib/test/H20231215.in +4 -0
  383. itp_interface/tools/repl/test/Mathlib/test/H20231215_2.expected.out +14 -0
  384. itp_interface/tools/repl/test/Mathlib/test/H20231215_2.in +3 -0
  385. itp_interface/tools/repl/test/Mathlib/test/exact.expected.out +37 -0
  386. itp_interface/tools/repl/test/Mathlib/test/exact.in +10 -0
  387. itp_interface/tools/repl/test/Mathlib/test/import_Mathlib.lean +1 -0
  388. itp_interface/tools/repl/test/Mathlib/test/induction.expected.out +29 -0
  389. itp_interface/tools/repl/test/Mathlib/test/induction.in +10 -0
  390. itp_interface/tools/repl/test/Mathlib/test/induction.lean +6 -0
  391. itp_interface/tools/repl/test/Mathlib/test/on_goal.expected.out +22 -0
  392. itp_interface/tools/repl/test/Mathlib/test/on_goal.in +5 -0
  393. itp_interface/tools/repl/test/Mathlib/test/pickle.expected.out +16 -0
  394. itp_interface/tools/repl/test/Mathlib/test/pickle.in +6 -0
  395. itp_interface/tools/repl/test/Mathlib/test/pickle_2.expected.out +4 -0
  396. itp_interface/tools/repl/test/Mathlib/test/pickle_2.in +4 -0
  397. itp_interface/tools/repl/test/Mathlib/test.sh +41 -0
  398. itp_interface/tools/repl/test/all_tactics.expected.out +13 -0
  399. itp_interface/tools/repl/test/all_tactics.in +1 -0
  400. itp_interface/tools/repl/test/by_cases.expected.out +25 -0
  401. itp_interface/tools/repl/test/by_cases.in +8 -0
  402. itp_interface/tools/repl/test/by_cases.lean +4 -0
  403. itp_interface/tools/repl/test/calc.expected.out +32 -0
  404. itp_interface/tools/repl/test/calc.in +1 -0
  405. itp_interface/tools/repl/test/def_eval.expected.out +9 -0
  406. itp_interface/tools/repl/test/def_eval.in +3 -0
  407. itp_interface/tools/repl/test/enableInitializersExecution.expected.out +2 -0
  408. itp_interface/tools/repl/test/enableInitializersExecution.in +1 -0
  409. itp_interface/tools/repl/test/file.expected.out +8 -0
  410. itp_interface/tools/repl/test/file.in +1 -0
  411. itp_interface/tools/repl/test/file.lean +5 -0
  412. itp_interface/tools/repl/test/have_by_sorry.expected.out +28 -0
  413. itp_interface/tools/repl/test/have_by_sorry.in +6 -0
  414. itp_interface/tools/repl/test/import_lean.in +1 -0
  415. itp_interface/tools/repl/test/incomplete.expected.out +18 -0
  416. itp_interface/tools/repl/test/incomplete.in +3 -0
  417. itp_interface/tools/repl/test/incomplete.lean +0 -0
  418. itp_interface/tools/repl/test/infotree.expected.out +20 -0
  419. itp_interface/tools/repl/test/infotree.in +2 -0
  420. itp_interface/tools/repl/test/invalid_tactic.expected.out +20 -0
  421. itp_interface/tools/repl/test/invalid_tactic.in +3 -0
  422. itp_interface/tools/repl/test/name_generator.expected.out +53 -0
  423. itp_interface/tools/repl/test/name_generator.in +18 -0
  424. itp_interface/tools/repl/test/no_goal_sorry.expected.out +11 -0
  425. itp_interface/tools/repl/test/no_goal_sorry.in +1 -0
  426. itp_interface/tools/repl/test/no_goal_sorry_2.expected.out +12 -0
  427. itp_interface/tools/repl/test/no_goal_sorry_2.in +1 -0
  428. itp_interface/tools/repl/test/options.expected.out +17 -0
  429. itp_interface/tools/repl/test/options.in +6 -0
  430. itp_interface/tools/repl/test/pickle_environment.expected.out +8 -0
  431. itp_interface/tools/repl/test/pickle_environment.in +7 -0
  432. itp_interface/tools/repl/test/pickle_environment_with_imports.expected.out +10 -0
  433. itp_interface/tools/repl/test/pickle_environment_with_imports.in +9 -0
  434. itp_interface/tools/repl/test/pickle_open.expected.out +8 -0
  435. itp_interface/tools/repl/test/pickle_open.in +7 -0
  436. itp_interface/tools/repl/test/pickle_open_2.expected.out +4 -0
  437. itp_interface/tools/repl/test/pickle_open_2.in +3 -0
  438. itp_interface/tools/repl/test/pickle_open_scoped.expected.out +18 -0
  439. itp_interface/tools/repl/test/pickle_open_scoped.in +8 -0
  440. itp_interface/tools/repl/test/pickle_open_scoped_2.expected.out +14 -0
  441. itp_interface/tools/repl/test/pickle_open_scoped_2.in +3 -0
  442. itp_interface/tools/repl/test/pickle_proof_state_1.expected.out +26 -0
  443. itp_interface/tools/repl/test/pickle_proof_state_1.in +15 -0
  444. itp_interface/tools/repl/test/pickle_proof_state_2.expected.out +4 -0
  445. itp_interface/tools/repl/test/pickle_proof_state_2.in +3 -0
  446. itp_interface/tools/repl/test/pickle_proof_state_env.expected.out +26 -0
  447. itp_interface/tools/repl/test/pickle_proof_state_env.in +15 -0
  448. itp_interface/tools/repl/test/pickle_scoped_notation.in +16 -0
  449. itp_interface/tools/repl/test/pickle_scoped_notation_2.in +3 -0
  450. itp_interface/tools/repl/test/proof_step.expected.out +18 -0
  451. itp_interface/tools/repl/test/proof_step.in +7 -0
  452. itp_interface/tools/repl/test/readme.expected.out +16 -0
  453. itp_interface/tools/repl/test/readme.in +5 -0
  454. itp_interface/tools/repl/test/sorry_hypotheses.expected.out +16 -0
  455. itp_interface/tools/repl/test/sorry_hypotheses.in +4 -0
  456. itp_interface/tools/repl/test/synthesize_placeholder.expected.out +7 -0
  457. itp_interface/tools/repl/test/synthesize_placeholder.in +1 -0
  458. itp_interface/tools/repl/test/tactic_mode_sorry.expected.out +14 -0
  459. itp_interface/tools/repl/test/tactic_mode_sorry.in +3 -0
  460. itp_interface/tools/repl/test/tactic_sorry.expected.out +12 -0
  461. itp_interface/tools/repl/test/tactic_sorry.in +1 -0
  462. itp_interface/tools/repl/test/term_sorry.expected.out +12 -0
  463. itp_interface/tools/repl/test/term_sorry.in +1 -0
  464. itp_interface/tools/repl/test/trace_simp.expected.out +41 -0
  465. itp_interface/tools/repl/test/trace_simp.in +15 -0
  466. itp_interface/tools/repl/test/unfinished_tactic_block.expected.out +11 -0
  467. itp_interface/tools/repl/test/unfinished_tactic_block.in +1 -0
  468. itp_interface/tools/repl/test/unknown_environment.expected.out +2 -0
  469. itp_interface/tools/repl/test/unknown_environment.in +1 -0
  470. itp_interface/tools/repl/test/unknown_proof_state.expected.out +14 -0
  471. itp_interface/tools/repl/test/unknown_proof_state.in +3 -0
  472. itp_interface/tools/repl/test/unknown_tactic.expected.out +14 -0
  473. itp_interface/tools/repl/test/unknown_tactic.in +3 -0
  474. itp_interface/tools/repl/test/variables.expected.out +26 -0
  475. itp_interface/tools/repl/test/variables.in +5 -0
  476. itp_interface/tools/repl/test.sh +43 -0
  477. itp_interface/tools/run_data_generation_transforms.py +350 -0
  478. itp_interface/tools/theorem_details.py +25 -0
  479. itp_interface/tools/training_data.py +358 -0
  480. itp_interface/tools/training_data_format.py +599 -0
  481. itp_interface-1.0.0.dist-info/METADATA +78 -0
  482. itp_interface-1.0.0.dist-info/RECORD +485 -0
  483. itp_interface-1.0.0.dist-info/WHEEL +4 -0
  484. itp_interface-1.0.0.dist-info/entry_points.txt +3 -0
  485. itp_interface-1.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,713 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import sys
4
+
5
+ root_dir = f"{__file__.split('itp_interface')[0]}"
6
+ if root_dir not in sys.path:
7
+ sys.path.append(root_dir)
8
+ import copy
9
+ import typing
10
+ import logging
11
+ import time
12
+ import os
13
+ import ray
14
+ from itp_interface.rl.proof_tree import ProofSearchResult, ProofTree
15
+ from itp_interface.rl.proof_state import ProofState
16
+ from itp_interface.rl.proof_action import ProofAction
17
+ from itp_interface.rl.abstraction import State, Action, Env
18
+ from itp_interface.tools.proof_exec_callback import ProofExecutorCallback
19
+ from itp_interface.tools.training_data_format import TrainingDataFormat
20
+ from itp_interface.tools.isabelle_executor import IsabelleExecutor, HammerMode
21
+ from itp_interface.tools.dynamic_coq_proof_exec import DynamicProofExecutor as DynamicCoqProofExecutor
22
+ from itp_interface.tools.dynamic_lean_proof_exec import DynamicProofExecutor as DynamicLeanProofExecutor
23
+ from itp_interface.tools.dynamic_lean4_proof_exec import DynamicProofExecutor as DynamicLean4ProofExecutor
24
+ from itp_interface.tools.dynamic_isabelle_proof_exec import DynamicProofExecutor as DynamicIsabelleProofExecutor
25
+ from itp_interface.retrieval.coq_bm25_reranker import CoqBm25ReRanker
26
+ from itp_interface.retrieval.lean3_bm25_reranker import Lean3Bm25ReRanker
27
+ from itp_interface.retrieval.isabelle_bm25_reranker import IsabelleBm25ReRanker
28
+ from dataclasses import dataclass, field
29
+ from dataclasses_json import dataclass_json
30
+ from enum import Enum
31
+
32
+
33
class ProgressState:
    """Namespace of string labels describing the outcome of an environment step.

    The labels are plain strings (not an ``Enum``); ``ProofEnvInfo.progress``
    stores one of them.
    """
    (STARTING,
     STATE_CHANGED,
     STATE_UNCHANGED,
     DONE,
     FAILED) = ("Starting", "StateChanged", "StateUnchanged", "Done", "Failed")

    def __init__(self):
        # Nothing to initialise: the class only carries constants.
        return None
41
+
42
@dataclass_json
@dataclass
class ProofEnvInfo(object):
    """Outcome metadata attached to an environment transition."""
    # One of the ``ProgressState`` string constants.
    progress: str = ProgressState.STARTING
    # Error / diagnostic text for the step, if any.
    error_message: typing.Optional[str] = None
    # Free-form informational messages; deliberately ignored by ``__eq__``.
    info_messages: typing.List[str] = field(default_factory=list)

    def __eq__(self, __value: object) -> bool:
        """Return True when the other object is a ``ProofEnvInfo`` with the
        same ``progress`` and ``error_message`` (``info_messages`` is not compared)."""
        if not isinstance(__value, ProofEnvInfo):
            return False
        same_progress = self.progress == __value.progress
        return same_progress and self.error_message == __value.error_message
51
+
52
class ProofEnvReRankStrategy(Enum):
    """Retrieval / re-ranking modes understood by ``ProofEnv``.

    How each member is interpreted (see ``ProofEnv._get_current_dfns_thms``):
    ``WITH_PRINT`` variants ask the executor to print symbols, ``ONLY_LOCAL``
    variants restrict retrieval to local lemmas, ``NO_DFNS`` variants drop the
    relevant definitions, and ``NO_RE_RANK`` asks for the proof state only.
    """
    BM25 = "BM25"
    BM25_WITH_PRINT = "BM25_WITH_PRINT"
    BM25_WITH_PRINT_ONLY_LOCAL = "BM25_WITH_PRINT_ONLY_LOCAL"
    BM25_WITH_PRINT_NO_DFNS = "BM25_WITH_PRINT_NO_DFNS"
    BM25_WITH_PRINT_ONLY_LOCAL_NO_DFNS = "BM25_WITH_PRINT_ONLY_LOCAL_NO_DFNS"
    BM25_ONLY_LOCAL_NO_DFNS = "BM25_ONLY_LOCAL_NO_DFNS"
    BM25_NO_DFNS = "BM25_NO_DFNS"
    NO_RE_RANK = "NO_RE_RANK"

    def __str__(self):
        # Render as the bare value (e.g. "BM25") rather than "ProofEnvReRankStrategy.BM25".
        return self._value_
64
+
65
class ProofEnv(Env):
    """Environment that drives a proof executor for a single lemma.

    Each ``step`` runs tactics, retrieves definitions/theorems, or backtracks,
    and records the transition in the environment's history.
    """
    # Multiplied by min(current depth / max depth, 1.0) and added to every tactic reward.
    max_depth_penalty = -0.1
    # Added to the reward of the tactic step that finishes the proof.
    max_proof_completion_reward = 1.0
    # Added to the reward of a successful tactic step that does not finish the proof.
    progress_reward = 0.2
    # Class-level re-ranker shared between instances; rebuilt when the language changes.
    _re_ranker = None
70
def __init__(self,
    name: str,
    dynamic_proof_executor_callback: ProofExecutorCallback,
    lemma_name: str,
    retrieval_strategy: ProofEnvReRankStrategy = ProofEnvReRankStrategy.BM25,
    max_proof_depth: int = 10,
    always_retrieve_thms: bool = False,
    logger : logging.Logger = None):
    """Configure the environment; no executor is created until ``reset()``.

    Args:
        name: Label returned by the ``name`` property.
        dynamic_proof_executor_callback: Factory used by ``reset()`` to obtain
            a proof executor; its ``language`` becomes the env language.
        lemma_name: Name (prefix) of the lemma to forward to.
        retrieval_strategy: How definitions/theorems are retrieved and ranked.
        max_proof_depth: Depth at which the depth penalty saturates.
        always_retrieve_thms: When True, ``state`` also retrieves theorems.
        logger: Logger to use; defaults to this module's logger.

    Raises:
        NotImplementedError: For an unknown retrieval strategy or language.
    """
    for argument, expected_type in (
            (dynamic_proof_executor_callback, ProofExecutorCallback),
            (lemma_name, str),
            (max_proof_depth, int),
            (always_retrieve_thms, bool)):
        assert isinstance(argument, expected_type)
    self.dynamic_proof_executor_callback = dynamic_proof_executor_callback
    self._dynamic_proof_executor : typing.Union[DynamicCoqProofExecutor, DynamicLeanProofExecutor, DynamicIsabelleProofExecutor] = None
    self._loaded = False
    self._history : typing.List[typing.Tuple[ProofState, ProofAction, ProofState, float, bool, ProofEnvInfo]] = []
    self._name = name
    self.max_proof_depth = max_proof_depth
    self.lemma_name = lemma_name
    self.current_proof_depth = 0
    self._p_tree = ProofTree()
    self._possible_failure_paths = 0
    self._success_path_length = 0
    self._num_cycles = 0
    self._always_retrieve_thms = always_retrieve_thms
    self.retrieve_strategy = retrieval_strategy
    self.language = self.dynamic_proof_executor_callback.language

    supported_strategies = (
        ProofEnvReRankStrategy.BM25,
        ProofEnvReRankStrategy.BM25_WITH_PRINT,
        ProofEnvReRankStrategy.BM25_WITH_PRINT_ONLY_LOCAL,
        ProofEnvReRankStrategy.BM25_WITH_PRINT_NO_DFNS,
        ProofEnvReRankStrategy.BM25_WITH_PRINT_ONLY_LOCAL_NO_DFNS,
        ProofEnvReRankStrategy.BM25_ONLY_LOCAL_NO_DFNS,
        ProofEnvReRankStrategy.BM25_NO_DFNS,
        ProofEnvReRankStrategy.NO_RE_RANK,
    )
    if self.retrieve_strategy not in supported_strategies:
        raise NotImplementedError(f"Retrieval strategy {self.retrieve_strategy} not implemented")

    # The re-ranker is cached on the class and only rebuilt when the
    # language differs from the one the cached instance was built for.
    cached_re_ranker = ProofEnv._re_ranker
    if cached_re_ranker is None or str(self.language) != cached_re_ranker.language:
        re_ranker_by_language = (
            (ProofAction.Language.COQ, CoqBm25ReRanker),
            (ProofAction.Language.LEAN, Lean3Bm25ReRanker),
            # Lean 4 uses the Lean 3 re-ranker implementation.
            (ProofAction.Language.LEAN4, Lean3Bm25ReRanker),
            (ProofAction.Language.ISABELLE, IsabelleBm25ReRanker),
        )
        for candidate_language, re_ranker_cls in re_ranker_by_language:
            if self.language == candidate_language:
                ProofEnv._re_ranker = re_ranker_cls(language=str(self.language))
                break
        else:
            raise NotImplementedError(f"Language {self.language} not implemented")
    self._re_ranker = ProofEnv._re_ranker
    self.logger = logger if logger is not None else logging.getLogger(__name__)
120
+
121
def __enter__(self):
    """Context-manager entry: (re)load the environment via ``reset()`` and return it."""
    self.reset()
    return self
124
+
125
def __exit__(self, exc_type, exc_value, traceback):
    """Context-manager exit: forward the exit to the proof executor, if one exists."""
    executor = self._dynamic_proof_executor
    if executor is None:
        return
    executor.__exit__(exc_type, exc_value, traceback)
129
+
130
@property
def name(self):
    """Name given to this environment at construction time."""
    return self._name
133
+
134
@property
def state(self):
    """Build a fresh ``ProofState`` snapshot of the current proof.

    The goals come from the next-state of the last history entry when it is
    available; otherwise they are queried from the executor (with theorem
    retrieval when ``always_retrieve_thms`` is set). Goals and the proof tree
    are deep-copied so callers cannot mutate the environment through them.
    """
    assert self._loaded, "Env not loaded, call reset() first"
    executor = self._dynamic_proof_executor
    # The last entry's next-state is None while an action is still being
    # executed (the placeholder appended by step()).
    last_next_state = self._history[-1][2] if len(self._history) > 0 else None
    if last_next_state is not None:
        current_goals = last_next_state.training_data_format
    elif self._always_retrieve_thms:
        retrieved_state, _, _, _ = self._get_current_dfns_thms(ProofEnvInfo(progress=ProgressState.STARTING))
        current_goals = retrieved_state.training_data_format
    else:
        # Ask the proof executor itself for the current goals
        current_goals = executor.get_current_proof_state_as_training_data()
    goals_copy = copy.deepcopy(current_goals)
    tree_copy = copy.deepcopy(self._p_tree)
    lemma_stmt = executor.get_lemma_stmt_if_running()
    lemma_name = executor.get_current_lemma_name()
    snapshot = ProofState(goals_copy, language=self.language, theorem_statement_with_name=lemma_stmt, theorem_name=lemma_name)
    snapshot.proof_tree = tree_copy
    snapshot.was_reset = len(self._history) == 0
    return snapshot
161
+
162
@property
def done(self) -> bool:
    """True once the executor is no longer in proof mode."""
    assert self._loaded, "Env not loaded, call reset() first"
    in_proof_mode = self._dynamic_proof_executor.is_in_proof_mode()
    return not in_proof_mode
169
+
170
@property
def history(self) -> typing.List[typing.Tuple[ProofState, ProofAction, ProofState, float, bool, ProofEnvInfo]]:
    """Recorded transitions as (state, action, next_state, reward, done, info) tuples.

    Returns the internal list itself, not a copy.
    """
    assert self._loaded, "Env not loaded, call reset() first"
    return self._history
174
+
175
def get_state(self):
    """Method form of the ``state`` property.

    NOTE(review): presumably for callers that can only invoke methods
    (e.g. remote actor handles) -- confirm.
    """
    return self.state
177
+
178
def get_done(self):
    """Method form of the ``done`` property."""
    return self.done
180
+
181
def get_history(self):
    """Method form of the ``history`` property."""
    return self.history
183
+
184
def getattr(self, attr_name: str):
    """Return the attribute of this environment called ``attr_name``.

    Raises ``AttributeError`` when no such attribute exists.
    """
    attribute_value = self.__getattribute__(attr_name)
    return attribute_value
186
+
187
def reset(self):
    """Discard the current executor (if any) and start over at the target lemma.

    Creates a new executor from the callback, clears the history and proof
    tree, forwards execution to ``self.lemma_name`` and restarts the time and
    inference counters. For Isabelle the proof is entered with ``proof -``.

    Raises:
        Exception: If the lemma cannot be found (from ``_foward_to_lemma_proof``).
    """
    self.current_proof_depth = 0
    if self._dynamic_proof_executor is not None:
        try:
            self._dynamic_proof_executor.__exit__(None, None, None)
        except Exception:
            # Closing the old executor is best effort: a failure here must not
            # prevent a fresh start, but it should not go unnoticed either.
            self.logger.warning(
                "Ignoring exception raised while closing the previous proof executor",
                exc_info=True)
    self._dynamic_proof_executor = self.dynamic_proof_executor_callback.get_proof_executor()
    if self.dynamic_proof_executor_callback.language == ProofAction.Language.LEAN:
        lean_proof_executor = self._dynamic_proof_executor
        # Index the lemma search results in the re-ranker; done only once,
        # while the re-ranker has no indexed responses yet.
        if self._always_retrieve_thms and \
            str(self.language) == str(self.dynamic_proof_executor_callback.language) and \
            len(self._re_ranker.responses) == 0:
            search_tool = lean_proof_executor.lean_context_helper.search_executor._search_tool
            if len(search_tool.lemmas) > 0:
                all_lemmas = [str(lemma) for lemma in search_tool.lemmas]
                self._re_ranker.reindex(all_lemmas)
    self._dynamic_proof_executor.__enter__()
    self._history.clear()
    self._p_tree = ProofTree()
    self._loaded = True
    self._foward_to_lemma_proof()
    self.goal_start_time = time.time()
    self.inferences_used = 0

    # If in Isabelle, automatically enter proof
    if self.language == ProofAction.Language.ISABELLE:
        self._dynamic_proof_executor.run_tactics(["proof -"])
218
+
219
def step(self, action: Action) -> typing.Tuple[State, Action, State, float, bool, ProofEnvInfo]:
    """Execute one action and return the recorded transition.

    Args:
        action: A ``ProofAction`` of type RUN_TACTIC, GET_DFNS_THMS or BACKTRACK.

    Returns:
        ``(state, action, next_state, reward, done, info)``. When the proof is
        already finished nothing is executed or recorded and the current state
        is returned as both ``state`` and ``next_state`` with reward 0.0.

    Raises:
        NotImplementedError: For an unsupported action type; the history is
            left untouched in that case.
    """
    assert self._loaded, "Env not loaded, call reset() first"
    info = ProofEnvInfo(progress=ProgressState.STARTING)
    if self.done:
        info.progress = ProgressState.DONE
        # Return the same 6-tuple shape as every other path so callers can
        # always unpack the result the same way.
        terminal_state = self.state
        return terminal_state, action, terminal_state, 0.0, True, info
    assert isinstance(action, ProofAction), f"action must be of type ProofAction, not {type(action)}"
    # Resolve the handler before touching the history so that an unsupported
    # action does not leave a half-filled placeholder entry behind.
    if action.action_type == ProofAction.ActionType.RUN_TACTIC:
        handler = self._run_tactic
    elif action.action_type == ProofAction.ActionType.GET_DFNS_THMS:
        handler = self._get_dfns_thms
    elif action.action_type == ProofAction.ActionType.BACKTRACK:
        handler = self._backtrack
    else:
        raise NotImplementedError(f"Action type {action.action_type} not implemented")
    history_idx = len(self._history)
    state_before = self.state
    # Placeholder entry; the handler overwrites it with the real outcome.
    self._history.append((state_before, action, None, 0.0, False, info))
    handler(history_idx)
    self.inferences_used += 1
    return self._history[-1]
239
+
240
def checkpoint(self):
    """Delegate to the base ``Env.checkpoint`` implementation."""
    return super().checkpoint()
242
+
243
def clone(self):
    """Delegate to the base ``Env.clone`` implementation."""
    return super().clone()
245
+
246
def render(self):
    """Log the latest transition, or the initial state when no action has run yet.

    For a transition, at most three local and three external theorems are
    shown per goal.
    """
    if len(self._history) == 0:
        initial_state = self.state
        initial_goals = [f"Goal [{idx}]:\n {goal.goal} \n Hyps [{idx}]:\n {goal.hypotheses} \n Dfns [{idx}]:\n {goal.relevant_defns} \n Thms [{idx}]:\n {goal.possible_useful_theorems_local} \n------------------\n" for idx, goal in enumerate(initial_state.training_data_format.start_goals)]
        initial_text = '\n'.join(initial_goals)
        self.logger.info(f"Proof State (before action):\n {initial_text}")
        return
    s1, a, s2, r, d, info = self._history[-1]
    visibility = 3
    separator = "-"*50

    def lemma_text(proof_state, lemma_ref):
        # Resolve a lemma reference against the state's lemma table.
        return str(proof_state.training_data_format.all_useful_defns_theorems[lemma_ref.lemma_idx])

    def summarise(proof_state):
        # Per-goal newline-joined definitions and (truncated) theorems.
        goals = proof_state.training_data_format.start_goals
        dfns = [
            "\n".join([lemma_text(proof_state, ref) for ref in goal.relevant_defns])
            for goal in goals]
        thms = [
            "\n".join([lemma_text(proof_state, ref)
                for ref in (goal.possible_useful_theorems_local[:visibility] + goal.possible_useful_theorems_external[:visibility])])
            for goal in goals]
        return dfns, thms

    self.logger.info(separator)
    before_dfns, before_thms = summarise(s1)
    before_goals = [f"Goal [{idx}]:\n {goal.goal} \n Hyps [{idx}]:\n {goal.hypotheses} \n Dfns [{idx}]:\n {before_dfns[idx]} \n Thms [{idx}]:\n {before_thms[idx]} \n------------------\n" for idx, goal in enumerate(s1.training_data_format.start_goals)]
    before_text = '\n'.join(before_goals)
    self.logger.info(f"Proof State (before action):\n {before_text}")
    after_dfns, after_thms = summarise(s2)
    after_goals = [f"Goal [{idx}]:\n {goal.goal} \n Hyps [{idx}]: {goal.hypotheses} \n Dfns [{idx}]:\n {after_dfns[idx]} \n Thms [{idx}]:\n {after_thms[idx]} \n-------------------\n" for idx, goal in enumerate(s2.training_data_format.start_goals)]
    serialized_action = a.serialize()
    self.logger.info(f"Action:\n {serialized_action}")
    after_text = '\n'.join(after_goals)
    self.logger.info(f"Proof State (after action):\n {after_text}")
    self.logger.info(f"Reward:\n {r}")
    self.logger.info(f"Done:\n {d}")
    self.logger.info(f"Info:\n {info.to_json()}")
    self.logger.info(separator)
283
+
284
def dump_proof(self, dump_file_name: str = None, additional_info: typing.Dict[str, typing.Any] = None):
    """Summarise the proof attempt, log it, and optionally write it to a file.

    The result is stored in ``self.proof_search_res``. When ``dump_file_name``
    is given, the summary is appended to an existing file (separated by a
    blank line) or written to a new one.

    Args:
        dump_file_name: Destination file, or None to skip writing.
        additional_info: Extra data stored in the result; defaults to ``{}``.
    """
    assert self._loaded, "Env not loaded, call reset() first"
    self.goal_end_time = time.time()
    self.time_taken = self.goal_end_time - self.goal_start_time
    proof_steps = [TrainingDataFormat(proof_steps=tactic.proof_steps) for _, tactic in self._p_tree.tactics]
    if additional_info is None:
        additional_info = {}
    executor = self._dynamic_proof_executor
    main_file = executor.main_file
    # The proof is considered found when the executor has left proof mode.
    proof_found = not executor.is_in_proof_mode()
    self.proof_search_res = ProofSearchResult(
        main_file,
        proof_found,
        self._lemma_name_with_stmt,
        proof_steps,
        self.time_taken,
        self.inferences_used,
        possible_failed_paths=-1,
        num_of_backtracks=-1,
        is_timeout=False,
        is_inference_exhausted=False,
        longest_success_path=-1,
        additional_info=additional_info,
        language=self.language)
    self.logger.info(f"Dumping proof search result:\n {self.proof_search_res}")
    if dump_file_name is None:
        return
    appending = os.path.exists(dump_file_name)
    with open(dump_file_name, 'a' if appending else 'w') as f:
        if appending:
            f.write("\n\n")
        f.write(str(self.proof_search_res))
311
+
312
def _run_tactic(self, history_idx: int = None):
    """Execute the RUN_TACTIC action stored at ``history_idx`` and record its outcome.

    If running the tactics raises, the environment is reset, the proof tree is
    replayed, and the step is recorded as FAILED with reward -1.0.

    Args:
        history_idx: Index of the history entry to execute; defaults to the last one.
    """
    assert self._loaded, "Env not loaded, call reset() first"
    if history_idx is None:
        history_idx = len(self._history) - 1
    state, action, _, reward, done, env_info = self._history[history_idx]
    assert action.action_type == ProofAction.ActionType.RUN_TACTIC, "Action must be of type RUN_TACTIC"
    tactics = action.kwargs["tactics"]
    assert isinstance(tactics, list)
    assert len(tactics) > 0
    assert all([isinstance(tactic, str) for tactic in tactics])
    # Remove unnecessary spaces, newlines, and tabs
    tactics = [tactic.strip() for tactic in tactics]
    # Keep the stripped tactics to detect changes made by the executor.
    original_tactics = list(tactics)
    try:
        state, next_state, reward, done, env_info = self._run_tactics(tactics, state, action, env_info)
    except Exception:
        self.logger.exception(f"Exception occured while running tactics:\n {tactics}")
        self.logger.info("Resetting the environment and running all the tactics again")
        self._reset_and_restore_history()
        next_state = self.state
        reward = -1.0
        done = False
        env_info.progress = ProgressState.FAILED
        env_info.error_message = self._dynamic_proof_executor.get_last_exception()

    if self.language == ProofAction.Language.ISABELLE and original_tactics != tactics:
        # In Isabelle the proof executor may modify the tactics when hammer
        # is used, so the action is updated to what was actually run.
        action.kwargs["tactics"] = tactics

    self._history[history_idx] = (state, action, next_state, reward, done, env_info)
345
+
346
def _run_tactics(self, tactics: typing.List[str], state: ProofState, action: ProofAction, env_info: ProofEnvInfo):
    """Run ``tactics`` on the executor and compute the resulting transition.

    On success the tactics are added to the proof tree and the depth grows by
    one; on failure anything partially executed is cancelled. The reward is a
    depth penalty plus a completion or progress bonus. ``state`` has its
    ``training_data_format.proof_steps`` overwritten on success.

    Returns:
        ``(state, next_state, reward, done, env_info)`` where ``env_info`` is
        a modified deep copy of the argument.
    """
    env_info = copy.deepcopy(env_info)
    executor = self._dynamic_proof_executor
    tactic_line_num, ran_successfully = executor.run_tactics(tactics)
    proof_progressed = bool(ran_successfully)
    if proof_progressed:
        state.training_data_format.proof_steps = copy.deepcopy(tactics)
        # add the proof step to the proof tree
        self._p_tree.try_add_tactic(tactic_line_num, state.training_data_format, force_add=True, action=action)
        self.current_proof_depth += 1
        current_proof_state = self.state
    else:
        self._possible_failure_paths += 1
        assert len(self._p_tree) == self.current_proof_depth, "proof_tree must have the same length as current_depth"
        # cancel anything which might got executed
        executor.cancel_tactic_till_line(tactic_line_num)

    # Depth penalty grows linearly with depth and saturates at max_proof_depth.
    depth_ratio = min(self.current_proof_depth/self.max_proof_depth, 1.0)
    reward = 0.0 + depth_ratio * ProofEnv.max_depth_penalty
    done = self.done
    if not proof_progressed:
        env_info.progress = ProgressState.FAILED
        env_info.error_message = executor.get_last_exception()
        # A failed step leaves the proof where it was; hand back a copy.
        current_proof_state = copy.deepcopy(state)
    elif done:
        reward += ProofEnv.max_proof_completion_reward
        env_info.progress = ProgressState.DONE
        env_info.error_message = None
    else:
        reward += ProofEnv.progress_reward
        if state != current_proof_state:
            env_info.progress = ProgressState.STATE_CHANGED
        else:
            env_info.progress = ProgressState.STATE_UNCHANGED
        env_info.error_message = None
    # The first tactic can fail without any reset having happened, so the
    # reset flag is always derived from whether the history is empty.
    current_proof_state.was_reset = len(self._history) == 0
    return (state, current_proof_state, reward, done, env_info)
389
+
390
def _get_dfns_thms(self, history_idx: int = None):
    """Execute the GET_DFNS_THMS action stored at ``history_idx`` and record its outcome.

    Args:
        history_idx: Index of the history entry to execute; defaults to the last one.
    """
    assert self._loaded, "Env not loaded, call reset() first"
    if history_idx is None:
        history_idx = len(self._history) - 1
    entry = self._history[history_idx]
    state, action, env_info = entry[0], entry[1], entry[5]
    assert action.action_type == ProofAction.ActionType.GET_DFNS_THMS, "Action must be of type GET_DFNS_THMS"
    next_state, reward, done, env_info = self._get_current_dfns_thms(env_info)
    self._history[history_idx] = (state, action, next_state, reward, done, env_info)
397
+
398
def _get_current_dfns_thms(self, env_info : ProofEnvInfo):
    """Retrieve and rank the definitions/theorems relevant to the current goals.

    Definitions and theorems are ordered by re-ranker score (highest first)
    and each kept reference gets ``score`` set to its own score divided by the
    sum of all scores in its list. Theorems whose lemma name already appears
    among a goal's relevant definitions are dropped.

    Args:
        env_info: Info object updated in place (progress and error message).

    Returns:
        ``(proof_state, reward, done, env_info)`` with ``reward`` always 0.0.
    """
    strategy = self.retrieve_strategy
    should_print_symbol = strategy in (
        ProofEnvReRankStrategy.BM25_WITH_PRINT,
        ProofEnvReRankStrategy.BM25_WITH_PRINT_ONLY_LOCAL,
        ProofEnvReRankStrategy.BM25_WITH_PRINT_NO_DFNS,
        ProofEnvReRankStrategy.BM25_WITH_PRINT_ONLY_LOCAL_NO_DFNS,
    )
    only_local = strategy in (
        ProofEnvReRankStrategy.BM25_WITH_PRINT_ONLY_LOCAL,
        ProofEnvReRankStrategy.BM25_WITH_PRINT_ONLY_LOCAL_NO_DFNS,
        ProofEnvReRankStrategy.BM25_ONLY_LOCAL_NO_DFNS,
    )
    only_proof_state = strategy == ProofEnvReRankStrategy.NO_RE_RANK
    should_have_relevant_dfns = not only_proof_state and strategy in (
        ProofEnvReRankStrategy.BM25_WITH_PRINT,
        ProofEnvReRankStrategy.BM25_WITH_PRINT_ONLY_LOCAL,
        ProofEnvReRankStrategy.BM25,
    )
    relevant_defns_thms = self._dynamic_proof_executor.get_all_relevant_defns_and_thms(should_print_symbol, only_local, only_proof_state)

    def lemma_of(lemma_ref):
        # Resolve a lemma reference against the retrieved lemma table.
        return relevant_defns_thms.all_useful_defns_theorems[lemma_ref.lemma_idx]

    def score_candidates(query, texts):
        # When the candidates are exactly as many as the globally indexed
        # responses, reuse the index instead of re-ranking from scratch.
        indexed_responses = self._re_ranker.responses
        if len(indexed_responses) > 0 and len(texts) == len(indexed_responses):
            return self._re_ranker.get_scores(query)
        return self._re_ranker.rerank(query, texts)

    def rank_and_score(lemma_refs, scores):
        # Order the references by score (highest first) and store each
        # reference's own normalised score on it.
        order = [(position, score) for position, score in enumerate(scores)]
        order.sort(key=lambda x: x[1], reverse=True)
        total = sum([score for _, score in order]) + 1e-6
        ranked = []
        for position, score in order:
            lemma_ref = lemma_refs[position]
            lemma_ref.score = score/total
            ranked.append(lemma_ref)
        return ranked

    if should_have_relevant_dfns:
        for idx, goal in enumerate(relevant_defns_thms.start_goals):
            query = relevant_defns_thms.get_human_readable_serialized_goal(idx, skip_special_tokens=True)
            responses = [str(lemma_of(lemma_ref)) for lemma_ref in goal.relevant_defns]
            response_scores = score_candidates(query, responses)
            goal.relevant_defns = rank_and_score(goal.relevant_defns, response_scores)
    else:
        for goal in relevant_defns_thms.start_goals:
            goal.relevant_defns = []

    should_have_relevant_lemmas = not only_proof_state
    if should_have_relevant_lemmas:
        for idx, goal in enumerate(relevant_defns_thms.start_goals):
            query = relevant_defns_thms.get_human_readable_serialized_goal(idx, skip_special_tokens=True)
            local_texts = [str(lemma_of(lemma_ref)) for lemma_ref in goal.possible_useful_theorems_local]
            if strategy == ProofEnvReRankStrategy.BM25_WITH_PRINT_ONLY_LOCAL:
                global_texts = []
            else:
                global_texts = [str(lemma_of(lemma_ref)) for lemma_ref in goal.possible_useful_theorems_external]
            local_scores = score_candidates(query, local_texts)
            # External theorems are always re-ranked explicitly.
            global_scores = self._re_ranker.rerank(query, global_texts)
            # Scores are attached BEFORE filtering so that every kept theorem
            # carries its own score rather than that of whichever theorem
            # occupied the same position in the unfiltered ranking.
            local_ranked = rank_and_score(goal.possible_useful_theorems_local, local_scores)
            global_ranked = rank_and_score(goal.possible_useful_theorems_external, global_scores)
            # Remove any theorem which is already listed in the relevant defns
            relevant_dfns_names = {lemma_of(lemma_ref).lemma_name for lemma_ref in goal.relevant_defns}
            goal.possible_useful_theorems_local = [
                lemma_ref for lemma_ref in local_ranked
                if lemma_of(lemma_ref).lemma_name not in relevant_dfns_names]
            goal.possible_useful_theorems_external = [
                lemma_ref for lemma_ref in global_ranked
                if lemma_of(lemma_ref).lemma_name not in relevant_dfns_names]
    lemma_stmt = self._dynamic_proof_executor.get_lemma_stmt_if_running()
    current_proof_state = ProofState(relevant_defns_thms, language=self.language, theorem_statement_with_name=lemma_stmt)
    current_proof_state.proof_tree = copy.deepcopy(self._p_tree)
    done = self.done
    env_info.progress = ProgressState.STATE_UNCHANGED if not done else ProgressState.DONE
    env_info.error_message = None
    reward = 0.0
    return current_proof_state, reward, done, env_info
475
+
476
def _backtrack(self, history_idx: int = None):
    """Execute the BACKTRACK action stored at ``history_idx`` and record its outcome.

    Removes the last tactic from the proof tree and cancels it on the
    executor. If cancelling raises, the environment is reset and the remaining
    proof tree is replayed, which has the same effect. When there is nothing
    to undo, the step is recorded as FAILED with reward -1.0.

    Args:
        history_idx: Index of the history entry to execute; defaults to the last one.
    """
    assert self._loaded, "Env not loaded, call reset() first"
    history_idx = len(self._history) - 1 if history_idx is None else history_idx
    state, action, current_proof_state, reward, done, env_info = self._history[history_idx]
    assert action.action_type == ProofAction.ActionType.BACKTRACK, "Action must be of type BACKTRACK"
    last_tactic_line, last_tactic = self._p_tree.try_remove_last_tactic()
    assert (last_tactic is None) == (last_tactic_line is None), "last tactic and last tactic line must be either both None or both not None"
    if last_tactic is not None:
        try:
            self._dynamic_proof_executor.cancel_tactic_till_line(last_tactic_line)
            self.current_proof_depth -= 1
        except Exception:
            self.logger.exception("Exception occured while backtracking")
            # The tactic is already gone from the proof tree, so resetting
            # and replaying the tree lands exactly one step back.
            self._reset_and_restore_history()
            self.logger.warning("Backtracking failed, resetting the environment and running all the tactics again till two-steps before the backtracked step (hence effectively backtracking!)")

        if self._dynamic_proof_executor.is_in_proof_mode():
            env_info.progress = ProgressState.STATE_CHANGED
            env_info.error_message = "Backtracked successfully"
            reward = 0.0
        else:
            raise Exception("This should never happen as reset() should always take back the environment to a valid proof state in which the proof mode is on")
    else:
        reward = -1.0
        env_info.progress = ProgressState.FAILED
        env_info.error_message = "Cannot backtrack any further"
    current_proof_state = self.state
    done = self.done
    self._history[history_idx] = (state, action, current_proof_state, reward, done, env_info)
515
+
516
def _foward_to_lemma_proof(self):
    """Advance the executor to the start of the proof of ``self.lemma_name``.

    Coq and Isabelle files are walked lemma by lemma until a running lemma
    whose name starts with ``self.lemma_name`` is found; Lean executors jump
    to the theorem directly. The lemma statement is stored in
    ``self._lemma_name_with_stmt``.

    Raises:
        NotImplementedError: For an unknown executor type.
        Exception: If the lemma is not found.
    """
    assert self._loaded, "Env not loaded, call reset() first"
    self._lemma_name_with_stmt = None
    executor = self._dynamic_proof_executor

    def scan_file_for_lemma():
        # Step through the file one lemma at a time; stop at the target
        # lemma or when the file has been executed completely.
        while not executor.execution_complete:
            assert not executor.is_in_proof_mode(), "executor must not be in proof mode"
            _ = list(executor.run_till_next_lemma_return_exec_stmt())
            if executor.execution_complete:
                return False
            running_lemma = executor.get_lemma_name_if_running()
            if running_lemma is not None and running_lemma.strip().startswith(self.lemma_name):
                return True
            # Not the lemma we want: run through its proof and keep looking.
            _ = list(executor.run_to_finish_lemma_return_exec())
        return False

    if isinstance(executor, DynamicCoqProofExecutor):
        lemma_found = scan_file_for_lemma()
    elif isinstance(executor, (DynamicLeanProofExecutor, DynamicLean4ProofExecutor)):
        executor.skip_to_theorem(self.lemma_name)
        lemma_found = True
    elif isinstance(executor, DynamicIsabelleProofExecutor):
        lemma_found = scan_file_for_lemma()
    else:
        raise NotImplementedError(f"Proof executor {type(self._dynamic_proof_executor)} not implemented")

    if not lemma_found:
        raise Exception(f"Could not find lemma {self.lemma_name}")
    self._lemma_name_with_stmt = executor.get_lemma_stmt_if_running().strip()
559
+
560
def _reset_and_restore_history(self):
    """Reset the environment, replay the current proof tree, and put the history back.

    Used to recover from executor failures: the history and proof tree are
    deep-copied, the environment is reset (which clears both), every tactic of
    the saved tree is run again, and the saved history is reinstated.
    """
    saved_history = copy.deepcopy(self._history)
    saved_tree = copy.deepcopy(self._p_tree)
    self.reset() # To ensure that everything is fine we start again
    self.logger.debug("Replaying the tactics of the saved proof tree after reset")
    # Run all the current steps in the proof tree
    for (_, tactic), action in zip(saved_tree.tactics, saved_tree.actions):
        self._run_tactics(tactic.proof_steps, self.state, action, ProofEnvInfo(progress=ProgressState.STARTING))
    # The replay is not recorded step by step; the saved history already
    # describes these transitions.
    self._history = saved_history
570
+
571
def cleanup(self):
    """Release the proof executor; equivalent to leaving the context manager without an exception."""
    self.__exit__(None, None, None)
574
+
575
+
576
@ray.remote
class ProofEnvActor(ProofEnv):
    """Ray actor wrapper around ``ProofEnv``.

    Accepts the same arguments as ``ProofEnv`` plus the keyword
    ``should_load_env`` (default True); when true the environment is entered
    (and therefore reset) during construction.
    """
    def __init__(self, *args, **kwargs):
        # ``should_load_env`` is consumed here and not forwarded to ProofEnv.
        self._should_load_env = kwargs.pop("should_load_env", True)
        self._env_args = args
        self._env_kwargs = kwargs
        super().__init__(*args, **kwargs)
        if self._should_load_env:
            super().__enter__()

    def get_env_args(self):
        """Positional arguments the environment was constructed with."""
        return self._env_args

    def get_env_kwargs(self):
        """Keyword arguments the environment was constructed with (without ``should_load_env``)."""
        return self._env_kwargs

    def should_load_env(self):
        """Whether the environment was loaded at construction time."""
        return self._should_load_env

    def get_timeout(self):
        """Timeout in seconds configured on the proof executor callback."""
        return self.dynamic_proof_executor_callback.timeout_in_secs
599
+
600
if __name__ == "__main__":
    # Interactive demo: pick a prover, then feed actions to a proof
    # environment (through a Ray actor by default) until the proof is done
    # or EXIT is chosen.
    import os
    os.chdir(root_dir)

    print("Interactive Proof Environment")
    supported_actions = [x.name for x in ProofAction.ActionType]

    def scan_action(language):
        """Prompt on stdin for the next action and build a ``ProofAction``.

        Any input that is not a supported action name falls back to
        RUN_TACTIC. For RUN_TACTIC a second prompt reads the tactics, which
        are split on ';' and passed on as-is.

        Raises:
            Exception: if the chosen action type is not handled here.
        """
        chosen = input(f"Enter an action type from {supported_actions}: (default RUN_TACTIC)")
        if chosen not in supported_actions:
            chosen = ProofAction.ActionType.RUN_TACTIC.name
        action_type = ProofAction.ActionType[chosen]
        if action_type == ProofAction.ActionType.RUN_TACTIC:
            tactics = input("Enter tactic(s) (';' separated): ").split(';')
            return ProofAction(action_type, language, tactics=tactics)
        if action_type in (
            ProofAction.ActionType.GET_DFNS_THMS,
            ProofAction.ActionType.BACKTRACK,
            ProofAction.ActionType.EXIT,
        ):
            return ProofAction(action_type, language)
        raise Exception(f"Invalid action type {action_type}")

    logging.basicConfig(level=logging.INFO, stream=sys.stdout)
    inp = input("Want to run coq, lean, or isabelle env? (Enter 'coq'/'lean'/'lean4'/'isabelle') ")
    # Every valid choice sets the callback, theorem, language and retrieval
    # settings; anything else aborts before they are used.
    if inp == 'coq':
        proof_exec_callback = ProofExecutorCallback(
            project_folder=".",
            file_path="data/test/SimpleAlgebra.v"
        )
        theorem_name = "algb_add_comm"
        language = ProofAction.Language.COQ
        always_retrieve_thms = False
        retrieval_strategy = ProofEnvReRankStrategy.BM25
    elif inp == 'lean':
        proof_exec_callback = ProofExecutorCallback(
            project_folder="data/test/lean_proj",
            file_path="data/test/lean_proj/src/simple_solved.lean",
            language=ProofAction.Language.LEAN,
            always_use_retrieval=True,
            keep_local_context=True
        )
        theorem_name = "a_plus_b_a_minus_a"
        language = ProofAction.Language.LEAN
        always_retrieve_thms = True
        retrieval_strategy = ProofEnvReRankStrategy.BM25
    elif inp == 'lean4':
        proof_exec_callback = ProofExecutorCallback(
            project_folder="data/test/lean4_proj",
            file_path="data/test/lean4_proj/Lean4Proj/Basic.lean",
            language=ProofAction.Language.LEAN4,
            always_use_retrieval=False,
            keep_local_context=True
        )
        theorem_name = "test3"
        language = ProofAction.Language.LEAN4
        always_retrieve_thms = False
        retrieval_strategy = ProofEnvReRankStrategy.NO_RE_RANK
    elif inp == 'isabelle':
        proof_exec_callback = ProofExecutorCallback(
            project_folder="data/test",
            file_path="data/test/SimpleAlgebra.thy",
            language=ProofAction.Language.ISABELLE,
            use_hammer=HammerMode.AUTO
        )
        theorem_name = "sqrt_comp"
        language = ProofAction.Language.ISABELLE
        always_retrieve_thms = False
        retrieval_strategy = ProofEnvReRankStrategy.BM25
    else:
        raise Exception(f"Invalid input {inp} for choosing coq/lean/lean4/isabelle env")

    if language == ProofAction.Language.ISABELLE:
        IsabelleExecutor.start_server(port=13000)

    try:
        test_ray = True
        if test_ray:
            logger = logging.getLogger(__name__)
            ray.init()
            env_actor = ProofEnvActor.remote(
                "test",
                proof_exec_callback,
                theorem_name,
                retrieval_strategy=retrieval_strategy,
                max_proof_depth=10,
                always_retrieve_thms=always_retrieve_thms,
                logger=logger,
            )
            try:
                done = ray.get(env_actor.get_done.remote())
                # Show the initial proof state before the first prompt, as
                # the non-Ray path below does.
                ray.get(env_actor.render.remote())
                action = scan_action(language)
                while action.action_type != ProofAction.ActionType.EXIT and not done:
                    _, _, _, reward, done, info = ray.get(env_actor.step.remote(action))
                    print(f"Reward: {reward}")
                    print(f"Done: {done}")
                    print(f"Info: {info.to_json()}")
                    ray.get(env_actor.render.remote())
                    if not done:
                        action = scan_action(language)
            finally:
                # Always release the environment, even when a step raises or
                # the user interrupts; otherwise the actor is left running.
                # Wait for the cleanup to finish before killing the actor.
                ray.get(env_actor.cleanup.remote())
                ray.kill(env_actor)
        else:
            with ProofEnv(
                "test",
                proof_exec_callback,
                theorem_name,
                retrieval_strategy=retrieval_strategy,
                max_proof_depth=10,
                always_retrieve_thms=always_retrieve_thms,
            ) as env:
                done = env.done
                env.render()
                action = scan_action(language)
                while action.action_type != ProofAction.ActionType.EXIT and not done:
                    _, _, _, reward, done, info = env.step(action)
                    env.render()
                    if not done:
                        action = scan_action(language)
    finally:
        # Stop the Isabelle server started above, whatever happened.
        if language == ProofAction.Language.ISABELLE:
            IsabelleExecutor.stop_server()