itp-interface 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (485) hide show
  1. itp_interface/__init__.py +0 -0
  2. itp_interface/agent/__init__.py +0 -0
  3. itp_interface/agent/simple_proof_agent.py +100 -0
  4. itp_interface/coq_ser_api/__init__.py +165 -0
  5. itp_interface/coq_ser_api/contexts.py +283 -0
  6. itp_interface/coq_ser_api/coq_agent.py +459 -0
  7. itp_interface/coq_ser_api/coq_backend.py +135 -0
  8. itp_interface/coq_ser_api/coq_util.py +839 -0
  9. itp_interface/coq_ser_api/example.py +67 -0
  10. itp_interface/coq_ser_api/lsp_backend.py +375 -0
  11. itp_interface/coq_ser_api/py.typed +0 -0
  12. itp_interface/coq_ser_api/serapi_backend.py +841 -0
  13. itp_interface/coq_ser_api/util.py +145 -0
  14. itp_interface/coq_ser_api_old/__init__.py +2583 -0
  15. itp_interface/coq_ser_api_old/contexts.py +172 -0
  16. itp_interface/coq_ser_api_old/util.py +146 -0
  17. itp_interface/lean_server/__init__.py +0 -0
  18. itp_interface/lean_server/commands.py +484 -0
  19. itp_interface/lean_server/lean3_search_tool.py +358 -0
  20. itp_interface/lean_server/lean4_repl_interface.py +151 -0
  21. itp_interface/lean_server/lean4_utils.py +255 -0
  22. itp_interface/lean_server/lean_cmd_server.py +111 -0
  23. itp_interface/lean_server/lean_context.py +60 -0
  24. itp_interface/lean_server/lean_sync_server.py +174 -0
  25. itp_interface/lean_server/lean_utils.py +199 -0
  26. itp_interface/lean_server/py.typed +1 -0
  27. itp_interface/main/__init__.py +0 -0
  28. itp_interface/main/config/afp_data_gen.yaml +14 -0
  29. itp_interface/main/config/benchmark/CompCert.yaml +366 -0
  30. itp_interface/main/config/benchmark/GeoCoq.yaml +930 -0
  31. itp_interface/main/config/benchmark/UniMath.yaml +2690 -0
  32. itp_interface/main/config/benchmark/afp_isabelle.yaml +29200 -0
  33. itp_interface/main/config/benchmark/agent_proverbot_hard.yaml +247 -0
  34. itp_interface/main/config/benchmark/category-theory.yaml +470 -0
  35. itp_interface/main/config/benchmark/compcert_118_subset.yaml +148 -0
  36. itp_interface/main/config/benchmark/compcert_benchmark.yaml +36 -0
  37. itp_interface/main/config/benchmark/compcert_benchmark_hard.yaml +498 -0
  38. itp_interface/main/config/benchmark/compcert_benchmark_hard_1.yaml +55 -0
  39. itp_interface/main/config/benchmark/compcert_benchmark_hard_2.yaml +24 -0
  40. itp_interface/main/config/benchmark/compcert_benchmark_hard_3.yaml +95 -0
  41. itp_interface/main/config/benchmark/compcert_benchmark_hard_7_per_cent.yaml +78 -0
  42. itp_interface/main/config/benchmark/compcert_benchmark_test.yaml +38 -0
  43. itp_interface/main/config/benchmark/compcert_benchmark_train.yaml +340 -0
  44. itp_interface/main/config/benchmark/leandojo_novel_premises_test.yaml +2908 -0
  45. itp_interface/main/config/benchmark/leandojo_novel_premises_train.yaml +98645 -0
  46. itp_interface/main/config/benchmark/leandojo_novel_premises_val.yaml +2912 -0
  47. itp_interface/main/config/benchmark/leandojo_random.yaml +2889 -0
  48. itp_interface/main/config/benchmark/leandojo_random_test.yaml +2421 -0
  49. itp_interface/main/config/benchmark/leandojo_random_train.yaml +62729 -0
  50. itp_interface/main/config/benchmark/leandojo_random_val.yaml +2504 -0
  51. itp_interface/main/config/benchmark/math-comp.yaml +200 -0
  52. itp_interface/main/config/benchmark/miniF2F_test.yaml +12 -0
  53. itp_interface/main/config/benchmark/miniF2F_test_aime.yaml +27 -0
  54. itp_interface/main/config/benchmark/miniF2F_test_algebra.yaml +30 -0
  55. itp_interface/main/config/benchmark/miniF2F_test_amc12.yaml +57 -0
  56. itp_interface/main/config/benchmark/miniF2F_test_few_shot_hard.yaml +231 -0
  57. itp_interface/main/config/benchmark/miniF2F_test_imo.yaml +32 -0
  58. itp_interface/main/config/benchmark/miniF2F_test_induction.yaml +20 -0
  59. itp_interface/main/config/benchmark/miniF2F_test_mathd_algebra.yaml +82 -0
  60. itp_interface/main/config/benchmark/miniF2F_test_mathd_algebra_hard.yaml +72 -0
  61. itp_interface/main/config/benchmark/miniF2F_test_mathd_numbertheory.yaml +72 -0
  62. itp_interface/main/config/benchmark/miniF2F_test_numbertheory.yaml +20 -0
  63. itp_interface/main/config/benchmark/minicompcert_benchmark_1.yaml +14 -0
  64. itp_interface/main/config/benchmark/proverbot_hard.yaml +104 -0
  65. itp_interface/main/config/benchmark/re_prover.yaml +66 -0
  66. itp_interface/main/config/benchmark/re_prover_hard.yaml +41 -0
  67. itp_interface/main/config/benchmark/re_prover_very_hard.yaml +22 -0
  68. itp_interface/main/config/benchmark/reprover_with_retrieval.yaml +73 -0
  69. itp_interface/main/config/benchmark/reprover_with_retrieval_hard.yaml +30 -0
  70. itp_interface/main/config/benchmark/reprover_with_retrieval_neg.yaml +195 -0
  71. itp_interface/main/config/benchmark/simple_benchmark_1.yaml +24 -0
  72. itp_interface/main/config/benchmark/simple_benchmark_8.yaml +50 -0
  73. itp_interface/main/config/benchmark/simple_benchmark_9.yaml +65 -0
  74. itp_interface/main/config/benchmark/simple_benchmark_isabelle.yaml +18 -0
  75. itp_interface/main/config/benchmark/simple_benchmark_lean.yaml +12 -0
  76. itp_interface/main/config/benchmark/simple_benchmark_lean_training_data.yaml +12 -0
  77. itp_interface/main/config/benchmark/simple_rl_benchmark_lean.yaml +14 -0
  78. itp_interface/main/config/benchmark/stack_machine.yaml +13 -0
  79. itp_interface/main/config/benchmark/stack_machine_hard.yaml +15 -0
  80. itp_interface/main/config/category_theory_data_gen.yaml +14 -0
  81. itp_interface/main/config/category_theory_data_gen_random.yaml +16 -0
  82. itp_interface/main/config/compcert_data_gen_test.yaml +10 -0
  83. itp_interface/main/config/compcert_data_gen_train.yaml +7 -0
  84. itp_interface/main/config/env_settings/bm25_retrieval.yaml +2 -0
  85. itp_interface/main/config/env_settings/bm25_retrieval_no_dfns.yaml +2 -0
  86. itp_interface/main/config/env_settings/bm25_retrieval_only_local_no_dfns.yaml +2 -0
  87. itp_interface/main/config/env_settings/bm25_retrieval_with_print.yaml +2 -0
  88. itp_interface/main/config/env_settings/bm25_retrieval_with_print_only_local.yaml +2 -0
  89. itp_interface/main/config/env_settings/bm25_retrieval_with_print_only_local_no_dfns.yaml +2 -0
  90. itp_interface/main/config/env_settings/no_retrieval.yaml +2 -0
  91. itp_interface/main/config/experiments.yaml +12 -0
  92. itp_interface/main/config/geo_coq_data_gen.yaml +14 -0
  93. itp_interface/main/config/geo_coq_data_gen_random.yaml +16 -0
  94. itp_interface/main/config/leandojo_random_data_gen.yaml +16 -0
  95. itp_interface/main/config/math_comp_data_gen.yaml +14 -0
  96. itp_interface/main/config/math_comp_data_gen_random.yaml +16 -0
  97. itp_interface/main/config/mathlib_data_gen.yaml +14 -0
  98. itp_interface/main/config/repo/coq_repos.yaml +191 -0
  99. itp_interface/main/config/run_settings/default_coq_data_generation_transforms.yaml +24 -0
  100. itp_interface/main/config/run_settings/default_isabelle_data_generation_transforms.yaml +24 -0
  101. itp_interface/main/config/run_settings/default_lean4_data_generation_transforms.yaml +24 -0
  102. itp_interface/main/config/run_settings/default_lean_data_generation_transforms.yaml +24 -0
  103. itp_interface/main/config/simple_coq_data_gen.yaml +12 -0
  104. itp_interface/main/config/simple_coq_data_gen_random.yaml +17 -0
  105. itp_interface/main/config/simple_lean_data_gen.yaml +12 -0
  106. itp_interface/main/config/simple_rl_lean_data_gen.yaml +12 -0
  107. itp_interface/main/config/uni_math_data_gen.yaml +14 -0
  108. itp_interface/main/config.py +192 -0
  109. itp_interface/main/extract_benchmark_dataset.py +106 -0
  110. itp_interface/main/filter_dataset.py +107 -0
  111. itp_interface/main/install.py +92 -0
  112. itp_interface/main/merge_dataset.py +96 -0
  113. itp_interface/main/run_tool.py +444 -0
  114. itp_interface/pisa/.git +1 -0
  115. itp_interface/pisa/.gitignore +125 -0
  116. itp_interface/pisa/.idea/.gitignore +8 -0
  117. itp_interface/pisa/.idea/ClojureProjectResolveSettings.xml +6 -0
  118. itp_interface/pisa/.idea/codeStyles/Project.xml +7 -0
  119. itp_interface/pisa/.idea/codeStyles/codeStyleConfig.xml +5 -0
  120. itp_interface/pisa/.idea/inspectionProfiles/Project_Default.xml +16 -0
  121. itp_interface/pisa/.idea/libraries/sbt__com_google_android_annotations_4_1_1_4_jar.xml +13 -0
  122. itp_interface/pisa/.idea/libraries/sbt__com_google_api_grpc_proto_google_common_protos_1_17_0_jar.xml +13 -0
  123. itp_interface/pisa/.idea/libraries/sbt__com_google_code_findbugs_jsr305_3_0_2_jar.xml +13 -0
  124. itp_interface/pisa/.idea/libraries/sbt__com_google_code_gson_gson_2_8_6_jar.xml +13 -0
  125. itp_interface/pisa/.idea/libraries/sbt__com_google_errorprone_error_prone_annotations_2_3_4_jar.xml +13 -0
  126. itp_interface/pisa/.idea/libraries/sbt__com_google_guava_failureaccess_1_0_1_jar.xml +13 -0
  127. itp_interface/pisa/.idea/libraries/sbt__com_google_guava_guava_30_0_jre_jar.xml +13 -0
  128. itp_interface/pisa/.idea/libraries/sbt__com_google_guava_listenablefuture_9999_0_empty_to_avoid_conflict_with_guava_jar.xml +9 -0
  129. itp_interface/pisa/.idea/libraries/sbt__com_google_j2objc_j2objc_annotations_1_3_jar.xml +13 -0
  130. itp_interface/pisa/.idea/libraries/sbt__com_google_protobuf_protobuf_java_3_12_0_jar.xml +13 -0
  131. itp_interface/pisa/.idea/libraries/sbt__com_google_protobuf_protobuf_java_util_3_12_0_jar.xml +13 -0
  132. itp_interface/pisa/.idea/libraries/sbt__com_lihaoyi_fastparse_2_13_2_3_0_jar.xml +13 -0
  133. itp_interface/pisa/.idea/libraries/sbt__com_lihaoyi_geny_2_13_0_6_0_jar.xml +13 -0
  134. itp_interface/pisa/.idea/libraries/sbt__com_lihaoyi_sourcecode_2_13_0_2_1_jar.xml +13 -0
  135. itp_interface/pisa/.idea/libraries/sbt__com_thesamet_scalapb_lenses_2_13_0_10_9_jar.xml +13 -0
  136. itp_interface/pisa/.idea/libraries/sbt__com_thesamet_scalapb_scalapb_runtime_2_13_0_10_9_jar.xml +13 -0
  137. itp_interface/pisa/.idea/libraries/sbt__com_thesamet_scalapb_scalapb_runtime_grpc_2_13_0_10_9_jar.xml +13 -0
  138. itp_interface/pisa/.idea/libraries/sbt__com_thesamet_scalapb_zio_grpc_zio_grpc_core_2_13_0_4_2_jar.xml +13 -0
  139. itp_interface/pisa/.idea/libraries/sbt__com_thoughtworks_paranamer_paranamer_2_8_jar.xml +13 -0
  140. itp_interface/pisa/.idea/libraries/sbt__commons_io_commons_io_2_8_0_jar.xml +13 -0
  141. itp_interface/pisa/.idea/libraries/sbt__de_unruh_java_patterns_0_1_0_jar.xml +13 -0
  142. itp_interface/pisa/.idea/libraries/sbt__de_unruh_scala_isabelle_2_13_master_SNAPSHOT_jar.xml +13 -0
  143. itp_interface/pisa/.idea/libraries/sbt__dev_zio_izumi_reflect_2_13_1_0_0_M9_jar.xml +13 -0
  144. itp_interface/pisa/.idea/libraries/sbt__dev_zio_izumi_reflect_thirdparty_boopickle_shaded_2_13_1_0_0_M9_jar.xml +13 -0
  145. itp_interface/pisa/.idea/libraries/sbt__dev_zio_zio_2_13_1_0_3_jar.xml +13 -0
  146. itp_interface/pisa/.idea/libraries/sbt__dev_zio_zio_stacktracer_2_13_1_0_3_jar.xml +13 -0
  147. itp_interface/pisa/.idea/libraries/sbt__dev_zio_zio_streams_2_13_1_0_3_jar.xml +13 -0
  148. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_api_1_34_0_jar.xml +13 -0
  149. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_context_1_34_0_jar.xml +13 -0
  150. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_core_1_34_0_jar.xml +13 -0
  151. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_netty_1_34_0_jar.xml +13 -0
  152. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_protobuf_1_34_0_jar.xml +13 -0
  153. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_protobuf_lite_1_34_0_jar.xml +13 -0
  154. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_services_1_34_0_jar.xml +13 -0
  155. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_stub_1_34_0_jar.xml +13 -0
  156. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_buffer_4_1_51_Final_jar.xml +13 -0
  157. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_codec_4_1_51_Final_jar.xml +13 -0
  158. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_codec_http2_4_1_51_Final_jar.xml +13 -0
  159. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_codec_http_4_1_51_Final_jar.xml +13 -0
  160. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_codec_socks_4_1_51_Final_jar.xml +13 -0
  161. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_common_4_1_51_Final_jar.xml +13 -0
  162. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_handler_4_1_51_Final_jar.xml +13 -0
  163. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_handler_proxy_4_1_51_Final_jar.xml +13 -0
  164. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_resolver_4_1_51_Final_jar.xml +13 -0
  165. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_transport_4_1_51_Final_jar.xml +13 -0
  166. itp_interface/pisa/.idea/libraries/sbt__io_perfmark_perfmark_api_0_19_0_jar.xml +13 -0
  167. itp_interface/pisa/.idea/libraries/sbt__net_java_dev_jna_jna_5_3_1_jar.xml +13 -0
  168. itp_interface/pisa/.idea/libraries/sbt__net_liftweb_lift_json_2_13_3_4_3_jar.xml +13 -0
  169. itp_interface/pisa/.idea/libraries/sbt__org_apache_commons_commons_lang3_3_11_jar.xml +13 -0
  170. itp_interface/pisa/.idea/libraries/sbt__org_apache_commons_commons_text_1_9_jar.xml +13 -0
  171. itp_interface/pisa/.idea/libraries/sbt__org_checkerframework_checker_qual_3_5_0_jar.xml +13 -0
  172. itp_interface/pisa/.idea/libraries/sbt__org_codehaus_mojo_animal_sniffer_annotations_1_18_jar.xml +13 -0
  173. itp_interface/pisa/.idea/libraries/sbt__org_jetbrains_annotations_20_1_0_jar.xml +13 -0
  174. itp_interface/pisa/.idea/libraries/sbt__org_jline_jline_3_16_0_jar.xml +13 -0
  175. itp_interface/pisa/.idea/libraries/sbt__org_log4s_log4s_2_13_1_9_0_jar.xml +13 -0
  176. itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_modules_scala_collection_compat_2_13_2_1_6_jar.xml +13 -0
  177. itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_modules_scala_xml_2_13_1_3_0_jar.xml +13 -0
  178. itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_scala_compiler_2_13_4_jar.xml +13 -0
  179. itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_scala_library_2_13_4_jar.xml +23 -0
  180. itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_scala_reflect_2_13_4_jar.xml +13 -0
  181. itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_scalap_2_13_4_jar.xml +13 -0
  182. itp_interface/pisa/.idea/libraries/sbt__org_scalaz_scalaz_core_2_13_7_3_2_jar.xml +13 -0
  183. itp_interface/pisa/.idea/libraries/sbt__org_slf4j_slf4j_api_1_7_30_jar.xml +13 -0
  184. itp_interface/pisa/.idea/libraries/sbt__org_slf4j_slf4j_simple_1_7_30_jar.xml +13 -0
  185. itp_interface/pisa/.idea/misc.xml +7 -0
  186. itp_interface/pisa/.idea/modules/PISA-build.iml +127 -0
  187. itp_interface/pisa/.idea/modules/PISA.iml +94 -0
  188. itp_interface/pisa/.idea/modules.xml +9 -0
  189. itp_interface/pisa/.idea/other.xml +6 -0
  190. itp_interface/pisa/.idea/sbt.xml +20 -0
  191. itp_interface/pisa/.idea/scala_compiler.xml +6 -0
  192. itp_interface/pisa/.idea/uiDesigner.xml +124 -0
  193. itp_interface/pisa/.idea/vcs.xml +6 -0
  194. itp_interface/pisa/.scalafmt.conf +2 -0
  195. itp_interface/pisa/LICENSE +29 -0
  196. itp_interface/pisa/README.md +262 -0
  197. itp_interface/pisa/build.sbt +49 -0
  198. itp_interface/pisa/build.sh +26 -0
  199. itp_interface/pisa/command_generation/close_gaps.py +44 -0
  200. itp_interface/pisa/command_generation/conjecture_normal_order.py +62 -0
  201. itp_interface/pisa/command_generation/conjecturer_command_generator.py +36 -0
  202. itp_interface/pisa/command_generation/create_dirs.py +11 -0
  203. itp_interface/pisa/command_generation/find_std.py +67 -0
  204. itp_interface/pisa/command_generation/generate_build_commands_afp.py +15 -0
  205. itp_interface/pisa/command_generation/generate_build_commands_std.py +15 -0
  206. itp_interface/pisa/command_generation/generate_commands_afp.py +103 -0
  207. itp_interface/pisa/command_generation/generate_commands_mini.py +73 -0
  208. itp_interface/pisa/command_generation/generate_commands_std.py +69 -0
  209. itp_interface/pisa/command_generation/generate_hammer_extraction_text.py +5 -0
  210. itp_interface/pisa/command_generation/hammer_command_generator.py +40 -0
  211. itp_interface/pisa/command_generation/hp_search_command_generator.py +63 -0
  212. itp_interface/pisa/command_generation/oracle_command_generator.py +56 -0
  213. itp_interface/pisa/command_generation/search_command_generator.py +69 -0
  214. itp_interface/pisa/command_generation/summarise_problem_names.py +45 -0
  215. itp_interface/pisa/command_generation/tpu_hp_search.py +75 -0
  216. itp_interface/pisa/docker/Dockerfile +34 -0
  217. itp_interface/pisa/docker/docker_tutorial.md +64 -0
  218. itp_interface/pisa/eval_setup/copy_isabelle.py +42 -0
  219. itp_interface/pisa/eval_setup/copy_pisa_jars.py +18 -0
  220. itp_interface/pisa/mesh_transformer_utils/tokenization.py +86 -0
  221. itp_interface/pisa/project/build.properties +1 -0
  222. itp_interface/pisa/project/plugins.sbt +5 -0
  223. itp_interface/pisa/requirements.txt +4 -0
  224. itp_interface/pisa/scripts/extract_last_k_steps.py +28 -0
  225. itp_interface/pisa/scripts/extract_proof_corpus.py +26 -0
  226. itp_interface/pisa/scripts/gather_hammer_results.py +27 -0
  227. itp_interface/pisa/scripts/length_in_char_stats.py +20 -0
  228. itp_interface/pisa/scripts/mix.py +127 -0
  229. itp_interface/pisa/scripts/results_stat.py +52 -0
  230. itp_interface/pisa/scripts/test_array_job.sh +34 -0
  231. itp_interface/pisa/setup.sh +25 -0
  232. itp_interface/pisa/src/main/protobuf/server.proto +60 -0
  233. itp_interface/pisa/src/main/python/.idea/.gitignore +8 -0
  234. itp_interface/pisa/src/main/python/.idea/inspectionProfiles/Project_Default.xml +18 -0
  235. itp_interface/pisa/src/main/python/.idea/inspectionProfiles/profiles_settings.xml +6 -0
  236. itp_interface/pisa/src/main/python/.idea/misc.xml +4 -0
  237. itp_interface/pisa/src/main/python/.idea/modules.xml +8 -0
  238. itp_interface/pisa/src/main/python/.idea/python.iml +12 -0
  239. itp_interface/pisa/src/main/python/.idea/vcs.xml +6 -0
  240. itp_interface/pisa/src/main/python/conjecturing_parsing/conjecturer_postprocessing.py +59 -0
  241. itp_interface/pisa/src/main/python/data_extraction/extract_data.py +184 -0
  242. itp_interface/pisa/src/main/python/data_extraction/find_premises.py +221 -0
  243. itp_interface/pisa/src/main/python/data_extraction/process_data.py +129 -0
  244. itp_interface/pisa/src/main/python/legacy/PisaFlexibleClient.py +167 -0
  245. itp_interface/pisa/src/main/python/legacy/autof_test.py +74 -0
  246. itp_interface/pisa/src/main/python/legacy/cmd_client.py +23 -0
  247. itp_interface/pisa/src/main/python/legacy/convert_scala_dump_to_test_name_jsons.py +14 -0
  248. itp_interface/pisa/src/main/python/legacy/create_data_txt.py +72 -0
  249. itp_interface/pisa/src/main/python/legacy/create_finetune_tfrecords.py +311 -0
  250. itp_interface/pisa/src/main/python/legacy/demo.py +49 -0
  251. itp_interface/pisa/src/main/python/legacy/evaluate.py +108 -0
  252. itp_interface/pisa/src/main/python/legacy/extract_first_step.py +25 -0
  253. itp_interface/pisa/src/main/python/legacy/get_global_facts.py +35 -0
  254. itp_interface/pisa/src/main/python/legacy/mix_data.py +19 -0
  255. itp_interface/pisa/src/main/python/legacy/one_stage_extraction.py +111 -0
  256. itp_interface/pisa/src/main/python/legacy/prepare_episodic_transitions.py +137 -0
  257. itp_interface/pisa/src/main/python/legacy/prepare_translation_pairs.py +277 -0
  258. itp_interface/pisa/src/main/python/pisa_client.py +322 -0
  259. itp_interface/pisa/src/main/python/server_pb2.py +394 -0
  260. itp_interface/pisa/src/main/python/server_pb2_grpc.py +230 -0
  261. itp_interface/pisa/src/main/python/test_client.py +17 -0
  262. itp_interface/pisa/src/main/python/test_client2.py +79 -0
  263. itp_interface/pisa/src/main/python/utils/filters.py +59 -0
  264. itp_interface/pisa/src/main/python/utils/pisa_server_control.py +29 -0
  265. itp_interface/pisa/src/main/scala/pisa/agent/CheckSyntax.scala +257 -0
  266. itp_interface/pisa/src/main/scala/pisa/agent/DepThms.scala +29 -0
  267. itp_interface/pisa/src/main/scala/pisa/agent/PisaStat.scala +46 -0
  268. itp_interface/pisa/src/main/scala/pisa/agent/RefactorTest.scala +40 -0
  269. itp_interface/pisa/src/main/scala/pisa/agent/RepHammer.scala +95 -0
  270. itp_interface/pisa/src/main/scala/pisa/server/HammFacts.scala +63 -0
  271. itp_interface/pisa/src/main/scala/pisa/server/PisaOS.scala +881 -0
  272. itp_interface/pisa/src/main/scala/pisa/server/PisaOneStage.scala +540 -0
  273. itp_interface/pisa/src/main/scala/pisa/server/PisaOneStageServers.scala +1048 -0
  274. itp_interface/pisa/src/main/scala/pisa/utils/TheoryManager.scala +95 -0
  275. itp_interface/pisa/src/test/python/analyse_debug.py +33 -0
  276. itp_interface/pisa/src/test/python/extract_test_seq2seq.py +53 -0
  277. itp_interface/pisa/src/test/python/extract_test_theorem_ground_truth_indices.py +31 -0
  278. itp_interface/pisa/src/test/python/proof_originality.py +24 -0
  279. itp_interface/pisa/src/test/python/test_command_generator.py +25 -0
  280. itp_interface/pisa/src/test/python/test_model_sequence_accuracy.py +70 -0
  281. itp_interface/pisa/src/test/scala/pisa/Easy.scala +26 -0
  282. itp_interface/pisa/src/test/scala/pisa/TestCurl.scala +82 -0
  283. itp_interface/pisa/src/test/scala/pisa/TestIsa.scala +27 -0
  284. itp_interface/pisa/test.sh +19 -0
  285. itp_interface/pisa/universal_test_theorems.tar.gz +0 -0
  286. itp_interface/repo/build.py +78 -0
  287. itp_interface/repo/clone.py +79 -0
  288. itp_interface/repo/dataset_discovery.py +99 -0
  289. itp_interface/retrieval/__init__.py +0 -0
  290. itp_interface/retrieval/abstraction.py +35 -0
  291. itp_interface/retrieval/coq_bm25_reranker.py +153 -0
  292. itp_interface/retrieval/isabelle_bm25_reranker.py +86 -0
  293. itp_interface/retrieval/lean3_bm25_reranker.py +86 -0
  294. itp_interface/rl/__init__.py +0 -0
  295. itp_interface/rl/abstraction.py +168 -0
  296. itp_interface/rl/proof_action.py +172 -0
  297. itp_interface/rl/proof_state.py +149 -0
  298. itp_interface/rl/proof_tree.py +109 -0
  299. itp_interface/rl/simpl_proof_env_pool.py +16 -0
  300. itp_interface/rl/simple_proof_env.py +713 -0
  301. itp_interface/rl/simple_proof_env_pool.py +591 -0
  302. itp_interface/scripts/setup.sh +228 -0
  303. itp_interface/tools/__init__.py +0 -0
  304. itp_interface/tools/basic_utils.py +172 -0
  305. itp_interface/tools/bin_packing.py +61 -0
  306. itp_interface/tools/cache.py +93 -0
  307. itp_interface/tools/coq_build_spec.py +31 -0
  308. itp_interface/tools/coq_build_tool.py +319 -0
  309. itp_interface/tools/coq_context_helper.py +354 -0
  310. itp_interface/tools/coq_executor.py +508 -0
  311. itp_interface/tools/coq_local_data_generation_transform.py +158 -0
  312. itp_interface/tools/coq_parse_utils.py +154 -0
  313. itp_interface/tools/coq_raw_proofs.py +193 -0
  314. itp_interface/tools/coq_theorem_proof_pair_generation_transform.py +146 -0
  315. itp_interface/tools/coq_training_data_generator.py +76 -0
  316. itp_interface/tools/dynamic_coq_proof_exec.py +220 -0
  317. itp_interface/tools/dynamic_isabelle_proof_exec.py +229 -0
  318. itp_interface/tools/dynamic_lean4_proof_exec.py +236 -0
  319. itp_interface/tools/dynamic_lean_proof_exec.py +228 -0
  320. itp_interface/tools/isabelle_context_helper.py +66 -0
  321. itp_interface/tools/isabelle_executor.py +862 -0
  322. itp_interface/tools/isabelle_local_data_generation_transform.py +149 -0
  323. itp_interface/tools/isabelle_parse_utils.py +131 -0
  324. itp_interface/tools/isabelle_server.py +106 -0
  325. itp_interface/tools/lean4_context_helper.py +72 -0
  326. itp_interface/tools/lean4_local_data_generation_transform.py +122 -0
  327. itp_interface/tools/lean4_sync_executor.py +1193 -0
  328. itp_interface/tools/lean_cmd_executor.py +804 -0
  329. itp_interface/tools/lean_context_helper.py +327 -0
  330. itp_interface/tools/lean_dojo_data_generation_transform.py +206 -0
  331. itp_interface/tools/lean_executor.py +687 -0
  332. itp_interface/tools/lean_local_data_generation_transform.py +136 -0
  333. itp_interface/tools/lean_parse_utils.py +32 -0
  334. itp_interface/tools/log_utils.py +20 -0
  335. itp_interface/tools/proof_exec_callback.py +76 -0
  336. itp_interface/tools/ray_utils.py +265 -0
  337. itp_interface/tools/repl/.git +1 -0
  338. itp_interface/tools/repl/.github/workflows/ci.yml +24 -0
  339. itp_interface/tools/repl/.gitignore +7 -0
  340. itp_interface/tools/repl/.vscode/copyright.code-snippets +13 -0
  341. itp_interface/tools/repl/.vscode/extensions.json +13 -0
  342. itp_interface/tools/repl/.vscode/module-docstring.code-snippets +35 -0
  343. itp_interface/tools/repl/.vscode/settings.json +11 -0
  344. itp_interface/tools/repl/README.md +174 -0
  345. itp_interface/tools/repl/REPL/Frontend.lean +47 -0
  346. itp_interface/tools/repl/REPL/JSON.lean +186 -0
  347. itp_interface/tools/repl/REPL/Lean/ContextInfo.lean +9 -0
  348. itp_interface/tools/repl/REPL/Lean/Environment.lean +31 -0
  349. itp_interface/tools/repl/REPL/Lean/InfoTree/ToJson.lean +114 -0
  350. itp_interface/tools/repl/REPL/Lean/InfoTree.lean +272 -0
  351. itp_interface/tools/repl/REPL/Main.lean +323 -0
  352. itp_interface/tools/repl/REPL/Snapshots.lean +306 -0
  353. itp_interface/tools/repl/REPL/Util/Path.lean +36 -0
  354. itp_interface/tools/repl/REPL/Util/Pickle.lean +44 -0
  355. itp_interface/tools/repl/REPL.lean +4 -0
  356. itp_interface/tools/repl/lake-manifest.json +5 -0
  357. itp_interface/tools/repl/lakefile.lean +15 -0
  358. itp_interface/tools/repl/lean-toolchain +1 -0
  359. itp_interface/tools/repl/test/Mathlib/.gitignore +5 -0
  360. itp_interface/tools/repl/test/Mathlib/H20231110.sh +2 -0
  361. itp_interface/tools/repl/test/Mathlib/ReplMathlibTests.lean +1 -0
  362. itp_interface/tools/repl/test/Mathlib/lake-manifest.json +68 -0
  363. itp_interface/tools/repl/test/Mathlib/lakefile.lean +11 -0
  364. itp_interface/tools/repl/test/Mathlib/lean-toolchain +1 -0
  365. itp_interface/tools/repl/test/Mathlib/test/20240209.expected.out +20 -0
  366. itp_interface/tools/repl/test/Mathlib/test/20240209.in +3 -0
  367. itp_interface/tools/repl/test/Mathlib/test/20240209.lean +4 -0
  368. itp_interface/tools/repl/test/Mathlib/test/H20231020.expected.out +8 -0
  369. itp_interface/tools/repl/test/Mathlib/test/H20231020.in +8 -0
  370. itp_interface/tools/repl/test/Mathlib/test/H20231020.lean +22 -0
  371. itp_interface/tools/repl/test/Mathlib/test/H20231110.expected.out +4 -0
  372. itp_interface/tools/repl/test/Mathlib/test/H20231110.in +4 -0
  373. itp_interface/tools/repl/test/Mathlib/test/H20231115.expected.out +19 -0
  374. itp_interface/tools/repl/test/Mathlib/test/H20231115.in +5 -0
  375. itp_interface/tools/repl/test/Mathlib/test/H20231115_2.expected.out +18 -0
  376. itp_interface/tools/repl/test/Mathlib/test/H20231115_2.in +4 -0
  377. itp_interface/tools/repl/test/Mathlib/test/H20231115_3.expected.out +10 -0
  378. itp_interface/tools/repl/test/Mathlib/test/H20231115_3.in +4 -0
  379. itp_interface/tools/repl/test/Mathlib/test/H20231214.in +9 -0
  380. itp_interface/tools/repl/test/Mathlib/test/H20231214.lean +30 -0
  381. itp_interface/tools/repl/test/Mathlib/test/H20231215.expected.out +4 -0
  382. itp_interface/tools/repl/test/Mathlib/test/H20231215.in +4 -0
  383. itp_interface/tools/repl/test/Mathlib/test/H20231215_2.expected.out +14 -0
  384. itp_interface/tools/repl/test/Mathlib/test/H20231215_2.in +3 -0
  385. itp_interface/tools/repl/test/Mathlib/test/exact.expected.out +37 -0
  386. itp_interface/tools/repl/test/Mathlib/test/exact.in +10 -0
  387. itp_interface/tools/repl/test/Mathlib/test/import_Mathlib.lean +1 -0
  388. itp_interface/tools/repl/test/Mathlib/test/induction.expected.out +29 -0
  389. itp_interface/tools/repl/test/Mathlib/test/induction.in +10 -0
  390. itp_interface/tools/repl/test/Mathlib/test/induction.lean +6 -0
  391. itp_interface/tools/repl/test/Mathlib/test/on_goal.expected.out +22 -0
  392. itp_interface/tools/repl/test/Mathlib/test/on_goal.in +5 -0
  393. itp_interface/tools/repl/test/Mathlib/test/pickle.expected.out +16 -0
  394. itp_interface/tools/repl/test/Mathlib/test/pickle.in +6 -0
  395. itp_interface/tools/repl/test/Mathlib/test/pickle_2.expected.out +4 -0
  396. itp_interface/tools/repl/test/Mathlib/test/pickle_2.in +4 -0
  397. itp_interface/tools/repl/test/Mathlib/test.sh +41 -0
  398. itp_interface/tools/repl/test/all_tactics.expected.out +13 -0
  399. itp_interface/tools/repl/test/all_tactics.in +1 -0
  400. itp_interface/tools/repl/test/by_cases.expected.out +25 -0
  401. itp_interface/tools/repl/test/by_cases.in +8 -0
  402. itp_interface/tools/repl/test/by_cases.lean +4 -0
  403. itp_interface/tools/repl/test/calc.expected.out +32 -0
  404. itp_interface/tools/repl/test/calc.in +1 -0
  405. itp_interface/tools/repl/test/def_eval.expected.out +9 -0
  406. itp_interface/tools/repl/test/def_eval.in +3 -0
  407. itp_interface/tools/repl/test/enableInitializersExecution.expected.out +2 -0
  408. itp_interface/tools/repl/test/enableInitializersExecution.in +1 -0
  409. itp_interface/tools/repl/test/file.expected.out +8 -0
  410. itp_interface/tools/repl/test/file.in +1 -0
  411. itp_interface/tools/repl/test/file.lean +5 -0
  412. itp_interface/tools/repl/test/have_by_sorry.expected.out +28 -0
  413. itp_interface/tools/repl/test/have_by_sorry.in +6 -0
  414. itp_interface/tools/repl/test/import_lean.in +1 -0
  415. itp_interface/tools/repl/test/incomplete.expected.out +18 -0
  416. itp_interface/tools/repl/test/incomplete.in +3 -0
  417. itp_interface/tools/repl/test/incomplete.lean +0 -0
  418. itp_interface/tools/repl/test/infotree.expected.out +20 -0
  419. itp_interface/tools/repl/test/infotree.in +2 -0
  420. itp_interface/tools/repl/test/invalid_tactic.expected.out +20 -0
  421. itp_interface/tools/repl/test/invalid_tactic.in +3 -0
  422. itp_interface/tools/repl/test/name_generator.expected.out +53 -0
  423. itp_interface/tools/repl/test/name_generator.in +18 -0
  424. itp_interface/tools/repl/test/no_goal_sorry.expected.out +11 -0
  425. itp_interface/tools/repl/test/no_goal_sorry.in +1 -0
  426. itp_interface/tools/repl/test/no_goal_sorry_2.expected.out +12 -0
  427. itp_interface/tools/repl/test/no_goal_sorry_2.in +1 -0
  428. itp_interface/tools/repl/test/options.expected.out +17 -0
  429. itp_interface/tools/repl/test/options.in +6 -0
  430. itp_interface/tools/repl/test/pickle_environment.expected.out +8 -0
  431. itp_interface/tools/repl/test/pickle_environment.in +7 -0
  432. itp_interface/tools/repl/test/pickle_environment_with_imports.expected.out +10 -0
  433. itp_interface/tools/repl/test/pickle_environment_with_imports.in +9 -0
  434. itp_interface/tools/repl/test/pickle_open.expected.out +8 -0
  435. itp_interface/tools/repl/test/pickle_open.in +7 -0
  436. itp_interface/tools/repl/test/pickle_open_2.expected.out +4 -0
  437. itp_interface/tools/repl/test/pickle_open_2.in +3 -0
  438. itp_interface/tools/repl/test/pickle_open_scoped.expected.out +18 -0
  439. itp_interface/tools/repl/test/pickle_open_scoped.in +8 -0
  440. itp_interface/tools/repl/test/pickle_open_scoped_2.expected.out +14 -0
  441. itp_interface/tools/repl/test/pickle_open_scoped_2.in +3 -0
  442. itp_interface/tools/repl/test/pickle_proof_state_1.expected.out +26 -0
  443. itp_interface/tools/repl/test/pickle_proof_state_1.in +15 -0
  444. itp_interface/tools/repl/test/pickle_proof_state_2.expected.out +4 -0
  445. itp_interface/tools/repl/test/pickle_proof_state_2.in +3 -0
  446. itp_interface/tools/repl/test/pickle_proof_state_env.expected.out +26 -0
  447. itp_interface/tools/repl/test/pickle_proof_state_env.in +15 -0
  448. itp_interface/tools/repl/test/pickle_scoped_notation.in +16 -0
  449. itp_interface/tools/repl/test/pickle_scoped_notation_2.in +3 -0
  450. itp_interface/tools/repl/test/proof_step.expected.out +18 -0
  451. itp_interface/tools/repl/test/proof_step.in +7 -0
  452. itp_interface/tools/repl/test/readme.expected.out +16 -0
  453. itp_interface/tools/repl/test/readme.in +5 -0
  454. itp_interface/tools/repl/test/sorry_hypotheses.expected.out +16 -0
  455. itp_interface/tools/repl/test/sorry_hypotheses.in +4 -0
  456. itp_interface/tools/repl/test/synthesize_placeholder.expected.out +7 -0
  457. itp_interface/tools/repl/test/synthesize_placeholder.in +1 -0
  458. itp_interface/tools/repl/test/tactic_mode_sorry.expected.out +14 -0
  459. itp_interface/tools/repl/test/tactic_mode_sorry.in +3 -0
  460. itp_interface/tools/repl/test/tactic_sorry.expected.out +12 -0
  461. itp_interface/tools/repl/test/tactic_sorry.in +1 -0
  462. itp_interface/tools/repl/test/term_sorry.expected.out +12 -0
  463. itp_interface/tools/repl/test/term_sorry.in +1 -0
  464. itp_interface/tools/repl/test/trace_simp.expected.out +41 -0
  465. itp_interface/tools/repl/test/trace_simp.in +15 -0
  466. itp_interface/tools/repl/test/unfinished_tactic_block.expected.out +11 -0
  467. itp_interface/tools/repl/test/unfinished_tactic_block.in +1 -0
  468. itp_interface/tools/repl/test/unknown_environment.expected.out +2 -0
  469. itp_interface/tools/repl/test/unknown_environment.in +1 -0
  470. itp_interface/tools/repl/test/unknown_proof_state.expected.out +14 -0
  471. itp_interface/tools/repl/test/unknown_proof_state.in +3 -0
  472. itp_interface/tools/repl/test/unknown_tactic.expected.out +14 -0
  473. itp_interface/tools/repl/test/unknown_tactic.in +3 -0
  474. itp_interface/tools/repl/test/variables.expected.out +26 -0
  475. itp_interface/tools/repl/test/variables.in +5 -0
  476. itp_interface/tools/repl/test.sh +43 -0
  477. itp_interface/tools/run_data_generation_transforms.py +350 -0
  478. itp_interface/tools/theorem_details.py +25 -0
  479. itp_interface/tools/training_data.py +358 -0
  480. itp_interface/tools/training_data_format.py +599 -0
  481. itp_interface-1.0.0.dist-info/METADATA +78 -0
  482. itp_interface-1.0.0.dist-info/RECORD +485 -0
  483. itp_interface-1.0.0.dist-info/WHEEL +4 -0
  484. itp_interface-1.0.0.dist-info/entry_points.txt +3 -0
  485. itp_interface-1.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,137 @@
1
+ import json
2
+ import random
3
+ import shutil
4
+ import jsonlines
5
+
6
+ from tqdm import tqdm
7
+ from mpmath import mp, mpf, fmod
8
+ import hashlib
9
+ import math
10
+
11
+
12
+ random.seed(0)
13
+ mp.dps = 50
14
+
15
+
16
+ def split_transitions(problem_names, transitions):
17
+ transitions_for_problems = {problem_name: [] for problem_name in problem_names}
18
+ current_problem_name = ""
19
+ for transition in transitions:
20
+ if transition[1] in problem_names:
21
+ current_problem_name = transition[1]
22
+ elif "proof" not in transition[0]:
23
+ continue
24
+ transitions_for_problems[current_problem_name].append(transition)
25
+ return transitions_for_problems
26
+
27
+
28
+ def process_translations_for_a_problem(transitions_for_a_problem):
29
+ """Transform the transitions for a problem to translation pairs"""
30
+ # The first one is the lemma/theorem definition
31
+ previous_proof_segment = transitions_for_a_problem[0][1]
32
+
33
+ episodic_transitions = []
34
+ for transition in transitions_for_a_problem[1:]:
35
+ rl_transition = {
36
+ "observation": transition[0],
37
+ "extra context": previous_proof_segment,
38
+ "action": transition[1],
39
+ "complete": False
40
+ }
41
+
42
+ episodic_transitions.append(rl_transition)
43
+ previous_proof_segment += " \\n " + transition[1]
44
+
45
+ if episodic_transitions:
46
+ episodic_transitions[-1]["complete"] = True
47
+ return episodic_transitions
48
+
49
+
50
+ def trim_string(s: str):
51
+ """Remove all change line characters and replace them with spaces"""
52
+ return " ".join(s.replace("\n", " ").split())
53
+
54
+
55
+ def remove_extra_spaces(s: str):
56
+ return " ".join(s.split())
57
+
58
+
59
+ def hash_string_to_int(arg):
60
+ return int(hashlib.sha256(arg.encode("utf-8")).hexdigest(), 16) % 10**30
61
+
62
+
63
+ def hash_string_to_float(arg):
64
+ x = mpf(hash_string_to_int(arg))
65
+ return fmod(x * mp.pi, mpf(1.))
66
+
67
+
68
+ def get_split(arg):
69
+ float_hash = hash_string_to_float(arg)
70
+ if float_hash < 0.95:
71
+ return "train"
72
+ elif float_hash < 0.96:
73
+ return "val"
74
+ else:
75
+ return "test"
76
+
77
+
78
+ def random_split_file_names(file_names, val_test_files=100):
79
+ random.shuffle(file_names)
80
+ return file_names[:-2 * val_test_files], file_names[-2 * val_test_files:-val_test_files], \
81
+ file_names[-val_test_files:]
82
+
83
+
84
+ def process_files_with_proof_statements(file_names, saving_directory):
85
+ problem_names_split = {
86
+ "train": list(),
87
+ "val": list(),
88
+ "test": list()
89
+ }
90
+ unique_id_to_transitions = dict()
91
+ for file_path in tqdm(file_names):
92
+ file = json.load(open(file_path))
93
+ original_file_name = file['file_name']
94
+ problem_names = set(file['problem_names'])
95
+ transitions_split = split_transitions(problem_names, file['translations'])
96
+
97
+ for problem_name in set(file['problem_names']):
98
+ split = get_split(problem_name)
99
+ problem_names_split[split].append((original_file_name, problem_name))
100
+ episodic_transitions = process_translations_for_a_problem(transitions_split[problem_name])
101
+ unique_id_to_transitions[(original_file_name, problem_name)] = episodic_transitions
102
+
103
+ for split, split_uids in problem_names_split.items():
104
+ with jsonlines.open(os.path.join(saving_directory, "{}.jsonl".format(split)), "w") as writer:
105
+ for uid in split_uids:
106
+ writer.write(unique_id_to_transitions[uid])
107
+
108
+ json.dump(problem_names_split, open(os.path.join(saving_directory, "problem_names_split.json"), "w"))
109
+
110
+
111
+ if __name__ == "__main__":
112
+ import argparse
113
+ import glob
114
+ import os
115
+ parser = argparse.ArgumentParser(description='Extracting translation pairs.')
116
+ parser.add_argument('--extraction-file-directory', '-efd', help='Where the parsed json files are')
117
+ parser.add_argument('--saving-directory', '-sd', help='Where to save the translation pairs')
118
+ parser.add_argument('--proof', dest='proof', action='store_true')
119
+ parser.add_argument('--state', dest='state', action='store_true')
120
+ args = parser.parse_args()
121
+
122
+ assert args.proof or args.state
123
+ if args.proof and not args.state:
124
+ proof_state_suffix = "proof"
125
+ elif args.state and not args.proof:
126
+ proof_state_suffix = "state"
127
+ else:
128
+ proof_state_suffix = "proof_and_state"
129
+
130
+ saving_directory = args.saving_directory
131
+ if os.path.isdir(saving_directory):
132
+ shutil.rmtree(saving_directory)
133
+ os.makedirs(saving_directory)
134
+
135
+ file_names = list(glob.glob("{}/*/*_ground_truth.json".format(
136
+ args.extraction_file_directory, proof_state_suffix)))
137
+ process_files_with_proof_statements(file_names, saving_directory)
@@ -0,0 +1,277 @@
1
+ import json
2
+ import random
3
+ import shutil
4
+
5
+ from tqdm import tqdm
6
+ from mpmath import mp, mpf, fmod
7
+ import hashlib
8
+ import math
9
+
10
+
11
+ random.seed(0)
12
+ mp.dps = 50
13
+ DEFAULT_MAX_LENGTH = 3000
14
+
15
+
16
+ def split_transitions(problem_names, transitions):
17
+ transitions_for_problems = {problem_name: [] for problem_name in problem_names}
18
+ current_problem_name = ""
19
+ for transition in transitions:
20
+ if transition[1] in problem_names:
21
+ current_problem_name = transition[1]
22
+ elif "proof" not in transition[0]:
23
+ continue
24
+ transitions_for_problems[current_problem_name].append(transition)
25
+ return transitions_for_problems
26
+
27
+
28
+ def extract_siblings(proof_steps, current_step_index):
29
+ sibling_indices = []
30
+ current_proof_level = proof_steps[current_step_index][2]
31
+ # We shall have current_proof_level ≥ 1 since we're inside a proof
32
+ search_index = current_step_index - 1
33
+ while search_index >= 0:
34
+ if proof_steps[search_index][2] > current_proof_level:
35
+ # Unimportant proof subtree content*
36
+ pass
37
+ elif proof_steps[search_index][2] == current_proof_level:
38
+ # Sibling
39
+ sibling_indices.insert(0, search_index)
40
+ elif proof_steps[search_index][2] < current_proof_level:
41
+ # Higher level steps
42
+ return sibling_indices, search_index
43
+ search_index -= 1
44
+ return [], None
45
+
46
+
47
+ def extract_needed(proof_steps, current_step_index, needed_found):
48
+ if needed_found[current_step_index]:
49
+ return needed_found[current_step_index]
50
+ sibling_indices, search_index = extract_siblings(proof_steps, current_step_index)
51
+ if search_index is None:
52
+ return sibling_indices
53
+ elif search_index > 0:
54
+ return extract_needed(proof_steps, search_index, needed_found) + [search_index] + sibling_indices
55
+ elif search_index == 0:
56
+ return [search_index] + sibling_indices
57
+ else:
58
+ raise AssertionError
59
+
60
+
61
+ def extract_needed_string(transition, transitions_for_a_problem, previous_proof_segment, i, needed_found, kwargs):
62
+ needed_indices = extract_needed(transitions_for_a_problem, i, needed_found)
63
+ needed_found[i] = needed_indices
64
+ needed_segment = " \\n ".join([transitions_for_a_problem[index][1] for index in needed_indices])
65
+ return f"<ISA_NDS> {needed_segment} <ISA_OBS> {transition[0]}"
66
+
67
+
68
+ def extract_last_k_string(transition, transitions_for_a_problem, previous_proof_segment, i, needed_found, kwargs):
69
+ last_k = 1 if "last_k" not in kwargs else kwargs["last_k"]
70
+ assert isinstance(last_k, int)
71
+ proof_lines = previous_proof_segment.strip().split(" \\n ")
72
+ last_k_proof_lines = " \\n ".join(proof_lines[-last_k:])
73
+ return f"<ISA_LAST_{last_k}> {last_k_proof_lines} <ISA_OBS> {transition[0]}"
74
+
75
+
76
+ def extract_proof_only_string(transition, transitions_for_a_problem, previous_proof_segment, i, needed_found, kwargs):
77
+ return f"<ISA_PRF> {previous_proof_segment}"
78
+
79
+
80
+ def extract_state_only_string(transition, transitions_for_a_problem, previous_proof_segment, i, needed_found, kwargs):
81
+ return f"<ISA_OBS> {transition[0]}"
82
+
83
+
84
+ def extract_proof_and_state_string(transition, transitions_for_a_problem, previous_proof_segment, i, needed_found,
85
+ kwargs):
86
+ return f"<ISA_PRF> {previous_proof_segment} <ISA_OBS> {transition[0]}"
87
+
88
+
89
+ def extract_trimmed_proof_and_state_string(transition, transitions_for_a_problem, previous_proof_segment, i,
90
+ needed_found, kwargs):
91
+ max_length = kwargs["max_length"] if "max_length" in kwargs else DEFAULT_MAX_LENGTH
92
+ proof_lines = previous_proof_segment.strip().split(" \\n ")
93
+ state_string = transition[0]
94
+ proof_string = ""
95
+ state_length = len(state_string)
96
+ for i in reversed(range(len(proof_lines))):
97
+ if len(proof_string) + state_length + len(proof_lines[i].strip()) > max_length:
98
+ break
99
+ proof_string = f"{proof_lines[i].strip()} \\n {proof_string}"
100
+
101
+ return f"<ISA_TRIM_PRF> {proof_string} <ISA_OBS> {transition[0]}"
102
+
103
+
104
+ all_processing_methods = {
105
+ "needed": extract_needed_string,
106
+ "last_k": extract_last_k_string,
107
+ "proof_only": extract_proof_only_string,
108
+ "state_only": extract_state_only_string,
109
+ "proof_and_state": extract_proof_and_state_string,
110
+ "trimmed_proof_and_state": extract_trimmed_proof_and_state_string,
111
+ }
112
+
113
+
114
+ def process_translations_for_a_problem(transitions_for_a_problem, processing_method_config=None):
115
+ """Transform the transitions for a problem to translation pairs"""
116
+ processing_method_name, additional_args = processing_method_config
117
+
118
+ # The first one is the lemma/theorem definition
119
+ previous_proof_segment = transitions_for_a_problem[0][1]
120
+ needed_found = {i: False for i in range(len(transitions_for_a_problem))}
121
+
122
+ translation_pairs = []
123
+ for i in range(1, len(transitions_for_a_problem)):
124
+ transition = transitions_for_a_problem[i]
125
+ translation_src = ""
126
+ if processing_method_name is None or not processing_method_name in all_processing_methods:
127
+ raise AssertionError("We must have a specified processing method")
128
+
129
+ processing_method = all_processing_methods[processing_method_name]
130
+ translation_src += processing_method(transition, transitions_for_a_problem, previous_proof_segment, i,
131
+ needed_found, additional_args)
132
+ translation_pairs.append((translation_src, transition[1]))
133
+ previous_proof_segment += " \\n " + transition[1]
134
+ return translation_pairs
135
+
136
+
137
+ def trim_string(s: str):
138
+ """Remove all change line characters and replace them with spaces"""
139
+ return " ".join(s.replace("\n", " ").split())
140
+
141
+
142
+ def hash_string_to_int(arg):
143
+ return int(hashlib.sha256(arg.encode("utf-8")).hexdigest(), 16) % 10**30
144
+
145
+
146
+ def hash_string_to_float(arg):
147
+ x = mpf(hash_string_to_int(arg))
148
+ return fmod(x * mp.pi, mpf(1.))
149
+
150
+
151
+ def get_split(arg):
152
+ float_hash = hash_string_to_float(arg)
153
+ if float_hash < 0.95:
154
+ return "train"
155
+ elif float_hash < 0.96:
156
+ return "val"
157
+ else:
158
+ return "test"
159
+
160
+
161
+ def random_split_file_names(file_names, val_test_files=100):
162
+ random.shuffle(file_names)
163
+ return file_names[:-2 * val_test_files], file_names[-2 * val_test_files:-val_test_files], \
164
+ file_names[-val_test_files:]
165
+
166
+
167
+ def process_files_with_proof_statements(file_names, saving_directory, processing_method_config=None):
168
+ # Store everything in jsonl format, and in text src and tgt format
169
+ datapoints = {
170
+ "train": list(),
171
+ "valid": list(),
172
+ "test": list()
173
+ }
174
+ problem_names_split = {
175
+ "train": list(),
176
+ "val": list(),
177
+ "test": list()
178
+ }
179
+ for file_path in tqdm(file_names, desc="Processing files"):
180
+ file = json.load(open(file_path))
181
+ original_file_name = file['file_name']
182
+ problem_names = set(file['problem_names'])
183
+ transitions_split = split_transitions(problem_names, file['translations'])
184
+
185
+ split = None
186
+
187
+
188
+ for problem_name in set(file['problem_names']):
189
+ if "Isabelle2021/src/HOL" in original_file_name:
190
+ split = "train"
191
+ else:
192
+ split = get_split(problem_name)
193
+
194
+ problem_names_split[split].append((original_file_name, problem_name))
195
+
196
+ json_split = "valid" if split == "val" else split
197
+ translation_pairs = process_translations_for_a_problem(
198
+ transitions_split[problem_name],
199
+ processing_method_config=processing_method_config
200
+ )
201
+
202
+ proof_script_till_now = [problem_name]
203
+ for proof_state, proof_step in translation_pairs:
204
+ sanitised_x = trim_string(proof_state)
205
+ sanitised_y = trim_string(proof_step)
206
+ datapoints[json_split].append(
207
+ {
208
+ "file_name": file['file_name'],
209
+ 'problem_name': problem_name,
210
+ "x": sanitised_x,
211
+ "y": sanitised_y,
212
+ "proof_script_until_now": "<SEP>".join(proof_script_till_now)
213
+ }
214
+ )
215
+ proof_script_till_now.append(sanitised_y)
216
+
217
+ for json_split in tqdm(datapoints.keys(), desc="Saving files"):
218
+ # Write json
219
+ with open(os.path.join(saving_directory, f"{json_split}.jsonl"), "w") as fout:
220
+ for datapoint in datapoints[json_split]:
221
+ fout.write(json.dumps(datapoint))
222
+ fout.write("\n")
223
+
224
+ # Write src and tgt
225
+ split = "val" if json_split == "valid" else json_split
226
+ with open(os.path.join(saving_directory, f"{split}.src"), "w") as f_src, \
227
+ open(os.path.join(saving_directory, f"{split}.tgt"), "w") as f_tgt:
228
+ for datapoint in datapoints[json_split]:
229
+ f_src.write(datapoint["x"]+"\n")
230
+ f_tgt.write(datapoint["y"]+"\n")
231
+
232
+ json.dump(problem_names_split, open(os.path.join(saving_directory, "problem_names_split.json"), "w"))
233
+
234
+ for split, lines in problem_names_split.items():
235
+ split = "valid" if split == "val" else split
236
+
237
+ with open(os.path.join(saving_directory, f"split.std_and_afp.{split}"), "w") as fout:
238
+ for theory_path, theorem_name in lines:
239
+ theorem_name = " ".join(theorem_name.strip().split())
240
+ fout.write(f"{theory_path} <PATH_AND_THEOREM_SEP> {theorem_name}")
241
+ fout.write("\n")
242
+
243
+
244
+ if __name__ == "__main__":
245
+ import argparse
246
+ import glob
247
+ import os
248
+ parser = argparse.ArgumentParser(description='Extracting translation pairs.')
249
+ parser.add_argument('--extraction-file-directory', '-efd', nargs='+', help='Where the parsed json files are')
250
+ parser.add_argument('--saving-directory', '-sd', help='Where to save the translation pairs')
251
+ parser.add_argument('--processing-method-config', '-pmc', type=str,
252
+ help='Where to find the processing method config')
253
+ parser.add_argument('--last-k', '-lk', type=int, default=1, help='How many last k steps to use')
254
+ args = parser.parse_args()
255
+
256
+ all_configs = {
257
+ "needed": ("needed", {}),
258
+ "last_k": ("last_k", {"last_k": args.last_k}),
259
+ "proof_only": ("proof_only", {}),
260
+ "state_only": ("state_only", {}),
261
+ "proof_and_state": ("proof_and_state", {}),
262
+ "trimmed_proof_and_state": ("trimmed_proof_and_state", {}),
263
+ }
264
+ ppc = all_configs[args.processing_method_config]
265
+ file_suffix = args.processing_method_config
266
+
267
+ saving_directory = "{}_with_{}".format(args.saving_directory, file_suffix)
268
+ if os.path.isdir(saving_directory):
269
+ shutil.rmtree(saving_directory)
270
+ os.makedirs(saving_directory)
271
+
272
+ file_names = []
273
+ for extraction_file_directory in args.extraction_file_directory:
274
+ file_names += list(glob.glob("{}/*/*_ground_truth.json".format(extraction_file_directory)))
275
+
276
+ print(f"Processing {len(file_names)} files in total")
277
+ process_files_with_proof_statements(file_names, saving_directory, ppc)