itp-interface 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (485) hide show
  1. itp_interface/__init__.py +0 -0
  2. itp_interface/agent/__init__.py +0 -0
  3. itp_interface/agent/simple_proof_agent.py +100 -0
  4. itp_interface/coq_ser_api/__init__.py +165 -0
  5. itp_interface/coq_ser_api/contexts.py +283 -0
  6. itp_interface/coq_ser_api/coq_agent.py +459 -0
  7. itp_interface/coq_ser_api/coq_backend.py +135 -0
  8. itp_interface/coq_ser_api/coq_util.py +839 -0
  9. itp_interface/coq_ser_api/example.py +67 -0
  10. itp_interface/coq_ser_api/lsp_backend.py +375 -0
  11. itp_interface/coq_ser_api/py.typed +0 -0
  12. itp_interface/coq_ser_api/serapi_backend.py +841 -0
  13. itp_interface/coq_ser_api/util.py +145 -0
  14. itp_interface/coq_ser_api_old/__init__.py +2583 -0
  15. itp_interface/coq_ser_api_old/contexts.py +172 -0
  16. itp_interface/coq_ser_api_old/util.py +146 -0
  17. itp_interface/lean_server/__init__.py +0 -0
  18. itp_interface/lean_server/commands.py +484 -0
  19. itp_interface/lean_server/lean3_search_tool.py +358 -0
  20. itp_interface/lean_server/lean4_repl_interface.py +151 -0
  21. itp_interface/lean_server/lean4_utils.py +255 -0
  22. itp_interface/lean_server/lean_cmd_server.py +111 -0
  23. itp_interface/lean_server/lean_context.py +60 -0
  24. itp_interface/lean_server/lean_sync_server.py +174 -0
  25. itp_interface/lean_server/lean_utils.py +199 -0
  26. itp_interface/lean_server/py.typed +1 -0
  27. itp_interface/main/__init__.py +0 -0
  28. itp_interface/main/config/afp_data_gen.yaml +14 -0
  29. itp_interface/main/config/benchmark/CompCert.yaml +366 -0
  30. itp_interface/main/config/benchmark/GeoCoq.yaml +930 -0
  31. itp_interface/main/config/benchmark/UniMath.yaml +2690 -0
  32. itp_interface/main/config/benchmark/afp_isabelle.yaml +29200 -0
  33. itp_interface/main/config/benchmark/agent_proverbot_hard.yaml +247 -0
  34. itp_interface/main/config/benchmark/category-theory.yaml +470 -0
  35. itp_interface/main/config/benchmark/compcert_118_subset.yaml +148 -0
  36. itp_interface/main/config/benchmark/compcert_benchmark.yaml +36 -0
  37. itp_interface/main/config/benchmark/compcert_benchmark_hard.yaml +498 -0
  38. itp_interface/main/config/benchmark/compcert_benchmark_hard_1.yaml +55 -0
  39. itp_interface/main/config/benchmark/compcert_benchmark_hard_2.yaml +24 -0
  40. itp_interface/main/config/benchmark/compcert_benchmark_hard_3.yaml +95 -0
  41. itp_interface/main/config/benchmark/compcert_benchmark_hard_7_per_cent.yaml +78 -0
  42. itp_interface/main/config/benchmark/compcert_benchmark_test.yaml +38 -0
  43. itp_interface/main/config/benchmark/compcert_benchmark_train.yaml +340 -0
  44. itp_interface/main/config/benchmark/leandojo_novel_premises_test.yaml +2908 -0
  45. itp_interface/main/config/benchmark/leandojo_novel_premises_train.yaml +98645 -0
  46. itp_interface/main/config/benchmark/leandojo_novel_premises_val.yaml +2912 -0
  47. itp_interface/main/config/benchmark/leandojo_random.yaml +2889 -0
  48. itp_interface/main/config/benchmark/leandojo_random_test.yaml +2421 -0
  49. itp_interface/main/config/benchmark/leandojo_random_train.yaml +62729 -0
  50. itp_interface/main/config/benchmark/leandojo_random_val.yaml +2504 -0
  51. itp_interface/main/config/benchmark/math-comp.yaml +200 -0
  52. itp_interface/main/config/benchmark/miniF2F_test.yaml +12 -0
  53. itp_interface/main/config/benchmark/miniF2F_test_aime.yaml +27 -0
  54. itp_interface/main/config/benchmark/miniF2F_test_algebra.yaml +30 -0
  55. itp_interface/main/config/benchmark/miniF2F_test_amc12.yaml +57 -0
  56. itp_interface/main/config/benchmark/miniF2F_test_few_shot_hard.yaml +231 -0
  57. itp_interface/main/config/benchmark/miniF2F_test_imo.yaml +32 -0
  58. itp_interface/main/config/benchmark/miniF2F_test_induction.yaml +20 -0
  59. itp_interface/main/config/benchmark/miniF2F_test_mathd_algebra.yaml +82 -0
  60. itp_interface/main/config/benchmark/miniF2F_test_mathd_algebra_hard.yaml +72 -0
  61. itp_interface/main/config/benchmark/miniF2F_test_mathd_numbertheory.yaml +72 -0
  62. itp_interface/main/config/benchmark/miniF2F_test_numbertheory.yaml +20 -0
  63. itp_interface/main/config/benchmark/minicompcert_benchmark_1.yaml +14 -0
  64. itp_interface/main/config/benchmark/proverbot_hard.yaml +104 -0
  65. itp_interface/main/config/benchmark/re_prover.yaml +66 -0
  66. itp_interface/main/config/benchmark/re_prover_hard.yaml +41 -0
  67. itp_interface/main/config/benchmark/re_prover_very_hard.yaml +22 -0
  68. itp_interface/main/config/benchmark/reprover_with_retrieval.yaml +73 -0
  69. itp_interface/main/config/benchmark/reprover_with_retrieval_hard.yaml +30 -0
  70. itp_interface/main/config/benchmark/reprover_with_retrieval_neg.yaml +195 -0
  71. itp_interface/main/config/benchmark/simple_benchmark_1.yaml +24 -0
  72. itp_interface/main/config/benchmark/simple_benchmark_8.yaml +50 -0
  73. itp_interface/main/config/benchmark/simple_benchmark_9.yaml +65 -0
  74. itp_interface/main/config/benchmark/simple_benchmark_isabelle.yaml +18 -0
  75. itp_interface/main/config/benchmark/simple_benchmark_lean.yaml +12 -0
  76. itp_interface/main/config/benchmark/simple_benchmark_lean_training_data.yaml +12 -0
  77. itp_interface/main/config/benchmark/simple_rl_benchmark_lean.yaml +14 -0
  78. itp_interface/main/config/benchmark/stack_machine.yaml +13 -0
  79. itp_interface/main/config/benchmark/stack_machine_hard.yaml +15 -0
  80. itp_interface/main/config/category_theory_data_gen.yaml +14 -0
  81. itp_interface/main/config/category_theory_data_gen_random.yaml +16 -0
  82. itp_interface/main/config/compcert_data_gen_test.yaml +10 -0
  83. itp_interface/main/config/compcert_data_gen_train.yaml +7 -0
  84. itp_interface/main/config/env_settings/bm25_retrieval.yaml +2 -0
  85. itp_interface/main/config/env_settings/bm25_retrieval_no_dfns.yaml +2 -0
  86. itp_interface/main/config/env_settings/bm25_retrieval_only_local_no_dfns.yaml +2 -0
  87. itp_interface/main/config/env_settings/bm25_retrieval_with_print.yaml +2 -0
  88. itp_interface/main/config/env_settings/bm25_retrieval_with_print_only_local.yaml +2 -0
  89. itp_interface/main/config/env_settings/bm25_retrieval_with_print_only_local_no_dfns.yaml +2 -0
  90. itp_interface/main/config/env_settings/no_retrieval.yaml +2 -0
  91. itp_interface/main/config/experiments.yaml +12 -0
  92. itp_interface/main/config/geo_coq_data_gen.yaml +14 -0
  93. itp_interface/main/config/geo_coq_data_gen_random.yaml +16 -0
  94. itp_interface/main/config/leandojo_random_data_gen.yaml +16 -0
  95. itp_interface/main/config/math_comp_data_gen.yaml +14 -0
  96. itp_interface/main/config/math_comp_data_gen_random.yaml +16 -0
  97. itp_interface/main/config/mathlib_data_gen.yaml +14 -0
  98. itp_interface/main/config/repo/coq_repos.yaml +191 -0
  99. itp_interface/main/config/run_settings/default_coq_data_generation_transforms.yaml +24 -0
  100. itp_interface/main/config/run_settings/default_isabelle_data_generation_transforms.yaml +24 -0
  101. itp_interface/main/config/run_settings/default_lean4_data_generation_transforms.yaml +24 -0
  102. itp_interface/main/config/run_settings/default_lean_data_generation_transforms.yaml +24 -0
  103. itp_interface/main/config/simple_coq_data_gen.yaml +12 -0
  104. itp_interface/main/config/simple_coq_data_gen_random.yaml +17 -0
  105. itp_interface/main/config/simple_lean_data_gen.yaml +12 -0
  106. itp_interface/main/config/simple_rl_lean_data_gen.yaml +12 -0
  107. itp_interface/main/config/uni_math_data_gen.yaml +14 -0
  108. itp_interface/main/config.py +192 -0
  109. itp_interface/main/extract_benchmark_dataset.py +106 -0
  110. itp_interface/main/filter_dataset.py +107 -0
  111. itp_interface/main/install.py +92 -0
  112. itp_interface/main/merge_dataset.py +96 -0
  113. itp_interface/main/run_tool.py +444 -0
  114. itp_interface/pisa/.git +1 -0
  115. itp_interface/pisa/.gitignore +125 -0
  116. itp_interface/pisa/.idea/.gitignore +8 -0
  117. itp_interface/pisa/.idea/ClojureProjectResolveSettings.xml +6 -0
  118. itp_interface/pisa/.idea/codeStyles/Project.xml +7 -0
  119. itp_interface/pisa/.idea/codeStyles/codeStyleConfig.xml +5 -0
  120. itp_interface/pisa/.idea/inspectionProfiles/Project_Default.xml +16 -0
  121. itp_interface/pisa/.idea/libraries/sbt__com_google_android_annotations_4_1_1_4_jar.xml +13 -0
  122. itp_interface/pisa/.idea/libraries/sbt__com_google_api_grpc_proto_google_common_protos_1_17_0_jar.xml +13 -0
  123. itp_interface/pisa/.idea/libraries/sbt__com_google_code_findbugs_jsr305_3_0_2_jar.xml +13 -0
  124. itp_interface/pisa/.idea/libraries/sbt__com_google_code_gson_gson_2_8_6_jar.xml +13 -0
  125. itp_interface/pisa/.idea/libraries/sbt__com_google_errorprone_error_prone_annotations_2_3_4_jar.xml +13 -0
  126. itp_interface/pisa/.idea/libraries/sbt__com_google_guava_failureaccess_1_0_1_jar.xml +13 -0
  127. itp_interface/pisa/.idea/libraries/sbt__com_google_guava_guava_30_0_jre_jar.xml +13 -0
  128. itp_interface/pisa/.idea/libraries/sbt__com_google_guava_listenablefuture_9999_0_empty_to_avoid_conflict_with_guava_jar.xml +9 -0
  129. itp_interface/pisa/.idea/libraries/sbt__com_google_j2objc_j2objc_annotations_1_3_jar.xml +13 -0
  130. itp_interface/pisa/.idea/libraries/sbt__com_google_protobuf_protobuf_java_3_12_0_jar.xml +13 -0
  131. itp_interface/pisa/.idea/libraries/sbt__com_google_protobuf_protobuf_java_util_3_12_0_jar.xml +13 -0
  132. itp_interface/pisa/.idea/libraries/sbt__com_lihaoyi_fastparse_2_13_2_3_0_jar.xml +13 -0
  133. itp_interface/pisa/.idea/libraries/sbt__com_lihaoyi_geny_2_13_0_6_0_jar.xml +13 -0
  134. itp_interface/pisa/.idea/libraries/sbt__com_lihaoyi_sourcecode_2_13_0_2_1_jar.xml +13 -0
  135. itp_interface/pisa/.idea/libraries/sbt__com_thesamet_scalapb_lenses_2_13_0_10_9_jar.xml +13 -0
  136. itp_interface/pisa/.idea/libraries/sbt__com_thesamet_scalapb_scalapb_runtime_2_13_0_10_9_jar.xml +13 -0
  137. itp_interface/pisa/.idea/libraries/sbt__com_thesamet_scalapb_scalapb_runtime_grpc_2_13_0_10_9_jar.xml +13 -0
  138. itp_interface/pisa/.idea/libraries/sbt__com_thesamet_scalapb_zio_grpc_zio_grpc_core_2_13_0_4_2_jar.xml +13 -0
  139. itp_interface/pisa/.idea/libraries/sbt__com_thoughtworks_paranamer_paranamer_2_8_jar.xml +13 -0
  140. itp_interface/pisa/.idea/libraries/sbt__commons_io_commons_io_2_8_0_jar.xml +13 -0
  141. itp_interface/pisa/.idea/libraries/sbt__de_unruh_java_patterns_0_1_0_jar.xml +13 -0
  142. itp_interface/pisa/.idea/libraries/sbt__de_unruh_scala_isabelle_2_13_master_SNAPSHOT_jar.xml +13 -0
  143. itp_interface/pisa/.idea/libraries/sbt__dev_zio_izumi_reflect_2_13_1_0_0_M9_jar.xml +13 -0
  144. itp_interface/pisa/.idea/libraries/sbt__dev_zio_izumi_reflect_thirdparty_boopickle_shaded_2_13_1_0_0_M9_jar.xml +13 -0
  145. itp_interface/pisa/.idea/libraries/sbt__dev_zio_zio_2_13_1_0_3_jar.xml +13 -0
  146. itp_interface/pisa/.idea/libraries/sbt__dev_zio_zio_stacktracer_2_13_1_0_3_jar.xml +13 -0
  147. itp_interface/pisa/.idea/libraries/sbt__dev_zio_zio_streams_2_13_1_0_3_jar.xml +13 -0
  148. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_api_1_34_0_jar.xml +13 -0
  149. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_context_1_34_0_jar.xml +13 -0
  150. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_core_1_34_0_jar.xml +13 -0
  151. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_netty_1_34_0_jar.xml +13 -0
  152. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_protobuf_1_34_0_jar.xml +13 -0
  153. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_protobuf_lite_1_34_0_jar.xml +13 -0
  154. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_services_1_34_0_jar.xml +13 -0
  155. itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_stub_1_34_0_jar.xml +13 -0
  156. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_buffer_4_1_51_Final_jar.xml +13 -0
  157. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_codec_4_1_51_Final_jar.xml +13 -0
  158. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_codec_http2_4_1_51_Final_jar.xml +13 -0
  159. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_codec_http_4_1_51_Final_jar.xml +13 -0
  160. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_codec_socks_4_1_51_Final_jar.xml +13 -0
  161. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_common_4_1_51_Final_jar.xml +13 -0
  162. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_handler_4_1_51_Final_jar.xml +13 -0
  163. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_handler_proxy_4_1_51_Final_jar.xml +13 -0
  164. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_resolver_4_1_51_Final_jar.xml +13 -0
  165. itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_transport_4_1_51_Final_jar.xml +13 -0
  166. itp_interface/pisa/.idea/libraries/sbt__io_perfmark_perfmark_api_0_19_0_jar.xml +13 -0
  167. itp_interface/pisa/.idea/libraries/sbt__net_java_dev_jna_jna_5_3_1_jar.xml +13 -0
  168. itp_interface/pisa/.idea/libraries/sbt__net_liftweb_lift_json_2_13_3_4_3_jar.xml +13 -0
  169. itp_interface/pisa/.idea/libraries/sbt__org_apache_commons_commons_lang3_3_11_jar.xml +13 -0
  170. itp_interface/pisa/.idea/libraries/sbt__org_apache_commons_commons_text_1_9_jar.xml +13 -0
  171. itp_interface/pisa/.idea/libraries/sbt__org_checkerframework_checker_qual_3_5_0_jar.xml +13 -0
  172. itp_interface/pisa/.idea/libraries/sbt__org_codehaus_mojo_animal_sniffer_annotations_1_18_jar.xml +13 -0
  173. itp_interface/pisa/.idea/libraries/sbt__org_jetbrains_annotations_20_1_0_jar.xml +13 -0
  174. itp_interface/pisa/.idea/libraries/sbt__org_jline_jline_3_16_0_jar.xml +13 -0
  175. itp_interface/pisa/.idea/libraries/sbt__org_log4s_log4s_2_13_1_9_0_jar.xml +13 -0
  176. itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_modules_scala_collection_compat_2_13_2_1_6_jar.xml +13 -0
  177. itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_modules_scala_xml_2_13_1_3_0_jar.xml +13 -0
  178. itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_scala_compiler_2_13_4_jar.xml +13 -0
  179. itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_scala_library_2_13_4_jar.xml +23 -0
  180. itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_scala_reflect_2_13_4_jar.xml +13 -0
  181. itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_scalap_2_13_4_jar.xml +13 -0
  182. itp_interface/pisa/.idea/libraries/sbt__org_scalaz_scalaz_core_2_13_7_3_2_jar.xml +13 -0
  183. itp_interface/pisa/.idea/libraries/sbt__org_slf4j_slf4j_api_1_7_30_jar.xml +13 -0
  184. itp_interface/pisa/.idea/libraries/sbt__org_slf4j_slf4j_simple_1_7_30_jar.xml +13 -0
  185. itp_interface/pisa/.idea/misc.xml +7 -0
  186. itp_interface/pisa/.idea/modules/PISA-build.iml +127 -0
  187. itp_interface/pisa/.idea/modules/PISA.iml +94 -0
  188. itp_interface/pisa/.idea/modules.xml +9 -0
  189. itp_interface/pisa/.idea/other.xml +6 -0
  190. itp_interface/pisa/.idea/sbt.xml +20 -0
  191. itp_interface/pisa/.idea/scala_compiler.xml +6 -0
  192. itp_interface/pisa/.idea/uiDesigner.xml +124 -0
  193. itp_interface/pisa/.idea/vcs.xml +6 -0
  194. itp_interface/pisa/.scalafmt.conf +2 -0
  195. itp_interface/pisa/LICENSE +29 -0
  196. itp_interface/pisa/README.md +262 -0
  197. itp_interface/pisa/build.sbt +49 -0
  198. itp_interface/pisa/build.sh +26 -0
  199. itp_interface/pisa/command_generation/close_gaps.py +44 -0
  200. itp_interface/pisa/command_generation/conjecture_normal_order.py +62 -0
  201. itp_interface/pisa/command_generation/conjecturer_command_generator.py +36 -0
  202. itp_interface/pisa/command_generation/create_dirs.py +11 -0
  203. itp_interface/pisa/command_generation/find_std.py +67 -0
  204. itp_interface/pisa/command_generation/generate_build_commands_afp.py +15 -0
  205. itp_interface/pisa/command_generation/generate_build_commands_std.py +15 -0
  206. itp_interface/pisa/command_generation/generate_commands_afp.py +103 -0
  207. itp_interface/pisa/command_generation/generate_commands_mini.py +73 -0
  208. itp_interface/pisa/command_generation/generate_commands_std.py +69 -0
  209. itp_interface/pisa/command_generation/generate_hammer_extraction_text.py +5 -0
  210. itp_interface/pisa/command_generation/hammer_command_generator.py +40 -0
  211. itp_interface/pisa/command_generation/hp_search_command_generator.py +63 -0
  212. itp_interface/pisa/command_generation/oracle_command_generator.py +56 -0
  213. itp_interface/pisa/command_generation/search_command_generator.py +69 -0
  214. itp_interface/pisa/command_generation/summarise_problem_names.py +45 -0
  215. itp_interface/pisa/command_generation/tpu_hp_search.py +75 -0
  216. itp_interface/pisa/docker/Dockerfile +34 -0
  217. itp_interface/pisa/docker/docker_tutorial.md +64 -0
  218. itp_interface/pisa/eval_setup/copy_isabelle.py +42 -0
  219. itp_interface/pisa/eval_setup/copy_pisa_jars.py +18 -0
  220. itp_interface/pisa/mesh_transformer_utils/tokenization.py +86 -0
  221. itp_interface/pisa/project/build.properties +1 -0
  222. itp_interface/pisa/project/plugins.sbt +5 -0
  223. itp_interface/pisa/requirements.txt +4 -0
  224. itp_interface/pisa/scripts/extract_last_k_steps.py +28 -0
  225. itp_interface/pisa/scripts/extract_proof_corpus.py +26 -0
  226. itp_interface/pisa/scripts/gather_hammer_results.py +27 -0
  227. itp_interface/pisa/scripts/length_in_char_stats.py +20 -0
  228. itp_interface/pisa/scripts/mix.py +127 -0
  229. itp_interface/pisa/scripts/results_stat.py +52 -0
  230. itp_interface/pisa/scripts/test_array_job.sh +34 -0
  231. itp_interface/pisa/setup.sh +25 -0
  232. itp_interface/pisa/src/main/protobuf/server.proto +60 -0
  233. itp_interface/pisa/src/main/python/.idea/.gitignore +8 -0
  234. itp_interface/pisa/src/main/python/.idea/inspectionProfiles/Project_Default.xml +18 -0
  235. itp_interface/pisa/src/main/python/.idea/inspectionProfiles/profiles_settings.xml +6 -0
  236. itp_interface/pisa/src/main/python/.idea/misc.xml +4 -0
  237. itp_interface/pisa/src/main/python/.idea/modules.xml +8 -0
  238. itp_interface/pisa/src/main/python/.idea/python.iml +12 -0
  239. itp_interface/pisa/src/main/python/.idea/vcs.xml +6 -0
  240. itp_interface/pisa/src/main/python/conjecturing_parsing/conjecturer_postprocessing.py +59 -0
  241. itp_interface/pisa/src/main/python/data_extraction/extract_data.py +184 -0
  242. itp_interface/pisa/src/main/python/data_extraction/find_premises.py +221 -0
  243. itp_interface/pisa/src/main/python/data_extraction/process_data.py +129 -0
  244. itp_interface/pisa/src/main/python/legacy/PisaFlexibleClient.py +167 -0
  245. itp_interface/pisa/src/main/python/legacy/autof_test.py +74 -0
  246. itp_interface/pisa/src/main/python/legacy/cmd_client.py +23 -0
  247. itp_interface/pisa/src/main/python/legacy/convert_scala_dump_to_test_name_jsons.py +14 -0
  248. itp_interface/pisa/src/main/python/legacy/create_data_txt.py +72 -0
  249. itp_interface/pisa/src/main/python/legacy/create_finetune_tfrecords.py +311 -0
  250. itp_interface/pisa/src/main/python/legacy/demo.py +49 -0
  251. itp_interface/pisa/src/main/python/legacy/evaluate.py +108 -0
  252. itp_interface/pisa/src/main/python/legacy/extract_first_step.py +25 -0
  253. itp_interface/pisa/src/main/python/legacy/get_global_facts.py +35 -0
  254. itp_interface/pisa/src/main/python/legacy/mix_data.py +19 -0
  255. itp_interface/pisa/src/main/python/legacy/one_stage_extraction.py +111 -0
  256. itp_interface/pisa/src/main/python/legacy/prepare_episodic_transitions.py +137 -0
  257. itp_interface/pisa/src/main/python/legacy/prepare_translation_pairs.py +277 -0
  258. itp_interface/pisa/src/main/python/pisa_client.py +322 -0
  259. itp_interface/pisa/src/main/python/server_pb2.py +394 -0
  260. itp_interface/pisa/src/main/python/server_pb2_grpc.py +230 -0
  261. itp_interface/pisa/src/main/python/test_client.py +17 -0
  262. itp_interface/pisa/src/main/python/test_client2.py +79 -0
  263. itp_interface/pisa/src/main/python/utils/filters.py +59 -0
  264. itp_interface/pisa/src/main/python/utils/pisa_server_control.py +29 -0
  265. itp_interface/pisa/src/main/scala/pisa/agent/CheckSyntax.scala +257 -0
  266. itp_interface/pisa/src/main/scala/pisa/agent/DepThms.scala +29 -0
  267. itp_interface/pisa/src/main/scala/pisa/agent/PisaStat.scala +46 -0
  268. itp_interface/pisa/src/main/scala/pisa/agent/RefactorTest.scala +40 -0
  269. itp_interface/pisa/src/main/scala/pisa/agent/RepHammer.scala +95 -0
  270. itp_interface/pisa/src/main/scala/pisa/server/HammFacts.scala +63 -0
  271. itp_interface/pisa/src/main/scala/pisa/server/PisaOS.scala +881 -0
  272. itp_interface/pisa/src/main/scala/pisa/server/PisaOneStage.scala +540 -0
  273. itp_interface/pisa/src/main/scala/pisa/server/PisaOneStageServers.scala +1048 -0
  274. itp_interface/pisa/src/main/scala/pisa/utils/TheoryManager.scala +95 -0
  275. itp_interface/pisa/src/test/python/analyse_debug.py +33 -0
  276. itp_interface/pisa/src/test/python/extract_test_seq2seq.py +53 -0
  277. itp_interface/pisa/src/test/python/extract_test_theorem_ground_truth_indices.py +31 -0
  278. itp_interface/pisa/src/test/python/proof_originality.py +24 -0
  279. itp_interface/pisa/src/test/python/test_command_generator.py +25 -0
  280. itp_interface/pisa/src/test/python/test_model_sequence_accuracy.py +70 -0
  281. itp_interface/pisa/src/test/scala/pisa/Easy.scala +26 -0
  282. itp_interface/pisa/src/test/scala/pisa/TestCurl.scala +82 -0
  283. itp_interface/pisa/src/test/scala/pisa/TestIsa.scala +27 -0
  284. itp_interface/pisa/test.sh +19 -0
  285. itp_interface/pisa/universal_test_theorems.tar.gz +0 -0
  286. itp_interface/repo/build.py +78 -0
  287. itp_interface/repo/clone.py +79 -0
  288. itp_interface/repo/dataset_discovery.py +99 -0
  289. itp_interface/retrieval/__init__.py +0 -0
  290. itp_interface/retrieval/abstraction.py +35 -0
  291. itp_interface/retrieval/coq_bm25_reranker.py +153 -0
  292. itp_interface/retrieval/isabelle_bm25_reranker.py +86 -0
  293. itp_interface/retrieval/lean3_bm25_reranker.py +86 -0
  294. itp_interface/rl/__init__.py +0 -0
  295. itp_interface/rl/abstraction.py +168 -0
  296. itp_interface/rl/proof_action.py +172 -0
  297. itp_interface/rl/proof_state.py +149 -0
  298. itp_interface/rl/proof_tree.py +109 -0
  299. itp_interface/rl/simpl_proof_env_pool.py +16 -0
  300. itp_interface/rl/simple_proof_env.py +713 -0
  301. itp_interface/rl/simple_proof_env_pool.py +591 -0
  302. itp_interface/scripts/setup.sh +228 -0
  303. itp_interface/tools/__init__.py +0 -0
  304. itp_interface/tools/basic_utils.py +172 -0
  305. itp_interface/tools/bin_packing.py +61 -0
  306. itp_interface/tools/cache.py +93 -0
  307. itp_interface/tools/coq_build_spec.py +31 -0
  308. itp_interface/tools/coq_build_tool.py +319 -0
  309. itp_interface/tools/coq_context_helper.py +354 -0
  310. itp_interface/tools/coq_executor.py +508 -0
  311. itp_interface/tools/coq_local_data_generation_transform.py +158 -0
  312. itp_interface/tools/coq_parse_utils.py +154 -0
  313. itp_interface/tools/coq_raw_proofs.py +193 -0
  314. itp_interface/tools/coq_theorem_proof_pair_generation_transform.py +146 -0
  315. itp_interface/tools/coq_training_data_generator.py +76 -0
  316. itp_interface/tools/dynamic_coq_proof_exec.py +220 -0
  317. itp_interface/tools/dynamic_isabelle_proof_exec.py +229 -0
  318. itp_interface/tools/dynamic_lean4_proof_exec.py +236 -0
  319. itp_interface/tools/dynamic_lean_proof_exec.py +228 -0
  320. itp_interface/tools/isabelle_context_helper.py +66 -0
  321. itp_interface/tools/isabelle_executor.py +862 -0
  322. itp_interface/tools/isabelle_local_data_generation_transform.py +149 -0
  323. itp_interface/tools/isabelle_parse_utils.py +131 -0
  324. itp_interface/tools/isabelle_server.py +106 -0
  325. itp_interface/tools/lean4_context_helper.py +72 -0
  326. itp_interface/tools/lean4_local_data_generation_transform.py +122 -0
  327. itp_interface/tools/lean4_sync_executor.py +1193 -0
  328. itp_interface/tools/lean_cmd_executor.py +804 -0
  329. itp_interface/tools/lean_context_helper.py +327 -0
  330. itp_interface/tools/lean_dojo_data_generation_transform.py +206 -0
  331. itp_interface/tools/lean_executor.py +687 -0
  332. itp_interface/tools/lean_local_data_generation_transform.py +136 -0
  333. itp_interface/tools/lean_parse_utils.py +32 -0
  334. itp_interface/tools/log_utils.py +20 -0
  335. itp_interface/tools/proof_exec_callback.py +76 -0
  336. itp_interface/tools/ray_utils.py +265 -0
  337. itp_interface/tools/repl/.git +1 -0
  338. itp_interface/tools/repl/.github/workflows/ci.yml +24 -0
  339. itp_interface/tools/repl/.gitignore +7 -0
  340. itp_interface/tools/repl/.vscode/copyright.code-snippets +13 -0
  341. itp_interface/tools/repl/.vscode/extensions.json +13 -0
  342. itp_interface/tools/repl/.vscode/module-docstring.code-snippets +35 -0
  343. itp_interface/tools/repl/.vscode/settings.json +11 -0
  344. itp_interface/tools/repl/README.md +174 -0
  345. itp_interface/tools/repl/REPL/Frontend.lean +47 -0
  346. itp_interface/tools/repl/REPL/JSON.lean +186 -0
  347. itp_interface/tools/repl/REPL/Lean/ContextInfo.lean +9 -0
  348. itp_interface/tools/repl/REPL/Lean/Environment.lean +31 -0
  349. itp_interface/tools/repl/REPL/Lean/InfoTree/ToJson.lean +114 -0
  350. itp_interface/tools/repl/REPL/Lean/InfoTree.lean +272 -0
  351. itp_interface/tools/repl/REPL/Main.lean +323 -0
  352. itp_interface/tools/repl/REPL/Snapshots.lean +306 -0
  353. itp_interface/tools/repl/REPL/Util/Path.lean +36 -0
  354. itp_interface/tools/repl/REPL/Util/Pickle.lean +44 -0
  355. itp_interface/tools/repl/REPL.lean +4 -0
  356. itp_interface/tools/repl/lake-manifest.json +5 -0
  357. itp_interface/tools/repl/lakefile.lean +15 -0
  358. itp_interface/tools/repl/lean-toolchain +1 -0
  359. itp_interface/tools/repl/test/Mathlib/.gitignore +5 -0
  360. itp_interface/tools/repl/test/Mathlib/H20231110.sh +2 -0
  361. itp_interface/tools/repl/test/Mathlib/ReplMathlibTests.lean +1 -0
  362. itp_interface/tools/repl/test/Mathlib/lake-manifest.json +68 -0
  363. itp_interface/tools/repl/test/Mathlib/lakefile.lean +11 -0
  364. itp_interface/tools/repl/test/Mathlib/lean-toolchain +1 -0
  365. itp_interface/tools/repl/test/Mathlib/test/20240209.expected.out +20 -0
  366. itp_interface/tools/repl/test/Mathlib/test/20240209.in +3 -0
  367. itp_interface/tools/repl/test/Mathlib/test/20240209.lean +4 -0
  368. itp_interface/tools/repl/test/Mathlib/test/H20231020.expected.out +8 -0
  369. itp_interface/tools/repl/test/Mathlib/test/H20231020.in +8 -0
  370. itp_interface/tools/repl/test/Mathlib/test/H20231020.lean +22 -0
  371. itp_interface/tools/repl/test/Mathlib/test/H20231110.expected.out +4 -0
  372. itp_interface/tools/repl/test/Mathlib/test/H20231110.in +4 -0
  373. itp_interface/tools/repl/test/Mathlib/test/H20231115.expected.out +19 -0
  374. itp_interface/tools/repl/test/Mathlib/test/H20231115.in +5 -0
  375. itp_interface/tools/repl/test/Mathlib/test/H20231115_2.expected.out +18 -0
  376. itp_interface/tools/repl/test/Mathlib/test/H20231115_2.in +4 -0
  377. itp_interface/tools/repl/test/Mathlib/test/H20231115_3.expected.out +10 -0
  378. itp_interface/tools/repl/test/Mathlib/test/H20231115_3.in +4 -0
  379. itp_interface/tools/repl/test/Mathlib/test/H20231214.in +9 -0
  380. itp_interface/tools/repl/test/Mathlib/test/H20231214.lean +30 -0
  381. itp_interface/tools/repl/test/Mathlib/test/H20231215.expected.out +4 -0
  382. itp_interface/tools/repl/test/Mathlib/test/H20231215.in +4 -0
  383. itp_interface/tools/repl/test/Mathlib/test/H20231215_2.expected.out +14 -0
  384. itp_interface/tools/repl/test/Mathlib/test/H20231215_2.in +3 -0
  385. itp_interface/tools/repl/test/Mathlib/test/exact.expected.out +37 -0
  386. itp_interface/tools/repl/test/Mathlib/test/exact.in +10 -0
  387. itp_interface/tools/repl/test/Mathlib/test/import_Mathlib.lean +1 -0
  388. itp_interface/tools/repl/test/Mathlib/test/induction.expected.out +29 -0
  389. itp_interface/tools/repl/test/Mathlib/test/induction.in +10 -0
  390. itp_interface/tools/repl/test/Mathlib/test/induction.lean +6 -0
  391. itp_interface/tools/repl/test/Mathlib/test/on_goal.expected.out +22 -0
  392. itp_interface/tools/repl/test/Mathlib/test/on_goal.in +5 -0
  393. itp_interface/tools/repl/test/Mathlib/test/pickle.expected.out +16 -0
  394. itp_interface/tools/repl/test/Mathlib/test/pickle.in +6 -0
  395. itp_interface/tools/repl/test/Mathlib/test/pickle_2.expected.out +4 -0
  396. itp_interface/tools/repl/test/Mathlib/test/pickle_2.in +4 -0
  397. itp_interface/tools/repl/test/Mathlib/test.sh +41 -0
  398. itp_interface/tools/repl/test/all_tactics.expected.out +13 -0
  399. itp_interface/tools/repl/test/all_tactics.in +1 -0
  400. itp_interface/tools/repl/test/by_cases.expected.out +25 -0
  401. itp_interface/tools/repl/test/by_cases.in +8 -0
  402. itp_interface/tools/repl/test/by_cases.lean +4 -0
  403. itp_interface/tools/repl/test/calc.expected.out +32 -0
  404. itp_interface/tools/repl/test/calc.in +1 -0
  405. itp_interface/tools/repl/test/def_eval.expected.out +9 -0
  406. itp_interface/tools/repl/test/def_eval.in +3 -0
  407. itp_interface/tools/repl/test/enableInitializersExecution.expected.out +2 -0
  408. itp_interface/tools/repl/test/enableInitializersExecution.in +1 -0
  409. itp_interface/tools/repl/test/file.expected.out +8 -0
  410. itp_interface/tools/repl/test/file.in +1 -0
  411. itp_interface/tools/repl/test/file.lean +5 -0
  412. itp_interface/tools/repl/test/have_by_sorry.expected.out +28 -0
  413. itp_interface/tools/repl/test/have_by_sorry.in +6 -0
  414. itp_interface/tools/repl/test/import_lean.in +1 -0
  415. itp_interface/tools/repl/test/incomplete.expected.out +18 -0
  416. itp_interface/tools/repl/test/incomplete.in +3 -0
  417. itp_interface/tools/repl/test/incomplete.lean +0 -0
  418. itp_interface/tools/repl/test/infotree.expected.out +20 -0
  419. itp_interface/tools/repl/test/infotree.in +2 -0
  420. itp_interface/tools/repl/test/invalid_tactic.expected.out +20 -0
  421. itp_interface/tools/repl/test/invalid_tactic.in +3 -0
  422. itp_interface/tools/repl/test/name_generator.expected.out +53 -0
  423. itp_interface/tools/repl/test/name_generator.in +18 -0
  424. itp_interface/tools/repl/test/no_goal_sorry.expected.out +11 -0
  425. itp_interface/tools/repl/test/no_goal_sorry.in +1 -0
  426. itp_interface/tools/repl/test/no_goal_sorry_2.expected.out +12 -0
  427. itp_interface/tools/repl/test/no_goal_sorry_2.in +1 -0
  428. itp_interface/tools/repl/test/options.expected.out +17 -0
  429. itp_interface/tools/repl/test/options.in +6 -0
  430. itp_interface/tools/repl/test/pickle_environment.expected.out +8 -0
  431. itp_interface/tools/repl/test/pickle_environment.in +7 -0
  432. itp_interface/tools/repl/test/pickle_environment_with_imports.expected.out +10 -0
  433. itp_interface/tools/repl/test/pickle_environment_with_imports.in +9 -0
  434. itp_interface/tools/repl/test/pickle_open.expected.out +8 -0
  435. itp_interface/tools/repl/test/pickle_open.in +7 -0
  436. itp_interface/tools/repl/test/pickle_open_2.expected.out +4 -0
  437. itp_interface/tools/repl/test/pickle_open_2.in +3 -0
  438. itp_interface/tools/repl/test/pickle_open_scoped.expected.out +18 -0
  439. itp_interface/tools/repl/test/pickle_open_scoped.in +8 -0
  440. itp_interface/tools/repl/test/pickle_open_scoped_2.expected.out +14 -0
  441. itp_interface/tools/repl/test/pickle_open_scoped_2.in +3 -0
  442. itp_interface/tools/repl/test/pickle_proof_state_1.expected.out +26 -0
  443. itp_interface/tools/repl/test/pickle_proof_state_1.in +15 -0
  444. itp_interface/tools/repl/test/pickle_proof_state_2.expected.out +4 -0
  445. itp_interface/tools/repl/test/pickle_proof_state_2.in +3 -0
  446. itp_interface/tools/repl/test/pickle_proof_state_env.expected.out +26 -0
  447. itp_interface/tools/repl/test/pickle_proof_state_env.in +15 -0
  448. itp_interface/tools/repl/test/pickle_scoped_notation.in +16 -0
  449. itp_interface/tools/repl/test/pickle_scoped_notation_2.in +3 -0
  450. itp_interface/tools/repl/test/proof_step.expected.out +18 -0
  451. itp_interface/tools/repl/test/proof_step.in +7 -0
  452. itp_interface/tools/repl/test/readme.expected.out +16 -0
  453. itp_interface/tools/repl/test/readme.in +5 -0
  454. itp_interface/tools/repl/test/sorry_hypotheses.expected.out +16 -0
  455. itp_interface/tools/repl/test/sorry_hypotheses.in +4 -0
  456. itp_interface/tools/repl/test/synthesize_placeholder.expected.out +7 -0
  457. itp_interface/tools/repl/test/synthesize_placeholder.in +1 -0
  458. itp_interface/tools/repl/test/tactic_mode_sorry.expected.out +14 -0
  459. itp_interface/tools/repl/test/tactic_mode_sorry.in +3 -0
  460. itp_interface/tools/repl/test/tactic_sorry.expected.out +12 -0
  461. itp_interface/tools/repl/test/tactic_sorry.in +1 -0
  462. itp_interface/tools/repl/test/term_sorry.expected.out +12 -0
  463. itp_interface/tools/repl/test/term_sorry.in +1 -0
  464. itp_interface/tools/repl/test/trace_simp.expected.out +41 -0
  465. itp_interface/tools/repl/test/trace_simp.in +15 -0
  466. itp_interface/tools/repl/test/unfinished_tactic_block.expected.out +11 -0
  467. itp_interface/tools/repl/test/unfinished_tactic_block.in +1 -0
  468. itp_interface/tools/repl/test/unknown_environment.expected.out +2 -0
  469. itp_interface/tools/repl/test/unknown_environment.in +1 -0
  470. itp_interface/tools/repl/test/unknown_proof_state.expected.out +14 -0
  471. itp_interface/tools/repl/test/unknown_proof_state.in +3 -0
  472. itp_interface/tools/repl/test/unknown_tactic.expected.out +14 -0
  473. itp_interface/tools/repl/test/unknown_tactic.in +3 -0
  474. itp_interface/tools/repl/test/variables.expected.out +26 -0
  475. itp_interface/tools/repl/test/variables.in +5 -0
  476. itp_interface/tools/repl/test.sh +43 -0
  477. itp_interface/tools/run_data_generation_transforms.py +350 -0
  478. itp_interface/tools/theorem_details.py +25 -0
  479. itp_interface/tools/training_data.py +358 -0
  480. itp_interface/tools/training_data_format.py +599 -0
  481. itp_interface-1.0.0.dist-info/METADATA +78 -0
  482. itp_interface-1.0.0.dist-info/RECORD +485 -0
  483. itp_interface-1.0.0.dist-info/WHEEL +4 -0
  484. itp_interface-1.0.0.dist-info/entry_points.txt +3 -0
  485. itp_interface-1.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,358 @@
1
+
2
+ #!/usr/bin/env python3
3
+
4
+ import sys
5
+
6
+ root_dir = f"{__file__.split('itp_interface')[0]}"
7
+ if root_dir not in sys.path:
8
+ sys.path.append(root_dir)
9
+ import os
10
+ import copy
11
+ import ray
12
+ import typing
13
+ import logging
14
+ import time
15
+ import psutil
16
+ from itp_interface.tools.ray_utils import RayUtils
17
+ from itp_interface.tools.training_data_format import LemmaRefWithScore, LemmaReferencesCollection, MergableCollection, TrainingDataCollection, TrainingDataFormat, TrainingDataMetadataFormat
18
+
19
+
20
+ class TrainingData(MergableCollection):
21
+ def __init__(
22
+ self,
23
+ folder: str,
24
+ training_meta_filename: str,
25
+ training_meta: TrainingDataMetadataFormat = None,
26
+ max_parallelism: int = 4,
27
+ remove_from_store_after_loading: bool = True,
28
+ logger: logging.Logger = None):
29
+ assert os.path.exists(folder), f"Folder {folder} does not exist"
30
+ assert os.path.isdir(folder), f"Folder {folder} is not a directory"
31
+ assert training_meta_filename is not None, "Training meta filename cannot be None"
32
+ self.folder = folder
33
+ self._is_loaded = False
34
+ self.training_meta_filename = training_meta_filename
35
+ self.meta : TrainingDataMetadataFormat = training_meta
36
+ self.lemma_ref_collection : LemmaReferencesCollection = LemmaReferencesCollection()
37
+ self._lemma_ref_filename : str = None
38
+ self._training_data_filenames : typing.List[str] = []
39
+ self.training_data_collections : typing.List[TrainingDataCollection] = []
40
+ self._max_parallelism: int = max_parallelism
41
+ self.logger = logger if logger is not None else logging.getLogger(__name__)
42
+ self.remove_from_store_after_loading = remove_from_store_after_loading
43
+ self._meta_loaded = False
44
+ self._object_id_map : typing.List[ray.ObjectRef] = []
45
+ super().__init__()
46
+
47
+ def __len__(self) -> int:
48
+ assert self.meta is not None, "Training meta is not set"
49
+ return self.meta.total_proof_step_cnt
50
+
51
+ @property
52
+ def is_readonly(self) -> bool:
53
+ return os.path.exists(os.path.join(self.folder, self.training_meta_filename))
54
+
55
+ def load_meta(self):
56
+ assert self.is_readonly, "Training data is not loadable"
57
+ self.meta : TrainingDataMetadataFormat = TrainingDataMetadataFormat.load_from_file(os.path.join(self.folder, self.training_meta_filename))
58
+ self.training_data_collections.clear()
59
+ self._training_data_filenames.clear()
60
+ lemma_file_cnt = 0
61
+ for filename in os.listdir(self.folder):
62
+ if filename.startswith(self.meta.lemma_ref_filename_prefix) and filename.endswith(self.meta.lemma_ref_filename_suffix):
63
+ self._lemma_ref_filename = filename
64
+ lemma_file_cnt += 1
65
+ elif filename.startswith(self.meta.data_filename_prefix) and filename.endswith(self.meta.data_filename_suffix):
66
+ self._training_data_filenames.append(filename)
67
+ assert lemma_file_cnt == 1, "There must be exactly one lemma reference file"
68
+ self._training_data_filenames.sort()
69
+ self.logger.info(f"Loading lemma reference from {self.folder}: {self._lemma_ref_filename}")
70
+ self.logger.info(f"Loading training data from {self.folder}: {self._training_data_filenames}")
71
+ assert self._lemma_ref_filename is not None, "Lemma reference filename is not set"
72
+ self._object_id_map = [None] * (len(self._training_data_filenames) + 2)
73
+ self.training_data_collections = [None] * len(self._training_data_filenames)
74
+ self.lemma_ref_collection = None
75
+ meta_id = ray.put(self.meta)
76
+ self._object_id_map[0] = meta_id
77
+ self._meta_loaded = True
78
+ pass
79
+
80
+ def load(self):
81
+ assert self.is_readonly, "Training data is not loadable"
82
+ if not self._meta_loaded:
83
+ self.load_meta()
84
+ files_to_load = [self.training_meta_filename, self._lemma_ref_filename] + self._training_data_filenames
85
+ self.logger.info(f"Loading {len(files_to_load)} files...")
86
+ last_loaded_idx = 1
87
+
88
+ def _create_remote(filenames):
89
+ remotes = []
90
+ base_idx = last_loaded_idx - len(filenames)
91
+ for i, filename in enumerate(filenames):
92
+ self.logger.info(f"[TrainingData] Starting the loading of [{base_idx + i}] {filename}...")
93
+ collection_fn = TrainingData._get_lemma_ref_collection if filename == self._lemma_ref_filename else TrainingData._get_training_data_collection
94
+ remotes.append(collection_fn.remote(base_idx + i, self.folder, filename))
95
+ return remotes
96
+
97
+ def _transform_remote(results):
98
+ for res in results:
99
+ if isinstance(res, tuple):
100
+ assert len(res) == 2, "Invalid tuple length"
101
+ assert isinstance(res[0], int), "Invalid type"
102
+ assert res[0] < len(self._object_id_map), f"Invalid index {res[0]} in {len(self._object_id_map)}"
103
+ obj = res[1]
104
+ is_lemma_ref = res[0] == 1
105
+ if not is_lemma_ref:
106
+ assert isinstance(obj, TrainingDataCollection), "Invalid type"
107
+ self.training_data_collections[res[0] - 2] = obj
108
+ self.logger.info(f"[TrainingData] Finished the loading of {res[0]}")
109
+ else:
110
+ assert isinstance(obj, LemmaReferencesCollection), "Invalid type"
111
+ self.lemma_ref_collection = obj
112
+ self.logger.info(f"[TrainingData] Finished the loading of {self._lemma_ref_filename}")
113
+ else:
114
+ raise Exception(f"Invalid type {type(res)}")
115
+ process = psutil.Process()
116
+ self.logger.info(f"[TrainingData] Memory usage: {process.memory_info().rss / 2**30} GiB, Process: {process.pid}")
117
+
118
+ def _prepare_next_batch(num:int):
119
+ nonlocal last_loaded_idx
120
+ filenames = files_to_load[last_loaded_idx: last_loaded_idx + num]
121
+ last_loaded_idx += len(filenames)
122
+ return filenames
123
+
124
+ RayUtils.ray_run_within_parallel_limits(self._max_parallelism, len(files_to_load) - 1, _transform_remote, _prepare_next_batch, _create_remote, logger=self.logger)
125
+ self.logger.info(f"Finished loading {len(files_to_load)} files")
126
+ self._is_loaded = True
127
+
128
+ def unload(self):
129
+ assert self.is_readonly, "Training data is not loadable"
130
+ if self._is_loaded:
131
+ self._is_loaded = False
132
+ # Reload the metadata
133
+ self.load_meta()
134
+
135
+ def merge(self, __o: object, new_lemma_ref_idx: typing.List[int] = None):
136
+ assert isinstance(__o, TrainingDataFormat) or \
137
+ isinstance(__o, TrainingData), "other must be a TrainingDataFormat or TrainingDataMetadata"
138
+ assert not self.is_readonly, "Training data is read only"
139
+ assert self.lemma_ref_collection is not None, "Lemma reference collection is not set"
140
+ if isinstance(__o, TrainingData):
141
+ assert new_lemma_ref_idx is None, "new_lemma_ref_idx must be None"
142
+ new_lemma_ref_idx = self.lemma_ref_collection.merge(__o.lemma_ref_collection) # merge lemma references
143
+ for idx in range(len(__o)):
144
+ self._merge_training_data_format(__o[idx], new_lemma_ref_idx) # merge training data
145
+ self.meta.num_theorems += __o.meta.num_theorems
146
+ assert (len(__o) > 0 and len(self.training_data_collections[-1]) <= self.meta.training_data_buffer_size) or len(__o) == 0, "Training data buffer size is too large"
147
+ else:
148
+ self._merge_training_data_format(__o, new_lemma_ref_idx)
149
+ assert len(self.training_data_collections[-1]) <= self.meta.training_data_buffer_size, "Training data buffer size is too large"
150
+
151
+ def clone_skeleton(self, training_data, lemma_ref_collection: LemmaReferencesCollection = None):
152
+ assert self.meta is not None, "Metadata is not set"
153
+ assert isinstance(training_data, TrainingData), "Invalid type"
154
+ assert not self._meta_loaded, "Training metadata is already loaded"
155
+ self.meta.training_data_buffer_size = training_data.meta.training_data_buffer_size
156
+ self.meta.total_proof_step_cnt = training_data.meta.total_proof_step_cnt
157
+ self.meta.external_theorems_used_cnt = training_data.meta.external_theorems_used_cnt
158
+ self.meta.local_theorems_used_cnt = training_data.meta.local_theorems_used_cnt
159
+ self.meta.last_proof_id = training_data.meta.last_proof_id
160
+ self.meta.last_training_data = training_data.meta.last_training_data
161
+ # Add all the training data to the new training data
162
+ if lemma_ref_collection is None:
163
+ lemma_ref_id = training_data._object_id_map[1]
164
+ else:
165
+ lemma_ref_id = ray.put(lemma_ref_collection)
166
+ meta_id = ray.put(self.meta)
167
+ self._object_id_map = [None] * (len(training_data.training_data_collections) + 2)
168
+ self._object_id_map[0] = meta_id
169
+ self._object_id_map[1] = lemma_ref_id
170
+ self._training_data_filenames.clear()
171
+ lemma_len = int(training_data._lemma_ref_filename[len(training_data.meta.lemma_ref_filename_prefix): -1*len(training_data.meta.lemma_ref_filename_suffix)])
172
+ self._lemma_ref_filename = self.meta.lemma_ref_filename_prefix + f"{lemma_len:010d}" + self.meta.lemma_ref_filename_suffix
173
+ self.lemma_ref_collection = training_data.lemma_ref_collection
174
+ for filename in training_data._training_data_filenames:
175
+ idx_len = int(filename[len(training_data.meta.data_filename_prefix): -1*len(training_data.meta.data_filename_suffix)])
176
+ self._training_data_filenames.append(self.meta.data_filename_prefix + f"{idx_len:010d}" + self.meta.data_filename_suffix)
177
+ self.training_data_collections.append(None)
178
+ assert len(self._training_data_filenames) == len(self.training_data_collections), "Invalid length"
179
+ assert len(self._training_data_filenames) == len(training_data.training_data_collections), "Invalid length"
180
+
181
+ def __getitem__(self, idx: int) -> TrainingDataFormat:
182
+ tdc_idx = idx // self.meta.training_data_buffer_size
183
+ idx_in_tdc = idx % self.meta.training_data_buffer_size
184
+ if tdc_idx >= len(self.training_data_collections):
185
+ raise IndexError(f"Index out of (len(self.training_data_collections)={len(self.training_data_collections)}, buffer={self.meta.training_data_buffer_size}, range idx={idx}, tdc_idx={tdc_idx}, idx_in_tdc={idx_in_tdc}, len(self.training_data_collections)={len(self.training_data_collections)})")
186
+ tdc = self.training_data_collections[tdc_idx]
187
+ if idx_in_tdc >= len(tdc):
188
+ raise IndexError(f"Index out of range (len(self.training_data_collections)={len(self.training_data_collections)},buffer={self.meta.training_data_buffer_size}, range idx={idx}, tdc_idx={tdc_idx}, idx_in_tdc={idx_in_tdc}, len(tdc)={len(tdc)})")
189
+ training_data = copy.deepcopy(tdc.training_data[idx_in_tdc])
190
+ lemma_refs : typing.Set[int] = set()
191
+ for goal in training_data.start_goals:
192
+ lemma_refs.update([ref.lemma_idx for ref in goal.relevant_defns])
193
+ lemma_refs.update([ref.lemma_idx for ref in goal.used_theorems_local])
194
+ lemma_refs.update([ref.lemma_idx for ref in goal.used_theorems_external])
195
+ lemma_refs.update([ref.lemma_idx for ref in goal.possible_useful_theorems_local])
196
+ lemma_refs.update([ref.lemma_idx for ref in goal.possible_useful_theorems_external])
197
+ ordered_lemma_refs = sorted(list(lemma_refs))
198
+ lemma_ref_map = {lemma_ref: idx for idx, lemma_ref in enumerate(ordered_lemma_refs)}
199
+ training_data.all_useful_defns_theorems = [self.lemma_ref_collection.lemma_references[lemma_idx].clone(idx) for idx, lemma_idx in enumerate(ordered_lemma_refs)]
200
+ # Change the lemma references
201
+ for goal in training_data.start_goals:
202
+ goal.relevant_defns = [LemmaRefWithScore(lemma_ref_map[lemma_ref.lemma_idx], lemma_ref.score) for lemma_ref in goal.relevant_defns]
203
+ goal.used_theorems_local = [LemmaRefWithScore(lemma_ref_map[lemma_ref.lemma_idx], lemma_ref.score) for lemma_ref in goal.used_theorems_local]
204
+ goal.used_theorems_external = [LemmaRefWithScore(lemma_ref_map[lemma_ref.lemma_idx], lemma_ref.score) for lemma_ref in goal.used_theorems_external]
205
+ goal.possible_useful_theorems_local = [LemmaRefWithScore(lemma_ref_map[lemma_ref.lemma_idx], lemma_ref.score) for lemma_ref in goal.possible_useful_theorems_local]
206
+ goal.possible_useful_theorems_external = [LemmaRefWithScore(lemma_ref_map[lemma_ref.lemma_idx], lemma_ref.score) for lemma_ref in goal.possible_useful_theorems_external]
207
+ return training_data
208
+
209
+ def save(self) -> str:
210
+ assert not self.is_readonly, "Training data is read only"
211
+ self.logger.info(f"[TrainingData] Saving training data {self.folder} ...")
212
+
213
+ use_named_reference = len(self._object_id_map) == len(self._training_data_filenames) + 2
214
+
215
+ if not use_named_reference:
216
+ # Generate lemma ref file name
217
+ if self._lemma_ref_filename is None:
218
+ self._lemma_ref_filename = self.meta.lemma_ref_filename_prefix + f"{len(self.lemma_ref_collection):010d}" + self.meta.lemma_ref_filename_suffix
219
+
220
+ if len(self._training_data_filenames) == 0:
221
+ # Generate training data file names
222
+ cum_cnt = 0
223
+ for tdc in self.training_data_collections:
224
+ cum_cnt += len(tdc)
225
+ training_data_filename = self.meta.data_filename_prefix + f"{cum_cnt:010d}" + self.meta.data_filename_suffix
226
+ self._training_data_filenames.append(training_data_filename)
227
+ assert len(self._training_data_filenames) == len(self.training_data_collections), "Invalid length"
228
+ self._object_id_map = [None] * (len(self._training_data_filenames) + 2)
229
+ else:
230
+ assert len(self._object_id_map) == len(self._training_data_filenames) + 2, "Invalid length"
231
+ files_to_save = [self.training_meta_filename, self._lemma_ref_filename] + self._training_data_filenames
232
+ last_idx = 0
233
+
234
+ self.logger.info(f"[TrainingData] Files to save: {files_to_save}")
235
+
236
+ if not use_named_reference:
237
+ tdcs = [self.meta, self.lemma_ref_collection] + self.training_data_collections
238
+ self.logger.info(f"[TrainingData] Putting tdc to ray...")
239
+ for idx, tdc in enumerate(tdcs):
240
+ self._object_id_map[idx] = ray.put(tdc)
241
+ self.logger.info(f"[TrainingData] Put [{idx}] to ray")
242
+ self.logger.info(f"[TrainingData] Finished putting tdc to ray")
243
+ else:
244
+ self.logger.info(f"[TrainingData] Using named reference")
245
+
246
+ assert len(self._object_id_map) == len(files_to_save), "Invalid length"
247
+ assert all([obj_ref is not None for obj_ref in self._object_id_map]), "Invalid object id map"
248
+
249
+ def _create_remote(filenames):
250
+ remotes = []
251
+ base_idx = last_idx - len(filenames)
252
+ for i, filename in enumerate(filenames):
253
+ self.logger.info(f"[TrainingData] Starting the saving of [{base_idx + i}] {filename}...")
254
+ obj_ref = self._object_id_map[base_idx + i]
255
+ remotes.append(TrainingData._save_object.remote(base_idx + i, obj_ref, os.path.join(self.folder, filename)))
256
+ return remotes
257
+
258
+ def _transform_remote(results):
259
+ for res in results:
260
+ if isinstance(res, tuple):
261
+ assert len(res) == 2, "Invalid return value"
262
+ assert isinstance(res[0], int), "Invalid return value"
263
+ assert isinstance(res[1], str), "Invalid return value"
264
+ self.logger.info(f"[TrainingData] Saved [{res[0]}] in file {res[1]}")
265
+ else:
266
+ raise Exception(f"Unable to save {res}")
267
+ process = psutil.Process()
268
+ self.logger.info(f"[TrainingData] Memory usage: {process.memory_info().rss / 2**30} GiB, Process: {process.pid}")
269
+
270
+ def _prepare_next_batch(num:int):
271
+ nonlocal last_idx, files_to_save
272
+ filenames = files_to_save[last_idx:last_idx + num]
273
+ last_idx += len(filenames)
274
+ return filenames
275
+
276
+ RayUtils.ray_run_within_parallel_limits(self._max_parallelism, len(files_to_save), _transform_remote, _prepare_next_batch, _create_remote, self.logger)
277
+ return self.folder
278
+
279
+ def _merge_training_data_format(self, other: TrainingDataFormat, new_lemma_ref_idx: typing.List[int] = None):
280
+ assert isinstance(other, TrainingDataFormat), "other must be a TrainingDataFormat"
281
+ assert self.lemma_ref_collection is not None, "Lemma ref collection is None"
282
+ if new_lemma_ref_idx is None:
283
+ new_lemma_ref_idx : typing.List[int] = self.lemma_ref_collection.merge(other.all_useful_defns_theorems)
284
+ assert len(new_lemma_ref_idx) == len(other.all_useful_defns_theorems), "Invalid lemma ref idx"
285
+ if len(self.training_data_collections) == 0:
286
+ self.training_data_collections.append(TrainingDataCollection())
287
+ last_training_data_collection = self.training_data_collections[-1]
288
+ if len(last_training_data_collection) + 1 > self.meta.training_data_buffer_size:
289
+ self.training_data_collections.append(TrainingDataCollection())
290
+ last_training_data_collection = self.training_data_collections[-1]
291
+ TrainingData._merge_training_data_collection(last_training_data_collection, [other], new_lemma_ref_idx)
292
+ # Update the metadata
293
+ self.meta.last_proof_id = other.proof_id
294
+ self.meta.last_training_data += 1
295
+ self.meta.external_theorems_used_cnt += sum([len(goal.used_theorems_external) for goal in other.start_goals])
296
+ self.meta.local_theorems_used_cnt += sum([len(goal.used_theorems_local) for goal in other.start_goals])
297
+ self.meta.total_proof_step_cnt += len(other.proof_steps)
298
+
299
+ @ray.remote(max_retries=-1)
300
+ def _get_training_data_collection(idx : int, folder: str, filename: str) -> typing.Tuple[int, ray.ObjectID]:
301
+ file_path = os.path.join(folder, filename)
302
+ start_time = time.time()
303
+ ray.logger.info(f"[TrainingData] Trying to load {file_path}")
304
+ tdc = TrainingDataCollection.load_from_file(file_path)
305
+ end_time = time.time()
306
+ ray.logger.info(f"[TrainingData] Loaded {file_path} in {end_time - start_time} seconds")
307
+ return idx, tdc
308
+
309
+ @ray.remote(max_retries=-1)
310
+ def _get_lemma_ref_collection(idx : int, folder: str, filename: str) -> typing.Tuple[int, ray.ObjectID]:
311
+ file_path = os.path.join(folder, filename)
312
+ start_time = time.time()
313
+ ray.logger.info(f"[TrainingData] Trying to load {file_path}")
314
+ res = LemmaReferencesCollection.load_from_file(file_path)
315
+ end_time = time.time()
316
+ ray.logger.info(f"[TrainingData] Loaded {file_path} in {end_time - start_time} seconds")
317
+ return idx, res
318
+
319
+ @ray.remote(max_retries=-1)
320
+ def _save_object(i : int, obj: typing.Union[TrainingDataCollection, TrainingDataMetadataFormat, LemmaReferencesCollection], filepath: str):
321
+ save_start_time = time.time()
322
+ ray.logger.info(f"[TrainingData] Saving {filepath}")
323
+ with open(filepath, 'w') as f:
324
+ # serialize the current metadata
325
+ json_str = obj.to_json()
326
+ # update the metadata in the file
327
+ f.write(json_str)
328
+ save_end_time = time.time()
329
+ ray.logger.info(f"[TrainingData] Saved {filepath} in {save_end_time - save_start_time}s")
330
+ return i, filepath
331
+
332
+ def _merge_training_data_collection(other: TrainingDataCollection, training_data_points: typing.List[TrainingDataFormat], new_lemma_ref_idx: typing.List[int]):
333
+ assert isinstance(other, TrainingDataCollection), "other must be a TrainingDataFormat or TrainingDataCollection"
334
+ assert isinstance(training_data_points, list), "training_data_points must be a list"
335
+ assert isinstance(new_lemma_ref_idx, list), "new_lemma_ref_idx must be a list"
336
+ new_tdps : typing.List[TrainingDataFormat] = []
337
+ for tdp in training_data_points:
338
+ assert isinstance(tdp, TrainingDataFormat), "training_data_points must contain TrainingDataFormat objects"
339
+ new_tdp = TrainingDataFormat(
340
+ tdp.proof_id,
341
+ all_useful_defns_theorems=[],
342
+ start_goals=tdp.start_goals,
343
+ end_goals=tdp.end_goals,
344
+ proof_steps=tdp.proof_steps,
345
+ simplified_goals=tdp.simplified_goals,
346
+ addition_state_info=tdp.addition_state_info,
347
+ file_path=tdp.file_path,
348
+ project_id=tdp.project_id,
349
+ theorem_name=tdp.theorem_name)
350
+ # Reset the lemma references
351
+ for goal in new_tdp.start_goals:
352
+ goal.relevant_defns = [LemmaRefWithScore(new_lemma_ref_idx[lemma_ref.lemma_idx], lemma_ref.score) for lemma_ref in goal.relevant_defns]
353
+ goal.used_theorems_local = [LemmaRefWithScore(new_lemma_ref_idx[lemma_ref.lemma_idx] , lemma_ref.score) for lemma_ref in goal.used_theorems_local]
354
+ goal.used_theorems_external = [LemmaRefWithScore(new_lemma_ref_idx[lemma_ref.lemma_idx], lemma_ref.score) for lemma_ref in goal.used_theorems_external]
355
+ goal.possible_useful_theorems_local = [LemmaRefWithScore(new_lemma_ref_idx[lemma_ref.lemma_idx], lemma_ref.score) for lemma_ref in goal.possible_useful_theorems_local]
356
+ goal.possible_useful_theorems_external = [LemmaRefWithScore(new_lemma_ref_idx[lemma_ref.lemma_idx], lemma_ref.score) for lemma_ref in goal.possible_useful_theorems_external]
357
+ new_tdps.append(new_tdp)
358
+ other.training_data.extend(new_tdps)