itp-interface 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- itp_interface/__init__.py +0 -0
- itp_interface/agent/__init__.py +0 -0
- itp_interface/agent/simple_proof_agent.py +100 -0
- itp_interface/coq_ser_api/__init__.py +165 -0
- itp_interface/coq_ser_api/contexts.py +283 -0
- itp_interface/coq_ser_api/coq_agent.py +459 -0
- itp_interface/coq_ser_api/coq_backend.py +135 -0
- itp_interface/coq_ser_api/coq_util.py +839 -0
- itp_interface/coq_ser_api/example.py +67 -0
- itp_interface/coq_ser_api/lsp_backend.py +375 -0
- itp_interface/coq_ser_api/py.typed +0 -0
- itp_interface/coq_ser_api/serapi_backend.py +841 -0
- itp_interface/coq_ser_api/util.py +145 -0
- itp_interface/coq_ser_api_old/__init__.py +2583 -0
- itp_interface/coq_ser_api_old/contexts.py +172 -0
- itp_interface/coq_ser_api_old/util.py +146 -0
- itp_interface/lean_server/__init__.py +0 -0
- itp_interface/lean_server/commands.py +484 -0
- itp_interface/lean_server/lean3_search_tool.py +358 -0
- itp_interface/lean_server/lean4_repl_interface.py +151 -0
- itp_interface/lean_server/lean4_utils.py +255 -0
- itp_interface/lean_server/lean_cmd_server.py +111 -0
- itp_interface/lean_server/lean_context.py +60 -0
- itp_interface/lean_server/lean_sync_server.py +174 -0
- itp_interface/lean_server/lean_utils.py +199 -0
- itp_interface/lean_server/py.typed +1 -0
- itp_interface/main/__init__.py +0 -0
- itp_interface/main/config/afp_data_gen.yaml +14 -0
- itp_interface/main/config/benchmark/CompCert.yaml +366 -0
- itp_interface/main/config/benchmark/GeoCoq.yaml +930 -0
- itp_interface/main/config/benchmark/UniMath.yaml +2690 -0
- itp_interface/main/config/benchmark/afp_isabelle.yaml +29200 -0
- itp_interface/main/config/benchmark/agent_proverbot_hard.yaml +247 -0
- itp_interface/main/config/benchmark/category-theory.yaml +470 -0
- itp_interface/main/config/benchmark/compcert_118_subset.yaml +148 -0
- itp_interface/main/config/benchmark/compcert_benchmark.yaml +36 -0
- itp_interface/main/config/benchmark/compcert_benchmark_hard.yaml +498 -0
- itp_interface/main/config/benchmark/compcert_benchmark_hard_1.yaml +55 -0
- itp_interface/main/config/benchmark/compcert_benchmark_hard_2.yaml +24 -0
- itp_interface/main/config/benchmark/compcert_benchmark_hard_3.yaml +95 -0
- itp_interface/main/config/benchmark/compcert_benchmark_hard_7_per_cent.yaml +78 -0
- itp_interface/main/config/benchmark/compcert_benchmark_test.yaml +38 -0
- itp_interface/main/config/benchmark/compcert_benchmark_train.yaml +340 -0
- itp_interface/main/config/benchmark/leandojo_novel_premises_test.yaml +2908 -0
- itp_interface/main/config/benchmark/leandojo_novel_premises_train.yaml +98645 -0
- itp_interface/main/config/benchmark/leandojo_novel_premises_val.yaml +2912 -0
- itp_interface/main/config/benchmark/leandojo_random.yaml +2889 -0
- itp_interface/main/config/benchmark/leandojo_random_test.yaml +2421 -0
- itp_interface/main/config/benchmark/leandojo_random_train.yaml +62729 -0
- itp_interface/main/config/benchmark/leandojo_random_val.yaml +2504 -0
- itp_interface/main/config/benchmark/math-comp.yaml +200 -0
- itp_interface/main/config/benchmark/miniF2F_test.yaml +12 -0
- itp_interface/main/config/benchmark/miniF2F_test_aime.yaml +27 -0
- itp_interface/main/config/benchmark/miniF2F_test_algebra.yaml +30 -0
- itp_interface/main/config/benchmark/miniF2F_test_amc12.yaml +57 -0
- itp_interface/main/config/benchmark/miniF2F_test_few_shot_hard.yaml +231 -0
- itp_interface/main/config/benchmark/miniF2F_test_imo.yaml +32 -0
- itp_interface/main/config/benchmark/miniF2F_test_induction.yaml +20 -0
- itp_interface/main/config/benchmark/miniF2F_test_mathd_algebra.yaml +82 -0
- itp_interface/main/config/benchmark/miniF2F_test_mathd_algebra_hard.yaml +72 -0
- itp_interface/main/config/benchmark/miniF2F_test_mathd_numbertheory.yaml +72 -0
- itp_interface/main/config/benchmark/miniF2F_test_numbertheory.yaml +20 -0
- itp_interface/main/config/benchmark/minicompcert_benchmark_1.yaml +14 -0
- itp_interface/main/config/benchmark/proverbot_hard.yaml +104 -0
- itp_interface/main/config/benchmark/re_prover.yaml +66 -0
- itp_interface/main/config/benchmark/re_prover_hard.yaml +41 -0
- itp_interface/main/config/benchmark/re_prover_very_hard.yaml +22 -0
- itp_interface/main/config/benchmark/reprover_with_retrieval.yaml +73 -0
- itp_interface/main/config/benchmark/reprover_with_retrieval_hard.yaml +30 -0
- itp_interface/main/config/benchmark/reprover_with_retrieval_neg.yaml +195 -0
- itp_interface/main/config/benchmark/simple_benchmark_1.yaml +24 -0
- itp_interface/main/config/benchmark/simple_benchmark_8.yaml +50 -0
- itp_interface/main/config/benchmark/simple_benchmark_9.yaml +65 -0
- itp_interface/main/config/benchmark/simple_benchmark_isabelle.yaml +18 -0
- itp_interface/main/config/benchmark/simple_benchmark_lean.yaml +12 -0
- itp_interface/main/config/benchmark/simple_benchmark_lean_training_data.yaml +12 -0
- itp_interface/main/config/benchmark/simple_rl_benchmark_lean.yaml +14 -0
- itp_interface/main/config/benchmark/stack_machine.yaml +13 -0
- itp_interface/main/config/benchmark/stack_machine_hard.yaml +15 -0
- itp_interface/main/config/category_theory_data_gen.yaml +14 -0
- itp_interface/main/config/category_theory_data_gen_random.yaml +16 -0
- itp_interface/main/config/compcert_data_gen_test.yaml +10 -0
- itp_interface/main/config/compcert_data_gen_train.yaml +7 -0
- itp_interface/main/config/env_settings/bm25_retrieval.yaml +2 -0
- itp_interface/main/config/env_settings/bm25_retrieval_no_dfns.yaml +2 -0
- itp_interface/main/config/env_settings/bm25_retrieval_only_local_no_dfns.yaml +2 -0
- itp_interface/main/config/env_settings/bm25_retrieval_with_print.yaml +2 -0
- itp_interface/main/config/env_settings/bm25_retrieval_with_print_only_local.yaml +2 -0
- itp_interface/main/config/env_settings/bm25_retrieval_with_print_only_local_no_dfns.yaml +2 -0
- itp_interface/main/config/env_settings/no_retrieval.yaml +2 -0
- itp_interface/main/config/experiments.yaml +12 -0
- itp_interface/main/config/geo_coq_data_gen.yaml +14 -0
- itp_interface/main/config/geo_coq_data_gen_random.yaml +16 -0
- itp_interface/main/config/leandojo_random_data_gen.yaml +16 -0
- itp_interface/main/config/math_comp_data_gen.yaml +14 -0
- itp_interface/main/config/math_comp_data_gen_random.yaml +16 -0
- itp_interface/main/config/mathlib_data_gen.yaml +14 -0
- itp_interface/main/config/repo/coq_repos.yaml +191 -0
- itp_interface/main/config/run_settings/default_coq_data_generation_transforms.yaml +24 -0
- itp_interface/main/config/run_settings/default_isabelle_data_generation_transforms.yaml +24 -0
- itp_interface/main/config/run_settings/default_lean4_data_generation_transforms.yaml +24 -0
- itp_interface/main/config/run_settings/default_lean_data_generation_transforms.yaml +24 -0
- itp_interface/main/config/simple_coq_data_gen.yaml +12 -0
- itp_interface/main/config/simple_coq_data_gen_random.yaml +17 -0
- itp_interface/main/config/simple_lean_data_gen.yaml +12 -0
- itp_interface/main/config/simple_rl_lean_data_gen.yaml +12 -0
- itp_interface/main/config/uni_math_data_gen.yaml +14 -0
- itp_interface/main/config.py +192 -0
- itp_interface/main/extract_benchmark_dataset.py +106 -0
- itp_interface/main/filter_dataset.py +107 -0
- itp_interface/main/install.py +92 -0
- itp_interface/main/merge_dataset.py +96 -0
- itp_interface/main/run_tool.py +444 -0
- itp_interface/pisa/.git +1 -0
- itp_interface/pisa/.gitignore +125 -0
- itp_interface/pisa/.idea/.gitignore +8 -0
- itp_interface/pisa/.idea/ClojureProjectResolveSettings.xml +6 -0
- itp_interface/pisa/.idea/codeStyles/Project.xml +7 -0
- itp_interface/pisa/.idea/codeStyles/codeStyleConfig.xml +5 -0
- itp_interface/pisa/.idea/inspectionProfiles/Project_Default.xml +16 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_android_annotations_4_1_1_4_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_api_grpc_proto_google_common_protos_1_17_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_code_findbugs_jsr305_3_0_2_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_code_gson_gson_2_8_6_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_errorprone_error_prone_annotations_2_3_4_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_guava_failureaccess_1_0_1_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_guava_guava_30_0_jre_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_guava_listenablefuture_9999_0_empty_to_avoid_conflict_with_guava_jar.xml +9 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_j2objc_j2objc_annotations_1_3_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_protobuf_protobuf_java_3_12_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_protobuf_protobuf_java_util_3_12_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_lihaoyi_fastparse_2_13_2_3_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_lihaoyi_geny_2_13_0_6_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_lihaoyi_sourcecode_2_13_0_2_1_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_thesamet_scalapb_lenses_2_13_0_10_9_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_thesamet_scalapb_scalapb_runtime_2_13_0_10_9_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_thesamet_scalapb_scalapb_runtime_grpc_2_13_0_10_9_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_thesamet_scalapb_zio_grpc_zio_grpc_core_2_13_0_4_2_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_thoughtworks_paranamer_paranamer_2_8_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__commons_io_commons_io_2_8_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__de_unruh_java_patterns_0_1_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__de_unruh_scala_isabelle_2_13_master_SNAPSHOT_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__dev_zio_izumi_reflect_2_13_1_0_0_M9_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__dev_zio_izumi_reflect_thirdparty_boopickle_shaded_2_13_1_0_0_M9_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__dev_zio_zio_2_13_1_0_3_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__dev_zio_zio_stacktracer_2_13_1_0_3_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__dev_zio_zio_streams_2_13_1_0_3_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_api_1_34_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_context_1_34_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_core_1_34_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_netty_1_34_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_protobuf_1_34_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_protobuf_lite_1_34_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_services_1_34_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_stub_1_34_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_buffer_4_1_51_Final_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_codec_4_1_51_Final_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_codec_http2_4_1_51_Final_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_codec_http_4_1_51_Final_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_codec_socks_4_1_51_Final_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_common_4_1_51_Final_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_handler_4_1_51_Final_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_handler_proxy_4_1_51_Final_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_resolver_4_1_51_Final_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_transport_4_1_51_Final_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_perfmark_perfmark_api_0_19_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__net_java_dev_jna_jna_5_3_1_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__net_liftweb_lift_json_2_13_3_4_3_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_apache_commons_commons_lang3_3_11_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_apache_commons_commons_text_1_9_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_checkerframework_checker_qual_3_5_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_codehaus_mojo_animal_sniffer_annotations_1_18_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_jetbrains_annotations_20_1_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_jline_jline_3_16_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_log4s_log4s_2_13_1_9_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_modules_scala_collection_compat_2_13_2_1_6_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_modules_scala_xml_2_13_1_3_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_scala_compiler_2_13_4_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_scala_library_2_13_4_jar.xml +23 -0
- itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_scala_reflect_2_13_4_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_scalap_2_13_4_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_scalaz_scalaz_core_2_13_7_3_2_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_slf4j_slf4j_api_1_7_30_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_slf4j_slf4j_simple_1_7_30_jar.xml +13 -0
- itp_interface/pisa/.idea/misc.xml +7 -0
- itp_interface/pisa/.idea/modules/PISA-build.iml +127 -0
- itp_interface/pisa/.idea/modules/PISA.iml +94 -0
- itp_interface/pisa/.idea/modules.xml +9 -0
- itp_interface/pisa/.idea/other.xml +6 -0
- itp_interface/pisa/.idea/sbt.xml +20 -0
- itp_interface/pisa/.idea/scala_compiler.xml +6 -0
- itp_interface/pisa/.idea/uiDesigner.xml +124 -0
- itp_interface/pisa/.idea/vcs.xml +6 -0
- itp_interface/pisa/.scalafmt.conf +2 -0
- itp_interface/pisa/LICENSE +29 -0
- itp_interface/pisa/README.md +262 -0
- itp_interface/pisa/build.sbt +49 -0
- itp_interface/pisa/build.sh +26 -0
- itp_interface/pisa/command_generation/close_gaps.py +44 -0
- itp_interface/pisa/command_generation/conjecture_normal_order.py +62 -0
- itp_interface/pisa/command_generation/conjecturer_command_generator.py +36 -0
- itp_interface/pisa/command_generation/create_dirs.py +11 -0
- itp_interface/pisa/command_generation/find_std.py +67 -0
- itp_interface/pisa/command_generation/generate_build_commands_afp.py +15 -0
- itp_interface/pisa/command_generation/generate_build_commands_std.py +15 -0
- itp_interface/pisa/command_generation/generate_commands_afp.py +103 -0
- itp_interface/pisa/command_generation/generate_commands_mini.py +73 -0
- itp_interface/pisa/command_generation/generate_commands_std.py +69 -0
- itp_interface/pisa/command_generation/generate_hammer_extraction_text.py +5 -0
- itp_interface/pisa/command_generation/hammer_command_generator.py +40 -0
- itp_interface/pisa/command_generation/hp_search_command_generator.py +63 -0
- itp_interface/pisa/command_generation/oracle_command_generator.py +56 -0
- itp_interface/pisa/command_generation/search_command_generator.py +69 -0
- itp_interface/pisa/command_generation/summarise_problem_names.py +45 -0
- itp_interface/pisa/command_generation/tpu_hp_search.py +75 -0
- itp_interface/pisa/docker/Dockerfile +34 -0
- itp_interface/pisa/docker/docker_tutorial.md +64 -0
- itp_interface/pisa/eval_setup/copy_isabelle.py +42 -0
- itp_interface/pisa/eval_setup/copy_pisa_jars.py +18 -0
- itp_interface/pisa/mesh_transformer_utils/tokenization.py +86 -0
- itp_interface/pisa/project/build.properties +1 -0
- itp_interface/pisa/project/plugins.sbt +5 -0
- itp_interface/pisa/requirements.txt +4 -0
- itp_interface/pisa/scripts/extract_last_k_steps.py +28 -0
- itp_interface/pisa/scripts/extract_proof_corpus.py +26 -0
- itp_interface/pisa/scripts/gather_hammer_results.py +27 -0
- itp_interface/pisa/scripts/length_in_char_stats.py +20 -0
- itp_interface/pisa/scripts/mix.py +127 -0
- itp_interface/pisa/scripts/results_stat.py +52 -0
- itp_interface/pisa/scripts/test_array_job.sh +34 -0
- itp_interface/pisa/setup.sh +25 -0
- itp_interface/pisa/src/main/protobuf/server.proto +60 -0
- itp_interface/pisa/src/main/python/.idea/.gitignore +8 -0
- itp_interface/pisa/src/main/python/.idea/inspectionProfiles/Project_Default.xml +18 -0
- itp_interface/pisa/src/main/python/.idea/inspectionProfiles/profiles_settings.xml +6 -0
- itp_interface/pisa/src/main/python/.idea/misc.xml +4 -0
- itp_interface/pisa/src/main/python/.idea/modules.xml +8 -0
- itp_interface/pisa/src/main/python/.idea/python.iml +12 -0
- itp_interface/pisa/src/main/python/.idea/vcs.xml +6 -0
- itp_interface/pisa/src/main/python/conjecturing_parsing/conjecturer_postprocessing.py +59 -0
- itp_interface/pisa/src/main/python/data_extraction/extract_data.py +184 -0
- itp_interface/pisa/src/main/python/data_extraction/find_premises.py +221 -0
- itp_interface/pisa/src/main/python/data_extraction/process_data.py +129 -0
- itp_interface/pisa/src/main/python/legacy/PisaFlexibleClient.py +167 -0
- itp_interface/pisa/src/main/python/legacy/autof_test.py +74 -0
- itp_interface/pisa/src/main/python/legacy/cmd_client.py +23 -0
- itp_interface/pisa/src/main/python/legacy/convert_scala_dump_to_test_name_jsons.py +14 -0
- itp_interface/pisa/src/main/python/legacy/create_data_txt.py +72 -0
- itp_interface/pisa/src/main/python/legacy/create_finetune_tfrecords.py +311 -0
- itp_interface/pisa/src/main/python/legacy/demo.py +49 -0
- itp_interface/pisa/src/main/python/legacy/evaluate.py +108 -0
- itp_interface/pisa/src/main/python/legacy/extract_first_step.py +25 -0
- itp_interface/pisa/src/main/python/legacy/get_global_facts.py +35 -0
- itp_interface/pisa/src/main/python/legacy/mix_data.py +19 -0
- itp_interface/pisa/src/main/python/legacy/one_stage_extraction.py +111 -0
- itp_interface/pisa/src/main/python/legacy/prepare_episodic_transitions.py +137 -0
- itp_interface/pisa/src/main/python/legacy/prepare_translation_pairs.py +277 -0
- itp_interface/pisa/src/main/python/pisa_client.py +322 -0
- itp_interface/pisa/src/main/python/server_pb2.py +394 -0
- itp_interface/pisa/src/main/python/server_pb2_grpc.py +230 -0
- itp_interface/pisa/src/main/python/test_client.py +17 -0
- itp_interface/pisa/src/main/python/test_client2.py +79 -0
- itp_interface/pisa/src/main/python/utils/filters.py +59 -0
- itp_interface/pisa/src/main/python/utils/pisa_server_control.py +29 -0
- itp_interface/pisa/src/main/scala/pisa/agent/CheckSyntax.scala +257 -0
- itp_interface/pisa/src/main/scala/pisa/agent/DepThms.scala +29 -0
- itp_interface/pisa/src/main/scala/pisa/agent/PisaStat.scala +46 -0
- itp_interface/pisa/src/main/scala/pisa/agent/RefactorTest.scala +40 -0
- itp_interface/pisa/src/main/scala/pisa/agent/RepHammer.scala +95 -0
- itp_interface/pisa/src/main/scala/pisa/server/HammFacts.scala +63 -0
- itp_interface/pisa/src/main/scala/pisa/server/PisaOS.scala +881 -0
- itp_interface/pisa/src/main/scala/pisa/server/PisaOneStage.scala +540 -0
- itp_interface/pisa/src/main/scala/pisa/server/PisaOneStageServers.scala +1048 -0
- itp_interface/pisa/src/main/scala/pisa/utils/TheoryManager.scala +95 -0
- itp_interface/pisa/src/test/python/analyse_debug.py +33 -0
- itp_interface/pisa/src/test/python/extract_test_seq2seq.py +53 -0
- itp_interface/pisa/src/test/python/extract_test_theorem_ground_truth_indices.py +31 -0
- itp_interface/pisa/src/test/python/proof_originality.py +24 -0
- itp_interface/pisa/src/test/python/test_command_generator.py +25 -0
- itp_interface/pisa/src/test/python/test_model_sequence_accuracy.py +70 -0
- itp_interface/pisa/src/test/scala/pisa/Easy.scala +26 -0
- itp_interface/pisa/src/test/scala/pisa/TestCurl.scala +82 -0
- itp_interface/pisa/src/test/scala/pisa/TestIsa.scala +27 -0
- itp_interface/pisa/test.sh +19 -0
- itp_interface/pisa/universal_test_theorems.tar.gz +0 -0
- itp_interface/repo/build.py +78 -0
- itp_interface/repo/clone.py +79 -0
- itp_interface/repo/dataset_discovery.py +99 -0
- itp_interface/retrieval/__init__.py +0 -0
- itp_interface/retrieval/abstraction.py +35 -0
- itp_interface/retrieval/coq_bm25_reranker.py +153 -0
- itp_interface/retrieval/isabelle_bm25_reranker.py +86 -0
- itp_interface/retrieval/lean3_bm25_reranker.py +86 -0
- itp_interface/rl/__init__.py +0 -0
- itp_interface/rl/abstraction.py +168 -0
- itp_interface/rl/proof_action.py +172 -0
- itp_interface/rl/proof_state.py +149 -0
- itp_interface/rl/proof_tree.py +109 -0
- itp_interface/rl/simpl_proof_env_pool.py +16 -0
- itp_interface/rl/simple_proof_env.py +713 -0
- itp_interface/rl/simple_proof_env_pool.py +591 -0
- itp_interface/scripts/setup.sh +228 -0
- itp_interface/tools/__init__.py +0 -0
- itp_interface/tools/basic_utils.py +172 -0
- itp_interface/tools/bin_packing.py +61 -0
- itp_interface/tools/cache.py +93 -0
- itp_interface/tools/coq_build_spec.py +31 -0
- itp_interface/tools/coq_build_tool.py +319 -0
- itp_interface/tools/coq_context_helper.py +354 -0
- itp_interface/tools/coq_executor.py +508 -0
- itp_interface/tools/coq_local_data_generation_transform.py +158 -0
- itp_interface/tools/coq_parse_utils.py +154 -0
- itp_interface/tools/coq_raw_proofs.py +193 -0
- itp_interface/tools/coq_theorem_proof_pair_generation_transform.py +146 -0
- itp_interface/tools/coq_training_data_generator.py +76 -0
- itp_interface/tools/dynamic_coq_proof_exec.py +220 -0
- itp_interface/tools/dynamic_isabelle_proof_exec.py +229 -0
- itp_interface/tools/dynamic_lean4_proof_exec.py +236 -0
- itp_interface/tools/dynamic_lean_proof_exec.py +228 -0
- itp_interface/tools/isabelle_context_helper.py +66 -0
- itp_interface/tools/isabelle_executor.py +862 -0
- itp_interface/tools/isabelle_local_data_generation_transform.py +149 -0
- itp_interface/tools/isabelle_parse_utils.py +131 -0
- itp_interface/tools/isabelle_server.py +106 -0
- itp_interface/tools/lean4_context_helper.py +72 -0
- itp_interface/tools/lean4_local_data_generation_transform.py +122 -0
- itp_interface/tools/lean4_sync_executor.py +1193 -0
- itp_interface/tools/lean_cmd_executor.py +804 -0
- itp_interface/tools/lean_context_helper.py +327 -0
- itp_interface/tools/lean_dojo_data_generation_transform.py +206 -0
- itp_interface/tools/lean_executor.py +687 -0
- itp_interface/tools/lean_local_data_generation_transform.py +136 -0
- itp_interface/tools/lean_parse_utils.py +32 -0
- itp_interface/tools/log_utils.py +20 -0
- itp_interface/tools/proof_exec_callback.py +76 -0
- itp_interface/tools/ray_utils.py +265 -0
- itp_interface/tools/repl/.git +1 -0
- itp_interface/tools/repl/.github/workflows/ci.yml +24 -0
- itp_interface/tools/repl/.gitignore +7 -0
- itp_interface/tools/repl/.vscode/copyright.code-snippets +13 -0
- itp_interface/tools/repl/.vscode/extensions.json +13 -0
- itp_interface/tools/repl/.vscode/module-docstring.code-snippets +35 -0
- itp_interface/tools/repl/.vscode/settings.json +11 -0
- itp_interface/tools/repl/README.md +174 -0
- itp_interface/tools/repl/REPL/Frontend.lean +47 -0
- itp_interface/tools/repl/REPL/JSON.lean +186 -0
- itp_interface/tools/repl/REPL/Lean/ContextInfo.lean +9 -0
- itp_interface/tools/repl/REPL/Lean/Environment.lean +31 -0
- itp_interface/tools/repl/REPL/Lean/InfoTree/ToJson.lean +114 -0
- itp_interface/tools/repl/REPL/Lean/InfoTree.lean +272 -0
- itp_interface/tools/repl/REPL/Main.lean +323 -0
- itp_interface/tools/repl/REPL/Snapshots.lean +306 -0
- itp_interface/tools/repl/REPL/Util/Path.lean +36 -0
- itp_interface/tools/repl/REPL/Util/Pickle.lean +44 -0
- itp_interface/tools/repl/REPL.lean +4 -0
- itp_interface/tools/repl/lake-manifest.json +5 -0
- itp_interface/tools/repl/lakefile.lean +15 -0
- itp_interface/tools/repl/lean-toolchain +1 -0
- itp_interface/tools/repl/test/Mathlib/.gitignore +5 -0
- itp_interface/tools/repl/test/Mathlib/H20231110.sh +2 -0
- itp_interface/tools/repl/test/Mathlib/ReplMathlibTests.lean +1 -0
- itp_interface/tools/repl/test/Mathlib/lake-manifest.json +68 -0
- itp_interface/tools/repl/test/Mathlib/lakefile.lean +11 -0
- itp_interface/tools/repl/test/Mathlib/lean-toolchain +1 -0
- itp_interface/tools/repl/test/Mathlib/test/20240209.expected.out +20 -0
- itp_interface/tools/repl/test/Mathlib/test/20240209.in +3 -0
- itp_interface/tools/repl/test/Mathlib/test/20240209.lean +4 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231020.expected.out +8 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231020.in +8 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231020.lean +22 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231110.expected.out +4 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231110.in +4 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231115.expected.out +19 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231115.in +5 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231115_2.expected.out +18 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231115_2.in +4 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231115_3.expected.out +10 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231115_3.in +4 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231214.in +9 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231214.lean +30 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231215.expected.out +4 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231215.in +4 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231215_2.expected.out +14 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231215_2.in +3 -0
- itp_interface/tools/repl/test/Mathlib/test/exact.expected.out +37 -0
- itp_interface/tools/repl/test/Mathlib/test/exact.in +10 -0
- itp_interface/tools/repl/test/Mathlib/test/import_Mathlib.lean +1 -0
- itp_interface/tools/repl/test/Mathlib/test/induction.expected.out +29 -0
- itp_interface/tools/repl/test/Mathlib/test/induction.in +10 -0
- itp_interface/tools/repl/test/Mathlib/test/induction.lean +6 -0
- itp_interface/tools/repl/test/Mathlib/test/on_goal.expected.out +22 -0
- itp_interface/tools/repl/test/Mathlib/test/on_goal.in +5 -0
- itp_interface/tools/repl/test/Mathlib/test/pickle.expected.out +16 -0
- itp_interface/tools/repl/test/Mathlib/test/pickle.in +6 -0
- itp_interface/tools/repl/test/Mathlib/test/pickle_2.expected.out +4 -0
- itp_interface/tools/repl/test/Mathlib/test/pickle_2.in +4 -0
- itp_interface/tools/repl/test/Mathlib/test.sh +41 -0
- itp_interface/tools/repl/test/all_tactics.expected.out +13 -0
- itp_interface/tools/repl/test/all_tactics.in +1 -0
- itp_interface/tools/repl/test/by_cases.expected.out +25 -0
- itp_interface/tools/repl/test/by_cases.in +8 -0
- itp_interface/tools/repl/test/by_cases.lean +4 -0
- itp_interface/tools/repl/test/calc.expected.out +32 -0
- itp_interface/tools/repl/test/calc.in +1 -0
- itp_interface/tools/repl/test/def_eval.expected.out +9 -0
- itp_interface/tools/repl/test/def_eval.in +3 -0
- itp_interface/tools/repl/test/enableInitializersExecution.expected.out +2 -0
- itp_interface/tools/repl/test/enableInitializersExecution.in +1 -0
- itp_interface/tools/repl/test/file.expected.out +8 -0
- itp_interface/tools/repl/test/file.in +1 -0
- itp_interface/tools/repl/test/file.lean +5 -0
- itp_interface/tools/repl/test/have_by_sorry.expected.out +28 -0
- itp_interface/tools/repl/test/have_by_sorry.in +6 -0
- itp_interface/tools/repl/test/import_lean.in +1 -0
- itp_interface/tools/repl/test/incomplete.expected.out +18 -0
- itp_interface/tools/repl/test/incomplete.in +3 -0
- itp_interface/tools/repl/test/incomplete.lean +0 -0
- itp_interface/tools/repl/test/infotree.expected.out +20 -0
- itp_interface/tools/repl/test/infotree.in +2 -0
- itp_interface/tools/repl/test/invalid_tactic.expected.out +20 -0
- itp_interface/tools/repl/test/invalid_tactic.in +3 -0
- itp_interface/tools/repl/test/name_generator.expected.out +53 -0
- itp_interface/tools/repl/test/name_generator.in +18 -0
- itp_interface/tools/repl/test/no_goal_sorry.expected.out +11 -0
- itp_interface/tools/repl/test/no_goal_sorry.in +1 -0
- itp_interface/tools/repl/test/no_goal_sorry_2.expected.out +12 -0
- itp_interface/tools/repl/test/no_goal_sorry_2.in +1 -0
- itp_interface/tools/repl/test/options.expected.out +17 -0
- itp_interface/tools/repl/test/options.in +6 -0
- itp_interface/tools/repl/test/pickle_environment.expected.out +8 -0
- itp_interface/tools/repl/test/pickle_environment.in +7 -0
- itp_interface/tools/repl/test/pickle_environment_with_imports.expected.out +10 -0
- itp_interface/tools/repl/test/pickle_environment_with_imports.in +9 -0
- itp_interface/tools/repl/test/pickle_open.expected.out +8 -0
- itp_interface/tools/repl/test/pickle_open.in +7 -0
- itp_interface/tools/repl/test/pickle_open_2.expected.out +4 -0
- itp_interface/tools/repl/test/pickle_open_2.in +3 -0
- itp_interface/tools/repl/test/pickle_open_scoped.expected.out +18 -0
- itp_interface/tools/repl/test/pickle_open_scoped.in +8 -0
- itp_interface/tools/repl/test/pickle_open_scoped_2.expected.out +14 -0
- itp_interface/tools/repl/test/pickle_open_scoped_2.in +3 -0
- itp_interface/tools/repl/test/pickle_proof_state_1.expected.out +26 -0
- itp_interface/tools/repl/test/pickle_proof_state_1.in +15 -0
- itp_interface/tools/repl/test/pickle_proof_state_2.expected.out +4 -0
- itp_interface/tools/repl/test/pickle_proof_state_2.in +3 -0
- itp_interface/tools/repl/test/pickle_proof_state_env.expected.out +26 -0
- itp_interface/tools/repl/test/pickle_proof_state_env.in +15 -0
- itp_interface/tools/repl/test/pickle_scoped_notation.in +16 -0
- itp_interface/tools/repl/test/pickle_scoped_notation_2.in +3 -0
- itp_interface/tools/repl/test/proof_step.expected.out +18 -0
- itp_interface/tools/repl/test/proof_step.in +7 -0
- itp_interface/tools/repl/test/readme.expected.out +16 -0
- itp_interface/tools/repl/test/readme.in +5 -0
- itp_interface/tools/repl/test/sorry_hypotheses.expected.out +16 -0
- itp_interface/tools/repl/test/sorry_hypotheses.in +4 -0
- itp_interface/tools/repl/test/synthesize_placeholder.expected.out +7 -0
- itp_interface/tools/repl/test/synthesize_placeholder.in +1 -0
- itp_interface/tools/repl/test/tactic_mode_sorry.expected.out +14 -0
- itp_interface/tools/repl/test/tactic_mode_sorry.in +3 -0
- itp_interface/tools/repl/test/tactic_sorry.expected.out +12 -0
- itp_interface/tools/repl/test/tactic_sorry.in +1 -0
- itp_interface/tools/repl/test/term_sorry.expected.out +12 -0
- itp_interface/tools/repl/test/term_sorry.in +1 -0
- itp_interface/tools/repl/test/trace_simp.expected.out +41 -0
- itp_interface/tools/repl/test/trace_simp.in +15 -0
- itp_interface/tools/repl/test/unfinished_tactic_block.expected.out +11 -0
- itp_interface/tools/repl/test/unfinished_tactic_block.in +1 -0
- itp_interface/tools/repl/test/unknown_environment.expected.out +2 -0
- itp_interface/tools/repl/test/unknown_environment.in +1 -0
- itp_interface/tools/repl/test/unknown_proof_state.expected.out +14 -0
- itp_interface/tools/repl/test/unknown_proof_state.in +3 -0
- itp_interface/tools/repl/test/unknown_tactic.expected.out +14 -0
- itp_interface/tools/repl/test/unknown_tactic.in +3 -0
- itp_interface/tools/repl/test/variables.expected.out +26 -0
- itp_interface/tools/repl/test/variables.in +5 -0
- itp_interface/tools/repl/test.sh +43 -0
- itp_interface/tools/run_data_generation_transforms.py +350 -0
- itp_interface/tools/theorem_details.py +25 -0
- itp_interface/tools/training_data.py +358 -0
- itp_interface/tools/training_data_format.py +599 -0
- itp_interface-1.0.0.dist-info/METADATA +78 -0
- itp_interface-1.0.0.dist-info/RECORD +485 -0
- itp_interface-1.0.0.dist-info/WHEEL +4 -0
- itp_interface-1.0.0.dist-info/entry_points.txt +3 -0
- itp_interface-1.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import random
|
|
3
|
+
import shutil
|
|
4
|
+
import jsonlines
|
|
5
|
+
|
|
6
|
+
from tqdm import tqdm
|
|
7
|
+
from mpmath import mp, mpf, fmod
|
|
8
|
+
import hashlib
|
|
9
|
+
import math
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
random.seed(0)
|
|
13
|
+
mp.dps = 50
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def split_transitions(problem_names, transitions):
    """Group a file's transitions by the problem they belong to.

    A transition whose second element is one of ``problem_names`` starts a new
    group (and is stored as that group's first entry). Any later transition is
    added to the current group only if its first element contains the
    substring ``"proof"``; all other transitions are dropped.

    Args:
        problem_names: Collection of problem statements; used for membership
            tests and as the keys of the result.
        transitions: Iterable of indexable records; index 0 and index 1 are
            read here.

    Returns:
        Dict mapping every problem name to the list of its transitions
        (possibly empty).
    """
    transitions_for_problems = {problem_name: [] for problem_name in problem_names}
    current_problem_name = ""
    for transition in transitions:
        if transition[1] in problem_names:
            current_problem_name = transition[1]
        elif "proof" not in transition[0]:
            continue
        if current_problem_name not in transitions_for_problems:
            # A "proof" transition seen before any known problem statement has
            # no group to join; skip it instead of failing with a KeyError.
            continue
        transitions_for_problems[current_problem_name].append(transition)
    return transitions_for_problems
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def process_translations_for_a_problem(transitions_for_a_problem):
    """Transform the transitions for a problem to translation pairs.

    The first transition is treated as the lemma/theorem definition; its
    second element seeds the running proof text. Every following transition
    becomes one dict with the keys ``"observation"`` (element 0),
    ``"extra context"`` (the proof text accumulated so far), ``"action"``
    (element 1) and ``"complete"`` (``True`` only for the last dict).

    Args:
        transitions_for_a_problem: Sequence of indexable records; may be
            empty.

    Returns:
        List of dicts, empty when there is no transition after the
        definition (or no transition at all).
    """
    if not transitions_for_a_problem:
        # A problem whose statement never showed up in the transitions has
        # nothing to convert; avoid indexing into an empty list.
        return []

    # The first one is the lemma/theorem definition
    previous_proof_segment = transitions_for_a_problem[0][1]

    episodic_transitions = []
    for transition in transitions_for_a_problem[1:]:
        episodic_transitions.append({
            "observation": transition[0],
            "extra context": previous_proof_segment,
            "action": transition[1],
            "complete": False,
        })
        # Steps are joined with a literal backslash-n token, not a newline.
        previous_proof_segment += " \\n " + transition[1]

    if episodic_transitions:
        episodic_transitions[-1]["complete"] = True
    return episodic_transitions
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def trim_string(s: str):
    """Collapse every whitespace run (newlines included) into one space and strip both ends."""
    # str.split() with no argument already treats "\n" as whitespace.
    words = s.split()
    return " ".join(words)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def remove_extra_spaces(s: str):
    """Return ``s`` with leading/trailing whitespace removed and inner whitespace runs reduced to one space."""
    tokens = s.split()
    return " ".join(tokens)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def hash_string_to_int(arg):
    """Map a string to an integer in ``[0, 10**30)`` derived from its SHA-256 digest."""
    digest = hashlib.sha256(arg.encode("utf-8")).digest()
    # Big-endian interpretation of the raw digest equals parsing the hex digest in base 16.
    return int.from_bytes(digest, "big") % 10**30
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def hash_string_to_float(arg):
    """Return the fractional part of ``hash_string_to_int(arg) * pi`` as an mpmath number."""
    integer_hash = hash_string_to_int(arg)
    scaled = mpf(integer_hash) * mp.pi
    return fmod(scaled, mpf(1.))
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def get_split(arg):
    """Pick ``"train"``, ``"val"`` or ``"test"`` for ``arg`` from its hash value.

    Values below 0.95 map to ``"train"``, values in ``[0.95, 0.96)`` to
    ``"val"`` and everything else to ``"test"``.
    """
    float_hash = hash_string_to_float(arg)
    if float_hash < 0.95:
        return "train"
    return "val" if float_hash < 0.96 else "test"
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def random_split_file_names(file_names, val_test_files=100):
    """Shuffle ``file_names`` in place and cut it into train/val/test slices.

    Args:
        file_names: List to split. It is shuffled in place with the
            module-level ``random`` generator.
        val_test_files: Number of entries for the validation slice and for
            the test slice (each).

    Returns:
        Tuple ``(train, val, test)`` of lists. ``test`` is the last
        ``val_test_files`` entries, ``val`` the ``val_test_files`` entries
        before it, and ``train`` the rest.
    """
    random.shuffle(file_names)
    total = len(file_names)
    # Explicit, clamped boundaries instead of negative slices: ``xs[:-0]`` is
    # empty and ``xs[-0:]`` is the whole list, so ``val_test_files=0`` used to
    # send every file to "test" and none to "train".
    test_start = max(total - val_test_files, 0)
    val_start = max(total - 2 * val_test_files, 0)
    return file_names[:val_start], file_names[val_start:test_start], file_names[test_start:]
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def process_files_with_proof_statements(file_names, saving_directory):
    """Convert extraction JSON files into per-split JSONL files.

    Each input file is expected to provide the keys ``'file_name'``,
    ``'problem_names'`` and ``'translations'``. For every problem the split is
    chosen with ``get_split`` and its transitions are converted with
    ``process_translations_for_a_problem``. One ``<split>.jsonl`` file is
    written per split (one line per problem) plus
    ``problem_names_split.json`` listing the ``(file_name, problem_name)``
    pairs of each split.

    Args:
        file_names: Paths of the JSON files to read.
        saving_directory: Existing directory that receives the output files.
    """
    # Imported here because the module only imports ``os`` inside its
    # ``__main__`` guard; without this the function raises NameError when the
    # module is imported rather than run as a script.
    import os

    problem_names_split = {
        "train": [],
        "val": [],
        "test": [],
    }
    unique_id_to_transitions = {}
    for file_path in tqdm(file_names):
        with open(file_path) as fin:
            file = json.load(fin)
        original_file_name = file['file_name']
        problem_names = set(file['problem_names'])
        transitions_split = split_transitions(problem_names, file['translations'])

        # dict.fromkeys de-duplicates like set() did, but keeps the order of
        # the input file so the output does not change from run to run.
        for problem_name in dict.fromkeys(file['problem_names']):
            split = get_split(problem_name)
            uid = (original_file_name, problem_name)
            problem_names_split[split].append(uid)
            unique_id_to_transitions[uid] = process_translations_for_a_problem(
                transitions_split[problem_name])

    for split, split_uids in problem_names_split.items():
        with jsonlines.open(os.path.join(saving_directory, "{}.jsonl".format(split)), "w") as writer:
            for uid in split_uids:
                writer.write(unique_id_to_transitions[uid])

    # Context manager guarantees the index file is flushed and closed.
    with open(os.path.join(saving_directory, "problem_names_split.json"), "w") as fout:
        json.dump(problem_names_split, fout)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# Command-line entry point: rebuilds the saving directory from scratch and
# converts every "*_ground_truth.json" found one level below the extraction
# directory.
if __name__ == "__main__":
    import argparse
    import glob
    import os
    parser = argparse.ArgumentParser(description='Extracting translation pairs.')
    # Both directories are mandatory; without them the script used to crash
    # later with a TypeError on ``None``.
    parser.add_argument('--extraction-file-directory', '-efd', required=True,
                        help='Where the parsed json files are')
    parser.add_argument('--saving-directory', '-sd', required=True,
                        help='Where to save the translation pairs')
    parser.add_argument('--proof', dest='proof', action='store_true')
    parser.add_argument('--state', dest='state', action='store_true')
    args = parser.parse_args()

    # Report a usage error instead of using ``assert``, which is stripped
    # when Python runs with -O.
    if not (args.proof or args.state):
        parser.error("at least one of --proof and --state is required")
    if args.proof and not args.state:
        proof_state_suffix = "proof"
    elif args.state and not args.proof:
        proof_state_suffix = "state"
    else:
        proof_state_suffix = "proof_and_state"
    # NOTE(review): proof_state_suffix is not used below. The original passed
    # it as a second argument to ``format`` on a pattern with a single
    # placeholder, where it was ignored. Confirm whether it was meant to be
    # part of the glob pattern.

    saving_directory = args.saving_directory
    # Any existing output directory is deleted before being recreated.
    if os.path.isdir(saving_directory):
        shutil.rmtree(saving_directory)
    os.makedirs(saving_directory)

    file_names = list(glob.glob("{}/*/*_ground_truth.json".format(
        args.extraction_file_directory)))
    process_files_with_proof_statements(file_names, saving_directory)
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import random
|
|
3
|
+
import shutil
|
|
4
|
+
|
|
5
|
+
from tqdm import tqdm
|
|
6
|
+
from mpmath import mp, mpf, fmod
|
|
7
|
+
import hashlib
|
|
8
|
+
import math
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
random.seed(0)
|
|
12
|
+
mp.dps = 50
|
|
13
|
+
DEFAULT_MAX_LENGTH = 3000
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def split_transitions(problem_names, transitions):
    """Group a file's transitions by the problem they belong to.

    A transition whose second element is one of ``problem_names`` starts a new
    group (and is stored as that group's first entry). Any later transition is
    added to the current group only if its first element contains the
    substring ``"proof"``; all other transitions are dropped.

    Args:
        problem_names: Collection of problem statements; used for membership
            tests and as the keys of the result.
        transitions: Iterable of indexable records; index 0 and index 1 are
            read here.

    Returns:
        Dict mapping every problem name to the list of its transitions
        (possibly empty).
    """
    transitions_for_problems = {problem_name: [] for problem_name in problem_names}
    current_problem_name = ""
    for transition in transitions:
        if transition[1] in problem_names:
            current_problem_name = transition[1]
        elif "proof" not in transition[0]:
            continue
        if current_problem_name not in transitions_for_problems:
            # A "proof" transition seen before any known problem statement has
            # no group to join; skip it instead of failing with a KeyError.
            continue
        transitions_for_problems[current_problem_name].append(transition)
    return transitions_for_problems
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def extract_siblings(proof_steps, current_step_index):
    """Find the earlier steps on the same level and the enclosing step.

    Walks backwards from ``current_step_index`` comparing element 2 (the
    proof level) of each earlier step with that of the current step. Steps on
    a deeper level are ignored, steps on the same level are collected, and the
    walk stops at the first step on a shallower level.

    Returns:
        ``(sibling_indices, parent_index)`` with the siblings in ascending
        order when a shallower step is found; ``([], None)`` when the start
        of the list is reached without finding one (any siblings collected
        on the way are not returned in that case).
    """
    own_level = proof_steps[current_step_index][2]
    # We shall have own_level >= 1 since we're inside a proof
    newest_first = []
    for earlier in range(current_step_index - 1, -1, -1):
        level = proof_steps[earlier][2]
        if level < own_level:
            # Higher level step: this is where the search ends.
            newest_first.reverse()
            return newest_first, earlier
        if level == own_level:
            newest_first.append(earlier)
        # Otherwise the step belongs to an unimportant proof subtree.
    return [], None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def extract_needed(proof_steps, current_step_index, needed_found):
    """Return the indices of the steps needed as context for one step.

    The result is the needed indices of the enclosing step, followed by the
    enclosing step itself, followed by the earlier siblings of the current
    step. A truthy entry in ``needed_found`` for the current index is returned
    as is, without any search.

    Raises:
        AssertionError: if ``extract_siblings`` reports a negative index.
    """
    cached = needed_found[current_step_index]
    if cached:
        return cached

    sibling_indices, parent_index = extract_siblings(proof_steps, current_step_index)
    if parent_index is None:
        return sibling_indices
    if parent_index < 0:
        raise AssertionError
    # Index 0 has nothing above it, so only recurse for later parents.
    ancestors = extract_needed(proof_steps, parent_index, needed_found) if parent_index > 0 else []
    return ancestors + [parent_index] + sibling_indices
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def extract_needed_string(transition, transitions_for_a_problem, previous_proof_segment, i, needed_found, kwargs):
    """Build the ``<ISA_NDS> ... <ISA_OBS> ...`` source string for step ``i``.

    The needed indices computed for step ``i`` are stored in
    ``needed_found[i]``. ``previous_proof_segment`` and ``kwargs`` are
    accepted for signature compatibility and are not read.
    """
    indices = extract_needed(transitions_for_a_problem, i, needed_found)
    needed_found[i] = indices
    needed_steps = [transitions_for_a_problem[index][1] for index in indices]
    return "<ISA_NDS> {} <ISA_OBS> {}".format(" \\n ".join(needed_steps), transition[0])
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def extract_last_k_string(transition, transitions_for_a_problem, previous_proof_segment, i, needed_found, kwargs):
    """Build the ``<ISA_LAST_k> ... <ISA_OBS> ...`` source string.

    The proof text so far is split on the `` \\n `` separator and only its
    last ``k`` lines are kept, where ``k`` is ``kwargs["last_k"]`` (1 when the
    key is absent). ``transitions_for_a_problem``, ``i`` and ``needed_found``
    are accepted for signature compatibility and are not read.

    Raises:
        AssertionError: if ``last_k`` is not an ``int``.
        ValueError: if ``last_k`` is negative.
    """
    last_k = kwargs["last_k"] if "last_k" in kwargs else 1
    assert isinstance(last_k, int)
    if last_k < 0:
        raise ValueError(f"last_k must be non-negative, got {last_k}")
    proof_lines = previous_proof_segment.strip().split(" \\n ")
    # ``proof_lines[-0:]`` is the whole list, so zero has to be handled
    # explicitly to really mean "no previous lines".
    selected_lines = proof_lines[-last_k:] if last_k > 0 else []
    last_k_proof_lines = " \\n ".join(selected_lines)
    return f"<ISA_LAST_{last_k}> {last_k_proof_lines} <ISA_OBS> {transition[0]}"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def extract_proof_only_string(transition, transitions_for_a_problem, previous_proof_segment, i, needed_found, kwargs):
    """Return the proof text so far prefixed with ``<ISA_PRF>``; the other arguments are not read."""
    return "<ISA_PRF> {}".format(previous_proof_segment)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def extract_state_only_string(transition, transitions_for_a_problem, previous_proof_segment, i, needed_found, kwargs):
    """Return element 0 of ``transition`` prefixed with ``<ISA_OBS>``; the other arguments are not read."""
    return "<ISA_OBS> {}".format(transition[0])
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def extract_proof_and_state_string(transition, transitions_for_a_problem, previous_proof_segment, i, needed_found,
                                   kwargs):
    """Return ``<ISA_PRF> <proof so far> <ISA_OBS> <transition[0]>``; the other arguments are not read."""
    return "<ISA_PRF> {} <ISA_OBS> {}".format(previous_proof_segment, transition[0])
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def extract_trimmed_proof_and_state_string(transition, transitions_for_a_problem, previous_proof_segment, i,
                                           needed_found, kwargs):
    """Build ``<ISA_TRIM_PRF> ... <ISA_OBS> ...`` keeping only the most recent proof lines that fit.

    Proof lines are taken from the end of the proof text and prepended one at
    a time until adding the next one would push the combined length of the
    kept text, the state and that line above ``kwargs["max_length"]``
    (``DEFAULT_MAX_LENGTH`` when the key is absent). Every kept line is
    followed by the `` \\n `` separator, including the last one.
    """
    max_length = kwargs["max_length"] if "max_length" in kwargs else DEFAULT_MAX_LENGTH
    observation = transition[0]
    observation_length = len(observation)
    kept_proof = ""
    for raw_line in reversed(previous_proof_segment.strip().split(" \\n ")):
        line = raw_line.strip()
        if len(kept_proof) + observation_length + len(line) > max_length:
            break
        kept_proof = "{} \\n {}".format(line, kept_proof)

    return "<ISA_TRIM_PRF> {} <ISA_OBS> {}".format(kept_proof, observation)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# Registry mapping a processing-method name to the function that builds the
# source string for one proof step.
all_processing_methods = dict(
    needed=extract_needed_string,
    last_k=extract_last_k_string,
    proof_only=extract_proof_only_string,
    state_only=extract_state_only_string,
    proof_and_state=extract_proof_and_state_string,
    trimmed_proof_and_state=extract_trimmed_proof_and_state_string,
)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def process_translations_for_a_problem(transitions_for_a_problem, processing_method_config=None):
    """Transform the transitions for a problem to translation pairs.

    The first transition is treated as the lemma/theorem definition; its
    second element seeds the running proof text. For every following
    transition the configured processing method builds the source string and
    element 1 of the transition is the target.

    Args:
        transitions_for_a_problem: Sequence of indexable records; may be
            empty.
        processing_method_config: ``(method_name, extra_args)`` where
            ``method_name`` is a key of ``all_processing_methods`` and
            ``extra_args`` is passed to the method as its ``kwargs`` argument.

    Returns:
        List of ``(source, target)`` tuples.

    Raises:
        AssertionError: if no configuration is given or the method name is
            unknown.
    """
    if processing_method_config is None:
        # The default used to fail with an opaque unpacking TypeError.
        raise AssertionError("We must have a specified processing method")
    processing_method_name, additional_args = processing_method_config

    if not transitions_for_a_problem:
        # Nothing to convert; avoid indexing into an empty list.
        return []

    # Validate once up front rather than on every loop iteration.
    if processing_method_name is None or processing_method_name not in all_processing_methods:
        raise AssertionError("We must have a specified processing method")
    processing_method = all_processing_methods[processing_method_name]

    # The first one is the lemma/theorem definition
    previous_proof_segment = transitions_for_a_problem[0][1]
    # Filled in by the "needed" method; False means "not computed yet".
    needed_found = {i: False for i in range(len(transitions_for_a_problem))}

    translation_pairs = []
    for i in range(1, len(transitions_for_a_problem)):
        transition = transitions_for_a_problem[i]
        translation_src = processing_method(transition, transitions_for_a_problem, previous_proof_segment, i,
                                            needed_found, additional_args)
        translation_pairs.append((translation_src, transition[1]))
        # Steps are joined with a literal backslash-n token, not a newline.
        previous_proof_segment += " \\n " + transition[1]
    return translation_pairs
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def trim_string(s: str):
    """Collapse every whitespace run (newlines included) into one space and strip both ends."""
    # str.split() with no argument already treats "\n" as whitespace.
    words = s.split()
    return " ".join(words)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def hash_string_to_int(arg):
    """Map a string to an integer in ``[0, 10**30)`` derived from its SHA-256 digest."""
    digest = hashlib.sha256(arg.encode("utf-8")).digest()
    # Big-endian interpretation of the raw digest equals parsing the hex digest in base 16.
    return int.from_bytes(digest, "big") % 10**30
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def hash_string_to_float(arg):
    """Return the fractional part of ``hash_string_to_int(arg) * pi`` as an mpmath number."""
    integer_hash = hash_string_to_int(arg)
    scaled = mpf(integer_hash) * mp.pi
    return fmod(scaled, mpf(1.))
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def get_split(arg):
    """Pick ``"train"``, ``"val"`` or ``"test"`` for ``arg`` from its hash value.

    Values below 0.95 map to ``"train"``, values in ``[0.95, 0.96)`` to
    ``"val"`` and everything else to ``"test"``.
    """
    float_hash = hash_string_to_float(arg)
    if float_hash < 0.95:
        return "train"
    return "val" if float_hash < 0.96 else "test"
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def random_split_file_names(file_names, val_test_files=100):
    """Shuffle ``file_names`` in place and cut it into train/val/test slices.

    Args:
        file_names: List to split. It is shuffled in place with the
            module-level ``random`` generator.
        val_test_files: Number of entries for the validation slice and for
            the test slice (each).

    Returns:
        Tuple ``(train, val, test)`` of lists. ``test`` is the last
        ``val_test_files`` entries, ``val`` the ``val_test_files`` entries
        before it, and ``train`` the rest.
    """
    random.shuffle(file_names)
    total = len(file_names)
    # Explicit, clamped boundaries instead of negative slices: ``xs[:-0]`` is
    # empty and ``xs[-0:]`` is the whole list, so ``val_test_files=0`` used to
    # send every file to "test" and none to "train".
    test_start = max(total - val_test_files, 0)
    val_start = max(total - 2 * val_test_files, 0)
    return file_names[:val_start], file_names[val_start:test_start], file_names[test_start:]
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def process_files_with_proof_statements(file_names, saving_directory, processing_method_config=None):
    """Convert extraction JSON files into per-split training files.

    Each input file is expected to provide the keys ``'file_name'``,
    ``'problem_names'`` and ``'translations'``. Problems from files whose name
    contains ``"Isabelle2021/src/HOL"`` always go to the train split; all
    others are assigned with ``get_split``.

    Files written into ``saving_directory``:
        * ``{train,valid,test}.jsonl`` - one JSON datapoint per line;
        * ``{train,val,test}.src`` / ``.tgt`` - the ``x`` / ``y`` fields, one
          per line;
        * ``problem_names_split.json`` - ``(file_name, problem_name)`` pairs
          per split;
        * ``split.std_and_afp.{train,valid,test}`` - the same pairs as text.

    Args:
        file_names: Paths of the JSON files to read.
        saving_directory: Existing directory that receives the output files.
        processing_method_config: Forwarded to
            ``process_translations_for_a_problem``.
    """
    # Imported here because the module only imports ``os`` inside its
    # ``__main__`` guard; without this the function raises NameError when the
    # module is imported rather than run as a script.
    import os

    # Store everything in jsonl format, and in text src and tgt format
    datapoints = {
        "train": [],
        "valid": [],
        "test": [],
    }
    problem_names_split = {
        "train": [],
        "val": [],
        "test": [],
    }
    for file_path in tqdm(file_names, desc="Processing files"):
        with open(file_path) as fin:
            file = json.load(fin)
        original_file_name = file['file_name']
        problem_names = set(file['problem_names'])
        transitions_split = split_transitions(problem_names, file['translations'])

        # dict.fromkeys de-duplicates like set() did, but keeps the order of
        # the input file so the output does not change from run to run.
        for problem_name in dict.fromkeys(file['problem_names']):
            if "Isabelle2021/src/HOL" in original_file_name:
                split = "train"
            else:
                split = get_split(problem_name)

            problem_names_split[split].append((original_file_name, problem_name))

            # The datapoint files use "valid" where the split index uses "val".
            json_split = "valid" if split == "val" else split
            translation_pairs = process_translations_for_a_problem(
                transitions_split[problem_name],
                processing_method_config=processing_method_config
            )

            proof_script_till_now = [problem_name]
            for proof_state, proof_step in translation_pairs:
                sanitised_x = trim_string(proof_state)
                sanitised_y = trim_string(proof_step)
                datapoints[json_split].append(
                    {
                        "file_name": file['file_name'],
                        'problem_name': problem_name,
                        "x": sanitised_x,
                        "y": sanitised_y,
                        "proof_script_until_now": "<SEP>".join(proof_script_till_now)
                    }
                )
                proof_script_till_now.append(sanitised_y)

    for json_split in tqdm(datapoints.keys(), desc="Saving files"):
        # Write json
        with open(os.path.join(saving_directory, f"{json_split}.jsonl"), "w") as fout:
            for datapoint in datapoints[json_split]:
                fout.write(json.dumps(datapoint))
                fout.write("\n")

        # Write src and tgt
        split = "val" if json_split == "valid" else json_split
        with open(os.path.join(saving_directory, f"{split}.src"), "w") as f_src, \
                open(os.path.join(saving_directory, f"{split}.tgt"), "w") as f_tgt:
            for datapoint in datapoints[json_split]:
                f_src.write(datapoint["x"]+"\n")
                f_tgt.write(datapoint["y"]+"\n")

    # Context manager guarantees the index file is flushed and closed.
    with open(os.path.join(saving_directory, "problem_names_split.json"), "w") as fout:
        json.dump(problem_names_split, fout)

    for split, lines in problem_names_split.items():
        split = "valid" if split == "val" else split

        with open(os.path.join(saving_directory, f"split.std_and_afp.{split}"), "w") as fout:
            for theory_path, theorem_name in lines:
                theorem_name = " ".join(theorem_name.strip().split())
                fout.write(f"{theory_path} <PATH_AND_THEOREM_SEP> {theorem_name}")
                fout.write("\n")
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
# Command-line entry point: rebuilds "<saving-directory>_with_<method>" from
# scratch and converts every "*_ground_truth.json" found one level below each
# extraction directory.
if __name__ == "__main__":
    import argparse
    import glob
    import os
    parser = argparse.ArgumentParser(description='Extracting translation pairs.')
    # All three options are mandatory; leaving one out used to end in a
    # TypeError/KeyError traceback instead of a usage message.
    parser.add_argument('--extraction-file-directory', '-efd', nargs='+', required=True,
                        help='Where the parsed json files are')
    parser.add_argument('--saving-directory', '-sd', required=True,
                        help='Where to save the translation pairs')
    parser.add_argument('--processing-method-config', '-pmc', type=str, required=True,
                        help='Where to find the processing method config')
    parser.add_argument('--last-k', '-lk', type=int, default=1, help='How many last k steps to use')
    args = parser.parse_args()

    # Each entry is (processing method name, extra arguments for that method).
    all_configs = {
        "needed": ("needed", {}),
        "last_k": ("last_k", {"last_k": args.last_k}),
        "proof_only": ("proof_only", {}),
        "state_only": ("state_only", {}),
        "proof_and_state": ("proof_and_state", {}),
        "trimmed_proof_and_state": ("trimmed_proof_and_state", {}),
    }
    if args.processing_method_config not in all_configs:
        parser.error("unknown processing method config {!r}; choose one of: {}".format(
            args.processing_method_config, ", ".join(all_configs)))
    ppc = all_configs[args.processing_method_config]
    file_suffix = args.processing_method_config

    saving_directory = "{}_with_{}".format(args.saving_directory, file_suffix)
    # Any existing output directory is deleted before being recreated.
    if os.path.isdir(saving_directory):
        shutil.rmtree(saving_directory)
    os.makedirs(saving_directory)

    file_names = []
    for extraction_file_directory in args.extraction_file_directory:
        file_names += list(glob.glob("{}/*/*_ground_truth.json".format(extraction_file_directory)))

    print(f"Processing {len(file_names)} files in total")
    process_files_with_proof_statements(file_names, saving_directory, ppc)
|