itp-interface 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- itp_interface/__init__.py +0 -0
- itp_interface/agent/__init__.py +0 -0
- itp_interface/agent/simple_proof_agent.py +100 -0
- itp_interface/coq_ser_api/__init__.py +165 -0
- itp_interface/coq_ser_api/contexts.py +283 -0
- itp_interface/coq_ser_api/coq_agent.py +459 -0
- itp_interface/coq_ser_api/coq_backend.py +135 -0
- itp_interface/coq_ser_api/coq_util.py +839 -0
- itp_interface/coq_ser_api/example.py +67 -0
- itp_interface/coq_ser_api/lsp_backend.py +375 -0
- itp_interface/coq_ser_api/py.typed +0 -0
- itp_interface/coq_ser_api/serapi_backend.py +841 -0
- itp_interface/coq_ser_api/util.py +145 -0
- itp_interface/coq_ser_api_old/__init__.py +2583 -0
- itp_interface/coq_ser_api_old/contexts.py +172 -0
- itp_interface/coq_ser_api_old/util.py +146 -0
- itp_interface/lean_server/__init__.py +0 -0
- itp_interface/lean_server/commands.py +484 -0
- itp_interface/lean_server/lean3_search_tool.py +358 -0
- itp_interface/lean_server/lean4_repl_interface.py +151 -0
- itp_interface/lean_server/lean4_utils.py +255 -0
- itp_interface/lean_server/lean_cmd_server.py +111 -0
- itp_interface/lean_server/lean_context.py +60 -0
- itp_interface/lean_server/lean_sync_server.py +174 -0
- itp_interface/lean_server/lean_utils.py +199 -0
- itp_interface/lean_server/py.typed +1 -0
- itp_interface/main/__init__.py +0 -0
- itp_interface/main/config/afp_data_gen.yaml +14 -0
- itp_interface/main/config/benchmark/CompCert.yaml +366 -0
- itp_interface/main/config/benchmark/GeoCoq.yaml +930 -0
- itp_interface/main/config/benchmark/UniMath.yaml +2690 -0
- itp_interface/main/config/benchmark/afp_isabelle.yaml +29200 -0
- itp_interface/main/config/benchmark/agent_proverbot_hard.yaml +247 -0
- itp_interface/main/config/benchmark/category-theory.yaml +470 -0
- itp_interface/main/config/benchmark/compcert_118_subset.yaml +148 -0
- itp_interface/main/config/benchmark/compcert_benchmark.yaml +36 -0
- itp_interface/main/config/benchmark/compcert_benchmark_hard.yaml +498 -0
- itp_interface/main/config/benchmark/compcert_benchmark_hard_1.yaml +55 -0
- itp_interface/main/config/benchmark/compcert_benchmark_hard_2.yaml +24 -0
- itp_interface/main/config/benchmark/compcert_benchmark_hard_3.yaml +95 -0
- itp_interface/main/config/benchmark/compcert_benchmark_hard_7_per_cent.yaml +78 -0
- itp_interface/main/config/benchmark/compcert_benchmark_test.yaml +38 -0
- itp_interface/main/config/benchmark/compcert_benchmark_train.yaml +340 -0
- itp_interface/main/config/benchmark/leandojo_novel_premises_test.yaml +2908 -0
- itp_interface/main/config/benchmark/leandojo_novel_premises_train.yaml +98645 -0
- itp_interface/main/config/benchmark/leandojo_novel_premises_val.yaml +2912 -0
- itp_interface/main/config/benchmark/leandojo_random.yaml +2889 -0
- itp_interface/main/config/benchmark/leandojo_random_test.yaml +2421 -0
- itp_interface/main/config/benchmark/leandojo_random_train.yaml +62729 -0
- itp_interface/main/config/benchmark/leandojo_random_val.yaml +2504 -0
- itp_interface/main/config/benchmark/math-comp.yaml +200 -0
- itp_interface/main/config/benchmark/miniF2F_test.yaml +12 -0
- itp_interface/main/config/benchmark/miniF2F_test_aime.yaml +27 -0
- itp_interface/main/config/benchmark/miniF2F_test_algebra.yaml +30 -0
- itp_interface/main/config/benchmark/miniF2F_test_amc12.yaml +57 -0
- itp_interface/main/config/benchmark/miniF2F_test_few_shot_hard.yaml +231 -0
- itp_interface/main/config/benchmark/miniF2F_test_imo.yaml +32 -0
- itp_interface/main/config/benchmark/miniF2F_test_induction.yaml +20 -0
- itp_interface/main/config/benchmark/miniF2F_test_mathd_algebra.yaml +82 -0
- itp_interface/main/config/benchmark/miniF2F_test_mathd_algebra_hard.yaml +72 -0
- itp_interface/main/config/benchmark/miniF2F_test_mathd_numbertheory.yaml +72 -0
- itp_interface/main/config/benchmark/miniF2F_test_numbertheory.yaml +20 -0
- itp_interface/main/config/benchmark/minicompcert_benchmark_1.yaml +14 -0
- itp_interface/main/config/benchmark/proverbot_hard.yaml +104 -0
- itp_interface/main/config/benchmark/re_prover.yaml +66 -0
- itp_interface/main/config/benchmark/re_prover_hard.yaml +41 -0
- itp_interface/main/config/benchmark/re_prover_very_hard.yaml +22 -0
- itp_interface/main/config/benchmark/reprover_with_retrieval.yaml +73 -0
- itp_interface/main/config/benchmark/reprover_with_retrieval_hard.yaml +30 -0
- itp_interface/main/config/benchmark/reprover_with_retrieval_neg.yaml +195 -0
- itp_interface/main/config/benchmark/simple_benchmark_1.yaml +24 -0
- itp_interface/main/config/benchmark/simple_benchmark_8.yaml +50 -0
- itp_interface/main/config/benchmark/simple_benchmark_9.yaml +65 -0
- itp_interface/main/config/benchmark/simple_benchmark_isabelle.yaml +18 -0
- itp_interface/main/config/benchmark/simple_benchmark_lean.yaml +12 -0
- itp_interface/main/config/benchmark/simple_benchmark_lean_training_data.yaml +12 -0
- itp_interface/main/config/benchmark/simple_rl_benchmark_lean.yaml +14 -0
- itp_interface/main/config/benchmark/stack_machine.yaml +13 -0
- itp_interface/main/config/benchmark/stack_machine_hard.yaml +15 -0
- itp_interface/main/config/category_theory_data_gen.yaml +14 -0
- itp_interface/main/config/category_theory_data_gen_random.yaml +16 -0
- itp_interface/main/config/compcert_data_gen_test.yaml +10 -0
- itp_interface/main/config/compcert_data_gen_train.yaml +7 -0
- itp_interface/main/config/env_settings/bm25_retrieval.yaml +2 -0
- itp_interface/main/config/env_settings/bm25_retrieval_no_dfns.yaml +2 -0
- itp_interface/main/config/env_settings/bm25_retrieval_only_local_no_dfns.yaml +2 -0
- itp_interface/main/config/env_settings/bm25_retrieval_with_print.yaml +2 -0
- itp_interface/main/config/env_settings/bm25_retrieval_with_print_only_local.yaml +2 -0
- itp_interface/main/config/env_settings/bm25_retrieval_with_print_only_local_no_dfns.yaml +2 -0
- itp_interface/main/config/env_settings/no_retrieval.yaml +2 -0
- itp_interface/main/config/experiments.yaml +12 -0
- itp_interface/main/config/geo_coq_data_gen.yaml +14 -0
- itp_interface/main/config/geo_coq_data_gen_random.yaml +16 -0
- itp_interface/main/config/leandojo_random_data_gen.yaml +16 -0
- itp_interface/main/config/math_comp_data_gen.yaml +14 -0
- itp_interface/main/config/math_comp_data_gen_random.yaml +16 -0
- itp_interface/main/config/mathlib_data_gen.yaml +14 -0
- itp_interface/main/config/repo/coq_repos.yaml +191 -0
- itp_interface/main/config/run_settings/default_coq_data_generation_transforms.yaml +24 -0
- itp_interface/main/config/run_settings/default_isabelle_data_generation_transforms.yaml +24 -0
- itp_interface/main/config/run_settings/default_lean4_data_generation_transforms.yaml +24 -0
- itp_interface/main/config/run_settings/default_lean_data_generation_transforms.yaml +24 -0
- itp_interface/main/config/simple_coq_data_gen.yaml +12 -0
- itp_interface/main/config/simple_coq_data_gen_random.yaml +17 -0
- itp_interface/main/config/simple_lean_data_gen.yaml +12 -0
- itp_interface/main/config/simple_rl_lean_data_gen.yaml +12 -0
- itp_interface/main/config/uni_math_data_gen.yaml +14 -0
- itp_interface/main/config.py +192 -0
- itp_interface/main/extract_benchmark_dataset.py +106 -0
- itp_interface/main/filter_dataset.py +107 -0
- itp_interface/main/install.py +92 -0
- itp_interface/main/merge_dataset.py +96 -0
- itp_interface/main/run_tool.py +444 -0
- itp_interface/pisa/.git +1 -0
- itp_interface/pisa/.gitignore +125 -0
- itp_interface/pisa/.idea/.gitignore +8 -0
- itp_interface/pisa/.idea/ClojureProjectResolveSettings.xml +6 -0
- itp_interface/pisa/.idea/codeStyles/Project.xml +7 -0
- itp_interface/pisa/.idea/codeStyles/codeStyleConfig.xml +5 -0
- itp_interface/pisa/.idea/inspectionProfiles/Project_Default.xml +16 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_android_annotations_4_1_1_4_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_api_grpc_proto_google_common_protos_1_17_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_code_findbugs_jsr305_3_0_2_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_code_gson_gson_2_8_6_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_errorprone_error_prone_annotations_2_3_4_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_guava_failureaccess_1_0_1_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_guava_guava_30_0_jre_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_guava_listenablefuture_9999_0_empty_to_avoid_conflict_with_guava_jar.xml +9 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_j2objc_j2objc_annotations_1_3_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_protobuf_protobuf_java_3_12_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_google_protobuf_protobuf_java_util_3_12_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_lihaoyi_fastparse_2_13_2_3_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_lihaoyi_geny_2_13_0_6_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_lihaoyi_sourcecode_2_13_0_2_1_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_thesamet_scalapb_lenses_2_13_0_10_9_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_thesamet_scalapb_scalapb_runtime_2_13_0_10_9_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_thesamet_scalapb_scalapb_runtime_grpc_2_13_0_10_9_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_thesamet_scalapb_zio_grpc_zio_grpc_core_2_13_0_4_2_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__com_thoughtworks_paranamer_paranamer_2_8_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__commons_io_commons_io_2_8_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__de_unruh_java_patterns_0_1_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__de_unruh_scala_isabelle_2_13_master_SNAPSHOT_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__dev_zio_izumi_reflect_2_13_1_0_0_M9_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__dev_zio_izumi_reflect_thirdparty_boopickle_shaded_2_13_1_0_0_M9_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__dev_zio_zio_2_13_1_0_3_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__dev_zio_zio_stacktracer_2_13_1_0_3_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__dev_zio_zio_streams_2_13_1_0_3_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_api_1_34_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_context_1_34_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_core_1_34_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_netty_1_34_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_protobuf_1_34_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_protobuf_lite_1_34_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_services_1_34_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_grpc_grpc_stub_1_34_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_buffer_4_1_51_Final_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_codec_4_1_51_Final_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_codec_http2_4_1_51_Final_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_codec_http_4_1_51_Final_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_codec_socks_4_1_51_Final_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_common_4_1_51_Final_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_handler_4_1_51_Final_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_handler_proxy_4_1_51_Final_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_resolver_4_1_51_Final_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_netty_netty_transport_4_1_51_Final_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__io_perfmark_perfmark_api_0_19_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__net_java_dev_jna_jna_5_3_1_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__net_liftweb_lift_json_2_13_3_4_3_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_apache_commons_commons_lang3_3_11_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_apache_commons_commons_text_1_9_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_checkerframework_checker_qual_3_5_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_codehaus_mojo_animal_sniffer_annotations_1_18_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_jetbrains_annotations_20_1_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_jline_jline_3_16_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_log4s_log4s_2_13_1_9_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_modules_scala_collection_compat_2_13_2_1_6_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_modules_scala_xml_2_13_1_3_0_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_scala_compiler_2_13_4_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_scala_library_2_13_4_jar.xml +23 -0
- itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_scala_reflect_2_13_4_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_scala_lang_scalap_2_13_4_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_scalaz_scalaz_core_2_13_7_3_2_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_slf4j_slf4j_api_1_7_30_jar.xml +13 -0
- itp_interface/pisa/.idea/libraries/sbt__org_slf4j_slf4j_simple_1_7_30_jar.xml +13 -0
- itp_interface/pisa/.idea/misc.xml +7 -0
- itp_interface/pisa/.idea/modules/PISA-build.iml +127 -0
- itp_interface/pisa/.idea/modules/PISA.iml +94 -0
- itp_interface/pisa/.idea/modules.xml +9 -0
- itp_interface/pisa/.idea/other.xml +6 -0
- itp_interface/pisa/.idea/sbt.xml +20 -0
- itp_interface/pisa/.idea/scala_compiler.xml +6 -0
- itp_interface/pisa/.idea/uiDesigner.xml +124 -0
- itp_interface/pisa/.idea/vcs.xml +6 -0
- itp_interface/pisa/.scalafmt.conf +2 -0
- itp_interface/pisa/LICENSE +29 -0
- itp_interface/pisa/README.md +262 -0
- itp_interface/pisa/build.sbt +49 -0
- itp_interface/pisa/build.sh +26 -0
- itp_interface/pisa/command_generation/close_gaps.py +44 -0
- itp_interface/pisa/command_generation/conjecture_normal_order.py +62 -0
- itp_interface/pisa/command_generation/conjecturer_command_generator.py +36 -0
- itp_interface/pisa/command_generation/create_dirs.py +11 -0
- itp_interface/pisa/command_generation/find_std.py +67 -0
- itp_interface/pisa/command_generation/generate_build_commands_afp.py +15 -0
- itp_interface/pisa/command_generation/generate_build_commands_std.py +15 -0
- itp_interface/pisa/command_generation/generate_commands_afp.py +103 -0
- itp_interface/pisa/command_generation/generate_commands_mini.py +73 -0
- itp_interface/pisa/command_generation/generate_commands_std.py +69 -0
- itp_interface/pisa/command_generation/generate_hammer_extraction_text.py +5 -0
- itp_interface/pisa/command_generation/hammer_command_generator.py +40 -0
- itp_interface/pisa/command_generation/hp_search_command_generator.py +63 -0
- itp_interface/pisa/command_generation/oracle_command_generator.py +56 -0
- itp_interface/pisa/command_generation/search_command_generator.py +69 -0
- itp_interface/pisa/command_generation/summarise_problem_names.py +45 -0
- itp_interface/pisa/command_generation/tpu_hp_search.py +75 -0
- itp_interface/pisa/docker/Dockerfile +34 -0
- itp_interface/pisa/docker/docker_tutorial.md +64 -0
- itp_interface/pisa/eval_setup/copy_isabelle.py +42 -0
- itp_interface/pisa/eval_setup/copy_pisa_jars.py +18 -0
- itp_interface/pisa/mesh_transformer_utils/tokenization.py +86 -0
- itp_interface/pisa/project/build.properties +1 -0
- itp_interface/pisa/project/plugins.sbt +5 -0
- itp_interface/pisa/requirements.txt +4 -0
- itp_interface/pisa/scripts/extract_last_k_steps.py +28 -0
- itp_interface/pisa/scripts/extract_proof_corpus.py +26 -0
- itp_interface/pisa/scripts/gather_hammer_results.py +27 -0
- itp_interface/pisa/scripts/length_in_char_stats.py +20 -0
- itp_interface/pisa/scripts/mix.py +127 -0
- itp_interface/pisa/scripts/results_stat.py +52 -0
- itp_interface/pisa/scripts/test_array_job.sh +34 -0
- itp_interface/pisa/setup.sh +25 -0
- itp_interface/pisa/src/main/protobuf/server.proto +60 -0
- itp_interface/pisa/src/main/python/.idea/.gitignore +8 -0
- itp_interface/pisa/src/main/python/.idea/inspectionProfiles/Project_Default.xml +18 -0
- itp_interface/pisa/src/main/python/.idea/inspectionProfiles/profiles_settings.xml +6 -0
- itp_interface/pisa/src/main/python/.idea/misc.xml +4 -0
- itp_interface/pisa/src/main/python/.idea/modules.xml +8 -0
- itp_interface/pisa/src/main/python/.idea/python.iml +12 -0
- itp_interface/pisa/src/main/python/.idea/vcs.xml +6 -0
- itp_interface/pisa/src/main/python/conjecturing_parsing/conjecturer_postprocessing.py +59 -0
- itp_interface/pisa/src/main/python/data_extraction/extract_data.py +184 -0
- itp_interface/pisa/src/main/python/data_extraction/find_premises.py +221 -0
- itp_interface/pisa/src/main/python/data_extraction/process_data.py +129 -0
- itp_interface/pisa/src/main/python/legacy/PisaFlexibleClient.py +167 -0
- itp_interface/pisa/src/main/python/legacy/autof_test.py +74 -0
- itp_interface/pisa/src/main/python/legacy/cmd_client.py +23 -0
- itp_interface/pisa/src/main/python/legacy/convert_scala_dump_to_test_name_jsons.py +14 -0
- itp_interface/pisa/src/main/python/legacy/create_data_txt.py +72 -0
- itp_interface/pisa/src/main/python/legacy/create_finetune_tfrecords.py +311 -0
- itp_interface/pisa/src/main/python/legacy/demo.py +49 -0
- itp_interface/pisa/src/main/python/legacy/evaluate.py +108 -0
- itp_interface/pisa/src/main/python/legacy/extract_first_step.py +25 -0
- itp_interface/pisa/src/main/python/legacy/get_global_facts.py +35 -0
- itp_interface/pisa/src/main/python/legacy/mix_data.py +19 -0
- itp_interface/pisa/src/main/python/legacy/one_stage_extraction.py +111 -0
- itp_interface/pisa/src/main/python/legacy/prepare_episodic_transitions.py +137 -0
- itp_interface/pisa/src/main/python/legacy/prepare_translation_pairs.py +277 -0
- itp_interface/pisa/src/main/python/pisa_client.py +322 -0
- itp_interface/pisa/src/main/python/server_pb2.py +394 -0
- itp_interface/pisa/src/main/python/server_pb2_grpc.py +230 -0
- itp_interface/pisa/src/main/python/test_client.py +17 -0
- itp_interface/pisa/src/main/python/test_client2.py +79 -0
- itp_interface/pisa/src/main/python/utils/filters.py +59 -0
- itp_interface/pisa/src/main/python/utils/pisa_server_control.py +29 -0
- itp_interface/pisa/src/main/scala/pisa/agent/CheckSyntax.scala +257 -0
- itp_interface/pisa/src/main/scala/pisa/agent/DepThms.scala +29 -0
- itp_interface/pisa/src/main/scala/pisa/agent/PisaStat.scala +46 -0
- itp_interface/pisa/src/main/scala/pisa/agent/RefactorTest.scala +40 -0
- itp_interface/pisa/src/main/scala/pisa/agent/RepHammer.scala +95 -0
- itp_interface/pisa/src/main/scala/pisa/server/HammFacts.scala +63 -0
- itp_interface/pisa/src/main/scala/pisa/server/PisaOS.scala +881 -0
- itp_interface/pisa/src/main/scala/pisa/server/PisaOneStage.scala +540 -0
- itp_interface/pisa/src/main/scala/pisa/server/PisaOneStageServers.scala +1048 -0
- itp_interface/pisa/src/main/scala/pisa/utils/TheoryManager.scala +95 -0
- itp_interface/pisa/src/test/python/analyse_debug.py +33 -0
- itp_interface/pisa/src/test/python/extract_test_seq2seq.py +53 -0
- itp_interface/pisa/src/test/python/extract_test_theorem_ground_truth_indices.py +31 -0
- itp_interface/pisa/src/test/python/proof_originality.py +24 -0
- itp_interface/pisa/src/test/python/test_command_generator.py +25 -0
- itp_interface/pisa/src/test/python/test_model_sequence_accuracy.py +70 -0
- itp_interface/pisa/src/test/scala/pisa/Easy.scala +26 -0
- itp_interface/pisa/src/test/scala/pisa/TestCurl.scala +82 -0
- itp_interface/pisa/src/test/scala/pisa/TestIsa.scala +27 -0
- itp_interface/pisa/test.sh +19 -0
- itp_interface/pisa/universal_test_theorems.tar.gz +0 -0
- itp_interface/repo/build.py +78 -0
- itp_interface/repo/clone.py +79 -0
- itp_interface/repo/dataset_discovery.py +99 -0
- itp_interface/retrieval/__init__.py +0 -0
- itp_interface/retrieval/abstraction.py +35 -0
- itp_interface/retrieval/coq_bm25_reranker.py +153 -0
- itp_interface/retrieval/isabelle_bm25_reranker.py +86 -0
- itp_interface/retrieval/lean3_bm25_reranker.py +86 -0
- itp_interface/rl/__init__.py +0 -0
- itp_interface/rl/abstraction.py +168 -0
- itp_interface/rl/proof_action.py +172 -0
- itp_interface/rl/proof_state.py +149 -0
- itp_interface/rl/proof_tree.py +109 -0
- itp_interface/rl/simpl_proof_env_pool.py +16 -0
- itp_interface/rl/simple_proof_env.py +713 -0
- itp_interface/rl/simple_proof_env_pool.py +591 -0
- itp_interface/scripts/setup.sh +228 -0
- itp_interface/tools/__init__.py +0 -0
- itp_interface/tools/basic_utils.py +172 -0
- itp_interface/tools/bin_packing.py +61 -0
- itp_interface/tools/cache.py +93 -0
- itp_interface/tools/coq_build_spec.py +31 -0
- itp_interface/tools/coq_build_tool.py +319 -0
- itp_interface/tools/coq_context_helper.py +354 -0
- itp_interface/tools/coq_executor.py +508 -0
- itp_interface/tools/coq_local_data_generation_transform.py +158 -0
- itp_interface/tools/coq_parse_utils.py +154 -0
- itp_interface/tools/coq_raw_proofs.py +193 -0
- itp_interface/tools/coq_theorem_proof_pair_generation_transform.py +146 -0
- itp_interface/tools/coq_training_data_generator.py +76 -0
- itp_interface/tools/dynamic_coq_proof_exec.py +220 -0
- itp_interface/tools/dynamic_isabelle_proof_exec.py +229 -0
- itp_interface/tools/dynamic_lean4_proof_exec.py +236 -0
- itp_interface/tools/dynamic_lean_proof_exec.py +228 -0
- itp_interface/tools/isabelle_context_helper.py +66 -0
- itp_interface/tools/isabelle_executor.py +862 -0
- itp_interface/tools/isabelle_local_data_generation_transform.py +149 -0
- itp_interface/tools/isabelle_parse_utils.py +131 -0
- itp_interface/tools/isabelle_server.py +106 -0
- itp_interface/tools/lean4_context_helper.py +72 -0
- itp_interface/tools/lean4_local_data_generation_transform.py +122 -0
- itp_interface/tools/lean4_sync_executor.py +1193 -0
- itp_interface/tools/lean_cmd_executor.py +804 -0
- itp_interface/tools/lean_context_helper.py +327 -0
- itp_interface/tools/lean_dojo_data_generation_transform.py +206 -0
- itp_interface/tools/lean_executor.py +687 -0
- itp_interface/tools/lean_local_data_generation_transform.py +136 -0
- itp_interface/tools/lean_parse_utils.py +32 -0
- itp_interface/tools/log_utils.py +20 -0
- itp_interface/tools/proof_exec_callback.py +76 -0
- itp_interface/tools/ray_utils.py +265 -0
- itp_interface/tools/repl/.git +1 -0
- itp_interface/tools/repl/.github/workflows/ci.yml +24 -0
- itp_interface/tools/repl/.gitignore +7 -0
- itp_interface/tools/repl/.vscode/copyright.code-snippets +13 -0
- itp_interface/tools/repl/.vscode/extensions.json +13 -0
- itp_interface/tools/repl/.vscode/module-docstring.code-snippets +35 -0
- itp_interface/tools/repl/.vscode/settings.json +11 -0
- itp_interface/tools/repl/README.md +174 -0
- itp_interface/tools/repl/REPL/Frontend.lean +47 -0
- itp_interface/tools/repl/REPL/JSON.lean +186 -0
- itp_interface/tools/repl/REPL/Lean/ContextInfo.lean +9 -0
- itp_interface/tools/repl/REPL/Lean/Environment.lean +31 -0
- itp_interface/tools/repl/REPL/Lean/InfoTree/ToJson.lean +114 -0
- itp_interface/tools/repl/REPL/Lean/InfoTree.lean +272 -0
- itp_interface/tools/repl/REPL/Main.lean +323 -0
- itp_interface/tools/repl/REPL/Snapshots.lean +306 -0
- itp_interface/tools/repl/REPL/Util/Path.lean +36 -0
- itp_interface/tools/repl/REPL/Util/Pickle.lean +44 -0
- itp_interface/tools/repl/REPL.lean +4 -0
- itp_interface/tools/repl/lake-manifest.json +5 -0
- itp_interface/tools/repl/lakefile.lean +15 -0
- itp_interface/tools/repl/lean-toolchain +1 -0
- itp_interface/tools/repl/test/Mathlib/.gitignore +5 -0
- itp_interface/tools/repl/test/Mathlib/H20231110.sh +2 -0
- itp_interface/tools/repl/test/Mathlib/ReplMathlibTests.lean +1 -0
- itp_interface/tools/repl/test/Mathlib/lake-manifest.json +68 -0
- itp_interface/tools/repl/test/Mathlib/lakefile.lean +11 -0
- itp_interface/tools/repl/test/Mathlib/lean-toolchain +1 -0
- itp_interface/tools/repl/test/Mathlib/test/20240209.expected.out +20 -0
- itp_interface/tools/repl/test/Mathlib/test/20240209.in +3 -0
- itp_interface/tools/repl/test/Mathlib/test/20240209.lean +4 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231020.expected.out +8 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231020.in +8 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231020.lean +22 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231110.expected.out +4 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231110.in +4 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231115.expected.out +19 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231115.in +5 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231115_2.expected.out +18 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231115_2.in +4 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231115_3.expected.out +10 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231115_3.in +4 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231214.in +9 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231214.lean +30 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231215.expected.out +4 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231215.in +4 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231215_2.expected.out +14 -0
- itp_interface/tools/repl/test/Mathlib/test/H20231215_2.in +3 -0
- itp_interface/tools/repl/test/Mathlib/test/exact.expected.out +37 -0
- itp_interface/tools/repl/test/Mathlib/test/exact.in +10 -0
- itp_interface/tools/repl/test/Mathlib/test/import_Mathlib.lean +1 -0
- itp_interface/tools/repl/test/Mathlib/test/induction.expected.out +29 -0
- itp_interface/tools/repl/test/Mathlib/test/induction.in +10 -0
- itp_interface/tools/repl/test/Mathlib/test/induction.lean +6 -0
- itp_interface/tools/repl/test/Mathlib/test/on_goal.expected.out +22 -0
- itp_interface/tools/repl/test/Mathlib/test/on_goal.in +5 -0
- itp_interface/tools/repl/test/Mathlib/test/pickle.expected.out +16 -0
- itp_interface/tools/repl/test/Mathlib/test/pickle.in +6 -0
- itp_interface/tools/repl/test/Mathlib/test/pickle_2.expected.out +4 -0
- itp_interface/tools/repl/test/Mathlib/test/pickle_2.in +4 -0
- itp_interface/tools/repl/test/Mathlib/test.sh +41 -0
- itp_interface/tools/repl/test/all_tactics.expected.out +13 -0
- itp_interface/tools/repl/test/all_tactics.in +1 -0
- itp_interface/tools/repl/test/by_cases.expected.out +25 -0
- itp_interface/tools/repl/test/by_cases.in +8 -0
- itp_interface/tools/repl/test/by_cases.lean +4 -0
- itp_interface/tools/repl/test/calc.expected.out +32 -0
- itp_interface/tools/repl/test/calc.in +1 -0
- itp_interface/tools/repl/test/def_eval.expected.out +9 -0
- itp_interface/tools/repl/test/def_eval.in +3 -0
- itp_interface/tools/repl/test/enableInitializersExecution.expected.out +2 -0
- itp_interface/tools/repl/test/enableInitializersExecution.in +1 -0
- itp_interface/tools/repl/test/file.expected.out +8 -0
- itp_interface/tools/repl/test/file.in +1 -0
- itp_interface/tools/repl/test/file.lean +5 -0
- itp_interface/tools/repl/test/have_by_sorry.expected.out +28 -0
- itp_interface/tools/repl/test/have_by_sorry.in +6 -0
- itp_interface/tools/repl/test/import_lean.in +1 -0
- itp_interface/tools/repl/test/incomplete.expected.out +18 -0
- itp_interface/tools/repl/test/incomplete.in +3 -0
- itp_interface/tools/repl/test/incomplete.lean +0 -0
- itp_interface/tools/repl/test/infotree.expected.out +20 -0
- itp_interface/tools/repl/test/infotree.in +2 -0
- itp_interface/tools/repl/test/invalid_tactic.expected.out +20 -0
- itp_interface/tools/repl/test/invalid_tactic.in +3 -0
- itp_interface/tools/repl/test/name_generator.expected.out +53 -0
- itp_interface/tools/repl/test/name_generator.in +18 -0
- itp_interface/tools/repl/test/no_goal_sorry.expected.out +11 -0
- itp_interface/tools/repl/test/no_goal_sorry.in +1 -0
- itp_interface/tools/repl/test/no_goal_sorry_2.expected.out +12 -0
- itp_interface/tools/repl/test/no_goal_sorry_2.in +1 -0
- itp_interface/tools/repl/test/options.expected.out +17 -0
- itp_interface/tools/repl/test/options.in +6 -0
- itp_interface/tools/repl/test/pickle_environment.expected.out +8 -0
- itp_interface/tools/repl/test/pickle_environment.in +7 -0
- itp_interface/tools/repl/test/pickle_environment_with_imports.expected.out +10 -0
- itp_interface/tools/repl/test/pickle_environment_with_imports.in +9 -0
- itp_interface/tools/repl/test/pickle_open.expected.out +8 -0
- itp_interface/tools/repl/test/pickle_open.in +7 -0
- itp_interface/tools/repl/test/pickle_open_2.expected.out +4 -0
- itp_interface/tools/repl/test/pickle_open_2.in +3 -0
- itp_interface/tools/repl/test/pickle_open_scoped.expected.out +18 -0
- itp_interface/tools/repl/test/pickle_open_scoped.in +8 -0
- itp_interface/tools/repl/test/pickle_open_scoped_2.expected.out +14 -0
- itp_interface/tools/repl/test/pickle_open_scoped_2.in +3 -0
- itp_interface/tools/repl/test/pickle_proof_state_1.expected.out +26 -0
- itp_interface/tools/repl/test/pickle_proof_state_1.in +15 -0
- itp_interface/tools/repl/test/pickle_proof_state_2.expected.out +4 -0
- itp_interface/tools/repl/test/pickle_proof_state_2.in +3 -0
- itp_interface/tools/repl/test/pickle_proof_state_env.expected.out +26 -0
- itp_interface/tools/repl/test/pickle_proof_state_env.in +15 -0
- itp_interface/tools/repl/test/pickle_scoped_notation.in +16 -0
- itp_interface/tools/repl/test/pickle_scoped_notation_2.in +3 -0
- itp_interface/tools/repl/test/proof_step.expected.out +18 -0
- itp_interface/tools/repl/test/proof_step.in +7 -0
- itp_interface/tools/repl/test/readme.expected.out +16 -0
- itp_interface/tools/repl/test/readme.in +5 -0
- itp_interface/tools/repl/test/sorry_hypotheses.expected.out +16 -0
- itp_interface/tools/repl/test/sorry_hypotheses.in +4 -0
- itp_interface/tools/repl/test/synthesize_placeholder.expected.out +7 -0
- itp_interface/tools/repl/test/synthesize_placeholder.in +1 -0
- itp_interface/tools/repl/test/tactic_mode_sorry.expected.out +14 -0
- itp_interface/tools/repl/test/tactic_mode_sorry.in +3 -0
- itp_interface/tools/repl/test/tactic_sorry.expected.out +12 -0
- itp_interface/tools/repl/test/tactic_sorry.in +1 -0
- itp_interface/tools/repl/test/term_sorry.expected.out +12 -0
- itp_interface/tools/repl/test/term_sorry.in +1 -0
- itp_interface/tools/repl/test/trace_simp.expected.out +41 -0
- itp_interface/tools/repl/test/trace_simp.in +15 -0
- itp_interface/tools/repl/test/unfinished_tactic_block.expected.out +11 -0
- itp_interface/tools/repl/test/unfinished_tactic_block.in +1 -0
- itp_interface/tools/repl/test/unknown_environment.expected.out +2 -0
- itp_interface/tools/repl/test/unknown_environment.in +1 -0
- itp_interface/tools/repl/test/unknown_proof_state.expected.out +14 -0
- itp_interface/tools/repl/test/unknown_proof_state.in +3 -0
- itp_interface/tools/repl/test/unknown_tactic.expected.out +14 -0
- itp_interface/tools/repl/test/unknown_tactic.in +3 -0
- itp_interface/tools/repl/test/variables.expected.out +26 -0
- itp_interface/tools/repl/test/variables.in +5 -0
- itp_interface/tools/repl/test.sh +43 -0
- itp_interface/tools/run_data_generation_transforms.py +350 -0
- itp_interface/tools/theorem_details.py +25 -0
- itp_interface/tools/training_data.py +358 -0
- itp_interface/tools/training_data_format.py +599 -0
- itp_interface-1.0.0.dist-info/METADATA +78 -0
- itp_interface-1.0.0.dist-info/RECORD +485 -0
- itp_interface-1.0.0.dist-info/WHEEL +4 -0
- itp_interface-1.0.0.dist-info/entry_points.txt +3 -0
- itp_interface-1.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import os
|
|
3
|
+
import random
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import tensorflow as tf
|
|
9
|
+
from lm_dataformat import Reader
|
|
10
|
+
from tqdm import tqdm
|
|
11
|
+
|
|
12
|
+
from mesh_transformer_utils.tokenization import TokenizerWrapper
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def parse_args(argv=None):
    """Build the command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) parses ``sys.argv[1:]``, which is the previous behavior.
            Passing a list lets programmatic callers avoid picking up the
            host process's command line.

    Returns:
        The parsed ``argparse.Namespace``. Unrecognised arguments are not an
        error: they are printed and otherwise ignored.
    """
    # NOTE(review): the description mentions --treat-eot-as-text, but no such
    # option is registered below.
    parser = argparse.ArgumentParser(description="""
    Converts a text dataset into the training data format expected by the model.

    Adapted from the script create_tfrecords.py in the gpt-neo repo.

    - Your text dataset:
        - can be provided as .txt files, or as an archive (.tar.gz, .xz, jsonl.zst).
        - can be one file or multiple
            - using a single large file may use too much memory and crash - if this occurs, split the file up into a few files
        - the model's end-of-text separator is added between the contents of each file
        - if the string '<|endoftext|>' appears inside a file, it is treated as the model's end-of-text separator (not the actual string '<|endoftext|>')
            - this behavior can be disabled with --treat-eot-as-text

    This script creates a single .tfrecords file as output
        - Why: the model's data loader ignores "trailing" data (< 1 batch) at the end of a .tfrecords file
            - this causes data loss if you have many .tfrecords files
        - This is probably not appropriate for very large datasets
    """, formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("--input-dir", type=str, default=None,
                        help="Path to where your files are located.")
    parser.add_argument("--name", type=str, default=None,
                        help="Name of output file will be {name}_{seqnum}.tfrecords, where seqnum is total sequence count")
    parser.add_argument("--output-dir", type=str, default="",
                        help="Output directory (default: current directory)")
    parser.add_argument("--tokenizer-path", type=str, default=None,
                        help="Path to a custom BPE tokenizer (default: None, gpt2 tokenizer)")

    cleaning_args = parser.add_argument_group('data cleaning arguments')

    cleaning_args.add_argument("--normalize-with-ftfy", action="store_true",
                               help="Normalize text with ftfy")
    cleaning_args.add_argument("--normalize-with-wikitext-detokenize",
                               action="store_true",
                               help="Use wikitext detokenizer")
    minu_help = "Exclude repetitive documents made up of < MIN_UNIQUE_TOKENS unique tokens. These can produce large gradients."
    minu_help += " Set <= 0 to disable. If enabled, 200 is a good default value. (Default: 0)"
    cleaning_args.add_argument("--min-unique-tokens", type=int, default=0,
                               help=minu_help)

    shuffle_pack_args = parser.add_argument_group(
        'data shuffling/packing arguments')
    repack_ep_help = "Repeat the data N_REPACK_EPOCHS times, shuffled differently in each repetition. Recommended for multi-epoch training (set this to your intended number of epochs)."
    shuffle_pack_args.add_argument("--n-repack-epochs",
                                   type=int, default=1,
                                   help=repack_ep_help
                                   )
    shuffle_pack_args.add_argument("--seed", type=int, default=10,
                                   help="random seed for shuffling data (default: 10)")
    shuffle_pack_args.add_argument("--preserve-data-order",
                                   default=False, action="store_true",
                                   help="Disables shuffling, so the input and output data have the same order.")

    misc_args = parser.add_argument_group('miscellaneous arguments')
    misc_args.add_argument("--verbose",
                           default=False, action="store_true",
                           help="Prints extra information, such as the text removed by --min-unique-tokens")

    # parse_known_args tolerates options that belong to an embedding program.
    args, unknown = parser.parse_known_args(argv)
    print(f'Unknown args: {unknown}')

    return args
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def get_files(input_dir):
    """Collect the supported dataset files located directly inside *input_dir*.

    Returns a list of path strings grouped by extension, in the order
    jsonl.zst, .txt, .xz, .tar.gz. Sub-directories are not searched.
    """
    root = Path(input_dir)
    found = []
    for suffix in ("jsonl.zst", ".txt", ".xz", ".tar.gz"):
        # stringify the Paths as they are collected
        found.extend(str(match) for match in root.glob(f"*{suffix}"))
    return found
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _int64_feature(value):
    """
    Wraps an iterable of integers in a tf.train.Feature holding an int64 list.
    """
    int_list = tf.train.Int64List(value=value)
    return tf.train.Feature(int64_list=int_list)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def write_to_file(writer, data):
    """
    Serializes data as a tf.train.Example whose only feature is "text"
    and writes it through writer
    """
    features = tf.train.Features(feature={"text": _int64_feature(data)})
    example = tf.train.Example(features=features)
    writer.write(example.SerializeToString())
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def write_tfrecord(sequences, fp):
    """
    Writes every sequence as one record of the TFRecord file at path fp
    """
    with tf.io.TFRecordWriter(fp) as record_writer:
        for tokens in sequences:
            write_to_file(record_writer, tokens)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def split_list(l, n):
    """Cut a list (or string) into consecutive pieces of length *n*.

    The last piece is shorter when ``len(l)`` is not a multiple of *n*.
    """
    pieces = []
    for start in range(0, len(l), n):
        pieces.append(l[start:start + n])
    return pieces
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def enforce_min_unique(seqs, min_unique_tokens, enc, verbose=False):
    """Yield only the sequences with at least *min_unique_tokens* distinct tokens.

    Sequences below the threshold are dropped. When *verbose* is set, each
    dropped sequence is decoded with *enc* and printed.
    """
    for candidate in tqdm(seqs, mininterval=1, smoothing=0):
        if len(set(candidate)) >= min_unique_tokens:
            yield candidate
            continue
        if not verbose:
            continue
        text = enc.decode(candidate)
        print(
            f"excluding with {len(set(candidate))} unique tokens:\n\n{repr(text)}\n\n")
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def eot_splitting_generator(string_iterable, encoder: "TokenizerWrapper"):
    """
    Breaks each incoming string on the encoder's end-of-text string
    (encoder.eos_token_str) and yields the pieces, skipping any piece that is
    empty or whitespace-only
    """
    for document in string_iterable:
        pieces = document.split(encoder.eos_token_str)
        yield from (piece for piece in pieces if piece.strip())
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def prep_and_tokenize_generator(string_iterable, encoder: "TokenizerWrapper",
                                normalize_with_ftfy,
                                normalize_with_wikitext_detokenize):
    """
    Given strings, tokenizes each one, appends the encoder's end-of-text
    token id and yields the ids as a numpy array

    NOTE: normalize_with_ftfy and normalize_with_wikitext_detokenize are
    accepted but not used by this function - no text normalization is
    performed here.

    Arrays are uint16 whenever every id fits. A document containing an id
    above 65535 is yielded as uint32 instead, because uint16 cannot represent
    such ids.
    """
    uint16_max = np.iinfo(np.uint16).max
    for doc in string_iterable:
        tokens = encoder.encode(doc) + [encoder.eos_token_id]
        # tokens always holds at least the end-of-text id, so max() is safe
        dtype = np.uint16 if max(tokens) <= uint16_max else np.uint32
        yield np.array(tokens, dtype=dtype)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def file_to_tokenized_docs_generator(file_path, encoder, args):
    """
    Opens file_path with Reader and chains the lazy pipeline
    read -> split on end-of-text -> tokenize

    Returns a generator of token arrays of arbitrary, unequal length
    """
    raw_docs = Reader(file_path).stream_data(threaded=False)
    split_docs = eot_splitting_generator(raw_docs, encoder)
    return prep_and_tokenize_generator(
        split_docs,
        encoder,
        normalize_with_ftfy=args.normalize_with_ftfy,
        normalize_with_wikitext_detokenize=args.normalize_with_wikitext_detokenize,
    )
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def read_files_to_tokenized_docs(files, args, encoder):
    """Tokenize every file and return all documents as one list of token arrays.

    Args:
        files: Paths of the input files. The caller's list is not modified.
        args: Namespace providing ``preserve_data_order`` (plus whatever
            ``file_to_tokenized_docs_generator`` reads from it).
        encoder: Tokenizer handed through to the per-file generator.

    Returns:
        A list of token arrays. With ``preserve_data_order`` the files are
        processed in sorted order and documents keep their order; otherwise
        both the file order and the final document order are shuffled with
        the ``random`` module.
    """
    # Always start from a sorted copy: the shuffle below then depends only on
    # the random seed, not on the order in which the file system listed the
    # files, and the caller's list is left untouched.
    files = sorted(files)
    if not args.preserve_data_order:
        random.shuffle(files)

    docs = []
    for f in tqdm(files, mininterval=10, smoothing=0):
        docs.extend(file_to_tokenized_docs_generator(f, encoder, args))

    if not args.preserve_data_order:
        # shuffle at individual document level
        random.shuffle(docs)

    return docs
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def arrays_to_sequences(token_list_iterable, sequence_length=2049):
    """
    Re-packs token arrays of arbitrary lengths into lists of sequence_length tokens

    Yields the full-length lists as they fill up, followed by a final list
    of the leftover tokens (at most sequence_length of them) if there are any
    """
    print('Chunking in standard LM mode')
    pending = []

    for tokens in token_list_iterable:
        pending.extend(tokens)
        if len(pending) <= sequence_length:
            continue
        # cut the backlog into sequence_length-sized pieces; everything but
        # the last piece is complete, the last one is carried over
        pieces = [pending[start:start + sequence_length]
                  for start in range(0, len(pending), sequence_length)]
        yield from pieces[:-1]
        pending = pieces[-1]

    if pending:
        yield pending
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def arrays_to_sequences_pad(token_list_iterable, pad_token_id,
                            sequence_length=2049,
                            sep_token_id=None,
                            eos_token_id=None):
    """Pack whole examples into padded sequences of *sequence_length* tokens.

    Examples are never split. They are appended to a buffer; when the next
    example would not fit, the buffer is padded with *pad_token_id* up to
    *sequence_length* and yielded, and the example starts a new buffer.
    Examples longer than *sequence_length* are discarded and counted. The
    final, unpadded buffer is yielded last when it is non-empty.

    Args:
        token_list_iterable: Iterable of token arrays; each item must offer
            ``tolist()``.
        pad_token_id: Token id used for padding.
        sequence_length: Length of every padded sequence.
        sep_token_id: Separator token id used by the balance check.
        eos_token_id: End-of-text token id used by the balance check.

    Raises:
        AssertionError: If an example or a padded sequence does not contain
            the same number of separator and end-of-text tokens.
    """
    def check_balanced(tokens):
        # every example is expected to carry one separator per end-of-text
        n_sep_tokens = sum(x == sep_token_id for x in tokens)
        n_eos_tokens = sum(x == eos_token_id for x in tokens)
        assert n_sep_tokens == n_eos_tokens, (
            f"separator/end-of-text count mismatch: "
            f"{n_sep_tokens} != {n_eos_tokens}")

    print('Chunking in seq2seq mode')
    accum = []
    too_long = 0
    for chunk in tqdm(token_list_iterable):
        chunk = chunk.tolist()
        check_balanced(chunk)
        if len(chunk) > sequence_length:
            too_long += 1
        elif len(accum) + len(chunk) > sequence_length:
            res = accum + [pad_token_id] * (sequence_length - len(accum))
            check_balanced(res)
            yield res
            accum = chunk
        else:
            accum.extend(chunk)

    print(f'Discarded {too_long} examples longer than {sequence_length}')
    if len(accum) > 0:
        yield accum
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def chunk_and_finalize(arrays, args, encoder):
    """Chunk tokenized documents into sequences and apply the final filters.

    Args:
        arrays: Iterable of token arrays.
        args: Namespace providing ``min_unique_tokens``, ``verbose`` and
            ``preserve_data_order``. An optional ``seq2seq`` attribute selects
            the chunking mode and is treated as True when absent.
        encoder: Tokenizer providing ``pad_token_id``, ``sep_token_id`` and
            ``eos_token_id`` (seq2seq mode) and used to decode rejected
            sequences in verbose mode.

    Returns:
        ``(full_seqs, trailing_data)``: every sequence except the last one,
        and the last one. ``full_seqs`` is filtered by ``min_unique_tokens``
        when that is positive and shuffled unless ``preserve_data_order`` is
        set. Both are empty lists when chunking produced nothing.
    """
    seq2seq = getattr(args, 'seq2seq', True)

    if seq2seq:
        sequences = list(
            arrays_to_sequences_pad(arrays, pad_token_id=encoder.pad_token_id,
                                    sep_token_id=encoder.sep_token_id,
                                    eos_token_id=encoder.eos_token_id))
    else:
        sequences = [np.array(x, dtype=np.uint16)
                     for x in arrays_to_sequences(arrays)]

    if not sequences:
        # No input tokens at all: there is neither a full sequence nor any
        # trailing data, so avoid indexing into the empty list.
        return [], []

    full_seqs, trailing_data = sequences[:-1], sequences[-1]

    if args.min_unique_tokens > 0:
        full_seqs = list(
            enforce_min_unique(full_seqs, args.min_unique_tokens, encoder,
                               args.verbose))

    if not args.preserve_data_order:
        random.shuffle(full_seqs)

    return full_seqs, trailing_data
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def create_tfrecords(files, args):
    """Tokenize *files*, chunk them and write one .tfrecords file.

    The tokenizer is loaded from ``args.tokenizer_path``, the ``random``
    module is seeded with ``args.seed``, and the full sequences are written
    to ``{args.name}_{count}.tfrecords`` inside ``args.output_dir``, where
    count is the number of sequences written. The last chunk ("trailing
    data") is not written; its length is printed.

    In seq2seq mode (``args.seq2seq`` absent or true) the full sequences are
    sanity-checked first.

    Raises:
        AssertionError: If a seq2seq sanity check fails.
    """
    encoder = TokenizerWrapper.from_file_or_gpt(args.tokenizer_path)

    random.seed(args.seed)

    docs = read_files_to_tokenized_docs(files, args, encoder)

    full_seqs, trailing_data = chunk_and_finalize(docs, args, encoder)

    if getattr(args, 'seq2seq', True):
        # Seq2seq sanity checks
        assert all(
            encoder.decode(x[:20]).strip().startswith('<') for x in full_seqs), \
            "a sequence does not decode to text starting with '<'"
        sep_id = encoder.sep_token_id
        eos_id = encoder.eos_token_id
        pad_id = encoder.pad_token_id
        for seq in full_seqs:
            non_pad_indices = [i for i, tok in enumerate(seq) if tok != pad_id]
            assert non_pad_indices, "sequence consists of padding only"
            # the last real token before the padding must end an example
            assert seq[non_pad_indices[-1]] == eos_id, \
                "last non-padding token is not the end-of-text token"
            n_sep_tokens = sum(x == sep_id for x in seq)
            n_eos_tokens = sum(x == eos_id for x in seq)
            assert n_sep_tokens == n_eos_tokens, (
                f"separator/end-of-text count mismatch: "
                f"{n_sep_tokens} != {n_eos_tokens}")

    # final
    print(f"dropped {len(trailing_data)} tokens of trailing data")

    total_sequence_len = len(full_seqs)

    fp = os.path.join(args.output_dir,
                      f"{args.name}_{total_sequence_len}.tfrecords")
    write_tfrecord(full_seqs, fp)
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def create_finetune_tfrecords(**kwargs):
    """Entry point: parse the command line, apply overrides and build the file.

    Args:
        **kwargs: Attribute overrides applied on top of the parsed
            command-line arguments (e.g. ``input_dir=...``, ``name=...``).

    Raises:
        ValueError: If no input directory was supplied on the command line
            or through ``kwargs``.
    """
    args = parse_args()

    # Update by kwargs
    for k, v in kwargs.items():
        setattr(args, k, v)

    if args.input_dir is None:
        # --input-dir defaults to None; fail with a clear message here and
        # not with an AttributeError on the next line.
        raise ValueError(
            "no input directory given: pass --input-dir or input_dir=...")
    if not args.input_dir.endswith("/"):
        args.input_dir = args.input_dir + "/"

    if args.output_dir:
        os.makedirs(args.output_dir, exist_ok=True)
    files = get_files(args.input_dir)
    create_tfrecords(files, args)


if __name__ == "__main__":
    create_finetune_tfrecords()
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
from PisaFlexibleClient import initialise_env
|
|
2
|
+
|
|
3
|
+
# Run a server on port 8000
|
|
4
|
+
# i.e. do a 'sbt "runMain pisa.server.PisaOneStageServer8000"'
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Connect to the server started above and load the example theory file.
env = initialise_env(8000,
                     working_directory="/private/home/aqj/afp-2021-10-22/thys/FunWithFunctions",
                     isa_path="/private/home/aqj/Isabelle2021",
                     theory_file_path="/private/home/aqj/afp-2021-10-22/thys/FunWithFunctions/FunWithFunctions.thy"
                     )


# Suppose you have a list of theorems that you want to try on
# (raw strings: the Isabelle symbols contain backslashes that must reach the
# server unchanged and must not be read as Python escape sequences)
theorems = [
    r'theorem identity1: fixes f :: "nat \<Rightarrow> nat" assumes fff: "\<And>n. f(f(n)) < f(Suc(n))" shows "f(n) = n"',
    r'theorem ifac_neg0: fixes ifac :: "int \<Rightarrow> int" assumes ifac_rec: "\<And>i. ifac i = (if i=0 then 1 else i*ifac(i - 1))" shows "i<0 \<Longrightarrow> ifac i = 0"'
]
# And the corresponding scripts
scripts = [
    "sorry",
    "bad script"
]

env.post("<initialise>")
for theorem, script in zip(theorems, scripts):
    # Execute before the theorem
    env.post(
        f"<accumulative step before> {theorem}"
    )

    # Create an experimental state with a name e.g. script[-10:]
    # Execute the theorem declaration
    name = script[-10:]
    env.post(
        f"<clone> default <clone> {name}"
    )
    env.post(
        f"<apply to top level state> {name} <apply to top level state> {theorem} <apply to top level state> {name}"
    )

    # Execute the script and get the proof level
    response = env.post(
        f"<apply to top level state> {name} <apply to top level state> {script} <apply to top level state> {name}"
    )
    print(f"script execution response: {response}")
    level = env.post(f"<get_proof_level> {name}")
    # If level = 0, succeed, other wise fail
    print(level)
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import json
|
|
3
|
+
import grpc
|
|
4
|
+
import argparse
|
|
5
|
+
|
|
6
|
+
import server_pb2
|
|
7
|
+
import server_pb2_grpc
|
|
8
|
+
|
|
9
|
+
MAX_MESSAGE_LENGTH = 10485760
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def stack_lines(input_string):
    """Collapse every run of whitespace (newlines included) into one space and trim both ends."""
    words = input_string.split()
    return " ".join(words)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def evaluate_single_problem(isa_path, theory_file_path, working_directory, theorem_name, model, mode_of_proving,
                            maximum_number_of_steps=100, port=9000):
    """Let *model* attempt one theorem through the server on ``localhost:port``.

    The server is initialised with the Isabelle path, working directory and
    theory file, advanced to the theorem, and then fed the model's predictions
    for at most *maximum_number_of_steps* steps.

    Args:
        isa_path: Path sent to ``InitialiseIsabelle``.
        theory_file_path: Theory file sent to ``IsabelleContext``.
        working_directory: Path sent to ``IsabelleWorkingDirectory``.
        theorem_name: Theorem statement to proceed to (whitespace-normalised).
        model: Object with ``predict(input_string)`` returning the next command.
        mode_of_proving: One of "proof", "state" or "proof_and_state".
        maximum_number_of_steps: Upper bound on prediction steps.
        port: Port of the gRPC server.

    Returns:
        1 as soon as a returned state string no longer contains "proof",
        otherwise 0 (step budget exhausted or an exception during stepping).

    Raises:
        AssertionError: If *mode_of_proving* is not a supported value. This is
            checked before any connection is made.
    """
    if mode_of_proving not in ["proof", "state", "proof_and_state"]:
        raise AssertionError(
            "unknown mode_of_proving: {!r}".format(mode_of_proving))

    channel = grpc.insecure_channel('localhost:{}'.format(port),
                                    options=[('grpc.max_send_message_length', MAX_MESSAGE_LENGTH),
                                             ('grpc.max_receive_message_length', MAX_MESSAGE_LENGTH)])
    try:
        stub = server_pb2_grpc.ServerStub(channel)
        stub.InitialiseIsabelle(server_pb2.IsaPath(path=isa_path))
        stub.IsabelleWorkingDirectory(server_pb2.IsaPath(path=working_directory))
        stub.IsabelleContext(server_pb2.IsaContext(context=theory_file_path))

        theorem_name = stack_lines(theorem_name)
        state = stub.IsabelleCommand(server_pb2.IsaCommand(command="proceed:" + theorem_name)).state

        previous_proof_segment = theorem_name
        try:
            for _ in range(maximum_number_of_steps):
                state = stack_lines(state)
                input_string = ""
                # NOTE(review): in "proof_and_state" mode only the separator
                # is added, neither the state nor the proof text - looks
                # unfinished (see the TODO below); confirm the intended format.
                if mode_of_proving == "state":
                    input_string += "State: {}".format(state)
                if mode_of_proving == "proof_and_state":
                    input_string += " <PS_SEP> "
                if mode_of_proving == "proof":
                    input_string += "Proof: {}".format(previous_proof_segment)
                    # TODO: previous proof segment unfinished

                output_string = model.predict(input_string)
                state = stub.IsabelleCommand(server_pb2.IsaCommand(command=output_string)).state
                if "proof" not in state:
                    stub.IsabelleCommand(server_pb2.IsaCommand(command="exit"))
                    return 1
        except Exception as e:
            # Best effort: any failure while stepping counts as "not proved".
            print(e)
        stub.IsabelleCommand(server_pb2.IsaCommand(command="exit"))
        return 0
    finally:
        # release the connection on every exit path
        channel.close()
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class DummyProver:
    """Lookup-table stand-in for a model.

    Maps every source line of the train/val/test splits found in a directory
    to the target line at the same position, after whitespace normalisation
    with ``stack_lines``.
    """

    def __init__(self, seq2seq_repo):
        """Load ``{train,val,test}.src`` and ``.tgt`` from *seq2seq_repo*.

        Raises:
            AssertionError: If the total numbers of source and target lines differ.
        """
        src_list = []
        tgt_list = []
        for split in ("train", "val", "test"):
            # context managers so the six files are closed again
            with open(os.path.join(seq2seq_repo, split + ".src"), "r") as src_file:
                src_list.extend(src_file.readlines())
            with open(os.path.join(seq2seq_repo, split + ".tgt"), "r") as tgt_file:
                tgt_list.extend(tgt_file.readlines())
        self.prover_dict = dict()
        assert len(src_list) == len(tgt_list)
        for src_line, tgt_line in zip(src_list, tgt_list):
            # a source seen more than once keeps its last target
            self.prover_dict[stack_lines(src_line)] = stack_lines(tgt_line)

    def predict(self, input_string):
        """Return the stored target for *input_string*; raises KeyError if unknown."""
        return self.prover_dict[input_string]
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
if __name__ == "__main__":
    # NOTE: only --isa-path and --port are used by the loop below; the other
    # options are parsed for the commented-out single-problem call.
    parser = argparse.ArgumentParser(description='Extracting an Isabelle theory file.')
    parser.add_argument('--isa-path', help='The path to the Isabelle executable',
                        default="/Applications/Isabelle2020.app/Isabelle")
    parser.add_argument('--working-directory', '-wd', help='Path to the AFP project')
    parser.add_argument('--theory-file-path', '-tfp', help='Path to the file to parse')
    parser.add_argument('--theorem-name', '-tn', help='Name of the theorem to prove')
    parser.add_argument('--mode-of-proving', '-mop',
                        help='Mode of proving, could be "state", "proof", or "proof_and_state"')
    parser.add_argument('--port', '-p', help='Port to use to communicate', default=9000, type=int)
    args = parser.parse_args()

    dummy_prover = DummyProver("/Users/qj213/Projects/PISA/fs_with_state")
    # print(evaluate_single_problem(isa_path=args.isa_path, theory_file_path=args.theory_file_path,
    #                               working_directory=args.working_directory, theorem_name=args.theorem_name,
    #                               port=args.port, model=dummy_prover, mode_of_proving="state"))

    # read the problem list through a context manager so the file is closed
    with open("fs_with_state/problem_names_split.json") as problem_names_file:
        problem_names = json.load(problem_names_file)
    train_names = problem_names["train"]
    # evaluate the first five training problems
    for i in range(0, 5):
        # remap the path prefix stored in the json to the local checkout
        theory_file_path = train_names[i][0].replace("/home/ywu/afp-2021-02-11", "/Users/qj213/Projects/afp-2021-02-11")
        # print(theory_file_path)
        # print(train_names[i][1])
        print(evaluate_single_problem(isa_path=args.isa_path,
                                      theory_file_path=theory_file_path,
                                      working_directory="/".join(theory_file_path.split("/")[:-1]),
                                      theorem_name=train_names[i][1],
                                      port=args.port, model=dummy_prover, mode_of_proving="state"))
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import json
|
|
3
|
+
|
|
4
|
+
from tqdm import tqdm
|
|
5
|
+
|
|
6
|
+
# Input: one JSON-lines file per split; output: {split}.src / {split}.tgt
proof_and_state_dir = "/home/qj213/proof_and_state"
first_step_dir = "/home/qj213/first_step"

# the output files are opened for writing below, so the directory must exist
os.makedirs(first_step_dir, exist_ok=True)

for file in os.listdir(proof_and_state_dir):
    split_name = file.split(".")[0]
    with open(os.path.join(proof_and_state_dir, file)) as fhand, \
            open(os.path.join(first_step_dir, f"{split_name}.src"), "w") as src_out, \
            open(os.path.join(first_step_dir, f"{split_name}.tgt"), "w") as tgt_out:
        for line in tqdm(fhand.readlines()):
            line = line.strip()
            if not line:
                # tolerate blank lines (e.g. a trailing newline at end of file)
                continue
            line_json = json.loads(line)
            source = line_json["source"]
            # source is "<proof steps> <PS_SEP> <proof state>"
            source_parts = source.split("<PS_SEP>")
            proof_step_string = source_parts[0].strip()
            proof_state_string = source_parts[1].strip()
            target = line_json["target"]
            # the test looks for the two literal characters backslash + n
            if "\\n" not in proof_step_string:
                # This is the first step
                src_out.write(f"<ISA_OBS> {proof_state_string}\n")
                tgt_out.write(f"{target}\n")
|
|
25
|
+
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from PisaFlexibleClient import initialise_env
|
|
2
|
+
import os
|
|
3
|
+
import pickle
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def match_names_single_file_to_data_play_szymon(
        port, working_directory, isa_path, theory_file_path, out_dir, error_log_dir):
    """Fetch the global facts of one theory file and pickle them as a dict.

    The environment's answer to "<get global facts from file>" is split on
    "<SEP>" into entries and each entry on "<DEF>" into name and definition.
    The resulting ``{name: definition}`` dict is pickled to
    ``out_dir/dict_<theory path with '/' replaced by '_'>``.

    Any exception is caught and its text written to
    ``error_log_dir/error_log_<theory path with '/' replaced by '_'>.txt``.
    """
    env = initialise_env(
        port=port,
        working_directory=working_directory,
        isa_path=isa_path,
        theory_file_path=theory_file_path
    )
    flat_theory_name = theory_file_path.replace('/', '_')
    try:
        output_string = env.post("<get global facts from file>")
        list_of_string_tuples = output_string.split("<SEP>")
        global_fact_dict = {}
        for element in list_of_string_tuples:
            name, definition = element.split("<DEF>")
            global_fact_dict[name.strip()] = definition.strip()
        # context manager so the pickle is flushed and the handle closed
        with open(os.path.join(out_dir, f"dict_{flat_theory_name}"), "wb") as pickle_out:
            pickle.dump(global_fact_dict, pickle_out)
    except Exception as e:
        with open(os.path.join(error_log_dir, f"error_log_{flat_theory_name}.txt"), "w") as fout:
            fout.write(str(e))


if __name__ == "__main__":
    match_names_single_file_to_data_play_szymon(
        port=8000,
        working_directory="/home/qj213/afp-2021-10-22/thys/FunWithFunctions",
        isa_path="/home/qj213/Isabelle2021",
        theory_file_path="/home/qj213/afp-2021-10-22/thys/FunWithFunctions/FunWithFunctions.thy",
        out_dir="/home/qj213/out_stuff",
        error_log_dir="/home/qj213/out_stuff"
    )
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
if __name__ == "__main__":
    # Concatenate the train/val/test .src/.tgt files of several input
    # directories into one set of files in the output directory.
    parser = argparse.ArgumentParser(description="Mix the data from multiple forms of input")
    parser.add_argument("--input", type=str, nargs="+", help="Input files")
    parser.add_argument("--output-path", "-op", type=str, help="Output file")
    args = parser.parse_args()

    split_file_names = ["train.src", "train.tgt", "val.src", "val.tgt", "test.src", "test.tgt"]

    # Remove results of an earlier run: the copy loop below appends.
    for split_file_name in split_file_names:
        stale_path = os.path.join(args.output_path, split_file_name)
        if os.path.isfile(stale_path):
            os.remove(stale_path)

    for input_path in args.input:
        for split_file_name in split_file_names:
            destination = os.path.join(args.output_path, split_file_name)
            origin = os.path.join(input_path, split_file_name)
            with open(destination, "a") as output_file, \
                    open(origin, "r") as input_file:
                output_file.write(input_file.read())
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import json
|
|
3
|
+
import grpc
|
|
4
|
+
import argparse
|
|
5
|
+
|
|
6
|
+
from copy import copy
|
|
7
|
+
from func_timeout import func_set_timeout, FunctionTimedOut
|
|
8
|
+
|
|
9
|
+
import server_pb2
|
|
10
|
+
import server_pb2_grpc
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
MAX_MESSAGE_LENGTH = 10485760
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def analyse_whole_file(whole_file_string, use_sledgehammer=False):
    r"""Split the raw extraction output into problem names and proof transitions.

    *whole_file_string* is a sequence of transitions separated by
    ``<\TRANSEP>``; each transition holds state, action and proof level
    (plus the hammer results when *use_sledgehammer* is set) separated by
    ``<\STATESEP>``.

    A transition whose action starts with "lemma" or "theorem" opens a proof
    and is recorded as a problem name. While a proof is open, every
    transition is recorded. The proof is closed once the previous
    transition's state mentioned "subgoal" and the current one no longer does.

    Args:
        whole_file_string: Raw string returned by the extraction command.
        use_sledgehammer: Whether each transition carries a fourth field.

    Returns:
        A dict with "problem_names" (list of actions) and "translations"
        (list of ``(state, action, proof_level, hammer_results)`` tuples;
        ``hammer_results`` is "NA" without sledgehammer).

    Raises:
        ValueError: If a transition does not have the expected number of
            fields or its proof level is not an integer.
    """
    # raw strings: the separators contain a literal backslash
    transitions = whole_file_string.split(r"<\TRANSEP>")
    state_action_proof_level_tuples = list()
    problem_names = list()
    proof_open = False
    last_state = ""
    for transition in transitions:
        if not transition:
            continue
        fields = transition.split(r"<\STATESEP>")
        if use_sledgehammer:
            state, action, proof_level, hammer_results = fields
        else:
            state, action, proof_level = fields
            hammer_results = "NA"
        state = state.strip()
        action = action.strip()
        proof_level = int(proof_level.strip())
        if action.startswith(("lemma", "theorem")):
            problem_names.append(action)
            state_action_proof_level_tuples.append((state, action, proof_level, hammer_results))
            proof_open = True
        elif proof_open:
            state_action_proof_level_tuples.append((state, action, proof_level, hammer_results))

        if "subgoal" in last_state and "subgoal" not in state:
            proof_open = False
        # remember this state so the next transition can detect the proof end
        # (previously never updated, which left the check above dead)
        last_state = state
    return {
        "problem_names": problem_names,
        "translations": state_action_proof_level_tuples
    }
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@func_set_timeout(12000)
def isa_step(stub, theory_file_path, use_sledgehammer=False):
    """Point the server at *theory_file_path* and run the extraction command.

    Sends "PISA extract data with hammer" when *use_sledgehammer* is set and
    "PISA extract data" otherwise, and returns the ``state`` field of the
    reply. The call is wrapped by ``func_set_timeout(12000)``.
    """
    stub.IsabelleContext(server_pb2.IsaContext(context=theory_file_path))
    if use_sledgehammer:
        extraction_command = "PISA extract data with hammer"
    else:
        extraction_command = "PISA extract data"
    reply = stub.IsabelleCommand(server_pb2.IsaCommand(command=extraction_command))
    return reply.state
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def extract_file(isa_path, theory_file_path, working_directory, saving_directory, port=9000, use_sledgehammer=False):
    """Extract one theory file through the server and save the result as JSON.

    On success the analysed transitions are written to
    ``<theory path without .thy, '/' replaced by '_'>_ground_truth.json`` in
    *saving_directory*. If the extraction (or the following "exit" command)
    fails or times out, the error text is written to
    ``project_<dir>_file_<file>_bug_report.txt`` in the same directory and
    "exit" is sent to the server again afterwards.

    Args:
        isa_path: Path sent to ``InitialiseIsabelle``.
        theory_file_path: Theory file to extract.
        working_directory: Path sent to ``IsabelleWorkingDirectory``.
        saving_directory: Output directory; created when missing.
        port: Port of the gRPC server on localhost.
        use_sledgehammer: Request (and parse) the hammer results as well.
    """
    channel = grpc.insecure_channel('localhost:{}'.format(port),
                                    options=[('grpc.max_send_message_length', MAX_MESSAGE_LENGTH),
                                             ('grpc.max_receive_message_length', MAX_MESSAGE_LENGTH)])
    try:
        stub = server_pb2_grpc.ServerStub(channel)

        stub.InitialiseIsabelle(server_pb2.IsaPath(path=isa_path))
        stub.IsabelleWorkingDirectory(server_pb2.IsaPath(path=working_directory))

        if not os.path.isdir(saving_directory):
            os.makedirs(saving_directory)

        # Stays None when the extraction itself failed, so the analysis below
        # can be skipped and an unbound variable is never read.
        whole_file_parsed = None
        close_program = False
        try:
            whole_file_parsed = isa_step(stub, theory_file_path, use_sledgehammer=use_sledgehammer)
            stub.IsabelleCommand(server_pb2.IsaCommand(command="exit"))
        except (Exception, FunctionTimedOut) as e:
            close_program = True
            with open(os.path.join(saving_directory,
                                   "project_{}_file_{}_bug_report.txt".format(
                                       working_directory.split("/")[-1], theory_file_path.split("/")[-1])), "w") as fout:
                fout.write(str(e))

        if whole_file_parsed is not None:
            # the hammer output has one more field per transition, so the
            # flag must reach the parser as well
            file_analysis = analyse_whole_file(whole_file_parsed, use_sledgehammer=use_sledgehammer)
            file_info = {
                "file_name": theory_file_path,
                "working_directory": working_directory,
                **file_analysis,
                "raw_parsed_string": whole_file_parsed
            }

            output_name = "_".join(theory_file_path.split(".thy")[0].split("/")) + "_ground_truth.json"
            with open(os.path.join(saving_directory, output_name), "w") as json_out:
                json.dump(file_info, json_out)

        if close_program:
            stub.IsabelleCommand(server_pb2.IsaCommand(command="exit"))
    finally:
        # close the connection on every exit path, including errors
        channel.close()
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
if __name__ == "__main__":
    # Command-line front end: extract a single theory file.
    parser = argparse.ArgumentParser(description='Extracting an Isabelle theory file.')
    parser.add_argument('--isa-path', default="/Applications/Isabelle2020.app/Isabelle",
                        help='The path to the Isabelle executable')
    parser.add_argument('--working-directory', '-wd', help='Path to the AFP project')
    parser.add_argument('--theory-file-path', '-tfp', help='Path to the file to parse')
    parser.add_argument('--saving-directory', '-sd', help='Where the save the parsed json files')
    parser.add_argument('--port', '-p', type=int, default=9000, help='Port to use to communicate')
    parser.add_argument('--use-sledgehammer', '-us', action='store_true',
                        help='Whether to use sledgehammer')
    parser.set_defaults(use_sledgehammer=False)
    args = parser.parse_args()

    # for file_name in os.listdir(args.working_directory):
    #     if file_name.endswith(".thy"):
    #         full_file_path = os.path.join(args.working_directory, file_name)
    extract_file(
        args.isa_path,
        args.theory_file_path,
        args.working_directory,
        args.saving_directory,
        args.port,
        args.use_sledgehammer,
    )
|