langfun 0.1.2.dev202412180804__tar.gz → 0.1.2.dev202412270804__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/PKG-INFO +1 -1
- langfun-0.1.2.dev202412270804/langfun/core/eval/v2/checkpointing.py +346 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/checkpointing_test.py +4 -3
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/evaluation.py +80 -6
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/evaluation_test.py +27 -14
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/experiment.py +12 -4
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/progress_tracking_test.py +4 -4
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/reporting.py +104 -29
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/reporting_test.py +2 -2
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/runners.py +79 -38
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/runners_test.py +10 -9
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/logging.py +19 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/logging_test.py +19 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/parsing.py +24 -17
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/parsing_test.py +25 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/querying.py +10 -1
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/querying_test.py +10 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun.egg-info/PKG-INFO +1 -1
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun.egg-info/SOURCES.txt +1 -1
- langfun-0.1.2.dev202412180804/langfun/core/eval/v2/checkpointing.py +0 -224
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/LICENSE +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/README.md +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/__init__.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/__init__.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/agentic/__init__.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/agentic/action.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/agentic/action_eval.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/agentic/action_eval_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/agentic/action_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/__init__.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/__init__.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/correction.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/correction_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/errors.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/errors_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/execution.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/execution_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/generation.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/generation_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/parsing.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/parsing_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/permissions.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/permissions_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/component.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/component_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/concurrent.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/concurrent_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/console.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/console_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/__init__.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/base.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/base_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/matching.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/matching_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/patching.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/patching_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/scoring.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/scoring_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/__init__.py +0 -0
- /langfun-0.1.2.dev202412180804/langfun/core/eval/v2/test_helper.py → /langfun-0.1.2.dev202412270804/langfun/core/eval/v2/eval_test_helper.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/example.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/example_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/experiment_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/metric_values.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/metric_values_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/metrics.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/metrics_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/progress.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/progress_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/progress_tracking.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/langfunc.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/langfunc_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/language_model.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/language_model_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/__init__.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/anthropic.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/anthropic_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/cache/__init__.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/cache/base.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/cache/in_memory.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/cache/in_memory_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/compositional.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/compositional_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/fake.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/fake_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/google_genai.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/google_genai_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/groq.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/groq_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/llama_cpp.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/llama_cpp_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/openai.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/openai_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/rest.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/rest_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/vertexai.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/vertexai_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/memories/__init__.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/memories/conversation_history.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/memories/conversation_history_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/memory.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/message.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/message_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/__init__.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/audio.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/audio_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/image.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/image_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/mime.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/mime_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/ms_office.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/ms_office_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/pdf.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/pdf_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/video.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/video_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modality.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modality_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/natural_language.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/natural_language_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/repr_utils.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/repr_utils_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/sampling.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/sampling_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/__init__.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/completion.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/completion_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/description.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/description_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/function_generation.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/function_generation_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/mapping.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/mapping_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/schema.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/schema_generation.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/schema_generation_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/schema_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/scoring.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/scoring_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/tokenization.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/tokenization_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/subscription.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/subscription_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/template.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/template_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/templates/__init__.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/templates/completion.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/templates/completion_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/templates/conversation.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/templates/conversation_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/templates/demonstration.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/templates/demonstration_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/templates/selfplay.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/templates/selfplay_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/text_formatting.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/text_formatting_test.py +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun.egg-info/dependency_links.txt +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun.egg-info/requires.txt +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun.egg-info/top_level.txt +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/setup.cfg +0 -0
- {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/setup.py +0 -0
@@ -0,0 +1,346 @@
|
|
1
|
+
# Copyright 2024 The Langfun Authors
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
"""Checkpointing evaluation runs."""
|
15
|
+
import abc
|
16
|
+
import threading
|
17
|
+
import traceback
|
18
|
+
|
19
|
+
import langfun.core as lf
|
20
|
+
from langfun.core.eval.v2 import example as example_lib
|
21
|
+
from langfun.core.eval.v2 import experiment as experiment_lib
|
22
|
+
import pyglove as pg
|
23
|
+
|
24
|
+
Example = example_lib.Example
|
25
|
+
Experiment = experiment_lib.Experiment
|
26
|
+
Runner = experiment_lib.Runner
|
27
|
+
|
28
|
+
|
29
|
+
class Checkpointer(experiment_lib.Plugin):
|
30
|
+
"""Base class for checkpointing evaluation examples."""
|
31
|
+
|
32
|
+
def on_experiment_start(
|
33
|
+
self,
|
34
|
+
runner: Runner,
|
35
|
+
experiment: Experiment
|
36
|
+
) -> None:
|
37
|
+
if not experiment.is_leaf:
|
38
|
+
return
|
39
|
+
|
40
|
+
# For refresh runs, we don't want to load the previous state.
|
41
|
+
if not runner.current_run.refresh:
|
42
|
+
if runner.current_run.input_root != runner.current_run.output_root:
|
43
|
+
experiment.info(
|
44
|
+
f'Warm starting from directory: {runner.current_run.input_root}.'
|
45
|
+
)
|
46
|
+
self._load_experiment(runner, experiment)
|
47
|
+
|
48
|
+
if experiment.state.evaluated_examples:
|
49
|
+
loaded_example_ids = list(
|
50
|
+
sorted(experiment.state.evaluated_examples.keys())
|
51
|
+
)
|
52
|
+
example_ids_to_evaluate = (
|
53
|
+
set(runner.current_run.example_ids) if runner.current_run.example_ids
|
54
|
+
else set(range(1, experiment.num_examples + 1))
|
55
|
+
)
|
56
|
+
example_ids_to_evaluate -= set(loaded_example_ids)
|
57
|
+
|
58
|
+
experiment.info(
|
59
|
+
f'{len(experiment.state.evaluated_examples)} examples have been '
|
60
|
+
'loaded from checkpoint files. Their outputs will be used '
|
61
|
+
f'for recomputing metrics. Example IDs: {loaded_example_ids}'
|
62
|
+
)
|
63
|
+
experiment.info(
|
64
|
+
f'{len(example_ids_to_evaluate)} examples will be processed from '
|
65
|
+
f'scratch. Example IDs: {list(sorted(example_ids_to_evaluate))}'
|
66
|
+
)
|
67
|
+
else:
|
68
|
+
experiment.info(
|
69
|
+
'No examples are loaded from checkpoint files. '
|
70
|
+
f'Experiment {experiment.id} starts from scratch.'
|
71
|
+
)
|
72
|
+
|
73
|
+
def on_example_complete(
|
74
|
+
self,
|
75
|
+
runner: Runner,
|
76
|
+
experiment: Experiment,
|
77
|
+
example: Example,
|
78
|
+
) -> None:
|
79
|
+
"""Saves the example to the checkpoint file."""
|
80
|
+
if example.has_error:
|
81
|
+
experiment.warning(
|
82
|
+
f'Example {example.id} has error. Skipping checkpointing.'
|
83
|
+
)
|
84
|
+
else:
|
85
|
+
self._save_example(runner, experiment, example)
|
86
|
+
|
87
|
+
@abc.abstractmethod
|
88
|
+
def _load_experiment(self, runner: Runner, experiment: Experiment) -> None:
|
89
|
+
"""Loads the experiment state from checkpoint files."""
|
90
|
+
|
91
|
+
@abc.abstractmethod
|
92
|
+
def _save_example(
|
93
|
+
self,
|
94
|
+
runner: Runner,
|
95
|
+
experiment: Experiment,
|
96
|
+
example: Example,
|
97
|
+
) -> None:
|
98
|
+
"""Saves an evaluated example."""
|
99
|
+
|
100
|
+
|
101
|
+
class PerExampleCheckpointer(Checkpointer):
|
102
|
+
"""Checkpointer that saves each example to a separate file."""
|
103
|
+
|
104
|
+
checkpoint_filename: str = 'checkpoint.bagz'
|
105
|
+
|
106
|
+
def _on_bound(self):
|
107
|
+
super()._on_bound()
|
108
|
+
prefix, ext = self._file_prefix_and_ext(self.checkpoint_filename)
|
109
|
+
self._checkpoint_file_prefix = prefix
|
110
|
+
self._checkpoint_file_ext = ext
|
111
|
+
|
112
|
+
def _load_experiment(
|
113
|
+
self,
|
114
|
+
runner: Runner,
|
115
|
+
experiment: Experiment,
|
116
|
+
) -> None:
|
117
|
+
"""Creates the checkpoint file."""
|
118
|
+
experiment_dir = runner.current_run.input_dir(experiment)
|
119
|
+
if pg.io.path_exists(experiment_dir):
|
120
|
+
ckpt_files = [
|
121
|
+
runner.current_run.input_path_for(experiment, filename)
|
122
|
+
for filename in pg.io.listdir(experiment_dir)
|
123
|
+
if filename.startswith(self._checkpoint_file_prefix)
|
124
|
+
and filename.endswith(self._checkpoint_file_ext)
|
125
|
+
]
|
126
|
+
else:
|
127
|
+
ckpt_files = []
|
128
|
+
|
129
|
+
experiment.info(f'Found {len(ckpt_files)} checkpoint files to load.')
|
130
|
+
|
131
|
+
# Load the checkpoint files in parallel.
|
132
|
+
context = dict(counter=0, counter_lock=threading.Lock())
|
133
|
+
def _load_state(ckpt_file):
|
134
|
+
error = None
|
135
|
+
with pg.timeit() as t:
|
136
|
+
try:
|
137
|
+
experiment.load_state(ckpt_file)
|
138
|
+
except BaseException as e: # pylint: disable=broad-except
|
139
|
+
error = e
|
140
|
+
finally:
|
141
|
+
with context['counter_lock']:
|
142
|
+
context['counter'] += 1
|
143
|
+
|
144
|
+
progress_str = f'{context["counter"]}/{len(ckpt_files)}'
|
145
|
+
if error is None:
|
146
|
+
experiment.info(
|
147
|
+
f'Loaded checkpoint file {ckpt_file} in {t.elapse:.2f} '
|
148
|
+
f'seconds. ({progress_str})'
|
149
|
+
)
|
150
|
+
else:
|
151
|
+
experiment.warning(
|
152
|
+
f'Failed to load checkpoint file {ckpt_file}: {error}. '
|
153
|
+
f'Skipping the file. ({progress_str})'
|
154
|
+
)
|
155
|
+
|
156
|
+
_ = list(
|
157
|
+
lf.concurrent_map(
|
158
|
+
_load_state, ckpt_files, max_workers=16, silence_on_errors=None
|
159
|
+
)
|
160
|
+
)
|
161
|
+
|
162
|
+
def _save_example(
|
163
|
+
self,
|
164
|
+
runner: Runner,
|
165
|
+
experiment: Experiment,
|
166
|
+
example: Example,
|
167
|
+
) -> None:
|
168
|
+
"""Saves the example to the checkpoint file."""
|
169
|
+
def save_state(example: Example):
|
170
|
+
writer = SequenceWriter(
|
171
|
+
runner.current_run.output_path_for(
|
172
|
+
experiment,
|
173
|
+
(
|
174
|
+
f'{self._checkpoint_file_prefix}_{example.id}'
|
175
|
+
f'{self._checkpoint_file_ext}'
|
176
|
+
)
|
177
|
+
)
|
178
|
+
)
|
179
|
+
try:
|
180
|
+
writer.add(example)
|
181
|
+
writer.close()
|
182
|
+
experiment.info(
|
183
|
+
f'Example {example.id} saved to {writer.path}.',
|
184
|
+
)
|
185
|
+
except BaseException as e: # pylint: disable=broad-except
|
186
|
+
experiment.error(
|
187
|
+
f'Failed to save example {example.id} to {writer.path}. '
|
188
|
+
f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
|
189
|
+
)
|
190
|
+
raise e
|
191
|
+
runner.background_run(save_state, example)
|
192
|
+
|
193
|
+
def _file_prefix_and_ext(self, filename: str) -> tuple[str, str]:
|
194
|
+
ext_index = filename.rfind('.')
|
195
|
+
if ext_index == -1:
|
196
|
+
return filename, ''
|
197
|
+
else:
|
198
|
+
return filename[:ext_index], filename[ext_index:]
|
199
|
+
|
200
|
+
|
201
|
+
class BulkCheckpointer(Checkpointer):
|
202
|
+
"""Checkpointer that saves all examples to a single file."""
|
203
|
+
|
204
|
+
checkpoint_filename: str = 'checkpoint.bagz'
|
205
|
+
|
206
|
+
def _on_bound(self):
|
207
|
+
super()._on_bound()
|
208
|
+
self._lock = threading.Lock()
|
209
|
+
self._sequence_writer = None
|
210
|
+
|
211
|
+
def on_run_start(
|
212
|
+
self,
|
213
|
+
runner: Runner,
|
214
|
+
root: Experiment,
|
215
|
+
) -> None:
|
216
|
+
self._sequence_writer = {}
|
217
|
+
|
218
|
+
def on_run_abort(
|
219
|
+
self,
|
220
|
+
runner: Runner,
|
221
|
+
root: Experiment,
|
222
|
+
error: BaseException
|
223
|
+
) -> None:
|
224
|
+
with self._lock:
|
225
|
+
if self._sequence_writer is not None:
|
226
|
+
for writer in self._sequence_writer.values():
|
227
|
+
writer.close()
|
228
|
+
self._sequence_writer.clear()
|
229
|
+
|
230
|
+
def on_run_complete(
|
231
|
+
self,
|
232
|
+
runner: Runner,
|
233
|
+
root: Experiment,
|
234
|
+
) -> None:
|
235
|
+
with self._lock:
|
236
|
+
assert self._sequence_writer is not None and not self._sequence_writer
|
237
|
+
|
238
|
+
def on_experiment_start(
|
239
|
+
self,
|
240
|
+
runner: Runner,
|
241
|
+
experiment: Experiment,
|
242
|
+
) -> None:
|
243
|
+
super().on_experiment_start(runner, experiment)
|
244
|
+
|
245
|
+
# Prepare the sequence writer for the experiment.
|
246
|
+
if experiment.is_leaf:
|
247
|
+
sequence_writer = SequenceWriter(
|
248
|
+
runner.current_run.output_path_for(
|
249
|
+
experiment, self.checkpoint_filename
|
250
|
+
)
|
251
|
+
)
|
252
|
+
with self._lock:
|
253
|
+
if self._sequence_writer is not None:
|
254
|
+
self._sequence_writer[experiment.id] = sequence_writer
|
255
|
+
|
256
|
+
def _load_experiment(
|
257
|
+
self,
|
258
|
+
runner: Runner,
|
259
|
+
experiment: Experiment,
|
260
|
+
) -> None:
|
261
|
+
"""Creates the checkpoint file."""
|
262
|
+
experiment.load_state(
|
263
|
+
runner.current_run.input_path_for(
|
264
|
+
experiment, self.checkpoint_filename
|
265
|
+
),
|
266
|
+
raise_if_not_exist=False
|
267
|
+
)
|
268
|
+
|
269
|
+
def on_experiment_complete(
|
270
|
+
self,
|
271
|
+
runner: Runner,
|
272
|
+
experiment: Experiment,
|
273
|
+
) -> None:
|
274
|
+
"""Closes the checkpoint file."""
|
275
|
+
if not experiment.is_leaf:
|
276
|
+
return
|
277
|
+
assert experiment.id in self._sequence_writer
|
278
|
+
with self._lock:
|
279
|
+
if self._sequence_writer is not None:
|
280
|
+
# Make sure the writer is closed without delay so the file will be
|
281
|
+
# available immediately.
|
282
|
+
writer = self._sequence_writer.pop(experiment.id)
|
283
|
+
writer.close()
|
284
|
+
experiment.info(
|
285
|
+
f'{len(experiment.state.evaluated_examples)} examples are '
|
286
|
+
f'checkpointed to {writer.path}.'
|
287
|
+
)
|
288
|
+
|
289
|
+
def _save_example(
|
290
|
+
self,
|
291
|
+
runner: Runner,
|
292
|
+
experiment: Experiment,
|
293
|
+
example: Example,
|
294
|
+
) -> None:
|
295
|
+
"""Saves the example to the checkpoint file."""
|
296
|
+
assert experiment.id in self._sequence_writer
|
297
|
+
def _save_example(example: Example):
|
298
|
+
writer = self._sequence_writer[experiment.id]
|
299
|
+
try:
|
300
|
+
writer.add(example)
|
301
|
+
experiment.info(
|
302
|
+
f'Example {example.id} added to {writer.path}.',
|
303
|
+
)
|
304
|
+
except BaseException as e: # pylint: disable=broad-except
|
305
|
+
experiment.error(
|
306
|
+
f'Failed to save example {example.id} to {writer.path}. '
|
307
|
+
f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
|
308
|
+
)
|
309
|
+
raise e
|
310
|
+
runner.background_run(_save_example, example)
|
311
|
+
|
312
|
+
|
313
|
+
class SequenceWriter:
|
314
|
+
"""Thread safe sequence writer."""
|
315
|
+
|
316
|
+
def __init__(self, path: str):
|
317
|
+
self._lock = threading.Lock()
|
318
|
+
self._path = path
|
319
|
+
self._sequence_writer = pg.io.open_sequence(path, 'w')
|
320
|
+
|
321
|
+
@property
|
322
|
+
def path(self) -> str:
|
323
|
+
return self._path
|
324
|
+
|
325
|
+
def add(self, example: Example):
|
326
|
+
example_blob = pg.to_json_str(
|
327
|
+
example,
|
328
|
+
hide_default_values=True,
|
329
|
+
save_ref_value=True,
|
330
|
+
exclude_input=True
|
331
|
+
)
|
332
|
+
with self._lock:
|
333
|
+
if self._sequence_writer is None:
|
334
|
+
return
|
335
|
+
self._sequence_writer.add(example_blob)
|
336
|
+
|
337
|
+
def close(self):
|
338
|
+
# Make sure there is no write in progress.
|
339
|
+
with self._lock:
|
340
|
+
if self._sequence_writer is None:
|
341
|
+
return
|
342
|
+
self._sequence_writer.close()
|
343
|
+
self._sequence_writer = None
|
344
|
+
|
345
|
+
def __del__(self):
|
346
|
+
self.close()
|
@@ -16,9 +16,9 @@ import tempfile
|
|
16
16
|
import unittest
|
17
17
|
|
18
18
|
from langfun.core.eval.v2 import checkpointing
|
19
|
+
from langfun.core.eval.v2 import eval_test_helper
|
19
20
|
from langfun.core.eval.v2 import example as example_lib
|
20
21
|
from langfun.core.eval.v2 import runners as runners_lib # pylint: disable=unused-import
|
21
|
-
from langfun.core.eval.v2 import test_helper
|
22
22
|
import pyglove as pg
|
23
23
|
|
24
24
|
Example = example_lib.Example
|
@@ -55,8 +55,9 @@ class SequenceWriterTest(unittest.TestCase):
|
|
55
55
|
class PerExampleCheckpointerTest(unittest.TestCase):
|
56
56
|
|
57
57
|
def test_checkpointing(self):
|
58
|
+
pg.defaults.loggers.use_stdout()
|
58
59
|
root_dir = os.path.join(tempfile.gettempdir(), 'per_example_checkpointer')
|
59
|
-
experiment = test_helper.test_experiment()
|
60
|
+
experiment = eval_test_helper.test_experiment()
|
60
61
|
checkpoint_filename = 'checkpoint.jsonl'
|
61
62
|
checkpointer = checkpointing.PerExampleCheckpointer(checkpoint_filename)
|
62
63
|
run = experiment.run(
|
@@ -89,7 +90,7 @@ class BulkCheckpointerTest(unittest.TestCase):
|
|
89
90
|
|
90
91
|
def test_checkpointing(self):
|
91
92
|
root_dir = os.path.join(tempfile.gettempdir(), 'test_bulk_checkpointer')
|
92
|
-
experiment = test_helper.test_experiment()
|
93
|
+
experiment = eval_test_helper.test_experiment()
|
93
94
|
checkpoint_filename = 'checkpoint.jsonl'
|
94
95
|
checkpointer = checkpointing.BulkCheckpointer(checkpoint_filename)
|
95
96
|
run = experiment.run(
|
{langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/evaluation.py
RENAMED
@@ -14,7 +14,9 @@
|
|
14
14
|
"""Base class for Langfun evaluation tasks."""
|
15
15
|
|
16
16
|
import abc
|
17
|
+
import datetime
|
17
18
|
import functools
|
19
|
+
import threading
|
18
20
|
import time
|
19
21
|
|
20
22
|
from typing import Annotated, Any, Callable, Iterable
|
@@ -63,6 +65,8 @@ class Evaluation(experiment_lib.Experiment):
|
|
63
65
|
self.__dict__.pop('is_leaf', None)
|
64
66
|
self.__dict__.pop('children', None)
|
65
67
|
super()._on_bound()
|
68
|
+
self._log_entries = []
|
69
|
+
self._log_lock = threading.Lock()
|
66
70
|
|
67
71
|
#
|
68
72
|
# Handling evaluation hierarchy (materialized vs. hyper evaluations).
|
@@ -277,6 +281,48 @@ class Evaluation(experiment_lib.Experiment):
|
|
277
281
|
for metric in self.metrics:
|
278
282
|
metric.reset()
|
279
283
|
|
284
|
+
#
|
285
|
+
# Evaluation-level logging.
|
286
|
+
#
|
287
|
+
|
288
|
+
def _log(self, log_func, level: lf.logging.LogLevel, message: str, **kwargs):
|
289
|
+
# Write to external logging system.
|
290
|
+
log_message = f'{self.id}: {message}'
|
291
|
+
if kwargs:
|
292
|
+
log_message = f'{log_message} (metadata: {kwargs!r})'
|
293
|
+
log_func(log_message)
|
294
|
+
|
295
|
+
# Add to experiment log history.
|
296
|
+
log_entry = lf.logging.LogEntry(
|
297
|
+
level=level,
|
298
|
+
time=datetime.datetime.now(),
|
299
|
+
message=message,
|
300
|
+
metadata=kwargs,
|
301
|
+
)
|
302
|
+
with self._log_lock:
|
303
|
+
self._log_entries.append(log_entry)
|
304
|
+
|
305
|
+
def debug(self, message: str, **kwargs):
|
306
|
+
"""Logs a debug message to the session."""
|
307
|
+
self._log(pg.logging.debug, 'debug', message, **kwargs)
|
308
|
+
|
309
|
+
def info(self, message: str, **kwargs):
|
310
|
+
"""Logs an info message to the session."""
|
311
|
+
self._log(pg.logging.info, 'info', message, **kwargs)
|
312
|
+
|
313
|
+
def warning(self, message: str, **kwargs):
|
314
|
+
"""Logs a warning message to the session."""
|
315
|
+
self._log(pg.logging.warning, 'warning', message, **kwargs)
|
316
|
+
|
317
|
+
def error(self, message: str, **kwargs):
|
318
|
+
"""Logs an error message to the session."""
|
319
|
+
self._log(pg.logging.error, 'error', message, **kwargs)
|
320
|
+
|
321
|
+
def fatal(self, message: str, **kwargs):
|
322
|
+
"""Logs a fatal message to the session."""
|
323
|
+
# We use error level for fatal message, which does not trigger assertion.
|
324
|
+
self._log(pg.logging.error, 'fatal', message, **kwargs)
|
325
|
+
|
280
326
|
#
|
281
327
|
# HTML views.
|
282
328
|
#
|
@@ -465,6 +511,25 @@ class Evaluation(experiment_lib.Experiment):
|
|
465
511
|
)
|
466
512
|
)
|
467
513
|
|
514
|
+
def _logs_tab() -> pg.views.html.controls.Tab:
|
515
|
+
"""Renders a tab for the logs of the evaluation."""
|
516
|
+
with self._log_lock:
|
517
|
+
log_history = '\n'.join(str(l) for l in self._log_entries)
|
518
|
+
return pg.views.html.controls.Tab(
|
519
|
+
label='Logs',
|
520
|
+
content=pg.Html.element(
|
521
|
+
'div',
|
522
|
+
[
|
523
|
+
pg.Html.element(
|
524
|
+
'textarea',
|
525
|
+
[pg.Html.escape(log_history)],
|
526
|
+
readonly=True,
|
527
|
+
css_classes=['logs-textarea'],
|
528
|
+
)
|
529
|
+
]
|
530
|
+
)
|
531
|
+
)
|
532
|
+
|
468
533
|
def _main_tabs() -> pg.Html:
|
469
534
|
return pg.Html.element(
|
470
535
|
'div',
|
@@ -474,6 +539,8 @@ class Evaluation(experiment_lib.Experiment):
|
|
474
539
|
_definition_tab(),
|
475
540
|
] + [
|
476
541
|
_metric_tab(m) for m in self.metrics
|
542
|
+
] + [
|
543
|
+
_logs_tab()
|
477
544
|
],
|
478
545
|
selected=1,
|
479
546
|
)
|
@@ -593,6 +660,14 @@ class Evaluation(experiment_lib.Experiment):
|
|
593
660
|
width:100%;
|
594
661
|
height:100%;
|
595
662
|
}
|
663
|
+
.logs-textarea {
|
664
|
+
width: 100%;
|
665
|
+
height: 500px;
|
666
|
+
padding: 5px;
|
667
|
+
border: 1px solid #DDD;
|
668
|
+
background-color: #EEE;
|
669
|
+
resize: vertical;
|
670
|
+
}
|
596
671
|
"""
|
597
672
|
]
|
598
673
|
|
@@ -615,6 +690,11 @@ class EvaluationState:
|
|
615
690
|
assert isinstance(example, example_lib.Example), example
|
616
691
|
self._evaluated_examples[example.id] = example
|
617
692
|
|
693
|
+
@property
|
694
|
+
def evaluated_examples(self) -> dict[int, example_lib.Example]:
|
695
|
+
"""Returns the examples in the state."""
|
696
|
+
return self._evaluated_examples
|
697
|
+
|
618
698
|
def get(self, example_id: int) -> example_lib.Example | None:
|
619
699
|
"""Returns the example with the given ID."""
|
620
700
|
return self._evaluated_examples.get(example_id)
|
@@ -622,9 +702,3 @@ class EvaluationState:
|
|
622
702
|
def update(self, example: example_lib.Example) -> None:
|
623
703
|
"""Updates the state with the given example."""
|
624
704
|
self._evaluated_examples[example.id] = example
|
625
|
-
|
626
|
-
@property
|
627
|
-
def evaluated_examples(self) -> dict[int, example_lib.Example]:
|
628
|
-
"""Returns the examples in the state."""
|
629
|
-
return self._evaluated_examples
|
630
|
-
|
@@ -15,12 +15,11 @@ import os
|
|
15
15
|
import tempfile
|
16
16
|
import unittest
|
17
17
|
|
18
|
+
from langfun.core.eval.v2 import eval_test_helper
|
18
19
|
from langfun.core.eval.v2 import evaluation as evaluation_lib
|
19
20
|
from langfun.core.eval.v2 import example as example_lib
|
20
21
|
from langfun.core.eval.v2 import experiment as experiment_lib
|
21
22
|
|
22
|
-
from langfun.core.eval.v2 import test_helper
|
23
|
-
|
24
23
|
import pyglove as pg
|
25
24
|
|
26
25
|
Example = example_lib.Example
|
@@ -32,17 +31,23 @@ Run = experiment_lib.Run
|
|
32
31
|
class EvaluationTest(unittest.TestCase):
|
33
32
|
|
34
33
|
def test_hyper_evaluation(self):
|
35
|
-
exp =
|
36
|
-
lm=
|
34
|
+
exp = eval_test_helper.TestEvaluation(
|
35
|
+
lm=eval_test_helper.TestLLM(offset=pg.oneof(range(3)))
|
37
36
|
)
|
38
37
|
self.assertFalse(exp.is_leaf)
|
39
38
|
self.assertTrue(
|
40
39
|
pg.eq(
|
41
40
|
exp.children,
|
42
41
|
[
|
43
|
-
|
44
|
-
|
45
|
-
|
42
|
+
eval_test_helper.TestEvaluation(
|
43
|
+
lm=eval_test_helper.TestLLM(offset=0)
|
44
|
+
),
|
45
|
+
eval_test_helper.TestEvaluation(
|
46
|
+
lm=eval_test_helper.TestLLM(offset=1)
|
47
|
+
),
|
48
|
+
eval_test_helper.TestEvaluation(
|
49
|
+
lm=eval_test_helper.TestLLM(offset=2)
|
50
|
+
),
|
46
51
|
]
|
47
52
|
)
|
48
53
|
)
|
@@ -57,19 +62,21 @@ class EvaluationTest(unittest.TestCase):
|
|
57
62
|
)
|
58
63
|
|
59
64
|
def test_input(self):
|
60
|
-
exp =
|
65
|
+
exp = eval_test_helper.TestEvaluation()
|
61
66
|
self.assertEqual(exp.num_examples, 10)
|
62
|
-
exp =
|
67
|
+
exp = eval_test_helper.TestEvaluation(
|
68
|
+
inputs=eval_test_helper.test_inputs(None)
|
69
|
+
)
|
63
70
|
self.assertEqual(exp.num_examples, 20)
|
64
71
|
@pg.functor
|
65
72
|
def my_inputs():
|
66
73
|
yield pg.Dict(x=1, y=2)
|
67
74
|
yield pg.Dict(x=3, y=4)
|
68
|
-
exp =
|
75
|
+
exp = eval_test_helper.TestEvaluation(inputs=my_inputs())
|
69
76
|
self.assertEqual(exp.num_examples, 2)
|
70
77
|
|
71
78
|
def test_evaluate(self):
|
72
|
-
exp =
|
79
|
+
exp = eval_test_helper.TestEvaluation()
|
73
80
|
example = exp.evaluate(Example(id=3))
|
74
81
|
self.assertIs(exp.state.get(3), example)
|
75
82
|
self.assertTrue(example.newly_processed)
|
@@ -85,7 +92,7 @@ class EvaluationTest(unittest.TestCase):
|
|
85
92
|
self.assertIsNotNone(example.start_time)
|
86
93
|
self.assertIsNotNone(example.end_time)
|
87
94
|
|
88
|
-
exp =
|
95
|
+
exp = eval_test_helper.TestEvaluation(lm=eval_test_helper.TestLLM(offset=1))
|
89
96
|
example = exp.evaluate(3)
|
90
97
|
self.assertTrue(example.newly_processed)
|
91
98
|
self.assertEqual(example.input, pg.Dict(x=2, y=4, groundtruth=6))
|
@@ -109,7 +116,7 @@ class EvaluationTest(unittest.TestCase):
|
|
109
116
|
pg.io.mkdirs(eval_dir, exist_ok=True)
|
110
117
|
state_file = os.path.join(eval_dir, 'state.jsonl')
|
111
118
|
with pg.io.open_sequence(state_file, 'w') as f:
|
112
|
-
exp =
|
119
|
+
exp = eval_test_helper.TestEvaluation()
|
113
120
|
example = exp.evaluate(3)
|
114
121
|
self.assertTrue(example.newly_processed)
|
115
122
|
self.assertEqual(example.input, pg.Dict(x=2, y=4, groundtruth=6))
|
@@ -132,7 +139,13 @@ class EvaluationTest(unittest.TestCase):
|
|
132
139
|
self.assertEqual(example.usage_summary.uncached.total.num_requests, 0)
|
133
140
|
|
134
141
|
def test_html_view(self):
|
135
|
-
exp =
|
142
|
+
exp = eval_test_helper.TestEvaluation()
|
143
|
+
exp.debug('debug message')
|
144
|
+
exp.info('info message')
|
145
|
+
exp.warning('warning message', x=1)
|
146
|
+
exp.error('error message', x=1)
|
147
|
+
exp.fatal('fatal message')
|
148
|
+
|
136
149
|
self.assertIn(
|
137
150
|
exp.id,
|
138
151
|
exp.to_html(extra_flags=dict(card_view=True, current_run=None)).content
|
{langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/experiment.py
RENAMED
@@ -81,7 +81,7 @@ class Experiment(lf.Component, pg.views.HtmlTreeView.Extension):
|
|
81
81
|
directory (using the ID 'latest'). Users can specify 'new' to start a fresh
|
82
82
|
run or provide a specific run ID (typically in the format %Y%m%d_%<number>).
|
83
83
|
Additionally, when initiating a new run, users may specify a `warm_start_from`
|
84
|
-
|
84
|
+
directory to restore the experiment’s state from a previous run.
|
85
85
|
|
86
86
|
Examples:
|
87
87
|
|
@@ -97,9 +97,9 @@ class Experiment(lf.Component, pg.views.HtmlTreeView.Extension):
|
|
97
97
|
# Start a new, clean run.
|
98
98
|
experiment.run(root_dir, 'new')
|
99
99
|
|
100
|
-
# Start a new run with a warm start from the
|
101
|
-
# '
|
102
|
-
experiment.run(root_dir, 'new', warm_start_from='
|
100
|
+
# Start a new run with a warm start from another run located at
|
101
|
+
# '/path/to/another/run' (e.g. /my_experiment/run_20241031_1).
|
102
|
+
experiment.run(root_dir, 'new', warm_start_from='/path/to/another/run')
|
103
103
|
|
104
104
|
# Resume run '20241031_1', re-running failed examples and recomputing
|
105
105
|
# metrics as needed.
|
@@ -959,6 +959,14 @@ class Plugin(lf.Component):
|
|
959
959
|
) -> None:
|
960
960
|
"""Called when an experiment (both leaf and non-leaf) is complete."""
|
961
961
|
|
962
|
+
def on_experiment_abort(
|
963
|
+
self,
|
964
|
+
runner: Runner,
|
965
|
+
experiment: Experiment,
|
966
|
+
error: BaseException,
|
967
|
+
) -> None:
|
968
|
+
"""Called when an experiment (both leaf and non-leaf) is aborted."""
|
969
|
+
|
962
970
|
def on_example_start(
|
963
971
|
self,
|
964
972
|
runner: Runner,
|
@@ -18,9 +18,9 @@ import tempfile
|
|
18
18
|
import unittest
|
19
19
|
|
20
20
|
from langfun.core import console as lf_console
|
21
|
+
from langfun.core.eval.v2 import eval_test_helper
|
21
22
|
from langfun.core.eval.v2 import progress_tracking # pylint: disable=unused-import
|
22
23
|
from langfun.core.eval.v2 import runners as runners_lib # pylint: disable=unused-import
|
23
|
-
from langfun.core.eval.v2 import test_helper
|
24
24
|
import pyglove as pg
|
25
25
|
|
26
26
|
|
@@ -35,7 +35,7 @@ class HtmlProgressTrackerTest(unittest.TestCase):
|
|
35
35
|
display=display
|
36
36
|
)
|
37
37
|
root_dir = os.path.join(tempfile.gettempdir(), 'test_html_progress_tracker')
|
38
|
-
experiment =
|
38
|
+
experiment = eval_test_helper.test_experiment()
|
39
39
|
_ = experiment.run(root_dir, 'new', plugins=[])
|
40
40
|
self.assertIsInstance(result['view'], pg.Html)
|
41
41
|
lf_console._notebook = None
|
@@ -45,7 +45,7 @@ class TqdmProgressTrackerTest(unittest.TestCase):
|
|
45
45
|
|
46
46
|
def test_basic(self):
|
47
47
|
root_dir = os.path.join(tempfile.gettempdir(), 'test_tqdm_progress_tracker')
|
48
|
-
experiment =
|
48
|
+
experiment = eval_test_helper.test_experiment()
|
49
49
|
string_io = io.StringIO()
|
50
50
|
with contextlib.redirect_stderr(string_io):
|
51
51
|
_ = experiment.run(root_dir, 'new', plugins=[])
|
@@ -55,7 +55,7 @@ class TqdmProgressTrackerTest(unittest.TestCase):
|
|
55
55
|
root_dir = os.path.join(
|
56
56
|
tempfile.gettempdir(), 'test_tqdm_progress_tracker_with_example_ids'
|
57
57
|
)
|
58
|
-
experiment =
|
58
|
+
experiment = eval_test_helper.test_experiment()
|
59
59
|
string_io = io.StringIO()
|
60
60
|
with contextlib.redirect_stderr(string_io):
|
61
61
|
_ = experiment.run(root_dir, 'new', example_ids=[1], plugins=[])
|