langfun 0.1.2.dev202412180804__tar.gz → 0.1.2.dev202412270804__tar.gz

This diff shows the differences in content between two publicly available versions of the package, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (161)
  1. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/PKG-INFO +1 -1
  2. langfun-0.1.2.dev202412270804/langfun/core/eval/v2/checkpointing.py +346 -0
  3. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/checkpointing_test.py +4 -3
  4. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/evaluation.py +80 -6
  5. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/evaluation_test.py +27 -14
  6. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/experiment.py +12 -4
  7. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/progress_tracking_test.py +4 -4
  8. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/reporting.py +104 -29
  9. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/reporting_test.py +2 -2
  10. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/runners.py +79 -38
  11. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/runners_test.py +10 -9
  12. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/logging.py +19 -0
  13. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/logging_test.py +19 -0
  14. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/parsing.py +24 -17
  15. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/parsing_test.py +25 -0
  16. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/querying.py +10 -1
  17. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/querying_test.py +10 -0
  18. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun.egg-info/PKG-INFO +1 -1
  19. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun.egg-info/SOURCES.txt +1 -1
  20. langfun-0.1.2.dev202412180804/langfun/core/eval/v2/checkpointing.py +0 -224
  21. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/LICENSE +0 -0
  22. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/README.md +0 -0
  23. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/__init__.py +0 -0
  24. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/__init__.py +0 -0
  25. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/agentic/__init__.py +0 -0
  26. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/agentic/action.py +0 -0
  27. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/agentic/action_eval.py +0 -0
  28. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/agentic/action_eval_test.py +0 -0
  29. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/agentic/action_test.py +0 -0
  30. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/__init__.py +0 -0
  31. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/__init__.py +0 -0
  32. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/correction.py +0 -0
  33. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/correction_test.py +0 -0
  34. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/errors.py +0 -0
  35. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/errors_test.py +0 -0
  36. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/execution.py +0 -0
  37. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/execution_test.py +0 -0
  38. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/generation.py +0 -0
  39. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/generation_test.py +0 -0
  40. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/parsing.py +0 -0
  41. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/parsing_test.py +0 -0
  42. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/permissions.py +0 -0
  43. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/coding/python/permissions_test.py +0 -0
  44. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/component.py +0 -0
  45. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/component_test.py +0 -0
  46. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/concurrent.py +0 -0
  47. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/concurrent_test.py +0 -0
  48. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/console.py +0 -0
  49. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/console_test.py +0 -0
  50. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/__init__.py +0 -0
  51. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/base.py +0 -0
  52. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/base_test.py +0 -0
  53. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/matching.py +0 -0
  54. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/matching_test.py +0 -0
  55. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/patching.py +0 -0
  56. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/patching_test.py +0 -0
  57. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/scoring.py +0 -0
  58. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/scoring_test.py +0 -0
  59. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/__init__.py +0 -0
  60. /langfun-0.1.2.dev202412180804/langfun/core/eval/v2/test_helper.py → /langfun-0.1.2.dev202412270804/langfun/core/eval/v2/eval_test_helper.py +0 -0
  61. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/example.py +0 -0
  62. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/example_test.py +0 -0
  63. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/experiment_test.py +0 -0
  64. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/metric_values.py +0 -0
  65. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/metric_values_test.py +0 -0
  66. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/metrics.py +0 -0
  67. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/metrics_test.py +0 -0
  68. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/progress.py +0 -0
  69. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/progress_test.py +0 -0
  70. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/eval/v2/progress_tracking.py +0 -0
  71. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/langfunc.py +0 -0
  72. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/langfunc_test.py +0 -0
  73. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/language_model.py +0 -0
  74. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/language_model_test.py +0 -0
  75. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/__init__.py +0 -0
  76. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/anthropic.py +0 -0
  77. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/anthropic_test.py +0 -0
  78. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/cache/__init__.py +0 -0
  79. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/cache/base.py +0 -0
  80. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/cache/in_memory.py +0 -0
  81. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/cache/in_memory_test.py +0 -0
  82. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/compositional.py +0 -0
  83. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/compositional_test.py +0 -0
  84. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/fake.py +0 -0
  85. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/fake_test.py +0 -0
  86. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/google_genai.py +0 -0
  87. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/google_genai_test.py +0 -0
  88. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/groq.py +0 -0
  89. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/groq_test.py +0 -0
  90. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/llama_cpp.py +0 -0
  91. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/llama_cpp_test.py +0 -0
  92. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/openai.py +0 -0
  93. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/openai_test.py +0 -0
  94. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/rest.py +0 -0
  95. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/rest_test.py +0 -0
  96. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/vertexai.py +0 -0
  97. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/llms/vertexai_test.py +0 -0
  98. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/memories/__init__.py +0 -0
  99. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/memories/conversation_history.py +0 -0
  100. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/memories/conversation_history_test.py +0 -0
  101. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/memory.py +0 -0
  102. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/message.py +0 -0
  103. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/message_test.py +0 -0
  104. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/__init__.py +0 -0
  105. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/audio.py +0 -0
  106. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/audio_test.py +0 -0
  107. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/image.py +0 -0
  108. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/image_test.py +0 -0
  109. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/mime.py +0 -0
  110. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/mime_test.py +0 -0
  111. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/ms_office.py +0 -0
  112. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/ms_office_test.py +0 -0
  113. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/pdf.py +0 -0
  114. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/pdf_test.py +0 -0
  115. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/video.py +0 -0
  116. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modalities/video_test.py +0 -0
  117. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modality.py +0 -0
  118. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/modality_test.py +0 -0
  119. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/natural_language.py +0 -0
  120. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/natural_language_test.py +0 -0
  121. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/repr_utils.py +0 -0
  122. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/repr_utils_test.py +0 -0
  123. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/sampling.py +0 -0
  124. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/sampling_test.py +0 -0
  125. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/__init__.py +0 -0
  126. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/completion.py +0 -0
  127. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/completion_test.py +0 -0
  128. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/description.py +0 -0
  129. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/description_test.py +0 -0
  130. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/function_generation.py +0 -0
  131. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/function_generation_test.py +0 -0
  132. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/mapping.py +0 -0
  133. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/mapping_test.py +0 -0
  134. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/schema.py +0 -0
  135. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/schema_generation.py +0 -0
  136. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/schema_generation_test.py +0 -0
  137. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/schema_test.py +0 -0
  138. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/scoring.py +0 -0
  139. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/scoring_test.py +0 -0
  140. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/tokenization.py +0 -0
  141. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/structured/tokenization_test.py +0 -0
  142. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/subscription.py +0 -0
  143. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/subscription_test.py +0 -0
  144. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/template.py +0 -0
  145. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/template_test.py +0 -0
  146. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/templates/__init__.py +0 -0
  147. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/templates/completion.py +0 -0
  148. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/templates/completion_test.py +0 -0
  149. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/templates/conversation.py +0 -0
  150. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/templates/conversation_test.py +0 -0
  151. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/templates/demonstration.py +0 -0
  152. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/templates/demonstration_test.py +0 -0
  153. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/templates/selfplay.py +0 -0
  154. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/templates/selfplay_test.py +0 -0
  155. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/text_formatting.py +0 -0
  156. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun/core/text_formatting_test.py +0 -0
  157. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun.egg-info/dependency_links.txt +0 -0
  158. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun.egg-info/requires.txt +0 -0
  159. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/langfun.egg-info/top_level.txt +0 -0
  160. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/setup.cfg +0 -0
  161. {langfun-0.1.2.dev202412180804 → langfun-0.1.2.dev202412270804}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: langfun
3
- Version: 0.1.2.dev202412180804
3
+ Version: 0.1.2.dev202412270804
4
4
  Summary: Langfun: Language as Functions.
5
5
  Home-page: https://github.com/google/langfun
6
6
  Author: Langfun Authors
@@ -0,0 +1,346 @@
1
+ # Copyright 2024 The Langfun Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Checkpointing evaluation runs."""
15
+ import abc
16
+ import threading
17
+ import traceback
18
+
19
+ import langfun.core as lf
20
+ from langfun.core.eval.v2 import example as example_lib
21
+ from langfun.core.eval.v2 import experiment as experiment_lib
22
+ import pyglove as pg
23
+
24
+ Example = example_lib.Example
25
+ Experiment = experiment_lib.Experiment
26
+ Runner = experiment_lib.Runner
27
+
28
+
29
+ class Checkpointer(experiment_lib.Plugin):
30
+ """Base class for checkpointing evaluation examples."""
31
+
32
+ def on_experiment_start(
33
+ self,
34
+ runner: Runner,
35
+ experiment: Experiment
36
+ ) -> None:
37
+ if not experiment.is_leaf:
38
+ return
39
+
40
+ # For refresh runs, we don't want to load the previous state.
41
+ if not runner.current_run.refresh:
42
+ if runner.current_run.input_root != runner.current_run.output_root:
43
+ experiment.info(
44
+ f'Warm starting from directory: {runner.current_run.input_root}.'
45
+ )
46
+ self._load_experiment(runner, experiment)
47
+
48
+ if experiment.state.evaluated_examples:
49
+ loaded_example_ids = list(
50
+ sorted(experiment.state.evaluated_examples.keys())
51
+ )
52
+ example_ids_to_evaluate = (
53
+ set(runner.current_run.example_ids) if runner.current_run.example_ids
54
+ else set(range(1, experiment.num_examples + 1))
55
+ )
56
+ example_ids_to_evaluate -= set(loaded_example_ids)
57
+
58
+ experiment.info(
59
+ f'{len(experiment.state.evaluated_examples)} examples have been '
60
+ 'loaded from checkpoint files. Their outputs will be used '
61
+ f'for recomputing metrics. Example IDs: {loaded_example_ids}'
62
+ )
63
+ experiment.info(
64
+ f'{len(example_ids_to_evaluate)} examples will be processed from '
65
+ f'scratch. Example IDs: {list(sorted(example_ids_to_evaluate))}'
66
+ )
67
+ else:
68
+ experiment.info(
69
+ 'No examples are loaded from checkpoint files. '
70
+ f'Experiment {experiment.id} starts from scratch.'
71
+ )
72
+
73
+ def on_example_complete(
74
+ self,
75
+ runner: Runner,
76
+ experiment: Experiment,
77
+ example: Example,
78
+ ) -> None:
79
+ """Saves the example to the checkpoint file."""
80
+ if example.has_error:
81
+ experiment.warning(
82
+ f'Example {example.id} has error. Skipping checkpointing.'
83
+ )
84
+ else:
85
+ self._save_example(runner, experiment, example)
86
+
87
+ @abc.abstractmethod
88
+ def _load_experiment(self, runner: Runner, experiment: Experiment) -> None:
89
+ """Loads the experiment state from checkpoint files."""
90
+
91
+ @abc.abstractmethod
92
+ def _save_example(
93
+ self,
94
+ runner: Runner,
95
+ experiment: Experiment,
96
+ example: Example,
97
+ ) -> None:
98
+ """Saves an evaluated example."""
99
+
100
+
101
+ class PerExampleCheckpointer(Checkpointer):
102
+ """Checkpointer that saves each example to a separate file."""
103
+
104
+ checkpoint_filename: str = 'checkpoint.bagz'
105
+
106
+ def _on_bound(self):
107
+ super()._on_bound()
108
+ prefix, ext = self._file_prefix_and_ext(self.checkpoint_filename)
109
+ self._checkpoint_file_prefix = prefix
110
+ self._checkpoint_file_ext = ext
111
+
112
+ def _load_experiment(
113
+ self,
114
+ runner: Runner,
115
+ experiment: Experiment,
116
+ ) -> None:
117
+ """Creates the checkpoint file."""
118
+ experiment_dir = runner.current_run.input_dir(experiment)
119
+ if pg.io.path_exists(experiment_dir):
120
+ ckpt_files = [
121
+ runner.current_run.input_path_for(experiment, filename)
122
+ for filename in pg.io.listdir(experiment_dir)
123
+ if filename.startswith(self._checkpoint_file_prefix)
124
+ and filename.endswith(self._checkpoint_file_ext)
125
+ ]
126
+ else:
127
+ ckpt_files = []
128
+
129
+ experiment.info(f'Found {len(ckpt_files)} checkpoint files to load.')
130
+
131
+ # Load the checkpoint files in parallel.
132
+ context = dict(counter=0, counter_lock=threading.Lock())
133
+ def _load_state(ckpt_file):
134
+ error = None
135
+ with pg.timeit() as t:
136
+ try:
137
+ experiment.load_state(ckpt_file)
138
+ except BaseException as e: # pylint: disable=broad-except
139
+ error = e
140
+ finally:
141
+ with context['counter_lock']:
142
+ context['counter'] += 1
143
+
144
+ progress_str = f'{context["counter"]}/{len(ckpt_files)}'
145
+ if error is None:
146
+ experiment.info(
147
+ f'Loaded checkpoint file {ckpt_file} in {t.elapse:.2f} '
148
+ f'seconds. ({progress_str})'
149
+ )
150
+ else:
151
+ experiment.warning(
152
+ f'Failed to load checkpoint file {ckpt_file}: {error}. '
153
+ f'Skipping the file. ({progress_str})'
154
+ )
155
+
156
+ _ = list(
157
+ lf.concurrent_map(
158
+ _load_state, ckpt_files, max_workers=16, silence_on_errors=None
159
+ )
160
+ )
161
+
162
+ def _save_example(
163
+ self,
164
+ runner: Runner,
165
+ experiment: Experiment,
166
+ example: Example,
167
+ ) -> None:
168
+ """Saves the example to the checkpoint file."""
169
+ def save_state(example: Example):
170
+ writer = SequenceWriter(
171
+ runner.current_run.output_path_for(
172
+ experiment,
173
+ (
174
+ f'{self._checkpoint_file_prefix}_{example.id}'
175
+ f'{self._checkpoint_file_ext}'
176
+ )
177
+ )
178
+ )
179
+ try:
180
+ writer.add(example)
181
+ writer.close()
182
+ experiment.info(
183
+ f'Example {example.id} saved to {writer.path}.',
184
+ )
185
+ except BaseException as e: # pylint: disable=broad-except
186
+ experiment.error(
187
+ f'Failed to save example {example.id} to {writer.path}. '
188
+ f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
189
+ )
190
+ raise e
191
+ runner.background_run(save_state, example)
192
+
193
+ def _file_prefix_and_ext(self, filename: str) -> tuple[str, str]:
194
+ ext_index = filename.rfind('.')
195
+ if ext_index == -1:
196
+ return filename, ''
197
+ else:
198
+ return filename[:ext_index], filename[ext_index:]
199
+
200
+
201
+ class BulkCheckpointer(Checkpointer):
202
+ """Checkpointer that saves all examples to a single file."""
203
+
204
+ checkpoint_filename: str = 'checkpoint.bagz'
205
+
206
+ def _on_bound(self):
207
+ super()._on_bound()
208
+ self._lock = threading.Lock()
209
+ self._sequence_writer = None
210
+
211
+ def on_run_start(
212
+ self,
213
+ runner: Runner,
214
+ root: Experiment,
215
+ ) -> None:
216
+ self._sequence_writer = {}
217
+
218
+ def on_run_abort(
219
+ self,
220
+ runner: Runner,
221
+ root: Experiment,
222
+ error: BaseException
223
+ ) -> None:
224
+ with self._lock:
225
+ if self._sequence_writer is not None:
226
+ for writer in self._sequence_writer.values():
227
+ writer.close()
228
+ self._sequence_writer.clear()
229
+
230
+ def on_run_complete(
231
+ self,
232
+ runner: Runner,
233
+ root: Experiment,
234
+ ) -> None:
235
+ with self._lock:
236
+ assert self._sequence_writer is not None and not self._sequence_writer
237
+
238
+ def on_experiment_start(
239
+ self,
240
+ runner: Runner,
241
+ experiment: Experiment,
242
+ ) -> None:
243
+ super().on_experiment_start(runner, experiment)
244
+
245
+ # Prepare the sequence writer for the experiment.
246
+ if experiment.is_leaf:
247
+ sequence_writer = SequenceWriter(
248
+ runner.current_run.output_path_for(
249
+ experiment, self.checkpoint_filename
250
+ )
251
+ )
252
+ with self._lock:
253
+ if self._sequence_writer is not None:
254
+ self._sequence_writer[experiment.id] = sequence_writer
255
+
256
+ def _load_experiment(
257
+ self,
258
+ runner: Runner,
259
+ experiment: Experiment,
260
+ ) -> None:
261
+ """Creates the checkpoint file."""
262
+ experiment.load_state(
263
+ runner.current_run.input_path_for(
264
+ experiment, self.checkpoint_filename
265
+ ),
266
+ raise_if_not_exist=False
267
+ )
268
+
269
+ def on_experiment_complete(
270
+ self,
271
+ runner: Runner,
272
+ experiment: Experiment,
273
+ ) -> None:
274
+ """Closes the checkpoint file."""
275
+ if not experiment.is_leaf:
276
+ return
277
+ assert experiment.id in self._sequence_writer
278
+ with self._lock:
279
+ if self._sequence_writer is not None:
280
+ # Make sure the writer is closed without delay so the file will be
281
+ # available immediately.
282
+ writer = self._sequence_writer.pop(experiment.id)
283
+ writer.close()
284
+ experiment.info(
285
+ f'{len(experiment.state.evaluated_examples)} examples are '
286
+ f'checkpointed to {writer.path}.'
287
+ )
288
+
289
+ def _save_example(
290
+ self,
291
+ runner: Runner,
292
+ experiment: Experiment,
293
+ example: Example,
294
+ ) -> None:
295
+ """Saves the example to the checkpoint file."""
296
+ assert experiment.id in self._sequence_writer
297
+ def _save_example(example: Example):
298
+ writer = self._sequence_writer[experiment.id]
299
+ try:
300
+ writer.add(example)
301
+ experiment.info(
302
+ f'Example {example.id} added to {writer.path}.',
303
+ )
304
+ except BaseException as e: # pylint: disable=broad-except
305
+ experiment.error(
306
+ f'Failed to save example {example.id} to {writer.path}. '
307
+ f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
308
+ )
309
+ raise e
310
+ runner.background_run(_save_example, example)
311
+
312
+
313
+ class SequenceWriter:
314
+ """Thread safe sequence writer."""
315
+
316
+ def __init__(self, path: str):
317
+ self._lock = threading.Lock()
318
+ self._path = path
319
+ self._sequence_writer = pg.io.open_sequence(path, 'w')
320
+
321
+ @property
322
+ def path(self) -> str:
323
+ return self._path
324
+
325
+ def add(self, example: Example):
326
+ example_blob = pg.to_json_str(
327
+ example,
328
+ hide_default_values=True,
329
+ save_ref_value=True,
330
+ exclude_input=True
331
+ )
332
+ with self._lock:
333
+ if self._sequence_writer is None:
334
+ return
335
+ self._sequence_writer.add(example_blob)
336
+
337
+ def close(self):
338
+ # Make sure there is no write in progress.
339
+ with self._lock:
340
+ if self._sequence_writer is None:
341
+ return
342
+ self._sequence_writer.close()
343
+ self._sequence_writer = None
344
+
345
+ def __del__(self):
346
+ self.close()
@@ -16,9 +16,9 @@ import tempfile
16
16
  import unittest
17
17
 
18
18
  from langfun.core.eval.v2 import checkpointing
19
+ from langfun.core.eval.v2 import eval_test_helper
19
20
  from langfun.core.eval.v2 import example as example_lib
20
21
  from langfun.core.eval.v2 import runners as runners_lib # pylint: disable=unused-import
21
- from langfun.core.eval.v2 import test_helper
22
22
  import pyglove as pg
23
23
 
24
24
  Example = example_lib.Example
@@ -55,8 +55,9 @@ class SequenceWriterTest(unittest.TestCase):
55
55
  class PerExampleCheckpointerTest(unittest.TestCase):
56
56
 
57
57
  def test_checkpointing(self):
58
+ pg.defaults.loggers.use_stdout()
58
59
  root_dir = os.path.join(tempfile.gettempdir(), 'per_example_checkpointer')
59
- experiment = test_helper.test_experiment()
60
+ experiment = eval_test_helper.test_experiment()
60
61
  checkpoint_filename = 'checkpoint.jsonl'
61
62
  checkpointer = checkpointing.PerExampleCheckpointer(checkpoint_filename)
62
63
  run = experiment.run(
@@ -89,7 +90,7 @@ class BulkCheckpointerTest(unittest.TestCase):
89
90
 
90
91
  def test_checkpointing(self):
91
92
  root_dir = os.path.join(tempfile.gettempdir(), 'test_bulk_checkpointer')
92
- experiment = test_helper.test_experiment()
93
+ experiment = eval_test_helper.test_experiment()
93
94
  checkpoint_filename = 'checkpoint.jsonl'
94
95
  checkpointer = checkpointing.BulkCheckpointer(checkpoint_filename)
95
96
  run = experiment.run(
@@ -14,7 +14,9 @@
14
14
  """Base class for Langfun evaluation tasks."""
15
15
 
16
16
  import abc
17
+ import datetime
17
18
  import functools
19
+ import threading
18
20
  import time
19
21
 
20
22
  from typing import Annotated, Any, Callable, Iterable
@@ -63,6 +65,8 @@ class Evaluation(experiment_lib.Experiment):
63
65
  self.__dict__.pop('is_leaf', None)
64
66
  self.__dict__.pop('children', None)
65
67
  super()._on_bound()
68
+ self._log_entries = []
69
+ self._log_lock = threading.Lock()
66
70
 
67
71
  #
68
72
  # Handling evaluation hierarchy (materialized vs. hyper evaluations).
@@ -277,6 +281,48 @@ class Evaluation(experiment_lib.Experiment):
277
281
  for metric in self.metrics:
278
282
  metric.reset()
279
283
 
284
+ #
285
+ # Evaluation-level logging.
286
+ #
287
+
288
+ def _log(self, log_func, level: lf.logging.LogLevel, message: str, **kwargs):
289
+ # Write to external logging system.
290
+ log_message = f'{self.id}: {message}'
291
+ if kwargs:
292
+ log_message = f'{log_message} (metadata: {kwargs!r})'
293
+ log_func(log_message)
294
+
295
+ # Add to experiment log history.
296
+ log_entry = lf.logging.LogEntry(
297
+ level=level,
298
+ time=datetime.datetime.now(),
299
+ message=message,
300
+ metadata=kwargs,
301
+ )
302
+ with self._log_lock:
303
+ self._log_entries.append(log_entry)
304
+
305
+ def debug(self, message: str, **kwargs):
306
+ """Logs a debug message to the session."""
307
+ self._log(pg.logging.debug, 'debug', message, **kwargs)
308
+
309
+ def info(self, message: str, **kwargs):
310
+ """Logs an info message to the session."""
311
+ self._log(pg.logging.info, 'info', message, **kwargs)
312
+
313
+ def warning(self, message: str, **kwargs):
314
+ """Logs a warning message to the session."""
315
+ self._log(pg.logging.warning, 'warning', message, **kwargs)
316
+
317
+ def error(self, message: str, **kwargs):
318
+ """Logs an error message to the session."""
319
+ self._log(pg.logging.error, 'error', message, **kwargs)
320
+
321
+ def fatal(self, message: str, **kwargs):
322
+ """Logs a fatal message to the session."""
323
+ # We use error level for fatal message, which does not trigger assertion.
324
+ self._log(pg.logging.error, 'fatal', message, **kwargs)
325
+
280
326
  #
281
327
  # HTML views.
282
328
  #
@@ -465,6 +511,25 @@ class Evaluation(experiment_lib.Experiment):
465
511
  )
466
512
  )
467
513
 
514
+ def _logs_tab() -> pg.views.html.controls.Tab:
515
+ """Renders a tab for the logs of the evaluation."""
516
+ with self._log_lock:
517
+ log_history = '\n'.join(str(l) for l in self._log_entries)
518
+ return pg.views.html.controls.Tab(
519
+ label='Logs',
520
+ content=pg.Html.element(
521
+ 'div',
522
+ [
523
+ pg.Html.element(
524
+ 'textarea',
525
+ [pg.Html.escape(log_history)],
526
+ readonly=True,
527
+ css_classes=['logs-textarea'],
528
+ )
529
+ ]
530
+ )
531
+ )
532
+
468
533
  def _main_tabs() -> pg.Html:
469
534
  return pg.Html.element(
470
535
  'div',
@@ -474,6 +539,8 @@ class Evaluation(experiment_lib.Experiment):
474
539
  _definition_tab(),
475
540
  ] + [
476
541
  _metric_tab(m) for m in self.metrics
542
+ ] + [
543
+ _logs_tab()
477
544
  ],
478
545
  selected=1,
479
546
  )
@@ -593,6 +660,14 @@ class Evaluation(experiment_lib.Experiment):
593
660
  width:100%;
594
661
  height:100%;
595
662
  }
663
+ .logs-textarea {
664
+ width: 100%;
665
+ height: 500px;
666
+ padding: 5px;
667
+ border: 1px solid #DDD;
668
+ background-color: #EEE;
669
+ resize: vertical;
670
+ }
596
671
  """
597
672
  ]
598
673
 
@@ -615,6 +690,11 @@ class EvaluationState:
615
690
  assert isinstance(example, example_lib.Example), example
616
691
  self._evaluated_examples[example.id] = example
617
692
 
693
+ @property
694
+ def evaluated_examples(self) -> dict[int, example_lib.Example]:
695
+ """Returns the examples in the state."""
696
+ return self._evaluated_examples
697
+
618
698
  def get(self, example_id: int) -> example_lib.Example | None:
619
699
  """Returns the example with the given ID."""
620
700
  return self._evaluated_examples.get(example_id)
@@ -622,9 +702,3 @@ class EvaluationState:
622
702
  def update(self, example: example_lib.Example) -> None:
623
703
  """Updates the state with the given example."""
624
704
  self._evaluated_examples[example.id] = example
625
-
626
- @property
627
- def evaluated_examples(self) -> dict[int, example_lib.Example]:
628
- """Returns the examples in the state."""
629
- return self._evaluated_examples
630
-
@@ -15,12 +15,11 @@ import os
15
15
  import tempfile
16
16
  import unittest
17
17
 
18
+ from langfun.core.eval.v2 import eval_test_helper
18
19
  from langfun.core.eval.v2 import evaluation as evaluation_lib
19
20
  from langfun.core.eval.v2 import example as example_lib
20
21
  from langfun.core.eval.v2 import experiment as experiment_lib
21
22
 
22
- from langfun.core.eval.v2 import test_helper
23
-
24
23
  import pyglove as pg
25
24
 
26
25
  Example = example_lib.Example
@@ -32,17 +31,23 @@ Run = experiment_lib.Run
32
31
  class EvaluationTest(unittest.TestCase):
33
32
 
34
33
  def test_hyper_evaluation(self):
35
- exp = test_helper.TestEvaluation(
36
- lm=test_helper.TestLLM(offset=pg.oneof(range(3)))
34
+ exp = eval_test_helper.TestEvaluation(
35
+ lm=eval_test_helper.TestLLM(offset=pg.oneof(range(3)))
37
36
  )
38
37
  self.assertFalse(exp.is_leaf)
39
38
  self.assertTrue(
40
39
  pg.eq(
41
40
  exp.children,
42
41
  [
43
- test_helper.TestEvaluation(lm=test_helper.TestLLM(offset=0)),
44
- test_helper.TestEvaluation(lm=test_helper.TestLLM(offset=1)),
45
- test_helper.TestEvaluation(lm=test_helper.TestLLM(offset=2)),
42
+ eval_test_helper.TestEvaluation(
43
+ lm=eval_test_helper.TestLLM(offset=0)
44
+ ),
45
+ eval_test_helper.TestEvaluation(
46
+ lm=eval_test_helper.TestLLM(offset=1)
47
+ ),
48
+ eval_test_helper.TestEvaluation(
49
+ lm=eval_test_helper.TestLLM(offset=2)
50
+ ),
46
51
  ]
47
52
  )
48
53
  )
@@ -57,19 +62,21 @@ class EvaluationTest(unittest.TestCase):
57
62
  )
58
63
 
59
64
  def test_input(self):
60
- exp = test_helper.TestEvaluation()
65
+ exp = eval_test_helper.TestEvaluation()
61
66
  self.assertEqual(exp.num_examples, 10)
62
- exp = test_helper.TestEvaluation(inputs=test_helper.test_inputs(None))
67
+ exp = eval_test_helper.TestEvaluation(
68
+ inputs=eval_test_helper.test_inputs(None)
69
+ )
63
70
  self.assertEqual(exp.num_examples, 20)
64
71
  @pg.functor
65
72
  def my_inputs():
66
73
  yield pg.Dict(x=1, y=2)
67
74
  yield pg.Dict(x=3, y=4)
68
- exp = test_helper.TestEvaluation(inputs=my_inputs())
75
+ exp = eval_test_helper.TestEvaluation(inputs=my_inputs())
69
76
  self.assertEqual(exp.num_examples, 2)
70
77
 
71
78
  def test_evaluate(self):
72
- exp = test_helper.TestEvaluation()
79
+ exp = eval_test_helper.TestEvaluation()
73
80
  example = exp.evaluate(Example(id=3))
74
81
  self.assertIs(exp.state.get(3), example)
75
82
  self.assertTrue(example.newly_processed)
@@ -85,7 +92,7 @@ class EvaluationTest(unittest.TestCase):
85
92
  self.assertIsNotNone(example.start_time)
86
93
  self.assertIsNotNone(example.end_time)
87
94
 
88
- exp = test_helper.TestEvaluation(lm=test_helper.TestLLM(offset=1))
95
+ exp = eval_test_helper.TestEvaluation(lm=eval_test_helper.TestLLM(offset=1))
89
96
  example = exp.evaluate(3)
90
97
  self.assertTrue(example.newly_processed)
91
98
  self.assertEqual(example.input, pg.Dict(x=2, y=4, groundtruth=6))
@@ -109,7 +116,7 @@ class EvaluationTest(unittest.TestCase):
109
116
  pg.io.mkdirs(eval_dir, exist_ok=True)
110
117
  state_file = os.path.join(eval_dir, 'state.jsonl')
111
118
  with pg.io.open_sequence(state_file, 'w') as f:
112
- exp = test_helper.TestEvaluation()
119
+ exp = eval_test_helper.TestEvaluation()
113
120
  example = exp.evaluate(3)
114
121
  self.assertTrue(example.newly_processed)
115
122
  self.assertEqual(example.input, pg.Dict(x=2, y=4, groundtruth=6))
@@ -132,7 +139,13 @@ class EvaluationTest(unittest.TestCase):
132
139
  self.assertEqual(example.usage_summary.uncached.total.num_requests, 0)
133
140
 
134
141
  def test_html_view(self):
135
- exp = test_helper.TestEvaluation()
142
+ exp = eval_test_helper.TestEvaluation()
143
+ exp.debug('debug message')
144
+ exp.info('info message')
145
+ exp.warning('warning message', x=1)
146
+ exp.error('error message', x=1)
147
+ exp.fatal('fatal message')
148
+
136
149
  self.assertIn(
137
150
  exp.id,
138
151
  exp.to_html(extra_flags=dict(card_view=True, current_run=None)).content
@@ -81,7 +81,7 @@ class Experiment(lf.Component, pg.views.HtmlTreeView.Extension):
81
81
  directory (using the ID 'latest'). Users can specify 'new' to start a fresh
82
82
  run or provide a specific run ID (typically in the format %Y%m%d_%<number>).
83
83
  Additionally, when initiating a new run, users may specify a `warm_start_from`
84
- ID to restore the experiment’s state from a previous run.
84
+ directory to restore the experiment’s state from a previous run.
85
85
 
86
86
  Examples:
87
87
 
@@ -97,9 +97,9 @@ class Experiment(lf.Component, pg.views.HtmlTreeView.Extension):
97
97
  # Start a new, clean run.
98
98
  experiment.run(root_dir, 'new')
99
99
 
100
- # Start a new run with a warm start from the previous run located in
101
- # 'run_20241031_1' of the root directory.
102
- experiment.run(root_dir, 'new', warm_start_from='20241031_1')
100
+ # Start a new run with a warm start from another run located at
101
+ # '/path/to/another/run' (e.g. /my_experiment/run_20241031_1).
102
+ experiment.run(root_dir, 'new', warm_start_from='/path/to/another/run')
103
103
 
104
104
  # Resume run '20241031_1', re-running failed examples and recomputing
105
105
  # metrics as needed.
@@ -959,6 +959,14 @@ class Plugin(lf.Component):
959
959
  ) -> None:
960
960
  """Called when an experiment (both leaf and non-leaf) is complete."""
961
961
 
962
+ def on_experiment_abort(
963
+ self,
964
+ runner: Runner,
965
+ experiment: Experiment,
966
+ error: BaseException,
967
+ ) -> None:
968
+ """Called when an experiment (both leaf and non-leaf) is aborted."""
969
+
962
970
  def on_example_start(
963
971
  self,
964
972
  runner: Runner,
@@ -18,9 +18,9 @@ import tempfile
18
18
  import unittest
19
19
 
20
20
  from langfun.core import console as lf_console
21
+ from langfun.core.eval.v2 import eval_test_helper
21
22
  from langfun.core.eval.v2 import progress_tracking # pylint: disable=unused-import
22
23
  from langfun.core.eval.v2 import runners as runners_lib # pylint: disable=unused-import
23
- from langfun.core.eval.v2 import test_helper
24
24
  import pyglove as pg
25
25
 
26
26
 
@@ -35,7 +35,7 @@ class HtmlProgressTrackerTest(unittest.TestCase):
35
35
  display=display
36
36
  )
37
37
  root_dir = os.path.join(tempfile.gettempdir(), 'test_html_progress_tracker')
38
- experiment = test_helper.test_experiment()
38
+ experiment = eval_test_helper.test_experiment()
39
39
  _ = experiment.run(root_dir, 'new', plugins=[])
40
40
  self.assertIsInstance(result['view'], pg.Html)
41
41
  lf_console._notebook = None
@@ -45,7 +45,7 @@ class TqdmProgressTrackerTest(unittest.TestCase):
45
45
 
46
46
  def test_basic(self):
47
47
  root_dir = os.path.join(tempfile.gettempdir(), 'test_tqdm_progress_tracker')
48
- experiment = test_helper.test_experiment()
48
+ experiment = eval_test_helper.test_experiment()
49
49
  string_io = io.StringIO()
50
50
  with contextlib.redirect_stderr(string_io):
51
51
  _ = experiment.run(root_dir, 'new', plugins=[])
@@ -55,7 +55,7 @@ class TqdmProgressTrackerTest(unittest.TestCase):
55
55
  root_dir = os.path.join(
56
56
  tempfile.gettempdir(), 'test_tqdm_progress_tracker_with_example_ids'
57
57
  )
58
- experiment = test_helper.test_experiment()
58
+ experiment = eval_test_helper.test_experiment()
59
59
  string_io = io.StringIO()
60
60
  with contextlib.redirect_stderr(string_io):
61
61
  _ = experiment.run(root_dir, 'new', example_ids=[1], plugins=[])