langfun 0.1.2.dev202412190804__tar.gz → 0.1.2.dev202412210804__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/PKG-INFO +1 -1
  2. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/v2/checkpointing.py +73 -10
  3. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/v2/checkpointing_test.py +3 -3
  4. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/v2/evaluation.py +21 -14
  5. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/v2/evaluation_test.py +21 -14
  6. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/v2/experiment.py +8 -0
  7. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/v2/progress_tracking_test.py +4 -4
  8. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/v2/reporting.py +58 -25
  9. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/v2/reporting_test.py +2 -2
  10. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/v2/runners.py +79 -38
  11. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/v2/runners_test.py +10 -9
  12. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/structured/querying.py +10 -1
  13. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/structured/querying_test.py +10 -0
  14. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun.egg-info/PKG-INFO +1 -1
  15. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun.egg-info/SOURCES.txt +1 -1
  16. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/LICENSE +0 -0
  17. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/README.md +0 -0
  18. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/__init__.py +0 -0
  19. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/__init__.py +0 -0
  20. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/agentic/__init__.py +0 -0
  21. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/agentic/action.py +0 -0
  22. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/agentic/action_eval.py +0 -0
  23. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/agentic/action_eval_test.py +0 -0
  24. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/agentic/action_test.py +0 -0
  25. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/coding/__init__.py +0 -0
  26. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/coding/python/__init__.py +0 -0
  27. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/coding/python/correction.py +0 -0
  28. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/coding/python/correction_test.py +0 -0
  29. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/coding/python/errors.py +0 -0
  30. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/coding/python/errors_test.py +0 -0
  31. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/coding/python/execution.py +0 -0
  32. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/coding/python/execution_test.py +0 -0
  33. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/coding/python/generation.py +0 -0
  34. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/coding/python/generation_test.py +0 -0
  35. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/coding/python/parsing.py +0 -0
  36. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/coding/python/parsing_test.py +0 -0
  37. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/coding/python/permissions.py +0 -0
  38. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/coding/python/permissions_test.py +0 -0
  39. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/component.py +0 -0
  40. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/component_test.py +0 -0
  41. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/concurrent.py +0 -0
  42. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/concurrent_test.py +0 -0
  43. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/console.py +0 -0
  44. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/console_test.py +0 -0
  45. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/__init__.py +0 -0
  46. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/base.py +0 -0
  47. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/base_test.py +0 -0
  48. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/matching.py +0 -0
  49. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/matching_test.py +0 -0
  50. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/patching.py +0 -0
  51. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/patching_test.py +0 -0
  52. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/scoring.py +0 -0
  53. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/scoring_test.py +0 -0
  54. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/v2/__init__.py +0 -0
  55. /langfun-0.1.2.dev202412190804/langfun/core/eval/v2/test_helper.py → /langfun-0.1.2.dev202412210804/langfun/core/eval/v2/eval_test_helper.py +0 -0
  56. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/v2/example.py +0 -0
  57. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/v2/example_test.py +0 -0
  58. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/v2/experiment_test.py +0 -0
  59. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/v2/metric_values.py +0 -0
  60. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/v2/metric_values_test.py +0 -0
  61. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/v2/metrics.py +0 -0
  62. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/v2/metrics_test.py +0 -0
  63. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/v2/progress.py +0 -0
  64. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/v2/progress_test.py +0 -0
  65. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/eval/v2/progress_tracking.py +0 -0
  66. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/langfunc.py +0 -0
  67. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/langfunc_test.py +0 -0
  68. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/language_model.py +0 -0
  69. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/language_model_test.py +0 -0
  70. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/__init__.py +0 -0
  71. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/anthropic.py +0 -0
  72. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/anthropic_test.py +0 -0
  73. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/cache/__init__.py +0 -0
  74. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/cache/base.py +0 -0
  75. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/cache/in_memory.py +0 -0
  76. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/cache/in_memory_test.py +0 -0
  77. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/compositional.py +0 -0
  78. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/compositional_test.py +0 -0
  79. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/fake.py +0 -0
  80. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/fake_test.py +0 -0
  81. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/google_genai.py +0 -0
  82. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/google_genai_test.py +0 -0
  83. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/groq.py +0 -0
  84. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/groq_test.py +0 -0
  85. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/llama_cpp.py +0 -0
  86. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/llama_cpp_test.py +0 -0
  87. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/openai.py +0 -0
  88. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/openai_test.py +0 -0
  89. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/rest.py +0 -0
  90. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/rest_test.py +0 -0
  91. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/vertexai.py +0 -0
  92. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/llms/vertexai_test.py +0 -0
  93. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/logging.py +0 -0
  94. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/logging_test.py +0 -0
  95. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/memories/__init__.py +0 -0
  96. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/memories/conversation_history.py +0 -0
  97. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/memories/conversation_history_test.py +0 -0
  98. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/memory.py +0 -0
  99. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/message.py +0 -0
  100. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/message_test.py +0 -0
  101. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/modalities/__init__.py +0 -0
  102. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/modalities/audio.py +0 -0
  103. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/modalities/audio_test.py +0 -0
  104. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/modalities/image.py +0 -0
  105. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/modalities/image_test.py +0 -0
  106. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/modalities/mime.py +0 -0
  107. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/modalities/mime_test.py +0 -0
  108. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/modalities/ms_office.py +0 -0
  109. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/modalities/ms_office_test.py +0 -0
  110. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/modalities/pdf.py +0 -0
  111. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/modalities/pdf_test.py +0 -0
  112. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/modalities/video.py +0 -0
  113. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/modalities/video_test.py +0 -0
  114. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/modality.py +0 -0
  115. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/modality_test.py +0 -0
  116. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/natural_language.py +0 -0
  117. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/natural_language_test.py +0 -0
  118. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/repr_utils.py +0 -0
  119. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/repr_utils_test.py +0 -0
  120. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/sampling.py +0 -0
  121. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/sampling_test.py +0 -0
  122. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/structured/__init__.py +0 -0
  123. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/structured/completion.py +0 -0
  124. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/structured/completion_test.py +0 -0
  125. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/structured/description.py +0 -0
  126. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/structured/description_test.py +0 -0
  127. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/structured/function_generation.py +0 -0
  128. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/structured/function_generation_test.py +0 -0
  129. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/structured/mapping.py +0 -0
  130. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/structured/mapping_test.py +0 -0
  131. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/structured/parsing.py +0 -0
  132. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/structured/parsing_test.py +0 -0
  133. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/structured/schema.py +0 -0
  134. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/structured/schema_generation.py +0 -0
  135. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/structured/schema_generation_test.py +0 -0
  136. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/structured/schema_test.py +0 -0
  137. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/structured/scoring.py +0 -0
  138. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/structured/scoring_test.py +0 -0
  139. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/structured/tokenization.py +0 -0
  140. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/structured/tokenization_test.py +0 -0
  141. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/subscription.py +0 -0
  142. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/subscription_test.py +0 -0
  143. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/template.py +0 -0
  144. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/template_test.py +0 -0
  145. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/templates/__init__.py +0 -0
  146. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/templates/completion.py +0 -0
  147. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/templates/completion_test.py +0 -0
  148. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/templates/conversation.py +0 -0
  149. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/templates/conversation_test.py +0 -0
  150. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/templates/demonstration.py +0 -0
  151. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/templates/demonstration_test.py +0 -0
  152. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/templates/selfplay.py +0 -0
  153. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/templates/selfplay_test.py +0 -0
  154. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/text_formatting.py +0 -0
  155. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun/core/text_formatting_test.py +0 -0
  156. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun.egg-info/dependency_links.txt +0 -0
  157. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun.egg-info/requires.txt +0 -0
  158. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/langfun.egg-info/top_level.txt +0 -0
  159. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/setup.cfg +0 -0
  160. {langfun-0.1.2.dev202412190804 → langfun-0.1.2.dev202412210804}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: langfun
3
- Version: 0.1.2.dev202412190804
3
+ Version: 0.1.2.dev202412210804
4
4
  Summary: Langfun: Language as Functions.
5
5
  Home-page: https://github.com/google/langfun
6
6
  Author: Langfun Authors
@@ -13,6 +13,7 @@
13
13
  # limitations under the License.
14
14
  """Checkpointing evaluation runs."""
15
15
  import threading
16
+ import traceback
16
17
 
17
18
  import langfun.core as lf
18
19
  from langfun.core.eval.v2 import example as example_lib
@@ -27,6 +28,21 @@ Runner = experiment_lib.Runner
27
28
  class Checkpointer(experiment_lib.Plugin):
28
29
  """Base class for checkpointing evaluation examples."""
29
30
 
31
+ def on_experiment_start(self, experiment: Experiment):
32
+ if experiment.state.evaluated_examples:
33
+ experiment.info(
34
+ 'Loaded %d examples from checkpoint files. Example IDs: %s' %
35
+ (
36
+ len(experiment.state.evaluated_examples),
37
+ list(sorted(experiment.state.evaluated_examples.keys()))
38
+ ),
39
+ )
40
+ else:
41
+ experiment.info(
42
+ 'No previous evaluated examples are loaded. '
43
+ f'Experiment {experiment.id} starts from scratch.'
44
+ )
45
+
30
46
 
31
47
  class PerExampleCheckpointer(Checkpointer):
32
48
  """Checkpointer that saves each example to a separate file."""
@@ -50,6 +66,10 @@ class PerExampleCheckpointer(Checkpointer):
50
66
 
51
67
  # For refresh runs, we don't want to load the previous state.
52
68
  if not runner.current_run.refresh:
69
+ if runner.current_run.input_root != runner.current_run.output_root:
70
+ experiment.info(
71
+ f'Warm starting from directory: {runner.current_run.input_root}.'
72
+ )
53
73
  def _load_state(ckpt_file):
54
74
  experiment.load_state(ckpt_file)
55
75
 
@@ -68,10 +88,11 @@ class PerExampleCheckpointer(Checkpointer):
68
88
  _load_state, ckpt_files, max_workers=64,
69
89
  ):
70
90
  if error is not None:
71
- pg.logging.warning(
72
- 'Failed to load checkpoint file %s: %s. Skipping the file.',
73
- ckpt_file, error
91
+ experiment.warning(
92
+ f'Failed to load checkpoint file {ckpt_file}: {error}. '
93
+ 'Skipping the file.'
74
94
  )
95
+ super().on_experiment_start(experiment)
75
96
 
76
97
  def on_example_complete(
77
98
  self,
@@ -80,7 +101,11 @@ class PerExampleCheckpointer(Checkpointer):
80
101
  example: Example,
81
102
  ) -> None:
82
103
  """Saves the example to the checkpoint file."""
83
- if not example.has_error:
104
+ if example.has_error:
105
+ experiment.warning(
106
+ f'Example {example.id} has error. Skipping checkpointing.'
107
+ )
108
+ else:
84
109
  def save_state(example: Example):
85
110
  writer = SequenceWriter(
86
111
  runner.current_run.output_path_for(
@@ -91,8 +116,18 @@ class PerExampleCheckpointer(Checkpointer):
91
116
  )
92
117
  )
93
118
  )
94
- writer.add(example)
95
- writer.close()
119
+ try:
120
+ writer.add(example)
121
+ writer.close()
122
+ experiment.info(
123
+ f'Example {example.id} is saved to {writer.path}.',
124
+ )
125
+ except BaseException as e: # pylint: disable=broad-except
126
+ experiment.error(
127
+ f'Failed to save example {example.id} to {writer.path}. '
128
+ f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
129
+ )
130
+ raise e
96
131
  runner.background_run(save_state, example)
97
132
 
98
133
  def _file_prefix_and_ext(self, filename: str) -> tuple[str, str]:
@@ -150,6 +185,10 @@ class BulkCheckpointer(Checkpointer):
150
185
  return
151
186
  # For refresh runs, we don't want to load the previous state.
152
187
  if not runner.current_run.refresh:
188
+ if runner.current_run.input_root != runner.current_run.output_root:
189
+ experiment.info(
190
+ f'Warm starting from directory: {runner.current_run.input_root}.'
191
+ )
153
192
  experiment.load_state(
154
193
  runner.current_run.input_path_for(
155
194
  experiment, self.checkpoint_filename
@@ -164,6 +203,7 @@ class BulkCheckpointer(Checkpointer):
164
203
  with self._lock:
165
204
  if self._sequence_writer is not None:
166
205
  self._sequence_writer[experiment.id] = sequence_writer
206
+ super().on_experiment_start(experiment)
167
207
 
168
208
  def on_experiment_complete(
169
209
  self,
@@ -178,8 +218,12 @@ class BulkCheckpointer(Checkpointer):
178
218
  if self._sequence_writer is not None:
179
219
  # Make sure the writer is closed without delay so the file will be
180
220
  # available immediately.
181
- self._sequence_writer[experiment.id].close()
182
- del self._sequence_writer[experiment.id]
221
+ writer = self._sequence_writer.pop(experiment.id)
222
+ writer.close()
223
+ experiment.info(
224
+ f'{len(experiment.state.evaluated_examples)} examples are '
225
+ f'checkpointed to {writer.path}.'
226
+ )
183
227
 
184
228
  def on_example_complete(
185
229
  self,
@@ -189,8 +233,22 @@ class BulkCheckpointer(Checkpointer):
189
233
  ) -> None:
190
234
  """Saves the example to the checkpoint file."""
191
235
  assert experiment.id in self._sequence_writer
192
- if not example.has_error:
193
- runner.background_run(self._sequence_writer[experiment.id].add, example)
236
+ if example.has_error:
237
+ experiment.warning(
238
+ f'Example {example.id} has error. Skipping checkpointing.'
239
+ )
240
+ else:
241
+ def _save_example(example: Example):
242
+ writer = self._sequence_writer[experiment.id]
243
+ try:
244
+ writer.add(example)
245
+ except BaseException as e: # pylint: disable=broad-except
246
+ experiment.error(
247
+ f'Failed to save example {example.id} to {writer.path}. '
248
+ f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
249
+ )
250
+ raise e
251
+ runner.background_run(_save_example, example)
194
252
 
195
253
 
196
254
  class SequenceWriter:
@@ -198,8 +256,13 @@ class SequenceWriter:
198
256
 
199
257
  def __init__(self, path: str):
200
258
  self._lock = threading.Lock()
259
+ self._path = path
201
260
  self._sequence_writer = pg.io.open_sequence(path, 'w')
202
261
 
262
+ @property
263
+ def path(self) -> str:
264
+ return self._path
265
+
203
266
  def add(self, example: Example):
204
267
  example_blob = pg.to_json_str(
205
268
  example,
@@ -16,9 +16,9 @@ import tempfile
16
16
  import unittest
17
17
 
18
18
  from langfun.core.eval.v2 import checkpointing
19
+ from langfun.core.eval.v2 import eval_test_helper
19
20
  from langfun.core.eval.v2 import example as example_lib
20
21
  from langfun.core.eval.v2 import runners as runners_lib # pylint: disable=unused-import
21
- from langfun.core.eval.v2 import test_helper
22
22
  import pyglove as pg
23
23
 
24
24
  Example = example_lib.Example
@@ -56,7 +56,7 @@ class PerExampleCheckpointerTest(unittest.TestCase):
56
56
 
57
57
  def test_checkpointing(self):
58
58
  root_dir = os.path.join(tempfile.gettempdir(), 'per_example_checkpointer')
59
- experiment = test_helper.test_experiment()
59
+ experiment = eval_test_helper.test_experiment()
60
60
  checkpoint_filename = 'checkpoint.jsonl'
61
61
  checkpointer = checkpointing.PerExampleCheckpointer(checkpoint_filename)
62
62
  run = experiment.run(
@@ -89,7 +89,7 @@ class BulkCheckpointerTest(unittest.TestCase):
89
89
 
90
90
  def test_checkpointing(self):
91
91
  root_dir = os.path.join(tempfile.gettempdir(), 'test_bulk_checkpointer')
92
- experiment = test_helper.test_experiment()
92
+ experiment = eval_test_helper.test_experiment()
93
93
  checkpoint_filename = 'checkpoint.jsonl'
94
94
  checkpointer = checkpointing.BulkCheckpointer(checkpoint_filename)
95
95
  run = experiment.run(
@@ -285,36 +285,43 @@ class Evaluation(experiment_lib.Experiment):
285
285
  # Evaluation-level logging.
286
286
  #
287
287
 
288
- def _log(self, level: lf.logging.LogLevel, message: str, **kwargs):
288
+ def _log(self, log_func, level: lf.logging.LogLevel, message: str, **kwargs):
289
+ # Write to external logging system.
290
+ log_message = f'{self.id}: {message}'
291
+ if kwargs:
292
+ log_message = f'{log_message} (metadata: {kwargs!r})'
293
+ log_func(log_message)
294
+
295
+ # Add to experiment log history.
296
+ log_entry = lf.logging.LogEntry(
297
+ level=level,
298
+ time=datetime.datetime.now(),
299
+ message=message,
300
+ metadata=kwargs,
301
+ )
289
302
  with self._log_lock:
290
- self._log_entries.append(
291
- lf.logging.LogEntry(
292
- level=level,
293
- time=datetime.datetime.now(),
294
- message=message,
295
- metadata=kwargs,
296
- )
297
- )
303
+ self._log_entries.append(log_entry)
298
304
 
299
305
  def debug(self, message: str, **kwargs):
300
306
  """Logs a debug message to the session."""
301
- self._log('debug', message, **kwargs)
307
+ self._log(pg.logging.debug, 'debug', message, **kwargs)
302
308
 
303
309
  def info(self, message: str, **kwargs):
304
310
  """Logs an info message to the session."""
305
- self._log('info', message, **kwargs)
311
+ self._log(pg.logging.info, 'info', message, **kwargs)
306
312
 
307
313
  def warning(self, message: str, **kwargs):
308
314
  """Logs a warning message to the session."""
309
- self._log('warning', message, **kwargs)
315
+ self._log(pg.logging.warning, 'warning', message, **kwargs)
310
316
 
311
317
  def error(self, message: str, **kwargs):
312
318
  """Logs an error message to the session."""
313
- self._log('error', message, **kwargs)
319
+ self._log(pg.logging.error, 'error', message, **kwargs)
314
320
 
315
321
  def fatal(self, message: str, **kwargs):
316
322
  """Logs a fatal message to the session."""
317
- self._log('fatal', message, **kwargs)
323
+ # We use error level for fatal message, which does not trigger assertion.
324
+ self._log(pg.logging.error, 'fatal', message, **kwargs)
318
325
 
319
326
  #
320
327
  # HTML views.
@@ -15,12 +15,11 @@ import os
15
15
  import tempfile
16
16
  import unittest
17
17
 
18
+ from langfun.core.eval.v2 import eval_test_helper
18
19
  from langfun.core.eval.v2 import evaluation as evaluation_lib
19
20
  from langfun.core.eval.v2 import example as example_lib
20
21
  from langfun.core.eval.v2 import experiment as experiment_lib
21
22
 
22
- from langfun.core.eval.v2 import test_helper
23
-
24
23
  import pyglove as pg
25
24
 
26
25
  Example = example_lib.Example
@@ -32,17 +31,23 @@ Run = experiment_lib.Run
32
31
  class EvaluationTest(unittest.TestCase):
33
32
 
34
33
  def test_hyper_evaluation(self):
35
- exp = test_helper.TestEvaluation(
36
- lm=test_helper.TestLLM(offset=pg.oneof(range(3)))
34
+ exp = eval_test_helper.TestEvaluation(
35
+ lm=eval_test_helper.TestLLM(offset=pg.oneof(range(3)))
37
36
  )
38
37
  self.assertFalse(exp.is_leaf)
39
38
  self.assertTrue(
40
39
  pg.eq(
41
40
  exp.children,
42
41
  [
43
- test_helper.TestEvaluation(lm=test_helper.TestLLM(offset=0)),
44
- test_helper.TestEvaluation(lm=test_helper.TestLLM(offset=1)),
45
- test_helper.TestEvaluation(lm=test_helper.TestLLM(offset=2)),
42
+ eval_test_helper.TestEvaluation(
43
+ lm=eval_test_helper.TestLLM(offset=0)
44
+ ),
45
+ eval_test_helper.TestEvaluation(
46
+ lm=eval_test_helper.TestLLM(offset=1)
47
+ ),
48
+ eval_test_helper.TestEvaluation(
49
+ lm=eval_test_helper.TestLLM(offset=2)
50
+ ),
46
51
  ]
47
52
  )
48
53
  )
@@ -57,19 +62,21 @@ class EvaluationTest(unittest.TestCase):
57
62
  )
58
63
 
59
64
  def test_input(self):
60
- exp = test_helper.TestEvaluation()
65
+ exp = eval_test_helper.TestEvaluation()
61
66
  self.assertEqual(exp.num_examples, 10)
62
- exp = test_helper.TestEvaluation(inputs=test_helper.test_inputs(None))
67
+ exp = eval_test_helper.TestEvaluation(
68
+ inputs=eval_test_helper.test_inputs(None)
69
+ )
63
70
  self.assertEqual(exp.num_examples, 20)
64
71
  @pg.functor
65
72
  def my_inputs():
66
73
  yield pg.Dict(x=1, y=2)
67
74
  yield pg.Dict(x=3, y=4)
68
- exp = test_helper.TestEvaluation(inputs=my_inputs())
75
+ exp = eval_test_helper.TestEvaluation(inputs=my_inputs())
69
76
  self.assertEqual(exp.num_examples, 2)
70
77
 
71
78
  def test_evaluate(self):
72
- exp = test_helper.TestEvaluation()
79
+ exp = eval_test_helper.TestEvaluation()
73
80
  example = exp.evaluate(Example(id=3))
74
81
  self.assertIs(exp.state.get(3), example)
75
82
  self.assertTrue(example.newly_processed)
@@ -85,7 +92,7 @@ class EvaluationTest(unittest.TestCase):
85
92
  self.assertIsNotNone(example.start_time)
86
93
  self.assertIsNotNone(example.end_time)
87
94
 
88
- exp = test_helper.TestEvaluation(lm=test_helper.TestLLM(offset=1))
95
+ exp = eval_test_helper.TestEvaluation(lm=eval_test_helper.TestLLM(offset=1))
89
96
  example = exp.evaluate(3)
90
97
  self.assertTrue(example.newly_processed)
91
98
  self.assertEqual(example.input, pg.Dict(x=2, y=4, groundtruth=6))
@@ -109,7 +116,7 @@ class EvaluationTest(unittest.TestCase):
109
116
  pg.io.mkdirs(eval_dir, exist_ok=True)
110
117
  state_file = os.path.join(eval_dir, 'state.jsonl')
111
118
  with pg.io.open_sequence(state_file, 'w') as f:
112
- exp = test_helper.TestEvaluation()
119
+ exp = eval_test_helper.TestEvaluation()
113
120
  example = exp.evaluate(3)
114
121
  self.assertTrue(example.newly_processed)
115
122
  self.assertEqual(example.input, pg.Dict(x=2, y=4, groundtruth=6))
@@ -132,7 +139,7 @@ class EvaluationTest(unittest.TestCase):
132
139
  self.assertEqual(example.usage_summary.uncached.total.num_requests, 0)
133
140
 
134
141
  def test_html_view(self):
135
- exp = test_helper.TestEvaluation()
142
+ exp = eval_test_helper.TestEvaluation()
136
143
  exp.debug('debug message')
137
144
  exp.info('info message')
138
145
  exp.warning('warning message', x=1)
@@ -959,6 +959,14 @@ class Plugin(lf.Component):
959
959
  ) -> None:
960
960
  """Called when an experiment (both leaf and non-leaf) is complete."""
961
961
 
962
+ def on_experiment_abort(
963
+ self,
964
+ runner: Runner,
965
+ experiment: Experiment,
966
+ error: BaseException,
967
+ ) -> None:
968
+ """Called when an experiment (both leaf and non-leaf) is aborted."""
969
+
962
970
  def on_example_start(
963
971
  self,
964
972
  runner: Runner,
@@ -18,9 +18,9 @@ import tempfile
18
18
  import unittest
19
19
 
20
20
  from langfun.core import console as lf_console
21
+ from langfun.core.eval.v2 import eval_test_helper
21
22
  from langfun.core.eval.v2 import progress_tracking # pylint: disable=unused-import
22
23
  from langfun.core.eval.v2 import runners as runners_lib # pylint: disable=unused-import
23
- from langfun.core.eval.v2 import test_helper
24
24
  import pyglove as pg
25
25
 
26
26
 
@@ -35,7 +35,7 @@ class HtmlProgressTrackerTest(unittest.TestCase):
35
35
  display=display
36
36
  )
37
37
  root_dir = os.path.join(tempfile.gettempdir(), 'test_html_progress_tracker')
38
- experiment = test_helper.test_experiment()
38
+ experiment = eval_test_helper.test_experiment()
39
39
  _ = experiment.run(root_dir, 'new', plugins=[])
40
40
  self.assertIsInstance(result['view'], pg.Html)
41
41
  lf_console._notebook = None
@@ -45,7 +45,7 @@ class TqdmProgressTrackerTest(unittest.TestCase):
45
45
 
46
46
  def test_basic(self):
47
47
  root_dir = os.path.join(tempfile.gettempdir(), 'test_tqdm_progress_tracker')
48
- experiment = test_helper.test_experiment()
48
+ experiment = eval_test_helper.test_experiment()
49
49
  string_io = io.StringIO()
50
50
  with contextlib.redirect_stderr(string_io):
51
51
  _ = experiment.run(root_dir, 'new', plugins=[])
@@ -55,7 +55,7 @@ class TqdmProgressTrackerTest(unittest.TestCase):
55
55
  root_dir = os.path.join(
56
56
  tempfile.gettempdir(), 'test_tqdm_progress_tracker_with_example_ids'
57
57
  )
58
- experiment = test_helper.test_experiment()
58
+ experiment = eval_test_helper.test_experiment()
59
59
  string_io = io.StringIO()
60
60
  with contextlib.redirect_stderr(string_io):
61
61
  _ = experiment.run(root_dir, 'new', example_ids=[1], plugins=[])
@@ -14,6 +14,7 @@
14
14
  """Reporting evaluation results."""
15
15
 
16
16
  import time
17
+ import traceback
17
18
  from typing import Annotated
18
19
 
19
20
  from langfun.core.eval.v2 import example as example_lib
@@ -61,6 +62,14 @@ class HtmlReporter(experiment_lib.Plugin):
61
62
  ) -> None:
62
63
  self._maybe_update_summary(runner, force=True)
63
64
 
65
+ def on_run_abort(
66
+ self,
67
+ runner: Runner,
68
+ root: Experiment,
69
+ error: BaseException
70
+ ) -> None:
71
+ self._maybe_update_summary(runner, force=True)
72
+
64
73
  def on_experiment_start(
65
74
  self,
66
75
  runner: Runner,
@@ -75,6 +84,16 @@ class HtmlReporter(experiment_lib.Plugin):
75
84
  if experiment.is_leaf:
76
85
  self._maybe_update_experiment_html(runner, experiment, force=True)
77
86
 
87
+ def on_experiment_abort(
88
+ self,
89
+ runner: Runner,
90
+ experiment: Experiment,
91
+ error: BaseException
92
+ ) -> None:
93
+ del error
94
+ assert experiment.is_leaf
95
+ self._maybe_update_experiment_html(runner, experiment, force=True)
96
+
78
97
  def on_example_complete(
79
98
  self, runner: Runner, experiment: Experiment, example: Example
80
99
  ):
@@ -103,19 +122,26 @@ class HtmlReporter(experiment_lib.Plugin):
103
122
  self, runner: Runner, experiment: Experiment, force: bool = False
104
123
  ) -> None:
105
124
  def _save():
106
- html = experiment.to_html(
107
- collapse_level=None,
108
- extra_flags=dict(
109
- current_run=runner.current_run,
110
- interactive=False,
111
- card_view=False,
112
- ),
113
- )
114
- html.save(
115
- runner.current_run.output_path_for(
116
- experiment, _EVALULATION_DETAIL_FILE
117
- )
125
+ index_html_path = runner.current_run.output_path_for(
126
+ experiment, _EVALULATION_DETAIL_FILE
118
127
  )
128
+ try:
129
+ html = experiment.to_html(
130
+ collapse_level=None,
131
+ extra_flags=dict(
132
+ current_run=runner.current_run,
133
+ interactive=False,
134
+ card_view=False,
135
+ ),
136
+ )
137
+ html.save(index_html_path)
138
+ except BaseException as e: # pylint: disable=broad-except
139
+ experiment.error(
140
+ f'Failed to save HTML {index_html_path!r}. '
141
+ f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
142
+ )
143
+ raise e
144
+
119
145
  if force or (
120
146
  time.time() - self._last_experiment_report_time[experiment.id]
121
147
  > self.experiment_report_interval
@@ -128,17 +154,24 @@ class HtmlReporter(experiment_lib.Plugin):
128
154
  ) -> None:
129
155
  """Saves the example."""
130
156
  def _save():
131
- html = example.to_html(
132
- collapse_level=None,
133
- enable_summary_tooltip=False,
134
- extra_flags=dict(
135
- # For properly rendering the next link.
136
- num_examples=getattr(experiment, 'num_examples', None)
137
- ),
138
- )
139
- html.save(
140
- runner.current_run.output_path_for(
141
- experiment, f'{example.id}.html'
142
- )
143
- )
157
+ try:
158
+ html = example.to_html(
159
+ collapse_level=None,
160
+ enable_summary_tooltip=False,
161
+ extra_flags=dict(
162
+ # For properly rendering the next link.
163
+ num_examples=getattr(experiment, 'num_examples', None)
164
+ ),
165
+ )
166
+ html.save(
167
+ runner.current_run.output_path_for(
168
+ experiment, f'{example.id}.html'
169
+ )
170
+ )
171
+ except BaseException as e: # pylint: disable=broad-except
172
+ experiment.error(
173
+ f'Failed to save HTML {example.id}.html. '
174
+ f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
175
+ )
176
+ raise e
144
177
  runner.background_run(_save)
@@ -15,9 +15,9 @@ import os
15
15
  import tempfile
16
16
  import unittest
17
17
 
18
+ from langfun.core.eval.v2 import eval_test_helper
18
19
  from langfun.core.eval.v2 import reporting
19
20
  from langfun.core.eval.v2 import runners as runners_lib # pylint: disable=unused-import
20
- from langfun.core.eval.v2 import test_helper
21
21
  import pyglove as pg
22
22
 
23
23
 
@@ -25,7 +25,7 @@ class ReportingTest(unittest.TestCase):
25
25
 
26
26
  def test_reporting(self):
27
27
  root_dir = os.path.join(tempfile.gettempdir(), 'test_reporting')
28
- experiment = test_helper.test_experiment()
28
+ experiment = eval_test_helper.test_experiment()
29
29
  reporter = reporting.HtmlReporter()
30
30
  run = experiment.run(root_dir, 'new', plugins=[reporter])
31
31
  pg.io.path_exists(run.output_path_for(experiment, 'summary.html'))