langfun 0.1.2.dev202503110804__tar.gz → 0.1.2.dev202503130804__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/PKG-INFO +1 -1
  2. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/agentic/action.py +4 -0
  3. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/agentic/action_eval.py +5 -4
  4. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/checkpointing.py +4 -4
  5. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/checkpointing_test.py +30 -4
  6. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/eval_test_helper.py +4 -3
  7. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/evaluation.py +68 -21
  8. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/evaluation_test.py +12 -9
  9. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/runners.py +4 -3
  10. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun.egg-info/PKG-INFO +1 -1
  11. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/LICENSE +0 -0
  12. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/README.md +0 -0
  13. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/__init__.py +0 -0
  14. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/__init__.py +0 -0
  15. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/agentic/__init__.py +0 -0
  16. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/agentic/action_eval_test.py +0 -0
  17. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/agentic/action_test.py +0 -0
  18. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/coding/__init__.py +0 -0
  19. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/coding/python/__init__.py +0 -0
  20. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/coding/python/correction.py +0 -0
  21. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/coding/python/correction_test.py +0 -0
  22. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/coding/python/execution.py +0 -0
  23. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/coding/python/execution_test.py +0 -0
  24. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/coding/python/generation.py +0 -0
  25. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/coding/python/generation_test.py +0 -0
  26. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/coding/python/parsing.py +0 -0
  27. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/coding/python/parsing_test.py +0 -0
  28. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/coding/python/sandboxing.py +0 -0
  29. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/coding/python/sandboxing_test.py +0 -0
  30. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/component.py +0 -0
  31. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/component_test.py +0 -0
  32. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/concurrent.py +0 -0
  33. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/concurrent_test.py +0 -0
  34. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/console.py +0 -0
  35. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/console_test.py +0 -0
  36. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/__init__.py +0 -0
  37. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/base.py +0 -0
  38. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/base_test.py +0 -0
  39. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/matching.py +0 -0
  40. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/matching_test.py +0 -0
  41. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/patching.py +0 -0
  42. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/patching_test.py +0 -0
  43. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/scoring.py +0 -0
  44. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/scoring_test.py +0 -0
  45. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/__init__.py +0 -0
  46. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/example.py +0 -0
  47. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/example_test.py +0 -0
  48. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/experiment.py +0 -0
  49. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/experiment_test.py +0 -0
  50. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/metric_values.py +0 -0
  51. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/metric_values_test.py +0 -0
  52. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/metrics.py +0 -0
  53. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/metrics_test.py +0 -0
  54. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/progress.py +0 -0
  55. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/progress_test.py +0 -0
  56. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/progress_tracking.py +0 -0
  57. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/progress_tracking_test.py +0 -0
  58. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/reporting.py +0 -0
  59. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/reporting_test.py +0 -0
  60. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/eval/v2/runners_test.py +0 -0
  61. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/langfunc.py +0 -0
  62. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/langfunc_test.py +0 -0
  63. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/language_model.py +0 -0
  64. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/language_model_test.py +0 -0
  65. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/__init__.py +0 -0
  66. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/anthropic.py +0 -0
  67. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/anthropic_test.py +0 -0
  68. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/azure_openai.py +0 -0
  69. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/azure_openai_test.py +0 -0
  70. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/cache/__init__.py +0 -0
  71. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/cache/base.py +0 -0
  72. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/cache/in_memory.py +0 -0
  73. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/cache/in_memory_test.py +0 -0
  74. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/compositional.py +0 -0
  75. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/compositional_test.py +0 -0
  76. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/deepseek.py +0 -0
  77. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/deepseek_test.py +0 -0
  78. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/fake.py +0 -0
  79. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/fake_test.py +0 -0
  80. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/gemini.py +0 -0
  81. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/gemini_test.py +0 -0
  82. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/google_genai.py +0 -0
  83. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/google_genai_test.py +0 -0
  84. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/groq.py +0 -0
  85. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/groq_test.py +0 -0
  86. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/llama_cpp.py +0 -0
  87. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/llama_cpp_test.py +0 -0
  88. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/openai.py +0 -0
  89. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/openai_compatible.py +0 -0
  90. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/openai_compatible_test.py +0 -0
  91. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/openai_test.py +0 -0
  92. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/rest.py +0 -0
  93. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/rest_test.py +0 -0
  94. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/vertexai.py +0 -0
  95. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/llms/vertexai_test.py +0 -0
  96. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/logging.py +0 -0
  97. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/logging_test.py +0 -0
  98. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/memories/__init__.py +0 -0
  99. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/memories/conversation_history.py +0 -0
  100. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/memories/conversation_history_test.py +0 -0
  101. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/memory.py +0 -0
  102. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/message.py +0 -0
  103. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/message_test.py +0 -0
  104. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/modalities/__init__.py +0 -0
  105. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/modalities/audio.py +0 -0
  106. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/modalities/audio_test.py +0 -0
  107. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/modalities/image.py +0 -0
  108. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/modalities/image_test.py +0 -0
  109. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/modalities/mime.py +0 -0
  110. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/modalities/mime_test.py +0 -0
  111. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/modalities/ms_office.py +0 -0
  112. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/modalities/ms_office_test.py +0 -0
  113. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/modalities/pdf.py +0 -0
  114. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/modalities/pdf_test.py +0 -0
  115. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/modalities/video.py +0 -0
  116. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/modalities/video_test.py +0 -0
  117. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/modality.py +0 -0
  118. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/modality_test.py +0 -0
  119. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/natural_language.py +0 -0
  120. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/natural_language_test.py +0 -0
  121. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/sampling.py +0 -0
  122. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/sampling_test.py +0 -0
  123. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/structured/__init__.py +0 -0
  124. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/structured/completion.py +0 -0
  125. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/structured/completion_test.py +0 -0
  126. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/structured/description.py +0 -0
  127. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/structured/description_test.py +0 -0
  128. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/structured/function_generation.py +0 -0
  129. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/structured/function_generation_test.py +0 -0
  130. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/structured/mapping.py +0 -0
  131. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/structured/mapping_test.py +0 -0
  132. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/structured/parsing.py +0 -0
  133. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/structured/parsing_test.py +0 -0
  134. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/structured/querying.py +0 -0
  135. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/structured/querying_test.py +0 -0
  136. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/structured/schema.py +0 -0
  137. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/structured/schema_generation.py +0 -0
  138. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/structured/schema_generation_test.py +0 -0
  139. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/structured/schema_test.py +0 -0
  140. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/structured/scoring.py +0 -0
  141. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/structured/scoring_test.py +0 -0
  142. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/structured/tokenization.py +0 -0
  143. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/structured/tokenization_test.py +0 -0
  144. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/subscription.py +0 -0
  145. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/subscription_test.py +0 -0
  146. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/template.py +0 -0
  147. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/template_test.py +0 -0
  148. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/templates/__init__.py +0 -0
  149. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/templates/completion.py +0 -0
  150. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/templates/completion_test.py +0 -0
  151. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/templates/conversation.py +0 -0
  152. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/templates/conversation_test.py +0 -0
  153. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/templates/demonstration.py +0 -0
  154. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/templates/demonstration_test.py +0 -0
  155. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/templates/selfplay.py +0 -0
  156. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun/core/templates/selfplay_test.py +0 -0
  157. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun.egg-info/SOURCES.txt +0 -0
  158. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun.egg-info/dependency_links.txt +0 -0
  159. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun.egg-info/requires.txt +0 -0
  160. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/langfun.egg-info/top_level.txt +0 -0
  161. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/setup.cfg +0 -0
  162. {langfun-0.1.2.dev202503110804 → langfun-0.1.2.dev202503130804}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: langfun
3
- Version: 0.1.2.dev202503110804
3
+ Version: 0.1.2.dev202503130804
4
4
  Summary: Langfun: Language as Functions.
5
5
  Home-page: https://github.com/google/langfun
6
6
  Author: Langfun Authors
@@ -674,6 +674,10 @@ class Session(pg.Object, pg.views.html.HtmlTreeView.Extension):
674
674
  """Session for performing an agentic task."""
675
675
 
676
676
  root: ActionInvocation = ActionInvocation(RootAction())
677
+ id: Annotated[
678
+ str | None,
679
+ 'An optional identifier for the session, which will be used for logging.'
680
+ ] = None
677
681
 
678
682
  def _on_bound(self):
679
683
  super()._on_bound()
@@ -31,9 +31,10 @@ class ActionEval(lf.eval.v2.Evaluation):
31
31
  'Arguments to call the action.'
32
32
  ] = {}
33
33
 
34
- def process(self, example: pg.Dict) -> tuple[str, dict[str, Any]]:
35
- action = example.action
36
- session = action_lib.Session()
34
+ def process(self, example: lf.eval.v2.Example) -> tuple[str, dict[str, Any]]:
35
+ example_input = example.input
36
+ action = example_input.action
37
+ session = action_lib.Session(id=str(example.id))
37
38
  with lf.logging.use_log_level('fatal'):
38
39
  action(session=session, **self.action_args)
39
40
  return session.final_result, dict(session=session)
@@ -68,7 +69,7 @@ class ActionEvalV1(lf_eval.Matching):
68
69
 
69
70
  def process(self, example: pg.Dict, **kwargs):
70
71
  action = example.action
71
- session = action_lib.Session()
72
+ session = action_lib.Session(id=str(getattr(example, 'id', '<empty>')))
72
73
  action(session=session, lm=self.lm, **kwargs)
73
74
  return session.as_message()
74
75
 
@@ -53,14 +53,14 @@ class Checkpointer(experiment_lib.Plugin):
53
53
  self._load_experiment(runner, experiment)
54
54
 
55
55
  example_ids_to_evaluate = current_run.examples_to_evaluate(experiment)
56
- if experiment.state.evaluated_examples:
56
+ if experiment.state.ckpt_examples:
57
57
  loaded_example_ids = list(
58
- sorted(experiment.state.evaluated_examples.keys())
58
+ sorted(experiment.state.ckpt_examples.keys())
59
59
  )
60
60
  example_ids_to_evaluate -= set(loaded_example_ids)
61
61
  example_ids_to_evaluate = list(sorted(example_ids_to_evaluate))
62
62
  experiment.info(
63
- f'{len(experiment.state.evaluated_examples)} examples '
63
+ f'{len(experiment.state.ckpt_examples)} examples '
64
64
  'loaded from checkpoint files. Their outputs will be used '
65
65
  f'for recomputing metrics. Example IDs: {loaded_example_ids}.'
66
66
  )
@@ -316,7 +316,7 @@ class BulkCheckpointer(Checkpointer):
316
316
  writer = self._sequence_writer.pop(experiment.id)
317
317
  writer.close()
318
318
  experiment.info(
319
- f'{len(experiment.state.evaluated_examples)} examples are '
319
+ f'{len(experiment.state.evaluation_status)} examples are '
320
320
  f'checkpointed to {writer.path}.'
321
321
  )
322
322
 
@@ -18,6 +18,7 @@ import unittest
18
18
  from langfun.core.eval.v2 import checkpointing
19
19
  from langfun.core.eval.v2 import eval_test_helper
20
20
  from langfun.core.eval.v2 import example as example_lib
21
+ from langfun.core.eval.v2 import experiment as experiment_lib
21
22
  from langfun.core.eval.v2 import runners as runners_lib # pylint: disable=unused-import
22
23
  import pyglove as pg
23
24
 
@@ -52,6 +53,26 @@ class SequenceWriterTest(unittest.TestCase):
52
53
  self.assertEqual(len(list(iter(f))), 1)
53
54
 
54
55
 
56
+ class ExampleCollector(experiment_lib.Plugin):
57
+ """Collects all examples."""
58
+
59
+ def _on_bound(self):
60
+ super()._on_bound()
61
+ self._examples = {}
62
+
63
+ @property
64
+ def examples(self) -> dict[int, example_lib.Example]:
65
+ return self._examples
66
+
67
+ def on_example_complete(
68
+ self, runner: runners_lib.Runner,
69
+ experiment: experiment_lib.Experiment,
70
+ example: example_lib.Example,
71
+ ):
72
+ assert experiment.is_leaf, None
73
+ self._examples[example.id] = example
74
+
75
+
55
76
  class CheckpointerTest(unittest.TestCase):
56
77
 
57
78
  def assert_found_in_log(self, experiment, message):
@@ -70,13 +91,15 @@ class PerExampleCheckpointerTest(CheckpointerTest):
70
91
  experiment = eval_test_helper.test_experiment()
71
92
  checkpoint_filename = 'checkpoint.jsonl'
72
93
  checkpointer = checkpointing.PerExampleCheckpointer(checkpoint_filename)
94
+ collector = ExampleCollector()
73
95
  run = experiment.run(
74
- root_dir, 'new', runner='sequential', plugins=[checkpointer]
96
+ root_dir, 'new', runner='sequential', plugins=[checkpointer, collector]
75
97
  )
76
98
  num_processed = {}
77
99
  for leaf in experiment.leaf_nodes:
78
100
  for i in range(leaf.num_examples):
79
- example = leaf.state.get(i + 1)
101
+ self.assertIn(i + 1, collector.examples)
102
+ example = collector.examples[i + 1]
80
103
  ckpt = run.output_path_for(leaf, f'checkpoint_{example.id}.jsonl')
81
104
  if example.has_error:
82
105
  self.assertFalse(pg.io.path_exists(ckpt))
@@ -134,12 +157,15 @@ class PerExampleCheckpointerTest(CheckpointerTest):
134
157
  experiment = eval_test_helper.TestEvaluation()
135
158
  checkpoint_filename = 'checkpoint.jsonl'
136
159
  checkpointer = checkpointing.PerExampleCheckpointer(checkpoint_filename)
160
+ collector = ExampleCollector()
161
+
137
162
  run = experiment.run(
138
- root_dir, 'new', runner='sequential', plugins=[checkpointer]
163
+ root_dir, 'new', runner='sequential', plugins=[checkpointer, collector]
139
164
  )
140
165
  num_processed = {}
141
166
  for i in range(experiment.num_examples):
142
- example = experiment.state.get(i + 1)
167
+ self.assertIn(i + 1, collector.examples)
168
+ example = collector.examples[i + 1]
143
169
  ckpt = run.output_path_for(experiment, f'checkpoint_{example.id}.jsonl')
144
170
  if not example.has_error:
145
171
  self.assertTrue(pg.io.path_exists(ckpt))
@@ -63,7 +63,8 @@ class TestEvaluation(Evaluation):
63
63
  metrics = [metrics_lib.Match()]
64
64
  lm: language_model.LanguageModel = TestLLM()
65
65
 
66
- def process(self, v):
66
+ def process(self, example):
67
+ v = example.input
67
68
  if v.x == 5:
68
69
  raise ValueError('x should not be 5')
69
70
  return structured.query(
@@ -83,7 +84,7 @@ class TestEvaluationWithExampleCheckpointingError(TestEvaluation):
83
84
  inputs = test_inputs()
84
85
  metrics = [metrics_lib.Match()]
85
86
 
86
- def process(self, v):
87
+ def process(self, example):
87
88
  return 1, dict(
88
89
  x=BadJsonConvertible()
89
90
  )
@@ -100,7 +101,7 @@ class TestEvaluationWithExampleHtmlGenerationError(Evaluation):
100
101
  inputs = test_inputs()
101
102
  metrics = [metrics_lib.Match()]
102
103
 
103
- def process(self, v):
104
+ def process(self, example):
104
105
  return 1, dict(
105
106
  x=BadHtmlConvertible()
106
107
  )
@@ -127,13 +127,17 @@ class Evaluation(experiment_lib.Experiment):
127
127
  #
128
128
 
129
129
  @abc.abstractmethod
130
- def process(self, example_input: Any) -> Any | tuple[Any, dict[str, Any]]:
130
+ def process(
131
+ self,
132
+ example: example_lib.Example
133
+ ) -> Any | tuple[Any, dict[str, Any]]:
131
134
  """Processes a single example from the evaluation set.
132
135
 
133
136
  Users should override this method to implement the evaluation logic.
134
137
 
135
138
  Args:
136
- example_input: An object returned from `Evaluable.inputs`.
139
+ example: An example object to process. `example.input` is an object
140
+ returned from `Evaluable.inputs`.
137
141
 
138
142
  Returns:
139
143
  A processed output. Or a tuple of (output, metadata).
@@ -162,25 +166,24 @@ class Evaluation(experiment_lib.Experiment):
162
166
  if pg.MISSING_VALUE == example.input:
163
167
  example.input = self.example_input_by_id(example.id)
164
168
 
165
- cached = self._state.get(example.id)
166
-
169
+ checkpointed = self._state.ckpt_example(example.id)
167
170
  with pg.timeit('evaluate') as timeit, lf.track_usages() as usage_summary:
168
- if cached is None or cached.has_error:
171
+ if checkpointed is None or checkpointed.has_error:
169
172
  example.start_time = time.time()
170
173
  self._process(example, raise_if_has_error=raise_if_has_error)
171
174
  else:
172
- example.start_time = cached.start_time
175
+ example.start_time = checkpointed.start_time
173
176
 
174
- # Use cached output and metadata obtained from the previous processing.
175
- example.output = cached.output
176
- example.metadata = cached.metadata
177
+ # Use the output and metadata obtained from the previous processing.
178
+ example.output = checkpointed.output
179
+ example.metadata = checkpointed.metadata
177
180
  example.newly_processed = False
178
181
 
179
182
  # For previously processed examples, we merge previous usages as
180
183
  # cached, so the usage summary will account previous usages, but as
181
184
  # cached.
182
- assert cached.usage_summary is not None
183
- usage_summary.merge(cached.usage_summary, as_cached=True)
185
+ assert checkpointed.usage_summary is not None
186
+ usage_summary.merge(checkpointed.usage_summary, as_cached=True)
184
187
 
185
188
  # Recompute the metrics and metadata for the example even if its processed
186
189
  # output and metadata were from the cache.
@@ -221,7 +224,7 @@ class Evaluation(experiment_lib.Experiment):
221
224
  ):
222
225
  try:
223
226
  with pg.timeit('process'):
224
- output = self.process(example.input)
227
+ output = self.process(example)
225
228
  if (isinstance(output, tuple)
226
229
  and len(output) == 2
227
230
  and isinstance(output[1], dict)):
@@ -687,9 +690,29 @@ class Evaluation(experiment_lib.Experiment):
687
690
  class EvaluationState:
688
691
  """Evaluation state."""
689
692
 
693
+ class ExampleStatus(pg.Object):
694
+ """Example state."""
695
+ evaluated: Annotated[
696
+ bool,
697
+ 'Whether the example is evaluated.'
698
+ ] = False
699
+
700
+ newly_processed: Annotated[
701
+ bool,
702
+ 'Whether the example is newly processed.'
703
+ ] = False
704
+
705
+ has_error: Annotated[
706
+ bool,
707
+ 'Whether the example has error.'
708
+ ] = False
709
+
690
710
  def __init__(self):
691
711
  super().__init__()
692
- self._evaluated_examples: dict[int, example_lib.Example] = {}
712
+ self._ckpt_examples: dict[int, example_lib.Example] = {}
713
+ self._evaluation_status: dict[
714
+ int, EvaluationState.ExampleStatus
715
+ ] = {}
693
716
 
694
717
  def load(
695
718
  self,
@@ -711,17 +734,41 @@ class EvaluationState:
711
734
  assert isinstance(example, example_lib.Example), example
712
735
  if filter is not None and not filter(example):
713
736
  continue
714
- self._evaluated_examples[example.id] = example
737
+ example.newly_processed = False
738
+ self._ckpt_examples[example.id] = example
715
739
 
716
740
  @property
717
- def evaluated_examples(self) -> dict[int, example_lib.Example]:
718
- """Returns the examples in the state."""
719
- return self._evaluated_examples
741
+ def evaluation_status(self) -> dict[int, ExampleStatus]:
742
+ """Returns the evaluation status of the examples."""
743
+ return self._evaluation_status
720
744
 
721
- def get(self, example_id: int) -> example_lib.Example | None:
722
- """Returns the example with the given ID."""
723
- return self._evaluated_examples.get(example_id)
745
+ @property
746
+ def ckpt_examples(self) -> dict[int, example_lib.Example]:
747
+ """Returns the unevaluated examples from checkpoints."""
748
+ return self._ckpt_examples
749
+
750
+ def ckpt_example(self, example_id: int) -> example_lib.Example | None:
751
+ """Returns the unevaluated example from checkpoints for a given ID."""
752
+ return self._ckpt_examples.get(example_id)
753
+
754
+ def get_status(self, example_id: int) -> ExampleStatus:
755
+ """Returns the evaluation status of the example."""
756
+ return self._evaluation_status.get(
757
+ example_id, EvaluationState.ExampleStatus()
758
+ )
724
759
 
725
760
  def update(self, example: example_lib.Example) -> None:
726
761
  """Updates the state with the given example."""
727
- self._evaluated_examples[example.id] = example
762
+ self._update_status(example)
763
+ # Drop the checkpointed copy of the example once it has been processed.
764
+ self._ckpt_examples.pop(example.id, None)
765
+
766
+ def _update_status(self, example: example_lib.Example) -> None:
767
+ """Updates the evaluation status of the example."""
768
+ self._evaluation_status[example.id] = (
769
+ EvaluationState.ExampleStatus(
770
+ evaluated=example.output != pg.MISSING_VALUE,
771
+ newly_processed=example.newly_processed,
772
+ has_error=example.has_error,
773
+ )
774
+ )
@@ -78,7 +78,9 @@ class EvaluationTest(unittest.TestCase):
78
78
  def test_evaluate(self):
79
79
  exp = eval_test_helper.TestEvaluation()
80
80
  example = exp.evaluate(Example(id=3))
81
- self.assertIs(exp.state.get(3), example)
81
+ self.assertTrue(exp.state.get_status(3).evaluated)
82
+ self.assertTrue(exp.state.get_status(3).newly_processed)
83
+ self.assertFalse(exp.state.get_status(3).has_error)
82
84
  self.assertTrue(example.newly_processed)
83
85
  self.assertEqual(example.input, pg.Dict(x=2, y=4, groundtruth=6))
84
86
  self.assertEqual(example.output, 6)
@@ -111,7 +113,7 @@ class EvaluationTest(unittest.TestCase):
111
113
  self.assertEqual(example.metadata, {})
112
114
  self.assertEqual(example.metric_metadata, dict(error='ValueError'))
113
115
 
114
- def test_evaluate_with_state(self):
116
+ def test_evaluate_withstate(self):
115
117
  eval_dir = os.path.join(tempfile.gettempdir(), 'test_eval')
116
118
  pg.io.mkdirs(eval_dir, exist_ok=True)
117
119
  state_file = os.path.join(eval_dir, 'state.jsonl')
@@ -121,13 +123,14 @@ class EvaluationTest(unittest.TestCase):
121
123
  self.assertTrue(example.newly_processed)
122
124
  self.assertEqual(example.input, pg.Dict(x=2, y=4, groundtruth=6))
123
125
  self.assertEqual(example.output, 6)
124
- self.assertEqual(len(exp._state.evaluated_examples), 1)
126
+ self.assertEqual(len(exp.state.evaluation_status), 1)
125
127
  f.add(pg.to_json_str(example))
126
128
 
127
129
  exp.reset()
128
- self.assertEqual(len(exp._state.evaluated_examples), 0)
130
+ self.assertEqual(len(exp.state.ckpt_examples), 0)
129
131
  exp.load_state(state_file)
130
- self.assertEqual(len(exp._state.evaluated_examples), 1)
132
+ self.assertEqual(len(exp.state.ckpt_examples), 1)
133
+ self.assertEqual(len(exp.state.evaluation_status), 0)
131
134
  example = exp.evaluate(3)
132
135
  self.assertFalse(example.newly_processed)
133
136
  self.assertEqual(example.input, pg.Dict(x=2, y=4, groundtruth=6))
@@ -140,14 +143,14 @@ class EvaluationTest(unittest.TestCase):
140
143
 
141
144
  # Test load_state with filter.
142
145
  exp.reset()
143
- self.assertEqual(len(exp._state.evaluated_examples), 0)
146
+ self.assertEqual(len(exp.state.ckpt_examples), 0)
144
147
  exp.load_state(state_file, filter=lambda x: x.id == 3)
145
- self.assertEqual(len(exp._state.evaluated_examples), 1)
148
+ self.assertEqual(len(exp.state.ckpt_examples), 1)
146
149
 
147
150
  exp.reset()
148
- self.assertEqual(len(exp._state.evaluated_examples), 0)
151
+ self.assertEqual(len(exp.state.ckpt_examples), 0)
149
152
  exp.load_state(state_file, filter=lambda x: x.id == 1)
150
- self.assertEqual(len(exp._state.evaluated_examples), 0)
153
+ self.assertEqual(len(exp.state.ckpt_examples), 0)
151
154
 
152
155
  def test_html_view(self):
153
156
  exp = eval_test_helper.TestEvaluation()
@@ -181,8 +181,8 @@ class RunnerBase(Runner):
181
181
  )
182
182
  num_from_checkpoint, num_processed = 0, 0
183
183
  for example_id in example_ids:
184
- example = experiment.state.get(example_id)
185
- if example.newly_processed:
184
+ status = experiment.state.get_status(example_id)
185
+ if status.newly_processed:
186
186
  num_processed += 1
187
187
  else:
188
188
  num_from_checkpoint += 1
@@ -358,7 +358,8 @@ class RunnerBase(Runner):
358
358
  """Runs the evaluation example."""
359
359
  self.on_example_start(evaluation, item)
360
360
  item = evaluation.evaluate(
361
- item, raise_if_has_error=self.current_run.raise_if_has_error
361
+ item,
362
+ raise_if_has_error=self.current_run.raise_if_has_error,
362
363
  )
363
364
  self.on_example_complete(evaluation, item)
364
365
  return item
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: langfun
3
- Version: 0.1.2.dev202503110804
3
+ Version: 0.1.2.dev202503130804
4
4
  Summary: Langfun: Language as Functions.
5
5
  Home-page: https://github.com/google/langfun
6
6
  Author: Langfun Authors