langfun 0.1.2.dev202505010804__tar.gz → 0.1.2.dev202505030803__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of langfun might be problematic. Click here for more details.

Files changed (168)
  1. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/PKG-INFO +1 -1
  2. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/agentic/action.py +230 -13
  3. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/agentic/action_eval.py +15 -11
  4. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/agentic/action_eval_test.py +0 -1
  5. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/agentic/action_test.py +153 -12
  6. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/evaluation.py +2 -1
  7. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/gemini.py +19 -7
  8. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/gemini_test.py +33 -2
  9. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/vertexai.py +12 -0
  10. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/vertexai_test.py +17 -0
  11. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/structured/querying.py +5 -3
  12. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun.egg-info/PKG-INFO +1 -1
  13. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/LICENSE +0 -0
  14. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/README.md +0 -0
  15. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/__init__.py +0 -0
  16. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/__init__.py +0 -0
  17. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/agentic/__init__.py +0 -0
  18. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/coding/__init__.py +0 -0
  19. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/coding/python/__init__.py +0 -0
  20. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/coding/python/correction.py +0 -0
  21. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/coding/python/correction_test.py +0 -0
  22. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/coding/python/execution.py +0 -0
  23. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/coding/python/execution_test.py +0 -0
  24. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/coding/python/generation.py +0 -0
  25. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/coding/python/generation_test.py +0 -0
  26. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/coding/python/parsing.py +0 -0
  27. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/coding/python/parsing_test.py +0 -0
  28. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/coding/python/sandboxing.py +0 -0
  29. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/coding/python/sandboxing_test.py +0 -0
  30. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/component.py +0 -0
  31. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/component_test.py +0 -0
  32. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/concurrent.py +0 -0
  33. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/concurrent_test.py +0 -0
  34. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/console.py +0 -0
  35. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/console_test.py +0 -0
  36. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/data/__init__.py +0 -0
  37. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/data/conversion/__init__.py +0 -0
  38. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/data/conversion/anthropic.py +0 -0
  39. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/data/conversion/anthropic_test.py +0 -0
  40. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/data/conversion/gemini.py +0 -0
  41. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/data/conversion/gemini_test.py +0 -0
  42. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/data/conversion/openai.py +0 -0
  43. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/data/conversion/openai_test.py +0 -0
  44. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/__init__.py +0 -0
  45. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/base.py +0 -0
  46. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/base_test.py +0 -0
  47. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/matching.py +0 -0
  48. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/matching_test.py +0 -0
  49. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/patching.py +0 -0
  50. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/patching_test.py +0 -0
  51. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/scoring.py +0 -0
  52. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/scoring_test.py +0 -0
  53. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/__init__.py +0 -0
  54. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/checkpointing.py +0 -0
  55. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/checkpointing_test.py +0 -0
  56. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/eval_test_helper.py +0 -0
  57. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/evaluation_test.py +0 -0
  58. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/example.py +0 -0
  59. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/example_test.py +0 -0
  60. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/experiment.py +0 -0
  61. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/experiment_test.py +0 -0
  62. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/metric_values.py +0 -0
  63. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/metric_values_test.py +0 -0
  64. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/metrics.py +0 -0
  65. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/metrics_test.py +0 -0
  66. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/progress.py +0 -0
  67. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/progress_test.py +0 -0
  68. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/progress_tracking.py +0 -0
  69. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/progress_tracking_test.py +0 -0
  70. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/reporting.py +0 -0
  71. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/reporting_test.py +0 -0
  72. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/runners.py +0 -0
  73. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/eval/v2/runners_test.py +0 -0
  74. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/langfunc.py +0 -0
  75. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/langfunc_test.py +0 -0
  76. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/language_model.py +0 -0
  77. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/language_model_test.py +0 -0
  78. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/__init__.py +0 -0
  79. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/anthropic.py +0 -0
  80. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/anthropic_test.py +0 -0
  81. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/azure_openai.py +0 -0
  82. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/azure_openai_test.py +0 -0
  83. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/cache/__init__.py +0 -0
  84. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/cache/base.py +0 -0
  85. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/cache/in_memory.py +0 -0
  86. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/cache/in_memory_test.py +0 -0
  87. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/compositional.py +0 -0
  88. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/compositional_test.py +0 -0
  89. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/deepseek.py +0 -0
  90. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/deepseek_test.py +0 -0
  91. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/fake.py +0 -0
  92. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/fake_test.py +0 -0
  93. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/google_genai.py +0 -0
  94. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/google_genai_test.py +0 -0
  95. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/groq.py +0 -0
  96. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/groq_test.py +0 -0
  97. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/llama_cpp.py +0 -0
  98. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/llama_cpp_test.py +0 -0
  99. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/openai.py +0 -0
  100. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/openai_compatible.py +0 -0
  101. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/openai_compatible_test.py +0 -0
  102. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/openai_test.py +0 -0
  103. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/rest.py +0 -0
  104. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/llms/rest_test.py +0 -0
  105. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/logging.py +0 -0
  106. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/logging_test.py +0 -0
  107. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/memories/__init__.py +0 -0
  108. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/memories/conversation_history.py +0 -0
  109. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/memories/conversation_history_test.py +0 -0
  110. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/memory.py +0 -0
  111. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/message.py +0 -0
  112. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/message_test.py +0 -0
  113. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/modalities/__init__.py +0 -0
  114. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/modalities/audio.py +0 -0
  115. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/modalities/audio_test.py +0 -0
  116. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/modalities/image.py +0 -0
  117. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/modalities/image_test.py +0 -0
  118. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/modalities/mime.py +0 -0
  119. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/modalities/mime_test.py +0 -0
  120. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/modalities/pdf.py +0 -0
  121. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/modalities/pdf_test.py +0 -0
  122. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/modalities/video.py +0 -0
  123. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/modalities/video_test.py +0 -0
  124. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/modality.py +0 -0
  125. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/modality_test.py +0 -0
  126. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/natural_language.py +0 -0
  127. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/natural_language_test.py +0 -0
  128. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/sampling.py +0 -0
  129. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/sampling_test.py +0 -0
  130. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/structured/__init__.py +0 -0
  131. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/structured/completion.py +0 -0
  132. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/structured/completion_test.py +0 -0
  133. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/structured/description.py +0 -0
  134. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/structured/description_test.py +0 -0
  135. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/structured/function_generation.py +0 -0
  136. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/structured/function_generation_test.py +0 -0
  137. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/structured/mapping.py +0 -0
  138. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/structured/mapping_test.py +0 -0
  139. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/structured/parsing.py +0 -0
  140. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/structured/parsing_test.py +0 -0
  141. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/structured/querying_test.py +0 -0
  142. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/structured/schema.py +0 -0
  143. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/structured/schema_generation.py +0 -0
  144. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/structured/schema_generation_test.py +0 -0
  145. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/structured/schema_test.py +0 -0
  146. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/structured/scoring.py +0 -0
  147. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/structured/scoring_test.py +0 -0
  148. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/structured/tokenization.py +0 -0
  149. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/structured/tokenization_test.py +0 -0
  150. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/subscription.py +0 -0
  151. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/subscription_test.py +0 -0
  152. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/template.py +0 -0
  153. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/template_test.py +0 -0
  154. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/templates/__init__.py +0 -0
  155. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/templates/completion.py +0 -0
  156. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/templates/completion_test.py +0 -0
  157. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/templates/conversation.py +0 -0
  158. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/templates/conversation_test.py +0 -0
  159. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/templates/demonstration.py +0 -0
  160. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/templates/demonstration_test.py +0 -0
  161. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/templates/selfplay.py +0 -0
  162. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun/core/templates/selfplay_test.py +0 -0
  163. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun.egg-info/SOURCES.txt +0 -0
  164. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun.egg-info/dependency_links.txt +0 -0
  165. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun.egg-info/requires.txt +0 -0
  166. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/langfun.egg-info/top_level.txt +0 -0
  167. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/setup.cfg +0 -0
  168. {langfun-0.1.2.dev202505010804 → langfun-0.1.2.dev202505030803}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: langfun
3
- Version: 0.1.2.dev202505010804
3
+ Version: 0.1.2.dev202505030803
4
4
  Summary: Langfun: Language as Functions.
5
5
  Home-page: https://github.com/google/langfun
6
6
  Author: Langfun Authors
@@ -27,7 +27,152 @@ import pyglove as pg
27
27
 
28
28
 
29
29
  class Action(pg.Object):
30
- """Base class for agent actions."""
30
+ """Base class for Langfun's agentic actions.
31
+
32
+ # Developing Actions
33
+
34
+ In Langfun, an `Action` is a class representing a task an agent can execute.
35
+ To define custom actions, subclass `lf.agentic.Action` and implement the
36
+ `call` method, which contains the logic for the action's execution.
37
+
38
+ ```python
39
+ class Calculate(lf.agentic.Action):
40
+ expression: str
41
+
42
+ def call(self, session: Session, *, lm: lf.LanguageModel, **kwargs):
43
+ return session.query(self.expression, float, lm=lm)
44
+ ```
45
+
46
+ Key aspects of the `call` method:
47
+
48
+ - `session` (First Argument): An `lf.Session` object required to make queries,
49
+ perform logging, and add metadata to the action. It also tracks the
50
+ execution of the action and its sub-actions.
51
+
52
+ - Use `session.query(...)` to make calls to a Language Model.
53
+ - Use `session.debug(...)`, `session.info(...)`, `session.warning(...)`,
54
+ and `session.error(...)` for adding logs associated with the
55
+ current action.
56
+ - Use `session.add_metadata(...)` to associate custom metadata with
57
+ the current action.
58
+
59
+ - Keyword Arguments (e.g., lm): Arguments required for the action's execution
60
+ (like a language model) should be defined as keyword arguments.
61
+
62
+ - `**kwargs`: Include `**kwargs` to allow:
63
+
64
+ - Users to pass additional arguments to child actions.
65
+ - The action to gracefully handle extra arguments passed by parent actions.
66
+
67
+ # Using Actions
68
+
69
+ ## Creating Action objects
70
+ Action objects can be instantiated in two primary ways:
71
+
72
+ - Direct instantiation by Users:
73
+
74
+ ```
75
+ calculate_action = Calculate(expression='1 + 1')
76
+ ```
77
+
78
+ - Generation by Language Models (LLMs): LLMs can generate Action objects when
79
+ provided with an "action space" (a schema defining possible actions). The
80
+ LLM populates the action's attributes. User code can then invoke the
81
+ generated action.
82
+
83
+ ```python
84
+ import pyglove as pg
85
+ import langfun as lf
86
+
87
+ # Define possible actions for the LLM
88
+ class Search(lf.agentic.Action):
89
+ query: str
90
+ def call(self, session: lf.Session, *, lm: lf.LanguageModel, **kwargs):
91
+ # Placeholder for actual search logic
92
+ return f"Results for: {self.query}"
93
+
94
+ class DirectAnswer(lf.agentic.Action):
95
+ answer: str
96
+ def call(self, session: lf.Session, *, lm: lf.LanguageModel, **kwargs):
97
+ return self.answer
98
+
99
+ # Define the schema for the LLM's output
100
+ class NextStep(pg.Object):
101
+ step_by_step_thoughts: list[str]
102
+ next_action: Calculate | Search | DirectAnswer
103
+
104
+ # Query the LLM to determine the next step
105
+ next_step = lf.query(
106
+ 'What is the next step for {{question}}?',
107
+ NextStep,
108
+ question='why is the sky blue?'
109
+ )
110
+ # Execute the action chosen by the LLM
111
+ result = next_step.next_action()
112
+ print(result)
113
+ ```
114
+
115
+ ## Invoking Actions and Managing Sessions:
116
+
117
+ When an action is called, the session argument (the first argument to call)
118
+ is handled as follows:
119
+
120
+ - Implicit Session Management: If no session is explicitly provided when
121
+ calling an action, Langfun automatically creates and passes one.
122
+
123
+ ```python
124
+ calc = Calculate(expression='1 + 1')
125
+
126
+ # A session is implicitly created and passed here.
127
+ result = calc()
128
+ print(result)
129
+
130
+ # Access the implicitly created session.
131
+ # print(calc.session)
132
+ ```
133
+
134
+ - Explicit Session Management: You can create and manage `lf.Session` objects
135
+ explicitly. This is useful for customizing session identifiers or managing
136
+ a shared context for multiple actions.
137
+
138
+ ```python
139
+ calc = Calculate(expression='1 + 1')
140
+
141
+ # Explicitly create and pass a session.
142
+ with lf.Session(id='my_agent_session') as session:
143
+ result = calc(session=session) # Pass the session explicitly
144
+ print(result)
145
+ ```
146
+
147
+ ## Accessing Execution Trajectory:
148
+
149
+ After an action is executed, the Session object holds a record of its
150
+ execution, known as the trajectory. This includes queries made and any
151
+ sub-actions performed.
152
+
153
+ - To access all queries issued directly by the root action:
154
+
155
+ ```python
156
+ print(session.root.execution.queries)
157
+ ```
158
+ - To access all queries issued by the root action and any of its
159
+ sub-actions (recursively):
160
+
161
+ ```python
162
+ print(session.root.execution.all_queries)
163
+ ```
164
+ - To access all child actions issued by the root action:
165
+
166
+ ```python
167
+ print(session.root.execution.actions)
168
+ ```
169
+
170
+ - To access all the actions in the sub-tree issued by the root action:
171
+
172
+ ```python
173
+ print(session.root.execution.all_actions)
174
+ ```
175
+ """
31
176
 
32
177
  def _on_bound(self):
33
178
  super()._on_bound()
@@ -60,6 +205,8 @@ class Action(pg.Object):
60
205
  """Executes the action."""
61
206
  if session is None:
62
207
  session = Session()
208
+ session.start()
209
+
63
210
  if show_progress:
64
211
  lf.console.display(pg.view(session, name='agent_session'))
65
212
 
@@ -107,8 +254,14 @@ class Action(pg.Object):
107
254
  action=self,
108
255
  error=error
109
256
  )
257
+ if self._session is not None:
258
+ self._session.end(result=None, error=error)
110
259
  raise
111
- return result
260
+
261
+ if self._session is not None:
262
+ # Session is created by current action. Stop the session.
263
+ self._session.end(result)
264
+ return result
112
265
 
113
266
  @abc.abstractmethod
114
267
  def call(self, session: 'Session', **kwargs) -> Any:
@@ -229,9 +382,6 @@ class ExecutionTrace(pg.Object, pg.views.html.HtmlTreeView.Extension):
229
382
  remove_class=['running'],
230
383
  )
231
384
 
232
- def __len__(self) -> int:
233
- return len(self.items)
234
-
235
385
  @property
236
386
  def has_started(self) -> bool:
237
387
  return self.start_time is not None
@@ -306,6 +456,22 @@ class ExecutionTrace(pg.Object, pg.views.html.HtmlTreeView.Extension):
306
456
  for x in branch._iter_subtree(item_cls): # pylint: disable=protected-access
307
457
  yield x
308
458
 
459
+ #
460
+ # Shortcut methods to operate on the execution trace.
461
+ #
462
+
463
+ def __len__(self) -> int:
464
+ return len(self.items)
465
+
466
+ def __iter__(self) -> Iterator[TracedItem]:
467
+ return iter(self.items)
468
+
469
+ def __bool__(self) -> bool:
470
+ return bool(self.items)
471
+
472
+ def __getitem__(self, index: int) -> TracedItem:
473
+ return self.items[index]
474
+
309
475
  def append(self, item: TracedItem) -> None:
310
476
  """Appends an item to the sequence."""
311
477
  with pg.notify_on_change(False):
@@ -935,6 +1101,44 @@ class Session(pg.Object, pg.views.html.HtmlTreeView.Extension):
935
1101
  skip_notification=True
936
1102
  )
937
1103
 
1104
+ def start(self) -> None:
1105
+ """Starts the session."""
1106
+ self.root.execution.start()
1107
+
1108
+ def end(
1109
+ self,
1110
+ result: Any,
1111
+ error: pg.utils.ErrorInfo | None = None,
1112
+ metadata: dict[str, Any] | None = None,
1113
+ ) -> None:
1114
+ """Ends the session."""
1115
+ self.root.end(result, error, metadata)
1116
+
1117
+ def __enter__(self):
1118
+ """Enters the session."""
1119
+ self.start()
1120
+ return self
1121
+
1122
+ def __exit__(self, exc_type, exc_val, exc_tb):
1123
+ """Exits the session."""
1124
+ # We allow users to explicitly end the session with specified result
1125
+ # and metadata.
1126
+ if self.root.execution.has_stopped:
1127
+ return
1128
+
1129
+ if exc_val is not None:
1130
+ result, metadata = None, None
1131
+ error = pg.utils.ErrorInfo.from_exception(exc_val)
1132
+ else:
1133
+ actions = self.root.actions
1134
+ if actions:
1135
+ result = actions[-1].result
1136
+ error = actions[-1].error
1137
+ metadata = actions[-1].metadata
1138
+ else:
1139
+ result, error, metadata = None, None, None
1140
+ self.end(result, error, metadata)
1141
+
938
1142
  #
939
1143
  # Context-manager for information tracking.
940
1144
  #
@@ -942,8 +1146,12 @@ class Session(pg.Object, pg.views.html.HtmlTreeView.Extension):
942
1146
  @contextlib.contextmanager
943
1147
  def track_action(self, action: Action) -> Iterator[ActionInvocation]:
944
1148
  """Track the execution of an action."""
945
- if not self._current_execution.has_started:
946
- self._current_execution.start()
1149
+ if not self.root.execution.has_started:
1150
+ raise ValueError(
1151
+ 'Please call `Session.start() / Session.end()` explicitly, '
1152
+ 'or use `with Session(...) as session: ...` context manager to '
1153
+ 'signal the start and end of the session.'
1154
+ )
947
1155
 
948
1156
  invocation = ActionInvocation(pg.maybe_ref(action))
949
1157
  action._invocation = invocation # pylint: disable=protected-access
@@ -960,12 +1168,6 @@ class Session(pg.Object, pg.views.html.HtmlTreeView.Extension):
960
1168
  finally:
961
1169
  self._current_execution = parent_execution
962
1170
  self._current_action = parent_action
963
- if parent_action is self.root:
964
- parent_action.end(
965
- result=invocation.result,
966
- metadata=invocation.metadata,
967
- error=invocation.error
968
- )
969
1171
 
970
1172
  @contextlib.contextmanager
971
1173
  def track_phase(self, name: str | None) -> Iterator[ExecutionTrace]:
@@ -1255,6 +1457,21 @@ class Session(pg.Object, pg.views.html.HtmlTreeView.Extension):
1255
1457
  """Returns the final result of the session."""
1256
1458
  return self.root.result
1257
1459
 
1460
+ @property
1461
+ def has_started(self) -> bool:
1462
+ """Returns whether the session has started."""
1463
+ return self.root.execution.has_started
1464
+
1465
+ @property
1466
+ def has_stopped(self) -> bool:
1467
+ """Returns whether the session has stopped."""
1468
+ return self.root.execution.has_stopped
1469
+
1470
+ @property
1471
+ def has_error(self) -> bool:
1472
+ """Returns whether the session has an error."""
1473
+ return self.root.has_error
1474
+
1258
1475
  @property
1259
1476
  def current_action(self) -> ActionInvocation:
1260
1477
  """Returns the current invocation."""
@@ -34,17 +34,20 @@ class ActionEval(lf.eval.v2.Evaluation):
34
34
  def process(self, example: lf.eval.v2.Example) -> tuple[str, dict[str, Any]]:
35
35
  example_input = example.input
36
36
  action = example_input.action
37
- session = action_lib.Session(id=f'{self.id}#example-{example.id}')
38
37
 
39
- # NOTE(daiyip): Setting session as metadata before action execution, so we
40
- # could use `Evaluation.state.in_progress_examples` to access the session
41
- # for status reporting from other threads.
42
- example.metadata['session'] = session
38
+ # We explicitly create a session here to use a custom session ID.
39
+ with action_lib.Session(id=f'{self.id}#example-{example.id}') as session:
40
+
41
+ # NOTE(daiyip): Setting session as metadata before action execution, so we
42
+ # could use `Evaluation.state.in_progress_examples` to access the session
43
+ # for status reporting from other threads.
44
+ example.metadata['session'] = session
45
+
46
+ with lf.logging.use_log_level('fatal'):
47
+ kwargs = self.action_args.copy()
48
+ kwargs.update(verbose=True)
49
+ action(session=session, **kwargs)
43
50
 
44
- with lf.logging.use_log_level('fatal'):
45
- kwargs = self.action_args.copy()
46
- kwargs.update(verbose=True)
47
- action(session=session, **kwargs)
48
51
  return session.final_result, dict(session=session)
49
52
 
50
53
  #
@@ -76,8 +79,9 @@ class ActionEvalV1(lf_eval.Matching):
76
79
 
77
80
  def process(self, example: pg.Dict, **kwargs):
78
81
  action = example.action
79
- session = action_lib.Session(id=str(getattr(example, 'id', '<empty>')))
80
- action(session=session, lm=self.lm, **kwargs)
82
+ with action_lib.Session(
83
+ id=str(getattr(example, 'id', '<empty>'))) as session:
84
+ action(session=session, lm=self.lm, **kwargs)
81
85
  return session.as_message()
82
86
 
83
87
  def answer(self, output: Any, example: pg.Dict) -> Any:
@@ -68,7 +68,6 @@ class ActionEvalV1Test(unittest.TestCase):
68
68
 
69
69
  s = FooEval()
70
70
  result = s.run(summary=False)
71
- pg.print(result)
72
71
  self.assertEqual(
73
72
  result,
74
73
  dict(
@@ -98,7 +98,7 @@ class ExecutionTraceTest(unittest.TestCase):
98
98
  self.assertEqual(action_invocation.execution.id, '/a1')
99
99
 
100
100
  root.execution.reset()
101
- self.assertEqual(len(root.execution), 0)
101
+ self.assertFalse(root.execution)
102
102
 
103
103
 
104
104
  class SessionTest(unittest.TestCase):
@@ -112,12 +112,18 @@ class SessionTest(unittest.TestCase):
112
112
 
113
113
  session = action_lib.Session(id='agent@1')
114
114
  self.assertEqual(session.id, 'agent@1')
115
+ self.assertFalse(session.has_started)
116
+ self.assertFalse(session.has_stopped)
115
117
 
116
118
  # Render HTML view to trigger dynamic update during execution.
117
119
  _ = session.to_html()
118
120
 
119
- self.assertEqual(foo(session, lm=lm, verbose=True), 3)
121
+ with session:
122
+ result = foo(session, lm=lm, verbose=True)
120
123
 
124
+ self.assertTrue(session.has_started)
125
+ self.assertTrue(session.has_stopped)
126
+ self.assertEqual(result, 3)
121
127
  self.assertIsNone(foo.session)
122
128
  self.assertEqual(foo.result, 3)
123
129
  self.assertEqual(
@@ -135,8 +141,8 @@ class SessionTest(unittest.TestCase):
135
141
  self.assertIsNone(root.parent_action)
136
142
  self.assertEqual(root.id, 'agent@1:')
137
143
  self.assertEqual(root.execution.id, 'agent@1:')
138
- self.assertEqual(len(root.execution.items), 1)
139
- self.assertIs(root.execution.items[0].action, foo)
144
+ self.assertEqual(len(root.execution), 1)
145
+ self.assertIs(root.execution[0].action, foo)
140
146
 
141
147
  self.assertTrue(root.execution.has_started)
142
148
  self.assertTrue(root.execution.has_stopped)
@@ -160,14 +166,14 @@ class SessionTest(unittest.TestCase):
160
166
  self.assertEqual(root.usage_summary.total.num_requests, 6)
161
167
 
162
168
  # Inspecting the top-level action (Foo)
163
- foo_invocation = root.execution.items[0]
169
+ foo_invocation = root.execution[0]
164
170
  self.assertIs(foo_invocation.parent_action, root)
165
171
  self.assertEqual(foo_invocation.id, 'agent@1:/a1')
166
172
  self.assertEqual(foo_invocation.execution.id, 'agent@1:/a1')
167
173
  self.assertEqual(len(foo_invocation.execution.items), 4)
168
174
 
169
175
  # Prepare phase.
170
- prepare_phase = foo_invocation.execution.items[0]
176
+ prepare_phase = foo_invocation.execution[0]
171
177
  self.assertIsInstance(prepare_phase, action_lib.ExecutionTrace)
172
178
  self.assertEqual(prepare_phase.id, 'agent@1:/a1/prepare')
173
179
  self.assertEqual(len(prepare_phase.items), 2)
@@ -179,7 +185,7 @@ class SessionTest(unittest.TestCase):
179
185
  self.assertEqual(prepare_phase.items[1].id, 'agent@1:/a1/prepare/q1')
180
186
 
181
187
  # Tracked queries.
182
- query_invocation = foo_invocation.execution.items[1]
188
+ query_invocation = foo_invocation.execution[1]
183
189
  self.assertIsInstance(query_invocation, lf_structured.QueryInvocation)
184
190
  self.assertEqual(query_invocation.id, 'agent@1:/a1/q2')
185
191
  self.assertIs(query_invocation.lm, lm)
@@ -197,7 +203,7 @@ class SessionTest(unittest.TestCase):
197
203
  )
198
204
 
199
205
  # Tracked parallel executions.
200
- parallel_executions = foo_invocation.execution.items[2]
206
+ parallel_executions = foo_invocation.execution[2]
201
207
  self.assertEqual(parallel_executions.id, 'agent@1:/a1/p1')
202
208
  self.assertIsInstance(parallel_executions, action_lib.ParallelExecutions)
203
209
  self.assertEqual(len(parallel_executions), 3)
@@ -209,7 +215,7 @@ class SessionTest(unittest.TestCase):
209
215
  self.assertEqual(len(parallel_executions[2].queries), 1)
210
216
 
211
217
  # Invocation to Bar.
212
- bar_invocation = foo_invocation.execution.items[3]
218
+ bar_invocation = foo_invocation.execution[3]
213
219
  self.assertIs(bar_invocation.parent_action, foo_invocation)
214
220
  self.assertEqual(bar_invocation.id, 'agent@1:/a1/a1')
215
221
  self.assertIsInstance(bar_invocation, action_lib.ActionInvocation)
@@ -240,10 +246,10 @@ class SessionTest(unittest.TestCase):
240
246
  root = session.root
241
247
  self.assertRegex(root.id, 'agent@.*:')
242
248
  self.assertTrue(root.has_error)
243
- foo_invocation = root.execution.items[0]
249
+ foo_invocation = root.execution[0]
244
250
  self.assertIsInstance(foo_invocation, action_lib.ActionInvocation)
245
251
  self.assertTrue(foo_invocation.has_error)
246
- bar_invocation = foo_invocation.execution.items[3]
252
+ bar_invocation = foo_invocation.execution[3]
247
253
  self.assertIsInstance(bar_invocation, action_lib.ActionInvocation)
248
254
  self.assertTrue(bar_invocation.has_error)
249
255
 
@@ -265,11 +271,146 @@ class SessionTest(unittest.TestCase):
265
271
  root = session.root
266
272
  self.assertRegex(root.id, 'agent@.*:')
267
273
  self.assertTrue(root.has_error)
268
- foo_invocation = root.execution.items[0]
274
+ foo_invocation = root.execution[0]
269
275
  self.assertIsInstance(foo_invocation, action_lib.ActionInvocation)
270
276
  self.assertTrue(foo_invocation.has_error)
271
277
  self.assertEqual(len(foo_invocation.execution.items), 2)
272
278
 
279
+ def test_succeeded_with_implicit_session(self):
280
+ lm = fake.StaticResponse('lm response')
281
+ foo = Foo(1)
282
+ foo(lm=lm, verbose=True)
283
+ session = foo.session
284
+ self.assertIsNotNone(session)
285
+ self.assertIsInstance(session.root.action, action_lib.RootAction)
286
+ self.assertIs(session.current_action, session.root)
287
+ self.assertTrue(session.has_started)
288
+ self.assertTrue(session.has_stopped)
289
+ self.assertEqual(session.final_result, 3)
290
+ self.assertFalse(session.root.has_error)
291
+ self.assertEqual(session.root.metadata, {})
292
+
293
+ def test_failed_with_implicit_session(self):
294
+ lm = fake.StaticResponse('lm response')
295
+ foo = Foo(1, simulate_action_error=True)
296
+ with self.assertRaisesRegex(ValueError, 'Bar error'):
297
+ foo(lm=lm)
298
+ session = foo.session
299
+ self.assertIsNotNone(session)
300
+ self.assertIsInstance(session.root.action, action_lib.RootAction)
301
+ self.assertIs(session.current_action, session.root)
302
+ self.assertTrue(session.has_started)
303
+ self.assertTrue(session.has_stopped)
304
+ self.assertTrue(session.has_error)
305
+ self.assertIsInstance(session.root.error, pg.utils.ErrorInfo)
306
+ self.assertIn('Bar error', str(session.root.error))
307
+
308
+ def test_succeeded_with_explicit_session(self):
309
+ lm = fake.StaticResponse('lm response')
310
+ foo = Foo(1)
311
+ self.assertIsNone(foo.session)
312
+ self.assertIsNone(foo.result)
313
+ self.assertIsNone(foo.metadata)
314
+
315
+ session = action_lib.Session(id='agent@1')
316
+ self.assertEqual(session.id, 'agent@1')
317
+ self.assertFalse(session.has_started)
318
+ self.assertFalse(session.has_stopped)
319
+
320
+ with session:
321
+ result = foo(session, lm=lm, verbose=True)
322
+
323
+ self.assertTrue(session.has_started)
324
+ self.assertTrue(session.has_stopped)
325
+ self.assertEqual(result, 3)
326
+ self.assertIsNone(foo.session)
327
+ self.assertEqual(foo.result, 3)
328
+ self.assertEqual(
329
+ foo.metadata, dict(note='foo', subtask_0=0, subtask_1=1, subtask_2=2)
330
+ )
331
+ self.assertIs(session.final_result, foo.result)
332
+ self.assertFalse(session.has_error)
333
+
334
+ def test_succeeded_with_explicit_session_start_end(self):
335
+ lm = fake.StaticResponse('lm response')
336
+ foo = Foo(1)
337
+ self.assertIsNone(foo.session)
338
+ self.assertIsNone(foo.result)
339
+ self.assertIsNone(foo.metadata)
340
+
341
+ session = action_lib.Session(id='agent@1')
342
+ self.assertEqual(session.id, 'agent@1')
343
+ self.assertFalse(session.has_started)
344
+ self.assertFalse(session.has_stopped)
345
+
346
+ session.start()
347
+ result = foo(session, lm=lm, verbose=True)
348
+ session.end(result)
349
+
350
+ self.assertTrue(session.has_started)
351
+ self.assertTrue(session.has_stopped)
352
+ self.assertEqual(result, 3)
353
+ self.assertIsNone(foo.session)
354
+ self.assertEqual(foo.result, 3)
355
+ self.assertEqual(
356
+ foo.metadata, dict(note='foo', subtask_0=0, subtask_1=1, subtask_2=2)
357
+ )
358
+ self.assertIs(session.final_result, foo.result)
359
+ self.assertFalse(session.has_error)
360
+
361
+ def test_failed_with_explicit_session(self):
362
+ lm = fake.StaticResponse('lm response')
363
+ foo = Foo(1, simulate_action_error=True)
364
+ session = action_lib.Session(id='agent@1')
365
+ with self.assertRaisesRegex(ValueError, 'Bar error'):
366
+ with session:
367
+ foo(session, lm=lm, verbose=True)
368
+ self.assertTrue(session.has_started)
369
+ self.assertTrue(session.has_stopped)
370
+ self.assertTrue(session.has_error)
371
+ self.assertIsNone(session.final_result)
372
+ self.assertIsInstance(session.root.error, pg.utils.ErrorInfo)
373
+ self.assertIn('Bar error', str(session.root.error))
374
+
375
+ def test_failed_with_explicit_session_without_start(self):
376
+ lm = fake.StaticResponse('lm response')
377
+ foo = Foo(1, simulate_action_error=True)
378
+ session = action_lib.Session(id='agent@1')
379
+ with self.assertRaisesRegex(ValueError, 'Please call `Session.start'):
380
+ foo(session, lm=lm, verbose=True)
381
+
382
+ def test_succeed_with_multiple_actions(self):
383
+ lm = fake.StaticResponse('lm response')
384
+ with action_lib.Session() as session:
385
+ x = Bar()(session, lm=lm)
386
+ y = Bar()(session, lm=lm)
387
+ self.assertTrue(session.has_started)
388
+ self.assertFalse(session.has_stopped)
389
+ session.add_metadata(note='root metadata')
390
+ session.end(x + y)
391
+
392
+ self.assertTrue(session.has_started)
393
+ self.assertTrue(session.has_stopped)
394
+ self.assertEqual(session.final_result, 2 + 2)
395
+ self.assertEqual(len(session.root.execution), 2)
396
+ self.assertEqual(session.root.metadata, dict(note='root metadata'))
397
+
398
+ def test_failed_with_multiple_actions(self):
399
+ lm = fake.StaticResponse('lm response')
400
+ with self.assertRaisesRegex(ValueError, 'Bar error'):
401
+ with action_lib.Session() as session:
402
+ x = Bar()(session, lm=lm)
403
+ y = Bar(simulate_action_error=True)(session, lm=lm)
404
+ session.end(x + y)
405
+
406
+ self.assertTrue(session.has_started)
407
+ self.assertTrue(session.has_stopped)
408
+ self.assertTrue(session.has_error)
409
+ self.assertIsInstance(session.root.error, pg.utils.ErrorInfo)
410
+ self.assertEqual(len(session.root.execution), 2)
411
+ self.assertFalse(session.root.execution[0].has_error)
412
+ self.assertTrue(session.root.execution[1].has_error)
413
+
273
414
  def test_log(self):
274
415
  session = action_lib.Session()
275
416
  session.debug('hi', x=1, y=2)
@@ -167,7 +167,6 @@ class Evaluation(experiment_lib.Experiment):
167
167
  example.input = self.example_input_by_id(example.id)
168
168
 
169
169
  checkpointed = self._state.ckpt_example(example.id)
170
- self._state.update(example, in_progress=True)
171
170
 
172
171
  with pg.timeit('evaluate') as timeit, lf.track_usages() as usage_summary:
173
172
  if checkpointed is None or checkpointed.has_error:
@@ -182,6 +181,7 @@ class Evaluation(experiment_lib.Experiment):
182
181
  f'contains error: {checkpointed.error}'
183
182
  )
184
183
  example.start_time = time.time()
184
+ self._state.update(example, in_progress=True)
185
185
  self._process(example, raise_if_has_error=raise_if_has_error)
186
186
  else:
187
187
  self.info(
@@ -189,6 +189,7 @@ class Evaluation(experiment_lib.Experiment):
189
189
  'is available and error free.'
190
190
  )
191
191
  example.start_time = checkpointed.start_time
192
+ self._state.update(example, in_progress=True)
192
193
 
193
194
  # Use the output and metadata obtained from the previous processing.
194
195
  example.output = checkpointed.output
@@ -605,13 +605,13 @@ class Gemini(rest.REST):
605
605
  raise lf.ModalityError(f'Unsupported modality: {chunk!r}') from e
606
606
  return chunk
607
607
 
608
- contents = []
609
608
  if system_message := prompt.get('system_message'):
610
609
  assert isinstance(system_message, lf.SystemMessage), type(system_message)
611
- contents.append(
612
- system_message.as_format(
613
- 'gemini', chunk_preprocessor=modality_conversion)
610
+ request['systemInstruction'] = system_message.as_format(
611
+ 'gemini', chunk_preprocessor=modality_conversion
614
612
  )
613
+
614
+ contents = []
615
615
  contents.append(
616
616
  prompt.as_format('gemini', chunk_preprocessor=modality_conversion)
617
617
  )
@@ -647,6 +647,11 @@ class Gemini(rest.REST):
647
647
  + '\n\n [RESPONSE FORMAT (not part of prompt)]\n'
648
648
  + pg.to_json_str(json_schema, json_indent=2)
649
649
  )
650
+ if options.max_thinking_tokens is not None:
651
+ config['thinkingConfig'] = {
652
+ 'thinkingBudget': options.max_thinking_tokens
653
+ }
654
+
650
655
  return config
651
656
 
652
657
  def result(self, json: dict[str, Any]) -> lf.LMSamplingResult:
@@ -659,18 +664,25 @@ class Gemini(rest.REST):
659
664
  # NOTE(daiyip): We saw cases that `candidatesTokenCount` is not present.
660
665
  # Therefore, we use 0 as the default value.
661
666
  output_tokens = usage.get('candidatesTokenCount', 0)
667
+ thinking_tokens = usage.get('thoughtsTokenCount', 0)
668
+ total_tokens = usage.get('totalTokenCount', 0)
662
669
 
663
670
  return lf.LMSamplingResult(
664
671
  [lf.LMSample(message) for message in messages],
665
672
  usage=lf.LMSamplingUsage(
666
673
  prompt_tokens=input_tokens,
667
674
  completion_tokens=output_tokens,
668
- total_tokens=input_tokens + output_tokens,
675
+ total_tokens=total_tokens,
676
+ completion_tokens_details={
677
+ 'thinking_tokens': thinking_tokens,
678
+ },
669
679
  ),
670
680
  )
671
681
 
672
682
  def _error(self, status_code: int, content: str) -> lf.LMError:
673
- if (status_code == 400
674
- and b'exceeds the maximum number of tokens' in content):
683
+ if (
684
+ status_code == 400
685
+ and b'exceeds the maximum number of tokens' in content
686
+ ):
675
687
  return lf.ContextLimitError(f'{status_code}: {content}')
676
688
  return super()._error(status_code, content)