langfun 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512150805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. langfun/__init__.py +1 -1
  2. langfun/core/__init__.py +7 -1
  3. langfun/core/agentic/__init__.py +8 -1
  4. langfun/core/agentic/action.py +740 -112
  5. langfun/core/agentic/action_eval.py +9 -2
  6. langfun/core/agentic/action_test.py +189 -24
  7. langfun/core/async_support.py +104 -5
  8. langfun/core/async_support_test.py +23 -0
  9. langfun/core/coding/python/correction.py +19 -9
  10. langfun/core/coding/python/execution.py +14 -12
  11. langfun/core/coding/python/generation.py +21 -16
  12. langfun/core/coding/python/sandboxing.py +23 -3
  13. langfun/core/component.py +42 -3
  14. langfun/core/concurrent.py +70 -6
  15. langfun/core/concurrent_test.py +9 -2
  16. langfun/core/console.py +1 -1
  17. langfun/core/data/conversion/anthropic.py +12 -3
  18. langfun/core/data/conversion/anthropic_test.py +8 -6
  19. langfun/core/data/conversion/gemini.py +11 -2
  20. langfun/core/data/conversion/gemini_test.py +48 -9
  21. langfun/core/data/conversion/openai.py +145 -31
  22. langfun/core/data/conversion/openai_test.py +161 -17
  23. langfun/core/eval/base.py +48 -44
  24. langfun/core/eval/base_test.py +5 -5
  25. langfun/core/eval/matching.py +5 -2
  26. langfun/core/eval/patching.py +3 -3
  27. langfun/core/eval/scoring.py +4 -3
  28. langfun/core/eval/v2/__init__.py +3 -0
  29. langfun/core/eval/v2/checkpointing.py +148 -46
  30. langfun/core/eval/v2/checkpointing_test.py +9 -2
  31. langfun/core/eval/v2/config_saver.py +37 -0
  32. langfun/core/eval/v2/config_saver_test.py +36 -0
  33. langfun/core/eval/v2/eval_test_helper.py +104 -3
  34. langfun/core/eval/v2/evaluation.py +102 -19
  35. langfun/core/eval/v2/evaluation_test.py +9 -3
  36. langfun/core/eval/v2/example.py +50 -40
  37. langfun/core/eval/v2/example_test.py +16 -8
  38. langfun/core/eval/v2/experiment.py +95 -20
  39. langfun/core/eval/v2/experiment_test.py +19 -0
  40. langfun/core/eval/v2/metric_values.py +31 -3
  41. langfun/core/eval/v2/metric_values_test.py +32 -0
  42. langfun/core/eval/v2/metrics.py +157 -44
  43. langfun/core/eval/v2/metrics_test.py +39 -18
  44. langfun/core/eval/v2/progress.py +31 -1
  45. langfun/core/eval/v2/progress_test.py +27 -0
  46. langfun/core/eval/v2/progress_tracking.py +13 -5
  47. langfun/core/eval/v2/progress_tracking_test.py +9 -1
  48. langfun/core/eval/v2/reporting.py +88 -71
  49. langfun/core/eval/v2/reporting_test.py +24 -6
  50. langfun/core/eval/v2/runners/__init__.py +30 -0
  51. langfun/core/eval/v2/{runners.py → runners/base.py} +73 -180
  52. langfun/core/eval/v2/runners/beam.py +354 -0
  53. langfun/core/eval/v2/runners/beam_test.py +153 -0
  54. langfun/core/eval/v2/runners/ckpt_monitor.py +350 -0
  55. langfun/core/eval/v2/runners/ckpt_monitor_test.py +213 -0
  56. langfun/core/eval/v2/runners/debug.py +40 -0
  57. langfun/core/eval/v2/runners/debug_test.py +76 -0
  58. langfun/core/eval/v2/runners/parallel.py +243 -0
  59. langfun/core/eval/v2/runners/parallel_test.py +182 -0
  60. langfun/core/eval/v2/runners/sequential.py +47 -0
  61. langfun/core/eval/v2/runners/sequential_test.py +169 -0
  62. langfun/core/langfunc.py +45 -130
  63. langfun/core/langfunc_test.py +7 -5
  64. langfun/core/language_model.py +189 -36
  65. langfun/core/language_model_test.py +54 -3
  66. langfun/core/llms/__init__.py +14 -1
  67. langfun/core/llms/anthropic.py +157 -2
  68. langfun/core/llms/azure_openai.py +29 -17
  69. langfun/core/llms/cache/base.py +25 -3
  70. langfun/core/llms/cache/in_memory.py +48 -7
  71. langfun/core/llms/cache/in_memory_test.py +14 -4
  72. langfun/core/llms/compositional.py +25 -1
  73. langfun/core/llms/deepseek.py +30 -2
  74. langfun/core/llms/fake.py +32 -1
  75. langfun/core/llms/gemini.py +90 -12
  76. langfun/core/llms/gemini_test.py +110 -0
  77. langfun/core/llms/google_genai.py +52 -1
  78. langfun/core/llms/groq.py +28 -3
  79. langfun/core/llms/llama_cpp.py +23 -4
  80. langfun/core/llms/openai.py +120 -3
  81. langfun/core/llms/openai_compatible.py +148 -27
  82. langfun/core/llms/openai_compatible_test.py +207 -20
  83. langfun/core/llms/openai_test.py +0 -2
  84. langfun/core/llms/rest.py +16 -1
  85. langfun/core/llms/vertexai.py +78 -8
  86. langfun/core/logging.py +1 -1
  87. langfun/core/mcp/__init__.py +10 -0
  88. langfun/core/mcp/client.py +177 -0
  89. langfun/core/mcp/client_test.py +71 -0
  90. langfun/core/mcp/session.py +241 -0
  91. langfun/core/mcp/session_test.py +54 -0
  92. langfun/core/mcp/testing/simple_mcp_client.py +33 -0
  93. langfun/core/mcp/testing/simple_mcp_server.py +33 -0
  94. langfun/core/mcp/tool.py +254 -0
  95. langfun/core/mcp/tool_test.py +197 -0
  96. langfun/core/memory.py +1 -0
  97. langfun/core/message.py +160 -55
  98. langfun/core/message_test.py +65 -81
  99. langfun/core/modalities/__init__.py +8 -0
  100. langfun/core/modalities/audio.py +21 -1
  101. langfun/core/modalities/image.py +73 -3
  102. langfun/core/modalities/image_test.py +116 -0
  103. langfun/core/modalities/mime.py +78 -4
  104. langfun/core/modalities/mime_test.py +59 -0
  105. langfun/core/modalities/pdf.py +19 -1
  106. langfun/core/modalities/video.py +21 -1
  107. langfun/core/modality.py +167 -29
  108. langfun/core/modality_test.py +42 -12
  109. langfun/core/natural_language.py +1 -1
  110. langfun/core/sampling.py +4 -4
  111. langfun/core/sampling_test.py +20 -4
  112. langfun/core/structured/__init__.py +2 -24
  113. langfun/core/structured/completion.py +34 -44
  114. langfun/core/structured/completion_test.py +23 -43
  115. langfun/core/structured/description.py +54 -50
  116. langfun/core/structured/function_generation.py +29 -12
  117. langfun/core/structured/mapping.py +81 -37
  118. langfun/core/structured/parsing.py +95 -79
  119. langfun/core/structured/parsing_test.py +0 -3
  120. langfun/core/structured/querying.py +230 -154
  121. langfun/core/structured/querying_test.py +69 -33
  122. langfun/core/structured/schema/__init__.py +49 -0
  123. langfun/core/structured/schema/base.py +664 -0
  124. langfun/core/structured/schema/base_test.py +531 -0
  125. langfun/core/structured/schema/json.py +174 -0
  126. langfun/core/structured/schema/json_test.py +121 -0
  127. langfun/core/structured/schema/python.py +316 -0
  128. langfun/core/structured/schema/python_test.py +410 -0
  129. langfun/core/structured/schema_generation.py +33 -14
  130. langfun/core/structured/scoring.py +47 -36
  131. langfun/core/structured/tokenization.py +26 -11
  132. langfun/core/subscription.py +2 -2
  133. langfun/core/template.py +175 -50
  134. langfun/core/template_test.py +123 -17
  135. langfun/env/__init__.py +43 -0
  136. langfun/env/base_environment.py +827 -0
  137. langfun/env/base_environment_test.py +473 -0
  138. langfun/env/base_feature.py +304 -0
  139. langfun/env/base_feature_test.py +228 -0
  140. langfun/env/base_sandbox.py +842 -0
  141. langfun/env/base_sandbox_test.py +1235 -0
  142. langfun/env/event_handlers/__init__.py +14 -0
  143. langfun/env/event_handlers/chain.py +233 -0
  144. langfun/env/event_handlers/chain_test.py +253 -0
  145. langfun/env/event_handlers/event_logger.py +472 -0
  146. langfun/env/event_handlers/event_logger_test.py +304 -0
  147. langfun/env/event_handlers/metric_writer.py +726 -0
  148. langfun/env/event_handlers/metric_writer_test.py +214 -0
  149. langfun/env/interface.py +1640 -0
  150. langfun/env/interface_test.py +153 -0
  151. langfun/env/load_balancers.py +59 -0
  152. langfun/env/load_balancers_test.py +141 -0
  153. langfun/env/test_utils.py +507 -0
  154. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/METADATA +7 -3
  155. langfun-0.1.2.dev202512150805.dist-info/RECORD +217 -0
  156. langfun/core/eval/v2/runners_test.py +0 -343
  157. langfun/core/structured/schema.py +0 -987
  158. langfun/core/structured/schema_test.py +0 -982
  159. langfun-0.1.2.dev202509120804.dist-info/RECORD +0 -172
  160. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/WHEEL +0 -0
  161. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/licenses/LICENSE +0 -0
  162. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/top_level.txt +0 -0
@@ -14,12 +14,14 @@
14
14
  import contextlib
15
15
  import io
16
16
  import os
17
+ import sys
17
18
  import tempfile
18
19
  import unittest
19
20
 
21
+ from langfun.core import concurrent as lf_concurrent
20
22
  from langfun.core import console as lf_console
21
23
  from langfun.core.eval.v2 import eval_test_helper
22
- from langfun.core.eval.v2 import progress_tracking # pylint: disable=unused-import
24
+ from langfun.core.eval.v2 import progress_tracking
23
25
  from langfun.core.eval.v2 import runners as runners_lib # pylint: disable=unused-import
24
26
  import pyglove as pg
25
27
 
@@ -31,6 +33,7 @@ class HtmlProgressTrackerTest(unittest.TestCase):
31
33
  def display(x):
32
34
  result['view'] = x.to_html()
33
35
 
36
+ self.assertFalse(progress_tracking._HtmlProgressTracker.is_per_example())
34
37
  lf_console._notebook = pg.Dict(
35
38
  display=display
36
39
  )
@@ -44,11 +47,14 @@ class HtmlProgressTrackerTest(unittest.TestCase):
44
47
  class TqdmProgressTrackerTest(unittest.TestCase):
45
48
 
46
49
  def test_basic(self):
50
+ self.assertFalse(progress_tracking._TqdmProgressTracker.is_per_example())
47
51
  root_dir = os.path.join(tempfile.mkdtemp(), 'test_tqdm_progress_tracker')
48
52
  experiment = eval_test_helper.test_experiment()
49
53
  string_io = io.StringIO()
50
54
  with contextlib.redirect_stderr(string_io):
51
55
  _ = experiment.run(root_dir, 'new', plugins=[])
56
+ sys.stderr.flush()
57
+ lf_concurrent.ProgressBar.refresh()
52
58
  self.assertIn('All: 100%', string_io.getvalue())
53
59
 
54
60
  def test_with_example_ids(self):
@@ -59,6 +65,8 @@ class TqdmProgressTrackerTest(unittest.TestCase):
59
65
  string_io = io.StringIO()
60
66
  with contextlib.redirect_stderr(string_io):
61
67
  _ = experiment.run(root_dir, 'new', example_ids=[1], plugins=[])
68
+ sys.stderr.flush()
69
+ lf_concurrent.ProgressBar.refresh()
62
70
  self.assertIn('All: 100%', string_io.getvalue())
63
71
 
64
72
 
@@ -32,8 +32,95 @@ _SUMMARY_FILE = 'summary.html'
32
32
  _EVALULATION_DETAIL_FILE = 'index.html'
33
33
 
34
34
 
35
+ class ExampleHtmlGenerator(experiment_lib.Plugin):
36
+ """Plugin for generating HTML views for each evaluation example."""
37
+
38
+ def on_example_complete(
39
+ self, runner: Runner, experiment: Experiment, example: Example
40
+ ):
41
+ self._save_example_html(runner, experiment, example)
42
+
43
+ def _save_example_html(
44
+ self, runner: Runner, experiment: Experiment, example: Example
45
+ ) -> None:
46
+ """Saves the example in HTML format."""
47
+ current_run = runner.current_run
48
+ def _generate():
49
+ try:
50
+ with pg.timeit() as t:
51
+ html = example.to_html(
52
+ collapse_level=None,
53
+ enable_summary_tooltip=False,
54
+ extra_flags=dict(
55
+ # For properly rendering the next link.
56
+ num_examples=getattr(experiment, 'num_examples', None)
57
+ ),
58
+ )
59
+ html.save(
60
+ runner.current_run.output_path_for(
61
+ experiment, f'{example.id}.html'
62
+ )
63
+ )
64
+ experiment.info(
65
+ f'\'{example.id}.html\' generated in {t.elapse:.2f} seconds. '
66
+ )
67
+ except BaseException as e: # pylint: disable=broad-except
68
+ experiment.error(
69
+ f'Failed to generate \'{example.id}.html\'. '
70
+ f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
71
+ )
72
+ raise e
73
+
74
+ def _copy():
75
+ src_file = current_run.input_path_for(experiment, f'{example.id}.html')
76
+ dest_file = current_run.output_path_for(experiment, f'{example.id}.html')
77
+
78
+ if src_file == dest_file:
79
+ return
80
+
81
+ if not pg.io.path_exists(src_file):
82
+ experiment.warning(
83
+ f'Skip copying \'{example.id}.html\' as '
84
+ f'{src_file!r} does not exist.'
85
+ )
86
+ return
87
+
88
+ try:
89
+ with pg.timeit() as t:
90
+ pg.io.copy(src_file, dest_file)
91
+ experiment.info(
92
+ f'\'{example.id}.html\' copied in {t.elapse:.2f} seconds.'
93
+ )
94
+ except BaseException as e: # pylint: disable=broad-except
95
+ experiment.error(
96
+ f'Failed to copy {src_file!r} to {dest_file!r}. Error: {e}.'
97
+ )
98
+ raise e
99
+
100
+ generate_example_html = current_run.generate_example_html
101
+ if (generate_example_html == 'all'
102
+ or (generate_example_html == 'new' and example.newly_processed)
103
+ or (isinstance(generate_example_html, list)
104
+ and example.id in generate_example_html)):
105
+ op = _generate
106
+ else:
107
+ op = _copy
108
+ runner.background_run(op)
109
+
110
+
35
111
  class HtmlReporter(experiment_lib.Plugin):
36
- """Plugin for periodically generating HTML reports for the experiment."""
112
+ """Plugin for periodically generating HTML reports for the experiment.
113
+
114
+ The `HtmlReporter` plugin generates several HTML files during an experiment
115
+ run:
116
+ - A `summary.html` at the root of the run directory, summarizing all
117
+ evaluations in the experiment.
118
+ - An `index.html` for each leaf evaluation, detailing the evaluation
119
+ definition, metrics, and logs.
120
+
121
+ These reports are updated periodically in the background during the run,
122
+ allowing users to monitor progress in near real-time.
123
+ """
37
124
 
38
125
  summary_interval: Annotated[
39
126
  int,
@@ -127,7 +214,6 @@ class HtmlReporter(experiment_lib.Plugin):
127
214
  def on_example_complete(
128
215
  self, runner: Runner, experiment: Experiment, example: Example
129
216
  ):
130
- self._save_example_html(runner, experiment, example)
131
217
  self._maybe_update_experiment_html(runner, experiment)
132
218
  self._maybe_update_summary(runner)
133
219
 
@@ -197,72 +283,3 @@ class HtmlReporter(experiment_lib.Plugin):
197
283
  runner.background_run(_save)
198
284
  else:
199
285
  _save()
200
-
201
- def _save_example_html(
202
- self, runner: Runner, experiment: Experiment, example: Example
203
- ) -> None:
204
- """Saves the example in HTML format."""
205
- current_run = runner.current_run
206
- def _generate():
207
- try:
208
- with pg.timeit() as t:
209
- html = example.to_html(
210
- collapse_level=None,
211
- enable_summary_tooltip=False,
212
- extra_flags=dict(
213
- # For properly rendering the next link.
214
- num_examples=getattr(experiment, 'num_examples', None)
215
- ),
216
- )
217
- html.save(
218
- runner.current_run.output_path_for(
219
- experiment, f'{example.id}.html'
220
- )
221
- )
222
- experiment.info(
223
- f'\'{example.id}.html\' generated in {t.elapse:.2f} seconds. '
224
- )
225
- except BaseException as e: # pylint: disable=broad-except
226
- experiment.error(
227
- f'Failed to generate \'{example.id}.html\'. '
228
- f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
229
- )
230
- raise e
231
-
232
- def _copy():
233
- src_file = current_run.input_path_for(experiment, f'{example.id}.html')
234
- dest_file = current_run.output_path_for(experiment, f'{example.id}.html')
235
-
236
- if src_file == dest_file:
237
- return
238
-
239
- if not pg.io.path_exists(src_file):
240
- experiment.warning(
241
- f'Skip copying \'{example.id}.html\' as '
242
- f'{src_file!r} does not exist.'
243
- )
244
- return
245
-
246
- try:
247
- with pg.timeit() as t, pg.io.open(src_file, 'r') as src:
248
- content = src.read()
249
- with pg.io.open(dest_file, 'w') as dest:
250
- dest.write(content)
251
- experiment.info(
252
- f'\'{example.id}.html\' copied in {t.elapse:.2f} seconds.'
253
- )
254
- except BaseException as e: # pylint: disable=broad-except
255
- experiment.error(
256
- f'Failed to copy {src_file!r} to {dest_file!r}. Error: {e}.'
257
- )
258
- raise e
259
-
260
- generate_example_html = current_run.generate_example_html
261
- if (generate_example_html == 'all'
262
- or (generate_example_html == 'new' and example.newly_processed)
263
- or (isinstance(generate_example_html, list)
264
- and example.id in generate_example_html)):
265
- op = _generate
266
- else:
267
- op = _copy
268
- runner.background_run(op)
@@ -29,7 +29,16 @@ class ReportingTest(unittest.TestCase):
29
29
  experiment = eval_test_helper.test_experiment()
30
30
  checkpointer = checkpointing.BulkCheckpointer('checkpoint.jsonl')
31
31
  reporter = reporting.HtmlReporter()
32
- run = experiment.run(root_dir, 'new', plugins=[checkpointer, reporter])
32
+ self.assertFalse(reporter.is_per_example())
33
+
34
+ example_html_generator = reporting.ExampleHtmlGenerator()
35
+ self.assertTrue(example_html_generator.is_per_example())
36
+
37
+ run = experiment.run(
38
+ root_dir,
39
+ 'new',
40
+ plugins=[checkpointer, reporter, example_html_generator]
41
+ )
33
42
  self.assertTrue(
34
43
  pg.io.path_exists(os.path.join(run.output_root, 'summary.html'))
35
44
  )
@@ -52,8 +61,10 @@ class ReportingTest(unittest.TestCase):
52
61
  root_dir = os.path.join(tempfile.mkdtemp(), 'test_reporting2')
53
62
  experiment = eval_test_helper.test_experiment()
54
63
  run = experiment.run(
55
- root_dir, 'new', plugins=[checkpointer, reporter],
56
- warm_start_from=run.output_root
64
+ root_dir,
65
+ 'new',
66
+ plugins=[checkpointer, reporter, example_html_generator],
67
+ warm_start_from=run.output_root,
57
68
  )
58
69
  self.assertTrue(
59
70
  pg.io.path_exists(os.path.join(run.output_root, 'summary.html'))
@@ -105,7 +116,12 @@ class ReportingTest(unittest.TestCase):
105
116
  .test_experiment_with_example_html_generation_error())
106
117
  checkpointer = checkpointing.BulkCheckpointer('checkpoint.jsonl')
107
118
  reporter = reporting.HtmlReporter()
108
- run = experiment.run(root_dir, 'new', plugins=[checkpointer, reporter])
119
+ example_html_generator = reporting.ExampleHtmlGenerator()
120
+ run = experiment.run(
121
+ root_dir,
122
+ 'new',
123
+ plugins=[checkpointer, reporter, example_html_generator]
124
+ )
109
125
  self.assertTrue(
110
126
  pg.io.path_exists(os.path.join(run.output_root, 'summary.html'))
111
127
  )
@@ -132,8 +148,10 @@ class ReportingTest(unittest.TestCase):
132
148
  experiment = (eval_test_helper
133
149
  .test_experiment_with_example_html_generation_error())
134
150
  run = experiment.run(
135
- root_dir, 'new', plugins=[checkpointer, reporter],
136
- warm_start_from=run.output_root
151
+ root_dir,
152
+ 'new',
153
+ plugins=[checkpointer, reporter, example_html_generator],
154
+ warm_start_from=run.output_root,
137
155
  )
138
156
  self.assertTrue(
139
157
  pg.io.path_exists(os.path.join(run.output_root, 'summary.html'))
@@ -0,0 +1,30 @@
1
+ # Copyright 2024 The Langfun Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Langfun evaluation runners."""
15
+
16
+ # pylint: disable=g-importing-member
17
+ from langfun.core.eval.v2.runners.base import RunnerBase
18
+ from langfun.core.eval.v2.runners.beam import BeamRunner
19
+ from langfun.core.eval.v2.runners.debug import DebugRunner
20
+ from langfun.core.eval.v2.runners.parallel import ParallelRunner
21
+ from langfun.core.eval.v2.runners.sequential import SequentialRunner
22
+ # pylint: enable=g-importing-member
23
+
24
+ __all__ = [
25
+ 'RunnerBase',
26
+ 'BeamRunner',
27
+ 'DebugRunner',
28
+ 'ParallelRunner',
29
+ 'SequentialRunner',
30
+ ]