langfun 0.1.2.dev202508250805__py3-none-any.whl → 0.1.2.dev202511110805__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langfun/__init__.py +1 -1
- langfun/core/__init__.py +6 -1
- langfun/core/agentic/__init__.py +4 -0
- langfun/core/agentic/action.py +412 -103
- langfun/core/agentic/action_eval.py +9 -2
- langfun/core/agentic/action_test.py +68 -6
- langfun/core/async_support.py +104 -5
- langfun/core/async_support_test.py +23 -0
- langfun/core/coding/python/correction.py +19 -9
- langfun/core/coding/python/execution.py +14 -12
- langfun/core/coding/python/generation.py +21 -16
- langfun/core/coding/python/sandboxing.py +23 -3
- langfun/core/component.py +42 -3
- langfun/core/concurrent.py +70 -6
- langfun/core/concurrent_test.py +9 -2
- langfun/core/console.py +1 -1
- langfun/core/data/conversion/anthropic.py +12 -3
- langfun/core/data/conversion/anthropic_test.py +8 -6
- langfun/core/data/conversion/gemini.py +9 -2
- langfun/core/data/conversion/gemini_test.py +12 -9
- langfun/core/data/conversion/openai.py +145 -31
- langfun/core/data/conversion/openai_test.py +161 -17
- langfun/core/eval/base.py +47 -43
- langfun/core/eval/base_test.py +4 -4
- langfun/core/eval/matching.py +5 -2
- langfun/core/eval/patching.py +3 -3
- langfun/core/eval/scoring.py +4 -3
- langfun/core/eval/v2/__init__.py +1 -0
- langfun/core/eval/v2/checkpointing.py +30 -4
- langfun/core/eval/v2/eval_test_helper.py +1 -1
- langfun/core/eval/v2/evaluation.py +60 -14
- langfun/core/eval/v2/example.py +22 -11
- langfun/core/eval/v2/experiment.py +51 -8
- langfun/core/eval/v2/metric_values.py +31 -3
- langfun/core/eval/v2/metric_values_test.py +32 -0
- langfun/core/eval/v2/metrics.py +39 -4
- langfun/core/eval/v2/metrics_test.py +14 -0
- langfun/core/eval/v2/progress.py +30 -1
- langfun/core/eval/v2/progress_test.py +27 -0
- langfun/core/eval/v2/progress_tracking_test.py +6 -0
- langfun/core/eval/v2/reporting.py +90 -71
- langfun/core/eval/v2/reporting_test.py +20 -6
- langfun/core/eval/v2/runners.py +27 -7
- langfun/core/eval/v2/runners_test.py +3 -0
- langfun/core/langfunc.py +45 -130
- langfun/core/langfunc_test.py +6 -4
- langfun/core/language_model.py +151 -31
- langfun/core/language_model_test.py +9 -3
- langfun/core/llms/__init__.py +12 -1
- langfun/core/llms/anthropic.py +157 -2
- langfun/core/llms/azure_openai.py +29 -17
- langfun/core/llms/cache/base.py +25 -3
- langfun/core/llms/cache/in_memory.py +48 -7
- langfun/core/llms/cache/in_memory_test.py +14 -4
- langfun/core/llms/compositional.py +25 -1
- langfun/core/llms/deepseek.py +30 -2
- langfun/core/llms/fake.py +39 -1
- langfun/core/llms/fake_test.py +9 -0
- langfun/core/llms/gemini.py +43 -7
- langfun/core/llms/google_genai.py +34 -1
- langfun/core/llms/groq.py +28 -3
- langfun/core/llms/llama_cpp.py +23 -4
- langfun/core/llms/openai.py +93 -3
- langfun/core/llms/openai_compatible.py +148 -27
- langfun/core/llms/openai_compatible_test.py +207 -20
- langfun/core/llms/openai_test.py +0 -2
- langfun/core/llms/rest.py +16 -1
- langfun/core/llms/vertexai.py +59 -8
- langfun/core/logging.py +1 -1
- langfun/core/mcp/__init__.py +10 -0
- langfun/core/mcp/client.py +177 -0
- langfun/core/mcp/client_test.py +71 -0
- langfun/core/mcp/session.py +241 -0
- langfun/core/mcp/session_test.py +54 -0
- langfun/core/mcp/testing/simple_mcp_client.py +33 -0
- langfun/core/mcp/testing/simple_mcp_server.py +33 -0
- langfun/core/mcp/tool.py +256 -0
- langfun/core/mcp/tool_test.py +197 -0
- langfun/core/memory.py +1 -0
- langfun/core/message.py +160 -55
- langfun/core/message_test.py +65 -81
- langfun/core/modalities/__init__.py +8 -0
- langfun/core/modalities/audio.py +21 -1
- langfun/core/modalities/image.py +19 -1
- langfun/core/modalities/mime.py +62 -3
- langfun/core/modalities/pdf.py +19 -1
- langfun/core/modalities/video.py +21 -1
- langfun/core/modality.py +167 -29
- langfun/core/modality_test.py +42 -12
- langfun/core/natural_language.py +1 -1
- langfun/core/sampling.py +4 -4
- langfun/core/sampling_test.py +20 -4
- langfun/core/structured/completion.py +34 -44
- langfun/core/structured/completion_test.py +23 -43
- langfun/core/structured/description.py +54 -50
- langfun/core/structured/function_generation.py +29 -12
- langfun/core/structured/mapping.py +74 -28
- langfun/core/structured/parsing.py +90 -74
- langfun/core/structured/parsing_test.py +0 -3
- langfun/core/structured/querying.py +242 -156
- langfun/core/structured/querying_test.py +95 -64
- langfun/core/structured/schema.py +70 -10
- langfun/core/structured/schema_generation.py +33 -14
- langfun/core/structured/scoring.py +45 -34
- langfun/core/structured/tokenization.py +24 -9
- langfun/core/subscription.py +2 -2
- langfun/core/template.py +175 -50
- langfun/core/template_test.py +123 -17
- langfun/env/__init__.py +43 -0
- langfun/env/base_environment.py +827 -0
- langfun/env/base_environment_test.py +473 -0
- langfun/env/base_feature.py +304 -0
- langfun/env/base_feature_test.py +228 -0
- langfun/env/base_sandbox.py +842 -0
- langfun/env/base_sandbox_test.py +1235 -0
- langfun/env/event_handlers/__init__.py +14 -0
- langfun/env/event_handlers/chain.py +233 -0
- langfun/env/event_handlers/chain_test.py +253 -0
- langfun/env/event_handlers/event_logger.py +472 -0
- langfun/env/event_handlers/event_logger_test.py +304 -0
- langfun/env/event_handlers/metric_writer.py +726 -0
- langfun/env/event_handlers/metric_writer_test.py +214 -0
- langfun/env/interface.py +1640 -0
- langfun/env/interface_test.py +151 -0
- langfun/env/load_balancers.py +59 -0
- langfun/env/load_balancers_test.py +139 -0
- langfun/env/test_utils.py +497 -0
- {langfun-0.1.2.dev202508250805.dist-info → langfun-0.1.2.dev202511110805.dist-info}/METADATA +7 -3
- langfun-0.1.2.dev202511110805.dist-info/RECORD +200 -0
- langfun-0.1.2.dev202508250805.dist-info/RECORD +0 -172
- {langfun-0.1.2.dev202508250805.dist-info → langfun-0.1.2.dev202511110805.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202508250805.dist-info → langfun-0.1.2.dev202511110805.dist-info}/licenses/LICENSE +0 -0
- {langfun-0.1.2.dev202508250805.dist-info → langfun-0.1.2.dev202511110805.dist-info}/top_level.txt +0 -0
langfun/core/eval/v2/reporting.py
CHANGED
@@ -32,8 +32,97 @@ _SUMMARY_FILE = 'summary.html'
 _EVALULATION_DETAIL_FILE = 'index.html'
 
 
+class ExampleHtmlGenerator(experiment_lib.Plugin):
+  """Plugin for generating HTML views for each evaluation example."""
+
+  def on_example_complete(
+      self, runner: Runner, experiment: Experiment, example: Example
+  ):
+    self._save_example_html(runner, experiment, example)
+
+  def _save_example_html(
+      self, runner: Runner, experiment: Experiment, example: Example
+  ) -> None:
+    """Saves the example in HTML format."""
+    current_run = runner.current_run
+    def _generate():
+      try:
+        with pg.timeit() as t:
+          html = example.to_html(
+              collapse_level=None,
+              enable_summary_tooltip=False,
+              extra_flags=dict(
+                  # For properly rendering the next link.
+                  num_examples=getattr(experiment, 'num_examples', None)
+              ),
+          )
+          html.save(
+              runner.current_run.output_path_for(
+                  experiment, f'{example.id}.html'
+              )
+          )
+        experiment.info(
+            f'\'{example.id}.html\' generated in {t.elapse:.2f} seconds. '
+        )
+      except BaseException as e:  # pylint: disable=broad-except
+        experiment.error(
+            f'Failed to generate \'{example.id}.html\'. '
+            f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
+        )
+        raise e
+
+    def _copy():
+      src_file = current_run.input_path_for(experiment, f'{example.id}.html')
+      dest_file = current_run.output_path_for(experiment, f'{example.id}.html')
+
+      if src_file == dest_file:
+        return
+
+      if not pg.io.path_exists(src_file):
+        experiment.warning(
+            f'Skip copying \'{example.id}.html\' as '
+            f'{src_file!r} does not exist.'
+        )
+        return
+
+      try:
+        with pg.timeit() as t, pg.io.open(src_file, 'r') as src:
+          content = src.read()
+          with pg.io.open(dest_file, 'w') as dest:
+            dest.write(content)
+        experiment.info(
+            f'\'{example.id}.html\' copied in {t.elapse:.2f} seconds.'
+        )
+      except BaseException as e:  # pylint: disable=broad-except
+        experiment.error(
+            f'Failed to copy {src_file!r} to {dest_file!r}. Error: {e}.'
+        )
+        raise e
+
+    generate_example_html = current_run.generate_example_html
+    if (generate_example_html == 'all'
+        or (generate_example_html == 'new' and example.newly_processed)
+        or (isinstance(generate_example_html, list)
+            and example.id in generate_example_html)):
+      op = _generate
+    else:
+      op = _copy
+    runner.background_run(op)
+
+
 class HtmlReporter(experiment_lib.Plugin):
-  """Plugin for periodically generating HTML reports for the experiment."""
+  """Plugin for periodically generating HTML reports for the experiment.
+
+  The `HtmlReporter` plugin generates several HTML files during an experiment
+  run:
+  - A `summary.html` at the root of the run directory, summarizing all
+    evaluations in the experiment.
+  - An `index.html` for each leaf evaluation, detailing the evaluation
+    definition, metrics, and logs.
+
+  These reports are updated periodically in the background during the run,
+  allowing users to monitor progress in near real-time.
+  """
 
   summary_interval: Annotated[
       int,
@@ -127,7 +216,6 @@ class HtmlReporter(experiment_lib.Plugin):
   def on_example_complete(
       self, runner: Runner, experiment: Experiment, example: Example
   ):
-    self._save_example_html(runner, experiment, example)
     self._maybe_update_experiment_html(runner, experiment)
     self._maybe_update_summary(runner)
 
@@ -197,72 +285,3 @@ class HtmlReporter(experiment_lib.Plugin):
       runner.background_run(_save)
     else:
       _save()
-
-  def _save_example_html(
-      self, runner: Runner, experiment: Experiment, example: Example
-  ) -> None:
-    """Saves the example in HTML format."""
-    current_run = runner.current_run
-    def _generate():
-      try:
-        with pg.timeit() as t:
-          html = example.to_html(
-              collapse_level=None,
-              enable_summary_tooltip=False,
-              extra_flags=dict(
-                  # For properly rendering the next link.
-                  num_examples=getattr(experiment, 'num_examples', None)
-              ),
-          )
-          html.save(
-              runner.current_run.output_path_for(
-                  experiment, f'{example.id}.html'
-              )
-          )
-        experiment.info(
-            f'\'{example.id}.html\' generated in {t.elapse:.2f} seconds. '
-        )
-      except BaseException as e:  # pylint: disable=broad-except
-        experiment.error(
-            f'Failed to generate \'{example.id}.html\'. '
-            f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
-        )
-        raise e
-
-    def _copy():
-      src_file = current_run.input_path_for(experiment, f'{example.id}.html')
-      dest_file = current_run.output_path_for(experiment, f'{example.id}.html')
-
-      if src_file == dest_file:
-        return
-
-      if not pg.io.path_exists(src_file):
-        experiment.warning(
-            f'Skip copying \'{example.id}.html\' as '
-            f'{src_file!r} does not exist.'
-        )
-        return
-
-      try:
-        with pg.timeit() as t, pg.io.open(src_file, 'r') as src:
-          content = src.read()
-          with pg.io.open(dest_file, 'w') as dest:
-            dest.write(content)
-        experiment.info(
-            f'\'{example.id}.html\' copied in {t.elapse:.2f} seconds.'
-        )
-      except BaseException as e:  # pylint: disable=broad-except
-        experiment.error(
-            f'Failed to copy {src_file!r} to {dest_file!r}. Error: {e}.'
-        )
-        raise e
-
-    generate_example_html = current_run.generate_example_html
-    if (generate_example_html == 'all'
-        or (generate_example_html == 'new' and example.newly_processed)
-        or (isinstance(generate_example_html, list)
-            and example.id in generate_example_html)):
-      op = _generate
-    else:
-      op = _copy
-    runner.background_run(op)
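Note: per-example HTML generation has moved out of `HtmlReporter` into the new `ExampleHtmlGenerator` plugin, so runs that want per-example pages must now pass the plugin explicitly. A minimal usage sketch, following the `experiment.run` call shape exercised by the tests below; the experiment object itself is assumed to be defined elsewhere:

    from langfun.core.eval.v2 import checkpointing, reporting

    experiment = my_experiment()  # assumed: any langfun eval-v2 experiment
    run = experiment.run(
        '/tmp/my_eval_root',  # root directory for run outputs
        'new',                # start a new run
        plugins=[
            checkpointing.BulkCheckpointer('checkpoint.jsonl'),
            reporting.HtmlReporter(),          # summary.html + per-eval index.html
            reporting.ExampleHtmlGenerator(),  # per-example <id>.html pages
        ],
    )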
langfun/core/eval/v2/reporting_test.py
CHANGED
@@ -29,7 +29,12 @@ class ReportingTest(unittest.TestCase):
     experiment = eval_test_helper.test_experiment()
     checkpointer = checkpointing.BulkCheckpointer('checkpoint.jsonl')
     reporter = reporting.HtmlReporter()
-    run = experiment.run(root_dir, 'new', plugins=[checkpointer, reporter])
+    example_html_generator = reporting.ExampleHtmlGenerator()
+    run = experiment.run(
+        root_dir,
+        'new',
+        plugins=[checkpointer, reporter, example_html_generator]
+    )
     self.assertTrue(
         pg.io.path_exists(os.path.join(run.output_root, 'summary.html'))
     )
@@ -52,8 +57,10 @@ class ReportingTest(unittest.TestCase):
     root_dir = os.path.join(tempfile.mkdtemp(), 'test_reporting2')
     experiment = eval_test_helper.test_experiment()
     run = experiment.run(
-        root_dir,
-        'new', plugins=[checkpointer, reporter], warm_start_from=run.output_root
+        root_dir,
+        'new',
+        plugins=[checkpointer, reporter, example_html_generator],
+        warm_start_from=run.output_root,
     )
     self.assertTrue(
         pg.io.path_exists(os.path.join(run.output_root, 'summary.html'))
@@ -105,7 +112,12 @@ class ReportingTest(unittest.TestCase):
                   .test_experiment_with_example_html_generation_error())
     checkpointer = checkpointing.BulkCheckpointer('checkpoint.jsonl')
     reporter = reporting.HtmlReporter()
-    run = experiment.run(root_dir, 'new', plugins=[checkpointer, reporter])
+    example_html_generator = reporting.ExampleHtmlGenerator()
+    run = experiment.run(
+        root_dir,
+        'new',
+        plugins=[checkpointer, reporter, example_html_generator]
+    )
     self.assertTrue(
         pg.io.path_exists(os.path.join(run.output_root, 'summary.html'))
     )
@@ -132,8 +144,10 @@ class ReportingTest(unittest.TestCase):
     experiment = (eval_test_helper
                   .test_experiment_with_example_html_generation_error())
     run = experiment.run(
-        root_dir,
-        'new', plugins=[checkpointer, reporter], warm_start_from=run.output_root
+        root_dir,
+        'new',
+        plugins=[checkpointer, reporter, example_html_generator],
+        warm_start_from=run.output_root,
     )
     self.assertTrue(
         pg.io.path_exists(os.path.join(run.output_root, 'summary.html'))
langfun/core/eval/v2/runners.py
CHANGED
@@ -42,7 +42,14 @@ _RUN_MANIFEST = 'run.json'
 
 
 class RunnerBase(Runner):
-  """
+  """Base class for runners with plugin support and IO pooling.
+
+  `RunnerBase` provides the basic runner functionalities such as plugin
+  integration for checkpointing, reporting and progress tracking.
+  It also manages a thread pool for background IO operations.
+  Subclasses should implement `_run` and `_evaluate_items` for different
+  execution strategies.
+  """
 
   tqdm: Annotated[
       bool,
@@ -397,11 +404,12 @@ class RunnerBase(Runner):
 
 
 class SequentialRunner(RunnerBase):
-  """
+  """A runner that executes evaluations and examples sequentially.
 
-
-
-  exceptions
+  The sequential runner executes all evaluations and their examples in the
+  calling thread. Background tasks are also run sequentially, which makes it
+  easier to debug as exceptions from background tasks will be raised
+  immediately.
   """
 
   NAME = 'sequential'
@@ -426,7 +434,13 @@ class SequentialRunner(RunnerBase):
 
 
 class DebugRunner(SequentialRunner):
-  """
+  """A runner for debugging evaluations.
+
+  The debug runner is a sequential runner that only runs the first example
+  of each evaluation, with `raise_if_has_error` enabled. This is useful for
+  quickly identifying issues in evaluation logic during development.
+  Checkpointers are disabled for this runner.
+  """
 
   NAME = 'debug'
 
@@ -444,7 +458,13 @@ class DebugRunner(SequentialRunner):
 
 
 class ParallelRunner(RunnerBase):
-  """
+  """A runner that executes evaluations and examples in parallel.
+
+  The parallel runner groups evaluations by their required resources
+  (e.g., specific LLMs) and runs evaluations that do not share resources in
+  parallel. Within each evaluation, examples are also processed in parallel
+  using threads, up to `Evaluation.max_workers`.
+  """
 
   NAME = 'parallel'
 
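The three docstrings above map to registered runner names through each class's `NAME` constant. A sketch of selecting a runner by name, reusing the `experiment` object from the earlier sketch; only the `NAME` values ('sequential', 'debug', 'parallel') are confirmed by this diff, so treating `runner=` as an accepted keyword of `experiment.run` is an assumption:

    # Assumed API: runner selection by registered NAME; only the NAME
    # constants themselves are confirmed by the diff above.
    run = experiment.run(
        '/tmp/my_eval_root',
        'new',
        runner='debug',  # runs only the first example of each evaluation
    )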
langfun/core/langfunc.py
CHANGED
@@ -32,146 +32,43 @@ _TLS_LFUN_CALL_STACK = '_langfunc_callstack'
 # NOTE(daiyip): Only the template string belongs to the positional arguments,
 # all others are keyword-only for clarity.
 @pg.use_init_args(['template_str'])
-class LangFunc(
-
-):
-  r"""Base class for natural-language driven component.
-
-  ``LangFunc`` is a language-driven component that enables users to
-  seamlessly interact with Language Models (LLMs) using a blend of natural
-  language and code. It empowers users to easily modularize prompt/execution
-  logics, compose them, and simplify the creation of Language Model (LLM)-based
-  components and applications.
-
-  LangFunc can be conceptualized as a string template with embeddable code,
-  but it distinguishes itself from traditional template systems in four key
-  ways.
-
-  Firstly, it enables easy modularization of templates along with the required
-  values with OO principles, providing a reusable way for LLM-based content
-  generation. For example:
-
-    ```
-    class FewshotExamples(lf.LangFunc):
-      '''Base for fewshot prompt.
-
-      {% for example in examples %}
-      {{ example }}
-      {% endfor %}
-      '''
-
-    # Usage 1: __init__ time binding.
-    assert FewshotPrompt(examples=['foo', 'bar'])() == 'foo\nbar'
-
-    # Usage 2: __call__ time binding.
-    assert FewshotPrompt()(examples=['foo', 'bar']) == 'foo\nbar'
-
-    class ToolDescription(lf.LangFunc):
-      '''Tool descriptions.
-
-      {% for tool in tools %}
-      {{ tool.description }}
-      {% endfor %}
-      '''
-      # We want to constrain tools to be a list of `Tool` objects.
-      tools: list[Tool]
-
-    # Raises: runtime type checking will fail on [1, 2, 3].
-    ToolDescription(tools=[1, 2, 3])
-    ```
-
-  Secondly, it has the capability to compose multiple LangFuncs together,
-  enabling the accomplishment of complex language tasks with maximum reuse.
-  It allows users to provide program inputs to all the LangFuncs within a
-  composition at the top level, significantly simplifying the process of
-  providing context for users. For example:
-
-    ```
-    class ReAct(lf.LangFunc):
-      '''ReAct prompt for tool-use.
-
-      {{ preamble }}
-      {{ tool_description }}
-      {{ tool_examples }}
-      {{ user_input }}
-      '''
-      # Default preamble, which could be overriden from subclass
-      # or parsed from the `__init__` argument.
-      preamble = 'Please help me on my task based on the following tools.',
-
-    react = ReAct(
-        tool_description=ToolDescription()
-        tool_examples=FewshotExamples(),
-        # Partially bind `tools` and `examples`.
-        tools=my_tools,
-        examples=[t.examples for t in my_tools]
-    )
-
-    # Late bind `user_input` at __call__ time.
-    react(user_input='Help me get a lunch to go, veggie please.' )
-    ```
-
-  Thirdly, it allows the flexibility to encapsulate complex compositions to
-  reusable classes and modify them. For example:
-
-    ```
-    # The compound decorator converts a function into a LangFunc.
-    @lf.compound
-    def react_with_tools(preamble, tools: list[Tool]):
-      return ReAct(
-          preamble=preamble,
-          tool_description=ToolDescription()
-          tool_examples=FewshotExamples(),
-          # Partially bind `tools` and `examples`.
-          tools=my_tools,
-          examples=[t.examples for t in my_tools]
-      )
+class LangFunc(template_lib.Template):
+  r"""Base class for Language-based functions.
 
-
-
-
+  LangFunc represents a function powered by a language model. It is a subclass
+  of `lf.Template` and can be thought of as a `lf.Template` augmented with an LM
+  and an output transformation. Calling a `lf.LangFunc` is equivalent to calling
+  the LM with the rendered prompt and transforming the output.
 
-
-    '''
+  LangFunc can be directly constructed and used.
 
-
-
-    prompt=react_with_tools(
-        preamble=(
-            f'Please help me solve my problem using tools. '
-            f'Current time is {{datetime.datetime.now()}}'),
-        tools=my_tools))
+  ```python
+  import langfun as lf
 
-
-
+  func = lf.LangFunc("Hello, {{name}}!")
+  print(func(name="Gemini", lm=lf.llms.Gemini25Flash()))
+  # Output: Hello, how are you today?
+  ```
 
-
-  it could be manipulated programmatically, turned into a space for data
-  sampling, or even tuned by AutoML. For example:
+  Or it can be subclassed:
 
-
-
+  ```python
+  import langfun as lf
 
-
-
-      'Help me solve my problem using the following tools:',
-      'Help me with the tools below:',
-      ...
-  ])
-  # Choose any two of the tools for generating data.
-  tools=pg.manyof(2, [
-      google_search(...),
-      doordash(...),
-      ...
-  ])
+  class Compute(lf.LangFunc):
+    '''Compute a simple arithmetic expression.
 
-
-
+    {{expression}} = ?
+    '''
+    expression: str
 
-
+    def transform_output(self, lm_output: lf.Message) -> lf.Message:
+      lm_output.metadata.result = float(lm_output.text)
+      return lm_output
 
-
-
+  r = Compute(expression="1 + 1")(lm=lf.llms.Gemini25Flash())
+  print(r.result)
+  # Output: 2.0
 
  Final note: always include these capitalized words if you don't want to treat
  the docstr as the template str: THIS IS NOT A TEMPLATE. So as a result, this
@@ -305,6 +202,24 @@ class LangFunc(
       message_cls: Type[message_lib.Message] = message_lib.UserMessage,
       **kwargs,
   ) -> message_lib.Message:
+    """Renders the template and transforms it as LM input message.
+
+    Args:
+      allow_partial: If True, allows partial rendering, which leaves unresolved
+        variables in place in the output text. Otherwise, raises error when
+        there are unresolved variables.
+      implicit: If True, reuse the rendering output if a parent `lf.Template`
+        is rendering current `lf.Template` multiple times. This is important
+        for making sure all references to the same `lf.Template` within a single
+        top-level rendering would return the same result. If False, every call
+        to `render` will trigger the actual rendering process.
+      message_cls: The message class used for creating the return value.
+      **kwargs: Values for template variables, which override values from
+        member attributes or context.
+
+    Returns:
+      A Message object containing the rendered result.
+    """
     lm_input = super().render(
         allow_partial=allow_partial,
         implicit=implicit,
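The new `render` docstring spells out the `allow_partial` and `implicit` semantics. A small sketch of partial rendering under the documented behavior (output shown is illustrative):

    import langfun as lf

    func = lf.LangFunc('Hello, {{name}}! Today is {{day}}.', name='Alice')

    # With allow_partial=True, unresolved variables such as `day` stay in
    # place in the output text instead of raising an error.
    print(func.render(allow_partial=True))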
langfun/core/langfunc_test.py
CHANGED
@@ -82,7 +82,7 @@ class LangFuncCallTest(unittest.TestCase):
 
     i = l.render()
     self.assertEqual(i, 'Hello')
-    self.assertEqual(i, message.UserMessage('Hello'))
+    self.assertEqual(i, message.UserMessage('Hello', __template_input__={}))
     self.assertEqual(i.tags, ['rendered'])
 
     r = l()
@@ -96,7 +96,9 @@ class LangFuncCallTest(unittest.TestCase):
     self.assertEqual(r.tags, ['lm-response', 'lm-output'])
     self.assertEqual(
         r.source,
-        message.UserMessage('Hello', metadata=dict(cache_seed=0))
+        message.UserMessage(
+            'Hello', metadata=dict(cache_seed=0, __template_input__={})
+        )
     )
     self.assertEqual(r.source.tags, ['rendered', 'lm-input'])
 
@@ -107,8 +109,8 @@ class LangFuncCallTest(unittest.TestCase):
         ' lm=ExcitedEchoer(sampling_options=LMSamplingOptions(temperature=None,'
         ' max_tokens=None, n=1, top_k=40, top_p=None, stop=None,'
         ' random_seed=None, logprobs=False, top_logprobs=None,'
-        ' max_thinking_tokens=None, reasoning_effort=None),'
-        ' max_concurrency=None, timeout=120.0, max_attempts=5,'
+        ' max_thinking_tokens=None, reasoning_effort=None, extras={}),'
+        ' cache=None, max_concurrency=None, timeout=120.0, max_attempts=5,'
         ' retry_interval=(5, 60), exponential_backoff=True,'
         ' max_retry_interval=300, debug=False))',
     )