langfun 0.1.2.dev202510200805__py3-none-any.whl → 0.1.2.dev202511160804__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of langfun might be problematic. Click here for more details.
- langfun/core/__init__.py +1 -0
- langfun/core/agentic/action.py +107 -12
- langfun/core/agentic/action_eval.py +9 -2
- langfun/core/agentic/action_test.py +25 -0
- langfun/core/async_support.py +32 -3
- langfun/core/coding/python/correction.py +19 -9
- langfun/core/coding/python/execution.py +14 -12
- langfun/core/coding/python/generation.py +21 -16
- langfun/core/coding/python/sandboxing.py +23 -3
- langfun/core/component.py +42 -3
- langfun/core/concurrent.py +70 -6
- langfun/core/concurrent_test.py +1 -0
- langfun/core/console.py +1 -1
- langfun/core/data/conversion/anthropic.py +12 -3
- langfun/core/data/conversion/anthropic_test.py +8 -6
- langfun/core/data/conversion/gemini.py +9 -2
- langfun/core/data/conversion/gemini_test.py +12 -9
- langfun/core/data/conversion/openai.py +145 -31
- langfun/core/data/conversion/openai_test.py +161 -17
- langfun/core/eval/base.py +48 -44
- langfun/core/eval/base_test.py +4 -4
- langfun/core/eval/matching.py +5 -2
- langfun/core/eval/patching.py +3 -3
- langfun/core/eval/scoring.py +4 -3
- langfun/core/eval/v2/__init__.py +1 -0
- langfun/core/eval/v2/checkpointing.py +39 -5
- langfun/core/eval/v2/checkpointing_test.py +1 -1
- langfun/core/eval/v2/eval_test_helper.py +97 -1
- langfun/core/eval/v2/evaluation.py +88 -16
- langfun/core/eval/v2/evaluation_test.py +9 -3
- langfun/core/eval/v2/example.py +45 -39
- langfun/core/eval/v2/example_test.py +3 -3
- langfun/core/eval/v2/experiment.py +51 -8
- langfun/core/eval/v2/metric_values.py +31 -3
- langfun/core/eval/v2/metric_values_test.py +32 -0
- langfun/core/eval/v2/metrics.py +157 -44
- langfun/core/eval/v2/metrics_test.py +39 -18
- langfun/core/eval/v2/progress.py +30 -1
- langfun/core/eval/v2/progress_test.py +27 -0
- langfun/core/eval/v2/progress_tracking_test.py +3 -0
- langfun/core/eval/v2/reporting.py +90 -71
- langfun/core/eval/v2/reporting_test.py +20 -6
- langfun/core/eval/v2/runners/__init__.py +26 -0
- langfun/core/eval/v2/{runners.py → runners/base.py} +22 -124
- langfun/core/eval/v2/runners/debug.py +40 -0
- langfun/core/eval/v2/runners/debug_test.py +79 -0
- langfun/core/eval/v2/runners/parallel.py +100 -0
- langfun/core/eval/v2/runners/parallel_test.py +98 -0
- langfun/core/eval/v2/runners/sequential.py +47 -0
- langfun/core/eval/v2/runners/sequential_test.py +175 -0
- langfun/core/langfunc.py +45 -130
- langfun/core/langfunc_test.py +6 -4
- langfun/core/language_model.py +103 -16
- langfun/core/language_model_test.py +9 -3
- langfun/core/llms/__init__.py +7 -1
- langfun/core/llms/anthropic.py +157 -2
- langfun/core/llms/azure_openai.py +29 -17
- langfun/core/llms/cache/base.py +25 -3
- langfun/core/llms/cache/in_memory.py +48 -7
- langfun/core/llms/cache/in_memory_test.py +14 -4
- langfun/core/llms/compositional.py +25 -1
- langfun/core/llms/deepseek.py +30 -2
- langfun/core/llms/fake.py +32 -1
- langfun/core/llms/gemini.py +14 -9
- langfun/core/llms/google_genai.py +29 -1
- langfun/core/llms/groq.py +28 -3
- langfun/core/llms/llama_cpp.py +23 -4
- langfun/core/llms/openai.py +36 -3
- langfun/core/llms/openai_compatible.py +148 -27
- langfun/core/llms/openai_compatible_test.py +207 -20
- langfun/core/llms/openai_test.py +0 -2
- langfun/core/llms/rest.py +12 -1
- langfun/core/llms/vertexai.py +51 -8
- langfun/core/logging.py +1 -1
- langfun/core/mcp/client.py +77 -22
- langfun/core/mcp/client_test.py +8 -35
- langfun/core/mcp/session.py +94 -29
- langfun/core/mcp/session_test.py +54 -0
- langfun/core/mcp/tool.py +151 -22
- langfun/core/mcp/tool_test.py +197 -0
- langfun/core/memory.py +1 -0
- langfun/core/message.py +160 -55
- langfun/core/message_test.py +65 -81
- langfun/core/modalities/__init__.py +8 -0
- langfun/core/modalities/audio.py +21 -1
- langfun/core/modalities/image.py +19 -1
- langfun/core/modalities/mime.py +62 -3
- langfun/core/modalities/pdf.py +19 -1
- langfun/core/modalities/video.py +21 -1
- langfun/core/modality.py +167 -29
- langfun/core/modality_test.py +42 -12
- langfun/core/natural_language.py +1 -1
- langfun/core/sampling.py +4 -4
- langfun/core/sampling_test.py +20 -4
- langfun/core/structured/__init__.py +2 -24
- langfun/core/structured/completion.py +34 -44
- langfun/core/structured/completion_test.py +23 -43
- langfun/core/structured/description.py +54 -50
- langfun/core/structured/function_generation.py +29 -12
- langfun/core/structured/mapping.py +81 -37
- langfun/core/structured/parsing.py +95 -79
- langfun/core/structured/parsing_test.py +0 -3
- langfun/core/structured/querying.py +215 -142
- langfun/core/structured/querying_test.py +65 -29
- langfun/core/structured/schema/__init__.py +48 -0
- langfun/core/structured/schema/base.py +664 -0
- langfun/core/structured/schema/base_test.py +531 -0
- langfun/core/structured/schema/json.py +174 -0
- langfun/core/structured/schema/json_test.py +121 -0
- langfun/core/structured/schema/python.py +316 -0
- langfun/core/structured/schema/python_test.py +410 -0
- langfun/core/structured/schema_generation.py +33 -14
- langfun/core/structured/scoring.py +47 -36
- langfun/core/structured/tokenization.py +26 -11
- langfun/core/subscription.py +2 -2
- langfun/core/template.py +175 -50
- langfun/core/template_test.py +123 -17
- langfun/env/__init__.py +8 -2
- langfun/env/base_environment.py +320 -128
- langfun/env/base_environment_test.py +473 -0
- langfun/env/base_feature.py +92 -15
- langfun/env/base_feature_test.py +228 -0
- langfun/env/base_sandbox.py +84 -361
- langfun/env/base_sandbox_test.py +1235 -0
- langfun/env/event_handlers/__init__.py +1 -1
- langfun/env/event_handlers/chain.py +233 -0
- langfun/env/event_handlers/chain_test.py +253 -0
- langfun/env/event_handlers/event_logger.py +95 -98
- langfun/env/event_handlers/event_logger_test.py +21 -21
- langfun/env/event_handlers/metric_writer.py +225 -140
- langfun/env/event_handlers/metric_writer_test.py +23 -6
- langfun/env/interface.py +854 -40
- langfun/env/interface_test.py +112 -2
- langfun/env/load_balancers_test.py +23 -2
- langfun/env/test_utils.py +126 -84
- {langfun-0.1.2.dev202510200805.dist-info → langfun-0.1.2.dev202511160804.dist-info}/METADATA +1 -1
- langfun-0.1.2.dev202511160804.dist-info/RECORD +211 -0
- langfun/core/eval/v2/runners_test.py +0 -343
- langfun/core/structured/schema.py +0 -987
- langfun/core/structured/schema_test.py +0 -982
- langfun/env/base_test.py +0 -1481
- langfun/env/event_handlers/base.py +0 -350
- langfun-0.1.2.dev202510200805.dist-info/RECORD +0 -195
- {langfun-0.1.2.dev202510200805.dist-info → langfun-0.1.2.dev202511160804.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202510200805.dist-info → langfun-0.1.2.dev202511160804.dist-info}/licenses/LICENSE +0 -0
- {langfun-0.1.2.dev202510200805.dist-info → langfun-0.1.2.dev202511160804.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# Copyright 2025 The Langfun Authors
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
"""Parallel runner."""
|
|
15
|
+
|
|
16
|
+
import collections
|
|
17
|
+
import random
|
|
18
|
+
import threading
|
|
19
|
+
import time
|
|
20
|
+
|
|
21
|
+
from typing import Annotated, Iterator
|
|
22
|
+
import langfun.core as lf
|
|
23
|
+
from langfun.core.eval.v2.runners import base
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ParallelRunner(base.RunnerBase):
|
|
27
|
+
"""A runner that executes evaluations and examples in parallel.
|
|
28
|
+
|
|
29
|
+
The parallel runner groups evaluations by their required resources
|
|
30
|
+
(e.g., specific LLMs) and runs evaluations that do not share resources in
|
|
31
|
+
parallel. Within each evaluation, examples are also processed in parallel
|
|
32
|
+
using threads, up to `Evaluation.max_workers`.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
NAME = 'parallel'
|
|
36
|
+
|
|
37
|
+
timeout: Annotated[
|
|
38
|
+
int | None,
|
|
39
|
+
'Timeout for each evaluation example.'
|
|
40
|
+
] = None
|
|
41
|
+
|
|
42
|
+
concurrent_startup_delay: Annotated[
|
|
43
|
+
tuple[int, int] | None,
|
|
44
|
+
(
|
|
45
|
+
'A range of seconds to delay the initial evaluation of each thread '
|
|
46
|
+
'in the thread pool, helping to prevent a burst in LLM QPS at '
|
|
47
|
+
'startup. If set to None, no delay will be applied.'
|
|
48
|
+
)
|
|
49
|
+
] = None
|
|
50
|
+
|
|
51
|
+
def _run(self, evaluations: list[base.Evaluation]) -> None:
|
|
52
|
+
"""Runs the evaluations in parallel."""
|
|
53
|
+
def _run_group(evaluation_group: list[base.Evaluation]):
|
|
54
|
+
for e in evaluation_group:
|
|
55
|
+
self.run_evaluation(e)
|
|
56
|
+
|
|
57
|
+
# Run evaluations in parallel, grouped by resource key.
|
|
58
|
+
groups: dict[str, list[base.Evaluation]] = collections.defaultdict(list)
|
|
59
|
+
for e in evaluations:
|
|
60
|
+
resource_ids = e.resource_ids()
|
|
61
|
+
if not resource_ids:
|
|
62
|
+
group_id = e.id
|
|
63
|
+
else:
|
|
64
|
+
# TODO(daiyip): support group that requires multiple resources.
|
|
65
|
+
group_id = resource_ids.pop()
|
|
66
|
+
groups[group_id].append(e)
|
|
67
|
+
|
|
68
|
+
for _, _, _ in lf.concurrent_map(
|
|
69
|
+
_run_group,
|
|
70
|
+
groups.values(),
|
|
71
|
+
max_workers=max(64, len(groups)),
|
|
72
|
+
timeout=self.timeout,
|
|
73
|
+
silence_on_errors=None,
|
|
74
|
+
):
|
|
75
|
+
pass
|
|
76
|
+
|
|
77
|
+
def _evaluate_items(
|
|
78
|
+
self, evaluation: base.Evaluation, items: Iterator[base.Example]
|
|
79
|
+
) -> None:
|
|
80
|
+
"""Overrides item evaluation to run the items in parallel."""
|
|
81
|
+
if self.concurrent_startup_delay is not None:
|
|
82
|
+
thread_delayed = {}
|
|
83
|
+
def _evaluate_item(item: base.Example):
|
|
84
|
+
thread_id = threading.current_thread().ident
|
|
85
|
+
if thread_id not in thread_delayed:
|
|
86
|
+
thread_delayed[thread_id] = True
|
|
87
|
+
time.sleep(random.randint(*self.concurrent_startup_delay))
|
|
88
|
+
return self.evaluate_item(evaluation, item)
|
|
89
|
+
else:
|
|
90
|
+
def _evaluate_item(item: base.Example):
|
|
91
|
+
return self.evaluate_item(evaluation, item)
|
|
92
|
+
|
|
93
|
+
for _, _, _ in lf.concurrent_map(
|
|
94
|
+
_evaluate_item,
|
|
95
|
+
items,
|
|
96
|
+
max_workers=evaluation.max_workers,
|
|
97
|
+
timeout=self.timeout,
|
|
98
|
+
silence_on_errors=None,
|
|
99
|
+
):
|
|
100
|
+
pass
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# Copyright 2024 The Langfun Authors
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
"""Tests for parallel runner."""
|
|
15
|
+
import os
|
|
16
|
+
import tempfile
|
|
17
|
+
from typing import Any
|
|
18
|
+
import unittest
|
|
19
|
+
|
|
20
|
+
from langfun.core.eval.v2 import eval_test_helper
|
|
21
|
+
from langfun.core.eval.v2.runners import parallel # pylint: disable=unused-import
|
|
22
|
+
|
|
23
|
+
import pyglove as pg
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class RunnerTest(unittest.TestCase):
|
|
27
|
+
|
|
28
|
+
def assert_same_list(self, actual: list[Any], expected: list[Any]):
|
|
29
|
+
self.assertEqual(len(actual), len(expected))
|
|
30
|
+
for i, (x, y) in enumerate(zip(actual, expected)):
|
|
31
|
+
if x is not y:
|
|
32
|
+
print(i, pg.diff(x, y))
|
|
33
|
+
self.assertIs(x, y)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class ParallelRunnerTest(RunnerTest):
|
|
37
|
+
|
|
38
|
+
def test_parallel_runner(self):
|
|
39
|
+
plugin = eval_test_helper.TestPlugin()
|
|
40
|
+
exp = eval_test_helper.test_experiment()
|
|
41
|
+
root_dir = os.path.join(tempfile.mkdtemp(), 'test_parallel_runner')
|
|
42
|
+
run = exp.run(root_dir, runner='parallel', plugins=[plugin])
|
|
43
|
+
|
|
44
|
+
self.assertIsNotNone(plugin.start_time)
|
|
45
|
+
self.assertIsNotNone(plugin.complete_time)
|
|
46
|
+
self.assertGreater(plugin.complete_time, plugin.start_time)
|
|
47
|
+
|
|
48
|
+
self.assertEqual(
|
|
49
|
+
len(plugin.started_experiments), len(exp.nodes)
|
|
50
|
+
)
|
|
51
|
+
self.assertEqual(
|
|
52
|
+
len(plugin.completed_experiments), len(exp.nodes)
|
|
53
|
+
)
|
|
54
|
+
self.assertEqual(
|
|
55
|
+
len(plugin.started_example_ids), 6 * 10
|
|
56
|
+
)
|
|
57
|
+
self.assertEqual(
|
|
58
|
+
len(plugin.completed_example_ids), 6 * 10
|
|
59
|
+
)
|
|
60
|
+
self.assert_same_list(plugin.skipped_experiments, [])
|
|
61
|
+
self.assertTrue(
|
|
62
|
+
pg.io.path_exists(os.path.join(run.output_root, 'run.json'))
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
for node in exp.nodes:
|
|
66
|
+
self.assertTrue(node.progress.is_started)
|
|
67
|
+
self.assertTrue(node.progress.is_completed)
|
|
68
|
+
if node.is_leaf:
|
|
69
|
+
self.assertEqual(node.progress.num_skipped, 0)
|
|
70
|
+
self.assertEqual(node.progress.num_completed, 10)
|
|
71
|
+
self.assertEqual(node.progress.num_failed, 1)
|
|
72
|
+
else:
|
|
73
|
+
self.assertEqual(node.progress.num_skipped, 0)
|
|
74
|
+
self.assertEqual(node.progress.num_failed, 0)
|
|
75
|
+
self.assertEqual(node.progress.num_processed, node.progress.num_total)
|
|
76
|
+
|
|
77
|
+
def test_raise_if_has_error(self):
|
|
78
|
+
root_dir = os.path.join(tempfile.mkdtemp(), 'test_raise_if_has_error')
|
|
79
|
+
exp = eval_test_helper.TestEvaluation()
|
|
80
|
+
with self.assertRaisesRegex(ValueError, 'x should not be 5'):
|
|
81
|
+
exp.run(root_dir, runner='parallel', plugins=[], raise_if_has_error=True)
|
|
82
|
+
|
|
83
|
+
def test_concurrent_startup_delay(self):
|
|
84
|
+
plugin = eval_test_helper.TestPlugin()
|
|
85
|
+
exp = eval_test_helper.test_experiment()
|
|
86
|
+
root_dir = os.path.join(
|
|
87
|
+
tempfile.mkdtemp(), 'test_concurrent_startup_delay'
|
|
88
|
+
)
|
|
89
|
+
_ = exp.run(
|
|
90
|
+
root_dir,
|
|
91
|
+
runner='parallel',
|
|
92
|
+
plugins=[plugin],
|
|
93
|
+
concurrent_startup_delay=(0, 5),
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
if __name__ == '__main__':
|
|
98
|
+
unittest.main()
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Copyright 2024 The Langfun Authors
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
"""Sequential runner."""
|
|
15
|
+
|
|
16
|
+
from typing import Any, Callable, Iterator
|
|
17
|
+
from langfun.core.eval.v2.runners import base
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class SequentialRunner(base.RunnerBase):
|
|
21
|
+
"""A runner that executes evaluations and examples sequentially.
|
|
22
|
+
|
|
23
|
+
The sequential runner executes all evaluations and their examples in the
|
|
24
|
+
calling thread. Background tasks are also run sequentially, which makes it
|
|
25
|
+
easier to debug as exceptions from background tasks will be raised
|
|
26
|
+
immediately.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
NAME = 'sequential'
|
|
30
|
+
|
|
31
|
+
def background_run(
|
|
32
|
+
self, func: Callable[..., Any], *args: Any, **kwargs: Any
|
|
33
|
+
) -> None:
|
|
34
|
+
"""Runs the function immediately in the calling thread."""
|
|
35
|
+
func(*args, **kwargs)
|
|
36
|
+
|
|
37
|
+
def _run(self, evaluations: list[base.Evaluation]) -> None:
|
|
38
|
+
"""Runs the evaluations in sequence."""
|
|
39
|
+
for e in evaluations:
|
|
40
|
+
self.run_evaluation(e)
|
|
41
|
+
|
|
42
|
+
def _evaluate_items(
|
|
43
|
+
self, evaluation: base.Evaluation, items: Iterator[base.Example]
|
|
44
|
+
) -> None:
|
|
45
|
+
"""Runs the evaluation items in sequence."""
|
|
46
|
+
for item in items:
|
|
47
|
+
self.evaluate_item(evaluation, item)
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# Copyright 2024 The Langfun Authors
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
"""Tests for sequential runner."""
|
|
15
|
+
import os
|
|
16
|
+
import tempfile
|
|
17
|
+
from typing import Any
|
|
18
|
+
import unittest
|
|
19
|
+
|
|
20
|
+
from langfun.core.eval.v2 import eval_test_helper
|
|
21
|
+
from langfun.core.eval.v2.runners import sequential # pylint: disable=unused-import
|
|
22
|
+
|
|
23
|
+
import pyglove as pg
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class RunnerTest(unittest.TestCase):
|
|
27
|
+
|
|
28
|
+
def assert_same_list(self, actual: list[Any], expected: list[Any]):
|
|
29
|
+
self.assertEqual(len(actual), len(expected))
|
|
30
|
+
for i, (x, y) in enumerate(zip(actual, expected)):
|
|
31
|
+
if x is not y:
|
|
32
|
+
print(i, pg.diff(x, y))
|
|
33
|
+
self.assertIs(x, y)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class SequentialRunnerTest(RunnerTest):
|
|
37
|
+
|
|
38
|
+
def test_basic(self):
|
|
39
|
+
plugin = eval_test_helper.TestPlugin()
|
|
40
|
+
exp = eval_test_helper.test_experiment()
|
|
41
|
+
root_dir = os.path.join(tempfile.mkdtemp(), 'test_sequential_runner')
|
|
42
|
+
run = exp.run(root_dir, runner='sequential', plugins=[plugin])
|
|
43
|
+
|
|
44
|
+
self.assertIsNotNone(plugin.start_time)
|
|
45
|
+
self.assertIsNotNone(plugin.complete_time)
|
|
46
|
+
self.assertGreater(plugin.complete_time, plugin.start_time)
|
|
47
|
+
|
|
48
|
+
self.assert_same_list(
|
|
49
|
+
plugin.started_experiments,
|
|
50
|
+
exp.nonleaf_nodes + exp.leaf_nodes
|
|
51
|
+
)
|
|
52
|
+
self.assert_same_list(
|
|
53
|
+
plugin.completed_experiments,
|
|
54
|
+
exp.leaf_nodes + list(reversed(exp.nonleaf_nodes))
|
|
55
|
+
)
|
|
56
|
+
self.assert_same_list(
|
|
57
|
+
plugin.started_example_ids, list(range(1, 11)) * 6
|
|
58
|
+
)
|
|
59
|
+
self.assert_same_list(
|
|
60
|
+
plugin.completed_example_ids, list(range(1, 11)) * 6
|
|
61
|
+
)
|
|
62
|
+
self.assert_same_list(plugin.skipped_experiments, [])
|
|
63
|
+
self.assertTrue(
|
|
64
|
+
pg.io.path_exists(os.path.join(run.output_root, 'run.json'))
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
for node in exp.nodes:
|
|
68
|
+
self.assertTrue(node.progress.is_started)
|
|
69
|
+
self.assertTrue(node.progress.is_completed)
|
|
70
|
+
if node.is_leaf:
|
|
71
|
+
self.assertEqual(node.progress.num_skipped, 0)
|
|
72
|
+
self.assertEqual(node.progress.num_completed, 10)
|
|
73
|
+
self.assertEqual(node.progress.num_failed, 1)
|
|
74
|
+
else:
|
|
75
|
+
self.assertEqual(node.progress.num_skipped, 0)
|
|
76
|
+
self.assertEqual(node.progress.num_failed, 0)
|
|
77
|
+
self.assertEqual(node.progress.num_processed, node.progress.num_total)
|
|
78
|
+
|
|
79
|
+
def test_raise_if_has_error(self):
|
|
80
|
+
root_dir = os.path.join(tempfile.mkdtemp(), 'test_raise_if_has_error')
|
|
81
|
+
exp = eval_test_helper.TestEvaluation()
|
|
82
|
+
with self.assertRaisesRegex(ValueError, 'x should not be 5'):
|
|
83
|
+
exp.run(
|
|
84
|
+
root_dir, runner='sequential', plugins=[], raise_if_has_error=True
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
def test_example_ids(self):
|
|
88
|
+
root_dir = os.path.join(tempfile.mkdtemp(), 'test_example_ids')
|
|
89
|
+
exp = eval_test_helper.test_experiment()
|
|
90
|
+
plugin = eval_test_helper.TestPlugin()
|
|
91
|
+
_ = exp.run(
|
|
92
|
+
root_dir, runner='sequential', plugins=[plugin], example_ids=[5, 7, 9]
|
|
93
|
+
)
|
|
94
|
+
self.assertEqual(plugin.started_example_ids, [5, 7, 9] * 6)
|
|
95
|
+
self.assertEqual(plugin.completed_example_ids, [5, 7, 9] * 6)
|
|
96
|
+
|
|
97
|
+
def test_shuffle_inputs(self):
|
|
98
|
+
root_dir = os.path.join(tempfile.mkdtemp(), 'test_shuffle_inputs')
|
|
99
|
+
exp = eval_test_helper.test_experiment()
|
|
100
|
+
plugin = eval_test_helper.TestPlugin()
|
|
101
|
+
run = exp.run(
|
|
102
|
+
root_dir, runner='sequential', plugins=[plugin], shuffle_inputs=True
|
|
103
|
+
)
|
|
104
|
+
self.assertTrue(run.shuffle_inputs)
|
|
105
|
+
|
|
106
|
+
def test_filter(self):
|
|
107
|
+
plugin = eval_test_helper.TestPlugin()
|
|
108
|
+
exp = eval_test_helper.test_experiment()
|
|
109
|
+
root_dir = os.path.join(tempfile.mkdtemp(), 'test_filter')
|
|
110
|
+
|
|
111
|
+
_ = exp.run(
|
|
112
|
+
root_dir, runner='sequential', plugins=[plugin],
|
|
113
|
+
filter=lambda e: e.lm.offset != 0
|
|
114
|
+
)
|
|
115
|
+
self.assert_same_list(
|
|
116
|
+
plugin.started_experiments,
|
|
117
|
+
exp.nonleaf_nodes + exp.leaf_nodes[2:]
|
|
118
|
+
)
|
|
119
|
+
self.assert_same_list(
|
|
120
|
+
plugin.skipped_experiments, exp.leaf_nodes[:2]
|
|
121
|
+
)
|
|
122
|
+
self.assert_same_list(
|
|
123
|
+
plugin.completed_experiments,
|
|
124
|
+
exp.leaf_nodes[2:] + [exp.children[1], exp]
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
def test_use_cache(self):
|
|
128
|
+
@pg.functor()
|
|
129
|
+
def test_inputs(num_examples: int = 10):
|
|
130
|
+
return [
|
|
131
|
+
pg.Dict(
|
|
132
|
+
x=i // 2, y=(i // 2) ** 2,
|
|
133
|
+
groundtruth=(i // 2 + (i // 2) ** 2)
|
|
134
|
+
) for i in range(num_examples)
|
|
135
|
+
]
|
|
136
|
+
|
|
137
|
+
exp = eval_test_helper.TestEvaluation(
|
|
138
|
+
inputs=test_inputs(num_examples=pg.oneof([2, 4]))
|
|
139
|
+
)
|
|
140
|
+
# Global cache.
|
|
141
|
+
root_dir = os.path.join(tempfile.mkdtemp(), 'global_cache')
|
|
142
|
+
run = exp.run(
|
|
143
|
+
root_dir, 'new', runner='sequential', use_cache='global', plugins=[]
|
|
144
|
+
)
|
|
145
|
+
self.assertTrue(pg.io.path_exists(run.output_path_for(exp, 'cache.json')))
|
|
146
|
+
self.assertEqual(exp.usage_summary.cached.total.num_requests, 4)
|
|
147
|
+
self.assertEqual(exp.usage_summary.uncached.total.num_requests, 2)
|
|
148
|
+
|
|
149
|
+
# Per-dataset cache.
|
|
150
|
+
root_dir = os.path.join(tempfile.mkdtemp(), 'per_dataset')
|
|
151
|
+
run = exp.run(
|
|
152
|
+
root_dir, 'new', runner='sequential',
|
|
153
|
+
use_cache='per_dataset', plugins=[]
|
|
154
|
+
)
|
|
155
|
+
for leaf in exp.leaf_nodes:
|
|
156
|
+
self.assertTrue(
|
|
157
|
+
pg.io.path_exists(run.output_path_for(leaf, 'cache.json'))
|
|
158
|
+
)
|
|
159
|
+
self.assertEqual(exp.usage_summary.cached.total.num_requests, 3)
|
|
160
|
+
self.assertEqual(exp.usage_summary.uncached.total.num_requests, 3)
|
|
161
|
+
|
|
162
|
+
# No cache.
|
|
163
|
+
root_dir = os.path.join(tempfile.mkdtemp(), 'no')
|
|
164
|
+
run = exp.run(root_dir, runner='sequential', use_cache='no', plugins=[])
|
|
165
|
+
self.assertFalse(pg.io.path_exists(run.output_path_for(exp, 'cache.json')))
|
|
166
|
+
for leaf in exp.leaf_nodes:
|
|
167
|
+
self.assertFalse(
|
|
168
|
+
pg.io.path_exists(run.output_path_for(leaf, 'cache.json'))
|
|
169
|
+
)
|
|
170
|
+
self.assertEqual(exp.usage_summary.cached.total.num_requests, 0)
|
|
171
|
+
self.assertEqual(exp.usage_summary.uncached.total.num_requests, 6)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
if __name__ == '__main__':
|
|
175
|
+
unittest.main()
|
langfun/core/langfunc.py
CHANGED
|
@@ -32,146 +32,43 @@ _TLS_LFUN_CALL_STACK = '_langfunc_callstack'
|
|
|
32
32
|
# NOTE(daiyip): Only the template string belongs to the positional arguments,
|
|
33
33
|
# all others are keyword-only for clarity.
|
|
34
34
|
@pg.use_init_args(['template_str'])
|
|
35
|
-
class LangFunc(
|
|
36
|
-
|
|
37
|
-
):
|
|
38
|
-
r"""Base class for natural-language driven component.
|
|
39
|
-
|
|
40
|
-
``LangFunc`` is a language-driven component that enables users to
|
|
41
|
-
seamlessly interact with Language Models (LLMs) using a blend of natural
|
|
42
|
-
language and code. It empowers users to easily modularize prompt/execution
|
|
43
|
-
logics, compose them, and simplify the creation of Language Model (LLM)-based
|
|
44
|
-
components and applications.
|
|
45
|
-
|
|
46
|
-
LangFunc can be conceptualized as a string template with embeddable code,
|
|
47
|
-
but it distinguishes itself from traditional template systems in four key
|
|
48
|
-
ways.
|
|
49
|
-
|
|
50
|
-
Firstly, it enables easy modularization of templates along with the required
|
|
51
|
-
values with OO principles, providing a reusable way for LLM-based content
|
|
52
|
-
generation. For example:
|
|
53
|
-
|
|
54
|
-
```
|
|
55
|
-
class FewshotExamples(lf.LangFunc):
|
|
56
|
-
'''Base for fewshot prompt.
|
|
57
|
-
|
|
58
|
-
{% for example in examples %}
|
|
59
|
-
{{ example }}
|
|
60
|
-
{% endfor %}
|
|
61
|
-
'''
|
|
62
|
-
|
|
63
|
-
# Usage 1: __init__ time binding.
|
|
64
|
-
assert FewshotPrompt(examples=['foo', 'bar'])() == 'foo\nbar'
|
|
65
|
-
|
|
66
|
-
# Usage 2: __call__ time binding.
|
|
67
|
-
assert FewshotPrompt()(examples=['foo', 'bar']) == 'foo\nbar'
|
|
68
|
-
|
|
69
|
-
class ToolDescription(lf.LangFunc):
|
|
70
|
-
'''Tool descriptions.
|
|
71
|
-
|
|
72
|
-
{% for tool in tools %}
|
|
73
|
-
{{ tool.description }}
|
|
74
|
-
{% endfor %}
|
|
75
|
-
'''
|
|
76
|
-
# We want to constrain tools to be a list of `Tool` objects.
|
|
77
|
-
tools: list[Tool]
|
|
78
|
-
|
|
79
|
-
# Raises: runtime type checking will fail on [1, 2, 3].
|
|
80
|
-
ToolDescription(tools=[1, 2, 3])
|
|
81
|
-
```
|
|
82
|
-
|
|
83
|
-
Secondly, it has the capability to compose multiple LangFuncs together,
|
|
84
|
-
enabling the accomplishment of complex language tasks with maximum reuse.
|
|
85
|
-
It allows users to provide program inputs to all the LangFuncs within a
|
|
86
|
-
composition at the top level, significantly simplifying the process of
|
|
87
|
-
providing context for users. For example:
|
|
88
|
-
|
|
89
|
-
```
|
|
90
|
-
class ReAct(lf.LangFunc):
|
|
91
|
-
'''ReAct prompt for tool-use.
|
|
92
|
-
|
|
93
|
-
{{ preamble }}
|
|
94
|
-
{{ tool_description }}
|
|
95
|
-
{{ tool_examples }}
|
|
96
|
-
{{ user_input }}
|
|
97
|
-
'''
|
|
98
|
-
# Default preamble, which could be overriden from subclass
|
|
99
|
-
# or parsed from the `__init__` argument.
|
|
100
|
-
preamble = 'Please help me on my task based on the following tools.',
|
|
101
|
-
|
|
102
|
-
react = ReAct(
|
|
103
|
-
tool_description=ToolDescription()
|
|
104
|
-
tool_examples=FewshotExamples(),
|
|
105
|
-
# Partially bind `tools` and `examples`.
|
|
106
|
-
tools=my_tools,
|
|
107
|
-
examples=[t.examples for t in my_tools]
|
|
108
|
-
)
|
|
109
|
-
|
|
110
|
-
# Late bind `user_input` at __call__ time.
|
|
111
|
-
react(user_input='Help me get a lunch to go, veggie please.' )
|
|
112
|
-
```
|
|
113
|
-
|
|
114
|
-
Thirdly, it allows the flexibility to encapsulate complex compositions to
|
|
115
|
-
reusable classes and modify them. For example:
|
|
116
|
-
|
|
117
|
-
```
|
|
118
|
-
# The compound decorator converts a function into a LangFunc.
|
|
119
|
-
@lf.compound
|
|
120
|
-
def react_with_tools(preamble, tools: list[Tool]):
|
|
121
|
-
return ReAct(
|
|
122
|
-
preamble=preamble,
|
|
123
|
-
tool_description=ToolDescription()
|
|
124
|
-
tool_examples=FewshotExamples(),
|
|
125
|
-
# Partially bind `tools` and `examples`.
|
|
126
|
-
tools=my_tools,
|
|
127
|
-
examples=[t.examples for t in my_tools]
|
|
128
|
-
)
|
|
35
|
+
class LangFunc(template_lib.Template):
|
|
36
|
+
r"""Base class for Language-based functions.
|
|
129
37
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
38
|
+
LangFunc represents a function powered by a language model. It is a subclass
|
|
39
|
+
of `lf.Template` and can be thought of as a `lf.Template` augmented with an LM
|
|
40
|
+
and an output transformation. Calling a `lf.LangFunc` is equivalent to calling
|
|
41
|
+
the LM with the rendered prompt and transforming the output.
|
|
133
42
|
|
|
134
|
-
|
|
135
|
-
'''
|
|
43
|
+
LangFunc can be directly constructed and used.
|
|
136
44
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
prompt=react_with_tools(
|
|
140
|
-
preamble=(
|
|
141
|
-
f'Please help me solve my problem using tools. '
|
|
142
|
-
f'Current time is {{datetime.datetime.now()}}'),
|
|
143
|
-
tools=my_tools))
|
|
45
|
+
```python
|
|
46
|
+
import langfun as lf
|
|
144
47
|
|
|
145
|
-
|
|
146
|
-
|
|
48
|
+
func = lf.LangFunc("Hello, {{name}}!")
|
|
49
|
+
print(func(name="Gemini", lm=lf.llms.Gemini25Flash()))
|
|
50
|
+
# Output: Hello, how are you today?
|
|
51
|
+
```
|
|
147
52
|
|
|
148
|
-
|
|
149
|
-
it could be manipulated programmatically, turned into a space for data
|
|
150
|
-
sampling, or even tuned by AutoML. For example:
|
|
53
|
+
Or it can be subclassed:
|
|
151
54
|
|
|
152
|
-
|
|
153
|
-
|
|
55
|
+
```python
|
|
56
|
+
import langfun as lf
|
|
154
57
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
'Help me solve my problem using the following tools:',
|
|
158
|
-
'Help me with the tools below:',
|
|
159
|
-
...
|
|
160
|
-
])
|
|
161
|
-
# Choose any two of the tools for generating data.
|
|
162
|
-
tools=pg.manyof(2, [
|
|
163
|
-
google_search(...),
|
|
164
|
-
doordash(...),
|
|
165
|
-
...
|
|
166
|
-
])
|
|
58
|
+
class Compute(lf.LangFunc):
|
|
59
|
+
'''Compute a simple arithmetic expression.
|
|
167
60
|
|
|
168
|
-
|
|
169
|
-
|
|
61
|
+
{{expression}} = ?
|
|
62
|
+
'''
|
|
63
|
+
expression: str
|
|
170
64
|
|
|
171
|
-
|
|
65
|
+
def transform_output(self, lm_output: lf.Message) -> lf.Message:
|
|
66
|
+
lm_output.metadata.result = float(lm_output.text)
|
|
67
|
+
return lm_output
|
|
172
68
|
|
|
173
|
-
|
|
174
|
-
|
|
69
|
+
r = Compute(expression="1 + 1")(lm=lf.llms.Gemini25Flash())
|
|
70
|
+
print(r.result)
|
|
71
|
+
# Output: 2.0
|
|
175
72
|
|
|
176
73
|
Final note: always include these capitalized words if you don't want to treat
|
|
177
74
|
the docstr as the template str: THIS IS NOT A TEMPLATE. So as a result, this
|
|
@@ -305,6 +202,24 @@ class LangFunc(
|
|
|
305
202
|
message_cls: Type[message_lib.Message] = message_lib.UserMessage,
|
|
306
203
|
**kwargs,
|
|
307
204
|
) -> message_lib.Message:
|
|
205
|
+
"""Renders the template and transforms it as LM input message.
|
|
206
|
+
|
|
207
|
+
Args:
|
|
208
|
+
allow_partial: If True, allows partial rendering, which leaves unresolved
|
|
209
|
+
variables in place in the output text. Otherwise, raises an error when
|
|
210
|
+
there are unresolved variables.
|
|
211
|
+
implicit: If True, reuses the rendering output if a parent `lf.Template`
|
|
212
|
+
is rendering the current `lf.Template` multiple times. This is important
|
|
213
|
+
for making sure all references to the same `lf.Template` within a single
|
|
214
|
+
top-level rendering would return the same result. If False, every call
|
|
215
|
+
to `render` will trigger the actual rendering process.
|
|
216
|
+
message_cls: The message class used for creating the return value.
|
|
217
|
+
**kwargs: Values for template variables, which override values from
|
|
218
|
+
member attributes or context.
|
|
219
|
+
|
|
220
|
+
Returns:
|
|
221
|
+
A Message object containing the rendered result.
|
|
222
|
+
"""
|
|
308
223
|
lm_input = super().render(
|
|
309
224
|
allow_partial=allow_partial,
|
|
310
225
|
implicit=implicit,
|
langfun/core/langfunc_test.py
CHANGED
|
@@ -82,7 +82,7 @@ class LangFuncCallTest(unittest.TestCase):
|
|
|
82
82
|
|
|
83
83
|
i = l.render()
|
|
84
84
|
self.assertEqual(i, 'Hello')
|
|
85
|
-
self.assertEqual(i, message.UserMessage('Hello'))
|
|
85
|
+
self.assertEqual(i, message.UserMessage('Hello', __template_input__={}))
|
|
86
86
|
self.assertEqual(i.tags, ['rendered'])
|
|
87
87
|
|
|
88
88
|
r = l()
|
|
@@ -96,7 +96,9 @@ class LangFuncCallTest(unittest.TestCase):
|
|
|
96
96
|
self.assertEqual(r.tags, ['lm-response', 'lm-output'])
|
|
97
97
|
self.assertEqual(
|
|
98
98
|
r.source,
|
|
99
|
-
message.UserMessage(
|
|
99
|
+
message.UserMessage(
|
|
100
|
+
'Hello', metadata=dict(cache_seed=0, __template_input__={})
|
|
101
|
+
)
|
|
100
102
|
)
|
|
101
103
|
self.assertEqual(r.source.tags, ['rendered', 'lm-input'])
|
|
102
104
|
|
|
@@ -107,8 +109,8 @@ class LangFuncCallTest(unittest.TestCase):
|
|
|
107
109
|
' lm=ExcitedEchoer(sampling_options=LMSamplingOptions(temperature=None,'
|
|
108
110
|
' max_tokens=None, n=1, top_k=40, top_p=None, stop=None,'
|
|
109
111
|
' random_seed=None, logprobs=False, top_logprobs=None,'
|
|
110
|
-
' max_thinking_tokens=None, reasoning_effort=None
|
|
111
|
-
' max_concurrency=None, timeout=120.0, max_attempts=5,'
|
|
112
|
+
' max_thinking_tokens=None, reasoning_effort=None, extras={}),'
|
|
113
|
+
' cache=None, max_concurrency=None, timeout=120.0, max_attempts=5,'
|
|
112
114
|
' retry_interval=(5, 60), exponential_backoff=True,'
|
|
113
115
|
' max_retry_interval=300, debug=False))',
|
|
114
116
|
)
|