langfun 0.1.2.dev202411140804__py3-none-any.whl → 0.1.2.dev202411160804__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
langfun/__init__.py CHANGED
@@ -53,6 +53,10 @@ PythonFunction = coding.PythonFunction
53
53
  from langfun.core import llms
54
54
  lm_cache = llms.cache.lm_cache
55
55
 
56
+ from langfun.core import agentic
57
+ Action = agentic.Action
58
+ Session = agentic.Session
59
+
56
60
  from langfun.core import memories
57
61
 
58
62
  from langfun.core import modalities
@@ -0,0 +1,30 @@
1
+ # Copyright 2024 The Langfun Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Langfun agentic framework."""
15
+
16
+ # pylint: disable=g-bad-import-order
17
+ # pylint: disable=g-importing-member
18
+ # pylint: disable=g-import-not-at-top
19
+
20
+ from langfun.core.agentic.action import Action
21
+ from langfun.core.agentic.action import ActionInvocation
22
+ from langfun.core.agentic.action import Session
23
+
24
+ from langfun.core.agentic.action_eval import ActionEval
25
+ from langfun.core.agentic.action_eval import ActionEvalV1
26
+
27
+
28
+ # pylint: enable=g-bad-import-order
29
+ # pylint: enable=g-importing-member
30
+ # pylint: enable=g-import-not-at-top
@@ -0,0 +1,250 @@
1
+ # Copyright 2024 The Langfun Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Base classes for agentic actions."""
15
+
16
+ import abc
17
+ from typing import Annotated, Any, Optional, Union
18
+ import langfun.core as lf
19
+ import pyglove as pg
20
+
21
+
22
class Action(pg.Object):
  """Base class for agent actions.

  Subclasses implement `call`. Invoking the action through `__call__` brackets
  that call with `Session.begin` / `Session.end` and stores the returned value,
  which is then available through the `result` property.
  """

  def _on_bound(self):
    super()._on_bound()
    # Result of the most recent invocation; stays None until `call` returns.
    self._result = None

  @property
  def result(self) -> Any:
    """Returns the result of the most recent invocation of the action."""
    return self._result

  def __call__(
      self, session: Optional['Session'] = None, **kwargs) -> Any:
    """Executes the action.

    Args:
      session: The session to execute the action in. If None, a new `Session`
        is created for this call.
      **kwargs: Keyword arguments forwarded to `call`.

    Returns:
      The value returned by `call`.
    """
    if session is None:
      session = Session()

    # Drop the value from any previous invocation so that `session.end`, which
    # reads `self.result`, never records a stale result when `call` raises.
    self._result = None

    # `begin` is deliberately outside the `try`: if it fails there is nothing
    # for `end` to close, and calling `end` would mask the original error.
    session.begin(self)
    try:
      self._result = self.call(session=session, **kwargs)
      return self._result
    finally:
      session.end(self)

  @abc.abstractmethod
  def call(self, session: 'Session', **kwargs) -> Any:
    """Subclasses to implement."""
48
+
49
+
50
class ActionInvocation(pg.Object, pg.views.html.HtmlTreeView.Extension):
  """A class for capturing the invocation of an action.

  An invocation holds the action, its result, and an ordered `execution`
  sequence whose items are either child `ActionInvocation` objects or log
  entries.
  """
  action: Action
  result: Any = None
  execution: Annotated[
      list[Union['ActionInvocation', lf.logging.LogEntry]],
      'Execution execution.'
  ] = []

  # Allow symbolic assignment without `rebind`.
  allow_symbolic_assignment = True

  @property
  def logs(self) -> list[lf.logging.LogEntry]:
    """Returns logs from execution sequence."""
    return [v for v in self.execution if isinstance(v, lf.logging.LogEntry)]

  @property
  def child_invocations(self) -> list['ActionInvocation']:
    """Returns child action invocations."""
    return [v for v in self.execution if isinstance(v, ActionInvocation)]

  def _html_tree_view_summary(
      self, *, view: pg.views.html.HtmlTreeView, **kwargs
  ):
    """Renders the summary, using the rendered action as the title.

    Returns None (no summary) when this invocation wraps a `RootAction`.
    """
    if isinstance(self.action, RootAction):
      return None
    # The caller-supplied title is replaced by the rendered action below.
    # Pop with a default so a call without `title` does not raise KeyError.
    kwargs.pop('title', None)
    return view.summary(
        self,
        title=view.render(
            self.action, name='action', collapse_level=0,
            css_classes='invocation-title',
        ),
        **kwargs
    )

  def _html_tree_view_content(
      self,
      *,
      root_path: pg.KeyPath | None = None,
      collapse_level: int | None = None,
      view: pg.views.html.HtmlTreeView,
      **kwargs
  ):
    """Renders the result followed by the execution sequence.

    The execution sequence is split into phases: items before the first child
    invocation form the "prepare" phase; each child invocation starts a new
    phase that also collects the items following it.
    """
    prepare_phase = []
    current_phase = prepare_phase
    action_phases = []
    for item in self.execution:
      if isinstance(item, ActionInvocation):
        # A child invocation opens a new phase.
        current_phase = []
        action_phases.append(current_phase)
      current_phase.append(item)

    def _render_phase(
        phase: list[ActionInvocation | lf.logging.LogEntry]
    ) -> pg.Html.WritableTypes:
      return pg.Html.element(
          'div',
          [
              view.render(item) for item in phase
          ]
      )

    def _render_action_phases(
        phases: list[list[ActionInvocation | lf.logging.LogEntry]]
    ) -> pg.Html.WritableTypes:
      # A single phase is rendered inline; otherwise one tab per phase.
      if len(phases) == 1:
        return _render_phase(phases[0])
      return pg.views.html.controls.TabControl(
          [
              pg.views.html.controls.Tab(
                  label=f'Step {i + 1}',
                  content=_render_phase(phase),
              )
              for i, phase in enumerate(phases)
          ],
      )

    result_name = 'final_result' if isinstance(
        self.action, RootAction) else 'result'
    return pg.Html.element(
        'div',
        [
            view.render(
                self.result,
                name=result_name,
                css_classes=[
                    f'invocation-{result_name}'.replace('_', '-')
                ]
            ),
            _render_phase(prepare_phase) if prepare_phase else None,
            _render_action_phases(action_phases)
        ]
    )

  @classmethod
  def _html_tree_view_css_styles(cls) -> list[str]:
    """Returns the inherited CSS styles plus the invocation-specific rules."""
    return super()._html_tree_view_css_styles() + [
        """
        details.invocation-title {
          display: inline-block;
          background-color: #b1f0ff;
          border: 1px solid white;
        }
        details.invocation-result {
          border: 1px solid #eee;
        }
        details.invocation-final-result {
          border: 1px solid #eee;
          background-color: #fef78f;
        }
        """
    ]
164
+
165
+
166
class RootAction(Action):
  """Placeholder action sitting at the root of the action tree.

  It only marks the root invocation and is never meant to be executed.
  """

  def call(self, session: 'Session', **kwargs) -> Any:
    # Executing the placeholder is always an error.
    raise NotImplementedError('Shall not be called.')
171
+
172
+
173
class Session(pg.Object):
  """Session for performing an agentic task.

  A session records the tree of action invocations and the log entries
  emitted while they run. `begin` / `end` maintain a stack of open
  invocations whose bottom element is always `root_invocation`.
  """

  root_invocation: ActionInvocation = ActionInvocation(RootAction())

  def _on_bound(self):
    super()._on_bound()
    # Stack of invocations currently in progress; the root stays at index 0.
    self._invocation_stack = [self.root_invocation]

  @property
  def final_result(self) -> Any:
    """Returns the final result of the session."""
    return self.root_invocation.result

  @property
  def current_invocation(self) -> ActionInvocation:
    """Returns the current invocation."""
    assert self._invocation_stack
    return self._invocation_stack[-1]

  def begin(self, action: Action):
    """Signal the beginning of the execution of an action."""
    # NOTE(review): `pg.maybe_ref` presumably stores the action by reference;
    # `end` relies on `invocation.action is action` holding.
    new_invocation = ActionInvocation(pg.maybe_ref(action))
    with pg.notify_on_change(False):
      self.current_invocation.execution.append(new_invocation)
    self._invocation_stack.append(new_invocation)

  def end(self, action: Action):
    """Signal the end of the execution of an action.

    Records `action.result` on the invocation opened by the matching `begin`
    and, when that invocation is a direct child of the root, also on the root
    invocation.

    Args:
      action: The action whose execution is ending. It must be the action of
        the current (innermost) invocation.
    """
    # Validate before mutating anything, so that a mismatched `end` leaves the
    # stack and the recorded results untouched. The root invocation must
    # never be popped, hence at least two entries are required here.
    assert len(self._invocation_stack) > 1, self._invocation_stack
    invocation = self._invocation_stack[-1]
    assert invocation.action is action, (invocation.action, action)

    self._invocation_stack.pop(-1)
    invocation.rebind(
        result=action.result, skip_notification=True, raise_on_no_change=False
    )

    # Only the root is left: a top-level action just finished, so its result
    # becomes the session's final result.
    if len(self._invocation_stack) == 1:
      self.root_invocation.rebind(
          result=invocation.result,
          skip_notification=True,
          raise_on_no_change=False
      )

  def _log(self, level: lf.logging.LogLevel, message: str, **kwargs):
    """Logs a message and appends the entry to the current invocation."""
    with pg.notify_on_change(False):
      self.current_invocation.execution.append(
          lf.logging.log(
              # Indent by nesting depth; the root invocation is depth 0.
              level, message, indent=len(self._invocation_stack) - 1, **kwargs
          )
      )

  def debug(self, message: str, **kwargs):
    """Logs a debug message to the session."""
    self._log('debug', message, **kwargs)

  def info(self, message: str, **kwargs):
    """Logs an info message to the session."""
    self._log('info', message, **kwargs)

  def warning(self, message: str, **kwargs):
    """Logs a warning message to the session."""
    self._log('warning', message, **kwargs)

  def error(self, message: str, **kwargs):
    """Logs an error message to the session."""
    self._log('error', message, **kwargs)

  def fatal(self, message: str, **kwargs):
    """Logs a fatal message to the session."""
    self._log('fatal', message, **kwargs)

  def as_message(self) -> lf.AIMessage:
    """Returns the session as a message."""
    return lf.AIMessage(
        'Agentic task session.',
        result=self.root_invocation
    )
@@ -0,0 +1,150 @@
1
+ # Copyright 2024 The Langfun Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Evaluation (v1) for Langfun agentic actions."""
15
+
16
+ import io
17
+ import os
18
+ from typing import Annotated, Any
19
+
20
+ import langfun.core as lf
21
+ from langfun.core import eval as lf_eval
22
+ from langfun.core.agentic import action as action_lib
23
+ import pyglove as pg
24
+
25
+
26
class ActionEval(lf_eval.v2.Evaluation):
  """Agent evaluation.

  Each example is expected to carry an `action` attribute, which is executed
  in a fresh session with `action_args` as keyword arguments.
  """

  action_args: Annotated[
      dict[str, Any],
      'Arguments to call the action.'
  ] = {}

  def process(self, example: pg.Dict) -> tuple[Any, dict[str, Any]]:
    """Runs the example's action in a new session.

    Args:
      example: The example to process; its `action` attribute is invoked.

    Returns:
      A tuple of the session's final result and a metadata dict holding the
      session under the key `session`.
    """
    action = example.action
    session = action_lib.Session()
    # Run the action with the log level set to 'fatal'.
    with lf.logging.use_log_level('fatal'):
      action(session=session, **self.action_args)
    return session.final_result, dict(session=session)
40
+
41
+
42
+ #
43
+ # TODO(daiyip): Remove V1 once V2 is fully launched.
44
+ #
45
+
46
+
47
@pg.functor()
def _dummy_schema():
  """Placeholder schema function; always yields the `int` type."""
  return int
50
+
51
+
52
class ExampleView(pg.Object):
  """Record of one evaluated example, used for rendering it to HTML."""
  # Index of the example.
  id: int
  # The example that was evaluated.
  input: Any
  # The output produced for the example.
  output: Any
  # Error text for the example, or None.
  error: str | None = None
57
+
58
+
59
class ActionEvalV1(lf_eval.Matching):
  """Base class for action evaluations.

  The input function should return a list of pg.Dict, with `action` and
  `groundtruth` fields.
  """
  # We override the schema and prompt to dummy values since they are not used.
  schema_fn = _dummy_schema()
  prompt = '<unused>'

  def process(self, example: pg.Dict, **kwargs):
    """Runs the example's action in a new session and returns it as a message."""
    action = example.action
    session = action_lib.Session()
    action(session=session, lm=self.lm, **kwargs)
    return session.as_message()

  def answer(self, output: Any, example: pg.Dict) -> Any:
    """Returns the output unchanged as the answer."""
    return output

  def groundtruth(self, example: Any) -> Any:
    """Returns the `groundtruth` attribute of the example."""
    return example.groundtruth

  def audit(
      self,
      example_idx: int,
      example: Any,
      message: lf.Message | None,
      error: Exception | None = None,
      dryrun: bool = False,
  ):
    """Audits the example and saves an HTML view of it under `self.dir`.

    Args:
      example_idx: Index of the example; also used in the HTML file name.
      example: The example being audited.
      message: The message produced for the example, or None.
      error: The error raised while processing the example, or None.
      dryrun: If True, no HTML file is written.
    """
    super().audit(example_idx, example, message, error, dryrun)
    # Write each example to HTML.
    if not dryrun and self.dir:
      def _save_html():
        ExampleView(
            example_idx,
            example,
            None if message is None else message.result,
            # `ExampleView.error` is declared as `str | None`, so the
            # exception is converted to its text form before being stored.
            None if error is None else str(error),
        ).to_html(
            collapse_level=None,
            enable_summary_tooltip=False,
        ).save(
            os.path.join(self.dir, f'example_{example_idx}.html')
        )
      # Write HTML in a separate thread to avoid blocking the main thread.
      lf.concurrent.get_executor(
          'background_eval_io', max_workers=16
      ).submit(_save_html)

  def _render_mismatches(self, s: io.StringIO) -> None:
    """Writes links to mismatched examples, then an iframe showing the first."""
    s.write('<h2> Mismatches (Incorrect) </h2>')
    first_url = None
    mismatched_ids = sorted([
        example_idx for example_idx, *_ in self.mismatches
    ])
    for example_idx in mismatched_ids:
      url = os.path.join(self.dir, f'example_{example_idx}.html')
      if first_url is None:
        first_url = url
      s.write(
          f'<a href="{url}" style="margin-right: 10px" target="example_view">'
          f'{example_idx}</a> '
      )
    if first_url:
      # The leading space in the second literal separates the `name` and `src`
      # attributes once the two strings are concatenated.
      s.write(
          '<iframe style="border:0;width:100%;height:100%" name="example_view"'
          f' src="{first_url}" title="Example View"></iframe>'
      )
    else:
      s.write('No mismatches found.')

  def _render_matches(self, s: io.StringIO) -> None:
    """Writes links to matched examples, then an iframe showing the first."""
    s.write('<h2> Matches (correct) </h2>')
    first_url = None
    matched_ids = sorted([
        example_idx for example_idx, *_ in self.matches
    ])
    for example_idx in matched_ids:
      url = os.path.join(self.dir, f'example_{example_idx}.html')
      if first_url is None:
        first_url = url
      s.write(
          f'<a href="{url}" style="margin-right: 10px">{example_idx}</a> '
      )
    if first_url:
      # The leading space in the second literal separates the `name` and `src`
      # attributes once the two strings are concatenated.
      s.write(
          '<iframe style="border:0;width:100%;height:100%" name="example_view"'
          f' src="{first_url}" title="Example View"></iframe>'
      )
    else:
      s.write('No matches found.')
@@ -0,0 +1,109 @@
1
+ # Copyright 2024 The Langfun Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Tests for action evaluation."""
15
+
16
+ import os
17
+ import tempfile
18
+ import unittest
19
+
20
+ from langfun.core import eval as lf_eval
21
+ from langfun.core import llms as lf_llms
22
+ from langfun.core.agentic import action as action_lib
23
+ from langfun.core.agentic import action_eval
24
+ import pyglove as pg
25
+
26
+
27
class Foo(action_lib.Action):
  """Test action whose result is simply its `x` field."""
  x: int

  def call(self, session, **kwargs):  # pylint: disable=unused-argument
    # Neither the session nor the extra arguments are needed here.
    return self.x
33
+
34
+
35
@pg.functor()
def foo_inputs():
  """Returns two examples (`Foo(1)` and `Foo(2)`), both with groundtruth 1."""
  return [pg.Dict(action=Foo(x), groundtruth=1) for x in (1, 2)]
41
+
42
+
43
class ActionEvalTest(unittest.TestCase):
  """Tests for `action_eval.ActionEval`."""

  def test_basics(self):
    """One of the two `foo_inputs` examples matches its groundtruth."""

    class FooEval(action_eval.ActionEval):
      inputs = foo_inputs()
      metrics = [lf_eval.v2.metrics.Match()]
      action_args = dict(lm=lf_llms.Echo())

    experiment = FooEval()
    output_dir = os.path.join(tempfile.gettempdir(), 'foo_eval')
    experiment.run(output_dir, plugins=[])

    match_metric = experiment.metrics[0]
    self.assertEqual(match_metric.matches, 0.5)
    self.assertEqual(match_metric.mismatches, 0.5)
59
+
60
+
61
class ActionEvalV1Test(unittest.TestCase):
  """Tests for `action_eval.ActionEvalV1`."""

  def test_basics(self):
    """Checks the full result dict produced by running a V1 evaluation."""

    class FooEval(action_eval.ActionEvalV1):
      lm = lf_llms.Echo()
      inputs = foo_inputs()

    s = FooEval()
    # The debug print of `result` was removed: the assertion below already
    # reports the full value on failure.
    result = s.run(summary=False)
    self.assertEqual(
        result,
        dict(
            experiment_setup=dict(
                id=s.id,
                dir=None,
                model='Echo',
                prompt_template='<unused>',
                method='query',
                schema_fn='_dummy_schema()'
            ),
            cache_stats=dict(
                use_cache=True,
                num_queries=0,
                num_hits=0,
                num_updates=0,
            ),
            metrics=dict(
                total=2,
                failures=0,
                failure_rate=0.0,
                oop_failures=0,
                oop_failure_rate=0.0,
                non_oop_failures=0,
                non_oop_failure_rate=0.0,
                failure_breakdown={},
                num_matches=0,
                match_rate=0.0,
                num_mismatches=2,
                mismatch_rate=1.0
            ),
            usage=None
        )
    )
106
+
107
+
108
+ if __name__ == '__main__':
109
+ unittest.main()
@@ -0,0 +1,84 @@
1
+ # Copyright 2024 The Langfun Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Tests for base action."""
15
+
16
+ import unittest
17
+
18
+ import langfun.core as lf
19
+ from langfun.core.agentic import action as action_lib
20
+
21
+
22
class SessionTest(unittest.TestCase):
  """Tests for `action_lib.Session` and action invocation tracking."""

  def test_basics(self):
    """Nested actions are recorded as a tree of invocations with logs."""
    test = self

    class Bar(action_lib.Action):

      def call(self, session, **kwargs):
        test.assertIs(session.current_invocation.action, self)
        session.info('Begin Bar')
        return 2

    class Foo(action_lib.Action):
      x: int

      def call(self, session, **kwargs):
        test.assertIs(session.current_invocation.action, self)
        session.info('Begin Foo', x=1)
        return self.x + Bar()(session)

    session = action_lib.Session()
    root = session.root_invocation
    self.assertIsInstance(root.action, action_lib.RootAction)
    self.assertIs(session.current_invocation, session.root_invocation)
    self.assertEqual(Foo(1)(session), 3)
    self.assertEqual(len(session.root_invocation.child_invocations), 1)
    self.assertEqual(len(session.root_invocation.child_invocations[0].logs), 1)
    self.assertEqual(
        len(session.root_invocation.child_invocations[0].child_invocations),
        1
    )
    self.assertEqual(
        len(session.root_invocation
            .child_invocations[0].child_invocations[0].logs),
        1
    )
    self.assertEqual(
        len(session.root_invocation
            .child_invocations[0].child_invocations[0].child_invocations),
        0
    )
    self.assertIs(session.current_invocation, session.root_invocation)
    # Compare by value: identity of equal ints is an implementation detail.
    self.assertEqual(session.final_result, 3)
    self.assertIn(
        'invocation-final-result',
        session.to_html().content,
    )

  def test_log(self):
    """Every log-level helper can be called on a fresh session."""
    session = action_lib.Session()
    session.debug('hi', x=1, y=2)
    session.info('hi', x=1, y=2)
    session.warning('hi', x=1, y=2)
    session.error('hi', x=1, y=2)
    session.fatal('hi', x=1, y=2)

  def test_as_message(self):
    """`as_message` returns an `lf.AIMessage`."""
    session = action_lib.Session()
    self.assertIsInstance(session.as_message(), lf.AIMessage)
81
+
82
+
83
+ if __name__ == '__main__':
84
+ unittest.main()
@@ -15,6 +15,9 @@
15
15
  import abc
16
16
  import collections
17
17
  import concurrent.futures
18
+ import random
19
+ import threading
20
+ import time
18
21
  from typing import Any, Annotated, Callable, Iterator
19
22
 
20
23
  from langfun import core as lf
@@ -373,6 +376,15 @@ class ParallelRunner(RunnerBase):
373
376
  'Timeout for each evaluation example.'
374
377
  ] = None
375
378
 
379
+ concurrent_startup_delay: Annotated[
380
+ tuple[int, int] | None,
381
+ (
382
+ 'A range of seconds to delay the initial evaluation of each thread '
383
+ 'in the thread pool, helping to prevent a burst in LLM QPS at '
384
+ 'startup. If set to None, no delay will be applied.'
385
+ )
386
+ ] = None
387
+
376
388
  def _run(self, evaluations: list[Evaluation]) -> None:
377
389
  """Runs the evaluations in parallel."""
378
390
  def _run_group(evaluation_group: list[Evaluation]):
@@ -405,8 +417,20 @@ class ParallelRunner(RunnerBase):
405
417
  self, evaluation: Evaluation, items: Iterator[Example]
406
418
  ) -> None:
407
419
  """Override run items to run in parallel."""
420
+ if self.concurrent_startup_delay is not None:
421
+ thread_delayed = {}
422
+ def _evaluate_item(item: Example):
423
+ thread_id = threading.current_thread().ident
424
+ if thread_id not in thread_delayed:
425
+ thread_delayed[thread_id] = True
426
+ time.sleep(random.randint(*self.concurrent_startup_delay))
427
+ return self.evaluate_item(evaluation, item)
428
+ else:
429
+ def _evaluate_item(item: Example):
430
+ return self.evaluate_item(evaluation, item)
431
+
408
432
  for _, _, _ in lf.concurrent_map(
409
- lambda item: self.evaluate_item(evaluation, item),
433
+ _evaluate_item,
410
434
  items,
411
435
  max_workers=evaluation.max_workers,
412
436
  timeout=self.timeout,
@@ -198,7 +198,9 @@ class RunnerTest(unittest.TestCase):
198
198
  )
199
199
  # Global cache.
200
200
  root_dir = os.path.join(tempfile.gettempdir(), 'global_cache')
201
- run = exp.run(root_dir, runner='sequential', use_cache='global', plugins=[])
201
+ run = exp.run(
202
+ root_dir, 'new', runner='sequential', use_cache='global', plugins=[]
203
+ )
202
204
  self.assertTrue(pg.io.path_exists(run.output_path_for(exp, 'cache.json')))
203
205
  self.assertEqual(exp.usage_summary.cached.total.num_requests, 4)
204
206
  self.assertEqual(exp.usage_summary.uncached.total.num_requests, 2)
@@ -206,7 +208,8 @@ class RunnerTest(unittest.TestCase):
206
208
  # Per-dataset cache.
207
209
  root_dir = os.path.join(tempfile.gettempdir(), 'per_dataset')
208
210
  run = exp.run(
209
- root_dir, runner='sequential', use_cache='per_dataset', plugins=[]
211
+ root_dir, 'new', runner='sequential',
212
+ use_cache='per_dataset', plugins=[]
210
213
  )
211
214
  for leaf in exp.leaf_nodes:
212
215
  self.assertTrue(
@@ -226,6 +229,9 @@ class RunnerTest(unittest.TestCase):
226
229
  self.assertEqual(exp.usage_summary.cached.total.num_requests, 0)
227
230
  self.assertEqual(exp.usage_summary.uncached.total.num_requests, 6)
228
231
 
232
+
233
+ class ParallelRunnerTest(RunnerTest):
234
+
229
235
  def test_parallel_runner(self):
230
236
  plugin = TestPlugin()
231
237
  exp = test_helper.test_experiment()
@@ -266,6 +272,22 @@ class RunnerTest(unittest.TestCase):
266
272
  self.assertEqual(node.progress.num_failed, 0)
267
273
  self.assertEqual(node.progress.num_processed, node.progress.num_total)
268
274
 
275
def test_concurrent_startup_delay(self):
  """Runs the parallel runner with a startup delay range of (0, 5)."""
  experiment = test_helper.test_experiment()
  output_dir = os.path.join(
      tempfile.gettempdir(), 'test_concurrent_startup_delay'
  )
  # The test only checks that the run completes; the result is not inspected.
  _ = experiment.run(
      output_dir,
      runner='parallel',
      plugins=[TestPlugin()],
      concurrent_startup_delay=(0, 5),
  )
287
+
288
+
289
+ class DebugRunnerTest(RunnerTest):
290
+
269
291
  def test_debug_runner(self):
270
292
  plugin = TestPlugin()
271
293
  exp = test_helper.test_experiment()
@@ -124,17 +124,18 @@ class LMSamplingUsage(pg.Object):
124
124
  def __add__(self, other: Optional['LMSamplingUsage']) -> 'LMSamplingUsage':
125
125
  if other is None:
126
126
  return self
127
+ if self.estimated_cost is None:
128
+ estimated_cost = other.estimated_cost
129
+ elif other.estimated_cost is None:
130
+ estimated_cost = self.estimated_cost
131
+ else:
132
+ estimated_cost = self.estimated_cost + other.estimated_cost
127
133
  return LMSamplingUsage(
128
134
  prompt_tokens=self.prompt_tokens + other.prompt_tokens,
129
135
  completion_tokens=self.completion_tokens + other.completion_tokens,
130
136
  total_tokens=self.total_tokens + other.total_tokens,
131
137
  num_requests=self.num_requests + other.num_requests,
132
- estimated_cost=(
133
- self.estimated_cost + other.estimated_cost # pylint: disable=g-long-ternary
134
- if (self.estimated_cost is not None
135
- and other.estimated_cost is not None)
136
- else None
137
- )
138
+ estimated_cost=estimated_cost,
138
139
  )
139
140
 
140
141
  def __radd__(self, other: Optional['LMSamplingUsage']) -> 'LMSamplingUsage':
@@ -956,7 +957,9 @@ class UsageSummary(pg.Object, pg.views.HtmlTreeView.Extension):
956
957
  if self._usage_badge is not None:
957
958
  self._usage_badge.update(
958
959
  self._badge_text(),
959
- tooltip=pg.format(self.total, verbose=False),
960
+ tooltip=pg.format(
961
+ self, verbose=False, custom_format=self._tooltip_format
962
+ ),
960
963
  styles=dict(color=self._badge_color()),
961
964
  )
962
965
 
@@ -978,6 +981,14 @@ class UsageSummary(pg.Object, pg.views.HtmlTreeView.Extension):
978
981
  green = int(255 * (1 - normalized_value))
979
982
  return f'rgb({red}, {green}, 0)'
980
983
 
984
+ def _tooltip_format(self, v, root_indent):
985
+ del root_indent
986
+ if isinstance(v, int):
987
+ return f'{v:,}'
988
+ if isinstance(v, float):
989
+ return f'{v:,.3f}'
990
+ return None
991
+
981
992
  def _html_tree_view(
982
993
  self,
983
994
  *,
@@ -993,7 +1004,9 @@ class UsageSummary(pg.Object, pg.views.HtmlTreeView.Extension):
993
1004
  if usage_badge is None:
994
1005
  usage_badge = pg.views.html.controls.Badge(
995
1006
  self._badge_text(),
996
- tooltip=pg.format(self.total, verbose=False),
1007
+ tooltip=pg.format(
1008
+ self, custom_format=self._tooltip_format, verbose=False
1009
+ ),
997
1010
  css_classes=['usage-summary'],
998
1011
  styles=dict(color=self._badge_color()),
999
1012
  interactive=True,
@@ -744,6 +744,13 @@ class LMSamplingUsageTest(unittest.TestCase):
744
744
  self.assertEqual(usage1 + usage2, usage1 + usage2)
745
745
  self.assertIs(usage1 + None, usage1)
746
746
  self.assertIs(None + usage1, usage1)
747
+ usage3 = lm_lib.LMSamplingUsage(100, 200, 300, 4, None)
748
+ self.assertEqual(
749
+ usage1 + usage3, lm_lib.LMSamplingUsage(200, 400, 600, 8, 5.0)
750
+ )
751
+ self.assertEqual(
752
+ usage3 + usage1, lm_lib.LMSamplingUsage(200, 400, 600, 8, 5.0)
753
+ )
747
754
 
748
755
  def test_usage_not_available(self):
749
756
  usage_not_available = lm_lib.UsageNotAvailable()
@@ -100,6 +100,7 @@ from langfun.core.llms.anthropic import Claude3Sonnet
100
100
  from langfun.core.llms.anthropic import Claude3Haiku
101
101
  from langfun.core.llms.anthropic import VertexAIAnthropic
102
102
  from langfun.core.llms.anthropic import VertexAIClaude3_5_Sonnet_20241022
103
+ from langfun.core.llms.anthropic import VertexAIClaude3_5_Sonnet_20240620
103
104
  from langfun.core.llms.anthropic import VertexAIClaude3_5_Haiku_20241022
104
105
 
105
106
  from langfun.core.llms.groq import Groq
@@ -53,6 +53,13 @@ SUPPORTED_MODELS_AND_SETTINGS = {
53
53
  cost_per_1k_input_tokens=0.003,
54
54
  cost_per_1k_output_tokens=0.015,
55
55
  ),
56
+ 'claude-3-5-sonnet@20240620': pg.Dict(
57
+ max_tokens=8192,
58
+ rpm=1000,
59
+ tpm=100000,
60
+ cost_per_1k_input_tokens=0.003,
61
+ cost_per_1k_output_tokens=0.015,
62
+ ),
56
63
  'claude-3-5-haiku@20241022': pg.Dict(
57
64
  max_tokens=8192,
58
65
  rpm=1000,
@@ -459,6 +466,11 @@ class VertexAIClaude3_5_Sonnet_20241022(VertexAIAnthropic): # pylint: disable=i
459
466
  model = 'claude-3-5-sonnet-v2@20241022'
460
467
 
461
468
 
469
class VertexAIClaude3_5_Sonnet_20240620(VertexAIAnthropic):  # pylint: disable=invalid-name
  """Claude 3.5 Sonnet (model id `claude-3-5-sonnet@20240620`) on VertexAI."""

  model = 'claude-3-5-sonnet@20240620'
472
+
473
+
462
474
  class VertexAIClaude3_5_Haiku_20241022(VertexAIAnthropic): # pylint: disable=invalid-name
463
475
  """Anthropic's Claude 3.5 Haiku model on VertexAI."""
464
476
  model = 'claude-3-5-haiku@20241022'
@@ -199,6 +199,12 @@ class VertexAITest(unittest.TestCase):
199
199
  # There is a discrepancy between the `property_ordering` in the
200
200
  # Google-internal version and the open-source version.
201
201
  actual['response_schema'].pop('property_ordering', None)
202
+ if pg.KeyPath.parse('response_schema.type_').get(actual):
203
+ actual['response_schema']['type'] = actual['response_schema'].pop('type_')
204
+ if pg.KeyPath.parse('response_schema.properties.name.type_').get(actual):
205
+ actual['response_schema']['properties']['name']['type'] = actual[
206
+ 'response_schema']['properties']['name'].pop('type_')
207
+
202
208
  self.assertEqual(
203
209
  actual,
204
210
  dict(
@@ -209,9 +215,9 @@ class VertexAITest(unittest.TestCase):
209
215
  stop_sequences=['\n'],
210
216
  response_mime_type='application/json',
211
217
  response_schema={
212
- 'type_': 'OBJECT',
218
+ 'type': 'OBJECT',
213
219
  'properties': {
214
- 'name': {'type_': 'STRING'}
220
+ 'name': {'type': 'STRING'}
215
221
  },
216
222
  'required': ['name'],
217
223
  'title': 'Person',
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: langfun
3
- Version: 0.1.2.dev202411140804
3
+ Version: 0.1.2.dev202411160804
4
4
  Summary: Langfun: Language as Functions.
5
5
  Home-page: https://github.com/google/langfun
6
6
  Author: Langfun Authors
@@ -1,4 +1,4 @@
1
- langfun/__init__.py,sha256=mCES7t3R7Z-ZQYvG38-yrVqZubrXNfGCa8tI5HGB7mE,2274
1
+ langfun/__init__.py,sha256=o_HvoQggla5uqNA7uF1126aZhayHnVNP__nd_t5ElEQ,2358
2
2
  langfun/core/__init__.py,sha256=xlvFTXc7IKUTs8aCFRFhzOLTmmeuhXgk9yx2InBLNiA,4937
3
3
  langfun/core/component.py,sha256=HVrEoTL1Y01iqOHC3FYdbAOnffqfHHtGJXoK1vkdEwo,11583
4
4
  langfun/core/component_test.py,sha256=sG-T2wpvBfHqWGZE7sc4NayJj2aj5QFBzSwFiwrGEIc,10376
@@ -8,8 +8,8 @@ langfun/core/console.py,sha256=Fra2_MSWZbFh6rY8HZoYgpGLsrNvhaGuL03znOwQbhM,2529
8
8
  langfun/core/console_test.py,sha256=pBOcuNMJdVELywvroptfcRtJMsegMm3wSlHAL2TdxVk,1679
9
9
  langfun/core/langfunc.py,sha256=G50YgoVZ0y1GFw2ev41MlOqr6qa8YakbvNC0h_E0PiA,11140
10
10
  langfun/core/langfunc_test.py,sha256=fKIAqcSNI_7M6nwoZW77HEam8Oa6vcWhsCNgVJanzb4,8822
11
- langfun/core/language_model.py,sha256=UtLvclKx55_SAKZ-ajaquudLxKorTARAeZFme5IaPi8,33499
12
- langfun/core/language_model_test.py,sha256=td81wm4zFPeMb16nmIuIZ6eHtpYhH0k6IeiYLfGgR0o,31525
11
+ langfun/core/language_model.py,sha256=b15MZ_qbydnz5vQ09t7sf9tc3C7qWvMSxUrGfT0p99I,33827
12
+ langfun/core/language_model_test.py,sha256=hnYhtw7GM_TbhgsJzHNYTaoDewUlPHpOVlI7xEkCFuI,31783
13
13
  langfun/core/logging.py,sha256=uslllP0RTGN223oro1m4nZZ0bFppcL07OwbFKm2iG6k,7519
14
14
  langfun/core/logging_test.py,sha256=b5bPTSUoYeICATaO6I8dOVumodwRbxSp1Oz96Sf3KcE,6104
15
15
  langfun/core/memory.py,sha256=f-asN1F7Vehgdn_fK84v73GrEUOxRtaW934keutTKjk,2416
@@ -29,6 +29,11 @@ langfun/core/template.py,sha256=_Sae_WsRo_yvwul0nqAPTOa0NOjW1zNYbW0CQpvg7l0,2538
29
29
  langfun/core/template_test.py,sha256=Qokz1hQFhRYaTZWBWGqvPJ0NXC9B9ennUpnRYHEf0hE,20542
30
30
  langfun/core/text_formatting.py,sha256=d7t9vaY6aCn1dkfkikpNYnBy5E_i93vHbfyDWFclGZU,5284
31
31
  langfun/core/text_formatting_test.py,sha256=ck0Xzdd4YF4CtCUj7VE0GybfbAyKQ8p3xkM1FBGrqIk,2096
32
+ langfun/core/agentic/__init__.py,sha256=ndoDX0sAYsa3eVdXuu6nB-a-BH5TaK3urW6zAaFiyVs,1110
33
+ langfun/core/agentic/action.py,sha256=Am5E1EH1ZBAhzagbnDVRnR4vBzI4H6MEtQ58laSPfTg,7515
34
+ langfun/core/agentic/action_eval.py,sha256=ZtjTh34S7XPIUqandQ0YwAtzw-S7ofuZ7rRXnRbUMdQ,4424
35
+ langfun/core/agentic/action_eval_test.py,sha256=tRUkWmOE9p0rpNOq19xAY2oDEnYsEEykjg6sUpAwJk0,2832
36
+ langfun/core/agentic/action_test.py,sha256=CBsUQICD8yPCDUBBFouSkZuyLAcK_C-AWYc28Zts10E,2624
32
37
  langfun/core/coding/__init__.py,sha256=5utju_fwEsImaiftx4oXKl9FAM8p281k8-Esdh_-m1w,835
33
38
  langfun/core/coding/python/__init__.py,sha256=MJ-vubliz-ebrZH3OBRKBwMi0S9-FrhGCp8YQLR6_I4,1776
34
39
  langfun/core/coding/python/correction.py,sha256=WiBdoScL-6C___iA3Tg3vizuYtJWI-_4wy9zcMfVpj8,7020
@@ -71,11 +76,11 @@ langfun/core/eval/v2/progress_tracking.py,sha256=1imwSbllxHWG3zYrzo2NvytBZsVtjqu
71
76
  langfun/core/eval/v2/progress_tracking_test.py,sha256=eY2HvZeEXDA5Zyfi2m5NDWO_9kSfQsaAOEcIhkSbWCY,1874
72
77
  langfun/core/eval/v2/reporting.py,sha256=TGkli1IDwqfqsCJ_WslOMGk_24JDg7oRRTGXlAJlWpc,4361
73
78
  langfun/core/eval/v2/reporting_test.py,sha256=JxffbUPWInUyLjo-AQVFrllga884Mdfm05R86FtxSss,1482
74
- langfun/core/eval/v2/runners.py,sha256=2OHAVTbqq9hZ3qZpUEvQ--9X-Cr_z8Ghc3MRXCfclpk,13442
75
- langfun/core/eval/v2/runners_test.py,sha256=s3GgWA-H9x0JyPhPZq2s9-5GXGHo5dSbDD-4faX0h_E,11164
79
+ langfun/core/eval/v2/runners.py,sha256=zJmu-amUiYv1g0Ek4c3mXkBgp-AFvSF7WpXVZCCf7Y4,14245
80
+ langfun/core/eval/v2/runners_test.py,sha256=UeiUNygux_U6iGVG18rhp68ZE4hoWeoT6XsXvSjxNQg,11620
76
81
  langfun/core/eval/v2/test_helper.py,sha256=pDpZTBnWRR5xjJv3Uy3NWEzArqlL8FTMOgeR4C53F5M,2348
77
- langfun/core/llms/__init__.py,sha256=i0m-fVpwuIN_Jno1M-5O9ikzbVbvXWJKFQZO22MFPq8,6272
78
- langfun/core/llms/anthropic.py,sha256=XPQxjfe9O4b-CygCgqvQU0MPSfe1rU7uErNbo8zth7Q,13606
82
+ langfun/core/llms/__init__.py,sha256=uR2vLghsnZqY6OjZKAs9Lo-YFNxZNunf3A0q6-1GYlc,6346
83
+ langfun/core/llms/anthropic.py,sha256=uJXVgaFONL8okOSVQ4VGMGht_VZ30m1hoLzmDbIjmks,13990
79
84
  langfun/core/llms/anthropic_test.py,sha256=-2U4kc_pgBM7wqxu8RuxzyHPGww1EAWqKUvN4PW8Btw,8058
80
85
  langfun/core/llms/compositional.py,sha256=csW_FLlgL-tpeyCOTVvfUQkMa_zCN5Y2I-YbSNuK27U,2872
81
86
  langfun/core/llms/compositional_test.py,sha256=4eTnOer-DncRKGaIJW2ZQQMLnt5r2R0UIx_DYOvGAQo,2027
@@ -92,7 +97,7 @@ langfun/core/llms/openai_test.py,sha256=_8cd3VRNEUfE0-Ko1RiM6MlC5hjalRj7nYTJNhG1
92
97
  langfun/core/llms/rest.py,sha256=sWbYUV8S3SuOg9giq7xwD-xDRfaF7NP_ig7bI52-Rj4,3442
93
98
  langfun/core/llms/rest_test.py,sha256=NZ3Nf0XQVpT9kLP5cBVo_yBHLI7vWTYhWQxYEJVMGs4,3472
94
99
  langfun/core/llms/vertexai.py,sha256=-KB880Ovab6CQqI-Y5Y6V7RlEA0tAIazmnnG74Ebp4A,18866
95
- langfun/core/llms/vertexai_test.py,sha256=7uBVOF5VF86xQ9HFAbSTh4J-0NjYLnuotBS1YRm-vgw,10529
100
+ langfun/core/llms/vertexai_test.py,sha256=I8gEHLRXZZGq_d2VDtJAkAIzf-lNSCoB8y2lwFckY-w,10885
96
101
  langfun/core/llms/cache/__init__.py,sha256=QAo3InUMDM_YpteNnVCSejI4zOsnjSMWKJKzkb3VY64,993
97
102
  langfun/core/llms/cache/base.py,sha256=rt3zwmyw0y9jsSGW-ZbV1vAfLxQ7_3AVk0l2EySlse4,3918
98
103
  langfun/core/llms/cache/in_memory.py,sha256=l6b-iU9OTfTRo9Zmg4VrQIuArs4cCJDOpXiEpvNocjo,5004
@@ -143,8 +148,8 @@ langfun/core/templates/demonstration.py,sha256=vCrgYubdZM5Umqcgp8NUVGXgr4P_c-fik
143
148
  langfun/core/templates/demonstration_test.py,sha256=SafcDQ0WgI7pw05EmPI2S4v1t3ABKzup8jReCljHeK4,2162
144
149
  langfun/core/templates/selfplay.py,sha256=yhgrJbiYwq47TgzThmHrDQTF4nDrTI09CWGhuQPNv-s,2273
145
150
  langfun/core/templates/selfplay_test.py,sha256=Ot__1P1M8oJfoTp-M9-PQ6HUXqZKyMwvZ5f7yQ3yfyM,2326
146
- langfun-0.1.2.dev202411140804.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
147
- langfun-0.1.2.dev202411140804.dist-info/METADATA,sha256=LNyNk_qsiVz-CAbbtkN4jzdSwDWGhlR5RkefV6lclFA,8890
148
- langfun-0.1.2.dev202411140804.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
149
- langfun-0.1.2.dev202411140804.dist-info/top_level.txt,sha256=RhlEkHxs1qtzmmtWSwYoLVJAc1YrbPtxQ52uh8Z9VvY,8
150
- langfun-0.1.2.dev202411140804.dist-info/RECORD,,
151
+ langfun-0.1.2.dev202411160804.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
152
+ langfun-0.1.2.dev202411160804.dist-info/METADATA,sha256=iHQLFR3kun6zscZZLnzVl_mxeTVMB7-C4bCXS_dLNm8,8890
153
+ langfun-0.1.2.dev202411160804.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
154
+ langfun-0.1.2.dev202411160804.dist-info/top_level.txt,sha256=RhlEkHxs1qtzmmtWSwYoLVJAc1YrbPtxQ52uh8Z9VvY,8
155
+ langfun-0.1.2.dev202411160804.dist-info/RECORD,,