langfun 0.1.2.dev202508250805__py3-none-any.whl → 0.1.2.dev202511110805__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of langfun might be problematic. Click here for more details.
- langfun/__init__.py +1 -1
- langfun/core/__init__.py +6 -1
- langfun/core/agentic/__init__.py +4 -0
- langfun/core/agentic/action.py +412 -103
- langfun/core/agentic/action_eval.py +9 -2
- langfun/core/agentic/action_test.py +68 -6
- langfun/core/async_support.py +104 -5
- langfun/core/async_support_test.py +23 -0
- langfun/core/coding/python/correction.py +19 -9
- langfun/core/coding/python/execution.py +14 -12
- langfun/core/coding/python/generation.py +21 -16
- langfun/core/coding/python/sandboxing.py +23 -3
- langfun/core/component.py +42 -3
- langfun/core/concurrent.py +70 -6
- langfun/core/concurrent_test.py +9 -2
- langfun/core/console.py +1 -1
- langfun/core/data/conversion/anthropic.py +12 -3
- langfun/core/data/conversion/anthropic_test.py +8 -6
- langfun/core/data/conversion/gemini.py +9 -2
- langfun/core/data/conversion/gemini_test.py +12 -9
- langfun/core/data/conversion/openai.py +145 -31
- langfun/core/data/conversion/openai_test.py +161 -17
- langfun/core/eval/base.py +47 -43
- langfun/core/eval/base_test.py +4 -4
- langfun/core/eval/matching.py +5 -2
- langfun/core/eval/patching.py +3 -3
- langfun/core/eval/scoring.py +4 -3
- langfun/core/eval/v2/__init__.py +1 -0
- langfun/core/eval/v2/checkpointing.py +30 -4
- langfun/core/eval/v2/eval_test_helper.py +1 -1
- langfun/core/eval/v2/evaluation.py +60 -14
- langfun/core/eval/v2/example.py +22 -11
- langfun/core/eval/v2/experiment.py +51 -8
- langfun/core/eval/v2/metric_values.py +31 -3
- langfun/core/eval/v2/metric_values_test.py +32 -0
- langfun/core/eval/v2/metrics.py +39 -4
- langfun/core/eval/v2/metrics_test.py +14 -0
- langfun/core/eval/v2/progress.py +30 -1
- langfun/core/eval/v2/progress_test.py +27 -0
- langfun/core/eval/v2/progress_tracking_test.py +6 -0
- langfun/core/eval/v2/reporting.py +90 -71
- langfun/core/eval/v2/reporting_test.py +20 -6
- langfun/core/eval/v2/runners.py +27 -7
- langfun/core/eval/v2/runners_test.py +3 -0
- langfun/core/langfunc.py +45 -130
- langfun/core/langfunc_test.py +6 -4
- langfun/core/language_model.py +151 -31
- langfun/core/language_model_test.py +9 -3
- langfun/core/llms/__init__.py +12 -1
- langfun/core/llms/anthropic.py +157 -2
- langfun/core/llms/azure_openai.py +29 -17
- langfun/core/llms/cache/base.py +25 -3
- langfun/core/llms/cache/in_memory.py +48 -7
- langfun/core/llms/cache/in_memory_test.py +14 -4
- langfun/core/llms/compositional.py +25 -1
- langfun/core/llms/deepseek.py +30 -2
- langfun/core/llms/fake.py +39 -1
- langfun/core/llms/fake_test.py +9 -0
- langfun/core/llms/gemini.py +43 -7
- langfun/core/llms/google_genai.py +34 -1
- langfun/core/llms/groq.py +28 -3
- langfun/core/llms/llama_cpp.py +23 -4
- langfun/core/llms/openai.py +93 -3
- langfun/core/llms/openai_compatible.py +148 -27
- langfun/core/llms/openai_compatible_test.py +207 -20
- langfun/core/llms/openai_test.py +0 -2
- langfun/core/llms/rest.py +16 -1
- langfun/core/llms/vertexai.py +59 -8
- langfun/core/logging.py +1 -1
- langfun/core/mcp/__init__.py +10 -0
- langfun/core/mcp/client.py +177 -0
- langfun/core/mcp/client_test.py +71 -0
- langfun/core/mcp/session.py +241 -0
- langfun/core/mcp/session_test.py +54 -0
- langfun/core/mcp/testing/simple_mcp_client.py +33 -0
- langfun/core/mcp/testing/simple_mcp_server.py +33 -0
- langfun/core/mcp/tool.py +256 -0
- langfun/core/mcp/tool_test.py +197 -0
- langfun/core/memory.py +1 -0
- langfun/core/message.py +160 -55
- langfun/core/message_test.py +65 -81
- langfun/core/modalities/__init__.py +8 -0
- langfun/core/modalities/audio.py +21 -1
- langfun/core/modalities/image.py +19 -1
- langfun/core/modalities/mime.py +62 -3
- langfun/core/modalities/pdf.py +19 -1
- langfun/core/modalities/video.py +21 -1
- langfun/core/modality.py +167 -29
- langfun/core/modality_test.py +42 -12
- langfun/core/natural_language.py +1 -1
- langfun/core/sampling.py +4 -4
- langfun/core/sampling_test.py +20 -4
- langfun/core/structured/completion.py +34 -44
- langfun/core/structured/completion_test.py +23 -43
- langfun/core/structured/description.py +54 -50
- langfun/core/structured/function_generation.py +29 -12
- langfun/core/structured/mapping.py +74 -28
- langfun/core/structured/parsing.py +90 -74
- langfun/core/structured/parsing_test.py +0 -3
- langfun/core/structured/querying.py +242 -156
- langfun/core/structured/querying_test.py +95 -64
- langfun/core/structured/schema.py +70 -10
- langfun/core/structured/schema_generation.py +33 -14
- langfun/core/structured/scoring.py +45 -34
- langfun/core/structured/tokenization.py +24 -9
- langfun/core/subscription.py +2 -2
- langfun/core/template.py +175 -50
- langfun/core/template_test.py +123 -17
- langfun/env/__init__.py +43 -0
- langfun/env/base_environment.py +827 -0
- langfun/env/base_environment_test.py +473 -0
- langfun/env/base_feature.py +304 -0
- langfun/env/base_feature_test.py +228 -0
- langfun/env/base_sandbox.py +842 -0
- langfun/env/base_sandbox_test.py +1235 -0
- langfun/env/event_handlers/__init__.py +14 -0
- langfun/env/event_handlers/chain.py +233 -0
- langfun/env/event_handlers/chain_test.py +253 -0
- langfun/env/event_handlers/event_logger.py +472 -0
- langfun/env/event_handlers/event_logger_test.py +304 -0
- langfun/env/event_handlers/metric_writer.py +726 -0
- langfun/env/event_handlers/metric_writer_test.py +214 -0
- langfun/env/interface.py +1640 -0
- langfun/env/interface_test.py +151 -0
- langfun/env/load_balancers.py +59 -0
- langfun/env/load_balancers_test.py +139 -0
- langfun/env/test_utils.py +497 -0
- {langfun-0.1.2.dev202508250805.dist-info → langfun-0.1.2.dev202511110805.dist-info}/METADATA +7 -3
- langfun-0.1.2.dev202511110805.dist-info/RECORD +200 -0
- langfun-0.1.2.dev202508250805.dist-info/RECORD +0 -172
- {langfun-0.1.2.dev202508250805.dist-info → langfun-0.1.2.dev202511110805.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202508250805.dist-info → langfun-0.1.2.dev202511110805.dist-info}/licenses/LICENSE +0 -0
- {langfun-0.1.2.dev202508250805.dist-info → langfun-0.1.2.dev202511110805.dist-info}/top_level.txt +0 -0
|
@@ -24,7 +24,14 @@ import pyglove as pg
|
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
class ActionEval(lf.eval.v2.Evaluation):
|
|
27
|
-
"""
|
|
27
|
+
"""Evaluation for agentic actions.
|
|
28
|
+
|
|
29
|
+
`ActionEval` is a specialized evaluation class for executing and evaluating
|
|
30
|
+
agentic actions based on provided inputs. Each input example is expected to
|
|
31
|
+
contain an `action` attribute. The `process` method executes the action
|
|
32
|
+
within a dedicated `Session`, captures the final result, and returns it
|
|
33
|
+
along with the session details in the metadata.
|
|
34
|
+
"""
|
|
28
35
|
|
|
29
36
|
action_args: Annotated[
|
|
30
37
|
dict[str, Any],
|
|
@@ -68,7 +75,7 @@ class ExampleView(pg.Object):
|
|
|
68
75
|
class ActionEvalV1(lf_eval.Matching):
|
|
69
76
|
"""Base class for action evaluations.
|
|
70
77
|
|
|
71
|
-
The input function should
|
|
78
|
+
The input function should return a list of pg.Dict, with `action` and
|
|
72
79
|
`groundtruth` fields.
|
|
73
80
|
"""
|
|
74
81
|
# We override the schema and prompt to dummy values since they are not used.
|
|
@@ -34,6 +34,7 @@ class Bar(action_lib.Action):
|
|
|
34
34
|
time.sleep(self.simulate_execution_time)
|
|
35
35
|
session.query('bar', lm=lm)
|
|
36
36
|
session.add_metadata(note='bar')
|
|
37
|
+
session.update_progress('Query completed')
|
|
37
38
|
if self.simulate_action_error:
|
|
38
39
|
raise ValueError('Bar error')
|
|
39
40
|
return 2 + pg.contextual_value('baz', 0)
|
|
@@ -66,12 +67,18 @@ class Foo(action_lib.Action):
|
|
|
66
67
|
time.sleep(self.simulate_execution_time[2])
|
|
67
68
|
return lf_structured.query(f'subtask_{i}', lm=lm)
|
|
68
69
|
|
|
70
|
+
self._state = []
|
|
69
71
|
for i, output, error in session.concurrent_map(
|
|
70
|
-
_sub_task,
|
|
72
|
+
_sub_task,
|
|
73
|
+
range(3),
|
|
74
|
+
max_workers=2,
|
|
75
|
+
ordered=True,
|
|
76
|
+
silence_on_errors=None,
|
|
71
77
|
):
|
|
72
78
|
assert isinstance(i, int), i
|
|
73
79
|
assert isinstance(output, str), output
|
|
74
80
|
assert error is None, error
|
|
81
|
+
self._state.append(i)
|
|
75
82
|
return self.x + Bar(
|
|
76
83
|
simulate_action_error=self.simulate_action_error,
|
|
77
84
|
simulate_execution_time=self.simulate_execution_time[3]
|
|
@@ -118,10 +125,11 @@ class SessionTest(unittest.TestCase):
|
|
|
118
125
|
foo = Foo(1)
|
|
119
126
|
self.assertIsNone(foo.session)
|
|
120
127
|
self.assertIsNone(foo.invocation)
|
|
128
|
+
self.assertIsNone(foo.state)
|
|
121
129
|
self.assertIsNone(foo.result)
|
|
122
130
|
self.assertIsNone(foo.metadata)
|
|
123
131
|
|
|
124
|
-
session = action_lib.Session(id='agent@1')
|
|
132
|
+
session = action_lib.Session(id='agent@1', verbose=True)
|
|
125
133
|
self.assertEqual(session.id, 'agent@1')
|
|
126
134
|
self.assertFalse(session.has_started)
|
|
127
135
|
self.assertFalse(session.has_stopped)
|
|
@@ -130,12 +138,14 @@ class SessionTest(unittest.TestCase):
|
|
|
130
138
|
_ = session.to_html()
|
|
131
139
|
|
|
132
140
|
with session:
|
|
133
|
-
result = foo(session, lm=lm
|
|
141
|
+
result = foo(session, lm=lm)
|
|
134
142
|
|
|
135
143
|
self.assertTrue(session.has_started)
|
|
136
144
|
self.assertTrue(session.has_stopped)
|
|
137
145
|
self.assertEqual(result, 3)
|
|
138
146
|
self.assertIsNone(foo.session)
|
|
147
|
+
self.assertEqual(foo.state, [0, 1, 2])
|
|
148
|
+
self.assertIs(foo.invocation.state, foo.state)
|
|
139
149
|
self.assertEqual(foo.result, 3)
|
|
140
150
|
self.assertEqual(
|
|
141
151
|
foo.metadata, dict(note='foo', subtask_0=0, subtask_1=1, subtask_2=2)
|
|
@@ -366,7 +376,7 @@ class SessionTest(unittest.TestCase):
|
|
|
366
376
|
self.assertFalse(session.has_stopped)
|
|
367
377
|
|
|
368
378
|
session.start()
|
|
369
|
-
result = foo(session, lm=lm
|
|
379
|
+
result = foo(session, lm=lm)
|
|
370
380
|
session.end(result)
|
|
371
381
|
|
|
372
382
|
self.assertTrue(session.has_started)
|
|
@@ -386,7 +396,7 @@ class SessionTest(unittest.TestCase):
|
|
|
386
396
|
session = action_lib.Session(id='agent@1')
|
|
387
397
|
with self.assertRaisesRegex(ValueError, 'Bar error'):
|
|
388
398
|
with session:
|
|
389
|
-
foo(session, lm=lm
|
|
399
|
+
foo(session, lm=lm)
|
|
390
400
|
self.assertTrue(session.has_started)
|
|
391
401
|
self.assertTrue(session.has_stopped)
|
|
392
402
|
self.assertTrue(session.has_error)
|
|
@@ -399,7 +409,7 @@ class SessionTest(unittest.TestCase):
|
|
|
399
409
|
foo = Foo(1, simulate_action_error=True)
|
|
400
410
|
session = action_lib.Session(id='agent@1')
|
|
401
411
|
with self.assertRaisesRegex(ValueError, 'Please call `Session.start'):
|
|
402
|
-
foo(session, lm=lm
|
|
412
|
+
foo(session, lm=lm)
|
|
403
413
|
|
|
404
414
|
def test_succeed_with_multiple_actions(self):
|
|
405
415
|
lm = fake.StaticResponse('lm response')
|
|
@@ -480,6 +490,33 @@ class SessionTest(unittest.TestCase):
|
|
|
480
490
|
):
|
|
481
491
|
foo(lm=lm, max_execution_time=1.0)
|
|
482
492
|
|
|
493
|
+
def test_event_handler(self):
|
|
494
|
+
|
|
495
|
+
class MyActionHandler(pg.Object, action_lib.SessionEventHandler):
|
|
496
|
+
def _on_bound(self):
|
|
497
|
+
super()._on_bound()
|
|
498
|
+
self.progresses = []
|
|
499
|
+
|
|
500
|
+
def on_action_progress(self, session, action, title, **kwargs):
|
|
501
|
+
self.progresses.append((action.id, title))
|
|
502
|
+
|
|
503
|
+
handler = MyActionHandler()
|
|
504
|
+
session = action_lib.Session(
|
|
505
|
+
id='agent@1',
|
|
506
|
+
event_handler=action_lib.SessionEventHandlerChain(
|
|
507
|
+
handlers=[handler, action_lib.SessionLogging()]
|
|
508
|
+
)
|
|
509
|
+
)
|
|
510
|
+
bar = Bar()
|
|
511
|
+
with session:
|
|
512
|
+
bar(session, lm=fake.StaticResponse('lm response'))
|
|
513
|
+
session.update_progress('Trajectory completed')
|
|
514
|
+
|
|
515
|
+
self.assertEqual(handler.progresses, [
|
|
516
|
+
('agent@1:/a1', 'Query completed'),
|
|
517
|
+
('agent@1:', 'Trajectory completed'),
|
|
518
|
+
])
|
|
519
|
+
|
|
483
520
|
def test_log(self):
|
|
484
521
|
session = action_lib.Session()
|
|
485
522
|
session.debug('hi', x=1, y=2)
|
|
@@ -493,6 +530,31 @@ class SessionTest(unittest.TestCase):
|
|
|
493
530
|
self.assertIn('agent@', session.id)
|
|
494
531
|
self.assertIsInstance(session.as_message(), lf.AIMessage)
|
|
495
532
|
|
|
533
|
+
def test_query_with_track_if(self):
|
|
534
|
+
lm = fake.StaticResponse('lm response')
|
|
535
|
+
session = action_lib.Session()
|
|
536
|
+
|
|
537
|
+
# Render session to trigger javascript updates to the HTML when
|
|
538
|
+
# operating on the session.
|
|
539
|
+
_ = session.to_html()
|
|
540
|
+
with session:
|
|
541
|
+
# This query will succeed.
|
|
542
|
+
session.query(
|
|
543
|
+
'prompt1',
|
|
544
|
+
schema=None,
|
|
545
|
+
lm=lm,
|
|
546
|
+
track_if=lambda q: not q.has_error,
|
|
547
|
+
default=None)
|
|
548
|
+
# This query will fail during parsing.
|
|
549
|
+
session.query(
|
|
550
|
+
'prompt2',
|
|
551
|
+
schema=int,
|
|
552
|
+
lm=lm,
|
|
553
|
+
track_if=lambda q: not q.has_error,
|
|
554
|
+
default=None)
|
|
555
|
+
self.assertEqual(len(session.root.queries), 1)
|
|
556
|
+
self.assertIsNone(session.root.queries[0].error)
|
|
557
|
+
|
|
496
558
|
|
|
497
559
|
if __name__ == '__main__':
|
|
498
560
|
unittest.main()
|
langfun/core/async_support.py
CHANGED
|
@@ -11,18 +11,117 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
"""
|
|
14
|
+
"""Utilities for asynchronous programming in Langfun."""
|
|
15
15
|
|
|
16
16
|
import asyncio
|
|
17
|
-
|
|
17
|
+
import contextlib
|
|
18
|
+
from typing import Any, Awaitable, Callable, Iterator
|
|
19
|
+
import anyio
|
|
18
20
|
import pyglove as pg
|
|
19
21
|
|
|
20
22
|
|
|
21
23
|
async def invoke_async(
|
|
22
|
-
|
|
24
|
+
sync_callable: Callable[..., Any], *args, **kwargs
|
|
23
25
|
) -> Any:
|
|
24
|
-
"""Invokes a callable asynchronously
|
|
26
|
+
"""Invokes a sync callable asynchronously in a separate thread.
|
|
27
|
+
|
|
28
|
+
This is useful for wrapping a sync function into an async function,
|
|
29
|
+
allowing multiple calls of the sync function to run concurrently.
|
|
30
|
+
`lf.context` will be propagated to the thread that runs the sync callable.
|
|
31
|
+
|
|
32
|
+
Args:
|
|
33
|
+
sync_callable: The sync callable to invoke.
|
|
34
|
+
*args: Positional arguments to pass to the callable.
|
|
35
|
+
**kwargs: Keyword arguments to pass to the callable.
|
|
36
|
+
|
|
37
|
+
Returns:
|
|
38
|
+
An awaitable that resolves to the return value of the sync_callable.
|
|
39
|
+
"""
|
|
25
40
|
return await asyncio.to_thread(
|
|
26
41
|
# Enable `lf.context` manager for async calls.
|
|
27
|
-
pg.with_contextual_override(
|
|
42
|
+
pg.with_contextual_override(sync_callable), *args, **kwargs
|
|
28
43
|
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def invoke_sync(
|
|
47
|
+
async_callable: Callable[..., Awaitable[Any]],
|
|
48
|
+
*args,
|
|
49
|
+
**kwargs
|
|
50
|
+
) -> Any:
|
|
51
|
+
"""Invokes an async callable synchronously.
|
|
52
|
+
|
|
53
|
+
This is useful for calling an async function from a sync context.
|
|
54
|
+
If there is an existing async event loop in current thread managed by
|
|
55
|
+
`lf.sync_context_manager`, it will be used for running the async callable.
|
|
56
|
+
Otherwise, `anyio.run` will be used to run the async callable in a new
|
|
57
|
+
event loop.
|
|
58
|
+
`lf.context` will be propagated to the async callable.
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
async_callable: The async callable to invoke.
|
|
62
|
+
*args: Positional arguments to pass to the callable.
|
|
63
|
+
**kwargs: Keyword arguments to pass to the callable.
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
The return value of the async_callable.
|
|
67
|
+
"""
|
|
68
|
+
async def _invoke():
|
|
69
|
+
return await async_callable(*args, **kwargs)
|
|
70
|
+
invoke_fn = pg.with_contextual_override(_invoke)
|
|
71
|
+
blocking_portal = pg.utils.thread_local_get('__blocking_portal__', None)
|
|
72
|
+
if blocking_portal is None:
|
|
73
|
+
return anyio.run(invoke_fn)
|
|
74
|
+
return blocking_portal.call(invoke_fn)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@contextlib.contextmanager
|
|
78
|
+
def sync_context_manager(
|
|
79
|
+
async_context_manager: contextlib.AbstractAsyncContextManager[Any]
|
|
80
|
+
) -> Iterator[Any]:
|
|
81
|
+
"""Adapts an async context manager to a sync context manager.
|
|
82
|
+
|
|
83
|
+
sync_context_manager installs a blocking portal in current thread to run the
|
|
84
|
+
async context manager in a blocking way. It's useful for running async code in
|
|
85
|
+
sync context managers. Nested `sync_context_manager` calls share the
|
|
86
|
+
same event loop.
|
|
87
|
+
|
|
88
|
+
Example:
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
@contextlib.asynccontextmanager
|
|
92
|
+
async def foo(x):
|
|
93
|
+
try:
|
|
94
|
+
yield x
|
|
95
|
+
finally:
|
|
96
|
+
pass
|
|
97
|
+
|
|
98
|
+
with lf.sync_context_manager(foo(x)) as x:
|
|
99
|
+
with lf.sync_context_manager(foo(y)) as y:
|
|
100
|
+
...
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
Args:
|
|
104
|
+
async_context_manager: The async context manager to adapt.
|
|
105
|
+
|
|
106
|
+
Yields:
|
|
107
|
+
The value yielded by the async context manager.
|
|
108
|
+
"""
|
|
109
|
+
blocking_portal = pg.utils.thread_local_get('__blocking_portal__', None)
|
|
110
|
+
portal_exit_stack = None
|
|
111
|
+
|
|
112
|
+
try:
|
|
113
|
+
if blocking_portal is None:
|
|
114
|
+
portal_exit_stack = contextlib.ExitStack()
|
|
115
|
+
blocking_portal = portal_exit_stack.enter_context(
|
|
116
|
+
anyio.from_thread.start_blocking_portal()
|
|
117
|
+
)
|
|
118
|
+
pg.utils.thread_local_set('__blocking_portal__', blocking_portal)
|
|
119
|
+
context_manager = blocking_portal.wrap_async_context_manager(
|
|
120
|
+
async_context_manager
|
|
121
|
+
)
|
|
122
|
+
with context_manager as value:
|
|
123
|
+
yield value
|
|
124
|
+
finally:
|
|
125
|
+
if portal_exit_stack is not None:
|
|
126
|
+
portal_exit_stack.close()
|
|
127
|
+
pg.utils.thread_local_del('__blocking_portal__')
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import asyncio
|
|
16
|
+
import contextlib
|
|
16
17
|
import time
|
|
17
18
|
import unittest
|
|
18
19
|
|
|
@@ -34,6 +35,28 @@ class AsyncSupportTest(unittest.TestCase):
|
|
|
34
35
|
with pg.contextual_override(z=3):
|
|
35
36
|
self.assertEqual(asyncio.run(r), 6)
|
|
36
37
|
|
|
38
|
+
def test_invoke_sync(self):
|
|
39
|
+
@contextlib.asynccontextmanager
|
|
40
|
+
async def bar(x):
|
|
41
|
+
try:
|
|
42
|
+
yield x
|
|
43
|
+
finally:
|
|
44
|
+
pass
|
|
45
|
+
|
|
46
|
+
async def foo(x, *, y):
|
|
47
|
+
time.sleep(2)
|
|
48
|
+
return x + y + pg.contextual_value('z', 0)
|
|
49
|
+
|
|
50
|
+
with pg.contextual_override(z=3):
|
|
51
|
+
with async_support.sync_context_manager(bar(1)) as x:
|
|
52
|
+
self.assertEqual(x, 1)
|
|
53
|
+
with async_support.sync_context_manager(bar(2)) as y:
|
|
54
|
+
self.assertEqual(y, 2)
|
|
55
|
+
self.assertEqual(async_support.invoke_sync(foo, 1, y=2), 6)
|
|
56
|
+
|
|
57
|
+
with pg.contextual_override(z=2):
|
|
58
|
+
self.assertEqual(async_support.invoke_sync(foo, 1, y=2), 5)
|
|
59
|
+
|
|
37
60
|
|
|
38
61
|
if __name__ == '__main__':
|
|
39
62
|
unittest.main()
|
|
@@ -19,13 +19,23 @@ import pyglove as pg
|
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class CodeWithError(pg.Object):
|
|
22
|
-
"""Python code with error.
|
|
22
|
+
"""A structure representing Python code along with an execution error.
|
|
23
|
+
|
|
24
|
+
This is used as input to a language model for error correction, providing
|
|
25
|
+
the model with the code that failed and the error message it produced.
|
|
26
|
+
"""
|
|
23
27
|
|
|
24
28
|
code: str
|
|
25
29
|
error: str
|
|
26
30
|
|
|
27
31
|
|
|
28
32
|
class CorrectedCode(pg.Object):
|
|
33
|
+
"""A structure containing corrected Python code.
|
|
34
|
+
|
|
35
|
+
This is used as the output schema when asking a language model to correct
|
|
36
|
+
code, expecting the model to return the fixed code in the `corrected_code`
|
|
37
|
+
field.
|
|
38
|
+
"""
|
|
29
39
|
corrected_code: str
|
|
30
40
|
|
|
31
41
|
|
|
@@ -49,7 +59,7 @@ def run_with_correction(
|
|
|
49
59
|
code: The source code that may or may not be problematic.
|
|
50
60
|
error: An optional initial error for `code` when it's problematic, usually
|
|
51
61
|
caught from elsewhere when it ran. If None, code will be executed once to
|
|
52
|
-
verify if
|
|
62
|
+
verify if it's good and obtain a feedback error message.
|
|
53
63
|
global_vars: A dict of str to value as the global variables that could be
|
|
54
64
|
accessed within the corrected code.
|
|
55
65
|
lm: Language model to be used. If not specified, it will try to use the `lm`
|
|
@@ -57,15 +67,15 @@ def run_with_correction(
|
|
|
57
67
|
max_attempts: Max number of attempts for the correction.
|
|
58
68
|
sandbox: If True, run code in sandbox; If False, run code in current
|
|
59
69
|
process. If None, run in sandbox first, if the output could not be
|
|
60
|
-
serialized and
|
|
70
|
+
serialized and passed to current process, run the code again in current
|
|
61
71
|
process.
|
|
62
72
|
permission: The permission to run the code.
|
|
63
73
|
timeout: The timeout for running the corrected code. If None, there is no
|
|
64
74
|
timeout. Applicable only when sandbox is set to True.
|
|
65
75
|
returns_code: If True, the return value is a tuple of (result, final code).
|
|
66
76
|
Otherwise the return value is the result only.
|
|
67
|
-
returns_stdout: If True, the stdout (a
|
|
68
|
-
outputs_intermediate: If True, intermediate output will be
|
|
77
|
+
returns_stdout: If True, the stdout (a string) will be returned.
|
|
78
|
+
outputs_intermediate: If True, intermediate output will be output as a
|
|
69
79
|
dict, with the last line's value accessible by key '__result__'. Otherwise
|
|
70
80
|
the value of the last line will be returned.
|
|
71
81
|
|
|
@@ -161,7 +171,7 @@ def correct(
|
|
|
161
171
|
code: The source code that may or may not be problematic.
|
|
162
172
|
error: An optional initial error for `code` when it's problematic, usually
|
|
163
173
|
caught from elsewhere when it ran. If None, code will be executed once to
|
|
164
|
-
verify if
|
|
174
|
+
verify if it's good and obtain a feedback error message.
|
|
165
175
|
global_vars: A dict of str to value as the global variables that could be
|
|
166
176
|
accessed within the corrected code.
|
|
167
177
|
lm: Language model to be used. If not specified, it will try to use the `lm`
|
|
@@ -169,7 +179,7 @@ def correct(
|
|
|
169
179
|
max_attempts: Max number of attempts for the correction.
|
|
170
180
|
sandbox: If True, run code in sandbox; If False, run code in current
|
|
171
181
|
process. If None, run in sandbox first, if the output could not be
|
|
172
|
-
serialized and
|
|
182
|
+
serialized and passed to current process, run the code again in current
|
|
173
183
|
process.
|
|
174
184
|
timeout: The timeout for running the corrected code. If None, there is no
|
|
175
185
|
timeout. Applicable only when sandbox is set to True.
|
|
@@ -193,7 +203,7 @@ def correct(
|
|
|
193
203
|
|
|
194
204
|
|
|
195
205
|
def _error_feedback_str(error: Exception) -> str:
|
|
196
|
-
"""Returns the error
|
|
206
|
+
"""Returns the error string for feedback."""
|
|
197
207
|
if isinstance(error, pg.coding.CodeError):
|
|
198
208
|
return pg.decolor(error.format(include_complete_code=False))
|
|
199
209
|
else:
|
|
@@ -201,7 +211,7 @@ def _error_feedback_str(error: Exception) -> str:
|
|
|
201
211
|
|
|
202
212
|
|
|
203
213
|
def _maybe_custom_validate(result: Any) -> Any:
|
|
204
|
-
"""
|
|
214
|
+
"""Applies custom validation through __validate__ method."""
|
|
205
215
|
if isinstance(result, dict) and "__result__" in result:
|
|
206
216
|
r = result["__result__"]
|
|
207
217
|
else:
|
|
@@ -45,17 +45,17 @@ def evaluate(
|
|
|
45
45
|
global_vars: An optional dict as the globals that could be referenced by the
|
|
46
46
|
code.
|
|
47
47
|
permission: Permission for the Python code to run.
|
|
48
|
-
returns_stdout: If True, the stdout (a
|
|
48
|
+
returns_stdout: If True, the stdout (a string) will be returned.
|
|
49
49
|
outputs_intermediate: Applicable when returns_stdout is False. If True,
|
|
50
|
-
intermediate output will be
|
|
51
|
-
value accessible by key '__result__' and the
|
|
50
|
+
intermediate output will be output as a dict, with the last line's
|
|
51
|
+
value accessible by key '__result__' and the stdout accessible by
|
|
52
52
|
key '__stdout__'. Otherwise the value of the last line will be returned.
|
|
53
53
|
|
|
54
54
|
Returns:
|
|
55
55
|
The value of the last line of the code block. Or a dict of variable
|
|
56
56
|
names of all locals to their evaluated values as the output of the code to
|
|
57
57
|
run. The value for the last line can be accessed by key '__result__'. Or the
|
|
58
|
-
stdout as a
|
|
58
|
+
stdout as a string.
|
|
59
59
|
"""
|
|
60
60
|
return pg.coding.evaluate(
|
|
61
61
|
parsing.clean(code),
|
|
@@ -85,28 +85,30 @@ def run(
|
|
|
85
85
|
|
|
86
86
|
Args:
|
|
87
87
|
code: Python code to run.
|
|
88
|
-
global_vars: An optional dict
|
|
88
|
+
global_vars: An optional dict as the globals that could be referenced by the
|
|
89
|
+
code.
|
|
89
90
|
permission: Permission for the Python code to run.
|
|
90
|
-
returns_stdout: If True, the stdout (a
|
|
91
|
+
returns_stdout: If True, the stdout (a string) will be returned.
|
|
91
92
|
outputs_intermediate: Applicable when returns_stdout is False. If True,
|
|
92
|
-
intermediate output will be
|
|
93
|
-
value accessible by key '__result__' and the
|
|
93
|
+
intermediate output will be output as a dict, with the last line's
|
|
94
|
+
value accessible by key '__result__' and the stdout accessible by
|
|
94
95
|
key '__stdout__'. Otherwise the value of the last line will be returned.
|
|
95
96
|
sandbox: If True, run code in sandbox; If False, run code in current
|
|
96
97
|
process. If None, run in sandbox first, if the output could not be
|
|
97
|
-
serialized and
|
|
98
|
+
serialized and passed to current process, run the code again in current
|
|
98
99
|
process.
|
|
99
|
-
timeout: Execution timeout in seconds. If None, wait the code
|
|
100
|
+
timeout: Execution timeout in seconds. If None, wait for the code to
|
|
101
|
+
complete.
|
|
100
102
|
|
|
101
103
|
Returns:
|
|
102
104
|
The value of the last line of the code block. Or a dict of variable
|
|
103
105
|
names of all locals to their evaluated values as the output of the code to
|
|
104
106
|
run. The value for the last line can be accessed by key '__result__'. Or the
|
|
105
|
-
stdout as a
|
|
107
|
+
stdout as a string.
|
|
106
108
|
|
|
107
109
|
Raises:
|
|
108
110
|
TimeoutError: If the execution time exceeds the timeout.
|
|
109
|
-
Exception:
|
|
111
|
+
Exception: Exceptions that are raised from the code.
|
|
110
112
|
"""
|
|
111
113
|
return pg.coding.run(
|
|
112
114
|
parsing.clean(code),
|
|
@@ -22,9 +22,13 @@ import pyglove as pg
|
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
class PythonCode(pg.Object):
|
|
25
|
-
"""
|
|
25
|
+
"""Represents a piece of Python code that can be executed.
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
When `PythonCode` is instantiated within a `PythonCode.auto_run()` context,
|
|
28
|
+
it automatically executes the code and returns the result of the last
|
|
29
|
+
expression. Otherwise, it acts as a container for the source code, which
|
|
30
|
+
can be executed by calling the instance. The class also supports automatic
|
|
31
|
+
error correction via `lf.coding.run_with_correction` when called.
|
|
28
32
|
"""
|
|
29
33
|
|
|
30
34
|
source: Annotated[
|
|
@@ -56,7 +60,7 @@ class PythonCode(pg.Object):
|
|
|
56
60
|
Otherwise, auto call will be disabled.
|
|
57
61
|
sandbox: If True, run code in sandbox; If False, run code in current
|
|
58
62
|
process. If None, run in sandbox first, if the output could not be
|
|
59
|
-
serialized and
|
|
63
|
+
serialized and passed to current process, run the code again in current
|
|
60
64
|
process. Applicable when `enabled` is set to True.
|
|
61
65
|
timeout: Timeout in seconds. Applicable when both `enabled` and `sandbox`
|
|
62
66
|
are set to True.
|
|
@@ -98,17 +102,17 @@ class PythonCode(pg.Object):
|
|
|
98
102
|
Args:
|
|
99
103
|
sandbox: If True, run code in sandbox; If False, run code in current
|
|
100
104
|
process. If None, run in sandbox first, if the output could not be
|
|
101
|
-
serialized and
|
|
105
|
+
serialized and passed to current process, run the code again in current
|
|
102
106
|
process.
|
|
103
107
|
timeout: Timeout in seconds. If None, there is no timeout. Applicable when
|
|
104
108
|
sandbox is set to True.
|
|
105
109
|
global_vars: Global variables that could be accessed from the source code.
|
|
106
|
-
returns_stdout: If True, the stdout (a
|
|
110
|
+
returns_stdout: If True, the stdout (a string) will be returned.
|
|
107
111
|
outputs_intermediate: Applicable when returns_stdout is False. If True,
|
|
108
|
-
intermediate output will be
|
|
109
|
-
value accessible by key '__result__' and the
|
|
112
|
+
intermediate output will be output as a dict, with the last line's
|
|
113
|
+
value accessible by key '__result__' and the stdout accessible by
|
|
110
114
|
key '__stdout__'. Otherwise the value of the last line will be returned.
|
|
111
|
-
autofix: Number of attempts to
|
|
115
|
+
autofix: Number of attempts to autofix the generated code. If 0, autofix
|
|
112
116
|
is disabled.
|
|
113
117
|
autofix_lm: Language model to be used. If not specified, it will try to
|
|
114
118
|
use the `lm` under `lf.context`.
|
|
@@ -117,8 +121,8 @@ class PythonCode(pg.Object):
|
|
|
117
121
|
The value of the last expression in the source code. Or a dict of local
|
|
118
122
|
variable names defined in the source code to their values if
|
|
119
123
|
`outputs_intermediate` is set to True. The value for the last line can be
|
|
120
|
-
accessed by key '__result__'. Or the stdout as a
|
|
121
|
-
is set to True.
|
|
124
|
+
accessed by key '__result__'. Or the stdout as a string if
|
|
125
|
+
`returns_stdout` is set to True.
|
|
122
126
|
|
|
123
127
|
Raises:
|
|
124
128
|
TimeoutError: If `sandbox` is True and timeout has reached.
|
|
@@ -152,12 +156,12 @@ class PythonCode(pg.Object):
|
|
|
152
156
|
Args:
|
|
153
157
|
sandbox: If True, run code in sandbox; If False, run code in current
|
|
154
158
|
process. If None, run in sandbox first, if the output could not be
|
|
155
|
-
serialized and
|
|
159
|
+
serialized and passed to current process, run the code again in current
|
|
156
160
|
process.
|
|
157
161
|
timeout: Timeout in seconds. If None, there is no timeout. Applicable when
|
|
158
162
|
sandbox is set to True.
|
|
159
163
|
global_vars: Global variables that could be accessed from the source code.
|
|
160
|
-
autofix: Number of attempts to
|
|
164
|
+
autofix: Number of attempts to autofix the generated code. If 0, autofix
|
|
161
165
|
is disabled. Auto-fix is not supported for 'json' protocol.
|
|
162
166
|
autofix_lm: Language model to be used. If not specified, it will try to
|
|
163
167
|
use the `lm` under `lf.context`.
|
|
@@ -182,10 +186,11 @@ class PythonCode(pg.Object):
|
|
|
182
186
|
|
|
183
187
|
|
|
184
188
|
class PythonFunction(pg.Object):
|
|
185
|
-
"""
|
|
189
|
+
"""Represents a Python function defined by source code.
|
|
186
190
|
|
|
187
|
-
|
|
188
|
-
|
|
191
|
+
This class takes Python source code that defines a function and makes it
|
|
192
|
+
callable. The source code is evaluated to create a function object, which
|
|
193
|
+
can then be invoked like a regular Python function.
|
|
189
194
|
"""
|
|
190
195
|
|
|
191
196
|
name: str
|
|
@@ -214,7 +219,7 @@ class PythonFunction(pg.Object):
|
|
|
214
219
|
*args: Positional arguments that will be passed to the implementation.
|
|
215
220
|
sandbox: If True, run code in sandbox; If False, run code in current
|
|
216
221
|
process. If None, run in sandbox first, if the output could not be
|
|
217
|
-
serialized and
|
|
222
|
+
serialized and passed to current process, run the code again in current
|
|
218
223
|
process.
|
|
219
224
|
timeout: Timeout in seconds. If None, there is no timeout. Applicable when
|
|
220
225
|
sandbox is set to True.
|
|
@@ -23,7 +23,14 @@ import pyglove as pg
|
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
class SandboxOutput(pg.Object):
|
|
26
|
-
"""
|
|
26
|
+
"""A structure containing the output from a sandbox execution.
|
|
27
|
+
|
|
28
|
+
Attributes:
|
|
29
|
+
stdout: The standard output captured during execution.
|
|
30
|
+
stderr: The standard error captured during execution.
|
|
31
|
+
output_files: A dictionary of file names to their byte content for files
|
|
32
|
+
generated during execution.
|
|
33
|
+
"""
|
|
27
34
|
|
|
28
35
|
stdout: Annotated[
|
|
29
36
|
str,
|
|
@@ -42,7 +49,14 @@ class SandboxOutput(pg.Object):
|
|
|
42
49
|
|
|
43
50
|
|
|
44
51
|
class BaseSandbox(pg.Object):
|
|
45
|
-
"""
|
|
52
|
+
"""Base class for Python code sandboxing.
|
|
53
|
+
|
|
54
|
+
A sandbox provides an isolated environment for executing Python code,
|
|
55
|
+
typically with restrictions on file system access, network calls, or other
|
|
56
|
+
potentially harmful operations. This base class defines the interface for
|
|
57
|
+
sandboxes, including methods for running code (`run`), uploading files
|
|
58
|
+
(`upload`), and managing the sandbox lifecycle (`setup`, `cleanup`).
|
|
59
|
+
"""
|
|
46
60
|
|
|
47
61
|
def _on_bound(self):
|
|
48
62
|
super()._on_bound()
|
|
@@ -111,7 +125,13 @@ class BaseSandbox(pg.Object):
|
|
|
111
125
|
|
|
112
126
|
|
|
113
127
|
class MultiProcessingSandbox(BaseSandbox):
|
|
114
|
-
"""
|
|
128
|
+
"""A sandbox implementation using Python's `multiprocessing`.
|
|
129
|
+
|
|
130
|
+
This sandbox executes code in a separate process, providing isolation from
|
|
131
|
+
the main process. It uses a temporary directory for file operations,
|
|
132
|
+
which is cleaned up when the sandbox is closed. It relies on
|
|
133
|
+
`pg.coding.run` with `sandbox=True` for execution.
|
|
134
|
+
"""
|
|
115
135
|
|
|
116
136
|
def _on_bound(self):
|
|
117
137
|
super()._on_bound()
|