langfun 0.1.2.dev202510200805__py3-none-any.whl → 0.1.2.dev202511160804__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of langfun might be problematic. Click here for more details.
- langfun/core/__init__.py +1 -0
- langfun/core/agentic/action.py +107 -12
- langfun/core/agentic/action_eval.py +9 -2
- langfun/core/agentic/action_test.py +25 -0
- langfun/core/async_support.py +32 -3
- langfun/core/coding/python/correction.py +19 -9
- langfun/core/coding/python/execution.py +14 -12
- langfun/core/coding/python/generation.py +21 -16
- langfun/core/coding/python/sandboxing.py +23 -3
- langfun/core/component.py +42 -3
- langfun/core/concurrent.py +70 -6
- langfun/core/concurrent_test.py +1 -0
- langfun/core/console.py +1 -1
- langfun/core/data/conversion/anthropic.py +12 -3
- langfun/core/data/conversion/anthropic_test.py +8 -6
- langfun/core/data/conversion/gemini.py +9 -2
- langfun/core/data/conversion/gemini_test.py +12 -9
- langfun/core/data/conversion/openai.py +145 -31
- langfun/core/data/conversion/openai_test.py +161 -17
- langfun/core/eval/base.py +48 -44
- langfun/core/eval/base_test.py +4 -4
- langfun/core/eval/matching.py +5 -2
- langfun/core/eval/patching.py +3 -3
- langfun/core/eval/scoring.py +4 -3
- langfun/core/eval/v2/__init__.py +1 -0
- langfun/core/eval/v2/checkpointing.py +39 -5
- langfun/core/eval/v2/checkpointing_test.py +1 -1
- langfun/core/eval/v2/eval_test_helper.py +97 -1
- langfun/core/eval/v2/evaluation.py +88 -16
- langfun/core/eval/v2/evaluation_test.py +9 -3
- langfun/core/eval/v2/example.py +45 -39
- langfun/core/eval/v2/example_test.py +3 -3
- langfun/core/eval/v2/experiment.py +51 -8
- langfun/core/eval/v2/metric_values.py +31 -3
- langfun/core/eval/v2/metric_values_test.py +32 -0
- langfun/core/eval/v2/metrics.py +157 -44
- langfun/core/eval/v2/metrics_test.py +39 -18
- langfun/core/eval/v2/progress.py +30 -1
- langfun/core/eval/v2/progress_test.py +27 -0
- langfun/core/eval/v2/progress_tracking_test.py +3 -0
- langfun/core/eval/v2/reporting.py +90 -71
- langfun/core/eval/v2/reporting_test.py +20 -6
- langfun/core/eval/v2/runners/__init__.py +26 -0
- langfun/core/eval/v2/{runners.py → runners/base.py} +22 -124
- langfun/core/eval/v2/runners/debug.py +40 -0
- langfun/core/eval/v2/runners/debug_test.py +79 -0
- langfun/core/eval/v2/runners/parallel.py +100 -0
- langfun/core/eval/v2/runners/parallel_test.py +98 -0
- langfun/core/eval/v2/runners/sequential.py +47 -0
- langfun/core/eval/v2/runners/sequential_test.py +175 -0
- langfun/core/langfunc.py +45 -130
- langfun/core/langfunc_test.py +6 -4
- langfun/core/language_model.py +103 -16
- langfun/core/language_model_test.py +9 -3
- langfun/core/llms/__init__.py +7 -1
- langfun/core/llms/anthropic.py +157 -2
- langfun/core/llms/azure_openai.py +29 -17
- langfun/core/llms/cache/base.py +25 -3
- langfun/core/llms/cache/in_memory.py +48 -7
- langfun/core/llms/cache/in_memory_test.py +14 -4
- langfun/core/llms/compositional.py +25 -1
- langfun/core/llms/deepseek.py +30 -2
- langfun/core/llms/fake.py +32 -1
- langfun/core/llms/gemini.py +14 -9
- langfun/core/llms/google_genai.py +29 -1
- langfun/core/llms/groq.py +28 -3
- langfun/core/llms/llama_cpp.py +23 -4
- langfun/core/llms/openai.py +36 -3
- langfun/core/llms/openai_compatible.py +148 -27
- langfun/core/llms/openai_compatible_test.py +207 -20
- langfun/core/llms/openai_test.py +0 -2
- langfun/core/llms/rest.py +12 -1
- langfun/core/llms/vertexai.py +51 -8
- langfun/core/logging.py +1 -1
- langfun/core/mcp/client.py +77 -22
- langfun/core/mcp/client_test.py +8 -35
- langfun/core/mcp/session.py +94 -29
- langfun/core/mcp/session_test.py +54 -0
- langfun/core/mcp/tool.py +151 -22
- langfun/core/mcp/tool_test.py +197 -0
- langfun/core/memory.py +1 -0
- langfun/core/message.py +160 -55
- langfun/core/message_test.py +65 -81
- langfun/core/modalities/__init__.py +8 -0
- langfun/core/modalities/audio.py +21 -1
- langfun/core/modalities/image.py +19 -1
- langfun/core/modalities/mime.py +62 -3
- langfun/core/modalities/pdf.py +19 -1
- langfun/core/modalities/video.py +21 -1
- langfun/core/modality.py +167 -29
- langfun/core/modality_test.py +42 -12
- langfun/core/natural_language.py +1 -1
- langfun/core/sampling.py +4 -4
- langfun/core/sampling_test.py +20 -4
- langfun/core/structured/__init__.py +2 -24
- langfun/core/structured/completion.py +34 -44
- langfun/core/structured/completion_test.py +23 -43
- langfun/core/structured/description.py +54 -50
- langfun/core/structured/function_generation.py +29 -12
- langfun/core/structured/mapping.py +81 -37
- langfun/core/structured/parsing.py +95 -79
- langfun/core/structured/parsing_test.py +0 -3
- langfun/core/structured/querying.py +215 -142
- langfun/core/structured/querying_test.py +65 -29
- langfun/core/structured/schema/__init__.py +48 -0
- langfun/core/structured/schema/base.py +664 -0
- langfun/core/structured/schema/base_test.py +531 -0
- langfun/core/structured/schema/json.py +174 -0
- langfun/core/structured/schema/json_test.py +121 -0
- langfun/core/structured/schema/python.py +316 -0
- langfun/core/structured/schema/python_test.py +410 -0
- langfun/core/structured/schema_generation.py +33 -14
- langfun/core/structured/scoring.py +47 -36
- langfun/core/structured/tokenization.py +26 -11
- langfun/core/subscription.py +2 -2
- langfun/core/template.py +175 -50
- langfun/core/template_test.py +123 -17
- langfun/env/__init__.py +8 -2
- langfun/env/base_environment.py +320 -128
- langfun/env/base_environment_test.py +473 -0
- langfun/env/base_feature.py +92 -15
- langfun/env/base_feature_test.py +228 -0
- langfun/env/base_sandbox.py +84 -361
- langfun/env/base_sandbox_test.py +1235 -0
- langfun/env/event_handlers/__init__.py +1 -1
- langfun/env/event_handlers/chain.py +233 -0
- langfun/env/event_handlers/chain_test.py +253 -0
- langfun/env/event_handlers/event_logger.py +95 -98
- langfun/env/event_handlers/event_logger_test.py +21 -21
- langfun/env/event_handlers/metric_writer.py +225 -140
- langfun/env/event_handlers/metric_writer_test.py +23 -6
- langfun/env/interface.py +854 -40
- langfun/env/interface_test.py +112 -2
- langfun/env/load_balancers_test.py +23 -2
- langfun/env/test_utils.py +126 -84
- {langfun-0.1.2.dev202510200805.dist-info → langfun-0.1.2.dev202511160804.dist-info}/METADATA +1 -1
- langfun-0.1.2.dev202511160804.dist-info/RECORD +211 -0
- langfun/core/eval/v2/runners_test.py +0 -343
- langfun/core/structured/schema.py +0 -987
- langfun/core/structured/schema_test.py +0 -982
- langfun/env/base_test.py +0 -1481
- langfun/env/event_handlers/base.py +0 -350
- langfun-0.1.2.dev202510200805.dist-info/RECORD +0 -195
- {langfun-0.1.2.dev202510200805.dist-info → langfun-0.1.2.dev202511160804.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202510200805.dist-info → langfun-0.1.2.dev202511160804.dist-info}/licenses/LICENSE +0 -0
- {langfun-0.1.2.dev202510200805.dist-info → langfun-0.1.2.dev202511160804.dist-info}/top_level.txt +0 -0
langfun/core/__init__.py
CHANGED
|
@@ -93,6 +93,7 @@ from langfun.core.message import UserMessage
|
|
|
93
93
|
from langfun.core.message import AIMessage
|
|
94
94
|
from langfun.core.message import SystemMessage
|
|
95
95
|
from langfun.core.message import MemoryRecord
|
|
96
|
+
from langfun.core.message import ToolMessage
|
|
96
97
|
|
|
97
98
|
from langfun.core.message import MessageConverter
|
|
98
99
|
|
langfun/core/agentic/action.py
CHANGED
|
@@ -36,7 +36,12 @@ class ActionTimeoutError(ActionError):
|
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
class Action(pg.Object):
|
|
39
|
-
"""Base class for
|
|
39
|
+
"""Base class for agentic actions.
|
|
40
|
+
|
|
41
|
+
An `Action` represents a single, executable step or task that an agent can
|
|
42
|
+
perform, such as calling a tool, querying a language model, or returning a
|
|
43
|
+
final answer. Actions are designed to be composable and trackable within a
|
|
44
|
+
`Session`.
|
|
40
45
|
|
|
41
46
|
# Developing Actions
|
|
42
47
|
|
|
@@ -149,7 +154,7 @@ class Action(pg.Object):
|
|
|
149
154
|
|
|
150
155
|
# Explicitly create and pass a session.
|
|
151
156
|
with lf.Session(id='my_agent_session') as session:
|
|
152
|
-
result = calc(session=session)
|
|
157
|
+
result = calc(session=session) # Pass the session explicitly
|
|
153
158
|
print(result)
|
|
154
159
|
```
|
|
155
160
|
|
|
@@ -320,7 +325,14 @@ TracedItem = Union[
|
|
|
320
325
|
|
|
321
326
|
|
|
322
327
|
class ExecutionTrace(pg.Object, pg.views.html.HtmlTreeView.Extension):
|
|
323
|
-
"""Trace of
|
|
328
|
+
"""Trace of an execution, containing queries, logs, and sub-actions.
|
|
329
|
+
|
|
330
|
+
`ExecutionTrace` records the sequence of operations performed during an
|
|
331
|
+
action's execution or within a specific phase of execution (demarcated by
|
|
332
|
+
`session.track_phase`). It captures `lf.query` calls, log entries, and
|
|
333
|
+
nested `ActionInvocation` objects in the order they occurred. It also
|
|
334
|
+
aggregates LLM usage summaries from its child items.
|
|
335
|
+
"""
|
|
324
336
|
|
|
325
337
|
name: Annotated[
|
|
326
338
|
str | None,
|
|
@@ -328,7 +340,7 @@ class ExecutionTrace(pg.Object, pg.views.html.HtmlTreeView.Extension):
|
|
|
328
340
|
'The name of the execution trace. If None, the trace is unnamed, '
|
|
329
341
|
'which is the case for the top-level trace of an action. An '
|
|
330
342
|
'execution trace could have sub-traces, called phases, which are '
|
|
331
|
-
'created and named by `session.
|
|
343
|
+
'created and named by `session.track_phase()` context manager.'
|
|
332
344
|
)
|
|
333
345
|
] = None
|
|
334
346
|
|
|
@@ -362,7 +374,7 @@ class ExecutionTrace(pg.Object, pg.views.html.HtmlTreeView.Extension):
|
|
|
362
374
|
self.__dict__.pop('id', None)
|
|
363
375
|
|
|
364
376
|
def indexof(self, item: TracedItem, count_item_cls: Type[Any]) -> int:
|
|
365
|
-
"""Returns the index of the child
|
|
377
|
+
"""Returns the index of the child item of given type."""
|
|
366
378
|
pos = 0
|
|
367
379
|
for x in self._iter_children(count_item_cls):
|
|
368
380
|
if x is item:
|
|
@@ -538,6 +550,18 @@ class ExecutionTrace(pg.Object, pg.views.html.HtmlTreeView.Extension):
|
|
|
538
550
|
remove_class=['not-started'],
|
|
539
551
|
)
|
|
540
552
|
|
|
553
|
+
def remove(self, item: TracedItem) -> None:
|
|
554
|
+
"""Removes an item from the sequence."""
|
|
555
|
+
index = self.items.index(item)
|
|
556
|
+
if index == -1:
|
|
557
|
+
raise ValueError(f'Item not found in execution trace: {item!r}')
|
|
558
|
+
|
|
559
|
+
with pg.notify_on_change(False):
|
|
560
|
+
self.items.pop(index)
|
|
561
|
+
|
|
562
|
+
if self._tab_control is not None:
|
|
563
|
+
self._tab_control.remove(index)
|
|
564
|
+
|
|
541
565
|
def extend(self, items: Iterable[TracedItem]) -> None:
|
|
542
566
|
"""Extends the sequence with a list of items."""
|
|
543
567
|
for item in items:
|
|
@@ -775,7 +799,12 @@ class ExecutionTrace(pg.Object, pg.views.html.HtmlTreeView.Extension):
|
|
|
775
799
|
|
|
776
800
|
|
|
777
801
|
class ParallelExecutions(pg.Object, pg.views.html.HtmlTreeView.Extension):
|
|
778
|
-
"""A
|
|
802
|
+
"""A container for multiple parallel execution traces.
|
|
803
|
+
|
|
804
|
+
When `session.concurrent_map` is used, it creates a `ParallelExecutions`
|
|
805
|
+
object to hold an `ExecutionTrace` for each parallel branch of execution,
|
|
806
|
+
allowing inspection of parallel workflows.
|
|
807
|
+
"""
|
|
779
808
|
|
|
780
809
|
name: Annotated[
|
|
781
810
|
str | None,
|
|
@@ -864,7 +893,14 @@ class ParallelExecutions(pg.Object, pg.views.html.HtmlTreeView.Extension):
|
|
|
864
893
|
|
|
865
894
|
|
|
866
895
|
class ActionInvocation(pg.Object, pg.views.html.HtmlTreeView.Extension):
|
|
867
|
-
"""
|
|
896
|
+
"""An invocation of an action, capturing its execution and result.
|
|
897
|
+
|
|
898
|
+
`ActionInvocation` represents a single call to an `Action`. It contains
|
|
899
|
+
the `Action` object itself, its result or error, associated metadata,
|
|
900
|
+
and an `ExecutionTrace` detailing the steps taken during its execution
|
|
901
|
+
(queries, logs, sub-actions). Invocations form a tree structure within a
|
|
902
|
+
`Session`, reflecting the hierarchy of agentic operations.
|
|
903
|
+
"""
|
|
868
904
|
|
|
869
905
|
action: Annotated[
|
|
870
906
|
Action,
|
|
@@ -1394,7 +1430,50 @@ class SessionLogging(SessionEventHandler):
|
|
|
1394
1430
|
|
|
1395
1431
|
|
|
1396
1432
|
class Session(pg.Object, pg.views.html.HtmlTreeView.Extension):
|
|
1397
|
-
"""
|
|
1433
|
+
"""Manages the execution trajectory of agentic actions.
|
|
1434
|
+
|
|
1435
|
+
A `Session` tracks the execution of a root `Action` and all its
|
|
1436
|
+
sub-actions, including LLM queries (`lf.query`), logging messages,
|
|
1437
|
+
and nested actions. It provides a complete, hierarchical trace of an
|
|
1438
|
+
agent's workflow, which is important for debugging, analysis, and
|
|
1439
|
+
visualization.
|
|
1440
|
+
|
|
1441
|
+
Sessions can be created implicitly when an action is called without an
|
|
1442
|
+
active session, or explicitly for more control.
|
|
1443
|
+
|
|
1444
|
+
**1. Implicit Session:**
|
|
1445
|
+
When an action is called without a session, Langfun creates one automatically.
|
|
1446
|
+
|
|
1447
|
+
```python
|
|
1448
|
+
action = MyAction()
|
|
1449
|
+
action()
|
|
1450
|
+
session = action.session # Access the implicit session
|
|
1451
|
+
```
|
|
1452
|
+
|
|
1453
|
+
**2. Explicit Session:**
|
|
1454
|
+
Use a `with` statement to manage a session explicitly. This is useful for
|
|
1455
|
+
setting session IDs or capturing the trajectory of multiple top-level actions.
|
|
1456
|
+
|
|
1457
|
+
```python
|
|
1458
|
+
with lf.Session(id='my-session') as session:
|
|
1459
|
+
action1()
|
|
1460
|
+
action2()
|
|
1461
|
+
```
|
|
1462
|
+
|
|
1463
|
+
**3. Accessing Trajectory:**
|
|
1464
|
+
The `session.root` attribute provides access to the `ActionInvocation` tree.
|
|
1465
|
+
|
|
1466
|
+
```python
|
|
1467
|
+
with lf.Session() as session:
|
|
1468
|
+
my_action()
|
|
1469
|
+
|
|
1470
|
+
# Get all queries in the session
|
|
1471
|
+
print(session.all_queries)
|
|
1472
|
+
|
|
1473
|
+
# Get all top-level action calls in the session
|
|
1474
|
+
print(session.root.actions)
|
|
1475
|
+
```
|
|
1476
|
+
"""
|
|
1398
1477
|
|
|
1399
1478
|
root: Annotated[
|
|
1400
1479
|
ActionInvocation,
|
|
@@ -1547,7 +1626,7 @@ class Session(pg.Object, pg.views.html.HtmlTreeView.Extension):
|
|
|
1547
1626
|
)
|
|
1548
1627
|
|
|
1549
1628
|
def update_progress(self, title: str, **kwargs: Any) -> None:
|
|
1550
|
-
"""
|
|
1629
|
+
"""Updates the progress of current action's execution.
|
|
1551
1630
|
|
|
1552
1631
|
Args:
|
|
1553
1632
|
title: The title of the progress update.
|
|
@@ -1648,13 +1727,20 @@ class Session(pg.Object, pg.views.html.HtmlTreeView.Extension):
|
|
|
1648
1727
|
@contextlib.contextmanager
|
|
1649
1728
|
def track_queries(
|
|
1650
1729
|
self,
|
|
1651
|
-
phase: str | None = None
|
|
1730
|
+
phase: str | None = None,
|
|
1731
|
+
track_if: Callable[
|
|
1732
|
+
[lf_structured.QueryInvocation],
|
|
1733
|
+
bool
|
|
1734
|
+
] | None = None,
|
|
1652
1735
|
) -> Iterator[list[lf_structured.QueryInvocation]]:
|
|
1653
1736
|
"""Tracks `lf.query` made within the context.
|
|
1654
1737
|
|
|
1655
1738
|
Args:
|
|
1656
1739
|
phase: The name of a new phase to track the queries in. If not provided,
|
|
1657
1740
|
the queries will be tracked in the parent phase.
|
|
1741
|
+
track_if: A function that takes a `lf_structured.QueryInvocation` and
|
|
1742
|
+
returns True if the query should be included in the result. If None,
|
|
1743
|
+
all queries (including failed queries) will be included.
|
|
1658
1744
|
|
|
1659
1745
|
Yields:
|
|
1660
1746
|
A list of `lf.QueryInvocation` objects, each for a single `lf.query`
|
|
@@ -1673,6 +1759,11 @@ class Session(pg.Object, pg.views.html.HtmlTreeView.Extension):
|
|
|
1673
1759
|
self.event_handler.on_query_start(self, self._current_action, invocation)
|
|
1674
1760
|
|
|
1675
1761
|
def _query_end(invocation: lf_structured.QueryInvocation):
|
|
1762
|
+
if track_if is not None and not track_if(invocation):
|
|
1763
|
+
self._current_execution.remove(invocation)
|
|
1764
|
+
# Even if the query is not included in the execution trace, we still
|
|
1765
|
+
# count the usage summary to the current execution and trigger the
|
|
1766
|
+
# event handler to log the query.
|
|
1676
1767
|
self._current_execution.merge_usage_summary(invocation.usage_summary)
|
|
1677
1768
|
self.event_handler.on_query_end(self, self._current_action, invocation)
|
|
1678
1769
|
|
|
@@ -1705,8 +1796,9 @@ class Session(pg.Object, pg.views.html.HtmlTreeView.Extension):
|
|
|
1705
1796
|
*,
|
|
1706
1797
|
lm: lf.LanguageModel,
|
|
1707
1798
|
examples: list[lf_structured.MappingExample] | None = None,
|
|
1799
|
+
track_if: Callable[[lf_structured.QueryInvocation], bool] | None = None,
|
|
1708
1800
|
**kwargs
|
|
1709
|
-
|
|
1801
|
+
) -> Any:
|
|
1710
1802
|
"""Calls `lf.query` and associates it with the current invocation.
|
|
1711
1803
|
|
|
1712
1804
|
The following code are equivalent:
|
|
@@ -1731,12 +1823,15 @@ class Session(pg.Object, pg.views.html.HtmlTreeView.Extension):
|
|
|
1731
1823
|
default: The default value to return if the query fails.
|
|
1732
1824
|
lm: The language model to use for the query.
|
|
1733
1825
|
examples: The examples to use for the query.
|
|
1826
|
+
track_if: A function that takes a `lf_structured.QueryInvocation`
|
|
1827
|
+
and returns True if the query should be tracked.
|
|
1828
|
+
If None, all queries (including failed queries) will be tracked.
|
|
1734
1829
|
**kwargs: Additional keyword arguments to pass to `lf.query`.
|
|
1735
1830
|
|
|
1736
1831
|
Returns:
|
|
1737
1832
|
The result of the query.
|
|
1738
1833
|
"""
|
|
1739
|
-
with self.track_queries():
|
|
1834
|
+
with self.track_queries(track_if=track_if):
|
|
1740
1835
|
return lf_structured.query(
|
|
1741
1836
|
prompt,
|
|
1742
1837
|
schema=schema,
|
|
@@ -24,7 +24,14 @@ import pyglove as pg
|
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
class ActionEval(lf.eval.v2.Evaluation):
|
|
27
|
-
"""
|
|
27
|
+
"""Evaluation for agentic actions.
|
|
28
|
+
|
|
29
|
+
`ActionEval` is a specialized evaluation class for executing and evaluating
|
|
30
|
+
agentic actions based on provided inputs. Each input example is expected to
|
|
31
|
+
contain an `action` attribute. The `process` method executes the action
|
|
32
|
+
within a dedicated `Session`, captures the final result, and returns it
|
|
33
|
+
along with the session details in the metadata.
|
|
34
|
+
"""
|
|
28
35
|
|
|
29
36
|
action_args: Annotated[
|
|
30
37
|
dict[str, Any],
|
|
@@ -68,7 +75,7 @@ class ExampleView(pg.Object):
|
|
|
68
75
|
class ActionEvalV1(lf_eval.Matching):
|
|
69
76
|
"""Base class for action evaluations.
|
|
70
77
|
|
|
71
|
-
The input function should
|
|
78
|
+
The input function should return a list of pg.Dict, with `action` and
|
|
72
79
|
`groundtruth` fields.
|
|
73
80
|
"""
|
|
74
81
|
# We override the schema and prompt to dummy values since they are not used.
|
|
@@ -530,6 +530,31 @@ class SessionTest(unittest.TestCase):
|
|
|
530
530
|
self.assertIn('agent@', session.id)
|
|
531
531
|
self.assertIsInstance(session.as_message(), lf.AIMessage)
|
|
532
532
|
|
|
533
|
+
def test_query_with_track_if(self):
|
|
534
|
+
lm = fake.StaticResponse('lm response')
|
|
535
|
+
session = action_lib.Session()
|
|
536
|
+
|
|
537
|
+
# Render session to trigger javascript updates to the HTML when
|
|
538
|
+
# operating on the session.
|
|
539
|
+
_ = session.to_html()
|
|
540
|
+
with session:
|
|
541
|
+
# This query will succeed.
|
|
542
|
+
session.query(
|
|
543
|
+
'prompt1',
|
|
544
|
+
schema=None,
|
|
545
|
+
lm=lm,
|
|
546
|
+
track_if=lambda q: not q.has_error,
|
|
547
|
+
default=None)
|
|
548
|
+
# This query will fail during parsing.
|
|
549
|
+
session.query(
|
|
550
|
+
'prompt2',
|
|
551
|
+
schema=int,
|
|
552
|
+
lm=lm,
|
|
553
|
+
track_if=lambda q: not q.has_error,
|
|
554
|
+
default=None)
|
|
555
|
+
self.assertEqual(len(session.root.queries), 1)
|
|
556
|
+
self.assertIsNone(session.root.queries[0].error)
|
|
557
|
+
|
|
533
558
|
|
|
534
559
|
if __name__ == '__main__':
|
|
535
560
|
unittest.main()
|
langfun/core/async_support.py
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
"""
|
|
14
|
+
"""Utilities for asynchronous programming in Langfun."""
|
|
15
15
|
|
|
16
16
|
import asyncio
|
|
17
17
|
import contextlib
|
|
@@ -23,7 +23,20 @@ import pyglove as pg
|
|
|
23
23
|
async def invoke_async(
|
|
24
24
|
sync_callable: Callable[..., Any], *args, **kwargs
|
|
25
25
|
) -> Any:
|
|
26
|
-
"""Invokes a callable asynchronously
|
|
26
|
+
"""Invokes a sync callable asynchronously in a separate thread.
|
|
27
|
+
|
|
28
|
+
This is useful for wrapping a sync function into an async function,
|
|
29
|
+
allowing multiple calls of the sync function to run concurrently.
|
|
30
|
+
`lf.context` will be propagated to the thread that runs the sync callable.
|
|
31
|
+
|
|
32
|
+
Args:
|
|
33
|
+
sync_callable: The sync callable to invoke.
|
|
34
|
+
*args: Positional arguments to pass to the callable.
|
|
35
|
+
**kwargs: Keyword arguments to pass to the callable.
|
|
36
|
+
|
|
37
|
+
Returns:
|
|
38
|
+
An awaitable that resolves to the return value of the sync_callable.
|
|
39
|
+
"""
|
|
27
40
|
return await asyncio.to_thread(
|
|
28
41
|
# Enable `lf.context` manager for async calls.
|
|
29
42
|
pg.with_contextual_override(sync_callable), *args, **kwargs
|
|
@@ -35,7 +48,23 @@ def invoke_sync(
|
|
|
35
48
|
*args,
|
|
36
49
|
**kwargs
|
|
37
50
|
) -> Any:
|
|
38
|
-
"""Invokes
|
|
51
|
+
"""Invokes an async callable synchronously.
|
|
52
|
+
|
|
53
|
+
This is useful for calling an async function from a sync context.
|
|
54
|
+
If there is an existing async event loop in current thread managed by
|
|
55
|
+
`lf.sync_context_manager`, it will be used for running the async callable.
|
|
56
|
+
Otherwise, `anyio.run` will be used to run the async callable in a new
|
|
57
|
+
event loop.
|
|
58
|
+
`lf.context` will be propagated to the async callable.
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
async_callable: The async callable to invoke.
|
|
62
|
+
*args: Positional arguments to pass to the callable.
|
|
63
|
+
**kwargs: Keyword arguments to pass to the callable.
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
The return value of the async_callable.
|
|
67
|
+
"""
|
|
39
68
|
async def _invoke():
|
|
40
69
|
return await async_callable(*args, **kwargs)
|
|
41
70
|
invoke_fn = pg.with_contextual_override(_invoke)
|
|
@@ -19,13 +19,23 @@ import pyglove as pg
|
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class CodeWithError(pg.Object):
|
|
22
|
-
"""Python code with error.
|
|
22
|
+
"""A structure representing Python code along with an execution error.
|
|
23
|
+
|
|
24
|
+
This is used as input to a language model for error correction, providing
|
|
25
|
+
the model with the code that failed and the error message it produced.
|
|
26
|
+
"""
|
|
23
27
|
|
|
24
28
|
code: str
|
|
25
29
|
error: str
|
|
26
30
|
|
|
27
31
|
|
|
28
32
|
class CorrectedCode(pg.Object):
|
|
33
|
+
"""A structure containing corrected Python code.
|
|
34
|
+
|
|
35
|
+
This is used as the output schema when asking a language model to correct
|
|
36
|
+
code, expecting the model to return the fixed code in the `corrected_code`
|
|
37
|
+
field.
|
|
38
|
+
"""
|
|
29
39
|
corrected_code: str
|
|
30
40
|
|
|
31
41
|
|
|
@@ -49,7 +59,7 @@ def run_with_correction(
|
|
|
49
59
|
code: The source code that may or may not be problematic.
|
|
50
60
|
error: An optional initial error for `code` when it's problematic, usually
|
|
51
61
|
caught from elsewhere when it ran. If None, code will be executed once to
|
|
52
|
-
verify if
|
|
62
|
+
verify if it's good and obtain a feedback error message.
|
|
53
63
|
global_vars: A dict of str to value as the global variables that could be
|
|
54
64
|
accessed within the corrected code.
|
|
55
65
|
lm: Language model to be used. If not specified, it will try to use the `lm`
|
|
@@ -57,15 +67,15 @@ def run_with_correction(
|
|
|
57
67
|
max_attempts: Max number of attempts for the correction.
|
|
58
68
|
sandbox: If True, run code in sandbox; If False, run code in current
|
|
59
69
|
process. If None, run in sandbox first, if the output could not be
|
|
60
|
-
serialized and
|
|
70
|
+
serialized and passed to current process, run the code again in current
|
|
61
71
|
process.
|
|
62
72
|
permission: The permission to run the code.
|
|
63
73
|
timeout: The timeout for running the corrected code. If None, there is no
|
|
64
74
|
timeout. Applicable only when sandbox is set to True.
|
|
65
75
|
returns_code: If True, the return value is a tuple of (result, final code).
|
|
66
76
|
Otherwise the return value is the result only.
|
|
67
|
-
returns_stdout: If True, the stdout (a
|
|
68
|
-
outputs_intermediate: If True, intermediate output will be
|
|
77
|
+
returns_stdout: If True, the stdout (a string) will be returned.
|
|
78
|
+
outputs_intermediate: If True, intermediate output will be output as a
|
|
69
79
|
dict, with the last line's value accessible by key '__result__'. Otherwise
|
|
70
80
|
the value of the last line will be returned.
|
|
71
81
|
|
|
@@ -161,7 +171,7 @@ def correct(
|
|
|
161
171
|
code: The source code that may or may not be problematic.
|
|
162
172
|
error: An optional initial error for `code` when it's problematic, usually
|
|
163
173
|
caught from elsewhere when it ran. If None, code will be executed once to
|
|
164
|
-
verify if
|
|
174
|
+
verify if it's good and obtain a feedback error message.
|
|
165
175
|
global_vars: A dict of str to value as the global variables that could be
|
|
166
176
|
accessed within the corrected code.
|
|
167
177
|
lm: Language model to be used. If not specified, it will try to use the `lm`
|
|
@@ -169,7 +179,7 @@ def correct(
|
|
|
169
179
|
max_attempts: Max number of attempts for the correction.
|
|
170
180
|
sandbox: If True, run code in sandbox; If False, run code in current
|
|
171
181
|
process. If None, run in sandbox first, if the output could not be
|
|
172
|
-
serialized and
|
|
182
|
+
serialized and passed to current process, run the code again in current
|
|
173
183
|
process.
|
|
174
184
|
timeout: The timeout for running the corrected code. If None, there is no
|
|
175
185
|
timeout. Applicable only when sandbox is set to True.
|
|
@@ -193,7 +203,7 @@ def correct(
|
|
|
193
203
|
|
|
194
204
|
|
|
195
205
|
def _error_feedback_str(error: Exception) -> str:
|
|
196
|
-
"""Returns the error
|
|
206
|
+
"""Returns the error string for feedback."""
|
|
197
207
|
if isinstance(error, pg.coding.CodeError):
|
|
198
208
|
return pg.decolor(error.format(include_complete_code=False))
|
|
199
209
|
else:
|
|
@@ -201,7 +211,7 @@ def _error_feedback_str(error: Exception) -> str:
|
|
|
201
211
|
|
|
202
212
|
|
|
203
213
|
def _maybe_custom_validate(result: Any) -> Any:
|
|
204
|
-
"""
|
|
214
|
+
"""Applies custom validation through __validate__ method."""
|
|
205
215
|
if isinstance(result, dict) and "__result__" in result:
|
|
206
216
|
r = result["__result__"]
|
|
207
217
|
else:
|
|
@@ -45,17 +45,17 @@ def evaluate(
|
|
|
45
45
|
global_vars: An optional dict as the globals that could be referenced by the
|
|
46
46
|
code.
|
|
47
47
|
permission: Permission for the Python code to run.
|
|
48
|
-
returns_stdout: If True, the stdout (a
|
|
48
|
+
returns_stdout: If True, the stdout (a string) will be returned.
|
|
49
49
|
outputs_intermediate: Applicable when returns_stdout is False. If True,
|
|
50
|
-
intermediate output will be
|
|
51
|
-
value accessible by key '__result__' and the
|
|
50
|
+
intermediate output will be output as a dict, with the last line's
|
|
51
|
+
value accessible by key '__result__' and the stdout accessible by
|
|
52
52
|
key '__stdout__'. Otherwise the value of the last line will be returned.
|
|
53
53
|
|
|
54
54
|
Returns:
|
|
55
55
|
The value of the last line of the code block. Or a dict of variable
|
|
56
56
|
names of all locals to their evaluated values as the output of the code to
|
|
57
57
|
run. The value for the last line can be accessed by key '__result__'. Or the
|
|
58
|
-
stdout as a
|
|
58
|
+
stdout as a string.
|
|
59
59
|
"""
|
|
60
60
|
return pg.coding.evaluate(
|
|
61
61
|
parsing.clean(code),
|
|
@@ -85,28 +85,30 @@ def run(
|
|
|
85
85
|
|
|
86
86
|
Args:
|
|
87
87
|
code: Python code to run.
|
|
88
|
-
global_vars: An optional dict
|
|
88
|
+
global_vars: An optional dict as the globals that could be referenced by the
|
|
89
|
+
code.
|
|
89
90
|
permission: Permission for the Python code to run.
|
|
90
|
-
returns_stdout: If True, the stdout (a
|
|
91
|
+
returns_stdout: If True, the stdout (a string) will be returned.
|
|
91
92
|
outputs_intermediate: Applicable when returns_stdout is False. If True,
|
|
92
|
-
intermediate output will be
|
|
93
|
-
value accessible by key '__result__' and the
|
|
93
|
+
intermediate output will be output as a dict, with the last line's
|
|
94
|
+
value accessible by key '__result__' and the stdout accessible by
|
|
94
95
|
key '__stdout__'. Otherwise the value of the last line will be returned.
|
|
95
96
|
sandbox: If True, run code in sandbox; If False, run code in current
|
|
96
97
|
process. If None, run in sandbox first, if the output could not be
|
|
97
|
-
serialized and
|
|
98
|
+
serialized and passed to current process, run the code again in current
|
|
98
99
|
process.
|
|
99
|
-
timeout: Execution timeout in seconds. If None, wait the code
|
|
100
|
+
timeout: Execution timeout in seconds. If None, wait for the code to
|
|
101
|
+
complete.
|
|
100
102
|
|
|
101
103
|
Returns:
|
|
102
104
|
The value of the last line of the code block. Or a dict of variable
|
|
103
105
|
names of all locals to their evaluated values as the output of the code to
|
|
104
106
|
run. The value for the last line can be accessed by key '__result__'. Or the
|
|
105
|
-
stdout as a
|
|
107
|
+
stdout as a string.
|
|
106
108
|
|
|
107
109
|
Raises:
|
|
108
110
|
TimeoutError: If the execution time exceeds the timeout.
|
|
109
|
-
Exception:
|
|
111
|
+
Exception: Exceptions that are raised from the code.
|
|
110
112
|
"""
|
|
111
113
|
return pg.coding.run(
|
|
112
114
|
parsing.clean(code),
|
|
@@ -22,9 +22,13 @@ import pyglove as pg
|
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
class PythonCode(pg.Object):
|
|
25
|
-
"""
|
|
25
|
+
"""Represents a piece of Python code that can be executed.
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
When `PythonCode` is instantiated within a `PythonCode.auto_run()` context,
|
|
28
|
+
it automatically executes the code and returns the result of the last
|
|
29
|
+
expression. Otherwise, it acts as a container for the source code, which
|
|
30
|
+
can be executed by calling the instance. The class also supports automatic
|
|
31
|
+
error correction via `lf.coding.run_with_correction` when called.
|
|
28
32
|
"""
|
|
29
33
|
|
|
30
34
|
source: Annotated[
|
|
@@ -56,7 +60,7 @@ class PythonCode(pg.Object):
|
|
|
56
60
|
Otherwise, auto call will be disabled.
|
|
57
61
|
sandbox: If True, run code in sandbox; If False, run code in current
|
|
58
62
|
process. If None, run in sandbox first, if the output could not be
|
|
59
|
-
serialized and
|
|
63
|
+
serialized and passed to current process, run the code again in current
|
|
60
64
|
process. Applicable when `enabled` is set to True.
|
|
61
65
|
timeout: Timeout in seconds. Applicable when both `enabled` and `sandbox`
|
|
62
66
|
are set to True.
|
|
@@ -98,17 +102,17 @@ class PythonCode(pg.Object):
|
|
|
98
102
|
Args:
|
|
99
103
|
sandbox: If True, run code in sandbox; If False, run code in current
|
|
100
104
|
process. If None, run in sandbox first; if the output could not be
|
|
101
|
-
serialized and
|
|
105
|
+
serialized and passed to current process, run the code again in current
|
|
102
106
|
process.
|
|
103
107
|
timeout: Timeout in seconds. If None, there is no timeout. Applicable when
|
|
104
108
|
sandbox is set to True.
|
|
105
109
|
global_vars: Global variables that could be accessed from the source code.
|
|
106
|
-
returns_stdout: If True, the stdout (a
|
|
110
|
+
returns_stdout: If True, the stdout (a string) will be returned.
|
|
107
111
|
outputs_intermediate: Applicable when returns_stdout is False. If True,
|
|
108
|
-
intermediate output will be
|
|
109
|
-
value accessible by key '__result__' and the
|
|
112
|
+
intermediate output will be returned as a dict, with the last line's
|
|
113
|
+
value accessible by key '__result__' and the stdout accessible by
|
|
110
114
|
key '__stdout__'. Otherwise the value of the last line will be returned.
|
|
111
|
-
autofix: Number of attempts to
|
|
115
|
+
autofix: Number of attempts to autofix the generated code. If 0, autofix
|
|
112
116
|
is disabled.
|
|
113
117
|
autofix_lm: Language model to be used. If not specified, it will try to
|
|
114
118
|
use the `lm` under `lf.context`.
|
|
@@ -117,8 +121,8 @@ class PythonCode(pg.Object):
|
|
|
117
121
|
The value of the last expression in the source code. Or a dict of local
|
|
118
122
|
variable names defined in the source code to their values if
|
|
119
123
|
`outputs_intermediate` is set to True. The value for the last line can be
|
|
120
|
-
accessed by key '__result__'. Or the stdout as a
|
|
121
|
-
is set to True.
|
|
124
|
+
accessed by key '__result__'. Or the stdout as a string if
|
|
125
|
+
`returns_stdout` is set to True.
|
|
122
126
|
|
|
123
127
|
Raises:
|
|
124
128
|
TimeoutError: If `sandbox` is True and the timeout has been reached.
|
|
@@ -152,12 +156,12 @@ class PythonCode(pg.Object):
|
|
|
152
156
|
Args:
|
|
153
157
|
sandbox: If True, run code in sandbox; If False, run code in current
|
|
154
158
|
process. If None, run in sandbox first; if the output could not be
|
|
155
|
-
serialized and
|
|
159
|
+
serialized and passed to current process, run the code again in current
|
|
156
160
|
process.
|
|
157
161
|
timeout: Timeout in seconds. If None, there is no timeout. Applicable when
|
|
158
162
|
sandbox is set to True.
|
|
159
163
|
global_vars: Global variables that could be accessed from the source code.
|
|
160
|
-
autofix: Number of attempts to
|
|
164
|
+
autofix: Number of attempts to autofix the generated code. If 0, autofix
|
|
161
165
|
is disabled. Auto-fix is not supported for 'json' protocol.
|
|
162
166
|
autofix_lm: Language model to be used. If not specified, it will try to
|
|
163
167
|
use the `lm` under `lf.context`.
|
|
@@ -182,10 +186,11 @@ class PythonCode(pg.Object):
|
|
|
182
186
|
|
|
183
187
|
|
|
184
188
|
class PythonFunction(pg.Object):
|
|
185
|
-
"""
|
|
189
|
+
"""Represents a Python function defined by source code.
|
|
186
190
|
|
|
187
|
-
|
|
188
|
-
|
|
191
|
+
This class takes Python source code that defines a function and makes it
|
|
192
|
+
callable. The source code is evaluated to create a function object, which
|
|
193
|
+
can then be invoked like a regular Python function.
|
|
189
194
|
"""
|
|
190
195
|
|
|
191
196
|
name: str
|
|
@@ -214,7 +219,7 @@ class PythonFunction(pg.Object):
|
|
|
214
219
|
*args: Positional arguments that will be passed to the implementation.
|
|
215
220
|
sandbox: If True, run code in sandbox; If False, run code in current
|
|
216
221
|
process. If None, run in sandbox first; if the output could not be
|
|
217
|
-
serialized and
|
|
222
|
+
serialized and passed to current process, run the code again in current
|
|
218
223
|
process.
|
|
219
224
|
timeout: Timeout in seconds. If None, there is no timeout. Applicable when
|
|
220
225
|
sandbox is set to True.
|
|
@@ -23,7 +23,14 @@ import pyglove as pg
|
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
class SandboxOutput(pg.Object):
|
|
26
|
-
"""
|
|
26
|
+
"""A structure containing the output from a sandbox execution.
|
|
27
|
+
|
|
28
|
+
Attributes:
|
|
29
|
+
stdout: The standard output captured during execution.
|
|
30
|
+
stderr: The standard error captured during execution.
|
|
31
|
+
output_files: A dictionary of file names to their byte content for files
|
|
32
|
+
generated during execution.
|
|
33
|
+
"""
|
|
27
34
|
|
|
28
35
|
stdout: Annotated[
|
|
29
36
|
str,
|
|
@@ -42,7 +49,14 @@ class SandboxOutput(pg.Object):
|
|
|
42
49
|
|
|
43
50
|
|
|
44
51
|
class BaseSandbox(pg.Object):
|
|
45
|
-
"""
|
|
52
|
+
"""Base class for Python code sandboxing.
|
|
53
|
+
|
|
54
|
+
A sandbox provides an isolated environment for executing Python code,
|
|
55
|
+
typically with restrictions on file system access, network calls, or other
|
|
56
|
+
potentially harmful operations. This base class defines the interface for
|
|
57
|
+
sandboxes, including methods for running code (`run`), uploading files
|
|
58
|
+
(`upload`), and managing the sandbox lifecycle (`setup`, `cleanup`).
|
|
59
|
+
"""
|
|
46
60
|
|
|
47
61
|
def _on_bound(self):
|
|
48
62
|
super()._on_bound()
|
|
@@ -111,7 +125,13 @@ class BaseSandbox(pg.Object):
|
|
|
111
125
|
|
|
112
126
|
|
|
113
127
|
class MultiProcessingSandbox(BaseSandbox):
|
|
114
|
-
"""
|
|
128
|
+
"""A sandbox implementation using Python's `multiprocessing`.
|
|
129
|
+
|
|
130
|
+
This sandbox executes code in a separate process, providing isolation from
|
|
131
|
+
the main process. It uses a temporary directory for file operations,
|
|
132
|
+
which is cleaned up when the sandbox is closed. It relies on
|
|
133
|
+
`pg.coding.run` with `sandbox=True` for execution.
|
|
134
|
+
"""
|
|
115
135
|
|
|
116
136
|
def _on_bound(self):
|
|
117
137
|
super()._on_bound()
|