langfun 0.1.2.dev202511030805__py3-none-any.whl → 0.1.2.dev202511050805__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langfun/core/agentic/action.py +76 -9
- langfun/core/agentic/action_eval.py +9 -2
- langfun/core/async_support.py +32 -3
- langfun/core/coding/python/correction.py +19 -9
- langfun/core/coding/python/execution.py +14 -12
- langfun/core/coding/python/generation.py +21 -16
- langfun/core/coding/python/sandboxing.py +23 -3
- langfun/core/component.py +42 -3
- langfun/core/concurrent.py +70 -6
- langfun/core/console.py +1 -1
- langfun/core/data/conversion/anthropic.py +10 -3
- langfun/core/data/conversion/gemini.py +9 -2
- langfun/core/data/conversion/openai.py +17 -7
- langfun/core/eval/base.py +46 -42
- langfun/core/eval/matching.py +5 -2
- langfun/core/eval/patching.py +3 -3
- langfun/core/eval/scoring.py +4 -3
- langfun/core/eval/v2/checkpointing.py +30 -4
- langfun/core/eval/v2/evaluation.py +59 -13
- langfun/core/eval/v2/example.py +22 -11
- langfun/core/eval/v2/experiment.py +51 -8
- langfun/core/eval/v2/metric_values.py +23 -3
- langfun/core/eval/v2/metrics.py +33 -4
- langfun/core/eval/v2/progress.py +9 -1
- langfun/core/eval/v2/reporting.py +15 -1
- langfun/core/eval/v2/runners.py +27 -7
- langfun/core/langfunc.py +45 -130
- langfun/core/language_model.py +88 -10
- langfun/core/llms/anthropic.py +27 -2
- langfun/core/llms/azure_openai.py +29 -17
- langfun/core/llms/cache/base.py +22 -2
- langfun/core/llms/cache/in_memory.py +48 -7
- langfun/core/llms/compositional.py +25 -1
- langfun/core/llms/deepseek.py +29 -1
- langfun/core/llms/fake.py +32 -1
- langfun/core/llms/gemini.py +9 -1
- langfun/core/llms/google_genai.py +29 -1
- langfun/core/llms/groq.py +27 -2
- langfun/core/llms/llama_cpp.py +22 -3
- langfun/core/llms/openai.py +29 -1
- langfun/core/llms/openai_compatible.py +18 -6
- langfun/core/llms/rest.py +12 -1
- langfun/core/llms/vertexai.py +39 -6
- langfun/core/logging.py +1 -1
- langfun/core/mcp/client.py +77 -22
- langfun/core/mcp/session.py +90 -10
- langfun/core/mcp/tool.py +83 -23
- langfun/core/memory.py +1 -0
- langfun/core/message.py +75 -11
- langfun/core/message_test.py +9 -0
- langfun/core/modalities/audio.py +21 -1
- langfun/core/modalities/image.py +19 -1
- langfun/core/modalities/mime.py +54 -4
- langfun/core/modalities/pdf.py +19 -1
- langfun/core/modalities/video.py +21 -1
- langfun/core/modality.py +66 -5
- langfun/core/natural_language.py +1 -1
- langfun/core/sampling.py +4 -4
- langfun/core/structured/completion.py +32 -37
- langfun/core/structured/description.py +54 -50
- langfun/core/structured/function_generation.py +29 -12
- langfun/core/structured/mapping.py +70 -15
- langfun/core/structured/parsing.py +90 -74
- langfun/core/structured/parsing_test.py +0 -3
- langfun/core/structured/querying.py +201 -130
- langfun/core/structured/schema.py +70 -10
- langfun/core/structured/schema_generation.py +33 -14
- langfun/core/structured/scoring.py +45 -34
- langfun/core/structured/tokenization.py +24 -9
- langfun/core/subscription.py +2 -2
- langfun/core/template.py +139 -40
- langfun/core/template_test.py +40 -0
- {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/METADATA +1 -1
- {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/RECORD +77 -77
- {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/licenses/LICENSE +0 -0
- {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/top_level.txt +0 -0
langfun/core/eval/v2/evaluation.py
CHANGED

@@ -32,17 +32,63 @@ import pyglove as pg


 class Evaluation(experiment_lib.Experiment):
-  """
-
-
-
-
-
-
-
-
-
+  """Base class for Langfun evaluations.
+
+  `lf.eval.Evaluation` is the base class for defining evaluation tasks in
+  Langfun. Users typically subclass it to implement custom evaluation logic by
+  overriding `inputs` and `process` methods.
+
+  An `Evaluation` object encapsulates:
+
+  * **`inputs`**: A callable that returns an iterable of input examples to be
+    processed. This is usually provided by implementing an `inputs(self)`
+    method in the subclass, which yields input items for evaluation one by
+    one.
+  * **`process(self, example)`**: An abstract method that processes one
+    example and returns the output, or a tuple of (output, metadata).
+    The output will be used for computing metrics.
+  * **`metrics`**: A list of metrics (e.g., `lf.metrics.Accuracy`) to compute
+    based on the outputs from `process`. Some metrics may require users to
+    implement a `ground_truth(self, example)` method in the subclass to
+    compute metrics against ground truth.
+  * **Hyperparameters**: Any other attributes of the class serve as
+    hyperparameters for the evaluation (e.g., the language model to use).
+
+  **Running Evaluations:**
+
+  Evaluations are executed via `lf.eval.Suite` or by calling the `.run()`
+  method on an `Evaluation` instance, which returns a `Run` object
+  containing the evaluation run information and results. If an evaluation
+  contains sweeable parameters (using `pg.oneof`), `.run()` will expand it
+  into multiple evaluation sub-tasks -- one for each combination of
+  hyperparameters -- all managed within the same `Run`.
+
+  **Example:**
+
+  ```python
+  import langfun as lf
+  import pyglove as pg
+
+  class MyEval(lf.eval.Evaluation):
+    lm: lf.LanguageModel
+    prompt: str = '1 + 1 = '
+
+    def inputs(self):
+      yield 2
+
+    def process(self, example: lf.eval.Example):
+      return int(lf.query(self.prompt, lm=self.lm))
+
+    def ground_truth(self, example: lf.eval.Example) -> int:
+      return example.input
+
+  # Run evaluation using two different LMs
+  evaluation = MyEval(
+      lm=pg.oneof([lf.llms.Gpt4(), lf.llms.Gemini()]),
+      metrics=[lf.metrics.Accuracy()]
+  )
+  run_info = evaluation.run()
+  ```
   """

   inputs: Annotated[

@@ -137,7 +183,7 @@ class Evaluation(experiment_lib.Experiment):

     Args:
       example: An example object to process. `example.input` is an object
-
+        yielded from `inputs()` method.

     Returns:
       A processed output. Or a tuple of (output, metadata).

@@ -760,7 +806,7 @@ class Evaluation(experiment_lib.Experiment):


 class EvaluationState:
-  """
+  """In-memory state of an evaluation."""

   class ExampleStatus(pg.Object):
     """Example state."""
langfun/core/eval/v2/example.py
CHANGED

@@ -22,19 +22,30 @@ import pyglove as pg


 @dataclasses.dataclass
 class Example(pg.JSONConvertible, pg.views.HtmlTreeView.Extension):
-  """An
+  """An example for evaluation.
+
+  An evaluation example contains the input and output of an evaluation task,
+  as well as metadata about the evaluation process, such as execution time,
+  LLM usage, and metric results.

   Attributes:
-    id: The 1-based ID of the
-    input: An element returned from the `Evaluable.inputs` functor
-
-
-
-
-
-
-
-
+    id: The 1-based ID of the example in the evaluation set.
+    input: An element returned from the `Evaluable.inputs` functor, which serves
+      as the input for `lf.Evaluable.process`.
+    output: The output of `lf.Evaluable.process` method. If `pg.MISSING_VALUE`,
+      it indicates the example has not been processed yet.
+    error: The error encountered during `lf.Evaluable.process`. If None, it
+      indicates the process was successful.
+    metadata: The metadata of the example produced by `lf.Evaluable.process`.
+    metric_metadata: The dictionary returned from `Metric.audit`, which contains
+      metadata about metric computation for this example.
+    newly_processed: Whether this example is processed in the current run. If
+      False, it indicates the example was loaded from a checkpoint from previous
+      runs.
+    start_time: The start time of processing this example.
+    end_time: The end time of processing this example.
+    usage_summary: The summary of LLM usages for processing this example.
+    execution_status: The timeit status of processing this example.
   """
   id: int
   input: Any = pg.MISSING_VALUE
langfun/core/eval/v2/experiment.py
CHANGED

@@ -139,10 +139,10 @@ class Experiment(lf.Component, pg.views.HtmlTreeView.Extension):

   # Checkpointing

-  Experiments support checkpointing, which is enabled by default. It allows
+  Experiments support checkpointing, which is enabled by default. It allows
   users to resume their experiments from a saved state. When an experiment runs,
-  it creates a new directory for that run and saves
-
+  it creates a new directory for that run and saves its progress to checkpoint
+  files. If the experiment is interrupted or fails, users can resume
   it by specifying the 'id' or 'warm_start_from' argument (shown above) to
   seamlessly continue from previously saved state without starting over.


@@ -169,7 +169,7 @@ class Experiment(lf.Component, pg.views.HtmlTreeView.Extension):

   # Experiment Plugins

-
+  Experiments can be extended by plugins. Plugins can listen to the events of
   experiment execution and produce additional outputs. For example, a plugin
   can be added to an experiment to generate additional metrics or to save
   additional data to a database. More details will be added in the future.

@@ -657,7 +657,30 @@ class Experiment(lf.Component, pg.views.HtmlTreeView.Extension):

 @pg.use_init_args(['children'])
 class Suite(Experiment):
-  """A suite of evaluations.
+  """A suite of evaluations.
+
+  `lf.eval.Suite` groups multiple `lf.eval.Evaluation` or other `Suite`
+  objects into a single experiment, allowing them to be run, managed, and
+  reported together.
+
+  **Example:**
+
+  ```python
+  import langfun as lf
+
+  suite = lf.eval.Suite([
+      MyEval(lm=lf.llms.Gpt4()),
+      MyEval(lm=lf.llms.Gemini()),
+      lf.eval.Suite([
+          AnotherEval(lm=lf.llms.Gpt4()),
+          AnotherEval(lm=lf.llms.Gemini())
+      ])
+  ])
+
+  # Run all evaluations in the suite
+  run_info = suite.run('/path/to/my/suite_run')
+  ```
+  """

   children: Annotated[
       list[Experiment], 'A list of child experiments.'

@@ -791,7 +814,14 @@ class RunId(pg.Object):


 class Run(pg.Object, pg.views.html.HtmlTreeView.Extension):
-  """
+  """Represents a single run of an experiment.
+
+  A `Run` object holds all the configurations for executing an experiment,
+  such as the experiment definition, input/output directories, and flags
+  controlling the execution behavior (e.g., error handling, checkpointing).
+  It also provides utility methods for accessing run-specific paths and
+  filtering examples for evaluation.
+  """

   root_dir: Annotated[
       str,

@@ -971,7 +1001,13 @@ class Run(pg.Object, pg.views.html.HtmlTreeView.Extension):


 class Runner(pg.Object):
-  """Interface for experiment runner.
+  """Interface for experiment runner.
+
+  A runner is responsible for executing the evaluations within an experiment
+  based on the configuration specified in a `Run` object. Different runners
+  can implement different execution strategies, such as sequential or parallel
+  processing of examples and evaluations.
+  """

   # Class-level variable for registering the runner.
   NAME = None

@@ -1010,7 +1046,14 @@ class Runner(pg.Object):


 class Plugin(lf.Component):
-  """Base class for experiment plugins.
+  """Base class for experiment plugins.
+
+  Plugins provide a mechanism to extend the behavior of an experiment run
+  by hooking into various events during the lifecycle of experiment and
+  example execution, such as `on_run_start`, `on_experiment_complete`,
+  `on_example_start`, etc. They can be used for custom logging, monitoring,
+  or result processing.
+  """

   def on_run_start(
       self,
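The `Plugin` docstring above names the lifecycle hooks (`on_run_start`, `on_example_start`, `on_experiment_complete`) but this diff truncates their signatures. Below is a minimal, hedged sketch of a logging plugin; the hook parameters are deliberately left open because they are not shown here.

```python
# Illustrative only: the hook names come from the Plugin docstring in this
# diff; the exact hook signatures are assumed, hence *args/**kwargs.
from langfun.core.eval.v2 import experiment as experiment_lib


class LoggingPlugin(experiment_lib.Plugin):
  """Prints a line for a few experiment lifecycle events."""

  def on_run_start(self, *args, **kwargs):
    print('run started')

  def on_example_start(self, *args, **kwargs):
    print('example started')

  def on_experiment_complete(self, *args, **kwargs):
    print('experiment complete')
```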
langfun/core/eval/v2/metric_values.py
CHANGED

@@ -20,7 +20,15 @@ import pyglove as pg


 class MetricValue(pg.Object):
-  """Base class for metric values.
+  """Base class for metric values.
+
+  `MetricValue` is the base class for representing aggregated metric values
+  in an evaluation. It accumulates data points from individual examples,
+  each consisting of a value and an optional weight, associated with an example
+  ID. Subclasses must implement `reduce` method to compute a single float value
+  from accumulated data points, and `scalar_repr` to provide a string
+  representation of the reduced value.
+  """

   class DataPoint(pg.Object):
     """A data point for a metric value."""

@@ -133,7 +141,13 @@ class MetricValue(pg.Object):


 class Rate(MetricValue):
-  """
+  """Metric value representing a rate in range [0, 1].
+
+  `Rate` is used for metrics that compute a rate, such as accuracy or error
+  rate. The final value is computed as the weighted sum of accumulated values
+  divided by the total number of examples. It's displayed as a percentage
+  (e.g., 90.0%).
+  """

   def reduce(self) -> float:
     return self._weighted_sum / self.total

@@ -145,7 +159,13 @@ class Rate(MetricValue):


 class Average(MetricValue):
-  """
+  """Metric value representing an average of accumulated values.
+
+  `Average` is used for metrics that compute an average score across examples
+  (e.g., average quality score). The final value is computed as the weighted
+  sum of accumulated values divided by the number of data points.
+  It's displayed as a float with 3 decimal places (e.g., 4.750).
+  """

   def reduce(self) -> float:
     if not self.data_points:
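Per the `MetricValue` docstring above, custom metric values implement `reduce` and `scalar_repr`. A hedged sketch of such a subclass follows; `data_points` is referenced in the `Average.reduce` context shown above, while the assumption that each data point exposes a `value` field follows the docstring's wording and is not confirmed by this diff.

```python
from langfun.core.eval.v2 import metric_values


class Maximum(metric_values.MetricValue):
  """Reduces accumulated data points to their maximum value (illustrative)."""

  def reduce(self) -> float:
    if not self.data_points:
      return float('nan')
    # Assumes each DataPoint carries a `value` field, per the docstring's
    # "a value and an optional weight" description.
    return max(dp.value for dp in self.data_points)

  def scalar_repr(self) -> str:
    return f'{self.reduce():.3f}'
```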
langfun/core/eval/v2/metrics.py
CHANGED

@@ -29,7 +29,15 @@ Average = metric_values.Average


 class Metric(pg.Object, pg.views.HtmlTreeView.Extension):
-  """Interface for an evaluation metric.
+  """Interface for an evaluation metric.
+
+  A metric is used to evaluate the quality of the outputs produced by an
+  evaluation. It works by auditing each processed example via its `audit`
+  method, which in turn calls the user-overridable `_audit` method to perform
+  metric-specific logic and update metric values. Metrics can compute multiple
+  values (e.g., precision, recall, F1 score) which are exposed via the
+  `values` method.
+  """

   name: Annotated[
       str,

@@ -169,7 +177,15 @@ class Metric(pg.Object, pg.views.HtmlTreeView.Extension):


 class MetricBase(Metric):
-  """Base class for common metrics.
+  """Base class for common metrics.
+
+  `MetricBase` provides common functionalities for metrics, such as automatic
+  error counting based on whether an example has an error during evaluation.
+  It distinguishes between Object-Oriented Programming (OOP) errors
+  (e.g. `MappingError` during structured output generation) and other errors.
+  Subclasses should implement `_audit_processed` for metric computation on
+  successfully processed examples.
+  """

   oop_errors: Rate | None = Rate()
   non_oop_errors: Rate | None = Rate()

@@ -229,7 +245,13 @@ class MetricBase(Metric):


 class Match(MetricBase):
-  """Metric for matching outputs against
+  """Metric for matching outputs against ground truth.
+
+  This metric computes match and mismatch rates by comparing the output of
+  an example with its ground truth. By default, it looks for a `groundtruth`
+  attribute in `example.input` for comparison. Users can customize this behavior
+  by subclassing `Match` and overriding the `match` method.
+  """

   name = 'match'
   matches: Rate = Rate()

@@ -302,7 +324,14 @@ class Match(MetricBase):


 class Score(MetricBase):
-  """Base class for scoring.
+  """Base class for scoring metrics.
+
+  `Score` is a base class for metrics that assign a numerical score to each
+  example's output (e.g., evaluating quality on a scale of 1-5).
+  It automatically computes the average score across all examples.
+  Subclasses must implement the `score` method to define how an example
+  should be scored.
+  """

   name = 'score'
   average_score: Average = Average()
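The `Score` docstring above requires subclasses to implement a `score` method. A hedged sketch follows; the signature used here (receiving the processed example) is an assumption, since the actual signature is not part of this diff.

```python
from langfun.core.eval.v2 import metrics


class BrevityScore(metrics.Score):
  """Illustrative metric that rewards shorter outputs."""

  def score(self, example) -> float:  # Assumed signature.
    # `example.output` is documented in the Example class earlier in this diff.
    return 1.0 / (1.0 + len(str(example.output)))
```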
langfun/core/eval/v2/progress.py
CHANGED

@@ -21,7 +21,15 @@ import pyglove as pg


 class Progress(pg.Object, pg.views.HtmlTreeView.Extension):
-  """
+  """Represents and tracks the progress of an evaluation.
+
+  The `Progress` class maintains counts of processed, failed, and skipped
+  items in an evaluation, along with timing information (start time, stop time,
+  duration) and an execution summary. It provides properties to check the
+  status of the evaluation (e.g., `is_started`, `is_completed`) and methods
+  to update progress as items are evaluated.
+  It also supports HTML rendering as a progress bar for visualization.
+  """

   num_total: Annotated[
       int | None,
langfun/core/eval/v2/reporting.py
CHANGED

@@ -33,7 +33,21 @@ _EVALULATION_DETAIL_FILE = 'index.html'


 class HtmlReporter(experiment_lib.Plugin):
-  """Plugin for periodically generating HTML reports for the experiment.
+  """Plugin for periodically generating HTML reports for the experiment.
+
+  The `HtmlReporter` plugin generates several HTML files during an experiment
+  run:
+  - A `summary.html` at the root of the run directory, summarizing all
+    evaluations in the experiment.
+  - An `index.html` for each leaf evaluation, detailing the evaluation
+    definition, metrics, and logs.
+  - An HTML file for each example (e.g., `1.html`, `2.html`, ...) within
+    each leaf evaluation's directory, showing the input, output, metadata,
+    and any errors for that example.
+
+  These reports are updated periodically in the background during the run,
+  allowing users to monitor progress in near real-time.
+  """

   summary_interval: Annotated[
       int,
langfun/core/eval/v2/runners.py
CHANGED

@@ -42,7 +42,14 @@ _RUN_MANIFEST = 'run.json'


 class RunnerBase(Runner):
-  """
+  """Base class for runners with plugin support and IO pooling.
+
+  `RunnerBase` provides the basic runner functionalities such as plugin
+  integration for checkpointing, reporting and progress tracking.
+  It also manages a thread pool for background IO operations.
+  Subclasses should implement `_run` and `_evaluate_items` for different
+  execution strategies.
+  """

   tqdm: Annotated[
       bool,

@@ -397,11 +404,12 @@ class RunnerBase(Runner):


 class SequentialRunner(RunnerBase):
-  """
+  """A runner that executes evaluations and examples sequentially.

-
-
-  exceptions
+  The sequential runner executes all evaluations and their examples in the
+  calling thread. Background tasks are also run sequentially, which makes it
+  easier to debug as exceptions from background tasks will be raised
+  immediately.
   """

   NAME = 'sequential'

@@ -426,7 +434,13 @@ class SequentialRunner(RunnerBase):


 class DebugRunner(SequentialRunner):
-  """
+  """A runner for debugging evaluations.
+
+  The debug runner is a sequential runner that only runs the first example
+  of each evaluation, with `raise_if_has_error` enabled. This is useful for
+  quickly identifying issues in evaluation logic during development.
+  Checkpointers are disabled for this runner.
+  """

   NAME = 'debug'


@@ -444,7 +458,13 @@ class DebugRunner(SequentialRunner):


 class ParallelRunner(RunnerBase):
-  """
+  """A runner that executes evaluations and examples in parallel.
+
+  The parallel runner groups evaluations by their required resources
+  (e.g., specific LLMs) and runs evaluations that do not share resources in
+  parallel. Within each evaluation, examples are also processed in parallel
+  using threads, up to `Evaluation.max_workers`.
+  """

   NAME = 'parallel'

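The three runners above register under the `NAME` values 'sequential', 'debug' and 'parallel'. A hedged usage sketch: `MyEval` is the class from the `Evaluation` docstring example earlier in this diff, and passing the runner name to `run()` via a `runner=` keyword is an assumption about the calling convention, not something shown here.

```python
import langfun as lf

evaluation = MyEval(
    lm=lf.llms.Gpt4(),
    metrics=[lf.metrics.Accuracy()],
)
# Select the execution strategy by its registered NAME (assumed keyword).
run_info = evaluation.run('/tmp/my_eval_run', runner='parallel')
```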
langfun/core/langfunc.py
CHANGED

@@ -32,146 +32,43 @@ _TLS_LFUN_CALL_STACK = '_langfunc_callstack'
 # NOTE(daiyip): Only the template string belongs to the positional arguments,
 # all others are keyword-only for clarity.
 @pg.use_init_args(['template_str'])
-class LangFunc(
-
-):
-  r"""Base class for natural-language driven component.
-
-  ``LangFunc`` is a language-driven component that enables users to
-  seamlessly interact with Language Models (LLMs) using a blend of natural
-  language and code. It empowers users to easily modularize prompt/execution
-  logics, compose them, and simplify the creation of Language Model (LLM)-based
-  components and applications.
-
-  LangFunc can be conceptualized as a string template with embeddable code,
-  but it distinguishes itself from traditional template systems in four key
-  ways.
-
-  Firstly, it enables easy modularization of templates along with the required
-  values with OO principles, providing a reusable way for LLM-based content
-  generation. For example:
-
-  ```
-  class FewshotExamples(lf.LangFunc):
-    '''Base for fewshot prompt.
-
-    {% for example in examples %}
-    {{ example }}
-    {% endfor %}
-    '''
-
-  # Usage 1: __init__ time binding.
-  assert FewshotPrompt(examples=['foo', 'bar'])() == 'foo\nbar'
-
-  # Usage 2: __call__ time binding.
-  assert FewshotPrompt()(examples=['foo', 'bar']) == 'foo\nbar'
-
-  class ToolDescription(lf.LangFunc):
-    '''Tool descriptions.
-
-    {% for tool in tools %}
-    {{ tool.description }}
-    {% endfor %}
-    '''
-    # We want to constrain tools to be a list of `Tool` objects.
-    tools: list[Tool]
-
-  # Raises: runtime type checking will fail on [1, 2, 3].
-  ToolDescription(tools=[1, 2, 3])
-  ```
-
-  Secondly, it has the capability to compose multiple LangFuncs together,
-  enabling the accomplishment of complex language tasks with maximum reuse.
-  It allows users to provide program inputs to all the LangFuncs within a
-  composition at the top level, significantly simplifying the process of
-  providing context for users. For example:
-
-  ```
-  class ReAct(lf.LangFunc):
-    '''ReAct prompt for tool-use.
-
-    {{ preamble }}
-    {{ tool_description }}
-    {{ tool_examples }}
-    {{ user_input }}
-    '''
-    # Default preamble, which could be overriden from subclass
-    # or parsed from the `__init__` argument.
-    preamble = 'Please help me on my task based on the following tools.',
-
-  react = ReAct(
-      tool_description=ToolDescription()
-      tool_examples=FewshotExamples(),
-      # Partially bind `tools` and `examples`.
-      tools=my_tools,
-      examples=[t.examples for t in my_tools]
-  )
-
-  # Late bind `user_input` at __call__ time.
-  react(user_input='Help me get a lunch to go, veggie please.' )
-  ```
-
-  Thirdly, it allows the flexibility to encapsulate complex compositions to
-  reusable classes and modify them. For example:
-
-  ```
-  # The compound decorator converts a function into a LangFunc.
-  @lf.compound
-  def react_with_tools(preamble, tools: list[Tool]):
-    return ReAct(
-        preamble=preamble,
-        tool_description=ToolDescription()
-        tool_examples=FewshotExamples(),
-        # Partially bind `tools` and `examples`.
-        tools=my_tools,
-        examples=[t.examples for t in my_tools]
-    )
+class LangFunc(template_lib.Template):
+  r"""Base class for Language-based functions.

-
-
-
+  LangFunc represents a function powered by a language model. It is a subclass
+  of `lf.Template` and can be thought of as a `lf.Template` augmented with an LM
+  and an output transformation. Calling a `lf.LangFunc` is equivalent to calling
+  the LM with the rendered prompt and transforming the output.

-
-  '''
+  LangFunc can be directly constructed and used.

-
-
-  prompt=react_with_tools(
-      preamble=(
-          f'Please help me solve my problem using tools. '
-          f'Current time is {{datetime.datetime.now()}}'),
-      tools=my_tools))
+  ```python
+  import langfun as lf

-
-
+  func = lf.LangFunc("Hello, {{name}}!")
+  print(func(name="Gemini", lm=lf.llms.Gemini25Flash()))
+  # Output: Hello, how are you today?
+  ```

-
-  it could be manipulated programmatically, turned into a space for data
-  sampling, or even tuned by AutoML. For example:
+  Or it can be subclassed:

-
-
+  ```python
+  import langfun as lf

-
-
-      'Help me solve my problem using the following tools:',
-      'Help me with the tools below:',
-      ...
-  ])
-  # Choose any two of the tools for generating data.
-  tools=pg.manyof(2, [
-      google_search(...),
-      doordash(...),
-      ...
-  ])
+  class Compute(lf.LangFunc):
+    '''Compute a simple arithmetic expression.

-
-
+    {{expression}} = ?
+    '''
+    expression: str

-
+    def transform_output(self, lm_output: lf.Message) -> lf.Message:
+      lm_output.metadata.result = float(lm_output.text)
+      return lm_output

-
-
+  r = Compute(expression="1 + 1")(lm=lf.llms.Gemini25Flash())
+  print(r.result)
+  # Output: 2.0

   Final note: always include these capitalized words if you don't want to treat
   the docstr as the template str: THIS IS NOT A TEMPLATE. So as a result, this

@@ -305,6 +202,24 @@ class LangFunc(
       message_cls: Type[message_lib.Message] = message_lib.UserMessage,
       **kwargs,
   ) -> message_lib.Message:
+    """Renders the template and transforms it as LM input message.
+
+    Args:
+      allow_partial: If True, allows partial rendering, which leaves unresolved
+        variables in place in the output text. Otherwise, raises error when
+        there are unresolved variables.
+      implicit: If True, reuse the rendering output if a parent `lf.Template`
+        is rendering current `lf.Template` multiple times. This is important
+        for making sure all references to the same `lf.Template` within a single
+        top-level rendering would return the same result. If False, every call
+        to `render` will trigger the actual rendering process.
+      message_cls: The message class used for creating the return value.
+      **kwargs: Values for template variables, which override values from
+        member attributes or context.
+
+    Returns:
+      A Message object containing the rendered result.
+    """
     lm_input = super().render(
         allow_partial=allow_partial,
         implicit=implicit,
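A small sketch built on the `render()` docstring above: `render()` produces the LM input message without calling the LM, and `allow_partial=True` leaves unresolved variables in place instead of raising an error.

```python
import langfun as lf

func = lf.LangFunc('Hello, {{name}}! Today is {{day}}.')

# All variables supplied as keyword args; returns an lf.Message.
print(func.render(name='Gemini', day='Friday').text)

# Partial rendering keeps the unresolved `day` placeholder in the text.
print(func.render(name='Gemini', allow_partial=True).text)
```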